Merge pull request #10062 from erik-krogh/redosPrefix

JS: use the shared regular expression libraries in `js/case-sensitive-middleware-path`
2026-07-13 15:28:16 +02:00 · 2022-08-25 12:57:16 +02:00
parent b0ae12850d f1799ae3d2
commit ba1ad00d2a
7 changed files with 606 additions and 485 deletions
--- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll
+++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
   * Holds if this root term is relevant to the ReDoS analysis.
   */
  predicate isRelevant() {
-    // there is at least one repetition
-    getRoot(any(InfiniteRepetitionQuantifier q)) = this and
    // is actually used as a RegExp
    this.isUsedAsRegExp() and
    // not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);

+/**
+ * Holds if `state` is a candidate for ReDoS.
+ */
+signature predicate isCandidateSig(State state);
+
+/**
+ * Predicates for constructing a prefix string that leads to a given state.
+ */
+module PrefixConstruction<isCandidateSig/1 isCandidate> {
+  /**
+   * Holds if `state` is the textually last start state for the regular expression.
+   */
+  private predicate lastStartState(State state) {
+    exists(RegExpRoot root |
+      state =
+        max(State s, Location l |
+          s = stateInRelevantRegexp() and
+          isStartState(s) and
+          getRoot(s.getRepr()) = root and
+          l = s.getRepr().getLocation()
+        |
+          s
+          order by
+            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
+            l.getEndLine()
+        )
+    )
+  }
+
+  /**
+   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
+   */
+  private predicate existsTransition(State a, State b) { delta(a, _, b) }
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
+   */
+  int prefixLength(State start, State state) =
+    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the start state.
+   */
+  private int lengthFromStart(State state) { result = prefixLength(_, state) }
+
+  /**
+   * Gets a string for which the regular expression will reach `state`.
+   *
+   * Has at most one result for any given `state`.
+   * This predicate will not always have a result even if there is a ReDoS issue in
+   * the regular expression.
+   */
+  string prefix(State state) {
+    lastStartState(state) and
+    result = ""
+    or
+    // the search stops past the last redos candidate state.
+    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
+    exists(State prev |
+      // select a unique predecessor (by an arbitrary measure)
+      prev =
+        min(State s, Location loc |
+          lengthFromStart(s) = lengthFromStart(state) - 1 and
+          loc = s.getRepr().getLocation() and
+          delta(s, _, state)
+        |
+          s
+          order by
+            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
+            s.getRepr().toString()
+        )
+    |
+      // greedy search for the shortest prefix
+      result = prefix(prev) and delta(prev, Epsilon(), state)
+      or
+      not delta(prev, Epsilon(), state) and
+      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
+    )
+  }
+
+  /**
+   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
+   */
+  private string getCanonicalEdgeChar(State prev, State next) {
+    result =
+      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
+  }
+
+  /** Gets a state within a regular expression that contains a candidate state. */
+  pragma[noinline]
+  State stateInRelevantRegexp() {
+    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
+  }
+}
+
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }

-  /**
-   * Predicates for constructing a prefix string that leads to a given state.
-   */
-  private module PrefixConstruction {
-    /**
-     * Holds if `state` is the textually last start state for the regular expression.
-     */
-    private predicate lastStartState(State state) {
-      exists(RegExpRoot root |
-        state =
-          max(State s, Location l |
-            s = stateInPumpableRegexp() and
-            isStartState(s) and
-            getRoot(s.getRepr()) = root and
-            l = s.getRepr().getLocation()
-          |
-            s
-            order by
-              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
-              l.getEndLine()
-          )
-      )
-    }
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }

-    /**
-     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
-     */
-    private predicate existsTransition(State a, State b) { delta(a, _, b) }
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
-     */
-    int prefixLength(State start, State state) =
-      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the start state.
-     */
-    private int lengthFromStart(State state) { result = prefixLength(_, state) }
-
-    /**
-     * Gets a string for which the regular expression will reach `state`.
-     *
-     * Has at most one result for any given `state`.
-     * This predicate will not always have a result even if there is a ReDoS issue in
-     * the regular expression.
-     */
-    string prefix(State state) {
-      lastStartState(state) and
-      result = ""
-      or
-      // the search stops past the last redos candidate state.
-      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
-      exists(State prev |
-        // select a unique predecessor (by an arbitrary measure)
-        prev =
-          min(State s, Location loc |
-            lengthFromStart(s) = lengthFromStart(state) - 1 and
-            loc = s.getRepr().getLocation() and
-            delta(s, _, state)
-          |
-            s
-            order by
-              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
-              s.getRepr().toString()
-          )
-      |
-        // greedy search for the shortest prefix
-        result = prefix(prev) and delta(prev, Epsilon(), state)
-        or
-        not delta(prev, Epsilon(), state) and
-        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
-      )
-    }
-
-    /**
-     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
-     */
-    private string getCanonicalEdgeChar(State prev, State next) {
-      result =
-        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
-    }
-
-    /** Gets a state within a regular expression that has a pumpable state. */
-    pragma[noinline]
-    State stateInPumpableRegexp() {
-      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
-    }
-  }
+  import PrefixConstruction<isCandidateState/1> as Prefix

  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
-    import PrefixConstruction
-
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
    }

    /**
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      prev = Prefix::stateInRelevantRegexp() and
+      next = Prefix::stateInRelevantRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }

@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
      result = getAnInputSymbolMatching(char)
    }

+    pragma[noinline]
+    RegExpRoot relevantRoot() {
+      exists(RegExpTerm term, State s |
+        s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
+      )
+    }
+
    /**
     * Gets a char used for finding possible suffixes inside `root`.
     */
    pragma[noinline]
    private string relevant(RegExpRoot root) {
-      exists(ascii(result)) and exists(root)
-      or
-      exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
-      or
-      // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
-      // The three chars must be kept in sync with `hasSimpleRejectEdge`.
-      result = ["|", "\n", "Z"] and exists(root)
+      root = relevantRoot() and
+      (
+        exists(ascii(result)) and exists(root)
+        or
+        exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
+        or
+        // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
+        // The three chars must be kept in sync with `hasSimpleRejectEdge`.
+        result = ["|", "\n", "Z"] and exists(root)
+      )
    }

    /**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }

--- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll
+++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
   * Holds if this root term is relevant to the ReDoS analysis.
   */
  predicate isRelevant() {
-    // there is at least one repetition
-    getRoot(any(InfiniteRepetitionQuantifier q)) = this and
    // is actually used as a RegExp
    this.isUsedAsRegExp() and
    // not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);

+/**
+ * Holds if `state` is a candidate for ReDoS.
+ */
+signature predicate isCandidateSig(State state);
+
+/**
+ * Predicates for constructing a prefix string that leads to a given state.
+ */
+module PrefixConstruction<isCandidateSig/1 isCandidate> {
+  /**
+   * Holds if `state` is the textually last start state for the regular expression.
+   */
+  private predicate lastStartState(State state) {
+    exists(RegExpRoot root |
+      state =
+        max(State s, Location l |
+          s = stateInRelevantRegexp() and
+          isStartState(s) and
+          getRoot(s.getRepr()) = root and
+          l = s.getRepr().getLocation()
+        |
+          s
+          order by
+            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
+            l.getEndLine()
+        )
+    )
+  }
+
+  /**
+   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
+   */
+  private predicate existsTransition(State a, State b) { delta(a, _, b) }
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
+   */
+  int prefixLength(State start, State state) =
+    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the start state.
+   */
+  private int lengthFromStart(State state) { result = prefixLength(_, state) }
+
+  /**
+   * Gets a string for which the regular expression will reach `state`.
+   *
+   * Has at most one result for any given `state`.
+   * This predicate will not always have a result even if there is a ReDoS issue in
+   * the regular expression.
+   */
+  string prefix(State state) {
+    lastStartState(state) and
+    result = ""
+    or
+    // the search stops past the last redos candidate state.
+    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
+    exists(State prev |
+      // select a unique predecessor (by an arbitrary measure)
+      prev =
+        min(State s, Location loc |
+          lengthFromStart(s) = lengthFromStart(state) - 1 and
+          loc = s.getRepr().getLocation() and
+          delta(s, _, state)
+        |
+          s
+          order by
+            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
+            s.getRepr().toString()
+        )
+    |
+      // greedy search for the shortest prefix
+      result = prefix(prev) and delta(prev, Epsilon(), state)
+      or
+      not delta(prev, Epsilon(), state) and
+      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
+    )
+  }
+
+  /**
+   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
+   */
+  private string getCanonicalEdgeChar(State prev, State next) {
+    result =
+      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
+  }
+
+  /** Gets a state within a regular expression that contains a candidate state. */
+  pragma[noinline]
+  State stateInRelevantRegexp() {
+    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
+  }
+}
+
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }

-  /**
-   * Predicates for constructing a prefix string that leads to a given state.
-   */
-  private module PrefixConstruction {
-    /**
-     * Holds if `state` is the textually last start state for the regular expression.
-     */
-    private predicate lastStartState(State state) {
-      exists(RegExpRoot root |
-        state =
-          max(State s, Location l |
-            s = stateInPumpableRegexp() and
-            isStartState(s) and
-            getRoot(s.getRepr()) = root and
-            l = s.getRepr().getLocation()
-          |
-            s
-            order by
-              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
-              l.getEndLine()
-          )
-      )
-    }
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }

-    /**
-     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
-     */
-    private predicate existsTransition(State a, State b) { delta(a, _, b) }
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
-     */
-    int prefixLength(State start, State state) =
-      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the start state.
-     */
-    private int lengthFromStart(State state) { result = prefixLength(_, state) }
-
-    /**
-     * Gets a string for which the regular expression will reach `state`.
-     *
-     * Has at most one result for any given `state`.
-     * This predicate will not always have a result even if there is a ReDoS issue in
-     * the regular expression.
-     */
-    string prefix(State state) {
-      lastStartState(state) and
-      result = ""
-      or
-      // the search stops past the last redos candidate state.
-      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
-      exists(State prev |
-        // select a unique predecessor (by an arbitrary measure)
-        prev =
-          min(State s, Location loc |
-            lengthFromStart(s) = lengthFromStart(state) - 1 and
-            loc = s.getRepr().getLocation() and
-            delta(s, _, state)
-          |
-            s
-            order by
-              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
-              s.getRepr().toString()
-          )
-      |
-        // greedy search for the shortest prefix
-        result = prefix(prev) and delta(prev, Epsilon(), state)
-        or
-        not delta(prev, Epsilon(), state) and
-        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
-      )
-    }
-
-    /**
-     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
-     */
-    private string getCanonicalEdgeChar(State prev, State next) {
-      result =
-        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
-    }
-
-    /** Gets a state within a regular expression that has a pumpable state. */
-    pragma[noinline]
-    State stateInPumpableRegexp() {
-      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
-    }
-  }
+  import PrefixConstruction<isCandidateState/1> as Prefix

  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
-    import PrefixConstruction
-
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
    }

    /**
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      prev = Prefix::stateInRelevantRegexp() and
+      next = Prefix::stateInRelevantRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }

@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
      result = getAnInputSymbolMatching(char)
    }

+    pragma[noinline]
+    RegExpRoot relevantRoot() {
+      exists(RegExpTerm term, State s |
+        s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
+      )
+    }
+
    /**
     * Gets a char used for finding possible suffixes inside `root`.
     */
    pragma[noinline]
    private string relevant(RegExpRoot root) {
-      exists(ascii(result)) and exists(root)
-      or
-      exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
-      or
-      // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
-      // The three chars must be kept in sync with `hasSimpleRejectEdge`.
-      result = ["|", "\n", "Z"] and exists(root)
+      root = relevantRoot() and
+      (
+        exists(ascii(result)) and exists(root)
+        or
+        exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
+        or
+        // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
+        // The three chars must be kept in sync with `hasSimpleRejectEdge`.
+        result = ["|", "\n", "Z"] and exists(root)
+      )
    }

    /**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }

--- a/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql
+++ b/javascript/ql/src/Security/CWE-178/CaseSensitiveMiddlewarePath.ql
@@ -20,55 +20,29 @@ string toOtherCase(string s) {
  if s.regexpMatch(".*[a-z].*") then result = s.toUpperCase() else result = s.toLowerCase()
 }

-RegExpCharacterClass getEnclosingClass(RegExpTerm term) {
-  term = result.getAChild()
-  or
-  term = result.getAChild().(RegExpRange).getAChild()
-}
+import semmle.javascript.security.regexp.NfaUtils as NfaUtils

-/**
- * Holds if `term` seems to distinguish between upper and lower case letters, assuming the `i` flag is not present.
- */
-pragma[inline]
-predicate isLikelyCaseSensitiveRegExp(RegExpTerm term) {
-  exists(RegExpConstant const |
-    const = term.getAChild*() and
-    const.getValue().regexpMatch(".*[a-zA-Z].*") and
-    not getEnclosingClass(const).getAChild().(RegExpConstant).getValue() =
-      toOtherCase(const.getValue()) and
-    not const.getParent*() instanceof RegExpNegativeLookahead and
-    not const.getParent*() instanceof RegExpNegativeLookbehind
+/** Holds if `s` is a relevant regexp term were we want to compute a string that matches the term (for `getCaseSensitiveBypassExample`). */
+predicate isCand(NfaUtils::State s) {
+  s.getRepr() instanceof NfaUtils::RegExpRoot and
+  exists(DataFlow::RegExpCreationNode creation |
+    isCaseSensitiveMiddleware(_, creation, _) and
+    s.getRepr().getRootTerm() = creation.getRoot()
  )
 }

-/**
- * Gets a string matched by `term`, or part of such a string.
- */
+import NfaUtils::PrefixConstruction<isCand/1> as Prefix
+
+/** Gets a string matched by `term`. */
 string getExampleString(RegExpTerm term) {
-  result = term.getAMatchedString()
-  or
-  // getAMatchedString does not recurse into sequences. Perform one step manually.
-  exists(RegExpSequence seq | seq = term |
-    result =
-      strictconcat(RegExpTerm child, int i, string text |
-        child = seq.getChild(i) and
-        (
-          text = child.getAMatchedString()
-          or
-          not exists(child.getAMatchedString()) and
-          text = ""
-        )
-      |
-        text order by i
-      )
-  )
+  result = Prefix::prefix(any(NfaUtils::State s | s.getRepr() = term))
 }

 string getCaseSensitiveBypassExample(RegExpTerm term) {
-  exists(string example |
-    example = getExampleString(term) and
-    result = toOtherCase(example) and
-    result != example // getting an example string is approximate; ensure we got a proper case-change example
+  exists(string byPassExample |
+    byPassExample = getExampleString(term) and
+    result = toOtherCase(byPassExample) and
+    result != byPassExample // getting an byPassExample string is approximate; ensure we got a proper case-change byPassExample
  )
 }

@@ -89,7 +63,6 @@ predicate isCaseSensitiveMiddleware(
    ) and
    arg = call.getArgument(0) and
    regexp.getAReference().flowsTo(arg) and
-    isLikelyCaseSensitiveRegExp(regexp.getRoot()) and
    exists(string flags |
      flags = regexp.getFlags() and
      not RegExp::isIgnoreCase(flags)
@@ -108,14 +81,64 @@ predicate isGuardedCaseInsensitiveEndpoint(
  )
 }

+/**
+ * Gets an example path that will hit `endpoint`.
+ * Query parameters (e.g. the ":param" in "/foo/:param") have been replaced with example values.
+ */
+string getAnEndpointExample(Routing::RouteSetup endpoint) {
+  exists(string raw |
+    raw = endpoint.getRelativePath() and
+    result = raw.regexpReplaceAll(":\\w+\\b|\\*", ["a", "1"])
+  )
+}
+
+import semmle.javascript.security.regexp.RegexpMatching as RegexpMatching
+
+NfaUtils::RegExpRoot getARoot(DataFlow::RegExpCreationNode creator) {
+  result.getRootTerm() = creator.getRoot()
+}
+
+/**
+ * Holds if the regexp matcher should test whether `root` matches `str`.
+ * The result is used to test whether a case-sensitive bypass exists.
+ */
+predicate isMatchingCandidate(
+  RegexpMatching::RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
+) {
+  exists(
+    Routing::RouteSetup middleware, Routing::RouteSetup endPoint,
+    DataFlow::RegExpCreationNode regexp
+  |
+    isCaseSensitiveMiddleware(middleware, regexp, _) and
+    isGuardedCaseInsensitiveEndpoint(endPoint, middleware)
+  |
+    root = regexp.getRoot() and
+    exists(getCaseSensitiveBypassExample(getARoot(regexp))) and
+    ignorePrefix = true and
+    testWithGroups = false and
+    str =
+      [
+        getCaseSensitiveBypassExample(getARoot(regexp)), getAnEndpointExample(endPoint),
+        toOtherCase(getAnEndpointExample(endPoint))
+      ]
+  )
+}
+
+import RegexpMatching::RegexpMatching<isMatchingCandidate/4> as Matcher
+
 from
  DataFlow::RegExpCreationNode regexp, Routing::RouteSetup middleware, Routing::RouteSetup endpoint,
-  DataFlow::Node arg, string example
+  DataFlow::Node arg, string byPassExample, string endpointExample, string byPassEndPoint
 where
  isCaseSensitiveMiddleware(middleware, regexp, arg) and
-  example = getCaseSensitiveBypassExample(regexp.getRoot()) and
+  byPassExample = getCaseSensitiveBypassExample(getARoot(regexp)) and
  isGuardedCaseInsensitiveEndpoint(endpoint, middleware) and
-  exists(endpoint.getRelativePath().toLowerCase().indexOf(example.toLowerCase()))
+  // only report one example.
+  endpointExample =
+    min(string ex | ex = getAnEndpointExample(endpoint) and Matcher::matches(regexp.getRoot(), ex)) and
+  not Matcher::matches(regexp.getRoot(), byPassExample) and
+  byPassEndPoint = toOtherCase(endpointExample) and
+  not Matcher::matches(regexp.getRoot(), byPassEndPoint)
 select arg,
  "This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '"
-    + example + "' will bypass the middleware.", regexp, "pattern", endpoint, "here"
+    + byPassEndPoint + "' will bypass the middleware.", regexp, "pattern", endpoint, "here"
--- a/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-178/CaseSensitiveMiddlewarePath.expected
@@ -1,3 +1,7 @@
-| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
-| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
-| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
+| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
+| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
+| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
+| tst.js:64:5:64:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAR/1' will bypass the middleware. | tst.js:64:5:64:28 | new Reg ... (.*)?') | pattern | tst.js:73:1:74:2 | app.get ... ware\\n}) | here |
+| tst.js:76:9:76:20 | /\\/baz\\/bla/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ/BLA' will bypass the middleware. | tst.js:76:9:76:20 | /\\/baz\\/bla/ | pattern | tst.js:77:1:79:2 | app.get ...  });\\n}) | here |
+| tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ3/A' will bypass the middleware. | tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | pattern | tst.js:87:1:89:2 | app.get ...  });\\n}) | here |
+| tst.js:91:9:91:40 | /\\/summ ... ntGame/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/CURRENTGAME' will bypass the middleware. | tst.js:91:9:91:40 | /\\/summ ... ntGame/ | pattern | tst.js:93:1:95:2 | app.get ... O");\\n}) | here |
--- a/javascript/ql/test/query-tests/Security/CWE-178/tst.js
+++ b/javascript/ql/test/query-tests/Security/CWE-178/tst.js
@@ -59,3 +59,37 @@ app.use(/\/foo\/([0-9]+)/i, (req, res) => { // OK - not middleware (also case in

 app.get('/foo/:param', (req, res) => { // OK - not a middleware
 });
+
+app.get(
+    new RegExp('^/bar(.*)?'), // NOT OK - case sensitive
+    unknown(),
+    function(req, res, next) {
+        if (req.params.blah) {
+            next();
+        }
+    }
+);
+
+app.get('/bar/*', (req, res) => { // OK - not a middleware
+});
+
+app.use(/\/baz\/bla/, unknown()); // NOT OK - case sensitive
+app.get('/baz/bla', (req, resp) => {
+  resp.send({ test: 123 });
+});
+
+app.use(/\/[Bb][Aa][Zz]2\/[aA]/, unknown()); // OK - not case sensitive
+app.get('/baz2/a', (req, resp) => {
+  resp.send({ test: 123 });
+});
+
+app.use(/\/[Bb][Aa][Zz]3\/[a]/, unknown()); // NOT OK - case sensitive
+app.get('/baz3/a', (req, resp) => {
+  resp.send({ test: 123 });
+});
+
+app.use(/\/summonerByName|\/currentGame/,apiLimit1, apiLimit2);
+
+app.get('/currentGame', function (req, res) {
+    res.send("FOO");
+});
--- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
+++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
   * Holds if this root term is relevant to the ReDoS analysis.
   */
  predicate isRelevant() {
-    // there is at least one repetition
-    getRoot(any(InfiniteRepetitionQuantifier q)) = this and
    // is actually used as a RegExp
    this.isUsedAsRegExp() and
    // not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);

+/**
+ * Holds if `state` is a candidate for ReDoS.
+ */
+signature predicate isCandidateSig(State state);
+
+/**
+ * Predicates for constructing a prefix string that leads to a given state.
+ */
+module PrefixConstruction<isCandidateSig/1 isCandidate> {
+  /**
+   * Holds if `state` is the textually last start state for the regular expression.
+   */
+  private predicate lastStartState(State state) {
+    exists(RegExpRoot root |
+      state =
+        max(State s, Location l |
+          s = stateInRelevantRegexp() and
+          isStartState(s) and
+          getRoot(s.getRepr()) = root and
+          l = s.getRepr().getLocation()
+        |
+          s
+          order by
+            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
+            l.getEndLine()
+        )
+    )
+  }
+
+  /**
+   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
+   */
+  private predicate existsTransition(State a, State b) { delta(a, _, b) }
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
+   */
+  int prefixLength(State start, State state) =
+    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the start state.
+   */
+  private int lengthFromStart(State state) { result = prefixLength(_, state) }
+
+  /**
+   * Gets a string for which the regular expression will reach `state`.
+   *
+   * Has at most one result for any given `state`.
+   * This predicate will not always have a result even if there is a ReDoS issue in
+   * the regular expression.
+   */
+  string prefix(State state) {
+    lastStartState(state) and
+    result = ""
+    or
+    // the search stops past the last redos candidate state.
+    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
+    exists(State prev |
+      // select a unique predecessor (by an arbitrary measure)
+      prev =
+        min(State s, Location loc |
+          lengthFromStart(s) = lengthFromStart(state) - 1 and
+          loc = s.getRepr().getLocation() and
+          delta(s, _, state)
+        |
+          s
+          order by
+            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
+            s.getRepr().toString()
+        )
+    |
+      // greedy search for the shortest prefix
+      result = prefix(prev) and delta(prev, Epsilon(), state)
+      or
+      not delta(prev, Epsilon(), state) and
+      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
+    )
+  }
+
+  /**
+   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
+   */
+  private string getCanonicalEdgeChar(State prev, State next) {
+    result =
+      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
+  }
+
+  /** Gets a state within a regular expression that contains a candidate state. */
+  pragma[noinline]
+  State stateInRelevantRegexp() {
+    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
+  }
+}
+
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }

-  /**
-   * Predicates for constructing a prefix string that leads to a given state.
-   */
-  private module PrefixConstruction {
-    /**
-     * Holds if `state` is the textually last start state for the regular expression.
-     */
-    private predicate lastStartState(State state) {
-      exists(RegExpRoot root |
-        state =
-          max(State s, Location l |
-            s = stateInPumpableRegexp() and
-            isStartState(s) and
-            getRoot(s.getRepr()) = root and
-            l = s.getRepr().getLocation()
-          |
-            s
-            order by
-              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
-              l.getEndLine()
-          )
-      )
-    }
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }

-    /**
-     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
-     */
-    private predicate existsTransition(State a, State b) { delta(a, _, b) }
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
-     */
-    int prefixLength(State start, State state) =
-      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the start state.
-     */
-    private int lengthFromStart(State state) { result = prefixLength(_, state) }
-
-    /**
-     * Gets a string for which the regular expression will reach `state`.
-     *
-     * Has at most one result for any given `state`.
-     * This predicate will not always have a result even if there is a ReDoS issue in
-     * the regular expression.
-     */
-    string prefix(State state) {
-      lastStartState(state) and
-      result = ""
-      or
-      // the search stops past the last redos candidate state.
-      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
-      exists(State prev |
-        // select a unique predecessor (by an arbitrary measure)
-        prev =
-          min(State s, Location loc |
-            lengthFromStart(s) = lengthFromStart(state) - 1 and
-            loc = s.getRepr().getLocation() and
-            delta(s, _, state)
-          |
-            s
-            order by
-              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
-              s.getRepr().toString()
-          )
-      |
-        // greedy search for the shortest prefix
-        result = prefix(prev) and delta(prev, Epsilon(), state)
-        or
-        not delta(prev, Epsilon(), state) and
-        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
-      )
-    }
-
-    /**
-     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
-     */
-    private string getCanonicalEdgeChar(State prev, State next) {
-      result =
-        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
-    }
-
-    /** Gets a state within a regular expression that has a pumpable state. */
-    pragma[noinline]
-    State stateInPumpableRegexp() {
-      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
-    }
-  }
+  import PrefixConstruction<isCandidateState/1> as Prefix

  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
-    import PrefixConstruction
-
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
    }

    /**
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      prev = Prefix::stateInRelevantRegexp() and
+      next = Prefix::stateInRelevantRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }

@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
      result = getAnInputSymbolMatching(char)
    }

+    pragma[noinline]
+    RegExpRoot relevantRoot() {
+      exists(RegExpTerm term, State s |
+        s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
+      )
+    }
+
    /**
     * Gets a char used for finding possible suffixes inside `root`.
     */
    pragma[noinline]
    private string relevant(RegExpRoot root) {
-      exists(ascii(result)) and exists(root)
-      or
-      exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
-      or
-      // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
-      // The three chars must be kept in sync with `hasSimpleRejectEdge`.
-      result = ["|", "\n", "Z"] and exists(root)
+      root = relevantRoot() and
+      (
+        exists(ascii(result)) and exists(root)
+        or
+        exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
+        or
+        // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
+        // The three chars must be kept in sync with `hasSimpleRejectEdge`.
+        result = ["|", "\n", "Z"] and exists(root)
+      )
    }

    /**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }

--- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll
+++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
   * Holds if this root term is relevant to the ReDoS analysis.
   */
  predicate isRelevant() {
-    // there is at least one repetition
-    getRoot(any(InfiniteRepetitionQuantifier q)) = this and
    // is actually used as a RegExp
    this.isUsedAsRegExp() and
    // not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);

+/**
+ * Holds if `state` is a candidate for ReDoS.
+ */
+signature predicate isCandidateSig(State state);
+
+/**
+ * Predicates for constructing a prefix string that leads to a given state.
+ */
+module PrefixConstruction<isCandidateSig/1 isCandidate> {
+  /**
+   * Holds if `state` is the textually last start state for the regular expression.
+   */
+  private predicate lastStartState(State state) {
+    exists(RegExpRoot root |
+      state =
+        max(State s, Location l |
+          s = stateInRelevantRegexp() and
+          isStartState(s) and
+          getRoot(s.getRepr()) = root and
+          l = s.getRepr().getLocation()
+        |
+          s
+          order by
+            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
+            l.getEndLine()
+        )
+    )
+  }
+
+  /**
+   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
+   */
+  private predicate existsTransition(State a, State b) { delta(a, _, b) }
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
+   */
+  int prefixLength(State start, State state) =
+    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
+
+  /**
+   * Gets the minimum number of transitions it takes to reach `state` from the start state.
+   */
+  private int lengthFromStart(State state) { result = prefixLength(_, state) }
+
+  /**
+   * Gets a string for which the regular expression will reach `state`.
+   *
+   * Has at most one result for any given `state`.
+   * This predicate will not always have a result even if there is a ReDoS issue in
+   * the regular expression.
+   */
+  string prefix(State state) {
+    lastStartState(state) and
+    result = ""
+    or
+    // the search stops past the last redos candidate state.
+    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
+    exists(State prev |
+      // select a unique predecessor (by an arbitrary measure)
+      prev =
+        min(State s, Location loc |
+          lengthFromStart(s) = lengthFromStart(state) - 1 and
+          loc = s.getRepr().getLocation() and
+          delta(s, _, state)
+        |
+          s
+          order by
+            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
+            s.getRepr().toString()
+        )
+    |
+      // greedy search for the shortest prefix
+      result = prefix(prev) and delta(prev, Epsilon(), state)
+      or
+      not delta(prev, Epsilon(), state) and
+      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
+    )
+  }
+
+  /**
+   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
+   */
+  private string getCanonicalEdgeChar(State prev, State next) {
+    result =
+      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
+  }
+
+  /** Gets a state within a regular expression that contains a candidate state. */
+  pragma[noinline]
+  State stateInRelevantRegexp() {
+    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
+  }
+}
+
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }

-  /**
-   * Predicates for constructing a prefix string that leads to a given state.
-   */
-  private module PrefixConstruction {
-    /**
-     * Holds if `state` is the textually last start state for the regular expression.
-     */
-    private predicate lastStartState(State state) {
-      exists(RegExpRoot root |
-        state =
-          max(State s, Location l |
-            s = stateInPumpableRegexp() and
-            isStartState(s) and
-            getRoot(s.getRepr()) = root and
-            l = s.getRepr().getLocation()
-          |
-            s
-            order by
-              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
-              l.getEndLine()
-          )
-      )
-    }
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }

-    /**
-     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
-     */
-    private predicate existsTransition(State a, State b) { delta(a, _, b) }
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
-     */
-    int prefixLength(State start, State state) =
-      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
-
-    /**
-     * Gets the minimum number of transitions it takes to reach `state` from the start state.
-     */
-    private int lengthFromStart(State state) { result = prefixLength(_, state) }
-
-    /**
-     * Gets a string for which the regular expression will reach `state`.
-     *
-     * Has at most one result for any given `state`.
-     * This predicate will not always have a result even if there is a ReDoS issue in
-     * the regular expression.
-     */
-    string prefix(State state) {
-      lastStartState(state) and
-      result = ""
-      or
-      // the search stops past the last redos candidate state.
-      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
-      exists(State prev |
-        // select a unique predecessor (by an arbitrary measure)
-        prev =
-          min(State s, Location loc |
-            lengthFromStart(s) = lengthFromStart(state) - 1 and
-            loc = s.getRepr().getLocation() and
-            delta(s, _, state)
-          |
-            s
-            order by
-              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
-              s.getRepr().toString()
-          )
-      |
-        // greedy search for the shortest prefix
-        result = prefix(prev) and delta(prev, Epsilon(), state)
-        or
-        not delta(prev, Epsilon(), state) and
-        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
-      )
-    }
-
-    /**
-     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
-     */
-    private string getCanonicalEdgeChar(State prev, State next) {
-      result =
-        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
-    }
-
-    /** Gets a state within a regular expression that has a pumpable state. */
-    pragma[noinline]
-    State stateInPumpableRegexp() {
-      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
-    }
-  }
+  import PrefixConstruction<isCandidateState/1> as Prefix

  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
-    import PrefixConstruction
-
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
    }

    /**
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInRelevantRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      prev = Prefix::stateInRelevantRegexp() and
+      next = Prefix::stateInRelevantRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }

@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
      result = getAnInputSymbolMatching(char)
    }

+    pragma[noinline]
+    RegExpRoot relevantRoot() {
+      exists(RegExpTerm term, State s |
+        s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
+      )
+    }
+
    /**
     * Gets a char used for finding possible suffixes inside `root`.
     */
    pragma[noinline]
    private string relevant(RegExpRoot root) {
-      exists(ascii(result)) and exists(root)
-      or
-      exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
-      or
-      // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
-      // The three chars must be kept in sync with `hasSimpleRejectEdge`.
-      result = ["|", "\n", "Z"] and exists(root)
+      root = relevantRoot() and
+      (
+        exists(ascii(result)) and exists(root)
+        or
+        exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
+        or
+        // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
+        // The three chars must be kept in sync with `hasSimpleRejectEdge`.
+        result = ["|", "\n", "Z"] and exists(root)
+      )
    }

    /**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }