move the PrefixConstruction module out of the ReDoSPruning module

2025-12-20 18:56:32 +01:00 · 2022-08-16 11:13:47 +02:00
parent 70ec70940a
commit 473bc92e2d
4 changed files with 428 additions and 400 deletions
--- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll
+++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll
@@ -877,6 +877,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);
 /**
 * Holds if `state` is a candidate for ReDoS.
 */
 signature predicate isCandidateSig(State state);
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 module PrefixConstruction<isCandidateSig/1 isCandidate> {
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
        max(State s, Location l |
          s = stateInPumpableRegexp() and
          isStartState(s) and
          getRoot(s.getRepr()) = root and
          l = s.getRepr().getLocation()
        |
          s
          order by
            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
            l.getEndLine()
        )
    )
  }
  /**
   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
   */
  private predicate existsTransition(State a, State b) { delta(a, _, b) }
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
   */
  int prefixLength(State start, State state) =
    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the start state.
   */
  private int lengthFromStart(State state) { result = prefixLength(_, state) }
  /**
   * Gets a string for which the regular expression will reach `state`.
   *
   * Has at most one result for any given `state`.
   * This predicate will not always have a result even if there is a ReDoS issue in
   * the regular expression.
   */
  string prefix(State state) {
    lastStartState(state) and
    result = ""
    or
    // the search stops past the last redos candidate state.
    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
    exists(State prev |
      // select a unique predecessor (by an arbitrary measure)
      prev =
        min(State s, Location loc |
          lengthFromStart(s) = lengthFromStart(state) - 1 and
          loc = s.getRepr().getLocation() and
          delta(s, _, state)
        |
          s
          order by
            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
            s.getRepr().toString()
        )
    |
      // greedy search for the shortest prefix
      result = prefix(prev) and delta(prev, Epsilon(), state)
      or
      not delta(prev, Epsilon(), state) and
      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
    )
  }
  /**
   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
   */
  private string getCanonicalEdgeChar(State prev, State next) {
    result =
      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
  }
  /** Gets a state within a regular expression that has a pumpable state. */
  pragma[noinline]
  State stateInPumpableRegexp() {
    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
  }
 }
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
-  /**
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
   * Predicates for constructing a prefix string that leads to a given state.
   */
  private module PrefixConstruction {
    /**
     * Holds if `state` is the textually last start state for the regular expression.
     */
    private predicate lastStartState(State state) {
      exists(RegExpRoot root |
        state =
          max(State s, Location l |
            s = stateInPumpableRegexp() and
            isStartState(s) and
            getRoot(s.getRepr()) = root and
            l = s.getRepr().getLocation()
          |
            s
            order by
              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
              l.getEndLine()
          )
      )
    }
-    /**
+  import PrefixConstruction<isCandidateState/1> as Prefix
     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
     */
    private predicate existsTransition(State a, State b) { delta(a, _, b) }
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
     */
    int prefixLength(State start, State state) =
      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the start state.
     */
    private int lengthFromStart(State state) { result = prefixLength(_, state) }
    /**
     * Gets a string for which the regular expression will reach `state`.
     *
     * Has at most one result for any given `state`.
     * This predicate will not always have a result even if there is a ReDoS issue in
     * the regular expression.
     */
    string prefix(State state) {
      lastStartState(state) and
      result = ""
      or
      // the search stops past the last redos candidate state.
      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
      exists(State prev |
        // select a unique predecessor (by an arbitrary measure)
        prev =
          min(State s, Location loc |
            lengthFromStart(s) = lengthFromStart(state) - 1 and
            loc = s.getRepr().getLocation() and
            delta(s, _, state)
          |
            s
            order by
              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
              s.getRepr().toString()
          )
      |
        // greedy search for the shortest prefix
        result = prefix(prev) and delta(prev, Epsilon(), state)
        or
        not delta(prev, Epsilon(), state) and
        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
      )
    }
    /**
     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
     */
    private string getCanonicalEdgeChar(State prev, State next) {
      result =
        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
    }
    /** Gets a state within a regular expression that has a pumpable state. */
    pragma[noinline]
    State stateInPumpableRegexp() {
      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
    }
  }
  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
    import PrefixConstruction
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1059,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
    }
    /**
@@ -1060,7 +1067,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
+      prev = Prefix::stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      next = Prefix::stateInPumpableRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }
@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }
--- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll
+++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll
@@ -877,6 +877,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);
 /**
 * Holds if `state` is a candidate for ReDoS.
 */
 signature predicate isCandidateSig(State state);
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 module PrefixConstruction<isCandidateSig/1 isCandidate> {
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
        max(State s, Location l |
          s = stateInPumpableRegexp() and
          isStartState(s) and
          getRoot(s.getRepr()) = root and
          l = s.getRepr().getLocation()
        |
          s
          order by
            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
            l.getEndLine()
        )
    )
  }
  /**
   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
   */
  private predicate existsTransition(State a, State b) { delta(a, _, b) }
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
   */
  int prefixLength(State start, State state) =
    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the start state.
   */
  private int lengthFromStart(State state) { result = prefixLength(_, state) }
  /**
   * Gets a string for which the regular expression will reach `state`.
   *
   * Has at most one result for any given `state`.
   * This predicate will not always have a result even if there is a ReDoS issue in
   * the regular expression.
   */
  string prefix(State state) {
    lastStartState(state) and
    result = ""
    or
    // the search stops past the last redos candidate state.
    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
    exists(State prev |
      // select a unique predecessor (by an arbitrary measure)
      prev =
        min(State s, Location loc |
          lengthFromStart(s) = lengthFromStart(state) - 1 and
          loc = s.getRepr().getLocation() and
          delta(s, _, state)
        |
          s
          order by
            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
            s.getRepr().toString()
        )
    |
      // greedy search for the shortest prefix
      result = prefix(prev) and delta(prev, Epsilon(), state)
      or
      not delta(prev, Epsilon(), state) and
      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
    )
  }
  /**
   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
   */
  private string getCanonicalEdgeChar(State prev, State next) {
    result =
      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
  }
  /** Gets a state within a regular expression that has a pumpable state. */
  pragma[noinline]
  State stateInPumpableRegexp() {
    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
  }
 }
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
-  /**
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
   * Predicates for constructing a prefix string that leads to a given state.
   */
  private module PrefixConstruction {
    /**
     * Holds if `state` is the textually last start state for the regular expression.
     */
    private predicate lastStartState(State state) {
      exists(RegExpRoot root |
        state =
          max(State s, Location l |
            s = stateInPumpableRegexp() and
            isStartState(s) and
            getRoot(s.getRepr()) = root and
            l = s.getRepr().getLocation()
          |
            s
            order by
              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
              l.getEndLine()
          )
      )
    }
-    /**
+  import PrefixConstruction<isCandidateState/1> as Prefix
     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
     */
    private predicate existsTransition(State a, State b) { delta(a, _, b) }
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
     */
    int prefixLength(State start, State state) =
      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the start state.
     */
    private int lengthFromStart(State state) { result = prefixLength(_, state) }
    /**
     * Gets a string for which the regular expression will reach `state`.
     *
     * Has at most one result for any given `state`.
     * This predicate will not always have a result even if there is a ReDoS issue in
     * the regular expression.
     */
    string prefix(State state) {
      lastStartState(state) and
      result = ""
      or
      // the search stops past the last redos candidate state.
      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
      exists(State prev |
        // select a unique predecessor (by an arbitrary measure)
        prev =
          min(State s, Location loc |
            lengthFromStart(s) = lengthFromStart(state) - 1 and
            loc = s.getRepr().getLocation() and
            delta(s, _, state)
          |
            s
            order by
              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
              s.getRepr().toString()
          )
      |
        // greedy search for the shortest prefix
        result = prefix(prev) and delta(prev, Epsilon(), state)
        or
        not delta(prev, Epsilon(), state) and
        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
      )
    }
    /**
     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
     */
    private string getCanonicalEdgeChar(State prev, State next) {
      result =
        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
    }
    /** Gets a state within a regular expression that has a pumpable state. */
    pragma[noinline]
    State stateInPumpableRegexp() {
      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
    }
  }
  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
    import PrefixConstruction
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1059,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
    }
    /**
@@ -1060,7 +1067,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
+      prev = Prefix::stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      next = Prefix::stateInPumpableRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }
@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }
--- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
+++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll
@@ -877,6 +877,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);
 /**
 * Holds if `state` is a candidate for ReDoS.
 */
 signature predicate isCandidateSig(State state);
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 module PrefixConstruction<isCandidateSig/1 isCandidate> {
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
        max(State s, Location l |
          s = stateInPumpableRegexp() and
          isStartState(s) and
          getRoot(s.getRepr()) = root and
          l = s.getRepr().getLocation()
        |
          s
          order by
            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
            l.getEndLine()
        )
    )
  }
  /**
   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
   */
  private predicate existsTransition(State a, State b) { delta(a, _, b) }
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
   */
  int prefixLength(State start, State state) =
    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the start state.
   */
  private int lengthFromStart(State state) { result = prefixLength(_, state) }
  /**
   * Gets a string for which the regular expression will reach `state`.
   *
   * Has at most one result for any given `state`.
   * This predicate will not always have a result even if there is a ReDoS issue in
   * the regular expression.
   */
  string prefix(State state) {
    lastStartState(state) and
    result = ""
    or
    // the search stops past the last redos candidate state.
    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
    exists(State prev |
      // select a unique predecessor (by an arbitrary measure)
      prev =
        min(State s, Location loc |
          lengthFromStart(s) = lengthFromStart(state) - 1 and
          loc = s.getRepr().getLocation() and
          delta(s, _, state)
        |
          s
          order by
            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
            s.getRepr().toString()
        )
    |
      // greedy search for the shortest prefix
      result = prefix(prev) and delta(prev, Epsilon(), state)
      or
      not delta(prev, Epsilon(), state) and
      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
    )
  }
  /**
   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
   */
  private string getCanonicalEdgeChar(State prev, State next) {
    result =
      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
  }
  /** Gets a state within a regular expression that has a pumpable state. */
  pragma[noinline]
  State stateInPumpableRegexp() {
    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
  }
 }
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
-  /**
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
   * Predicates for constructing a prefix string that leads to a given state.
   */
  private module PrefixConstruction {
    /**
     * Holds if `state` is the textually last start state for the regular expression.
     */
    private predicate lastStartState(State state) {
      exists(RegExpRoot root |
        state =
          max(State s, Location l |
            s = stateInPumpableRegexp() and
            isStartState(s) and
            getRoot(s.getRepr()) = root and
            l = s.getRepr().getLocation()
          |
            s
            order by
              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
              l.getEndLine()
          )
      )
    }
-    /**
+  import PrefixConstruction<isCandidateState/1> as Prefix
     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
     */
    private predicate existsTransition(State a, State b) { delta(a, _, b) }
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
     */
    int prefixLength(State start, State state) =
      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the start state.
     */
    private int lengthFromStart(State state) { result = prefixLength(_, state) }
    /**
     * Gets a string for which the regular expression will reach `state`.
     *
     * Has at most one result for any given `state`.
     * This predicate will not always have a result even if there is a ReDoS issue in
     * the regular expression.
     */
    string prefix(State state) {
      lastStartState(state) and
      result = ""
      or
      // the search stops past the last redos candidate state.
      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
      exists(State prev |
        // select a unique predecessor (by an arbitrary measure)
        prev =
          min(State s, Location loc |
            lengthFromStart(s) = lengthFromStart(state) - 1 and
            loc = s.getRepr().getLocation() and
            delta(s, _, state)
          |
            s
            order by
              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
              s.getRepr().toString()
          )
      |
        // greedy search for the shortest prefix
        result = prefix(prev) and delta(prev, Epsilon(), state)
        or
        not delta(prev, Epsilon(), state) and
        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
      )
    }
    /**
     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
     */
    private string getCanonicalEdgeChar(State prev, State next) {
      result =
        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
    }
    /** Gets a state within a regular expression that has a pumpable state. */
    pragma[noinline]
    State stateInPumpableRegexp() {
      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
    }
  }
  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
    import PrefixConstruction
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1059,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
    }
    /**
@@ -1060,7 +1067,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
+      prev = Prefix::stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      next = Prefix::stateInPumpableRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }
@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }
--- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll
+++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll
@@ -877,6 +877,101 @@ predicate isStartState(State state) {
 */
 signature predicate isCandidateSig(State state, string pump);
 /**
 * Holds if `state` is a candidate for ReDoS.
 */
 signature predicate isCandidateSig(State state);
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 module PrefixConstruction<isCandidateSig/1 isCandidate> {
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
        max(State s, Location l |
          s = stateInPumpableRegexp() and
          isStartState(s) and
          getRoot(s.getRepr()) = root and
          l = s.getRepr().getLocation()
        |
          s
          order by
            l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
            l.getEndLine()
        )
    )
  }
  /**
   * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
   */
  private predicate existsTransition(State a, State b) { delta(a, _, b) }
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
   */
  int prefixLength(State start, State state) =
    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
  /**
   * Gets the minimum number of transitions it takes to reach `state` from the start state.
   */
  private int lengthFromStart(State state) { result = prefixLength(_, state) }
  /**
   * Gets a string for which the regular expression will reach `state`.
   *
   * Has at most one result for any given `state`.
   * This predicate will not always have a result even if there is a ReDoS issue in
   * the regular expression.
   */
  string prefix(State state) {
    lastStartState(state) and
    result = ""
    or
    // the search stops past the last redos candidate state.
    lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
    exists(State prev |
      // select a unique predecessor (by an arbitrary measure)
      prev =
        min(State s, Location loc |
          lengthFromStart(s) = lengthFromStart(state) - 1 and
          loc = s.getRepr().getLocation() and
          delta(s, _, state)
        |
          s
          order by
            loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
            s.getRepr().toString()
        )
    |
      // greedy search for the shortest prefix
      result = prefix(prev) and delta(prev, Epsilon(), state)
      or
      not delta(prev, Epsilon(), state) and
      result = prefix(prev) + getCanonicalEdgeChar(prev, state)
    )
  }
  /**
   * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
   */
  private string getCanonicalEdgeChar(State prev, State next) {
    result =
      min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
  }
  /** Gets a state within a regular expression that has a pumpable state. */
  pragma[noinline]
  State stateInPumpableRegexp() {
    exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
  }
 }
 /**
 * A module for pruning candidate ReDoS states.
 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
  private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
-  /**
+  predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
   * Predicates for constructing a prefix string that leads to a given state.
   */
  private module PrefixConstruction {
    /**
     * Holds if `state` is the textually last start state for the regular expression.
     */
    private predicate lastStartState(State state) {
      exists(RegExpRoot root |
        state =
          max(State s, Location l |
            s = stateInPumpableRegexp() and
            isStartState(s) and
            getRoot(s.getRepr()) = root and
            l = s.getRepr().getLocation()
          |
            s
            order by
              l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
              l.getEndLine()
          )
      )
    }
-    /**
+  import PrefixConstruction<isCandidateState/1> as Prefix
     * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
     */
    private predicate existsTransition(State a, State b) { delta(a, _, b) }
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
     */
    int prefixLength(State start, State state) =
      shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
    /**
     * Gets the minimum number of transitions it takes to reach `state` from the start state.
     */
    private int lengthFromStart(State state) { result = prefixLength(_, state) }
    /**
     * Gets a string for which the regular expression will reach `state`.
     *
     * Has at most one result for any given `state`.
     * This predicate will not always have a result even if there is a ReDoS issue in
     * the regular expression.
     */
    string prefix(State state) {
      lastStartState(state) and
      result = ""
      or
      // the search stops past the last redos candidate state.
      lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
      exists(State prev |
        // select a unique predecessor (by an arbitrary measure)
        prev =
          min(State s, Location loc |
            lengthFromStart(s) = lengthFromStart(state) - 1 and
            loc = s.getRepr().getLocation() and
            delta(s, _, state)
          |
            s
            order by
              loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
              s.getRepr().toString()
          )
      |
        // greedy search for the shortest prefix
        result = prefix(prev) and delta(prev, Epsilon(), state)
        or
        not delta(prev, Epsilon(), state) and
        result = prefix(prev) + getCanonicalEdgeChar(prev, state)
      )
    }
    /**
     * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
     */
    private string getCanonicalEdgeChar(State prev, State next) {
      result =
        min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
    }
    /** Gets a state within a regular expression that has a pumpable state. */
    pragma[noinline]
    State stateInPumpableRegexp() {
      exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
    }
  }
  /**
   * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
   * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
   */
  private module SuffixConstruction {
    import PrefixConstruction
    /**
     * Holds if all states reachable from `fork` by repeating `w`
     * are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private predicate isLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      (
        // exists a reject edge with some char.
        hasRejectEdge(s)
@@ -1052,7 +1059,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
     */
    predicate isRejectState(State s) {
-      s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
+      s = Prefix::stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
    }
    /**
@@ -1060,7 +1067,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noopt]
    predicate hasEdgeToLikelyRejectable(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      // all edges (at least one) with some char leads to another state that is rejectable.
      // the `next` states might not share a common suffix, which can cause FPs.
      exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     */
    pragma[noinline]
    private string hasEdgeToLikelyRejectableHelper(State s) {
-      s = stateInPumpableRegexp() and
+      s = Prefix::stateInPumpableRegexp() and
      not hasRejectEdge(s) and
      not isRejectState(s) and
      deltaClosedChar(s, result, _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
     * `prev` to `next` that the character symbol `char`.
     */
    predicate deltaClosedChar(State prev, string char, State next) {
-      prev = stateInPumpableRegexp() and
+      prev = Prefix::stateInPumpableRegexp() and
-      next = stateInPumpableRegexp() and
+      next = Prefix::stateInPumpableRegexp() and
      deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
    }
@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
  predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
    isReDoSAttackable(t, pump, s) and
    (
-      prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
+      prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
-      not PrefixConstruction::prefix(s) = ""
+      not Prefix::prefix(s) = ""
      or
-      PrefixConstruction::prefix(s) = "" and prefixMsg = ""
+      Prefix::prefix(s) = "" and prefixMsg = ""
      or
-      not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
+      not exists(Prefix::prefix(s)) and prefixMsg = ""
    )
  }