improve performance of suffix checking

2026-04-29 18:55:14 +02:00 · 2020-12-17 20:51:43 +01:00
parent 6369374224
commit 05569187b4
2 changed files with 18 additions and 8 deletions
--- a/javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll
+++ b/javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll
@@ -838,7 +838,7 @@ private module SuffixConstruction {
   * Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
   * This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
   */
-  pragma[nomagic]
+  pragma[noinline]
  private predicate isLikelyRejectable(StateInPumpableRegexp s) {
    // exists a reject edge with some char.
    hasRejectEdge(s)
@@ -857,10 +857,11 @@ private module SuffixConstruction {
  /**
   * Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
   */
+  pragma[noinline]
  predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
    // all edges (at least one) with some char leads to another state that is rejectable.
    // the `next` states might not share a common suffix, which can cause FPs.
-    exists(string char | char = relevant() |
+    exists(string char | char = relevant(getRoot(s.getRepr())) |
      forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
    )
  }
@@ -871,21 +872,31 @@ private module SuffixConstruction {
   * `prev` to `next` that the character symbol `char`.
   */
  predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
-    char = relevant() and
-    deltaClosed(prev, getAnInputSymbolMatching(char), next)
+    deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
+  }
+
+  pragma[noinline]
+  InputSymbol getAnInputSymbolMatchingRelevant(string char) {
+    char = relevant(_) and
+    result = getAnInputSymbolMatching(char)
  }

  /**
-   * Gets a char used for finding possible suffixes.
+   * Gets a char used for finding possible suffixes inside `root`.
   */
-  private string relevant() { result = CharacterClasses::getARelevantChar() }
+  pragma[noinline]
+  private string relevant(RegExpRoot root) {
+    result = ["a", "9", "|", "\n", " "]
+    or
+    exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
+  }

  /**
   * Holds if there is no edge from `s` labeled `char` in our NFA.
   * The NFA does not model reject states, so the above is the same as saying there is a reject edge.
   */
  private predicate hasRejectEdge(State s) {
-    exists(string char | char = relevant() | not deltaClosedChar(s, char, _))
+    exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
  }

  /**
--- a/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected
+++ b/javascript/ql/test/query-tests/Performance/ReDoS/PolynomialBackTracking.expected
@@ -348,7 +348,6 @@
 | tst.js:23:33:23:40 | [\\s\\S]*? | Strings starting with '(*(*' and with many repetitions of '(*' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
 | tst.js:23:47:23:54 | [\\s\\S]*? | Strings starting with '(*' and with many repetitions of '(*a' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
 | tst.js:28:24:28:25 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
-| tst.js:28:59:28:60 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
 | tst.js:31:23:31:24 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
 | tst.js:31:54:31:55 | .* | Strings starting with '!\|\\n-\|\\n' and with many repetitions of '\|\\n\|' can start matching anywhere after the start of the preceeding .* |
 | tst.js:31:58:31:59 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |