Add Java ReDoS libraries to identical-files.json

2025-12-16 16:53:25 +01:00 · 2022-02-02 13:53:41 +00:00
parent 11e465f2ac
commit f9f7a01f57
3 changed files with 120 additions and 55 deletions
--- a/config/identical-files.json
+++ b/config/identical-files.json
@@ -475,20 +475,23 @@
    "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll",
    "ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll"
  ],
-  "ReDoS Util Python/JS/Ruby": [
+  "ReDoS Util Python/JS/Ruby/Java": [
    "javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
    "python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
-    "ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
+    "ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll",
+    "java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll"
  ],
-  "ReDoS Exponential Python/JS/Ruby": [
+  "ReDoS Exponential Python/JS/Ruby/Java": [
    "javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
    "python/ql/lib/semmle/python/security/performance/ExponentialBackTracking.qll",
-    "ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll"
+    "ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll",
+    "java/ql/lib/semmle/code/java/security/performance/ExponentialBackTracking.qll"
  ],
-  "ReDoS Polynomial Python/JS/Ruby": [
+  "ReDoS Polynomial Python/JS/Ruby/Java": [
    "javascript/ql/lib/semmle/javascript/security/performance/SuperlinearBackTracking.qll",
    "python/ql/lib/semmle/python/security/performance/SuperlinearBackTracking.qll",
-    "ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll"
+    "ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll",
+    "java/ql/lib/semmle/code/java/security/performance/SuperlinearBackTracking.qll"
  ],
  "BadTagFilterQuery Python/JS/Ruby": [
    "javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll",
--- a/java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll
+++ b/java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll
@@ -140,9 +140,9 @@ class RegExpRoot extends RegExpTerm {
    // there is at least one repetition
    getRoot(any(InfiniteRepetitionQuantifier q)) = this and
    // is actually used as a RegExp
-    isUsedAsRegExp() and
+    this.isUsedAsRegExp() and
    // not excluded for library specific reasons
-    not isExcluded(getRootTerm().getParent())
+    not isExcluded(this.getRootTerm().getParent())
  }
 }

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -302,7 +302,7 @@ abstract class CharacterClass extends InputSymbol {
  /**
   * Gets a character matched by this character class.
   */
-  string choose() { result = getARelevantChar() and matches(result) }
+  string choose() { result = this.getARelevantChar() and this.matches(result) }
 }

 /**
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
-        escape.getValue() = escape.getValue().toLowerCase() and
-        classEscapeMatches(escape.getValue(), char)
+      exists(string charClass | isEscapeClass(child, charClass) |
+        charClass.toLowerCase() = charClass and
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        charClass.toUpperCase() = charClass and
+        not classEscapeMatches(charClass, char)
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+      exists(string charClass | isEscapeClass(child, charClass) |
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["d", "s", "w"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }

-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }

    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["D", "S", "W"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }

    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -533,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
  }
 }

+/**
+ * A RegExp term that acts like a plus.
+ * Either it's a RegExpPlus, or it is a range {1,X} where X is >= 30.
+ * 30 has been chosen as a threshold because for exponential blowup 2^30 is enough to get a decent DOS attack.
+ */
+private class EffectivelyPlus extends RegExpTerm {
+  EffectivelyPlus() {
+    this instanceof RegExpPlus
+    or
+    exists(RegExpRange range |
+      range.getLowerBound() = 1 and
+      (range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
+    |
+      this = range
+    )
+  }
+}
+
+/**
+ * A RegExp term that acts like a star.
+ * Either it's a RegExpStar, or it is a range {0,X} where X is >= 30.
+ */
+private class EffectivelyStar extends RegExpTerm {
+  EffectivelyStar() {
+    this instanceof RegExpStar
+    or
+    exists(RegExpRange range |
+      range.getLowerBound() = 0 and
+      (range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
+    |
+      this = range
+    )
+  }
+}
+
+/**
+ * A RegExp term that acts like a question mark.
+ * Either it's a RegExpQuestion, or it is a range {0,1}.
+ */
+private class EffectivelyQuestion extends RegExpTerm {
+  EffectivelyQuestion() {
+    this instanceof RegExpOpt
+    or
+    exists(RegExpRange range | range.getLowerBound() = 0 and range.getUpperBound() = 1 |
+      this = range
+    )
+  }
+}
+
 /**
 * Gets the state before matching `t`.
 */
@@ -542,7 +597,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
 /**
 * Gets a state the NFA may be in after matching `t`.
 */
-private State after(RegExpTerm t) {
+State after(RegExpTerm t) {
  exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
  or
  exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -553,14 +608,14 @@ private State after(RegExpTerm t) {
  or
  exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
  or
-  exists(RegExpStar star | t = star.getAChild() | result = before(star))
+  exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
  or
-  exists(RegExpPlus plus | t = plus.getAChild() |
+  exists(EffectivelyPlus plus | t = plus.getAChild() |
    result = before(plus) or
    result = after(plus)
  )
  or
-  exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
+  exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
  or
  exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
 }
@@ -599,7 +654,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
@@ -611,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
  or
  exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
  or
-  exists(RegExpStar star | lbl = Epsilon() |
+  exists(EffectivelyStar star | lbl = Epsilon() |
    q1 = before(star) and q2 = before(star.getChild(0))
    or
    q1 = before(star) and q2 = after(star)
  )
  or
-  exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
+  exists(EffectivelyPlus plus | lbl = Epsilon() |
+    q1 = before(plus) and q2 = before(plus.getChild(0))
+  )
  or
-  exists(RegExpOpt opt | lbl = Epsilon() |
+  exists(EffectivelyQuestion opt | lbl = Epsilon() |
    q1 = before(opt) and q2 = before(opt.getChild(0))
    or
    q1 = before(opt) and q2 = after(opt)
@@ -671,7 +728,7 @@ RegExpRoot getRoot(RegExpTerm term) {
 /**
 * A state in the NFA.
 */
-private newtype TState =
+newtype TState =
  /**
   * A state representing that the NFA is about to match a term.
   * `i` is used to index into multi-char literals.
@@ -801,29 +858,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
  result = Any()
 }

+/**
+ * Holds if `state` is a start state.
+ */
+predicate isStartState(State state) {
+  state = mkMatch(any(RegExpRoot r))
+  or
+  exists(RegExpCaret car | state = after(car))
+}
+
 /**
 * Predicates for constructing a prefix string that leads to a given state.
 */
 private module PrefixConstruction {
-  /**
-   * Holds if `state` starts the string matched by the regular expression.
-   */
-  private predicate isStartState(State state) {
-    state instanceof StateInPumpableRegexp and
-    (
-      state = Match(any(RegExpRoot r), _)
-      or
-      exists(RegExpCaret car | state = after(car))
-    )
-  }
-
  /**
   * Holds if `state` is the textually last start state for the regular expression.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot root |
      state =
-        max(State s, Location l |
+        max(StateInPumpableRegexp s, Location l |
          isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
        |
          s
--- a/java/ql/lib/semmle/code/java/security/performance/RegExpTreeView.qll
+++ b/java/ql/lib/semmle/code/java/security/performance/RegExpTreeView.qll
@@ -5,6 +5,14 @@
 import java
 import semmle.code.java.regex.RegexTreeView

+/**
+ * Holds if `term` is an ecape class representing e.g. `\d`.
+ * `clazz` is which character class it represents, e.g. "d" for `\d`.
+ */
+predicate isEscapeClass(RegExpTerm term, string clazz) {
+  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
+}
+
 /**
 * Holds if the regular expression should not be considered.
 *