get ReDoSUtil in sync for ruby

2025-12-16 16:53:25 +01:00 · 2021-11-18 16:31:45 +01:00
parent 6c2713dd8b
commit ee858d840e
9 changed files with 138 additions and 187 deletions
--- a/config/identical-files.json
+++ b/config/identical-files.json
@@ -460,9 +460,10 @@
    "javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
    "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll"
  ],
-  "ReDoS Util Python/JS": [
+  "ReDoS Util Python/JS/Ruby": [
    "javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
-    "python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll"
+    "python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
+    "ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
  ],
  "ReDoS Exponential Python/JS": [
    "javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
--- a/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
-        escape.getValue() = escape.getValue().toLowerCase() and
-        classEscapeMatches(escape.getValue(), char)
+      exists(string charClass | isEscapeClass(child, charClass) |
+        charClass.toLowerCase() = charClass and
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        charClass.toUpperCase() = charClass and
+        not classEscapeMatches(charClass.toLowerCase(), char) // complement of the lower-case class
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+      exists(string charClass | isEscapeClass(child, charClass) |
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["d", "s", "w"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }

-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }

    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["D", "S", "W"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }

    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
--- a/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
@@ -6,6 +6,14 @@

 import javascript

+/**
+ * Holds if `term` is an escape class representing e.g. `\d`.
+ * `clazz` is which character class it represents, e.g. "d" for `\d`.
+ */
+predicate isEscapeClass(RegExpTerm term, string clazz) {
+  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
+}
+
 /**
 * Holds if the regular expression should not be considered.
 *
--- a/python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll
+++ b/python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
-        escape.getValue() = escape.getValue().toLowerCase() and
-        classEscapeMatches(escape.getValue(), char)
+      exists(string charClass | isEscapeClass(child, charClass) |
+        charClass.toLowerCase() = charClass and
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        charClass.toUpperCase() = charClass and
+        not classEscapeMatches(charClass.toLowerCase(), char) // complement of the lower-case class
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+      exists(string charClass | isEscapeClass(child, charClass) |
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["d", "s", "w"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }

-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }

    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["D", "S", "W"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }

    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
--- a/python/ql/lib/semmle/python/security/performance/RegExpTreeView.qll
+++ b/python/ql/lib/semmle/python/security/performance/RegExpTreeView.qll
@@ -5,6 +5,14 @@
 import python
 import semmle.python.RegexTreeView

+/**
+ * Holds if `term` is an escape class representing e.g. `\d`.
+ * `clazz` is which character class it represents, e.g. "d" for `\d`.
+ */
+predicate isEscapeClass(RegExpTerm term, string clazz) {
+  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
+}
+
 /**
 * Holds if the regular expression should not be considered.
 *
--- a/ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll
@@ -13,7 +13,6 @@
 */

 import RegExpTreeView
-private import codeql.Locations

 /**
 * A configuration for which parts of a regular expression should be considered relevant for
@@ -219,9 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
-      or
-      recc instanceof RegExpNamedCharacterProperty
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -343,22 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
-        escape.getValue() = escape.getValue().toLowerCase() and
-        classEscapeMatches(escape.getValue(), char)
+      exists(string charClass | isEscapeClass(child, charClass) |
+        charClass.toLowerCase() = charClass and
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
-      )
-      or
-      exists(RegExpNamedCharacterProperty charProp | charProp = child |
-        not charProp.isInverted() and
-        namedCharacterPropertyMatches(charProp.getName(), char)
-        or
-        char = getARelevantChar() and
-        charProp.isInverted() and
-        not namedCharacterPropertyMatches(charProp.getName(), char)
+        charClass.toUpperCase() = charClass and
+        not classEscapeMatches(charClass.toLowerCase(), char) // complement of the lower-case class
      )
    )
  }
@@ -421,16 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+      exists(string charClass | isEscapeClass(child, charClass) |
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
-      )
-      or
-      exists(RegExpNamedCharacterProperty charProp | child = charProp |
-        result = min(string s | namedCharacterPropertyMatches(charProp.getName(), s))
-        or
-        result = max(string s | namedCharacterPropertyMatches(charProp.getName(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -480,60 +462,40 @@ private module CharacterClasses {
    char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
  }

-  /**
-   * Holds if the named character property (e.g. from a POSIX bracket
-   * expression) `propName` matches `char`. For example, it holds when `name` is
-   * `"word"` and `char` is `"a"`.
-   *
-   * TODO: expand to cover more properties.
-   */
-  private predicate namedCharacterPropertyMatches(string propName, string char) {
-    propName = ["digit", "Digit"] and
-    char = "0123456789".charAt(_)
-    or
-    propName = ["space", "Space"] and
-    (
-      char = [" ", "\t", "\r", "\n"]
-      or
-      char = getARelevantChar() and
-      char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
-    )
-    or
-    propName = ["word", "Word"] and
-    char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
-  }
-
  /**
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["d", "s", "w"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }

-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }

    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -542,88 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
+    string charClass;

    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
+      this = getCanonicalCharClass(cc) and
+      charClass = ["D", "S", "W"]
    }

    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }

    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
-    }
-  }
-
-  /**
-   * An implementation of `NamedCharacterProperty` for positive (non-inverted)
-   * character properties.
-   */
-  private class PositiveNamedCharacterProperty extends CharacterClass {
-    RegExpNamedCharacterProperty cp;
-
-    PositiveNamedCharacterProperty() { this = getCanonicalCharClass(cp) and not cp.isInverted() }
-
-    override string getARelevantChar() {
-      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
-        lowerName = "digit" and
-        result = ["0", "9"]
-        or
-        lowerName = "space" and
-        result = [" "]
-        or
-        lowerName = "word" and
-        result = ["a", "Z", "_", "0", "9"]
-      )
-    }
-
-    override predicate matches(string char) { namedCharacterPropertyMatches(cp.getName(), char) }
-
-    override string choose() {
-      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
-        lowerName = "digit" and
-        result = "9"
-        or
-        lowerName = "space" and
-        result = " "
-        or
-        lowerName = "word" and
-        result = "a"
-      )
-    }
-  }
-
-  private class InvertedNamedCharacterProperty extends CharacterClass {
-    RegExpNamedCharacterProperty cp;
-
-    InvertedNamedCharacterProperty() { this = getCanonicalCharClass(cp) and cp.isInverted() }
-
-    override string getARelevantChar() {
-      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
-        lowerName = "digit" and
-        result = ["a", "Z", "!"]
-        or
-        lowerName = "space" and
-        result = ["a", "9", "!"]
-        or
-        lowerName = "word" and
-        result = [" ", "!"]
-      )
-    }
-
-    bindingset[char]
-    override predicate matches(string char) {
-      not namedCharacterPropertyMatches(cp.getName(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -702,18 +605,12 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
  )
  or
-  exists(RegExpNamedCharacterProperty cp |
-    q1 = before(cp) and
-    lbl = CharClass(cp.getRawValue()) and
-    q2 = after(cp)
-  )
-  or
  exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
  or
  exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))
--- a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
@@ -1,6 +1,27 @@
 private import codeql.ruby.ast.Literal as AST
 private import codeql.Locations
 private import ParseRegExp
+import codeql.Locations
+
+/**
+ * Holds if `term` is an escape class representing e.g. `\d`.
+ * `clazz` is which character class it represents, e.g. "d" for `\d`.
+ */
+predicate isEscapeClass(RegExpTerm term, string clazz) {
+  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
+  or
+  // TODO: expand to cover more properties
+  exists(RegExpNamedCharacterProperty escape | term = escape |
+    escape.getName().toLowerCase() = "digit" and
+    if escape.isInverted() then clazz = "D" else clazz = "d"
+    or
+    escape.getName().toLowerCase() = "space" and
+    if escape.isInverted() then clazz = "S" else clazz = "s"
+    or
+    escape.getName().toLowerCase() = "word" and
+    if escape.isInverted() then clazz = "W" else clazz = "w"
+  )
+}

 /**
 * Holds if the regular expression should not be considered.
--- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
+++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
@@ -91,3 +91,4 @@
 | tst.rb:362:11:362:31 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
 | tst.rb:363:11:363:34 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
 | tst.rb:369:12:369:22 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
+| tst.rb:375:11:375:27 | ([[:digit:]]\|\\d)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '0'. |
--- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/tst.rb
+++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/tst.rb
@@ -369,4 +369,7 @@ good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
 bad87 = /^X(\u0061|a)*Y$/

 # GOOD
-good43 = /^X(\u0061|b)+Y$/
+good43 = /^X(\u0061|b)+Y$/
+
+# NOT GOOD
+bad88 = /X([[:digit:]]|\d)+Y/