get ReDoSUtil in sync for ruby

2025-12-17 01:03:14 +01:00 · 2021-11-18 16:31:45 +01:00
parent 6c2713dd8b
commit ee858d840e
9 changed files with 138 additions and 187 deletions
--- a/config/identical-files.json
+++ b/config/identical-files.json
@@ -460,9 +460,10 @@
    "javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll",
    "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll"
  ],
-  "ReDoS Util Python/JS": [
+  "ReDoS Util Python/JS/Ruby": [
    "javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
-    "python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll"
+    "python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
    "ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
  ],
  "ReDoS Exponential Python/JS": [
    "javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
--- a/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        escape.getValue() = escape.getValue().toLowerCase() and
+        charClass.toLowerCase() = charClass and
-        classEscapeMatches(escape.getValue(), char)
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
+        charClass.toUpperCase() = charClass and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        // `charClass` is its own upper-case form here (e.g. "D"): negate the lower-case class.
+        not classEscapeMatches(charClass.toLowerCase(), char)
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["d", "s", "w"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }
-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }
    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["D", "S", "W"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }
    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
--- a/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
+++ b/javascript/ql/lib/semmle/javascript/security/performance/RegExpTreeView.qll
@@ -6,6 +6,14 @@
 import javascript
 /**
 * Holds if `term` is an escape class representing e.g. `\d`.
 * `clazz` is which character class it represents, e.g. "d" for `\d`
 * (the value reported by `RegExpCharacterClassEscape.getValue()`).
 */
 predicate isEscapeClass(RegExpTerm term, string clazz) {
  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
 }
 /**
 * Holds if the regular expression should not be considered.
 *
--- a/python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll
+++ b/python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll
@@ -218,7 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -340,13 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        escape.getValue() = escape.getValue().toLowerCase() and
+        charClass.toLowerCase() = charClass and
-        classEscapeMatches(escape.getValue(), char)
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
+        charClass.toUpperCase() = charClass and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        // `charClass` is its own upper-case form here (e.g. "D"): negate the lower-case class.
+        not classEscapeMatches(charClass.toLowerCase(), char)
      )
    )
  }
@@ -409,10 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
    )
  }
@@ -466,33 +466,36 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["d", "s", "w"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }
-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }
    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -501,26 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["D", "S", "W"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }
    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
 }
@@ -599,7 +605,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
--- a/python/ql/lib/semmle/python/security/performance/RegExpTreeView.qll
+++ b/python/ql/lib/semmle/python/security/performance/RegExpTreeView.qll
@@ -5,6 +5,14 @@
 import python
 import semmle.python.RegexTreeView
 /**
 * Holds if `term` is an escape class representing e.g. `\d`.
 * `clazz` is which character class it represents, e.g. "d" for `\d`
 * (the value reported by `RegExpCharacterClassEscape.getValue()`).
 */
 predicate isEscapeClass(RegExpTerm term, string clazz) {
  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
 }
 /**
 * Holds if the regular expression should not be considered.
 *
--- a/ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll
@@ -13,7 +13,6 @@
 */
 import RegExpTreeView
 private import codeql.Locations
 /**
 * A configuration for which parts of a regular expression should be considered relevant for
@@ -219,9 +218,7 @@ private newtype TInputSymbol =
      recc instanceof RegExpCharacterClass and
      not recc.(RegExpCharacterClass).isUniversalClass()
      or
-      recc instanceof RegExpCharacterClassEscape
+      isEscapeClass(recc, _)
      or
      recc instanceof RegExpNamedCharacterProperty
    )
  } or
  /** An input symbol representing all characters matched by `.`. */
@@ -343,22 +340,13 @@ private module CharacterClasses {
        char <= hi
      )
      or
-      exists(RegExpCharacterClassEscape escape | escape = child |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        escape.getValue() = escape.getValue().toLowerCase() and
+        charClass.toLowerCase() = charClass and
-        classEscapeMatches(escape.getValue(), char)
+        classEscapeMatches(charClass, char)
        or
        char = getARelevantChar() and
-        escape.getValue() = escape.getValue().toUpperCase() and
+        charClass.toUpperCase() = charClass and
-        not classEscapeMatches(escape.getValue().toLowerCase(), char)
+        // `charClass` is its own upper-case form here (e.g. "D"): negate the lower-case class.
+        not classEscapeMatches(charClass.toLowerCase(), char)
      )
      or
      exists(RegExpNamedCharacterProperty charProp | charProp = child |
        not charProp.isInverted() and
        namedCharacterPropertyMatches(charProp.getName(), char)
        or
        char = getARelevantChar() and
        charProp.isInverted() and
        not namedCharacterPropertyMatches(charProp.getName(), char)
      )
    )
  }
@@ -421,16 +409,10 @@ private module CharacterClasses {
      or
      child.(RegExpCharacterRange).isRange(_, result)
      or
-      exists(RegExpCharacterClassEscape escape | child = escape |
+      exists(string charClass | isEscapeClass(child, charClass) |
-        result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
        or
-        result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
+        result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
      )
      or
      exists(RegExpNamedCharacterProperty charProp | child = charProp |
        result = min(string s | namedCharacterPropertyMatches(charProp.getName(), s))
        or
        result = max(string s | namedCharacterPropertyMatches(charProp.getName(), s))
      )
    )
  }
@@ -480,60 +462,40 @@ private module CharacterClasses {
    char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
  }
  /**
   * Holds if the named character property (e.g. from a POSIX bracket
   * expression) `propName` matches `char`. For example, it holds when `name` is
   * `"word"` and `char` is `"a"`.
   *
   * TODO: expand to cover more properties.
   */
  private predicate namedCharacterPropertyMatches(string propName, string char) {
    propName = ["digit", "Digit"] and
    char = "0123456789".charAt(_)
    or
    propName = ["space", "Space"] and
    (
      char = [" ", "\t", "\r", "\n"]
      or
      char = getARelevantChar() and
      char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
    )
    or
    propName = ["word", "Word"] and
    char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
  }
  /**
   * An implementation of `CharacterClass` for \d, \s, and \w.
   */
  private class PositiveCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    PositiveCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["d", "s", "w"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = ["0", "9"]
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = ["a", "Z", "_", "0", "9"]
    }
-    override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
+    override predicate matches(string char) { classEscapeMatches(charClass, char) }
    override string choose() {
-      cc.getValue() = "d" and
+      charClass = "d" and
      result = "9"
      or
-      cc.getValue() = "s" and
+      charClass = "s" and
      result = " "
      or
-      cc.getValue() = "w" and
+      charClass = "w" and
      result = "a"
    }
  }
@@ -542,88 +504,29 @@ private module CharacterClasses {
   * An implementation of `CharacterClass` for \D, \S, and \W.
   */
  private class NegativeCharacterClassEscape extends CharacterClass {
-    RegExpCharacterClassEscape cc;
+    RegExpTerm cc;
    string charClass;
    NegativeCharacterClassEscape() {
-      this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
+      isEscapeClass(cc, charClass) and
      this = getCanonicalCharClass(cc) and
      charClass = ["D", "S", "W"]
    }
    override string getARelevantChar() {
-      cc.getValue() = "D" and
+      charClass = "D" and
      result = ["a", "Z", "!"]
      or
-      cc.getValue() = "S" and
+      charClass = "S" and
      result = ["a", "9", "!"]
      or
-      cc.getValue() = "W" and
+      charClass = "W" and
      result = [" ", "!"]
    }
    bindingset[char]
    override predicate matches(string char) {
-      not classEscapeMatches(cc.getValue().toLowerCase(), char)
+      not classEscapeMatches(charClass.toLowerCase(), char)
    }
  }
  /**
   * An implementation of `NamedCharacterProperty` for positive (non-inverted)
   * character properties.
   */
  private class PositiveNamedCharacterProperty extends CharacterClass {
    RegExpNamedCharacterProperty cp;
    PositiveNamedCharacterProperty() { this = getCanonicalCharClass(cp) and not cp.isInverted() }
    override string getARelevantChar() {
      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
        lowerName = "digit" and
        result = ["0", "9"]
        or
        lowerName = "space" and
        result = [" "]
        or
        lowerName = "word" and
        result = ["a", "Z", "_", "0", "9"]
      )
    }
    override predicate matches(string char) { namedCharacterPropertyMatches(cp.getName(), char) }
    override string choose() {
      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
        lowerName = "digit" and
        result = "9"
        or
        lowerName = "space" and
        result = " "
        or
        lowerName = "word" and
        result = "a"
      )
    }
  }
  private class InvertedNamedCharacterProperty extends CharacterClass {
    RegExpNamedCharacterProperty cp;
    InvertedNamedCharacterProperty() { this = getCanonicalCharClass(cp) and cp.isInverted() }
    override string getARelevantChar() {
      exists(string lowerName | lowerName = cp.getName().toLowerCase() |
        lowerName = "digit" and
        result = ["a", "Z", "!"]
        or
        lowerName = "space" and
        result = ["a", "9", "!"]
        or
        lowerName = "word" and
        result = [" ", "!"]
      )
    }
    bindingset[char]
    override predicate matches(string char) {
      not namedCharacterPropertyMatches(cp.getName(), char)
    }
  }
 }
@@ -702,18 +605,12 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
    q2 = after(cc)
  )
  or
-  exists(RegExpCharacterClassEscape cc |
+  exists(RegExpTerm cc | isEscapeClass(cc, _) |
    q1 = before(cc) and
    lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
    q2 = after(cc)
  )
  or
  exists(RegExpNamedCharacterProperty cp |
    q1 = before(cp) and
    lbl = CharClass(cp.getRawValue()) and
    q2 = after(cp)
  )
  or
  exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
  or
  exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))
--- a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
@@ -1,6 +1,27 @@
 private import codeql.ruby.ast.Literal as AST
 private import codeql.Locations
 private import ParseRegExp
 import codeql.Locations
 /**
 * Holds if `term` is an escape class representing e.g. `\d`.
 * `clazz` is which character class it represents, e.g. "d" for `\d`.
 *
 * Named character properties called `digit`, `space` or `word` (name compared
 * after lower-casing) are also covered: they map to "d", "s" and "w"
 * respectively, or to "D", "S" and "W" when the property is inverted.
 */
 predicate isEscapeClass(RegExpTerm term, string clazz) {
  exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
  or
  // TODO: expand to cover more properties
  exists(RegExpNamedCharacterProperty escape | term = escape |
    escape.getName().toLowerCase() = "digit" and
    if escape.isInverted() then clazz = "D" else clazz = "d"
    or
    escape.getName().toLowerCase() = "space" and
    if escape.isInverted() then clazz = "S" else clazz = "s"
    or
    escape.getName().toLowerCase() = "word" and
    if escape.isInverted() then clazz = "W" else clazz = "w"
  )
 }
 /**
 * Holds if the regular expression should not be considered.
--- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
+++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
@@ -91,3 +91,4 @@
 | tst.rb:362:11:362:31 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
 | tst.rb:363:11:363:34 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
 | tst.rb:369:12:369:22 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
 | tst.rb:375:11:375:27 | ([[:digit:]]\|\\d)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '0'. |
--- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/tst.rb
+++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/tst.rb
@@ -369,4 +369,7 @@ good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
 bad87 = /^X(\u0061|a)*Y$/
 # GOOD
-good43 = /^X(\u0061|b)+Y$/
+good43 = /^X(\u0061|b)+Y$/
 # NOT GOOD
 bad88 = /X([[:digit:]]|\d)+Y/