Merge pull request #230 from github/redos-enable-tounicode

enable unicode parsing in the ReDoS query
2026-04-26 01:05:15 +02:00 · 2021-08-02 10:42:09 +02:00
parent d986bea317 632ad518f0
commit 2f491a1924
3 changed files with 43 additions and 37 deletions
--- a/ql/src/codeql_ruby/regexp/RegExpTreeView.qll
+++ b/ql/src/codeql_ruby/regexp/RegExpTreeView.qll
@@ -351,49 +351,48 @@ class RegExpEscape extends RegExpNormalChar {
   * E.g. for `\u0061` this returns "a".
   */
  private string getUnicode() {
-    // TODO: uncomment when toUnicode() is available
-    none()
-    //exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
-    //  result = codepoint.toUnicode()
-    //)
+    exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
+      result = codepoint.toUnicode()
+    )
+  }
+
+  /**
+   * Gets the value contributed by the `index`th digit of the hex number in the unicode escape.
+   * E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the sixteens place, i.e. 6 * 16).
+   */
+  private int getHexValueFromUnicode(int index) {
+    isUnicode() and
+    exists(string hex, string char | hex = getText().suffix(2) |
+      char = hex.charAt(index) and
+      result = 16.pow(hex.length() - index - 1) * toHex(char)
+    )
  }

-  // TODO: uncomment when toUnicode() is available
-  ///**
-  // * Gets int value for the `index`th char in the hex number of the unicode escape.
-  // * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
-  // */
-  //private int getHexValueFromUnicode(int index) {
-  //  isUnicode() and
-  //  exists(string hex, string char | hex = getText().suffix(2) |
-  //    char = hex.charAt(index) and
-  //    result = 16.pow(hex.length() - index - 1) * toHex(char)
-  //  )
-  //}
  string getUnescaped() { result = this.getText().suffix(1) }

  override string getAPrimaryQlClass() { result = "RegExpEscape" }
 }

-///**
-// * Gets the hex number for the `hex` char.
-// */
-//private int toHex(string hex) {
-//  hex = [0 .. 9].toString() and
-//  result = hex.toInt()
-//  or
-//  result = 10 and hex = ["a", "A"]
-//  or
-//  result = 11 and hex = ["b", "B"]
-//  or
-//  result = 12 and hex = ["c", "C"]
-//  or
-//  result = 13 and hex = ["d", "D"]
-//  or
-//  result = 14 and hex = ["e", "E"]
-//  or
-//  result = 15 and hex = ["f", "F"]
-//}
+/**
+ * Gets the numeric value (0 to 15) of the single hex digit `hex`, in either letter case.
+ */
+private int toHex(string hex) {
+  hex = [0 .. 9].toString() and
+  result = hex.toInt()
+  or
+  result = 10 and hex = ["a", "A"]
+  or
+  result = 11 and hex = ["b", "B"]
+  or
+  result = 12 and hex = ["c", "C"]
+  or
+  result = 13 and hex = ["d", "D"]
+  or
+  result = 14 and hex = ["e", "E"]
+  or
+  result = 15 and hex = ["f", "F"]
+}
+
 /**
 * A character class escape in a regular expression.
 * That is, an escaped character that denotes multiple characters.
--- a/ql/test/query-tests/security/cwe-1333/ReDoS.expected
+++ b/ql/test/query-tests/security/cwe-1333/ReDoS.expected
@@ -90,3 +90,4 @@
 | tst.rb:361:11:361:29 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
 | tst.rb:362:11:362:31 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
 | tst.rb:363:11:363:34 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
+| tst.rb:369:12:369:22 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
--- a/ql/test/query-tests/security/cwe-1333/tst.rb
+++ b/ql/test/query-tests/security/cwe-1333/tst.rb
@@ -363,4 +363,10 @@ bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/
 bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/

 # GOOD: 
-good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
+good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
+
+# NOT GOOD: \u0061 is 'a', so both alternatives match the same character
+bad87 = /^X(\u0061|a)*Y$/
+
+# GOOD: \u0061 ('a') and 'b' never match the same character
+good43 = /^X(\u0061|b)+Y$/