support more regular expressions in js/incomplete-multi-character-sanitization

2026-07-20 18:58:36 +02:00 · 2021-05-18 21:51:28 +02:00
parent 92804a3cc3
commit 59f0a41665
2 changed files with 49 additions and 18 deletions
--- a/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
+++ b/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
@@ -56,31 +56,62 @@ DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
  not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
 }

+private import semmle.javascript.security.performance.ReDoSUtil as ReDoSUtil
+
+/**
+ * Gets a char from a dangerous prefix that is matched by `t`.
+ */
+pragma[noinline]
+DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
+  t.isNullable() and result = ""
+  or
+  t.getAMatchedString() = result
+  or
+  ReDoSUtil::getCanonicalCharClass(t).(ReDoSUtil::CharacterClass).matches(result)
+  or
+  t instanceof RegExpDot and
+  result.length() = 1
+  or
+  (
+    t instanceof RegExpOpt or
+    t instanceof RegExpStar or
+    t instanceof RegExpPlus or
+    t instanceof RegExpGroup or
+    t instanceof RegExpAlt
+  ) and
+  result = getADangerousMatchedChar(t.getAChild())
+}
+
 /**
 * Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
 *
 * Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
 */
 DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
-  exists(string left |
-    t.isNullable() and left = ""
-    or
-    t.getAMatchedString() = left
-    or
-    (
-      t instanceof RegExpOpt or
-      t instanceof RegExpStar or
-      t instanceof RegExpPlus or
-      t instanceof RegExpGroup or
-      t instanceof RegExpAlt
-    ) and
-    left = getADangerousMatchedPrefixSubstring(t.getAChild())
-  |
-    result = left + getADangerousMatchedPrefixSubstring(t.getSuccessor()) or
-    result = left
+  result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
+  or
+  result = getADangerousMatchedChar(t)
+  or
+  // loop around for repetitions (only considering alphanumeric characters in the repetition)
+  exists(RepetitionMatcher repetition | t = repetition |
+    result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
  )
 }

+class RepetitionMatcher extends EmptyReplaceRegExpTerm {
+  string char;
+
+  pragma[noinline]
+  RepetitionMatcher() {
+    (this instanceof RegExpPlus or this instanceof RegExpStar) and
+    char = getADangerousMatchedChar(this.getAChild()) and
+    char.regexpMatch("\\w")
+  }
+
+  pragma[noinline]
+  string getAChar() { result = char }
+}
+
 /**
 * Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
 */
@@ -151,7 +182,7 @@ where
  // skip leading optional elements
  not dangerous.isNullable() and
  // only warn about the longest match (presumably the most descriptive)
-  prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length()) and
+  prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
  // only warn once per kind
  not exists(EmptyReplaceRegExpTerm other |
    other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
--- a/javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll
+++ b/javascript/ql/src/semmle/javascript/security/performance/ReDoSUtil.qll
@@ -251,7 +251,7 @@ class InputSymbol extends TInputSymbol {
 /**
 * An abstract input symbol that represents a character class.
 */
-abstract private class CharacterClass extends InputSymbol {
+abstract class CharacterClass extends InputSymbol {
  /**
   * Gets a character that is relevant for intersection-tests involving this
   * character class.