JS: rewrite js/incomplete-multi-character-sanitization

2026-01-06 03:00:24 +01:00 · 2020-06-19 11:50:39 +02:00
parent 9d4cd0aa85
commit 580a24e982
4 changed files with 204 additions and 95 deletions
--- a/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
+++ b/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
@@ -12,69 +12,153 @@
 */

 import javascript
-import semmle.javascript.security.IncompleteBlacklistSanitizer

-predicate isDangerous(RegExpTerm t) {
-  // path traversals
-  t.getAMatchedString() = ["..", "/..", "../"]
-  or
-  exists(RegExpTerm start |
-    start = t.(RegExpSequence).getAChild() and
-    start.getConstantValue() = "." and
-    start.getSuccessor().getConstantValue() = "." and
-    not [start.getPredecessor(), start.getSuccessor().getSuccessor()].getConstantValue() = "."
-  )
-  or
-  // HTML comments
-  t.getAMatchedString() = "<!--"
-  or
-  // HTML scripts
-  t.getAMatchedString().regexpMatch("(?i)<script.*")
-  or
-  exists(RegExpSequence seq | seq = t |
-    t.getChild(0).getConstantValue() = "<" and
-    // the `cript|scrip` case has been observed in the wild, not sure what the goal of that pattern is...
-    t.getChild(0)
-        .getSuccessor+()
-        .getAMatchedString()
-        .regexpMatch("(?i)iframe|script|cript|scrip|style")
-  )
-  or
-  // HTML attributes
-  exists(string dangerousPrefix | dangerousPrefix = ["ng-", "on"] |
-    t.getAMatchedString().regexpMatch("(i?)" + dangerousPrefix + "[a-z]+")
+/**
+ * A regexp term that matches substrings that should be replaced with the empty string.
+ */
+class EmptyReplaceRegExpTerm extends RegExpTerm {
+  EmptyReplaceRegExpTerm() {
+    exists(StringReplaceCall replace |
+      [replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
+      this = replace.getRegExp().getRoot().getAChild*()
+    )
+  }
+}
+
+/**
+ * A prefix that may be dangerous to sanitize explicitly.
+ *
+ * Note that this class exists solely as a (necessary) optimization for this query.
+ */
+class DangerousPrefix extends string {
+  DangerousPrefix() {
+    this = ["/..", "../"] or
+    this = "<!--" or
+    this = "<" + ["iframe", "script", "cript", "scrip", "style"]
+  }
+}
+
+/**
+ * A substring of a prefix that may be dangerous to sanitize explicitly.
+ */
+class DangerousPrefixSubstring extends string {
+  DangerousPrefixSubstring() {
+    exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
+  }
+}
+
+/**
+ * Gets a dangerous prefix that is in the prefix language of `t`.
+ */
+DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
+  result = getADangerousMatchedPrefixSubstring(t) and
+  not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
+}
+
+/**
+ * Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
+ *
+ * Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
+ */
+DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
+  exists(string left |
+    t.isNullable() and left = ""
    or
-    exists(RegExpTerm start, RegExpTerm event | start = t.getAChild() |
-      start.getConstantValue().regexpMatch("(?i)[^a-z]*" + dangerousPrefix) and
-      event = start.getSuccessor() and
-      exists(RegExpTerm quantified | quantified = event.(RegExpQuantifier).getChild(0) |
-        quantified
-            .(RegExpCharacterClass)
-            .getAChild()
-            .(RegExpCharacterRange)
-            .isRange(["a", "A"], ["z", "Z"]) or
-        [quantified, quantified.(RegExpRange).getAChild()].(RegExpCharacterClassEscape).getValue() =
-          "w"
-      )
+    t.getAMatchedString() = left
+    or
+    (
+      t instanceof RegExpOpt or
+      t instanceof RegExpStar or
+      t instanceof RegExpPlus or
+      t instanceof RegExpGroup or
+      t instanceof RegExpAlt
+    ) and
+    left = getADangerousMatchedPrefixSubstring(t.getAChild())
+  |
+    result = left + getADangerousMatchedPrefixSubstring(t.getSuccessor()) or
+    result = left
+  )
+}
+
+/**
+ * Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
+ */
+predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
+  prefix = getADangerousMatchedPrefix(t) and
+  (
+    kind = "path injection" and
+    // upwards navigation
+    prefix = ["/..", "../"] and
+    not t.getSuccessor*().getAMatchedString().regexpMatch("(?i).*[a-z0-9_-]+.*") // explicit path name mentions make this an unlikely sanitizer
+    or
+    kind = "HTML element injection" and
+    (
+      // comments
+      prefix = "<!--" and
+      not t.getSuccessor*().getAMatchedString().regexpMatch("(?i).*[a-z0-9_]+.*") // explicit comment content mentions make this an unlikely sanitizer
+      or
+      // specific tags
+      prefix = "<" + ["iframe", "script", "cript", "scrip", "style"] // the `cript|scrip` case has been observed in the wild several times
+    )
+  )
+  or
+  kind = "HTML attribute injection" and
+  prefix =
+    [
+      // ordinary event handler prefix
+      "on",
+      // angular prefixes
+      "ng-", "ng:", "data-ng-", "x-ng-"
+    ] and
+  (
+    // explicit matching: `onclick` and `ng-bind`
+    t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
+    or
+    // regexp-based matching: `on[a-z]+`
+    exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
+      start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix) and
+      isCommonWordMatcher(start.getSuccessor())
    )
  )
 }

-from StringReplaceCall replace, RegExpTerm regexp, RegExpTerm dangerous
+/**
+ * Holds if `t` is a common pattern for matching words
+ */
+predicate isCommonWordMatcher(RegExpTerm t) {
+  exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
+    // [a-z]+ and similar
+    quantified
+        .(RegExpCharacterClass)
+        .getAChild()
+        .(RegExpCharacterRange)
+        .isRange(["a", "A"], ["z", "Z"])
+    or
+    // \w+ or [\w]+
+    [quantified, quantified.(RegExpCharacterClass).getAChild()]
+        .(RegExpCharacterClassEscape)
+        .getValue() = "w"
+  )
+}
+
+from
+  StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,
+  string prefix, string kind
 where
-  [replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
-  replace.isGlobal() and
  regexp = replace.getRegExp().getRoot() and
  dangerous.getRootTerm() = regexp and
-  isDangerous(dangerous) and
-  // avoid anchored terms
-  not exists(RegExpAnchor a | a.getRootTerm() = regexp) and
-  // avoid flagging wrappers
-  not (
-    dangerous instanceof RegExpAlt or
-    dangerous instanceof RegExpGroup
+  // only warn about the longest match (presumably the most descriptive)
+  prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length()) and
+  // only warn once per kind
+  not exists(EmptyReplaceRegExpTerm other |
+    other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
+  |
+    matchesDangerousPrefix(other, _, kind)
  ) and
  // don't flag replace operations in a loop
-  not replace.getReceiver().getALocalSource() = replace
-select replace, "The replaced string may still contain a substring that starts matching at $@.",
-  dangerous, dangerous.toString()
+  not replace.getAMethodCall*().flowsTo(replace.getReceiver()) and
+  // avoid anchored terms
+  not exists(RegExpAnchor a | regexp = a.getRootTerm())
+select replace,
+  "This string may still contain a substring that starts matching at $@, which may cause a " + kind +
+    " vulnerability.", dangerous, prefix