Merge pull request #3651 from esbena/js/bad-multicharacter-sanitization

JS: initial version of IncompleteMultiCharacterSanitization.ql
2026-05-05 05:35:13 +02:00 · 2020-06-12 16:25:22 +01:00
parent 01c51eea89 678bb7c128
commit d844e0025a
5 changed files with 255 additions and 0 deletions
--- a/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.qhelp
+++ b/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.qhelp
@@ -0,0 +1,24 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+	<overview>
+
+	</overview>
+
+	<recommendation>
+
+	</recommendation>
+
+	<example>
+
+	</example>
+
+	<references>
+
+		<li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
+
+	</references>
+
+</qhelp>
--- a/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
+++ b/javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql
@@ -0,0 +1,81 @@
+/**
+ * @name Incomplete multi-character sanitization
+ * @description A sanitizer that removes a sequence of characters may reintroduce the dangerous sequence.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id js/incomplete-multi-character-sanitization
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-116
+ *       external/cwe/cwe-20
+ */
+
+import javascript
+import semmle.javascript.security.IncompleteBlacklistSanitizer
+
+predicate isDangerous(RegExpTerm t) {
+  // path traversals
+  t.getAMatchedString() = ["..", "/..", "../"]
+  or
+  exists(RegExpTerm start |
+    start = t.(RegExpSequence).getAChild() and
+    start.getConstantValue() = "." and
+    start.getSuccessor().getConstantValue() = "." and
+    not [start.getPredecessor(), start.getSuccessor().getSuccessor()].getConstantValue() = "."
+  )
+  or
+  // HTML comments
+  t.getAMatchedString() = "<!--"
+  or
+  // HTML scripts
+  t.getAMatchedString().regexpMatch("(?i)<script.*")
+  or
+  exists(RegExpSequence seq | seq = t |
+    t.getChild(0).getConstantValue() = "<" and
+    // the `cript|scrip` case has been observed in the wild, not sure what the goal of that pattern is...
+    t
+        .getChild(0)
+        .getSuccessor+()
+        .getAMatchedString()
+        .regexpMatch("(?i)iframe|script|cript|scrip|style")
+  )
+  or
+  // HTML attributes
+  exists(string dangerousPrefix | dangerousPrefix = ["ng-", "on"] |
+    t.getAMatchedString().regexpMatch("(i?)" + dangerousPrefix + "[a-z]+")
+    or
+    exists(RegExpTerm start, RegExpTerm event | start = t.getAChild() |
+      start.getConstantValue().regexpMatch("(?i)[^a-z]*" + dangerousPrefix) and
+      event = start.getSuccessor() and
+      exists(RegExpTerm quantified | quantified = event.(RegExpQuantifier).getChild(0) |
+        quantified
+            .(RegExpCharacterClass)
+            .getAChild()
+            .(RegExpCharacterRange)
+            .isRange(["a", "A"], ["z", "Z"]) or
+        [quantified, quantified.(RegExpRange).getAChild()].(RegExpCharacterClassEscape).getValue() =
+          "w"
+      )
+    )
+  )
+}
+
+from StringReplaceCall replace, RegExpTerm regexp, RegExpTerm dangerous
+where
+  [replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
+  replace.isGlobal() and
+  regexp = replace.getRegExp().getRoot() and
+  dangerous.getRootTerm() = regexp and
+  isDangerous(dangerous) and
+  // avoid anchored terms
+  not exists(RegExpAnchor a | a.getRootTerm() = regexp) and
+  // avoid flagging wrappers
+  not (
+    dangerous instanceof RegExpAlt or
+    dangerous instanceof RegExpGroup
+  ) and
+  // don't flag replace operations in a loop
+  not replace.getReceiver().getALocalSource() = replace
+select replace, "The replaced string may still contain a substring that starts matching at $@.",
+  dangerous, dangerous.toString()