JS: Make XSS MetacharEscapeSanitizer more precise

2026-05-02 20:25:13 +02:00 · 2021-03-15 23:39:52 +00:00
parent effa52f9e1
commit 198bdcab26
2 changed files with 63 additions and 9 deletions
--- a/javascript/ql/src/semmle/javascript/Regexp.qll
+++ b/javascript/ql/src/semmle/javascript/Regexp.qll
@@ -1114,4 +1114,64 @@ module RegExp {
    or
    result = node.asExpr().(StringLiteral).asRegExp()
  }
+
+  /**
+   * A character that will be analyzed by `RegExp::alwaysMatchesMetaCharacter`.
+   *
+   * Currently only `<`, `'`, and `"` are considered to be meta-characters, but new meta-characters
+   * can be added by subclassing this class.
+   */
+  abstract class MetaCharacter extends string {
+    bindingset[this]
+    MetaCharacter() { any() }
+
+    /**
+     * Holds if the given atomic term matches this meta-character.
+     *
+     * Does not hold for derived terms like alternatives and groups.
+     *
+     * By default, `.`, `\W`, `\S`, and `\D` are considered to match any meta-character,
+     * but the predicate can be overridden for meta-characters where this is not the case.
+     */
+    predicate matchedByAtom(RegExpTerm term) {
+      term.(RegExpConstant).getConstantValue() = this
+      or
+      term instanceof RegExpDot
+      or
+      term.(RegExpCharacterClassEscape).getValue() = ["\\W", "\\S", "\\D"]
+      or
+      exists(string lo, string hi |
+        term.(RegExpCharacterRange).isRange(lo, hi) and
+        lo <= this and
+        this <= hi
+      )
+    }
+  }
+
+  private class DefaultMetaCharacter extends MetaCharacter {
+    DefaultMetaCharacter() { this = ["<", "'", "\""] }
+  }
+
+  /**
+   * Holds if `term` can match any occurence of `char` within a string (not taking into account
+   * the context in which `term` appears).
+   *
+   * This predicate is under-approximate and never considers sequences to guarantee a match.
+   */
+  predicate alwaysMatchesMetaCharacter(RegExpTerm term, MetaCharacter char) {
+    not term.getParent() instanceof RegExpSequence and // restrict size of predicate
+    char.matchedByAtom(term)
+    or
+    alwaysMatchesMetaCharacter(term.(RegExpGroup).getAChild(), char)
+    or
+    alwaysMatchesMetaCharacter(term.(RegExpAlt).getAlternative(), char)
+    or
+    exists(RegExpCharacterClass class_ | term = class_ |
+      not class_.isInverted() and
+      char.matchedByAtom(class_.getAChild())
+      or
+      class_.isInverted() and
+      not char.matchedByAtom(class_.getAChild())
+    )
+  }
 }
--- a/javascript/ql/src/semmle/javascript/security/dataflow/Xss.qll
+++ b/javascript/ql/src/semmle/javascript/security/dataflow/Xss.qll
@@ -28,19 +28,13 @@ module Shared {
  abstract class SanitizerGuard extends TaintTracking::SanitizerGuardNode { }

  /**
-   * A global regexp replacement involving an HTML meta-character, viewed as a sanitizer for
+   * A global regexp replacement involving the `<`, `'`, or `"` meta-character, viewed as a sanitizer for
   * XSS vulnerabilities.
-   *
-   * The XSS queries do not attempt to reason about correctness or completeness of sanitizers,
-   * so any such replacement stops taint propagation.
   */
  class MetacharEscapeSanitizer extends Sanitizer, StringReplaceCall {
    MetacharEscapeSanitizer() {
-      this.isGlobal() and
-      exists(RegExpConstant c |
-        c.getLiteral() = getRegExp().asExpr() and
-        c.getValue().regexpMatch("['\"&<>]")
-      )
+      isGlobal() and
+      RegExp::alwaysMatchesMetaCharacter(getRegExp().getRoot(), ["<", "'", "\""])
    }
  }