diff --git a/javascript/ql/src/semmle/javascript/Regexp.qll b/javascript/ql/src/semmle/javascript/Regexp.qll index 9d1a4b9cf61..67c149409ef 100644 --- a/javascript/ql/src/semmle/javascript/Regexp.qll +++ b/javascript/ql/src/semmle/javascript/Regexp.qll @@ -968,4 +968,92 @@ module RegExp { /** Holds `flags` includes the `s` flag or is the unknown flag `?`. */ bindingset[flags] predicate maybeDotAll(string flags) { flags = unknownFlag() or isDotAll(flags) } + + /** Holds if `term` and all of its disjuncts are anchored on both ends. */ + predicate isFullyAnchoredTerm(RegExpTerm term) { + exists(RegExpSequence seq | term = seq | + seq.getChild(0) instanceof RegExpCaret and + seq.getLastChild() instanceof RegExpDollar + ) + or + isFullyAnchoredTerm(term.(RegExpGroup).getAChild()) + or + isFullyAnchoredAlt(term, term.getNumChild()) + } + + /** Holds if the first `i` disjuncts of `term` are fully anchored. */ + private predicate isFullyAnchoredAlt(RegExpAlt term, int i) { + isFullyAnchoredTerm(term.getChild(0)) and i = 1 + or + isFullyAnchoredAlt(term, i - 1) and + isFullyAnchoredTerm(term.getChild(i - 1)) + } + + /** + * Holds if `term` is matches any character except for explicitly listed exceptions. + * + * For example, holds for `.`, `[^<>]`, or `\W`, but not for `[a-z]`, `\w`, or `[^\W\S]`. + */ + predicate isWildcardLike(RegExpTerm term) { + term instanceof RegExpDot + or + term.(RegExpCharacterClassEscape).getValue().isUppercase() + or + exists(RegExpCharacterClass cls | term = cls | + cls.isInverted() and + not cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase() + ) + or + exists(RegExpCharacterClass cls | term = cls | + not cls.isInverted() and + cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase() + ) + } + + /** + * Holds if `term` is a generic sanitizer for strings that match (if `outcome` is true) + * or strings that don't match (if `outcome` is false). + * + * Specifically, whitelisting regexps such as `^(foo|bar)$` sanitize matches in the true case. + * Inverted character classes such as `[^a-z]` or `\W` sanitize matches in the false case. + */ + predicate isGenericRegExpSanitizer(RegExpTerm term, boolean outcome) { + term.isRootTerm() and + ( + outcome = true and + isFullyAnchoredTerm(term) and + not isWildcardLike(term.getAChild*()) + or + outcome = false and + exists(RegExpTerm root | + root = term + or + root = term.(RegExpGroup).getAChild() + | + isWildcardLike(root) + or + isWildcardLike(root.(RegExpAlt).getAChild()) + ) + ) + } + + /** + * Gets the AST of a regular expression object that can flow to `node`. + */ + RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) { + exists(DataFlow::RegExpCreationNode regexp | + regexp.getAReference().flowsTo(node) and + result = regexp.getRegExpTerm() + ) + } + + /** + * Gets the AST of a regular expression that can flow to `node`, + * including `RegExp` objects as well as strings interpreted as regular expressions. + */ + RegExpTerm getRegExpFromNode(DataFlow::Node node) { + result = getRegExpObjectFromNode(node) + or + result = node.asExpr().(StringLiteral).asRegExp() + } } diff --git a/javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll b/javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll index d99863b6294..be1df8bc7c2 100644 --- a/javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll +++ b/javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll @@ -696,33 +696,37 @@ module TaintTracking { */ class SanitizingRegExpTest extends AdditionalSanitizerGuardNode, DataFlow::ValueNode { Expr expr; + boolean sanitizedOutcome; SanitizingRegExpTest() { exists(MethodCallExpr mce, Expr base, string m, Expr firstArg | mce = astNode and mce.calls(base, m) and firstArg = mce.getArgument(0) | // /re/.test(u) or /re/.exec(u) - base.analyze().getAType() = TTRegExp() and + RegExp::isGenericRegExpSanitizer(RegExp::getRegExpObjectFromNode(base.flow()), sanitizedOutcome) and (m = "test" or m = "exec") and firstArg = expr or // u.match(/re/) or u.match("re") base = expr and m = "match" and - exists(InferredType firstArgType | firstArgType = firstArg.analyze().getAType() | - firstArgType = TTRegExp() or firstArgType = TTString() - ) + RegExp::isGenericRegExpSanitizer(RegExp::getRegExpFromNode(firstArg.flow()), sanitizedOutcome) ) or // m = /re/.exec(u) and similar - DataFlow::valueNode(astNode.(AssignExpr).getRhs()).(SanitizingRegExpTest).getSanitizedExpr() = - expr + exists(SanitizingRegExpTest other | + other = DataFlow::valueNode(astNode.(AssignExpr).getRhs()) and + expr = other.getSanitizedExpr() and + sanitizedOutcome = other.getSanitizedOutcome() + ) } private Expr getSanitizedExpr() { result = expr } + private boolean getSanitizedOutcome() { result = sanitizedOutcome } + override predicate sanitizes(boolean outcome, Expr e) { - (outcome = true or outcome = false) and + outcome = sanitizedOutcome and e = expr } diff --git a/javascript/ql/test/query-tests/Security/CWE-079/tst.js b/javascript/ql/test/query-tests/Security/CWE-079/tst.js index 907f1be9299..f02735a072d 100644 --- a/javascript/ql/test/query-tests/Security/CWE-079/tst.js +++ b/javascript/ql/test/query-tests/Security/CWE-079/tst.js @@ -132,7 +132,7 @@ function tst() { document.write(v); } - if (!(/\d+/.test(v))) + if (!(/^\d+$/.test(v))) return; // OK