Merge pull request #3598 from asger-semmle/js/regexp-test

Approved by esbena
This commit is contained in:
semmle-qlci
2020-06-04 09:05:21 +01:00
committed by GitHub
9 changed files with 348 additions and 8 deletions

View File

@@ -1355,6 +1355,11 @@ class EqualityTest extends @equalitytest, Comparison {
(this instanceof NEqExpr or this instanceof StrictNEqExpr) and
result = false
}
/**
 * Holds if this test uses one of the strict operators, that is, `===` or `!==`.
 */
predicate isStrict() {
  this instanceof StrictNEqExpr
  or
  this instanceof StrictEqExpr
}
}
/**

View File

@@ -629,4 +629,153 @@ module StringOps {
class HtmlConcatenationLeaf extends ConcatenationLeaf {
  HtmlConcatenationLeaf() {
    // Only leaves whose concatenation root is an `HtmlConcatenationRoot` belong to this class.
    this.getRoot() instanceof HtmlConcatenationRoot
  }
}
/**
 * A data flow node whose boolean value tells whether a regular expression matched a string.
 *
 * The condition of each of the following `if`-statements is a `RegExpTest` node:
 * ```js
 * if (regexp.test(str)) { ... }
 * if (regexp.exec(str) != null) { ... }
 * if (str.matches(regexp)) { ... }
 * ```
 *
 * A `RegExpTest` is a boolean-valued expression, or an expression that is coerced
 * to a boolean. It is therefore not necessarily the call that performs the matching.
 * In the snippet below the `exec` call is not a `RegExpTest`, whereas the use of
 * `match` in the condition is:
 * ```js
 * let match = regexp.exec(str);
 * if (!match) { ... } // <--- 'match' is the RegExpTest
 * ```
 */
class RegExpTest extends DataFlow::Node {
  RegExpTest::Range range;

  RegExpTest() { range = this }

  /**
   * Gets the AST of the regular expression used in this test, if it can be seen locally.
   *
   * This is either the root term of a regexp creation that is a local source of the
   * regexp operand, or the regexp interpretation of a string literal that is used as
   * a coerced regexp operand.
   */
  RegExpTerm getRegExp() {
    exists(DataFlow::RegExpCreationNode creation |
      creation = this.getRegExpOperand().getALocalSource() and
      result = creation.getRoot()
    )
    or
    exists(StringLiteral pattern |
      pattern = range.getRegExpOperand(true).asExpr() and
      result = pattern.asRegExp()
    )
  }

  /**
   * Gets the data flow node for the regular expression object used in this test.
   *
   * In some cases the node holds a string value that is coerced to a RegExp object.
   */
  DataFlow::Node getRegExpOperand() { range.getRegExpOperand(_) = result }

  /**
   * Gets the data flow node for the string that is tested against the regular expression.
   */
  DataFlow::Node getStringOperand() { range.getStringOperand() = result }

  /**
   * Gets the boolean value of this node which indicates that the string matched
   * the regular expression.
   *
   * For example, the polarity of `regexp.exec(str) == null` is `false`, and the
   * polarity of `regexp.exec(str) != null` is `true`.
   */
  boolean getPolarity() { range.getPolarity() = result }
}
/**
* Companion module to the `RegExpTest` class.
*/
module RegExpTest {
/**
 * A data flow node whose boolean value tells whether a regular expression matched a string.
 *
 * Extend this class to contribute new kinds of `RegExpTest` nodes.
 */
abstract class Range extends DataFlow::Node {
  /**
   * Gets the data flow node for the regular expression object used in the test.
   *
   * Implementations bind `coerced` to `true` when the operand may be a string that is
   * converted to a regular expression, and to `false` when it is used directly as the
   * regular expression object.
   */
  abstract DataFlow::Node getRegExpOperand(boolean coerced);

  /**
   * Gets the data flow node for the string that is tested against the regular expression.
   */
  abstract DataFlow::Node getStringOperand();

  /**
   * Gets the boolean value of this node which indicates that the string matched
   * the regular expression. Unless overridden, this is `true`.
   */
  boolean getPolarity() { true = result }
}
/**
 * A call to a method named `test`, where the receiver is taken to be the regular
 * expression and the first argument the string being tested.
 */
private class TestCall extends Range, DataFlow::MethodCallNode {
  TestCall() { this.getMethodName() = "test" }

  override DataFlow::Node getRegExpOperand(boolean coerced) {
    coerced = false and
    result = this.getReceiver()
  }

  override DataFlow::Node getStringOperand() { result = this.getArgument(0) }
}
/**
 * A call to a method named `matches`, where the receiver is taken to be the string
 * being tested and the first argument a value that is coerced to a regular expression.
 */
private class MatchesCall extends Range, DataFlow::MethodCallNode {
  MatchesCall() { this.getMethodName() = "matches" }

  override DataFlow::Node getRegExpOperand(boolean coerced) {
    coerced = true and
    result = this.getArgument(0)
  }

  override DataFlow::Node getStringOperand() { result = this.getReceiver() }
}
/** A call to a method named `exec`. */
private class ExecCall extends DataFlow::MethodCallNode {
  ExecCall() { this.getMethodName() = "exec" }
}
/**
 * Holds if `e` is the operand of a logical negation or the test of a condition guard.
 */
private predicate isCoercedToBoolean(Expr e) {
  exists(LogNotExpr negation | e = negation.getOperand())
  or
  exists(ConditionGuardNode guard | e = guard.getTest())
}
/**
 * Holds if `e` evaluating to `polarity` implies that `operand` is not null.
 *
 * This covers two cases: `e` is `operand` itself in a position where it is coerced
 * to a boolean, or `e` is an equality test that compares `operand` against a
 * null or undefined constant.
 */
private predicate impliesNotNull(Expr e, Expr operand, boolean polarity) {
  // The value is used directly as a boolean.
  polarity = true and
  operand = e and
  isCoercedToBoolean(e)
  or
  // The value is compared against a null or undefined constant.
  exists(EqualityTest eq, Expr nullish |
    eq = e and
    eq.hasOperands(nullish, operand) and
    SyntacticConstants::isNullOrUndefined(nullish) and
    // A strict comparison against `undefined`, as in 'exec() === undefined',
    // doesn't work as a null check and is therefore excluded.
    (
      not nullish instanceof SyntacticConstants::UndefinedConstant
      or
      not eq.isStrict()
    ) and
    polarity = eq.getPolarity().booleanNot()
  )
}
/**
 * An expression whose boolean outcome implies that the result of an `exec` call
 * flowing into it is (or is not) null.
 */
private class ExecTest extends Range, DataFlow::ValueNode {
  ExecCall execCall;
  boolean matchOutcome;

  ExecTest() {
    exists(Expr use |
      impliesNotNull(astNode, use, matchOutcome) and
      execCall.flowsToExpr(use)
    )
  }

  override DataFlow::Node getRegExpOperand(boolean coerced) {
    coerced = false and
    result = execCall.getReceiver()
  }

  override DataFlow::Node getStringOperand() { result = execCall.getArgument(0) }

  override boolean getPolarity() { result = matchOutcome }
}
}
}

View File

@@ -78,18 +78,16 @@ module Shared {
* A sanitizer guard that checks for the existence of HTML chars in a string.
* E.g. `/["'&<>]/.exec(str)`.
*/
class ContainsHTMLGuard extends SanitizerGuard, DataFlow::MethodCallNode {
DataFlow::RegExpCreationNode regExp;
class ContainsHTMLGuard extends SanitizerGuard, StringOps::RegExpTest {
// The regular expression must be a character class that matches each of `"`, `&`, `<` and `>`.
ContainsHTMLGuard() {
this.getMethodName() = ["test", "exec"] and
this.getReceiver().getALocalSource() = regExp and
regExp.getRoot() instanceof RegExpCharacterClass and
forall(string s | s = ["\"", "&", "<", ">"] | regExp.getRoot().getAMatchedString() = s)
exists(RegExpCharacterClass regExp |
regExp = getRegExp() and
forall(string s | s = ["\"", "&", "<", ">"] | regExp.getAMatchedString() = s)
)
}
// The tested string is sanitized on the outcome that indicates the regular expression did not match.
override predicate sanitizes(boolean outcome, Expr e) {
outcome = false and e = this.getArgument(0).asExpr()
outcome = getPolarity().booleanNot() and e = this.getStringOperand().asExpr()
}
}