Ruby: copy JS version of IncompleteUrlSubstringSanitization.ql

This commit is contained in:
Arthur Baars
2022-03-03 15:40:40 +01:00
parent eeb9a1d270
commit c9fa1fb5bb
10 changed files with 1292 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
/**
* Contains classes for recognizing array and string inclusion tests.
*/
private import javascript
/**
* An expression that checks if an element is contained in an array
* or is a substring of another string.
*
* Examples:
* ```
* A.includes(B)
* A.indexOf(B) !== -1
* A.indexOf(B) >= 0
* ~A.indexOf(B)
* ```
*/
class InclusionTest extends DataFlow::Node instanceof InclusionTest::Range {
/** Gets the `A` in `A.includes(B)`. */
DataFlow::Node getContainerNode() { result = super.getContainerNode() }
/** Gets the `B` in `A.includes(B)`. */
DataFlow::Node getContainedNode() { result = super.getContainedNode() }
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the container does not contain
* the given element.
*/
boolean getPolarity() { result = super.getPolarity() }
}
module InclusionTest {
/**
* A expression that is equivalent to `A.includes(B)` or `!A.includes(B)`.
*
* Note that this also includes calls to the array method named `includes`.
*/
abstract class Range extends DataFlow::Node {
/** Gets the `A` in `A.includes(B)`. */
abstract DataFlow::Node getContainerNode();
/** Gets the `B` in `A.includes(B)`. */
abstract DataFlow::Node getContainedNode();
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the container does not contain
* the given element.
*/
boolean getPolarity() { result = true }
}
/**
* A call to a utility function (`callee`) that performs an InclusionTest (`inner`).
*/
private class IndirectInclusionTest extends Range, DataFlow::CallNode {
InclusionTest inner;
Function callee;
IndirectInclusionTest() {
inner.getEnclosingExpr() = unique(Expr ret | ret = callee.getAReturnedExpr()) and
callee = unique(Function f | f = this.getACallee()) and
not this.isImprecise() and
inner.getContainedNode().getALocalSource() = DataFlow::parameterNode(callee.getAParameter()) and
inner.getContainerNode().getALocalSource() = DataFlow::parameterNode(callee.getAParameter())
}
override DataFlow::Node getContainerNode() {
exists(int arg |
inner.getContainerNode().getALocalSource() =
DataFlow::parameterNode(callee.getParameter(arg)) and
result = this.getArgument(arg)
)
}
override DataFlow::Node getContainedNode() {
exists(int arg |
inner.getContainedNode().getALocalSource() =
DataFlow::parameterNode(callee.getParameter(arg)) and
result = this.getArgument(arg)
)
}
override boolean getPolarity() { result = inner.getPolarity() }
}
/**
* A call to a method named `includes`, assumed to refer to `String.prototype.includes`
* or `Array.prototype.includes`.
*/
private class Includes_Native extends Range, DataFlow::MethodCallNode {
Includes_Native() {
this.getMethodName() = "includes" and
this.getNumArgument() = 1
}
override DataFlow::Node getContainerNode() { result = this.getReceiver() }
override DataFlow::Node getContainedNode() { result = this.getArgument(0) }
}
/**
* A call to `_.includes` or similar, assumed to operate on strings.
*/
private class Includes_Library extends Range, DataFlow::CallNode {
Includes_Library() {
exists(string name |
this = LodashUnderscore::member(name).getACall() and
(name = "includes" or name = "include" or name = "contains")
or
this = Closure::moduleImport("goog.string." + name).getACall() and
(name = "contains" or name = "caseInsensitiveContains")
)
}
override DataFlow::Node getContainerNode() { result = this.getArgument(0) }
override DataFlow::Node getContainedNode() { result = this.getArgument(1) }
}
/**
* A check of form `A.indexOf(B) !== -1` or similar.
*/
private class Includes_IndexOfEquals extends Range, DataFlow::ValueNode {
MethodCallExpr indexOf;
override EqualityTest astNode;
Includes_IndexOfEquals() {
exists(Expr index | astNode.hasOperands(indexOf, index) |
// one operand is of the form `whitelist.indexOf(x)`
indexOf.getMethodName() = "indexOf" and
// and the other one is -1
index.getIntValue() = -1
)
}
override DataFlow::Node getContainerNode() { result = indexOf.getReceiver().flow() }
override DataFlow::Node getContainedNode() { result = indexOf.getArgument(0).flow() }
override boolean getPolarity() { result = astNode.getPolarity().booleanNot() }
}
/**
* A check of form `A.indexOf(B) >= 0` or similar.
*/
private class Includes_IndexOfRelational extends Range, DataFlow::ValueNode {
MethodCallExpr indexOf;
override RelationalComparison astNode;
boolean polarity;
Includes_IndexOfRelational() {
exists(Expr lesser, Expr greater |
astNode.getLesserOperand() = lesser and
astNode.getGreaterOperand() = greater and
indexOf.getMethodName() = "indexOf" and
indexOf.getNumArgument() = 1
|
polarity = true and
greater = indexOf and
(
lesser.getIntValue() = 0 and astNode.isInclusive()
or
lesser.getIntValue() = -1 and not astNode.isInclusive()
)
or
polarity = false and
lesser = indexOf and
(
greater.getIntValue() = -1 and astNode.isInclusive()
or
greater.getIntValue() = 0 and not astNode.isInclusive()
)
)
}
override DataFlow::Node getContainerNode() { result = indexOf.getReceiver().flow() }
override DataFlow::Node getContainedNode() { result = indexOf.getArgument(0).flow() }
override boolean getPolarity() { result = polarity }
}
/**
* An expression of form `~A.indexOf(B)` which, when coerced to a boolean, is equivalent to `A.includes(B)`.
*/
private class Includes_IndexOfBitwise extends Range, DataFlow::ValueNode {
MethodCallExpr indexOf;
override BitNotExpr astNode;
Includes_IndexOfBitwise() {
astNode.getOperand() = indexOf and
indexOf.getMethodName() = "indexOf"
}
override DataFlow::Node getContainerNode() { result = indexOf.getReceiver().flow() }
override DataFlow::Node getContainedNode() { result = indexOf.getArgument(0).flow() }
}
}

View File

@@ -0,0 +1,800 @@
/**
* Provides classes and predicates for reasoning about string-manipulating expressions.
*/
import javascript
module StringOps {
/**
* A expression that is equivalent to `A.startsWith(B)` or `!A.startsWith(B)`.
*/
class StartsWith extends DataFlow::Node instanceof StartsWith::Range {
/**
* Gets the `A` in `A.startsWith(B)`.
*/
DataFlow::Node getBaseString() { result = super.getBaseString() }
/**
* Gets the `B` in `A.startsWith(B)`.
*/
DataFlow::Node getSubstring() { result = super.getSubstring() }
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the string does not start
* with the given substring.
*/
boolean getPolarity() { result = super.getPolarity() }
}
module StartsWith {
/**
* A expression that is equivalent to `A.startsWith(B)` or `!A.startsWith(B)`.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the `A` in `A.startsWith(B)`.
*/
abstract DataFlow::Node getBaseString();
/**
* Gets the `B` in `A.startsWith(B)`.
*/
abstract DataFlow::Node getSubstring();
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the string does not start
* with the given substring.
*/
boolean getPolarity() { result = true }
}
/**
* A call to a utility function (`callee`) that performs a StartsWith check (`inner`).
*/
private class IndirectStartsWith extends Range, DataFlow::CallNode {
StartsWith inner;
Function callee;
IndirectStartsWith() {
inner.getEnclosingExpr() = unique(Expr ret | ret = callee.getAReturnedExpr()) and
callee = unique(Function f | f = this.getACallee()) and
not this.isImprecise() and
inner.getBaseString().getALocalSource().getEnclosingExpr() = callee.getAParameter() and
inner.getSubstring().getALocalSource().getEnclosingExpr() = callee.getAParameter()
}
override DataFlow::Node getBaseString() {
exists(int arg |
inner.getBaseString().getALocalSource().getEnclosingExpr() = callee.getParameter(arg) and
result = this.getArgument(arg)
)
}
override DataFlow::Node getSubstring() {
exists(int arg |
inner.getSubstring().getALocalSource().getEnclosingExpr() = callee.getParameter(arg) and
result = this.getArgument(arg)
)
}
override boolean getPolarity() { result = inner.getPolarity() }
}
/**
* An expression of form `A.startsWith(B)`.
*/
private class StartsWith_Native extends Range, DataFlow::MethodCallNode {
StartsWith_Native() {
this.getMethodName() = "startsWith" and
this.getNumArgument() = 1
}
override DataFlow::Node getBaseString() { result = this.getReceiver() }
override DataFlow::Node getSubstring() { result = this.getArgument(0) }
}
/**
* An expression of form `A.indexOf(B) === 0`.
*/
private class StartsWith_IndexOfEquals extends Range, DataFlow::ValueNode {
override EqualityTest astNode;
DataFlow::MethodCallNode indexOf;
StartsWith_IndexOfEquals() {
indexOf.getMethodName() = "indexOf" and
indexOf.getNumArgument() = 1 and
indexOf.flowsToExpr(astNode.getAnOperand()) and
astNode.getAnOperand().getIntValue() = 0
}
override DataFlow::Node getBaseString() { result = indexOf.getReceiver() }
override DataFlow::Node getSubstring() { result = indexOf.getArgument(0) }
override boolean getPolarity() { result = astNode.getPolarity() }
}
/**
* An expression of form `A.indexOf(B)` coerced to a boolean.
*
* This is equivalent to `!A.startsWith(B)` since all return values other than zero map to `true`.
*/
private class StartsWith_IndexOfCoercion extends Range, DataFlow::MethodCallNode {
StartsWith_IndexOfCoercion() {
this.getMethodName() = "indexOf" and
this.getNumArgument() = 1 and
this.flowsToExpr(any(ConditionGuardNode guard).getTest()) // check for boolean coercion
}
override DataFlow::Node getBaseString() { result = this.getReceiver() }
override DataFlow::Node getSubstring() { result = this.getArgument(0) }
override boolean getPolarity() { result = false }
}
/**
* A call of form `_.startsWith(A, B)` or `ramda.startsWith(A, B)` or `goog.string.startsWith(A, B)`.
*/
private class StartsWith_Library extends Range, DataFlow::CallNode {
StartsWith_Library() {
this.getNumArgument() = 2 and
exists(DataFlow::SourceNode callee | this = callee.getACall() |
callee = LodashUnderscore::member("startsWith")
or
callee = DataFlow::moduleMember("ramda", "startsWith")
or
exists(string name |
callee = Closure::moduleImport("goog.string." + name) and
(name = "startsWith" or name = "caseInsensitiveStartsWith")
)
)
}
override DataFlow::Node getBaseString() { result = this.getArgument(0) }
override DataFlow::Node getSubstring() { result = this.getArgument(1) }
}
/**
* A comparison of form `x[0] === "k"` for some single-character constant `k`.
*/
private class StartsWith_FirstCharacter extends Range, DataFlow::ValueNode {
override EqualityTest astNode;
DataFlow::PropRead read;
Expr constant;
StartsWith_FirstCharacter() {
read.flowsTo(astNode.getAnOperand().flow()) and
read.getPropertyNameExpr().getIntValue() = 0 and
constant.getStringValue().length() = 1 and
astNode.getAnOperand() = constant
}
override DataFlow::Node getBaseString() { result = read.getBase() }
override DataFlow::Node getSubstring() { result = constant.flow() }
override boolean getPolarity() { result = astNode.getPolarity() }
}
/**
* A comparison of form `x.substring(0, y.length) === y`.
*/
private class StartsWith_Substring extends Range, DataFlow::ValueNode {
override EqualityTest astNode;
DataFlow::MethodCallNode call;
DataFlow::Node substring;
StartsWith_Substring() {
astNode.hasOperands(call.asExpr(), substring.asExpr()) and
(
call.getMethodName() = "substring" or
call.getMethodName() = "substr" or
call.getMethodName() = "slice"
) and
call.getNumArgument() = 2 and
(
AccessPath::getAnAliasedSourceNode(substring)
.getAPropertyRead("length")
.flowsTo(call.getArgument(1))
or
substring.getStringValue().length() = call.getArgument(1).asExpr().getIntValue()
)
}
override DataFlow::Node getBaseString() { result = call.getReceiver() }
override DataFlow::Node getSubstring() { result = substring }
override boolean getPolarity() { result = astNode.getPolarity() }
}
}
/**
* A expression that is equivalent to `A.includes(B)` or `!A.includes(B)`.
*
* Note that this class is equivalent to `InclusionTest`, which also matches
* inclusion tests on array objects.
*/
class Includes extends InclusionTest {
/** Gets the `A` in `A.includes(B)`. */
DataFlow::Node getBaseString() { result = this.getContainerNode() }
/** Gets the `B` in `A.includes(B)`. */
DataFlow::Node getSubstring() { result = this.getContainedNode() }
}
/**
* An expression that is equivalent to `A.endsWith(B)` or `!A.endsWith(B)`.
*/
class EndsWith extends DataFlow::Node instanceof EndsWith::Range {
/**
* Gets the `A` in `A.endsWith(B)`.
*/
DataFlow::Node getBaseString() { result = super.getBaseString() }
/**
* Gets the `B` in `A.endsWith(B)`.
*/
DataFlow::Node getSubstring() { result = super.getSubstring() }
/**
* Gets the polarity if the check.
*
* If the polarity is `false` the check returns `true` if the string does not end
* with the given substring.
*/
boolean getPolarity() { result = super.getPolarity() }
}
module EndsWith {
/**
* An expression that is equivalent to `A.endsWith(B)` or `!A.endsWith(B)`.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the `A` in `A.startsWith(B)`.
*/
abstract DataFlow::Node getBaseString();
/**
* Gets the `B` in `A.startsWith(B)`.
*/
abstract DataFlow::Node getSubstring();
/**
* Gets the polarity if the check.
*
* If the polarity is `false` the check returns `true` if the string does not end
* with the given substring.
*/
boolean getPolarity() { result = true }
}
/**
* A call to a utility function (`callee`) that performs an EndsWith check (`inner`).
*/
private class IndirectEndsWith extends Range, DataFlow::CallNode {
EndsWith inner;
Function callee;
IndirectEndsWith() {
inner.getEnclosingExpr() = unique(Expr ret | ret = callee.getAReturnedExpr()) and
callee = unique(Function f | f = this.getACallee()) and
not this.isImprecise() and
inner.getBaseString().getALocalSource().getEnclosingExpr() = callee.getAParameter() and
inner.getSubstring().getALocalSource().getEnclosingExpr() = callee.getAParameter()
}
override DataFlow::Node getBaseString() {
exists(int arg |
inner.getBaseString().getALocalSource().getEnclosingExpr() = callee.getParameter(arg) and
result = this.getArgument(arg)
)
}
override DataFlow::Node getSubstring() {
exists(int arg |
inner.getSubstring().getALocalSource().getEnclosingExpr() = callee.getParameter(arg) and
result = this.getArgument(arg)
)
}
override boolean getPolarity() { result = inner.getPolarity() }
}
/**
* A call of form `A.endsWith(B)`.
*/
private class EndsWith_Native extends Range, DataFlow::MethodCallNode {
EndsWith_Native() {
this.getMethodName() = "endsWith" and
this.getNumArgument() = 1
}
override DataFlow::Node getBaseString() { result = this.getReceiver() }
override DataFlow::Node getSubstring() { result = this.getArgument(0) }
}
/**
* A call of form `_.endsWith(A, B)` or `ramda.endsWith(A, B)`.
*/
private class EndsWith_Library extends Range, DataFlow::CallNode {
EndsWith_Library() {
this.getNumArgument() = 2 and
exists(DataFlow::SourceNode callee | this = callee.getACall() |
callee = LodashUnderscore::member("endsWith")
or
callee = DataFlow::moduleMember("ramda", "endsWith")
or
exists(string name |
callee = Closure::moduleImport("goog.string." + name) and
(name = "endsWith" or name = "caseInsensitiveEndsWith")
)
)
}
override DataFlow::Node getBaseString() { result = this.getArgument(0) }
override DataFlow::Node getSubstring() { result = this.getArgument(1) }
}
}
/**
* Holds if `first` and `second` are adjacent leaves in a concatenation tree.
*/
pragma[nomagic]
private predicate adjacentLeaves(ConcatenationLeaf first, ConcatenationLeaf second) {
exists(Concatenation parent, int i |
first = parent.getOperand(i).getLastLeaf() and
second = parent.getOperand(i + 1).getFirstLeaf()
)
}
/**
* A data flow node that performs a string concatenation or occurs as an operand
* in a string concatenation.
*
* For example, the expression `x + y + z` contains the following concatenation
* nodes:
* - The leaf nodes `x`, `y`, and `z`
* - The intermediate node `x + y`, which is both a concatenation and an operand
* - The root node `x + y + z`
*
*
* Note that the following are not recognized a string concatenations:
* - Array `join()` calls with a non-empty separator
* - Tagged template literals
*
*
* Also note that all `+` operators are seen as string concatenations,
* even in cases where it is used for arithmetic.
*
* Examples of string concatenations:
* ```
* x + y
* x += y
* [x, y].join('')
* Array(x, y).join('')
* `Hello, ${message}`
* ```
*/
class ConcatenationNode extends DataFlow::Node {
pragma[inline]
ConcatenationNode() {
exists(StringConcatenation::getAnOperand(this))
or
this = StringConcatenation::getAnOperand(_)
}
/**
* Gets the `n`th operand of this string concatenation.
*/
pragma[inline]
ConcatenationOperand getOperand(int n) { result = StringConcatenation::getOperand(this, n) }
/**
* Gets an operand of this string concatenation.
*/
pragma[inline]
ConcatenationOperand getAnOperand() { result = StringConcatenation::getAnOperand(this) }
/**
* Gets the number of operands of this string concatenation.
*/
pragma[inline]
int getNumOperand() { result = StringConcatenation::getNumOperand(this) }
/**
* Gets the first operand of this string concatenation.
*/
pragma[inline]
ConcatenationOperand getFirstOperand() { result = StringConcatenation::getFirstOperand(this) }
/**
* Gets the last operand of this string concatenation
*/
pragma[inline]
ConcatenationOperand getLastOperand() { result = StringConcatenation::getLastOperand(this) }
/**
* Holds if this only acts as a string coercion, such as `"" + x`.
*/
pragma[inline]
predicate isCoercion() { StringConcatenation::isCoercion(this) }
/**
* Holds if this is the root of a concatenation tree, that is,
* it is a concatenation operator that is not itself the immediate operand to
* another concatenation operator.
*/
pragma[inline]
predicate isRoot() { StringConcatenation::isRoot(this) }
/**
* Holds if this is a leaf in the concatenation tree, that is, it is not
* itself a concatenation.
*/
pragma[inline]
predicate isLeaf() { not exists(StringConcatenation::getAnOperand(this)) }
/**
* Gets the root of the concatenation tree in which this is an operator.
*/
pragma[inline]
ConcatenationRoot getRoot() { result = StringConcatenation::getRoot(this) }
/**
* Gets the enclosing concatenation in which this is an operand, if any.
*/
pragma[inline]
Concatenation getParentConcatenation() { this = StringConcatenation::getAnOperand(result) }
/**
* Gets the last leaf in this concatenation tree.
*
* For example, `z` is the last leaf in `x + y + z`.
*/
pragma[inline]
ConcatenationLeaf getLastLeaf() { result = StringConcatenation::getLastOperand*(this) }
/**
* Gets the first leaf in this concatenation tree.
*
* For example, `x` is the first leaf in `x + y + z`.
*/
pragma[inline]
ConcatenationLeaf getFirstLeaf() { result = StringConcatenation::getFirstOperand*(this) }
/**
* Gets the leaf that is occurs immediately before this leaf in the
* concatenation tree, if any.
*
* For example, `y` is the previous leaf from `z` in `x + y + z`.
*/
pragma[inline]
ConcatenationLeaf getPreviousLeaf() { adjacentLeaves(result, this) }
/**
* Gets the leaf that is occurs immediately after this leaf in the
* concatenation tree, if any.
*
* For example, `y` is the next leaf from `x` in `x + y + z`.
*/
pragma[inline]
ConcatenationLeaf getNextLeaf() { adjacentLeaves(this, result) }
}
/**
* A data flow node that performs a string concatenation and returns the result.
*
* Examples:
* ```
* x + y
* x += y
* [x, y].join('')
* Array(x, y).join('')
* `Hello ${message}`
* ```
*
* See `ConcatenationNode` for more information.
*/
class Concatenation extends ConcatenationNode {
pragma[inline]
Concatenation() { exists(StringConcatenation::getAnOperand(this)) }
}
/**
* An operand in a string concatenation.
*
* Examples:
* ```
* x + y // x and y are operands
* [x, y].join('') // x and y are operands
* `Hello ${message}` // `Hello ` and message are operands
* ```
*
* See `ConcatenationNode` for more information.
*/
class ConcatenationOperand extends ConcatenationNode {
pragma[inline]
ConcatenationOperand() { this = StringConcatenation::getAnOperand(_) }
}
/**
* A data flow node that performs a string concatenation, and is not an
* immediate operand in a larger string concatenation.
*
* Examples:
* ```
* // x + y + z is a root, but the inner x + y is not
* return x + y + z;
* ```
*
* See `ConcatenationNode` for more information.
*/
class ConcatenationRoot extends Concatenation {
pragma[inline]
ConcatenationRoot() { this.isRoot() }
/**
* Gets a leaf in this concatenation tree that this node is the root of.
*/
pragma[inline]
ConcatenationLeaf getALeaf() { this = StringConcatenation::getRoot(result) }
/**
* Returns the concatenation of all constant operands in this concatenation,
* ignoring the non-constant parts entirely.
*
* For example, for the following concatenation
* ```
* `Hello ${person}, how are you?`
* ```
* the result is `"Hello , how are you?"`
*/
string getConstantStringParts() {
result = this.getStringValue()
or
not exists(this.getStringValue()) and
result =
strictconcat(StringLiteralLike leaf |
leaf = this.(SmallConcatenationRoot).getALeaf().asExpr()
|
leaf.getStringValue()
order by
leaf.getLocation().getStartLine(), leaf.getLocation().getStartColumn()
)
}
}
/**
* A concatenation root where the combined length of the constant parts
* is less than 1 million chars.
*/
private class SmallConcatenationRoot extends ConcatenationRoot {
SmallConcatenationRoot() {
sum(StringLiteralLike leaf | leaf = this.getALeaf().asExpr() | leaf.getStringValue().length()) <
1000 * 1000
}
}
/** A string literal or template literal without any substitutions. */
private class StringLiteralLike extends Expr {
StringLiteralLike() {
this instanceof StringLiteral or
this instanceof TemplateElement
}
}
/**
* An operand to a concatenation that is not itself a concatenation.
*
* Example:
* ```
* x + y + z // x, y, and z are leaves
* [x, y + z].join('') // x, y, and z are leaves
* ```
*
* See `ConcatenationNode` for more information.
*/
class ConcatenationLeaf extends ConcatenationOperand {
pragma[inline]
ConcatenationLeaf() { this.isLeaf() }
}
/**
* The root node in a concatenation of one or more strings containing HTML fragments.
*/
class HtmlConcatenationRoot extends ConcatenationRoot {
pragma[noinline]
HtmlConcatenationRoot() {
this.getConstantStringParts().regexpMatch("(?s).*</?[a-zA-Z][^\\r\\n<>/]*/?>.*")
}
}
/**
* A data flow node that is part of an HTML string concatenation.
*/
class HtmlConcatenationNode extends ConcatenationNode {
HtmlConcatenationNode() { this.getRoot() instanceof HtmlConcatenationRoot }
}
/**
* A data flow node that is part of an HTML string concatenation,
* and is not itself a concatenation operator.
*/
class HtmlConcatenationLeaf extends ConcatenationLeaf {
HtmlConcatenationLeaf() { this.getRoot() instanceof HtmlConcatenationRoot }
}
/**
* A data flow node whose boolean value indicates whether a regexp matches a given string.
*
* For example, the condition of each of the following `if`-statements are `RegExpTest` nodes:
* ```js
* if (regexp.test(str)) { ... }
* if (regexp.exec(str) != null) { ... }
* if (str.matches(regexp)) { ... }
* ```
*
* Note that `RegExpTest` represents a boolean-valued expression or one
* that is coerced to a boolean, which is not always the same as the call that performs the
* regexp-matching. For example, the `exec` call below is not itself a `RegExpTest`,
* but the `match` variable in the condition is:
* ```js
* let match = regexp.exec(str);
* if (!match) { ... } // <--- 'match' is the RegExpTest
* ```
*/
class RegExpTest extends DataFlow::Node instanceof RegExpTest::Range {
/**
* Gets the AST of the regular expression used in the test, if it can be seen locally.
*/
RegExpTerm getRegExp() {
result = this.getRegExpOperand().getALocalSource().(DataFlow::RegExpCreationNode).getRoot()
or
result = super.getRegExpOperand(true).asExpr().(StringLiteral).asRegExp()
}
/**
* Gets the data flow node corresponding to the regular expression object used in the test.
*
* In some cases this represents a string value being coerced to a RegExp object.
*/
DataFlow::Node getRegExpOperand() { result = super.getRegExpOperand(_) }
/**
* Gets the data flow node corresponding to the string being tested against the regular expression.
*/
DataFlow::Node getStringOperand() { result = super.getStringOperand() }
/**
* Gets the return value indicating that the string matched the regular expression.
*
* For example, for `regexp.exec(str) == null`, the polarity is `false`, and for
* `regexp.exec(str) != null` the polarity is `true`.
*/
boolean getPolarity() { result = super.getPolarity() }
}
/**
* Companion module to the `RegExpTest` class.
*/
module RegExpTest {
/**
* A data flow node whose boolean value indicates whether a regexp matches a given string.
*
* This class can be extended to contribute new kinds of `RegExpTest` nodes.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the data flow node corresponding to the regular expression object used in the test.
*/
abstract DataFlow::Node getRegExpOperand(boolean coerced);
/**
* Gets the data flow node corresponding to the string being tested against the regular expression.
*/
abstract DataFlow::Node getStringOperand();
/**
* Gets the return value indicating that the string matched the regular expression.
*/
boolean getPolarity() { result = true }
}
private class TestCall extends Range, DataFlow::MethodCallNode {
TestCall() { this.getMethodName() = "test" }
override DataFlow::Node getRegExpOperand(boolean coerced) {
result = this.getReceiver() and coerced = false
}
override DataFlow::Node getStringOperand() { result = this.getArgument(0) }
}
private class MatchCall extends DataFlow::MethodCallNode {
MatchCall() { this.getMethodName() = "match" }
}
private class ExecCall extends DataFlow::MethodCallNode {
ExecCall() { this.getMethodName() = "exec" }
}
private predicate isCoercedToBoolean(Expr e) {
e = any(ConditionGuardNode guard).getTest()
or
e = any(LogNotExpr n).getOperand()
}
/**
* Holds if `e` evaluating to `polarity` implies that `operand` is not null.
*/
private predicate impliesNotNull(Expr e, Expr operand, boolean polarity) {
exists(EqualityTest test, Expr other |
e = test and
polarity = test.getPolarity().booleanNot() and
test.hasOperands(other, operand) and
SyntacticConstants::isNullOrUndefined(other) and
not (
// 'exec() === undefined' doesn't work
other instanceof SyntacticConstants::UndefinedConstant and
test.isStrict()
)
)
or
isCoercedToBoolean(e) and
operand = e and
polarity = true
}
private class ExecTest extends Range, DataFlow::ValueNode {
ExecCall exec;
boolean polarity;
ExecTest() {
exists(Expr use | exec.flowsToExpr(use) | impliesNotNull(astNode, use, polarity))
}
override DataFlow::Node getRegExpOperand(boolean coerced) {
result = exec.getReceiver() and coerced = false
}
override DataFlow::Node getStringOperand() { result = exec.getArgument(0) }
override boolean getPolarity() { result = polarity }
}
private class MatchTest extends Range, DataFlow::ValueNode {
MatchCall match;
boolean polarity;
MatchTest() {
exists(Expr use | match.flowsToExpr(use) | impliesNotNull(astNode, use, polarity))
}
override DataFlow::Node getRegExpOperand(boolean coerced) {
result = match.getArgument(0) and coerced = true
}
override DataFlow::Node getStringOperand() { result = match.getReceiver() }
override boolean getPolarity() { result = polarity }
}
}
/**
* Gets the name of a string method which performs substring extraction.
*
* These are `substring`, `substr`, and `slice`.
*/
string substringMethodName() { result = ["substring", "substr", "slice"] }
}

View File

@@ -0,0 +1,88 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Sanitizing untrusted URLs is an important technique for
preventing attacks such as request forgeries and malicious
redirections. Usually, this is done by checking that the host of a URL
is in a set of allowed hosts.
</p>
<p>
However, treating the URL as a string and checking if one of the
allowed hosts is a substring of the URL is very prone to errors.
Malicious URLs can bypass such security checks by embedding one
of the allowed hosts in an unexpected location.
</p>
<p>
Even if the substring check is not used in a
security-critical context, the incomplete check may still cause
undesirable behaviors when the check succeeds accidentally.
</p>
</overview>
<recommendation>
<p>
Parse a URL before performing a check on its host value,
and ensure that the check handles arbitrary subdomain sequences
correctly.
</p>
</recommendation>
<example>
<p>
The following example code checks that a URL redirection
will reach the <code>example.com</code> domain, or one of its
subdomains, and not some malicious site.
</p>
<sample src="examples/IncompleteUrlSubstringSanitization_BAD1.js"/>
<p>
The substring check is, however, easy to bypass. For example
by embedding <code>example.com</code> in the path component:
<code>http://evil-example.net/example.com</code>, or in the query
string component: <code>http://evil-example.net/?x=example.com</code>.
Address these shortcomings by checking the host of the parsed URL instead:
</p>
<sample src="examples/IncompleteUrlSubstringSanitization_BAD2.js"/>
<p>
This is still not a sufficient check as the
following URLs bypass it: <code>http://evil-example.com</code>
<code>http://example.com.evil-example.net</code>.
Instead, use an explicit whitelist of allowed hosts to
make the redirect secure:
</p>
<sample src="examples/IncompleteUrlSubstringSanitization_GOOD.js"/>
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,70 @@
/**
* @name Incomplete URL substring sanitization
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id js/incomplete-url-substring-sanitization
* @tags correctness
* security
* external/cwe/cwe-020
*/
import javascript
private import semmle.javascript.dataflow.InferredTypes
/**
* A check on a string for whether it contains a given substring, possibly with restrictions on the location of the substring.
*/
class SomeSubstringCheck extends DataFlow::Node {
DataFlow::Node substring;
SomeSubstringCheck() {
this.(StringOps::StartsWith).getSubstring() = substring or
this.(StringOps::Includes).getSubstring() = substring or
this.(StringOps::EndsWith).getSubstring() = substring
}
/**
* Gets the substring.
*/
DataFlow::Node getSubstring() { result = substring }
}
from SomeSubstringCheck check, DataFlow::Node substring, string target, string msg
where
substring = check.getSubstring() and
substring.mayHaveStringValue(target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
"(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
or
// target is a HTTP URL to a domain on any TLD with path elements, and the check is an includes check
check instanceof StringOps::Includes and
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+")
) and
(
if check instanceof StringOps::StartsWith
then msg = "may be followed by an arbitrary host name"
else
if check instanceof StringOps::EndsWith
then msg = "may be preceded by an arbitrary host name"
else msg = "can be anywhere in the URL, and arbitrary hosts may come before or after it"
) and
// whitelist
not (
// the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
check instanceof StringOps::EndsWith and
target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
or
// the trailing port or slash makes the prefix-check safe
check instanceof StringOps::StartsWith and
target.regexpMatch(".*(:[0-9]+|/)")
)
select check, "'$@' " + msg + ".", substring, target

View File

@@ -0,0 +1,7 @@
app.get('/some/path', function(req, res) {
let url = req.param("url");
// BAD: the host of `url` may be controlled by an attacker
if (url.includes("example.com")) {
res.redirect(url);
}
});

View File

@@ -0,0 +1,8 @@
app.get('/some/path', function(req, res) {
let url = req.param("url"),
host = urlLib.parse(url).host;
// BAD: the host of `url` may be controlled by an attacker
if (host.includes("example.com")) {
res.redirect(url);
}
});

View File

@@ -0,0 +1,13 @@
app.get('/some/path', function(req, res) {
let url = req.param('url'),
host = urlLib.parse(url).host;
// GOOD: the host of `url` can not be controlled by an attacker
let allowedHosts = [
'example.com',
'beta.example.com',
'www.example.com'
];
if (allowedHosts.includes(host)) {
res.redirect(url);
}
});

View File

@@ -0,0 +1,25 @@
| tst-IncompleteUrlSubstringSanitization.js:4:5:4:34 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:4:15:4:26 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:5:5:5:34 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:5:15:5:26 | "secure.net" | secure.net |
| tst-IncompleteUrlSubstringSanitization.js:6:5:6:35 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:6:15:6:27 | ".secure.com" | .secure.com |
| tst-IncompleteUrlSubstringSanitization.js:10:5:10:34 | x.index ... === -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:10:15:10:26 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:11:5:11:33 | x.index ... ) === 0 | '$@' may be followed by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:11:15:11:26 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:12:5:12:32 | x.index ... ") >= 0 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:12:15:12:26 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:14:5:14:38 | x.start ... e.com") | '$@' may be followed by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:14:18:14:37 | "https://secure.com" | https://secure.com |
| tst-IncompleteUrlSubstringSanitization.js:15:5:15:28 | x.endsW ... e.com") | '$@' may be preceded by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:15:16:15:27 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:20:5:20:28 | x.inclu ... e.com") | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:20:16:20:27 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:32:5:32:42 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:32:15:32:34 | "https://secure.com" | https://secure.com |
| tst-IncompleteUrlSubstringSanitization.js:33:5:33:46 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:33:15:33:38 | "https: ... om:443" | https://secure.com:443 |
| tst-IncompleteUrlSubstringSanitization.js:34:5:34:43 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:34:15:34:35 | "https: ... e.com/" | https://secure.com/ |
| tst-IncompleteUrlSubstringSanitization.js:52:5:52:48 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:52:15:52:40 | "https: ... ternal" | https://example.internal |
| tst-IncompleteUrlSubstringSanitization.js:55:5:55:44 | x.start ... ernal") | '$@' may be followed by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:55:18:55:43 | "https: ... ternal" | https://example.internal |
| tst-IncompleteUrlSubstringSanitization.js:56:5:56:51 | x.index ... ) !== 0 | '$@' may be followed by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:56:15:56:44 | 'https: ... al.org' | https://example.internal.org |
| tst-IncompleteUrlSubstringSanitization.js:57:5:57:51 | x.index ... ) === 0 | '$@' may be followed by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:57:15:57:44 | 'https: ... al.org' | https://example.internal.org |
| tst-IncompleteUrlSubstringSanitization.js:58:5:58:30 | x.endsW ... l.com") | '$@' may be preceded by an arbitrary host name. | tst-IncompleteUrlSubstringSanitization.js:58:16:58:29 | "internal.com" | internal.com |
| tst-IncompleteUrlSubstringSanitization.js:61:2:61:31 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:61:12:61:23 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:62:2:62:31 | x.index ... === -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:62:12:62:23 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:63:4:63:33 | x.index ... !== -1 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:63:14:63:25 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:64:3:64:26 | x.inclu ... e.com") | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:64:14:64:25 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:66:6:66:29 | x.inclu ... e.com") | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:66:17:66:28 | "secure.com" | secure.com |
| tst-IncompleteUrlSubstringSanitization.js:73:5:73:48 | x.index ... ") >= 0 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:73:15:73:42 | "https: ... oo/bar" | https://secure.com/foo/bar |
| tst-IncompleteUrlSubstringSanitization.js:74:5:74:40 | x.index ... ") >= 0 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:74:15:74:34 | "https://secure.com" | https://secure.com |
| tst-IncompleteUrlSubstringSanitization.js:75:5:75:52 | x.index ... ") >= 0 | '$@' can be anywhere in the URL, and arbitrary hosts may come before or after it. | tst-IncompleteUrlSubstringSanitization.js:75:15:75:46 | "https: ... ar-baz" | https://secure.com/foo/bar-baz |

View File

@@ -0,0 +1 @@
Security/CWE-020/IncompleteUrlSubstringSanitization.ql

View File

@@ -0,0 +1,76 @@
(function(x){
x.indexOf("internal") !== -1; // NOT OK, but not flagged
x.indexOf("localhost") !== -1; // NOT OK, but not flagged
x.indexOf("secure.com") !== -1; // NOT OK
x.indexOf("secure.net") !== -1; // NOT OK
x.indexOf(".secure.com") !== -1; // NOT OK
x.indexOf("sub.secure.") !== -1; // NOT OK, but not flagged
x.indexOf(".sub.secure.") !== -1; // NOT OK, but not flagged
x.indexOf("secure.com") === -1; // NOT OK
x.indexOf("secure.com") === 0; // NOT OK
x.indexOf("secure.com") >= 0; // NOT OK
x.startsWith("https://secure.com"); // NOT OK
x.endsWith("secure.com"); // NOT OK
x.endsWith(".secure.com"); // OK
x.startsWith("secure.com/"); // OK
x.indexOf("secure.com/") === 0; // OK
x.includes("secure.com"); // NOT OK
x.indexOf("#") !== -1; // OK
x.indexOf(":") !== -1; // OK
x.indexOf(":/") !== -1; // OK
x.indexOf("://") !== -1; // OK
x.indexOf("//") !== -1; // OK
x.indexOf(":443") !== -1; // OK
x.indexOf("/some/path/") !== -1; // OK
x.indexOf("some/path") !== -1; // OK
x.indexOf("/index.html") !== -1; // OK
x.indexOf(":template:") !== -1; // OK
x.indexOf("https://secure.com") !== -1; // NOT OK
x.indexOf("https://secure.com:443") !== -1; // NOT OK
x.indexOf("https://secure.com/") !== -1; // NOT OK
x.indexOf(".cn") !== -1; // NOT OK, but not flagged
x.indexOf(".jpg") !== -1; // OK
x.indexOf("index.html") !== -1; // OK
x.indexOf("index.js") !== -1; // OK
x.indexOf("index.php") !== -1; // OK
x.indexOf("index.css") !== -1; // OK
x.indexOf("secure=true") !== -1; // OK (query param)
x.indexOf("&auth=") !== -1; // OK (query param)
x.indexOf(getCurrentDomain()) !== -1; // NOT OK, but not flagged
x.indexOf(location.origin) !== -1; // NOT OK, but not flagged
x.indexOf("tar.gz") + offset; // OK
x.indexOf("tar.gz") - offset; // OK
x.indexOf("https://example.internal") !== -1; // NOT OK
x.indexOf("https://") !== -1; // OK
x.startsWith("https://example.internal"); // NOT OK
x.indexOf('https://example.internal.org') !== 0; // NOT OK
x.indexOf('https://example.internal.org') === 0; // NOT OK
x.endsWith("internal.com"); // NOT OK
x.startsWith("https://example.internal:80"); // OK
x.indexOf("secure.com") !== -1; // NOT OK
x.indexOf("secure.com") === -1; // OK
!(x.indexOf("secure.com") !== -1); // OK
!x.includes("secure.com"); // OK
if(!x.includes("secure.com")) { // NOT OK
} else {
doSomeThingWithTrustedURL(x);
}
x.startsWith("https://secure.com/foo/bar"); // OK - a forward slash after the domain makes prefix checks safe.
x.indexOf("https://secure.com/foo/bar") >= 0 // NOT OK - the url can be anywhere in the string.
x.indexOf("https://secure.com") >= 0 // NOT OK
x.indexOf("https://secure.com/foo/bar-baz") >= 0 // NOT OK - the url can be anywhere in the string.
});