Merge pull request #18203 from asgerf/jss/document-url

JS: Use TaintedUrlSuffix in ClientSideUrlRedirect
This commit is contained in:
Asger F
2024-12-12 15:47:51 +01:00
committed by GitHub
18 changed files with 486 additions and 235 deletions

View File

@@ -1611,7 +1611,12 @@ class RegExpConstructorInvokeNode extends DataFlow::InvokeNode {
* Gets the AST of the regular expression created here, provided that the
* first argument is a string literal.
*/
RegExpTerm getRoot() { result = this.getArgument(0).asExpr().(StringLiteral).asRegExp() }
RegExpTerm getRoot() {
result = this.getArgument(0).asExpr().(StringLiteral).asRegExp()
or
// In case someone writes `new RegExp(/foo/)` for some reason
result = this.getArgument(0).asExpr().(RegExpLiteral).getRoot()
}
/**
* Gets the flags provided in the second argument, or an empty string if no

View File

@@ -4,114 +4,15 @@
*/
import javascript
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
/**
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
* which we collectively refer to as the "suffix" of the URL.
*/
module TaintedUrlSuffix {
private import DataFlow
import TaintedUrlSuffixCustomizations::TaintedUrlSuffix
/**
* The flow label representing a URL with a tainted query and fragment part.
*
* Can also be accessed using `TaintedUrlSuffix::label()`.
*/
class TaintedUrlSuffixLabel extends FlowLabel {
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
}
/**
* Gets the flow label representing a URL with a tainted query and fragment part.
*/
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
ClientSideRemoteFlowSource source() {
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
or
result = DOM::locationSource()
or
result.getKind().isUrl()
}
/**
* Holds if `node` should be a barrier for the given `label`.
*
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
* label.
*/
predicate isBarrier(Node node, FlowLabel label) {
label = label() and
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
}
/**
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
*
* This handles steps through string operations, promises, URL parsers, and URL accessors.
*/
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
// Transition from tainted-url-suffix to general taint when entering the second array element
// of a split('#') or split('?') array.
//
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
//
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
srclbl = label() and
dstlbl.isTaint() and
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
or
// Transition from URL suffix to full taint when extracting the query/fragment part.
srclbl = label() and
dstlbl.isTaint() and
(
exists(MethodCallNode call, string name |
src = call.getReceiver() and
dst = call and
name = call.getMethodName()
|
// Substring that is not a prefix
name = StringOps::substringMethodName() and
not call.getArgument(0).getIntValue() = 0
or
// Replace '#' and '?' with nothing
name = "replace" and
call.getArgument(0).getStringValue() = ["#", "?"] and
call.getArgument(1).getStringValue() = ""
or
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
name = "get"
or
// Methods on URL objects from the Closure library
name = "getDecodedQuery"
or
name = "getFragment"
or
name = "getParameterValue"
or
name = "getParameterValues"
or
name = "getQueryData"
)
or
exists(PropRead read |
src = read.getBase() and
dst = read and
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
read.getPropertyName() = "query"
)
or
// Assume calls to regexp.exec always extract query/fragment parameters.
exists(MethodCallNode call |
call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and
src = call.getArgument(0) and
dst = call
)
)
private class ConcreteTaintedUrlSuffixLabel extends TaintedUrlSuffixLabel {
ConcreteTaintedUrlSuffixLabel() { this = this }
}
}

View File

@@ -0,0 +1,153 @@
/**
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
* which we collectively refer to as the "suffix" of the URL.
*/
import javascript
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
/**
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
* which we collectively refer to as the "suffix" of the URL.
*/
module TaintedUrlSuffix {
private import DataFlow
/**
* The flow label representing a URL with a tainted query and fragment part.
*
* Can also be accessed using `TaintedUrlSuffix::label()`.
*/
abstract class TaintedUrlSuffixLabel extends FlowLabel {
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
}
/**
* Gets the flow label representing a URL with a tainted query and fragment part.
*/
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
ClientSideRemoteFlowSource source() {
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
or
result = DOM::locationSource()
or
result.getKind().isUrl()
}
/**
* Holds if `node` should be a barrier for the given `label`.
*
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
* label.
*/
predicate isBarrier(Node node, FlowLabel label) {
label = label() and
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
}
/**
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
*
* This handles steps through string operations, promises, URL parsers, and URL accessors.
*/
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
// Transition from tainted-url-suffix to general taint when entering the second array element
// of a split('#') or split('?') array.
//
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
//
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
srclbl = label() and
dstlbl.isTaint() and
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
or
// Transition from URL suffix to full taint when extracting the query/fragment part.
srclbl = label() and
dstlbl.isTaint() and
(
exists(MethodCallNode call, string name |
src = call.getReceiver() and
dst = call and
name = call.getMethodName()
|
// Substring that is not a prefix
name = StringOps::substringMethodName() and
not call.getArgument(0).getIntValue() = 0
or
// Replace '#' and '?' with nothing
name = "replace" and
call.getArgument(0).getStringValue() = ["#", "?"] and
call.getArgument(1).getStringValue() = ""
or
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
name = "get"
or
// Methods on URL objects from the Closure library
name = "getDecodedQuery"
or
name = "getFragment"
or
name = "getParameterValue"
or
name = "getParameterValues"
or
name = "getQueryData"
)
or
exists(PropRead read |
src = read.getBase() and
dst = read and
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
read.getPropertyName() = "query"
)
or
exists(MethodCallNode call, DataFlow::RegExpCreationNode re |
(
call = re.getAMethodCall("exec") and
src = call.getArgument(0) and
dst = call
or
call.getMethodName() = ["match", "matchAll"] and
re.flowsTo(call.getArgument(0)) and
src = call.getReceiver() and
dst = call
)
|
captureAfterSuffixIndicator(re.getRoot().getAChild*())
or
// If the regexp is unknown, assume it will extract the URL suffix
not exists(re.getRoot())
)
)
}
/** Holds if the `n`th child of `seq` contains a character indicating that everything thereafter is part of the suffix */
private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
// Also include '=' as it usually only appears in the URL suffix
seq.getChild(n).getAChild*().(RegExpConstant).getValue().regexpMatch(".*[?#=].*")
}
/** Holds if the `n`th child of `seq` contains a capture group. */
private predicate containsCaptureGroup(RegExpSequence seq, int n) {
seq.getChild(n).getAChild*().(RegExpGroup).isCapture()
}
/**
* Holds if `seq` contains a capture group that will likely match path of the URL suffix,
* thereby extracting tainted data.
*
* For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
*/
private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
exists(int suffix, int capture |
containsSuffixIndicator(seq, suffix) and
containsCaptureGroup(seq, capture) and
suffix < capture
)
}
}

View File

@@ -5,6 +5,7 @@
*/
import javascript
private import semmle.javascript.security.TaintedUrlSuffixCustomizations
module ClientSideUrlRedirect {
/**
@@ -31,12 +32,12 @@ module ClientSideUrlRedirect {
abstract class Sanitizer extends DataFlow::Node { }
/**
* DEPRECATED. Replaced by functionality from the `TaintedUrlSuffix` library.
*
* A flow label for values that represent the URL of the current document, and
* hence are only partially user-controlled.
*/
abstract class DocumentUrl extends DataFlow::FlowLabel {
DocumentUrl() { this = "document.url" } // TODO: replace with TaintedUrlSuffix
}
deprecated class DocumentUrl = TaintedUrlSuffix::TaintedUrlSuffixLabel;
/**
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
@@ -50,8 +51,8 @@ module ClientSideUrlRedirect {
ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
override DataFlow::FlowLabel getAFlowLabel() {
if this.(ClientSideRemoteFlowSource).getKind().isUrl()
then result instanceof DocumentUrl
if this = TaintedUrlSuffix::source()
then result = TaintedUrlSuffix::label()
else result.isTaint()
}
}
@@ -60,7 +61,7 @@ module ClientSideUrlRedirect {
* Holds if `node` extracts a part of a URL that does not contain the suffix.
*/
pragma[inline]
predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
deprecated predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
// Block flow through prefix-extraction `substring(0, ...)` and `split("#")[0]`
node.getMethodName() = [StringOps::substringMethodName(), "split"] and
not untrustedUrlSubstring(_, node)
@@ -70,7 +71,7 @@ module ClientSideUrlRedirect {
* Holds if `substring` refers to a substring of `base` which is considered untrusted
* when `base` is the current URL.
*/
predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
deprecated predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
exists(DataFlow::MethodCallNode mcn, string methodName |
mcn = substring and mcn.calls(base, methodName)
|

View File

@@ -10,9 +10,10 @@
import javascript
import UrlConcatenation
import ClientSideUrlRedirectCustomizations::ClientSideUrlRedirect
import semmle.javascript.security.TaintedUrlSuffix
// Materialize flow labels
private class ConcreteDocumentUrl extends DocumentUrl {
deprecated private class ConcreteDocumentUrl extends DocumentUrl {
ConcreteDocumentUrl() { this = this }
}
@@ -35,8 +36,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
}
predicate isBarrier(DataFlow::Node node, DataFlow::FlowLabel state) {
isPrefixExtraction(node) and
state instanceof DocumentUrl
TaintedUrlSuffix::isBarrier(node, state)
}
predicate isBarrierOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
@@ -47,9 +47,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
DataFlow::Node node1, DataFlow::FlowLabel state1, DataFlow::Node node2,
DataFlow::FlowLabel state2
) {
untrustedUrlSubstring(node1, node2) and
state1 instanceof DocumentUrl and
state2.isTaint()
TaintedUrlSuffix::step(node1, node2, state1, state2)
or
exists(HtmlSanitizerCall call |
node1 = call.getInput() and