mirror of
https://github.com/github/codeql.git
synced 2026-04-26 01:05:15 +02:00
Merge pull request #18203 from asgerf/jss/document-url
JS: Use TaintedUrlSuffix in ClientSideUrlRedirect
This commit is contained in:
@@ -1611,7 +1611,12 @@ class RegExpConstructorInvokeNode extends DataFlow::InvokeNode {
|
||||
* Gets the AST of the regular expression created here, provided that the
|
||||
* first argument is a string literal.
|
||||
*/
|
||||
RegExpTerm getRoot() { result = this.getArgument(0).asExpr().(StringLiteral).asRegExp() }
|
||||
RegExpTerm getRoot() {
|
||||
result = this.getArgument(0).asExpr().(StringLiteral).asRegExp()
|
||||
or
|
||||
// In case someone writes `new RegExp(/foo/)` for some reason
|
||||
result = this.getArgument(0).asExpr().(RegExpLiteral).getRoot()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the flags provided in the second argument, or an empty string if no
|
||||
|
||||
@@ -4,114 +4,15 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
/**
|
||||
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
|
||||
* which we collectively refer to as the "suffix" of the URL.
|
||||
*/
|
||||
module TaintedUrlSuffix {
|
||||
private import DataFlow
|
||||
import TaintedUrlSuffixCustomizations::TaintedUrlSuffix
|
||||
|
||||
/**
|
||||
* The flow label representing a URL with a tainted query and fragment part.
|
||||
*
|
||||
* Can also be accessed using `TaintedUrlSuffix::label()`.
|
||||
*/
|
||||
class TaintedUrlSuffixLabel extends FlowLabel {
|
||||
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the flow label representing a URL with a tainted query and fragment part.
|
||||
*/
|
||||
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
|
||||
|
||||
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
|
||||
ClientSideRemoteFlowSource source() {
|
||||
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
|
||||
or
|
||||
result = DOM::locationSource()
|
||||
or
|
||||
result.getKind().isUrl()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` should be a barrier for the given `label`.
|
||||
*
|
||||
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
|
||||
* label.
|
||||
*/
|
||||
predicate isBarrier(Node node, FlowLabel label) {
|
||||
label = label() and
|
||||
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
|
||||
*
|
||||
* This handles steps through string operations, promises, URL parsers, and URL accessors.
|
||||
*/
|
||||
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
|
||||
// Transition from tainted-url-suffix to general taint when entering the second array element
|
||||
// of a split('#') or split('?') array.
|
||||
//
|
||||
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
|
||||
//
|
||||
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
|
||||
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
|
||||
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
|
||||
srclbl = label() and
|
||||
dstlbl.isTaint() and
|
||||
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
|
||||
or
|
||||
// Transition from URL suffix to full taint when extracting the query/fragment part.
|
||||
srclbl = label() and
|
||||
dstlbl.isTaint() and
|
||||
(
|
||||
exists(MethodCallNode call, string name |
|
||||
src = call.getReceiver() and
|
||||
dst = call and
|
||||
name = call.getMethodName()
|
||||
|
|
||||
// Substring that is not a prefix
|
||||
name = StringOps::substringMethodName() and
|
||||
not call.getArgument(0).getIntValue() = 0
|
||||
or
|
||||
// Replace '#' and '?' with nothing
|
||||
name = "replace" and
|
||||
call.getArgument(0).getStringValue() = ["#", "?"] and
|
||||
call.getArgument(1).getStringValue() = ""
|
||||
or
|
||||
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
|
||||
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
|
||||
name = "get"
|
||||
or
|
||||
// Methods on URL objects from the Closure library
|
||||
name = "getDecodedQuery"
|
||||
or
|
||||
name = "getFragment"
|
||||
or
|
||||
name = "getParameterValue"
|
||||
or
|
||||
name = "getParameterValues"
|
||||
or
|
||||
name = "getQueryData"
|
||||
)
|
||||
or
|
||||
exists(PropRead read |
|
||||
src = read.getBase() and
|
||||
dst = read and
|
||||
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
|
||||
read.getPropertyName() = "query"
|
||||
)
|
||||
or
|
||||
// Assume calls to regexp.exec always extract query/fragment parameters.
|
||||
exists(MethodCallNode call |
|
||||
call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and
|
||||
src = call.getArgument(0) and
|
||||
dst = call
|
||||
)
|
||||
)
|
||||
private class ConcreteTaintedUrlSuffixLabel extends TaintedUrlSuffixLabel {
|
||||
ConcreteTaintedUrlSuffixLabel() { this = this }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
|
||||
* which we collectively refer to as the "suffix" of the URL.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
||||
|
||||
/**
|
||||
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
|
||||
* which we collectively refer to as the "suffix" of the URL.
|
||||
*/
|
||||
module TaintedUrlSuffix {
|
||||
private import DataFlow
|
||||
|
||||
/**
|
||||
* The flow label representing a URL with a tainted query and fragment part.
|
||||
*
|
||||
* Can also be accessed using `TaintedUrlSuffix::label()`.
|
||||
*/
|
||||
abstract class TaintedUrlSuffixLabel extends FlowLabel {
|
||||
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the flow label representing a URL with a tainted query and fragment part.
|
||||
*/
|
||||
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
|
||||
|
||||
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
|
||||
ClientSideRemoteFlowSource source() {
|
||||
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
|
||||
or
|
||||
result = DOM::locationSource()
|
||||
or
|
||||
result.getKind().isUrl()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` should be a barrier for the given `label`.
|
||||
*
|
||||
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
|
||||
* label.
|
||||
*/
|
||||
predicate isBarrier(Node node, FlowLabel label) {
|
||||
label = label() and
|
||||
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
|
||||
*
|
||||
* This handles steps through string operations, promises, URL parsers, and URL accessors.
|
||||
*/
|
||||
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
|
||||
// Transition from tainted-url-suffix to general taint when entering the second array element
|
||||
// of a split('#') or split('?') array.
|
||||
//
|
||||
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
|
||||
//
|
||||
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
|
||||
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
|
||||
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
|
||||
srclbl = label() and
|
||||
dstlbl.isTaint() and
|
||||
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
|
||||
or
|
||||
// Transition from URL suffix to full taint when extracting the query/fragment part.
|
||||
srclbl = label() and
|
||||
dstlbl.isTaint() and
|
||||
(
|
||||
exists(MethodCallNode call, string name |
|
||||
src = call.getReceiver() and
|
||||
dst = call and
|
||||
name = call.getMethodName()
|
||||
|
|
||||
// Substring that is not a prefix
|
||||
name = StringOps::substringMethodName() and
|
||||
not call.getArgument(0).getIntValue() = 0
|
||||
or
|
||||
// Replace '#' and '?' with nothing
|
||||
name = "replace" and
|
||||
call.getArgument(0).getStringValue() = ["#", "?"] and
|
||||
call.getArgument(1).getStringValue() = ""
|
||||
or
|
||||
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
|
||||
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
|
||||
name = "get"
|
||||
or
|
||||
// Methods on URL objects from the Closure library
|
||||
name = "getDecodedQuery"
|
||||
or
|
||||
name = "getFragment"
|
||||
or
|
||||
name = "getParameterValue"
|
||||
or
|
||||
name = "getParameterValues"
|
||||
or
|
||||
name = "getQueryData"
|
||||
)
|
||||
or
|
||||
exists(PropRead read |
|
||||
src = read.getBase() and
|
||||
dst = read and
|
||||
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
|
||||
read.getPropertyName() = "query"
|
||||
)
|
||||
or
|
||||
exists(MethodCallNode call, DataFlow::RegExpCreationNode re |
|
||||
(
|
||||
call = re.getAMethodCall("exec") and
|
||||
src = call.getArgument(0) and
|
||||
dst = call
|
||||
or
|
||||
call.getMethodName() = ["match", "matchAll"] and
|
||||
re.flowsTo(call.getArgument(0)) and
|
||||
src = call.getReceiver() and
|
||||
dst = call
|
||||
)
|
||||
|
|
||||
captureAfterSuffixIndicator(re.getRoot().getAChild*())
|
||||
or
|
||||
// If the regexp is unknown, assume it will extract the URL suffix
|
||||
not exists(re.getRoot())
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if the `n`th child of `seq` contains a character indicating that everything thereafter is part of the suffix */
|
||||
private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
|
||||
// Also include '=' as it usually only appears in the URL suffix
|
||||
seq.getChild(n).getAChild*().(RegExpConstant).getValue().regexpMatch(".*[?#=].*")
|
||||
}
|
||||
|
||||
/** Holds if the `n`th child of `seq` contains a capture group. */
|
||||
private predicate containsCaptureGroup(RegExpSequence seq, int n) {
|
||||
seq.getChild(n).getAChild*().(RegExpGroup).isCapture()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `seq` contains a capture group that will likely match path of the URL suffix,
|
||||
* thereby extracting tainted data.
|
||||
*
|
||||
* For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
|
||||
*/
|
||||
private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
|
||||
exists(int suffix, int capture |
|
||||
containsSuffixIndicator(seq, suffix) and
|
||||
containsCaptureGroup(seq, capture) and
|
||||
suffix < capture
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.security.TaintedUrlSuffixCustomizations
|
||||
|
||||
module ClientSideUrlRedirect {
|
||||
/**
|
||||
@@ -31,12 +32,12 @@ module ClientSideUrlRedirect {
|
||||
abstract class Sanitizer extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* DEPRECATED. Replaced by functionality from the `TaintedUrlSuffix` library.
|
||||
*
|
||||
* A flow label for values that represent the URL of the current document, and
|
||||
* hence are only partially user-controlled.
|
||||
*/
|
||||
abstract class DocumentUrl extends DataFlow::FlowLabel {
|
||||
DocumentUrl() { this = "document.url" } // TODO: replace with TaintedUrlSuffix
|
||||
}
|
||||
deprecated class DocumentUrl = TaintedUrlSuffix::TaintedUrlSuffixLabel;
|
||||
|
||||
/**
|
||||
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
|
||||
@@ -50,8 +51,8 @@ module ClientSideUrlRedirect {
|
||||
ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
|
||||
|
||||
override DataFlow::FlowLabel getAFlowLabel() {
|
||||
if this.(ClientSideRemoteFlowSource).getKind().isUrl()
|
||||
then result instanceof DocumentUrl
|
||||
if this = TaintedUrlSuffix::source()
|
||||
then result = TaintedUrlSuffix::label()
|
||||
else result.isTaint()
|
||||
}
|
||||
}
|
||||
@@ -60,7 +61,7 @@ module ClientSideUrlRedirect {
|
||||
* Holds if `node` extracts a part of a URL that does not contain the suffix.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
|
||||
deprecated predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
|
||||
// Block flow through prefix-extraction `substring(0, ...)` and `split("#")[0]`
|
||||
node.getMethodName() = [StringOps::substringMethodName(), "split"] and
|
||||
not untrustedUrlSubstring(_, node)
|
||||
@@ -70,7 +71,7 @@ module ClientSideUrlRedirect {
|
||||
* Holds if `substring` refers to a substring of `base` which is considered untrusted
|
||||
* when `base` is the current URL.
|
||||
*/
|
||||
predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
|
||||
deprecated predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
|
||||
exists(DataFlow::MethodCallNode mcn, string methodName |
|
||||
mcn = substring and mcn.calls(base, methodName)
|
||||
|
|
||||
|
||||
@@ -10,9 +10,10 @@
|
||||
import javascript
|
||||
import UrlConcatenation
|
||||
import ClientSideUrlRedirectCustomizations::ClientSideUrlRedirect
|
||||
import semmle.javascript.security.TaintedUrlSuffix
|
||||
|
||||
// Materialize flow labels
|
||||
private class ConcreteDocumentUrl extends DocumentUrl {
|
||||
deprecated private class ConcreteDocumentUrl extends DocumentUrl {
|
||||
ConcreteDocumentUrl() { this = this }
|
||||
}
|
||||
|
||||
@@ -35,8 +36,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
|
||||
}
|
||||
|
||||
predicate isBarrier(DataFlow::Node node, DataFlow::FlowLabel state) {
|
||||
isPrefixExtraction(node) and
|
||||
state instanceof DocumentUrl
|
||||
TaintedUrlSuffix::isBarrier(node, state)
|
||||
}
|
||||
|
||||
predicate isBarrierOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
|
||||
@@ -47,9 +47,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
|
||||
DataFlow::Node node1, DataFlow::FlowLabel state1, DataFlow::Node node2,
|
||||
DataFlow::FlowLabel state2
|
||||
) {
|
||||
untrustedUrlSubstring(node1, node2) and
|
||||
state1 instanceof DocumentUrl and
|
||||
state2.isTaint()
|
||||
TaintedUrlSuffix::step(node1, node2, state1, state2)
|
||||
or
|
||||
exists(HtmlSanitizerCall call |
|
||||
node1 = call.getInput() and
|
||||
|
||||
Reference in New Issue
Block a user