mirror of
https://github.com/github/codeql.git
synced 2026-02-18 07:53:43 +01:00
184 lines
6.5 KiB
Plaintext
184 lines
6.5 KiB
Plaintext
/**
|
|
* Provides a flow state for reasoning about URLs with a tainted query and fragment part,
|
|
* which we collectively refer to as the "suffix" of the URL.
|
|
*/
|
|
|
|
import javascript
|
|
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
|
|
|
|
/**
|
|
* Provides a flow state for reasoning about URLs with a tainted query and fragment part,
|
|
* which we collectively refer to as the "suffix" of the URL.
|
|
*/
|
|
module TaintedUrlSuffix {
|
|
private import DataFlow
|
|
import CommonFlowState
|
|
|
|
/**
 * The flow label that marks a URL whose query and fragment parts are tainted.
 *
 * The label is identified by the string value `"tainted-url-suffix"`.
 * It can also be obtained through `TaintedUrlSuffix::label()`.
 */
abstract deprecated class TaintedUrlSuffixLabel extends FlowLabel {
  TaintedUrlSuffixLabel() {
    // The only value of this class is the label with this exact name.
    this = "tainted-url-suffix"
  }
}
|
|
|
|
/**
 * Gets the flow label that marks a URL whose query and fragment parts are tainted,
 * that is, any value of the class `TaintedUrlSuffixLabel`.
 */
deprecated FlowLabel label() { result = any(TaintedUrlSuffixLabel lbl) }
|
|
|
|
/**
 * Gets a client-side remote flow source whose URL query or fragment part is tainted.
 *
 * A source is any of the following:
 * - a client-side remote flow source whose kind satisfies `isUrl()`,
 * - a node given by `DOM::locationSource()`, or
 * - a read of the `search` or `hash` property on a node given by `DOM::locationRef()`.
 */
ClientSideRemoteFlowSource source() {
  // Sources already classified as URLs.
  result.getKind().isUrl()
  or
  // Nodes reported by `DOM::locationSource()`.
  result = DOM::locationSource()
  or
  // `location.search` and `location.hash`.
  result = DOM::locationRef().getAPropertyRead(["search", "hash"])
}
|
|
|
|
/**
 * DEPRECATED. Use `isStateBarrier(node, state)` instead.
 *
 * Holds if `node` should act as a barrier for flow carrying the given `label`.
 *
 * This is meant for the `isBarrier` predicate of a configuration that uses the
 * tainted-url-suffix label. The label is converted with `FlowState::fromFlowLabel`
 * and the decision is delegated to `isStateBarrier`.
 */
deprecated predicate isBarrier(Node node, FlowLabel label) {
  exists(FlowState state |
    state = FlowState::fromFlowLabel(label) and
    isStateBarrier(node, state)
  )
}
|
|
|
|
/**
 * Holds if flow through `node` should be blocked while in `state`.
 *
 * This only holds for the tainted-url-suffix state, and only at nodes for which
 * the optional barrier named `"split-url-suffix"` holds.
 */
predicate isStateBarrier(Node node, FlowState state) {
  state.isTaintedUrlSuffix() and
  DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
}
|
|
|
|
/**
 * DEPRECATED. Use `isAdditionalFlowStep` instead.
 *
 * Holds if `isAdditionalFlowStep` relates `src` to `dst` for the flow states that
 * `FlowState::fromFlowLabel` yields for `srclbl` and `dstlbl`, respectively.
 */
deprecated predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
  exists(FlowState srcState, FlowState dstState |
    srcState = FlowState::fromFlowLabel(srclbl) and
    dstState = FlowState::fromFlowLabel(dstlbl) and
    isAdditionalFlowStep(src, srcState, dst, dstState)
  )
}
|
|
|
|
/**
 * Holds if there is a flow step `node1 -> node2` involving the URL suffix flow state.
 *
 * Every step covered here starts in the tainted-url-suffix state (`state1`) and ends
 * in the general taint state (`state2`). The steps go through string operations,
 * URL accessors, regular-expression matching, and query-string parsing libraries.
 */
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
  // All steps below leave the tainted-url-suffix state and enter the general taint state.
  state1.isTaintedUrlSuffix() and
  state2.isTaint() and
  (
    // Entering the second array element of a split('#') or split('?') array.
    //
    //   x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
    //
    // Technically, tainted-url-suffix should also be preserved when entering the first array
    // element of such a split, but this mostly leads to FPs since we currently don't track
    // whether the taint has been through URI-decoding.
    // (The query/fragment parts are often URI-decoded in practice, but the other URL parts are not.)
    DataFlowPrivate::optionalStep(node1, "split-url-suffix-post", node2)
    or
    // Method calls on the URL that extract the query/fragment part: receiver --> call.
    exists(MethodCallNode mc, string methodName |
      methodName = mc.getMethodName() and
      node1 = mc.getReceiver() and
      node2 = mc
    |
      // A substring call whose first argument is not known to be 0, so it is not a prefix.
      methodName = StringOps::substringMethodName() and
      not mc.getArgument(0).getIntValue() = 0
      or
      // Replacing '#' or '?' with the empty string.
      methodName = "replace" and
      mc.getArgument(0).getStringValue() = ["#", "?"] and
      mc.getArgument(1).getStringValue() = ""
      or
      // The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`.
      // Matching on the name alone should be safe, since nothing else reachable by this
      // flow state supports a method named 'get'.
      methodName = "get"
      or
      // Methods on URL objects from the Closure library.
      methodName =
        [
          "getDecodedQuery", "getFragment", "getParameterValue", "getParameterValues",
          "getQueryData"
        ]
    )
    or
    // Reading the `query` property: base --> read.
    // Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
    exists(PropRead queryRead |
      queryRead.getPropertyName() = "query" and
      node1 = queryRead.getBase() and
      node2 = queryRead
    )
    or
    // Matching the URL against a regular expression: matched string --> match result.
    exists(MethodCallNode mc, DataFlow::RegExpCreationNode regexp |
      (
        // `regexp.exec(node1)`
        mc = regexp.getAMethodCall("exec") and
        node1 = mc.getArgument(0)
        or
        // `node1.match(regexp)` and `node1.matchAll(regexp)`
        mc.getMethodName() = ["match", "matchAll"] and
        regexp.flowsTo(mc.getArgument(0)) and
        node1 = mc.getReceiver()
      ) and
      node2 = mc
    |
      // The regexp has a capture group placed after a suffix indicator.
      captureAfterSuffixIndicator(regexp.getRoot().getAChild*())
      or
      // If the regexp is unknown, assume it will extract the URL suffix.
      not exists(regexp.getRoot())
    )
    or
    // Query-string parsers that strip off a leading '#' or '?': first argument --> call.
    exists(DataFlow::CallNode parseCall |
      node1 = parseCall.getArgument(0) and
      node2 = parseCall
    |
      parseCall = API::moduleImport("query-string").getMember(["parse", "extract"]).getACall()
      or
      parseCall = API::moduleImport("querystringify").getMember("parse").getACall()
    )
  )
}
|
|
|
|
/**
 * Holds if the `n`th child of `seq` is, or contains at any depth, a constant with one of
 * the characters `?`, `#`, or `=`, indicating that everything thereafter is part of the suffix.
 */
private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
  exists(RegExpConstant constant |
    constant = seq.getChild(n).getAChild*() and
    // '=' is included as well, since it usually only appears in the URL suffix.
    constant.getValue().regexpMatch(".*[?#=].*")
  )
}
|
|
|
|
/**
 * Holds if the `n`th child of `seq` is, or contains at any depth, a capture group.
 */
private predicate containsCaptureGroup(RegExpSequence seq, int n) {
  exists(RegExpGroup grp |
    grp = seq.getChild(n).getAChild*() and
    grp.isCapture()
  )
}
|
|
|
|
/**
 * Holds if `seq` contains a capture group that will likely match part of the URL suffix,
 * thereby extracting tainted data.
 *
 * This is the case when some child of `seq` contains a suffix indicator and a later
 * child of `seq` contains a capture group.
 *
 * For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
 */
private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
  exists(int indicatorIndex, int groupIndex |
    groupIndex > indicatorIndex and
    containsSuffixIndicator(seq, indicatorIndex) and
    containsCaptureGroup(seq, groupIndex)
  )
}
|
|
}
|