mirror of
https://github.com/github/codeql.git
synced 2026-07-01 09:35:42 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
188 lines
7.0 KiB
Plaintext
188 lines
7.0 KiB
Plaintext
/**
|
|
* Provides default sources, sinks and sanitizers for detecting
|
|
* "Server-side request forgery"
|
|
* vulnerabilities, as well as extension points for adding your own.
|
|
*/
|
|
|
|
private import python
|
|
private import semmle.python.dataflow.new.DataFlow
|
|
private import semmle.python.Concepts
|
|
private import semmle.python.dataflow.new.RemoteFlowSources
|
|
private import semmle.python.dataflow.new.BarrierGuards
|
|
private import semmle.python.ApiGraphs
|
|
private import semmle.python.frameworks.data.internal.ApiGraphModels
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
|
|
/**
|
|
* Provides default sources, sinks and sanitizers for detecting
|
|
* "Server-side request forgery"
|
|
* vulnerabilities, as well as extension points for adding your own.
|
|
*/
|
|
module ServerSideRequestForgery {
|
|
/**
|
|
* A data flow source for "Server-side request forgery" vulnerabilities.
|
|
*/
|
|
abstract class Source extends DataFlow::Node { }
|
|
|
|
/**
|
|
* A data flow sink for "Server-side request forgery" vulnerabilities.
|
|
*/
|
|
abstract class Sink extends DataFlow::Node {
|
|
/**
|
|
* Gets the request this sink belongs to.
|
|
*/
|
|
abstract Http::Client::Request getRequest();
|
|
}
|
|
|
|
/**
|
|
* A sanitizer for "Server-side request forgery" vulnerabilities.
|
|
*/
|
|
abstract class Sanitizer extends DataFlow::Node { }
|
|
|
|
/**
|
|
* A sanitizer for "Server-side request forgery" vulnerabilities,
|
|
* that ensures the attacker does not have full control of the URL. (that is, might
|
|
* still be able to control path or query parameters).
|
|
*/
|
|
abstract class FullUrlControlSanitizer extends DataFlow::Node { }
|
|
|
|
/**
|
|
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
|
|
*/
|
|
deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
|
|
|
|
/**
|
|
* An active threat-model source, considered as a flow source.
|
|
*/
|
|
private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
|
|
|
|
/** The URL of an HTTP request, considered as a sink. */
|
|
class HttpRequestUrlAsSink extends Sink {
|
|
Http::Client::Request req;
|
|
|
|
HttpRequestUrlAsSink() {
|
|
req.getAUrlPart() = this and
|
|
// if we extract the stdlib code for HTTPConnection, we will also find calls that
|
|
// make requests within the HTTPConnection implementation -- for example the
|
|
// `request` method calls the `_send_request` method internally. So without this
|
|
// extra bit of code, we would give alerts within the HTTPConnection
|
|
// implementation as well, which is just annoying.
|
|
//
|
|
// Notice that we're excluding based on the request location, and not the URL part
|
|
// location, since the URL part would be in user code for the scenario above.
|
|
//
|
|
// See comment for command injection sinks for more details.
|
|
not req.getScope().getEnclosingModule().getName() in ["http.client", "httplib"]
|
|
}
|
|
|
|
override Http::Client::Request getRequest() { result = req }
|
|
}
|
|
|
|
/**
|
|
* A comparison with a constant, considered as a sanitizer-guard.
|
|
*/
|
|
class ConstCompareAsSanitizerGuard extends Sanitizer, ConstCompareBarrier { }
|
|
|
|
/** DEPRECATED: Use ConstCompareAsSanitizerGuard instead. */
|
|
deprecated class StringConstCompareAsSanitizerGuard = ConstCompareAsSanitizerGuard;
|
|
|
|
/**
|
|
* A string construction (concat, format, f-string) where the left side is not
|
|
* user-controlled.
|
|
*
|
|
* For all of these cases, we try to allow `http://` or `https://` on the left side
|
|
* since that will still allow full URL control.
|
|
*/
|
|
class StringConstructionAsFullUrlControlSanitizer extends FullUrlControlSanitizer {
|
|
StringConstructionAsFullUrlControlSanitizer() {
|
|
// string concat
|
|
exists(Cfg::BinaryExprNode add |
|
|
add.getOp() instanceof Add and
|
|
add.getRight() = this.asCfgNode() and
|
|
not add.getLeft().getNode().(StringLiteral).getText().toLowerCase() in [
|
|
"http://", "https://"
|
|
]
|
|
)
|
|
or
|
|
// % formatting
|
|
exists(Cfg::BinaryExprNode fmt |
|
|
fmt.getOp() instanceof Mod and
|
|
fmt.getRight() = this.asCfgNode() and
|
|
// detecting %-formatting is not super easy, so we simplify it to only handle
|
|
// when there is a **single** substitution going on.
|
|
not fmt.getLeft().getNode().(StringLiteral).getText().regexpMatch("^(?i)https?://%s[^%]*$")
|
|
)
|
|
or
|
|
// arguments to a format call
|
|
exists(DataFlow::MethodCallNode call, string httpPrefixRe |
|
|
httpPrefixRe = "^(?i)https?://(?:(\\{\\})|\\{([0-9]+)\\}|\\{([^0-9].*)\\}).*$"
|
|
|
|
|
call.getMethodName() = "format" and
|
|
(
|
|
if call.getObject().asExpr().(StringLiteral).getText().regexpMatch(httpPrefixRe)
|
|
then
|
|
exists(string text | text = call.getObject().asExpr().(StringLiteral).getText() |
|
|
// `http://{}...`
|
|
exists(text.regexpCapture(httpPrefixRe, 1)) and
|
|
this in [call.getArg(any(int i | i >= 1)), call.getArgByName(_)]
|
|
or
|
|
// `http://{123}...`
|
|
exists(int safeArgIndex | safeArgIndex = text.regexpCapture(httpPrefixRe, 2).toInt() |
|
|
this in [call.getArg(any(int i | i != safeArgIndex)), call.getArgByName(_)]
|
|
)
|
|
or
|
|
// `http://{abc}...`
|
|
exists(string safeArgName | safeArgName = text.regexpCapture(httpPrefixRe, 3) |
|
|
this in [call.getArg(_), call.getArgByName(any(string s | s != safeArgName))]
|
|
)
|
|
)
|
|
else this in [call.getArg(_), call.getArgByName(_)]
|
|
)
|
|
)
|
|
or
|
|
// f-string
|
|
exists(Fstring fstring |
|
|
if fstring.getValue(0).(StringLiteral).getText().toLowerCase() in ["http://", "https://"]
|
|
then fstring.getValue(any(int i | i >= 2)) = this.asExpr()
|
|
else fstring.getValue(any(int i | i >= 1)) = this.asExpr()
|
|
)
|
|
}
|
|
}
|
|
|
|
/** A validation that a string does not contain certain characters, considered as a sanitizer. */
|
|
private class StringRestrictionSanitizerGuard extends Sanitizer {
|
|
StringRestrictionSanitizerGuard() {
|
|
this = DataFlow::BarrierGuard<stringRestriction/3>::getABarrierNode()
|
|
}
|
|
}
|
|
|
|
private predicate stringRestriction(
|
|
DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch
|
|
) {
|
|
exists(DataFlow::MethodCallNode call, DataFlow::Node strNode |
|
|
call.asCfgNode() = g and strNode.asCfgNode() = node
|
|
|
|
|
branch = true and
|
|
call.calls(strNode,
|
|
["isalnum", "isalpha", "isdecimal", "isdigit", "isidentifier", "isnumeric", "isspace"])
|
|
or
|
|
branch = true and
|
|
call = API::moduleImport("re").getMember(["match", "fullmatch"]).getACall() and
|
|
strNode = [call.getArg(1), call.getArgByName("string")]
|
|
or
|
|
branch = true and
|
|
call =
|
|
API::moduleImport("re")
|
|
.getMember("compile")
|
|
.getReturn()
|
|
.getMember(["match", "fullmatch"])
|
|
.getACall() and
|
|
strNode = [call.getArg(0), call.getArgByName("string")]
|
|
)
|
|
}
|
|
|
|
private class ExternalRequestForgerySanitizer extends FullUrlControlSanitizer {
|
|
ExternalRequestForgerySanitizer() { ModelOutput::barrierNode(this, "request-forgery") }
|
|
}
|
|
}
|