Files
codeql/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll
yoff ca2d5e21d7 Python: switch dataflow library to new (shared) CFG + SSA
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll)
and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade
(semmle.python.controlflow.internal.Cfg) and the new SSA adapter
(semmle.python.dataflow.new.internal.SsaImpl), both introduced
additively in the preceding PRs in this stack.

This is the trunk-flip equivalent of the original draft PR #21894 (kept
around as documentation), rebased on top of the four preparatory PRs:

  P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919).
  P2: Qualify Flow.qll's AST references with Py:: prefix (#21920).
  P3: Add new shared-CFG-backed control flow graph (#21921).
  P4: Add new shared-SSA-backed SSA adapter (#21923).

The Python dataflow library (semmle/python/dataflow/new/) now imports
the new CFG facade and SSA adapter. All CFG-typed predicates
(ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are
qualified with the Cfg:: prefix; SSA references switch from
EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable.

GuardNode is redesigned to use the new CFG's outcome-node model
(isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock +
flipped indirection. Only BarrierGuard<...> is preserved as public
API.

Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib,
...) are updated to take CFG nodes from the new facade.

A handful of dataflow consistency tweaks for the new CFG:
- Augmented-assignment targets are treated as both load and store.
- 'from X import *' produces uncertain SSA writes for unknown names.
- CFG nodes are canonicalised so dataflow does not see equivalent
  pre/post-order pairs as distinct nodes.

Two AST tweaks for the new CFG:
- AstNodeImpl: omit PEP 695 type-parameter names from
  FunctionDefExpr / ClassDefExpr children.
- ImportResolution: drop the legacy essa import.

Test churn (~175 files): reblessed library- and query-test .expected
files reflect slightly different CFG granularity, different toString
output, and a handful of true alert deltas in security queries.

Verification: all 367 lib + src + consistency-queries compile clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-30 15:22:54 +00:00

188 lines
7.0 KiB
Plaintext

/**
* Provides default sources, sinks and sanitizers for detecting
* "Server-side request forgery"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.data.internal.ApiGraphModels
private import semmle.python.controlflow.internal.Cfg as Cfg
/**
* Provides default sources, sinks and sanitizers for detecting
* "Server-side request forgery"
* vulnerabilities, as well as extension points for adding your own.
*/
module ServerSideRequestForgery {
/**
* A data flow source for "Server-side request forgery" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "Server-side request forgery" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node {
/**
* Gets the request this sink belongs to.
*/
abstract Http::Client::Request getRequest();
}
/**
* A sanitizer for "Server-side request forgery" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A sanitizer for "Server-side request forgery" vulnerabilities,
* that ensures the attacker does not have full control of the URL. (that is, might
* still be able to control path or query parameters).
*/
abstract class FullUrlControlSanitizer extends DataFlow::Node { }
/**
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
*/
deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource;
/**
* An active threat-model source, considered as a flow source.
*/
private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { }
/** The URL of an HTTP request, considered as a sink. */
class HttpRequestUrlAsSink extends Sink {
Http::Client::Request req;
HttpRequestUrlAsSink() {
req.getAUrlPart() = this and
// if we extract the stdlib code for HTTPConnection, we will also find calls that
// make requests within the HTTPConnection implementation -- for example the
// `request` method calls the `_send_request` method internally. So without this
// extra bit of code, we would give alerts within the HTTPConnection
// implementation as well, which is just annoying.
//
// Notice that we're excluding based on the request location, and not the URL part
// location, since the URL part would be in user code for the scenario above.
//
// See comment for command injection sinks for more details.
not req.getScope().getEnclosingModule().getName() in ["http.client", "httplib"]
}
override Http::Client::Request getRequest() { result = req }
}
/**
* A comparison with a constant, considered as a sanitizer-guard.
*/
class ConstCompareAsSanitizerGuard extends Sanitizer, ConstCompareBarrier { }
/** DEPRECATED: Use ConstCompareAsSanitizerGuard instead. */
deprecated class StringConstCompareAsSanitizerGuard = ConstCompareAsSanitizerGuard;
/**
* A string construction (concat, format, f-string) where the left side is not
* user-controlled.
*
* For all of these cases, we try to allow `http://` or `https://` on the left side
* since that will still allow full URL control.
*/
class StringConstructionAsFullUrlControlSanitizer extends FullUrlControlSanitizer {
StringConstructionAsFullUrlControlSanitizer() {
// string concat
exists(Cfg::BinaryExprNode add |
add.getOp() instanceof Add and
add.getRight() = this.asCfgNode() and
not add.getLeft().getNode().(StringLiteral).getText().toLowerCase() in [
"http://", "https://"
]
)
or
// % formatting
exists(Cfg::BinaryExprNode fmt |
fmt.getOp() instanceof Mod and
fmt.getRight() = this.asCfgNode() and
// detecting %-formatting is not super easy, so we simplify it to only handle
// when there is a **single** substitution going on.
not fmt.getLeft().getNode().(StringLiteral).getText().regexpMatch("^(?i)https?://%s[^%]*$")
)
or
// arguments to a format call
exists(DataFlow::MethodCallNode call, string httpPrefixRe |
httpPrefixRe = "^(?i)https?://(?:(\\{\\})|\\{([0-9]+)\\}|\\{([^0-9].*)\\}).*$"
|
call.getMethodName() = "format" and
(
if call.getObject().asExpr().(StringLiteral).getText().regexpMatch(httpPrefixRe)
then
exists(string text | text = call.getObject().asExpr().(StringLiteral).getText() |
// `http://{}...`
exists(text.regexpCapture(httpPrefixRe, 1)) and
this in [call.getArg(any(int i | i >= 1)), call.getArgByName(_)]
or
// `http://{123}...`
exists(int safeArgIndex | safeArgIndex = text.regexpCapture(httpPrefixRe, 2).toInt() |
this in [call.getArg(any(int i | i != safeArgIndex)), call.getArgByName(_)]
)
or
// `http://{abc}...`
exists(string safeArgName | safeArgName = text.regexpCapture(httpPrefixRe, 3) |
this in [call.getArg(_), call.getArgByName(any(string s | s != safeArgName))]
)
)
else this in [call.getArg(_), call.getArgByName(_)]
)
)
or
// f-string
exists(Fstring fstring |
if fstring.getValue(0).(StringLiteral).getText().toLowerCase() in ["http://", "https://"]
then fstring.getValue(any(int i | i >= 2)) = this.asExpr()
else fstring.getValue(any(int i | i >= 1)) = this.asExpr()
)
}
}
/** A validation that a string does not contain certain characters, considered as a sanitizer. */
private class StringRestrictionSanitizerGuard extends Sanitizer {
StringRestrictionSanitizerGuard() {
this = DataFlow::BarrierGuard<stringRestriction/3>::getABarrierNode()
}
}
private predicate stringRestriction(
DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch
) {
exists(DataFlow::MethodCallNode call, DataFlow::Node strNode |
call.asCfgNode() = g and strNode.asCfgNode() = node
|
branch = true and
call.calls(strNode,
["isalnum", "isalpha", "isdecimal", "isdigit", "isidentifier", "isnumeric", "isspace"])
or
branch = true and
call = API::moduleImport("re").getMember(["match", "fullmatch"]).getACall() and
strNode = [call.getArg(1), call.getArgByName("string")]
or
branch = true and
call =
API::moduleImport("re")
.getMember("compile")
.getReturn()
.getMember(["match", "fullmatch"])
.getACall() and
strNode = [call.getArg(0), call.getArgByName("string")]
)
}
private class ExternalRequestForgerySanitizer extends FullUrlControlSanitizer {
ExternalRequestForgerySanitizer() { ModelOutput::barrierNode(this, "request-forgery") }
}
}