mirror of
https://github.com/github/codeql.git
synced 2026-06-23 21:57:01 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack. This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs: P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919). P2: Qualify Flow.qll's AST references with Py:: prefix (#21920). P3: Add new shared-CFG-backed control flow graph (#21921). P4: Add new shared-SSA-backed SSA adapter (#21923). The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable. GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API. Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade. A handful of dataflow consistency tweaks for the new CFG: - Augmented-assignment targets are treated as both load and store. - 'from X import *' produces uncertain SSA writes for unknown names. - CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes. Two AST tweaks for the new CFG: - AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children. - ImportResolution: drop the legacy essa import. 
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
142 lines
5.1 KiB
Plaintext
142 lines
5.1 KiB
Plaintext
/**
|
|
* Provides classes modeling security-relevant aspects of the `yarl` PyPI package.
|
|
* See https://yarl.readthedocs.io/en/stable/.
|
|
*/
|
|
|
|
private import python
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
private import semmle.python.dataflow.new.DataFlow
|
|
private import semmle.python.dataflow.new.TaintTracking
|
|
private import semmle.python.Concepts
|
|
private import semmle.python.ApiGraphs
|
|
private import semmle.python.frameworks.Multidict
|
|
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
|
|
private import semmle.python.security.dataflow.UrlRedirectCustomizations
|
|
|
|
/**
|
|
* INTERNAL: Do not use.
|
|
*
|
|
* Provides models for the `yarl` PyPI package.
|
|
* See https://yarl.readthedocs.io/en/stable/.
|
|
*/
|
|
module Yarl {
|
|
/**
|
|
* Provides models for the `yarl.URL` class.
|
|
*
|
|
* See https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
|
|
*/
|
|
module Url {
|
|
/**
 * An extension point for data-flow nodes that produce an instance of `yarl.URL`.
 * Add a subclass of this class to model a new origin of such instances.
 *
 * Typical origins are instantiations of the class, return values of function
 * calls, and special parameters that an external library sets when it calls
 * a function.
 *
 * To get references to instances of `yarl.URL`, use the `Url::instance()` predicate.
 */
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
|
|
|
|
/** A call that directly instantiates `yarl.URL`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
  ClassInstantiation() {
    exists(API::Node urlClass |
      urlClass = API::moduleImport("yarl").getMember("URL") and
      this = urlClass.getACall()
    )
  }
}
|
|
|
|
/**
 * Gets a node that an instance of `yarl.URL` has been tracked to, where `t`
 * is the type-tracker state at that node.
 */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
  // Base case: tracking starts at every modeled instance source.
  result instanceof InstanceSource and
  t.start()
  or
  // Recursive case: take one tracking step from a node that was already reached.
  exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
}
|
|
|
|
/** Gets a data-flow node that refers to an instance of `yarl.URL`. */
DataFlow::Node instance() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = instance(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
|
|
|
|
/**
 * Taint propagation for `yarl.URL`, expressed by supplying the instance
 * predicate and the relevant attribute and method names to
 * `InstanceTaintStepsHelper`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "yarl.URL" }

  override DataFlow::Node getInstance() { result = instance() }

  override string getAttributeName() {
    result =
      [
        "user", "raw_user", "password", "raw_password", "host", "raw_host", "port",
        "explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs",
        "raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts",
        "raw_parts", "name", "raw_name", "query"
      ]
  }

  override string getMethodName() { result = "human_repr" }

  // No asynchronous methods are modeled for `yarl.URL`.
  override string getAsyncMethodName() { none() }
}
|
|
|
|
/**
 * Extra taint propagation for `yarl.URL`, not covered by `InstanceTaintSteps`.
 *
 * Two kinds of steps are added:
 * - from the URL value passed to the `yarl.URL` constructor to the constructed object;
 * - from the receiver and from any argument of a URL-altering method to the
 *   result of that method call.
 */
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Class instantiation: the value given as the first positional argument or
    // as the `val` keyword argument taints the new instance.
    exists(ClassInstantiation call |
      nodeFrom in [call.getArg(0), call.getArgByName("val")] and
      nodeTo = call
    )
    or
    // Methods that give an altered URL: taint flows both from the object and
    // from any argument to the result of the call.
    exists(DataFlow::MethodCallNode call |
      call.calls(instance(),
        [
          "with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path",
          "with_query", "update_query", "with_fragment", "with_name",
          // `join` is a bit different from the `with_*` methods, but the same
          // object/argument-to-result flow is still correct for it.
          "join"
        ]) and
      nodeTo = call and
      nodeFrom in [call.getObject(), call.getArg(_), call.getArgByName(_)]
    )
  }
}
|
|
|
|
/** A read of the `query` attribute on a `yarl.URL`, modeled as a `MultiDictProxy` instance. */
class YarlUrlMultiDictProxyInstance extends Multidict::MultiDictProxy::InstanceSource {
  YarlUrlMultiDictProxyInstance() {
    exists(DataFlow::AttrRead queryRead |
      queryRead = this and
      queryRead.getAttributeName() = "query" and
      queryRead.getObject() = Yarl::Url::instance()
    )
  }
}
|
|
|
|
/**
 * Holds if `g` is a call to `is_absolute` on a direct instantiation of
 * `yarl.URL`, `node` is the first positional argument of that instantiation,
 * and `branch` is `false`.
 */
private predicate yarlUrlIsAbsoluteCall(
  DataFlow::GuardNode g, Cfg::ControlFlowNode node, boolean branch
) {
  branch = false and
  exists(ClassInstantiation urlCtor, DataFlow::MethodCallNode absoluteCheck |
    absoluteCheck.calls(urlCtor, "is_absolute") and
    absoluteCheck.asCfgNode() = g and
    urlCtor.getArg(0).asCfgNode() = node
  )
}
|
|
|
|
/**
 * A node guarded by a call to `yarl.URL.is_absolute`, considered as a
 * sanitizer for URL redirection.
 *
 * See https://yarl.aio-libs.org/en/latest/api/#absolute-and-relative-urls.
 */
private class YarlIsAbsoluteUrl extends UrlRedirect::Sanitizer {
  YarlIsAbsoluteUrl() {
    DataFlow::BarrierGuard<yarlUrlIsAbsoluteCall/3>::getABarrierNode() = this
  }

  override predicate sanitizes(UrlRedirect::FlowState state) {
    // `is_absolute` does not handle backslashes, so only the `NoBackslashes`
    // flow state is covered by this sanitizer.
    state instanceof UrlRedirect::NoBackslashes
  }
}
|
|
}
|
|
}
|