Files
codeql/python/ql/test/library-tests/dataflow/coverage/argumentRoutingTest.ql
Copilot 4ed5722e3e Python: switch dataflow library to new (shared) CFG + SSA
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll)
and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade
(semmle.python.controlflow.internal.Cfg) and the new SSA adapter
(semmle.python.dataflow.new.internal.SsaImpl), both introduced
additively in the preceding PRs in this stack.

This is the trunk-flip equivalent of the original draft PR #21894 (kept
around as documentation), rebased on top of the four preparatory PRs:

  P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919).
  P2: Qualify Flow.qll's AST references with Py:: prefix (#21920).
  P3: Add new shared-CFG-backed control flow graph (#21921).
  P4: Add new shared-SSA-backed SSA adapter (#21923).

The Python dataflow library (semmle/python/dataflow/new/) now imports
the new CFG facade and SSA adapter. All CFG-typed predicates
(ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are
qualified with the Cfg:: prefix; SSA references switch from
EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable.

GuardNode is redesigned to use the new CFG's outcome-node model
(isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock +
flipped indirection. Only BarrierGuard<...> is preserved as public
API.

Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib,
...) are updated to take CFG nodes from the new facade.

A handful of dataflow consistency tweaks for the new CFG:
- Augmented-assignment targets are treated as both load and store.
- 'from X import *' produces uncertain SSA writes for unknown names.
- CFG nodes are canonicalised so dataflow does not see equivalent
  pre/post-order pairs as distinct nodes.

Two AST tweaks for the new CFG:
- AstNodeImpl: omit PEP 695 type-parameter names from
  FunctionDefExpr / ClassDefExpr children.
- ImportResolution: drop the legacy essa import.

Test churn (~175 files): reblessed library- and query-test .expected
files reflect slightly different CFG granularity, different toString
output, and a handful of true alert deltas in security queries.

Verification: all 367 lib + src + consistency-queries compile clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-02 14:09:45 +00:00

136 lines
4.9 KiB
Plaintext

import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
import utils.test.dataflow.RoutingTest
module Argument1RoutingTest implements RoutingTestSig {
class Argument = Unit;
string flowTag(Argument arg) { result = "arg1" and exists(arg) }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
(
Argument1ExtraRoutingFlow::flow(source, sink)
or
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, 1) and
ArgumentRoutingConfig::isGoodSink(sink, 1)
) and
exists(arg)
}
}
class ArgNumber extends int {
ArgNumber() { this in [1 .. 7] }
}
module ArgumentRoutingConfig implements DataFlow::ConfigSig {
additional predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() = "arg" + argNumber
}
predicate isSource(DataFlow::Node node) { isArgSource(node, _) }
additional predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "SINK" + argNumber and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
additional predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "SINK" + argNumber + "_F" and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
predicate isSink(DataFlow::Node node) { isGoodSink(node, _) or isBadSink(node, _) }
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module ArgumentRoutingFlow = DataFlow::Global<ArgumentRoutingConfig>;
module Argument1ExtraRoutingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) {
exists(SsaImpl::AssignmentDefinition def, DataFlow::CallCfgNode call |
def.getDefiningNode() = node.(DataFlow::CfgNode).getNode() and
def.getValue() = call.getNode() and
call.getFunction().asCfgNode().(Cfg::NameNode).getId().matches("With\\_%")
) and
node.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId().matches("with\\_%")
}
predicate isSink(DataFlow::Node node) {
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "SINK1" and
node.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
/**
* We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
* Use-use flow lets the argument to the first call reach the sink inside the second call,
* making it seem like we handle all cases even if we only handle the last one.
* We make the test honest by preventing flow into source nodes.
*/
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
}
module Argument1ExtraRoutingFlow = DataFlow::Global<Argument1ExtraRoutingConfig>;
module RestArgumentRoutingTest implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "arg" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, arg) and
ArgumentRoutingConfig::isGoodSink(sink, arg) and
arg > 1
}
}
/** Bad flow from `arg<n>` to `SINK<N>_F` */
module BadArgumentRoutingTestSinkF implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "bad" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, arg) and
ArgumentRoutingConfig::isBadSink(sink, arg)
}
}
/** Bad flow from `arg<n>` to `SINK<M>` or `SINK<M>_F`, where `n != m`. */
module BadArgumentRoutingTestWrongSink implements RoutingTestSig {
class Argument = ArgNumber;
string flowTag(Argument arg) { result = "bad" + arg }
predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink, Argument arg) {
ArgumentRoutingFlow::flow(source, sink) and
ArgumentRoutingConfig::isArgSource(source, any(ArgNumber i | not i = arg)) and
(
ArgumentRoutingConfig::isGoodSink(sink, arg)
or
ArgumentRoutingConfig::isBadSink(sink, arg)
)
}
}
import MakeTest<MergeTests4<MakeTestSig<Argument1RoutingTest>, MakeTestSig<RestArgumentRoutingTest>,
MakeTestSig<BadArgumentRoutingTestSinkF>, MakeTestSig<BadArgumentRoutingTestWrongSink>>>