Files
codeql/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll
Copilot 4ed5722e3e Python: switch dataflow library to new (shared) CFG + SSA
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll)
and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade
(semmle.python.controlflow.internal.Cfg) and the new SSA adapter
(semmle.python.dataflow.new.internal.SsaImpl), both introduced
additively in the preceding PRs in this stack.

This is the trunk-flip equivalent of the original draft PR #21894 (kept
around as documentation), rebased on top of the four preparatory PRs:

  P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919).
  P2: Qualify Flow.qll's AST references with Py:: prefix (#21920).
  P3: Add new shared-CFG-backed control flow graph (#21921).
  P4: Add new shared-SSA-backed SSA adapter (#21923).

The Python dataflow library (semmle/python/dataflow/new/) now imports
the new CFG facade and SSA adapter. All CFG-typed predicates
(ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are
qualified with the Cfg:: prefix; SSA references switch from
EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable.

GuardNode is redesigned to use the new CFG's outcome-node model
(isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock +
flipped indirection. Only BarrierGuard<...> is preserved as public
API.

Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib,
...) are updated to take CFG nodes from the new facade.

A handful of dataflow consistency tweaks for the new CFG:
- Augmented-assignment targets are treated as both load and store.
- 'from X import *' produces uncertain SSA writes for unknown names.
- CFG nodes are canonicalised so dataflow does not see equivalent
  pre/post-order pairs as distinct nodes.

Two AST tweaks for the new CFG:
- AstNodeImpl: omit PEP 695 type-parameter names from
  FunctionDefExpr / ClassDefExpr children.
- ImportResolution: drop the legacy essa import.

Test churn (~175 files): reblessed library- and query-test .expected
files reflect slightly different CFG granularity, different toString
output, and a handful of true alert deltas in security queries.

Verification: all 367 lib + src + consistency-queries compile clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-02 14:09:45 +00:00

224 lines
7.6 KiB
Plaintext

/** Provides logic related to captured variables. */
overlay[local]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
private import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
private import DataFlowPublic
private import semmle.python.dataflow.new.internal.DataFlowPrivate
private import codeql.dataflow.VariableCapture as Shared
// Note: The JavaScript implementation (on the branch https://github.com/github/codeql/pull/14412)
// had some tweaks related to performance. See these two commits:
// - JS: Capture flow: https://github.com/github/codeql/pull/14412/commits/7bcf8b858babfea0a3e36ce61145954c249e13ac
// - JS: Disallow consecutive captured contents: https://github.com/github/codeql/pull/14412/commits/46e4cdc6232604ea7f58138a336d5a222fad8567
// The first is the main implementation, the second is a performance-motivated restriction.
// The restriction is to clear any `CapturedVariableContent` before writing a new one
// to avoid long access paths (see the link for a nice explanation).
/**
 * The Python instantiation of the input signature (`Shared::InputSig`) of the
 * shared variable-capture library.
 */
private module CaptureInput implements Shared::InputSig<Location, CfgImpl::BasicBlock> {
  /** A control-flow node for which `isExpressionNode` holds. */
  additional class ExprCfgNode extends Cfg::ControlFlowNode {
    ExprCfgNode() { isExpressionNode(this) }
  }

  /** A callable, modelled as any Python `Scope`. */
  class Callable extends Scope {
    /** Holds if this callable is a constructor. This never holds. */
    predicate isConstructor() { none() }
  }

  /** Gets the scope of the callable enclosing the basic block `bb`. */
  Callable basicBlockGetEnclosingCallable(CfgImpl::BasicBlock bb) {
    result = bb.getEnclosingCallable().asScope()
  }

  /**
   * A local variable whose scope is a function `f` and which has at least one
   * access located in a scope other than `f`.
   */
  class CapturedVariable extends LocalVariable {
    Function f;

    CapturedVariable() {
      // note: captured variables originating on module scope are currently
      // covered by global variable handling.
      this.getScope() = f and
      this.getAnAccess().getScope() != f
    }

    /** Gets the function that is the scope of this variable. */
    Callable getCallable() { result = f }

    /** Gets the location of the function that is the scope of this variable. */
    Location getLocation() { result = f.getLocation() }

    /** Gets a scope that captures this variable. */
    Scope getACapturingScope() {
      // Any scope (transitively) enclosing an access, as long as it is
      // strictly nested inside the declaring function `f`.
      result = this.getAnAccess().getScope().getScope*() and
      result.getScope+() = f
    }
  }

  /** A captured variable that is a parameter. */
  class CapturedParameter extends CapturedVariable {
    CapturedParameter() { this.isParameter() }

    /**
     * Gets a control-flow node whose AST node is a `Parameter` that is an
     * access of this variable.
     */
    Cfg::ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
  }

  /** An expression control-flow node. */
  class Expr extends ExprCfgNode {
    /** Holds if this node is the `i`th node of the basic block `bb`. */
    predicate hasCfgNode(CfgImpl::BasicBlock bb, int i) { this = bb.getNode(i) }
  }

  /**
   * A control-flow node that is the value of a definition node whose AST node
   * is a store to the captured variable `v`.
   */
  class VariableWrite extends Cfg::ControlFlowNode {
    CapturedVariable v;

    VariableWrite() {
      exists(Cfg::DefinitionNode d | d.getNode() = v.getAStore() | this = d.getValue())
    }

    /** Gets the captured variable being written. */
    CapturedVariable getVariable() { result = v }

    /** Holds if this node is the `i`th node of the basic block `bb`. */
    predicate hasCfgNode(CfgImpl::BasicBlock bb, int i) { this = bb.getNode(i) }
  }

  /** An expression node whose AST node is a load of the captured variable `v`. */
  class VariableRead extends Expr {
    CapturedVariable v;

    VariableRead() { this.getNode() = v.getALoad() }

    /** Gets the captured variable being read. */
    CapturedVariable getVariable() { result = v }
  }

  /**
   * Holds if `nodeTo` is a use of an SSA variable that has an ultimate
   * definition which is a node definition, whose defining node is a
   * `DefinitionNode` with value `nodeFrom`.
   */
  private predicate closureFlowStep(ExprCfgNode nodeFrom, ExprCfgNode nodeTo) {
    // TODO: Other languages have an extra case here looking like
    //   simpleAstFlowStep(nodeFrom, nodeTo)
    // we should investigate the potential benefit of adding that.
    exists(SsaImpl::EssaVariable def |
      def.getAUse() = nodeTo and
      def.getAnUltimateDefinition()
          .getDefinition()
          .(SsaImpl::EssaNodeDefinition)
          .getDefiningNode()
          .(Cfg::DefinitionNode)
          .getValue() = nodeFrom
    )
  }

  /** An expression node whose AST node is a `CallableExpr` or a `Comp`. */
  class ClosureExpr extends Expr {
    ClosureExpr() {
      this.getNode() instanceof CallableExpr
      or
      this.getNode() instanceof Comp
    }

    /**
     * Holds if `body` is the inner scope of this callable expression, or the
     * function of this comprehension.
     */
    predicate hasBody(Callable body) {
      body = this.getNode().(CallableExpr).getInnerScope()
      or
      body = this.getNode().(Comp).getFunction()
    }

    /**
     * Holds if `f` is reachable from this expression by one or more
     * `closureFlowStep`s and `f` itself has no outgoing `closureFlowStep`.
     */
    predicate hasAliasedAccess(Expr f) { closureFlowStep+(this, f) and not closureFlowStep(f, _) }
  }
}
/** A captured variable, as defined by `CaptureInput::CapturedVariable`. */
class CapturedVariable = CaptureInput::CapturedVariable;
/** A closure expression, as defined by `CaptureInput::ClosureExpr`. */
class ClosureExpr = CaptureInput::ClosureExpr;
/** The shared variable-capture flow library, instantiated with `CaptureInput`. */
module Flow = Shared::Flow<Location, Cfg::CfgSigImpl, CaptureInput>;
/**
 * Gets the closure node of the variable-capture library that corresponds to
 * the data-flow node `n`, if any.
 */
private Flow::ClosureNode asClosureNode(Node n) {
  // A synthesized capture node carries its closure node directly.
  n.(SynthCaptureNode).getSynthesizedCaptureNode() = result
  or
  // The captured-variables argument node synthesized for a comprehension maps
  // to the expression closure node of that comprehension.
  exists(Comp c |
    n = TSynthCompCapturedVariablesArgumentNode(c) and
    result.(Flow::ExprNode).getExpr().getNode() = c
  )
  or
  // For captured variable argument nodes (and their post-update variants), we
  // use the closure node for the underlying node.
  exists(Node underlying |
    underlying = n.(SynthCapturedVariablesArgumentNode).getUnderlyingNode()
    or
    underlying = n.(SynthCapturedVariablesArgumentPostUpdateNode).getUnderlyingNode()
  |
    result = asClosureNode(underlying)
  )
  or
  // TODO: Should the `Comp`s above be excluded here?
  n.(CfgNode).getNode() = result.(Flow::ExprNode).getExpr()
  or
  n.(CfgNode).getNode() = result.(Flow::VariableWriteSourceNode).getVariableWrite()
  or
  n.(PostUpdateNode).getPreUpdateNode().(CfgNode).getNode() =
    result.(Flow::ExprPostUpdateNode).getExpr()
  or
  n.(CfgNode).getNode() = result.(Flow::ParameterNode).getParameter().getCfgNode()
  or
  n.(SynthCapturedVariablesParameterNode).getCallable() =
    result.(Flow::ThisParameterNode).getCallable()
}
/**
 * Holds if the capture library has a store step for the variable of `c`
 * between the closure nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate storeStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::storeStep(src, c.getVariable(), dst)
  )
}
/**
 * Holds if the capture library has a read step for the variable of `c`
 * between the closure nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate readStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::readStep(src, c.getVariable(), dst)
  )
}
/**
 * Holds if the capture library clears the variable of `c` at the closure node
 * corresponding to `node`.
 */
predicate clearsContent(Node node, CapturedVariableContent c) {
  exists(Flow::ClosureNode closureNode | closureNode = asClosureNode(node) |
    Flow::clearsContent(closureNode, c.getVariable())
  )
}
/**
 * Holds if the capture library has a local flow step between the closure
 * nodes corresponding to `nodeFrom` and `nodeTo`.
 */
predicate valueStep(Node nodeFrom, Node nodeTo) {
  exists(Flow::ClosureNode src, Flow::ClosureNode dst |
    src = asClosureNode(nodeFrom) and
    dst = asClosureNode(nodeTo)
  |
    Flow::localFlowStep(src, dst)
  )
}
/**
 * Diagnostic predicates for inspecting how the variable capture library
 * instantiation behaves on Python code bases.
 *
 * These are intended to be run with quick-eval on databases of interest.
 * Ideally, none of the `unmapped*` predicates has any results.
 */
private module Debug {
  /**
   * Holds if the capture library has a store step for `v` from
   * `closureNodeFrom` to `closureNodeTo`, and these closure nodes correspond
   * to the data-flow nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowStoreStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
    Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::storeStep(closureNodeFrom, v, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /**
   * Holds if the capture library has a store step for `v` from
   * `closureNodeFrom` to `closureNodeTo` for which no pair of data-flow nodes
   * satisfies `flowStoreStep`.
   */
  predicate unmappedFlowStoreStep(
    Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
  ) {
    Flow::storeStep(closureNodeFrom, v, closureNodeTo) and
    not exists(Node nodeFrom, Node nodeTo |
      flowStoreStep(nodeFrom, closureNodeFrom, v, closureNodeTo, nodeTo)
    )
  }

  /**
   * Holds if the capture library has a read step for `v` from
   * `closureNodeFrom` to `closureNodeTo`, and these closure nodes correspond
   * to the data-flow nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowReadStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
    Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::readStep(closureNodeFrom, v, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /**
   * Holds if the capture library has a read step for `v` from
   * `closureNodeFrom` to `closureNodeTo` for which no pair of data-flow nodes
   * satisfies `flowReadStep`.
   */
  predicate unmappedFlowReadStep(
    Flow::ClosureNode closureNodeFrom, CapturedVariable v, Flow::ClosureNode closureNodeTo
  ) {
    Flow::readStep(closureNodeFrom, v, closureNodeTo) and
    not exists(Node nodeFrom, Node nodeTo |
      flowReadStep(nodeFrom, closureNodeFrom, v, closureNodeTo, nodeTo)
    )
  }

  /**
   * Holds if the capture library has a local flow step from `closureNodeFrom`
   * to `closureNodeTo`, and these closure nodes correspond to the data-flow
   * nodes `nodeFrom` and `nodeTo`, respectively.
   */
  predicate flowValueStep(
    Node nodeFrom, Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo, Node nodeTo
  ) {
    Flow::localFlowStep(closureNodeFrom, closureNodeTo) and
    asClosureNode(nodeFrom) = closureNodeFrom and
    asClosureNode(nodeTo) = closureNodeTo
  }

  /**
   * Holds if the capture library has a local flow step from `closureNodeFrom`
   * to `closureNodeTo` for which no pair of data-flow nodes satisfies
   * `flowValueStep`.
   */
  predicate unmappedFlowValueStep(Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo) {
    Flow::localFlowStep(closureNodeFrom, closureNodeTo) and
    not exists(Node nodeFrom, Node nodeTo |
      flowValueStep(nodeFrom, closureNodeFrom, closureNodeTo, nodeTo)
    )
  }

  /** Holds if no data-flow node is mapped to `closureNode` by `asClosureNode`. */
  predicate unmappedFlowClosureNode(Flow::ClosureNode closureNode) {
    not exists(Node n | asClosureNode(n) = closureNode)
  }
}