Python: deprecate AstNode.getAFlowNode() and rewrite internal callers

Preparatory refactor for the shared-CFG dataflow migration.

Deprecates the AstNode.getAFlowNode() cached predicate on the public
Python QL API and rewrites all ~140 internal callers across lib/, src/,
test/, and tools/ from `expr.getAFlowNode() = cfgNode` to
`cfgNode.getNode() = expr`, using ControlFlowNode.getNode() which
already exists in Flow.qll.

The predicate itself is preserved (with a deprecation note pointing at
the new pattern) so external users do not experience churn: they can
migrate at their own pace, and the AST and CFG hierarchies can still be
untangled as intended once the deprecation period elapses.

Semantic no-op, verified by:
- All 361 lib/ + src/ queries compile cleanly.
- All 122 ControlFlow + PointsTo library-tests pass.
- All 64 dataflow library-tests pass.
- All 113 Variables/Exceptions/Expressions/Statements/Functions/Imports/
  Security/CWE-798/ModificationOfParameterWithDefault query-tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot
2026-06-01 10:53:39 +00:00
committed by yoff
parent 8179bffe64
commit 717ff62d70
68 changed files with 273 additions and 181 deletions

View File

@@ -133,7 +133,11 @@ class ListComprehensionDeclaration extends ListComp {
major_version() = 2 and
this.getIterationVariable(_).getId() = result.getId() and
result.getScope() = this.getScope() and
this.getAFlowNode().strictlyReaches(result.getAFlowNode()) and
exists(ControlFlowNode thisCfg, ControlFlowNode resultCfg |
thisCfg.getNode() = this and resultCfg.getNode() = result
|
thisCfg.strictlyReaches(resultCfg)
) and
result.isUse()
}

View File

@@ -13,18 +13,21 @@
import python
import Definition
from ListComprehensionDeclaration l, Name use, Name defn
from
ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg
where
use = l.getALeakedVariableUse() and
defn = l.getDefinition() and
l.getAFlowNode().strictlyReaches(use.getAFlowNode()) and
lCfg.getNode() = l and
useCfg.getNode() = use and
lCfg.strictlyReaches(useCfg) and
/* Make sure we aren't in a loop, as the variable may be redefined */
not use.getAFlowNode().strictlyReaches(l.getAFlowNode()) and
not useCfg.strictlyReaches(lCfg) and
not l.contains(use) and
not use.deletes(_) and
not exists(SsaVariable v |
v.getAUse() = use.getAFlowNode() and
not v.getDefinition().strictlyDominates(l.getAFlowNode())
v.getAUse() = useCfg and
not v.getDefinition().strictlyDominates(lCfg)
)
select use,
use.getId() + " may have a different value in Python 3, as the $@ will not be in scope.", defn,

View File

@@ -26,8 +26,11 @@ private Stmt loop_probably_defines(Variable v) {
/** Holds if the variable used by `use` is probably defined in a loop */
predicate probably_defined_in_loop(Name use) {
exists(Stmt loop | loop = loop_probably_defines(use.getVariable()) |
loop.getAFlowNode().strictlyReaches(use.getAFlowNode())
exists(Stmt loop, ControlFlowNode loopCfg, ControlFlowNode useCfg |
loop = loop_probably_defines(use.getVariable()) and
loopCfg.getNode() = loop and
useCfg.getNode() = use and
loopCfg.strictlyReaches(useCfg)
)
}

View File

@@ -24,8 +24,8 @@ predicate multiply_defined(AstNode asgn1, AstNode asgn2, Variable v) {
forex(Definition def, Definition redef |
def.getVariable() = v and
def = asgn1.getAFlowNode() and
redef = asgn2.getAFlowNode()
def.getNode() = asgn1 and
redef.getNode() = asgn2
|
def.isUnused() and
def.getARedef() = redef and

View File

@@ -88,7 +88,9 @@ predicate implicit_repeat(For f) {
* E.g. gets `x` from `{ y for y in x }`.
*/
ControlFlowNode get_comp_iterable(For f) {
exists(Comp c | c.getFunction().getStmt(0) = f | c.getAFlowNode().getAPredecessor() = result)
exists(Comp c, ControlFlowNode cCfg |
c.getFunction().getStmt(0) = f and cCfg.getNode() = c and cCfg.getAPredecessor() = result
)
}
from For f, Variable v, string msg

View File

@@ -19,9 +19,10 @@ private predicate loop_entry_variables(EssaVariable pred, EssaVariable succ) {
private predicate loop_entry_edge(BasicBlock pred, BasicBlock loop) {
pred = loop.getAPredecessor() and
pred = loop.getImmediateDominator() and
exists(Stmt s |
exists(Stmt s, ControlFlowNode sCfg |
loop_probably_executes_at_least_once(s) and
s.getAFlowNode().getBasicBlock() = loop
sCfg.getNode() = s and
sCfg.getBasicBlock() = loop
)
}

View File

@@ -27,7 +27,7 @@ predicate guarded_against_name_error(Name u) {
|
globals.getFunc().(Name).getId() = "globals" and
guard.controls(controlled, _) and
controlled.contains(u.getAFlowNode())
exists(ControlFlowNode uCfg | uCfg.getNode() = u | controlled.contains(uCfg))
)
}
@@ -101,18 +101,18 @@ predicate undefined_use(Name u) {
}
private predicate first_use_in_a_block(Name use) {
exists(GlobalVariable v, BasicBlock b, int i |
i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = use.getAFlowNode()
exists(GlobalVariable v, BasicBlock b, int i, ControlFlowNode useCfg | useCfg.getNode() = use |
i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = useCfg
)
}
predicate first_undefined_use(Name use) {
undefined_use(use) and
exists(GlobalVariable v | v.getALoad() = use |
exists(GlobalVariable v, ControlFlowNode useCfg | v.getALoad() = use and useCfg.getNode() = use |
first_use_in_a_block(use) and
not exists(ControlFlowNode other |
other.getNode() = v.getALoad() and
other.getBasicBlock().strictlyDominates(use.getAFlowNode().getBasicBlock())
other.getBasicBlock().strictlyDominates(useCfg.getBasicBlock())
)
)
}

View File

@@ -18,8 +18,8 @@ private import semmle.python.types.ImportTime
/* Local variable part */
predicate initialized_as_local(PlaceHolder use) {
exists(SsaVariableWithPointsTo l, Function f |
f = use.getScope() and l.getAUse() = use.getAFlowNode()
exists(SsaVariableWithPointsTo l, Function f, ControlFlowNode useCfg |
f = use.getScope() and useCfg.getNode() = use and l.getAUse() = useCfg
|
l.getVariable() instanceof LocalVariable and
not l.maybeUndefined()

View File

@@ -54,7 +54,7 @@ predicate unused_global(Name unused, GlobalVariable v) {
u.uses(v)
|
// That is reachable from this definition, directly
defn.strictlyReaches(u.getAFlowNode())
exists(ControlFlowNode uCfg | uCfg.getNode() = u | defn.strictlyReaches(uCfg))
or
// indirectly
defn.getBasicBlock().reachesExit() and u.getScope() != unused.getScope()