Python: deprecate AstNode.getAFlowNode() and rewrite internal callers

Preparatory refactor for the shared-CFG dataflow migration.

Deprecates the AstNode.getAFlowNode() cached predicate on the public
Python QL API and rewrites all ~140 internal callers across lib/, src/,
test/, and tools/ from `expr.getAFlowNode() = cfgNode` to
`cfgNode.getNode() = expr`, using ControlFlowNode.getNode() which
already exists in Flow.qll.

The predicate itself is preserved (with a deprecation note pointing at
the new pattern) so that external users are not broken: they can migrate
at their own pace, and the AST and CFG hierarchies can still be untangled
as intended once the deprecation period ends.

Semantic noop verified by:
- All 361 lib/ + src/ queries compile clean.
- All 122 ControlFlow + PointsTo library-tests pass.
- All 64 dataflow library-tests pass.
- All 113 Variables/Exceptions/Expressions/Statements/Functions/Imports/
  Security/CWE-798/ModificationOfParameterWithDefault query-tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot
2026-06-01 10:53:39 +00:00
committed by yoff
parent 7a3f546587
commit db1e5035b4
68 changed files with 274 additions and 198 deletions

View File

@@ -48,9 +48,11 @@ class CheckClass extends ClassObject {
self_dict = sub.getObject()
or
/* Indirect assignment via temporary variable */
exists(SsaVariable v |
v.getAUse() = sub.getObject().getAFlowNode() and
v.getDefinition().(DefinitionNode).getValue() = self_dict.getAFlowNode()
exists(SsaVariable v, ControlFlowNode subObjCfg, ControlFlowNode selfDictCfg |
subObjCfg.getNode() = sub.getObject() and selfDictCfg.getNode() = self_dict
|
v.getAUse() = subObjCfg and
v.getDefinition().(DefinitionNode).getValue() = selfDictCfg
)
) and
a.getATarget() = sub and
@@ -62,9 +64,10 @@ class CheckClass extends ClassObject {
pragma[nomagic]
private predicate monkeyPatched(string name) {
exists(Attribute a |
exists(Attribute a, ControlFlowNode objCfg |
objCfg.getNode() = a.getObject() and
a.getCtx() instanceof Store and
PointsTo::points_to(a.getObject().getAFlowNode(), _, this, _, _) and
PointsTo::points_to(objCfg, _, this, _, _) and
a.getName() = name
)
}
@@ -84,9 +87,9 @@ class CheckClass extends ClassObject {
}
predicate interestingUndefined(SelfAttributeRead a) {
exists(string name | name = a.getName() |
this.interestingContext(a, name) and
not this.definedInBlock(a.getAFlowNode().getBasicBlock(), name)
// Keep the CFG-node quantifier *inside* the negation. The deprecated form
// `not p(a.getAFlowNode()...)` means "no CFG node of `a` lies in a defining block";
// it also holds when `a` has no CFG node. Hoisting the variable into the outer
// `exists` would instead require that some CFG node exists and is not in such a block.
not exists(ControlFlowNode aCfg | aCfg.getNode() = a |
this.definedInBlock(aCfg.getBasicBlock(), name)
)
)
}
@@ -109,8 +112,9 @@ class CheckClass extends ClassObject {
pragma[nomagic]
private predicate definitionInBlock(BasicBlock b, string name) {
exists(SelfAttributeStore sa |
sa.getAFlowNode().getBasicBlock() = b and
exists(SelfAttributeStore sa, ControlFlowNode saCfg |
saCfg.getNode() = sa and
saCfg.getBasicBlock() = b and
sa.getName() = name and
sa.getClass() = this.getPyClass()
)

View File

@@ -15,7 +15,9 @@
import python
import semmle.python.ApiGraphs
predicate doesnt_reraise(ExceptStmt ex) { ex.getAFlowNode().getBasicBlock().reachesExit() }
predicate doesnt_reraise(ExceptStmt ex) {
exists(ControlFlowNode exCfg | exCfg.getNode() = ex | exCfg.getBasicBlock().reachesExit())
}
predicate catches_base_exception(ExceptStmt ex) {
ex.getType() = API::builtin("BaseException").getAValueReachableFromSource().asExpr()

View File

@@ -116,7 +116,7 @@ FunctionValue get_function_or_initializer(Value func_or_cls) {
predicate illegally_named_parameter_objectapi(Call call, Object func, string name) {
not func.isC() and
name = call.getANamedArgumentName() and
call.getAFlowNode() = get_a_call_objectapi(func) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call_objectapi(func)) and
not get_function_or_initializer_objectapi(func).isLegalArgumentName(name)
}
@@ -124,7 +124,7 @@ predicate illegally_named_parameter_objectapi(Call call, Object func, string nam
predicate illegally_named_parameter(Call call, Value func, string name) {
not func.isBuiltin() and
name = call.getANamedArgumentName() and
call.getAFlowNode() = get_a_call(func) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call(func)) and
not get_function_or_initializer(func).isLegalArgumentName(name)
}
@@ -146,7 +146,9 @@ predicate too_few_args_objectapi(Call call, Object callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.minParameters() - 1
or
callable instanceof ClassObject and
call.getAFlowNode() = get_a_call_objectapi(callable) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call |
callCfg = get_a_call_objectapi(callable)
) and
limit = func.minParameters() - 1
)
}
@@ -172,7 +174,7 @@ predicate too_few_args(Call call, Value callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.minParameters() - 1
or
callable instanceof ClassValue and
call.getAFlowNode() = get_a_call(callable) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call(callable)) and
limit = func.minParameters() - 1
)
}
@@ -191,7 +193,9 @@ predicate too_many_args_objectapi(Call call, Object callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1
or
callable instanceof ClassObject and
call.getAFlowNode() = get_a_call_objectapi(callable) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call |
callCfg = get_a_call_objectapi(callable)
) and
limit = func.maxParameters() - 1
) and
positional_arg_count_for_call_objectapi(call, callable) > limit
@@ -211,7 +215,7 @@ predicate too_many_args(Call call, Value callable, int limit) {
call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1
or
callable instanceof ClassValue and
call.getAFlowNode() = get_a_call(callable) and
exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call(callable)) and
limit = func.maxParameters() - 1
) and
positional_arg_count_for_call(call, callable) > limit

View File

@@ -36,11 +36,15 @@ where
exists(string s | dict_key(d, k1, s) and dict_key(d, k2, s) and k1 != k2) and
(
exists(BasicBlock b, int i1, int i2 |
k1.getAFlowNode() = b.getNode(i1) and
k2.getAFlowNode() = b.getNode(i2) and
b.getNode(i1).getNode() = k1 and
b.getNode(i2).getNode() = k2 and
i1 < i2
)
or
k1.getAFlowNode().getBasicBlock().strictlyDominates(k2.getAFlowNode().getBasicBlock())
exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg |
k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2
|
k1Cfg.getBasicBlock().strictlyDominates(k2Cfg.getBasicBlock())
)
)
select k1, "Dictionary key " + repr(k1) + " is subsequently $@.", k2, "overwritten"

View File

@@ -98,16 +98,18 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en
}
private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) {
exists(CallNode call | call = format_expr.getAFlowNode() |
exists(CallNode call, ControlFlowNode fmtCfg |
call.getNode() = format_expr and fmtCfg.getNode() = fmt
|
call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("format")) and
call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmt.getAFlowNode()) and
call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmtCfg) and
args = count(format_expr.getAnArg()) - 1
or
call.getFunction()
.(AttrNode)
.getObject("format")
.(ControlFlowNodeWithPointsTo)
.pointsTo(_, fmt.getAFlowNode()) and
.pointsTo(_, fmtCfg) and
args = count(format_expr.getAnArg())
)
}

View File

@@ -15,7 +15,7 @@ import python
/** Holds if the comparison `comp` uses `is` or `is not` (represented as `op`) to compare its `left` and `right` arguments. */
predicate comparison_using_is(Compare comp, ControlFlowNode left, Cmpop op, ControlFlowNode right) {
exists(CompareNode fcomp | fcomp = comp.getAFlowNode() |
exists(CompareNode fcomp | fcomp.getNode() = comp |
fcomp.operands(left, op, right) and
(op instanceof Is or op instanceof IsNot)
)

View File

@@ -5,7 +5,7 @@ private import LegacyPointsTo
/** Holds if the comparison `comp` uses `is` or `is not` (represented as `op`) to compare its `left` and `right` arguments. */
predicate comparison_using_is(Compare comp, ControlFlowNode left, Cmpop op, ControlFlowNode right) {
exists(CompareNode fcomp | fcomp = comp.getAFlowNode() |
exists(CompareNode fcomp | fcomp.getNode() = comp |
fcomp.operands(left, op, right) and
(op instanceof Is or op instanceof IsNot)
)

View File

@@ -19,7 +19,7 @@ where
// Only relevant for Python 2, as all later versions implement true division
major_version() = 2 and
exists(BinaryExprNode bin, Value lval, Value rval |
bin = div.getAFlowNode() and
bin.getNode() = div and
bin.getNode().getOp() instanceof Div and
bin.getLeft().(ControlFlowNodeWithPointsTo).pointsTo(lval, left) and
lval.getClass() = ClassValue::int_() and

View File

@@ -19,7 +19,9 @@ where
exists(Function init | init.isInitMethod() and r.getScope() = init) and
r.getValue() = rv and
not rv.pointsTo(Value::none_()) and
not exists(FunctionValue f | f.getACall() = rv.getAFlowNode() | f.neverReturns()) and
not exists(FunctionValue f, ControlFlowNode rvCfg | rvCfg.getNode() = rv |
f.getACall() = rvCfg and f.neverReturns()
) and
// to avoid double reporting, don't trigger if returning result from other __init__ function
not exists(Attribute meth | meth = rv.(Call).getFunc() | meth.getName() = "__init__")
select r, "Explicit return in __init__ method."

View File

@@ -69,7 +69,12 @@ where
returns_meaningful_value(callee) and
not wrapped_in_try_except(call) and
exists(int unused |
unused = count(ExprStmt e | e.getValue().getAFlowNode() = callee.getACall()) and
unused =
count(ExprStmt e |
exists(ControlFlowNode eValCfg | eValCfg.getNode() = e.getValue() |
eValCfg = callee.getACall()
)
) and
total = count(callee.getACall())
|
percentage_used = (100.0 * (total - unused) / total).floor()

View File

@@ -138,12 +138,12 @@ predicate function_opens_file(FunctionValue f) {
f = Value::named("open")
or
exists(EssaVariable v, Return ret | ret.getScope() = f.getScope() |
ret.getValue().getAFlowNode() = v.getAUse() and
// `v.getAUse()` is the CFG node; map it back to the AST with `getNode()` and
// compare it with the returned expression.
v.getAUse().getNode() = ret.getValue() and
var_is_open(v, _)
)
or
exists(Return ret, FunctionValue callee | ret.getScope() = f.getScope() |
ret.getValue().getAFlowNode() = callee.getACall() and
// `callee.getACall()` is the CFG call node; its AST node must be the returned expression.
callee.getACall().getNode() = ret.getValue() and
function_opens_file(callee)
)
}

View File

@@ -94,7 +94,7 @@ class CredentialSink extends DataFlow::Node {
this.(DataFlow::ArgumentNode).argumentOf(_, pos)
)
or
exists(Keyword k | k.getArg() = name and k.getValue().getAFlowNode() = this.asCfgNode())
exists(Keyword k | k.getArg() = name and this.asCfgNode().getNode() = k.getValue())
or
exists(CompareNode cmp, NameNode n | n.getId() = name |
cmp.operands(this.asCfgNode(), any(Eq eq), n)

View File

@@ -25,7 +25,7 @@ from
For loop, ControlFlowNodeWithPointsTo iter, Value str, Value seq, ControlFlowNode seq_origin,
ControlFlowNode str_origin
where
loop.getIter().getAFlowNode() = iter and
iter.getNode() = loop.getIter() and
iter.pointsTo(str, str_origin) and
iter.pointsTo(seq, seq_origin) and
has_string_type(str) and

View File

@@ -15,7 +15,7 @@
import python
predicate loop_variable_ssa(For f, Variable v, SsaVariable s) {
f.getTarget().getAFlowNode() = s.getDefinition() and v = s.getVariable()
s.getDefinition().getNode() = f.getTarget() and v = s.getVariable()
}
predicate variableUsedInNestedLoops(For inner, For outer, Variable v, Name n) {

View File

@@ -16,7 +16,7 @@ private import LegacyPointsTo
from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin
where
loop.getIter().getAFlowNode() = iter and
iter.getNode() = loop.getIter() and
iter.pointsTo(_, v, origin) and
v.getClass() = t and
not t.isIterable() and

View File

@@ -24,11 +24,13 @@ predicate func_with_side_effects(Expr e) {
}
predicate call_with_side_effect(Call e) {
e.getAFlowNode() =
API::moduleImport("subprocess")
.getMember(["call", "check_call", "check_output"])
.getACall()
.asCfgNode()
exists(ControlFlowNode eCfg | eCfg.getNode() = e |
eCfg =
API::moduleImport("subprocess")
.getMember(["call", "check_call", "check_output"])
.getACall()
.asCfgNode()
)
}
predicate probable_side_effect(Expr e) {

View File

@@ -133,7 +133,11 @@ class ListComprehensionDeclaration extends ListComp {
major_version() = 2 and
this.getIterationVariable(_).getId() = result.getId() and
result.getScope() = this.getScope() and
this.getAFlowNode().strictlyReaches(result.getAFlowNode()) and
exists(ControlFlowNode thisCfg, ControlFlowNode resultCfg |
thisCfg.getNode() = this and resultCfg.getNode() = result
|
thisCfg.strictlyReaches(resultCfg)
) and
result.isUse()
}

View File

@@ -13,18 +13,21 @@
import python
import Definition
from ListComprehensionDeclaration l, Name use, Name defn
from
ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg
where
use = l.getALeakedVariableUse() and
defn = l.getDefinition() and
l.getAFlowNode().strictlyReaches(use.getAFlowNode()) and
lCfg.getNode() = l and
useCfg.getNode() = use and
lCfg.strictlyReaches(useCfg) and
/* Make sure we aren't in a loop, as the variable may be redefined */
not use.getAFlowNode().strictlyReaches(l.getAFlowNode()) and
not useCfg.strictlyReaches(lCfg) and
not l.contains(use) and
not use.deletes(_) and
not exists(SsaVariable v |
v.getAUse() = use.getAFlowNode() and
not v.getDefinition().strictlyDominates(l.getAFlowNode())
v.getAUse() = useCfg and
not v.getDefinition().strictlyDominates(lCfg)
)
select use,
use.getId() + " may have a different value in Python 3, as the $@ will not be in scope.", defn,

View File

@@ -26,8 +26,11 @@ private Stmt loop_probably_defines(Variable v) {
/** Holds if the variable used by `use` is probably defined in a loop */
predicate probably_defined_in_loop(Name use) {
exists(Stmt loop | loop = loop_probably_defines(use.getVariable()) |
loop.getAFlowNode().strictlyReaches(use.getAFlowNode())
exists(Stmt loop, ControlFlowNode loopCfg, ControlFlowNode useCfg |
loop = loop_probably_defines(use.getVariable()) and
loopCfg.getNode() = loop and
useCfg.getNode() = use and
loopCfg.strictlyReaches(useCfg)
)
}

View File

@@ -24,8 +24,8 @@ predicate multiply_defined(AstNode asgn1, AstNode asgn2, Variable v) {
forex(Definition def, Definition redef |
def.getVariable() = v and
def = asgn1.getAFlowNode() and
redef = asgn2.getAFlowNode()
def.getNode() = asgn1 and
redef.getNode() = asgn2
|
def.isUnused() and
def.getARedef() = redef and

View File

@@ -88,7 +88,9 @@ predicate implicit_repeat(For f) {
* E.g. gets `x` from `{ y for y in x }`.
*/
ControlFlowNode get_comp_iterable(For f) {
exists(Comp c | c.getFunction().getStmt(0) = f | c.getAFlowNode().getAPredecessor() = result)
exists(Comp c, ControlFlowNode cCfg |
c.getFunction().getStmt(0) = f and cCfg.getNode() = c and cCfg.getAPredecessor() = result
)
}
from For f, Variable v, string msg

View File

@@ -19,9 +19,10 @@ private predicate loop_entry_variables(EssaVariable pred, EssaVariable succ) {
private predicate loop_entry_edge(BasicBlock pred, BasicBlock loop) {
pred = loop.getAPredecessor() and
pred = loop.getImmediateDominator() and
exists(Stmt s |
exists(Stmt s, ControlFlowNode sCfg |
loop_probably_executes_at_least_once(s) and
s.getAFlowNode().getBasicBlock() = loop
sCfg.getNode() = s and
sCfg.getBasicBlock() = loop
)
}

View File

@@ -27,7 +27,7 @@ predicate guarded_against_name_error(Name u) {
|
globals.getFunc().(Name).getId() = "globals" and
guard.controls(controlled, _) and
controlled.contains(u.getAFlowNode())
exists(ControlFlowNode uCfg | uCfg.getNode() = u | controlled.contains(uCfg))
)
}
@@ -101,18 +101,18 @@ predicate undefined_use(Name u) {
}
private predicate first_use_in_a_block(Name use) {
exists(GlobalVariable v, BasicBlock b, int i |
i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = use.getAFlowNode()
exists(GlobalVariable v, BasicBlock b, int i, ControlFlowNode useCfg | useCfg.getNode() = use |
i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = useCfg
)
}
predicate first_undefined_use(Name use) {
undefined_use(use) and
exists(GlobalVariable v | v.getALoad() = use |
first_use_in_a_block(use) and
// The CFG node for `use` is quantified inside the negation, matching the deprecated
// `use.getAFlowNode()` form: no load of `v` may strictly dominate *any* CFG node of `use`.
// Binding `useCfg` in the outer `exists` would only require this for *some* CFG node,
// and would fail outright when `use` has none.
not exists(ControlFlowNode other |
not exists(ControlFlowNode other, ControlFlowNode useCfg |
other.getNode() = v.getALoad() and
useCfg.getNode() = use and
other.getBasicBlock().strictlyDominates(use.getAFlowNode().getBasicBlock())
other.getBasicBlock().strictlyDominates(useCfg.getBasicBlock())
)
)
}

View File

@@ -18,8 +18,8 @@ private import semmle.python.types.ImportTime
/* Local variable part */
predicate initialized_as_local(PlaceHolder use) {
exists(SsaVariableWithPointsTo l, Function f |
f = use.getScope() and l.getAUse() = use.getAFlowNode()
exists(SsaVariableWithPointsTo l, Function f, ControlFlowNode useCfg |
f = use.getScope() and useCfg.getNode() = use and l.getAUse() = useCfg
|
l.getVariable() instanceof LocalVariable and
not l.maybeUndefined()

View File

@@ -54,7 +54,7 @@ predicate unused_global(Name unused, GlobalVariable v) {
u.uses(v)
|
// That is reachable from this definition, directly
defn.strictlyReaches(u.getAFlowNode())
exists(ControlFlowNode uCfg | uCfg.getNode() = u | defn.strictlyReaches(uCfg))
or
// indirectly
defn.getBasicBlock().reachesExit() and u.getScope() != unused.getScope()

View File

@@ -48,15 +48,17 @@ class Symbol extends TSymbol {
AstNode find() {
this = TModule(result)
or
exists(Symbol s, string name | this = TMember(s, name) |
exists(Symbol s, string name, ControlFlowNode resultCfg |
this = TMember(s, name) and resultCfg.getNode() = result
|
exists(ClassObject cls |
s.resolvesTo() = cls and
cls.attributeRefersTo(name, _, result.getAFlowNode())
cls.attributeRefersTo(name, _, resultCfg)
)
or
exists(ModuleObject m |
s.resolvesTo() = m and
m.attributeRefersTo(name, _, result.getAFlowNode())
m.attributeRefersTo(name, _, resultCfg)
)
)
}

View File

@@ -80,10 +80,11 @@ class VersionGuard extends ConditionBlock {
VersionGuard() { this.getLastNode() instanceof VersionTest }
}
from ImportExpr ie
where
not ie.(ExprWithPointsTo).refersTo(_) and
exists(Context c | c.appliesTo(ie.getAFlowNode())) and
// Each use of the CFG node gets its own local quantifier, as in the deprecated
// `ie.getAFlowNode()` form. A single `ieCfg` declared in `from` would tie both
// conditions to the same node and weaken the negation below from
// "no CFG node of `ie` is version-guarded" to "some CFG node of `ie` is not".
exists(Context c, ControlFlowNode ieCfg | ieCfg.getNode() = ie | c.appliesTo(ieCfg)) and
not ok_to_fail(ie) and
not exists(VersionGuard guard | guard.controls(ie.getAFlowNode().getBasicBlock(), _))
not exists(VersionGuard guard, ControlFlowNode ieCfg | ieCfg.getNode() = ie |
guard.controls(ieCfg.getBasicBlock(), _)
)
select ie, "Unable to resolve import of '" + ie.getImportedModuleName() + "'."

View File

@@ -11,13 +11,13 @@ import python
import semmle.python.pointsto.PointsTo
predicate points_to_failure(Expr e) {
exists(ControlFlowNode f | f = e.getAFlowNode() | not PointsTo::pointsTo(f, _, _, _))
exists(ControlFlowNode f | f.getNode() = e | not PointsTo::pointsTo(f, _, _, _))
}
predicate key_points_to_failure(Expr e) {
points_to_failure(e) and
not points_to_failure(e.getASubExpression()) and
not exists(SsaVariable ssa | ssa.getAUse() = e.getAFlowNode() |
not exists(SsaVariable ssa, ControlFlowNode eCfg | eCfg.getNode() = e and ssa.getAUse() = eCfg |
points_to_failure(ssa.getAnUltimateDefinition().getDefinition().getNode())
) and
not exists(Assign a | a.getATarget() = e)

View File

@@ -12,5 +12,5 @@ import python
private import LegacyPointsTo
from Expr e
where exists(ControlFlowNodeWithPointsTo f | f = e.getAFlowNode() | not f.refersTo(_))
where exists(ControlFlowNodeWithPointsTo f | f.getNode() = e | not f.refersTo(_))
select e, "Expression does not 'point-to' any object."

View File

@@ -131,7 +131,7 @@ module ModificationOfParameterWithDefault {
exists(DeletionNode d | d.getTarget().(SubscriptNode).getObject() = this.asCfgNode())
or
// augmented assignment to the value
exists(AugAssign a | a.getTarget().getAFlowNode() = this.asCfgNode())
exists(AugAssign a | this.asCfgNode().getNode() = a.getTarget())
or
// modifying function call
exists(DataFlow::CallCfgNode c, DataFlow::AttrRead a | c.getFunction() = a |