From 635134ffd2f71af71dd2ebea5b45050999121b1d Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 26 May 2026 10:56:21 +0000 Subject: [PATCH] Python: treat augmented-assignment targets as both load and store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The legacy CFG emitted two ControlFlowNodes for `x[i] += 42` (one load, one store, with `load.strictlyDominates(store)`). The new CFG collapses them to a single canonical node, mirroring Java's single-`VarAccess` model where `isVarRead`/`isVarWrite` are non-disjoint on the same expression. Reconcile two legacy two-node behaviours with the merged single-node world: 1. `Cfg::ControlFlowNode.isLoad()` no longer excludes augmented targets — both `isLoad` and `isStore` hold on the merged canonical node, matching Java. `NameNode.defines` drops the now-redundant `not isLoad` guard; `Py::Name.defines` already filters by `isDefinition` (Store/Param/AugAssign-target ctx). 2. `LocalFlow::definitionFlowStep` is restricted to NameNode targets, matching legacy ESSA's `assignment_definition` which required `defn.(NameNode).defines(v)`. Subscript and attribute writes (`x[i] = 42`, `obj.attr = 42`) no longer emit a local-flow step *into* the LHS expression — that flow is handled by the AttrWrite and content-flow machinery. This is essential for keeping augmented Subscript/Attribute targets classifiable as `LocalSourceNode` on the read side, which the API graph requires for emitting Use edges. `StoreLoadTest.ql` is updated to filter `isAugLoad` out of the regular `load` tag, mirroring the pre-existing `not isAugStore` filter on the `store` tag so augmented-assignment expectations remain `augload=n augstore=n` (not also `load=n store=n`). Closes the three remaining ApiGraphs library-test failures (`getSubscript.ql` semantically, plus cosmetic toString updates in `ModuleImportWithDots.ql` and `test_crosstalk.ql`). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/controlflow/internal/Cfg.qll | 30 +++++++++++++------ .../dataflow/new/internal/DataFlowPrivate.qll | 20 +++++++++++-- .../py3/ModuleImportWithDots.expected | 4 +-- .../ApiGraphs/py3/test_crosstalk.expected | 4 +-- .../ControlFlow/store-load/StoreLoadTest.ql | 2 +- 5 files changed, 44 insertions(+), 16 deletions(-) diff --git a/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll index 66f36771f48..ae5062092ba 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll @@ -172,10 +172,17 @@ class ControlFlowNode extends CfgImpl::ControlFlowNode { predicate isNormalExit() { this instanceof CfgImpl::ControlFlow::NormalExitNode } // ===== AST-shape predicates (bridges to the wrapped Python AST) ===== - /** Holds if this flow node is a load (including those in augmented assignments). */ - predicate isLoad() { - exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 3, e) and not augstore(_, this)) - } + /** + * Holds if this flow node is a load (including those in augmented + * assignments). + * + * Note: an augmented-assignment target (`x[i]` in `x[i] += 1`) is + * both a load and a store — `isLoad` and `isStore` both hold on the + * canonical CFG node. This mirrors Java's `VarAccess.isVarRead`, + * which holds on the destination of compound and unary assignments + * even though the destination is also a write. + */ + predicate isLoad() { exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 3, e)) } /** Holds if this flow node is a store (including those in augmented assignments). */ predicate isStore() { @@ -460,11 +467,16 @@ class NameNode extends ControlFlowNode { toAst(this) instanceof Py::PlaceHolder } - /** Holds if this flow node defines the variable `v`. */ - predicate defines(Py::Variable v) { - exists(Py::Name n | n = toAst(this) and n.defines(v)) and - not this.isLoad() - } + /** + * Holds if this flow node defines the variable `v`. + * + * This includes augmented-assignment targets — `n += 1` is both a + * read and a write of `n`, so `defines(n)` and `uses(n)` both hold + * on the same canonical CFG node. Mirrors Java's `VariableUpdate` + * semantics where compound assignments register both a write + * (`VarWrite`) and a read (`VarRead`) on the destination. + */ + predicate defines(Py::Variable v) { exists(Py::Name n | n = toAst(this) and n.defines(v)) } /** Holds if this flow node deletes the variable `v`. */ predicate deletes(Py::Variable v) { exists(Py::Name n | n = toAst(this) and n.deletes(v)) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 5d1f6417255..480f29eed39 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -350,16 +350,32 @@ module LocalFlow { // definitions that have no subsequent read in the same scope (e.g. // a module-level `def f():` whose `f` is only read inside other // functions). The CFG-level link is unconditional. + // + // The Name-target restriction mirrors legacy ESSA's + // `SsaDefinitions::assignment_definition`, which required + // `defn.(NameNode).defines(v)`. Subscript and attribute writes + // (`x[i] = 42`, `obj.attr = 42`) are intentionally excluded — their + // value flow is handled by the content-flow / `AttrWrite` machinery, + // not by a local-flow step *into* the Subscript/Attribute expression. + // Excluding them is essential for keeping augmented-assignment + // targets (`x[i] += 42`) classifiable as `LocalSourceNode` on the + // read side: the single canonical CFG node is both a load and a + // store, and any incoming local-flow step would disqualify it from + // being a local source. exists(Cfg::DefinitionNode def | nodeFrom.(CfgNode).getNode() = def.getValue() and - nodeTo.(CfgNode).getNode() = def + nodeTo.(CfgNode).getNode() = def and + def instanceof Cfg::NameNode ) or // With definition // `with f(42) as x:` // nodeFrom is `f(42)` // nodeTo is `x` - exists(With with, Cfg::ControlFlowNode contextManager, SsaImpl::WithDefinition withDef, Cfg::ControlFlowNode var | + exists( + With with, Cfg::ControlFlowNode contextManager, SsaImpl::WithDefinition withDef, + Cfg::ControlFlowNode var + | var = withDef.getDefiningNode() | nodeFrom.(CfgNode).getNode() = contextManager and diff --git a/python/ql/test/library-tests/ApiGraphs/py3/ModuleImportWithDots.expected b/python/ql/test/library-tests/ApiGraphs/py3/ModuleImportWithDots.expected index a6798920cde..1b0d0a0cf3a 100644 --- a/python/ql/test/library-tests/ApiGraphs/py3/ModuleImportWithDots.expected +++ b/python/ql/test/library-tests/ApiGraphs/py3/ModuleImportWithDots.expected @@ -1,5 +1,5 @@ moduleImportWithDots doesntFullyWork works -| test.py:25:6:25:18 | ControlFlowNode for Attribute() | -| test.py:28:10:28:17 | ControlFlowNode for method() | +| test.py:25:6:25:18 | After Attribute() | +| test.py:28:10:28:17 | After method() | diff --git a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected index 58698e5ec9d..24c2b88cb4a 100644 --- a/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected +++ b/python/ql/test/library-tests/ApiGraphs/py3/test_crosstalk.expected @@ -1,2 +1,2 @@ -| test_crosstalk.py:8:16:8:18 | ControlFlowNode for f() | bar | -| test_crosstalk.py:13:16:13:18 | ControlFlowNode for g() | baz | +| test_crosstalk.py:8:16:8:18 | After f() | bar | +| test_crosstalk.py:13:16:13:18 | After g() | baz | diff --git a/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql b/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql index 8d3a3462920..7d83ea98a0e 100644 --- a/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql +++ b/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql @@ -26,7 +26,7 @@ module StoreLoadTest implements TestSig { element = n.toString() and value = n.getId() and ( - n.isLoad() and tag = "load" + n.isLoad() and not n.isAugLoad() and tag = "load" or n.isStore() and not n.isAugStore() and tag = "store" or