From 57fa3ee2d4c200d895ce85dbdf52c7ff781d3fe2 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 19 May 2026 12:06:15 +0000 Subject: [PATCH] Python: SSA: handle closure variables via per-scope entry defs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new SSA's implicit entry-def predicate previously placed entries in the variable's defining scope. For closure variables that's the outer function, so inner functions had no entry def for the captured variable — reads in the inner scope failed to resolve to any definition. Mirrors legacy ESSA's 'NonLocalVariable.getScopeEntryDefinition()': place an implicit entry def at every reading scope's entry block, independently of where the variable is *defined*. A closure variable accessed in two nested functions and the outer one gets three entry defs (one per reading scope). Also makes 'ScopeEntryDefinition' extend 'EssaNodeDefinition' (matching legacy ESSA), with 'getDefiningNode()' returning the scope's entry CFG node. This requires extending the private 'writeDefNode' helper to project i=-1 entries to bb.getNode(0). Updates the new-vs-legacy comparison snapshot: closure-variable reads ('x:32:5'), nested global reads ('GLOBAL:52:1') now resolve. New 'def-only-new' entries appear for unbound names ('sum', 'open', 'compute') — the new SSA uniformly creates scope-entry defs for all non-local reads, including those that legacy ESSA classifies as builtin and excludes. This is a more uniform semantic and arguably cleaner. Updates the SsaTest 'some_undefined' annotation: previously documented as a known limitation, now correctly resolves to a scope-entry def. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/dataflow/new/internal/SsaImpl.qll | 58 ++++++++++++------- .../CmpTest.expected | 5 +- .../library-tests/dataflow-new-ssa/test.py | 2 +- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll index da8f34b8b52..4ca59ff5977 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll @@ -83,6 +83,10 @@ class SsaSourceVariable extends TSsaSourceVariable { * Holds if `v` is a non-local read in scope `s`, in the sense that `s` * uses `v` but does not write it within `s`. This includes globals, * builtins, and variables captured from an enclosing function scope. + * + * The `Py::Variable` `v` lives in some defining scope (the module for + * globals, an outer function for closures, etc.); the reading scope + * `s` is the scope where the use of `v` occurs. */ private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) { exists(Cfg::NameNode n | @@ -93,17 +97,23 @@ private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) { } /** - * Holds if `v` should have an implicit entry definition at the start of - * scope `s`. This covers: - * - non-local / global / builtin variables (defined outside `s`), and - * - captured variables (defined in an enclosing scope but read here). + * Holds if `bb` is the entry basic block of a scope where `v` should + * have an implicit entry definition. This covers: + * - non-local / global / builtin variables read in `s`, and + * - captured variables (defined in an enclosing scope but read in `s`). + * + * Each reading scope gets its own entry def, so a closure variable can + * have multiple entry defs across all functions/methods that read it. * * Parameters are *not* included: their bound `Name` is itself a CFG * node (per the C#-style parameter wiring), so `variableWrite` fires at * the parameter's natural CFG index. */ -private predicate hasEntryDef(SsaSourceVariable v, Py::Scope s) { - nonLocalReadIn(v.getVariable(), s) +private predicate hasEntryDefIn(SsaSourceVariable v, CfgImpl::BasicBlock bb) { + exists(Py::Scope s | + nonLocalReadIn(v.getVariable(), s) and + bb = entryBlock(s) + ) } /** @@ -144,9 +154,11 @@ private module SsaImplInput implements SsaImplCommon::InputSig= 0 and result = bb.getNode(i) + or + i = -1 and result = bb.getNode(0) ) } @@ -290,8 +304,11 @@ class WithDefinition extends EssaNodeDefinition { /** * An implicit entry definition for a non-local / captured / global / * builtin variable read in a scope but not defined there. + * + * Inherits from `EssaNodeDefinition` and exposes the scope's entry node + * as its defining node (matching legacy ESSA semantics). */ -class ScopeEntryDefinition extends Ssa::Definition { +class ScopeEntryDefinition extends EssaNodeDefinition { ScopeEntryDefinition() { exists(CfgImpl::BasicBlock bb | this.definesAt(_, bb, -1) and @@ -299,14 +316,11 @@ class ScopeEntryDefinition extends Ssa::Definition { ) } - /** Gets the variable being entered. */ - SsaSourceVariable getVariable() { result = this.getSourceVariable() } - - /** Gets the enclosing scope. */ - Py::Scope getScope() { + /** Gets the enclosing scope (the scope whose entry block this def is in). */ + override Py::Scope getScope() { exists(CfgImpl::BasicBlock bb | this.definesAt(_, bb, -1) and - result = this.getSourceVariable().getVariable().getScope() + result = bb.getNode(0).(Cfg::ControlFlowNode).getScope() ) } } diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected index ec2b8438c61..7aaeefce2f8 100644 --- a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected +++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected @@ -1,6 +1,8 @@ +| def-only-new | compute:37:1 | +| def-only-new | open:44:1 | +| def-only-new | sum:27:1 | | def-only-old | $:0:0 | | def-only-old | GLOBAL:49:1 | -| def-only-old | GLOBAL:52:1 | | def-only-old | __name__:0:0 | | def-only-old | __package__:0:0 | | def-only-old | closure:31:5 | @@ -17,4 +19,3 @@ | def-only-old | with_binding:44:5 | | def-only-old | x:20:1 | | def-only-old | x:31:13 | -| def-only-old | x:32:5 | diff --git a/python/ql/test/library-tests/dataflow-new-ssa/test.py b/python/ql/test/library-tests/dataflow-new-ssa/test.py index fb1f658c4fb..c6cdc22c3b3 100644 --- a/python/ql/test/library-tests/dataflow-new-ssa/test.py +++ b/python/ql/test/library-tests/dataflow-new-ssa/test.py @@ -35,6 +35,6 @@ def if_else_phi(cond): # $ def=cond def use_global(): - return some_undefined # known limitation: undefined globals not resolved here + return some_undefined # $ use=some_undefined