mirror of
https://github.com/github/codeql.git
synced 2026-06-06 05:57:07 +02:00
Python: add new shared-SSA-backed SSA adapter
Preparatory refactor for the shared-CFG dataflow migration. Adds the new Python SSA adapter additively, without changing any production behaviour. Library additions: - semmle.python.dataflow.new.internal.SsaImpl — Python SSA implementation built on the new (shared) CFG. Mirrors the Java SSA adapter (java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll): an InputSig is defined in terms of positional (BasicBlock, int) variable references, and the shared codeql.ssa.Ssa::Make<Location, Cfg, Input> module is then instantiated. SourceVariable is the AST-level Py::Variable. Variable references are looked up via the new CFG facade's NameNode.defines/uses/deletes predicates (added in the preceding PR), which themselves are one-line bridges to AST-level Name.defines/uses/deletes. Implicit-entry definitions are inserted for non-local/global/builtin reads, captured variables, and (when needed) parameters. Test additions: - library-tests/dataflow-new-ssa/ — exercises the new SSA over a representative test corpus and checks expected def/use chains. - library-tests/dataflow-new-ssa-vs-legacy/ — runs both new SSA and legacy ESSA over the same corpus and diffs the results, so any semantic divergence shows up as a test failure. Production impact: None. The new SSA adapter has zero callers in lib/ and src/ — the legacy ESSA SSA (semmle/python/essa/*) remains the default. The dataflow library is not migrated yet; that lands in a follow-up PR. Verified by: - All 367 lib + src + consistency-queries compile clean. - All 641 ControlFlow + PointsTo + dataflow + essa + consistency library-tests pass. - Both new dataflow-new-ssa[/vs-legacy] test packs pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
| def-only-old | $:0:0 |
|
||||
| def-only-old | __name__:0:0 |
|
||||
| def-only-old | __package__:0:0 |
|
||||
| def-only-old | e:37:1 |
|
||||
| def-only-old | e:40:25 |
|
||||
| def-only-old | x:20:1 |
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Compares the new-CFG SSA against the legacy ESSA on the same Python
|
||||
* sources. Reports definitions present in one implementation but not
|
||||
* the other, identified by variable name + source position.
|
||||
*
|
||||
* The `.expected` file records the current diff as a snapshot: as the
|
||||
* new SSA matures (closing the captured-variable gap, modelling exception bindings,
|
||||
* etc.) and tracks more variables, the snapshot should monotonically
|
||||
* shrink.
|
||||
*
|
||||
* Known categories of `def-only-old` mismatches:
|
||||
* - Function / class / global definitions with no in-scope read
|
||||
* (intentional: SSA is liveness-pruned, write-only variables are
|
||||
* not tracked).
|
||||
* - Captured / closure variables (gap: new SSA does not yet model
|
||||
* closure captures).
|
||||
* - Module variables `__name__`, `__package__`, `$` (legacy ESSA
|
||||
* adds implicit bindings the new SSA does not).
|
||||
* - Exception-handler `as` bindings (depend on raise modelling).
|
||||
*
|
||||
* `def-only-new` mismatches would indicate the new SSA produces spurious
|
||||
* definitions; currently none are expected.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as NewSsa
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.essa.Essa
|
||||
|
||||
/**
 * Gets the comparison key for the new-SSA definition `def`: the id of the
 * variable it defines, followed by the start line and start column of its
 * defining CFG node, joined with `:`.
 */
string newDefSig(NewSsa::EssaNodeDefinition def) {
  exists(Cfg::ControlFlowNode definingNode, string varName, int startLine, int startColumn |
    definingNode = def.getDefiningNode() and
    varName = def.getVariable().getVariable().getId() and
    startLine = definingNode.getLocation().getStartLine() and
    startColumn = definingNode.getLocation().getStartColumn()
  |
    result = varName + ":" + startLine + ":" + startColumn
  )
}
|
||||
|
||||
/**
 * Gets the comparison key for the legacy ESSA definition `def`: the name of
 * its source variable, followed by the start line and start column of its
 * defining CFG node, joined with `:`.
 */
string legacyDefSig(EssaNodeDefinition def) {
  exists(ControlFlowNode definingNode, string varName, int startLine, int startColumn |
    definingNode = def.getDefiningNode() and
    varName = def.getSourceVariable().getName() and
    startLine = definingNode.getLocation().getStartLine() and
    startColumn = definingNode.getLocation().getStartColumn()
  |
    result = varName + ":" + startLine + ":" + startColumn
  )
}
|
||||
|
||||
// Report every definition signature that only one of the two SSA
// implementations produces, labelled with the side that produced it.
from string kind, string sig
where
  // Produced by the legacy ESSA, with no matching new-SSA signature.
  exists(EssaNodeDefinition oldDef |
    kind = "def-only-old" and
    sig = legacyDefSig(oldDef) and
    not exists(NewSsa::EssaNodeDefinition newDef | newDefSig(newDef) = sig)
  )
  or
  // Produced by the new SSA, with no matching legacy signature.
  exists(NewSsa::EssaNodeDefinition newDef |
    kind = "def-only-new" and
    sig = newDefSig(newDef) and
    not exists(EssaNodeDefinition oldDef | legacyDefSig(oldDef) = sig)
  )
select kind, sig
|
||||
@@ -0,0 +1,53 @@
|
||||
def simple_assign():  # straight-line case: x is written once and read once
|
||||
x = 1
|
||||
return x
|
||||
|
||||
|
||||
def reassignment():
|
||||
x = 1  # overwritten by the next statement before any read
|
||||
x = 2  # the only write to x that the return below can observe
|
||||
return x
|
||||
|
||||
|
||||
def if_else_branch(cond):
|
||||
if cond:
|
||||
x = 1
|
||||
else:
|
||||
x = 2
|
||||
return x  # x is assigned on both branches above and read after they rejoin
|
||||
|
||||
|
||||
def loop(xs):
|
||||
total = 0
|
||||
for x in xs:  # x is bound by the loop header
|
||||
total = total + x  # reads total and rebinds it in the same statement
|
||||
return total
|
||||
|
||||
|
||||
def parameter(a, b=2, *args, **kwargs):  # kwargs is never read in the body
|
||||
return a + b + sum(args)
|
||||
|
||||
|
||||
def closure(x):
|
||||
def inner():
|
||||
return x  # x is the enclosing function's parameter, read from the nested scope
|
||||
return inner
|
||||
|
||||
|
||||
def exception_binding():
|
||||
try:
|
||||
compute()  # no definition of compute appears in this fixture
|
||||
except Exception as e:  # e is bound by the handler's as-clause
|
||||
return e
|
||||
|
||||
|
||||
def with_binding():
|
||||
with open("file") as f:  # f is bound by the with-statement's as-clause
|
||||
return f.read()
|
||||
|
||||
|
||||
GLOBAL = 1  # module-level binding; read from inside read_global() below
|
||||
|
||||
|
||||
def read_global():
|
||||
return GLOBAL  # reads the module-level name; GLOBAL is not assigned in this function
|
||||
@@ -0,0 +1,6 @@
|
||||
| test.py:14:5:14:15 | basic_param | Unexpected result: def=basic_param |
|
||||
| test.py:18:5:18:16 | basic_assign | Unexpected result: def=basic_assign |
|
||||
| test.py:23:5:23:16 | reassignment | Unexpected result: def=reassignment |
|
||||
| test.py:29:5:29:15 | if_else_phi | Unexpected result: def=if_else_phi |
|
||||
| test.py:37:5:37:14 | use_global | Unexpected result: def=use_global |
|
||||
| test.py:38:28:38:49 | Comment # $ use=some_undefined | Missing result: use=some_undefined |
|
||||
59
python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql
Normal file
59
python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Inline-expectations test for the new-CFG SSA adapter
|
||||
* (`semmle.python.dataflow.new.internal.SsaImpl`).
|
||||
*
|
||||
* Tags:
|
||||
* - `def=<var>`: there is an SSA write definition of `<var>` at this
|
||||
* line (parameter init, plain assignment, augmented assignment,
|
||||
* exception-handler binding, deletion, etc.).
|
||||
* - `use=<var>`: `<var>` is used at this line, and some SSA definition
|
||||
* of `<var>` reaches the read.
|
||||
* - `phi=<var>`: there is an SSA phi definition of `<var>` whose BB
|
||||
* starts on this line.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
|
||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
/**
 * Inline-expectations test configuration that reports `def`, `use` and
 * `phi` results computed from the new-CFG SSA adapter.
 */
module SsaTest implements TestSig {
  string getARelevantTag() { result = ["def", "use", "phi"] }

  predicate hasActualResult(Location location, string element, string tag, string value) {
    // `phi=<id>`: a phi node is defined in a basic block; the result is
    // placed at the location of that block's first CFG node.
    tag = "phi" and
    exists(SsaImpl::Ssa::PhiNode phiNode, CfgImpl::BasicBlock block |
      phiNode.definesAt(_, block, _) and
      location = block.getNode(0).getLocation() and
      element = block.toString() and
      value = phiNode.getSourceVariable().(SsaImpl::SsaSourceVariable).getVariable().getId()
    )
    or
    // `def=<id>` and `use=<id>` are both reported on the name node found at
    // position `idx` of `block`, and take their value from that node's id.
    exists(Cfg::NameNode nameNode, CfgImpl::BasicBlock block, int idx |
      nameNode = block.getNode(idx) and
      location = nameNode.getLocation() and
      element = nameNode.toString() and
      value = nameNode.getId()
    |
      // `def=<id>`: an SSA write definition sits at this position.
      tag = "def" and
      exists(SsaImpl::Ssa::WriteDefinition writeDef | writeDef.definesAt(_, block, idx))
      or
      // `use=<id>`: some SSA definition reaches a read at this position.
      tag = "use" and
      exists(SsaImpl::Ssa::Definition reachingDef |
        SsaImpl::Ssa::ssaDefReachesRead(_, reachingDef, block, idx)
      )
    )
  }
}
|
||||
|
||||
import MakeTest<SsaTest>
|
||||
40
python/ql/test/library-tests/dataflow-new-ssa/test.py
Normal file
40
python/ql/test/library-tests/dataflow-new-ssa/test.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Basic SSA tests for the new-CFG SSA adapter.
|
||||
#
|
||||
# The shared SSA implementation prunes its construction by liveness:
|
||||
# definitions of variables that are not read are never materialised.
|
||||
# This is by design — write-only variables would only bloat the SSA
|
||||
# graph. Tests therefore must always include a read of each variable
|
||||
# being verified.
|
||||
#
|
||||
# Annotations:
|
||||
# def=<var>: there is an SSA write definition of <var> at this line
|
||||
# use=<var>: <var> is used here and the read resolves to some def
|
||||
|
||||
|
||||
def basic_param(x): # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def basic_assign():  # one local variable, written once and read once
|
||||
y = 1 # $ def=y
|
||||
return y # $ use=y
|
||||
|
||||
|
||||
def reassignment():
|
||||
x = 1  # overwritten before any read, so no def=x is annotated here (see the liveness note in the file header)
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def if_else_phi(cond): # $ def=cond
|
||||
if cond: # $ use=cond phi=x
|
||||
x = 1 # $ def=x
|
||||
else:  # second branch; both branches assign x before the read below
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def use_global():  # some_undefined is not assigned anywhere in this file
|
||||
return some_undefined # $ use=some_undefined
|
||||
|
||||
|
||||
Reference in New Issue
Block a user