mirror of
https://github.com/github/codeql.git
synced 2026-06-02 20:30:15 +02:00
Python: extend new SSA with ESSA-shaped adapter + baseline comparison test
Phase 0.5 - Adapter API on top of the shared SSA:
Adds the legacy-ESSA-shaped class hierarchy that the dataflow library
consumes, layered on the shared 'Ssa::Make' instantiation:
* EssaDefinition / EssaNodeDefinition: the latter exposes
'getDefiningNode()' (the CFG node at the def's index in its BB)
and 'getVariable()' / 'getScope()'.
* AssignmentDefinition: matches Assign, AnnAssign with value,
AssignExpr and AugAssign target Names. Exposes 'getValue()'
pointing at the CFG node of the RHS.
* ParameterDefinition: matches when the defining Name is in
parameter context.
* WithDefinition: matches 'with ... as x:' bindings.
* ScopeEntryDefinition: implicit entry defs at synthetic position
'-1' of the scope's entry basic block (non-local / global /
builtin / captured reads).
* PhiFunction (alias for PhiNode).
* EssaVariable adapter wrapping a 'Ssa::Definition' with 'getAUse()',
'getDefinition()', 'getAnUltimateDefinition()', and 'getName()'.
* AdjacentUses module with 'firstUse' and 'adjacentUseUse' predicates
bridging to 'Ssa::firstUse' / 'Ssa::adjacentUseUse'.
This is the minimum API the new dataflow's internals call into. The
richer legacy ESSA (refinement nodes, attribute refinements, edge
refinements) stays in 'semmle.python.essa.Essa' for legacy code.
Phase 0.6 - Comparison test:
Adds 'dataflow-new-ssa-vs-legacy/CmpTest.ql' that snapshots the
difference between definitions produced by new SSA vs legacy ESSA on
the same Python source. Baseline output records the current
'def-only-old' mismatches, grouped by category:
* function/class/global definitions with no in-scope read (intentional;
SSA is liveness-pruned)
* captured / closure variables (real gap in new SSA - no
closure-capture handling yet)
* module variables __name__ / __package__ / $ (legacy ESSA implicit
bindings)
* exception 'as' bindings (depend on raise modelling)
Zero 'def-only-new' mismatches: the new SSA never produces a spurious
definition compared to legacy ESSA on this corpus.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
| def-only-old | $:0:0 |
|
||||
| def-only-old | GLOBAL:49:1 |
|
||||
| def-only-old | GLOBAL:52:1 |
|
||||
| def-only-old | __name__:0:0 |
|
||||
| def-only-old | __package__:0:0 |
|
||||
| def-only-old | closure:31:5 |
|
||||
| def-only-old | e:37:1 |
|
||||
| def-only-old | e:40:25 |
|
||||
| def-only-old | exception_binding:37:5 |
|
||||
| def-only-old | if_else_branch:12:5 |
|
||||
| def-only-old | kwargs:27:32 |
|
||||
| def-only-old | loop:20:5 |
|
||||
| def-only-old | parameter:27:5 |
|
||||
| def-only-old | read_global:52:5 |
|
||||
| def-only-old | reassignment:6:5 |
|
||||
| def-only-old | simple_assign:1:5 |
|
||||
| def-only-old | with_binding:44:5 |
|
||||
| def-only-old | x:20:1 |
|
||||
| def-only-old | x:31:13 |
|
||||
| def-only-old | x:32:5 |
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Compares the new-CFG SSA against the legacy ESSA on the same Python
|
||||
* sources. Reports definitions present in one implementation but not
|
||||
* the other, identified by variable name + source position.
|
||||
*
|
||||
* The `.expected` file records the current diff as a snapshot: as the
|
||||
* new SSA matures (closing the captured-variable gap, exception bindings,
|
||||
* etc.) and tracks more variables, the snapshot should monotonically
|
||||
* shrink.
|
||||
*
|
||||
* Known categories of `def-only-old` mismatches:
|
||||
* - Function / class / global definitions with no in-scope read
|
||||
* (intentional: SSA is liveness-pruned, write-only variables are
|
||||
* not tracked).
|
||||
* - Captured / closure variables (gap: new SSA does not yet model
|
||||
* closure captures).
|
||||
* - Module variables `__name__`, `__package__`, `$` (legacy ESSA
|
||||
* adds implicit bindings the new SSA does not).
|
||||
* - Exception-handler `as` bindings (depend on raise modelling).
|
||||
*
|
||||
* `def-only-new` mismatches would indicate the new SSA produces spurious
|
||||
* definitions; currently none are expected.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as NewSsa
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.essa.Essa
|
||||
|
||||
/**
 * Gets the signature of `def` as produced by the new SSA, in the form
 * `<variable id>:<start line>:<start column>`. The position is that of
 * the definition's defining CFG node.
 */
string newDefSig(NewSsa::EssaNodeDefinition def) {
  exists(Cfg::ControlFlowNode definingNode, int line, int column |
    definingNode = def.getDefiningNode() and
    line = definingNode.getLocation().getStartLine() and
    column = definingNode.getLocation().getStartColumn()
  |
    result = def.getVariable().getVariable().getId() + ":" + line + ":" + column
  )
}
|
||||
|
||||
/**
 * Gets the signature of `def` as produced by the legacy ESSA, in the form
 * `<source variable name>:<start line>:<start column>`. The position is
 * that of the definition's defining CFG node.
 */
string legacyDefSig(EssaNodeDefinition def) {
  exists(ControlFlowNode definingNode, int line, int column |
    definingNode = def.getDefiningNode() and
    line = definingNode.getLocation().getStartLine() and
    column = definingNode.getLocation().getStartColumn()
  |
    result = def.getSourceVariable().getName() + ":" + line + ":" + column
  )
}
|
||||
|
||||
from string kind, string sig
where
  // Signature produced by the legacy ESSA but by no new-SSA definition.
  kind = "def-only-old" and
  sig = legacyDefSig(_) and
  not sig = newDefSig(_)
  or
  // Signature produced by the new SSA but by no legacy ESSA definition.
  kind = "def-only-new" and
  sig = newDefSig(_) and
  not sig = legacyDefSig(_)
select kind, sig
|
||||
@@ -0,0 +1,53 @@
|
||||
def simple_assign():  # Fixture: a single assignment to `x` followed by a read of it.
|
||||
x = 1
|
||||
return x
|
||||
|
||||
|
||||
def reassignment():  # Fixture: `x` is assigned twice before it is read.
|
||||
x = 1
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def if_else_branch(cond):  # Fixture: `x` is assigned in both branches, then read after the join.
|
||||
if cond:
|
||||
x = 1
|
||||
else:
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def loop(xs):  # Fixture: `total` is read and reassigned inside a `for` loop over `xs`.
|
||||
total = 0
|
||||
for x in xs:
|
||||
total = total + x
|
||||
return total
|
||||
|
||||
|
||||
def parameter(a, b=2, *args, **kwargs):  # Fixture: parameter bindings; `kwargs` is never read in the body.
|
||||
return a + b + sum(args)
|
||||
|
||||
|
||||
def closure(x):  # Fixture: parameter `x` is read only from the nested function `inner`.
|
||||
def inner():
|
||||
return x
|
||||
return inner
|
||||
|
||||
|
||||
def exception_binding():  # Fixture: `except ... as e` binding that is read in the handler.
|
||||
try:
|
||||
compute()
|
||||
except Exception as e:
|
||||
return e
|
||||
|
||||
|
||||
def with_binding():  # Fixture: `with ... as f` binding that is read in the body.
|
||||
with open("file") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
GLOBAL = 1  # Module-level variable; read inside read_global().
|
||||
|
||||
|
||||
def read_global():  # Fixture: reads the module-level `GLOBAL` without assigning it locally.
|
||||
return GLOBAL
|
||||
Reference in New Issue
Block a user