mirror of
https://github.com/github/codeql.git
synced 2026-05-27 09:31:30 +02:00
Python: extend new SSA with ESSA-shaped adapter + baseline comparison test
Phase 0.5 - Adapter API on top of the shared SSA:
Adds the legacy-ESSA-shaped class hierarchy that the dataflow library
consumes, layered on the shared 'Ssa::Make' instantiation:
* EssaDefinition / EssaNodeDefinition: the latter exposes
'getDefiningNode()' (the CFG node at the def's index in its BB)
and 'getVariable()' / 'getScope()'.
* AssignmentDefinition: matches Assign, AnnAssign with value,
AssignExpr and AugAssign target Names. Exposes 'getValue()'
pointing at the RHS' CFG node (no value is reported for AugAssign targets).
* ParameterDefinition: matches when the defining Name is in
parameter context.
* WithDefinition: matches 'with ... as x:' bindings.
* ScopeEntryDefinition: implicit entry defs at synthetic position
'-1' of the scope's entry basic block (non-local / global /
builtin / captured reads).
* PhiFunction (alias for PhiNode).
* EssaVariable adapter wrapping a 'Ssa::Definition' with 'getAUse()',
'getDefinition()', 'getAnUltimateDefinition()', and 'getName()'.
* AdjacentUses module with 'firstUse' and 'adjacentUseUse' predicates
bridging to 'Ssa::firstUse' / 'Ssa::adjacentUseUse'.
This is the minimum API the new dataflow's internals call into. The
richer legacy ESSA (refinement nodes, attribute refinements, edge
refinements) stays in 'semmle.python.essa.Essa' for legacy code.
Phase 0.6 - Comparison test:
Adds 'dataflow-new-ssa-vs-legacy/CmpTest.ql' that snapshots the
difference between definitions produced by new SSA vs legacy ESSA on
the same Python source. Baseline output records the current
'def-only-old' mismatches, grouped by category:
* function/class/global definitions with no in-scope read (intentional;
SSA is liveness-pruned)
* captured / closure variables (real gap in new SSA - no
closure-capture handling yet)
* module variables __name__ / __package__ / $ (legacy ESSA implicit
bindings)
* exception 'as' bindings (depend on raise modelling)
Zero 'def-only-new' mismatches: the new SSA never produces a spurious
definition compared to legacy ESSA on this corpus.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -177,3 +177,197 @@ final class WriteDefinition = Ssa::WriteDefinition;
|
||||
/** Re-exports `Ssa::UncertainWriteDefinition` under an unqualified name. */
final class UncertainWriteDefinition = Ssa::UncertainWriteDefinition;
|
||||
|
||||
/** Re-exports `Ssa::PhiNode` under an unqualified name. */
final class PhiNode = Ssa::PhiNode;
|
||||
|
||||
// ===========================================================================
|
||||
// ESSA-shaped adapter layer
|
||||
//
|
||||
// The dataflow library (`python/ql/lib/semmle/python/dataflow/new/`) and
|
||||
// related modules (`ApiGraphs.qll`, etc.) consume the legacy ESSA API
|
||||
// (`EssaVariable`, `EssaDefinition`, `AssignmentDefinition`,
|
||||
// `ScopeEntryDefinition`, `ParameterDefinition`, `WithDefinition`,
|
||||
// `PhiFunction`, plus the `AdjacentUses` module). To migrate them off
|
||||
// the legacy CFG, we expose the same API surface on top of the
|
||||
// shared SSA built above.
|
||||
//
|
||||
// This adapter is intentionally narrow: it covers only the predicates
|
||||
// that new dataflow consumes. The richer legacy ESSA — refinement
|
||||
// nodes, attribute refinements, edge refinements — stays available
|
||||
// via `semmle.python.essa.Essa` for points-to / legacy code.
|
||||
// ===========================================================================
|
||||
/**
 * Gets the CFG node located at the position where the write definition
 * `def` is recorded.
 *
 * `def.definesAt(_, block, index)` yields the basic block and the index
 * of the definition; the result is the node `block.getNode(index)`.
 * Only write definitions are accepted, so phi definitions (which have
 * no defining CFG node) are excluded.
 */
private Cfg::ControlFlowNode writeDefNode(Ssa::WriteDefinition def) {
  exists(CfgImpl::BasicBlock block, int index |
    result = block.getNode(index) and
    def.definesAt(_, block, index)
  )
}
|
||||
|
||||
/**
 * A write definition, that is, a definition anchored at a CFG node
 * (everything except phi nodes). Plays the role of legacy ESSA's
 * `EssaNodeDefinition`.
 */
class EssaNodeDefinition extends Ssa::WriteDefinition {
  /** Gets the CFG node at which this definition binds its variable. */
  Cfg::ControlFlowNode getDefiningNode() { writeDefNode(this) = result }

  /** Gets the source variable written by this definition (legacy-named accessor). */
  SsaSourceVariable getVariable() { this.getSourceVariable() = result }

  /** Gets the scope of the CFG node at which this definition takes place. */
  Py::Scope getScope() { result = this.getDefiningNode().getScope() }
}
|
||||
|
||||
/**
 * A definition created by an assignment such as `x = e`.
 *
 * The defining node is the CFG node of the target name `x`. A definition
 * belongs to this class when that name is a target of an `Assign`, the
 * target of an `AnnAssign` that has a value, the target of an
 * `AssignExpr`, or the target of an `AugAssign`.
 */
class AssignmentDefinition extends EssaNodeDefinition {
  AssignmentDefinition() {
    exists(Cfg::NameNode lhs | lhs = this.getDefiningNode() |
      any(Py::Assign stmt).getATarget() = lhs.getNode()
      or
      any(Py::AnnAssign stmt | exists(stmt.getValue())).getTarget() = lhs.getNode()
      or
      any(Py::AssignExpr walrus).getTarget() = lhs.getNode()
      or
      any(Py::AugAssign aug).getTarget() = lhs.getNode()
    )
  }

  /**
   * Gets the CFG node of the assigned value, if statically known.
   *
   * Only `Assign`, `AnnAssign` and `AssignExpr` are handled here; a
   * definition that matches solely through an `AugAssign` target has no
   * result.
   */
  Cfg::ControlFlowNode getValue() {
    exists(Cfg::NameNode lhs | lhs = this.getDefiningNode() |
      result.getNode() = any(Py::Assign stmt | stmt.getATarget() = lhs.getNode()).getValue()
      or
      result.getNode() = any(Py::AnnAssign stmt | stmt.getTarget() = lhs.getNode()).getValue()
      or
      result.getNode() = any(Py::AssignExpr walrus | walrus.getTarget() = lhs.getNode()).getValue()
    )
  }
}
|
||||
|
||||
/**
 * A definition whose defining CFG node is a parameter, i.e. the binding
 * of a parameter name on entry to a function.
 */
class ParameterDefinition extends EssaNodeDefinition {
  ParameterDefinition() {
    exists(Cfg::ControlFlowNode binding | binding = this.getDefiningNode() |
      binding.isParameter()
    )
  }

  /** Gets the AST node of the parameter: the `Py::Name` behind the defining CFG node. */
  Py::Name getParameter() {
    exists(Cfg::ControlFlowNode binding | binding = this.getDefiningNode() |
      result = binding.getNode()
    )
  }
}
|
||||
|
||||
/**
 * A definition bound by the `as x` part of a `with ... as x:` statement.
 *
 * Matches when the defining name node is the optional-vars expression of
 * some `Py::With`.
 */
class WithDefinition extends EssaNodeDefinition {
  WithDefinition() {
    exists(Cfg::NameNode bound | bound = this.getDefiningNode() |
      bound.getNode() = any(Py::With withStmt).getOptionalVars()
    )
  }
}
|
||||
|
||||
/**
 * An implicit entry definition for a non-local / captured / global /
 * builtin variable that is read in a scope but not defined there.
 *
 * Such definitions sit at the synthetic index `-1` of an entry basic
 * block.
 */
class ScopeEntryDefinition extends Ssa::Definition {
  ScopeEntryDefinition() {
    exists(CfgImpl::BasicBlock bb |
      this.definesAt(_, bb, -1) and
      bb instanceof CfgImpl::Cfg::EntryBasicBlock
    )
  }

  /** Gets the variable being entered. */
  SsaSourceVariable getVariable() { result = this.getSourceVariable() }

  /**
   * Gets the scope that this definition is the entry of.
   *
   * The scope is taken from the entry basic block that holds the
   * definition, not from the variable: for a global read inside a
   * function the variable's own scope is the module, whereas the scope
   * being entered is the function.
   */
  Py::Scope getScope() {
    exists(CfgImpl::BasicBlock entry |
      this.definesAt(_, entry, -1) and
      entry instanceof CfgImpl::Cfg::EntryBasicBlock and
      // The definition's index is the synthetic `-1`, so there is no node
      // at that position; use the first real node of the entry block.
      result = entry.getNode(0).getScope()
    )
  }
}
|
||||
|
||||
/** A phi node, exposed under the legacy ESSA name `PhiFunction` (an alias of `PhiNode`). */
|
||||
class PhiFunction = PhiNode;
|
||||
|
||||
/** Any SSA definition, exposed under the legacy ESSA name `EssaDefinition` (an alias of `Ssa::Definition`). */
|
||||
class EssaDefinition = Ssa::Definition;
|
||||
|
||||
/**
 * A legacy-shaped view of a single SSA "variable": each `Ssa::Definition`
 * is its own `EssaVariable`. Offers the accessors of legacy
 * `EssaVariable` that are listed below.
 */
class EssaVariable extends Ssa::Definition {
  /** Gets the SSA definition behind this variable, which is this value itself. */
  Ssa::Definition getDefinition() { this = result }

  /** Gets a name node at a position that this definition reaches as a read. */
  Cfg::NameNode getAUse() {
    exists(CfgImpl::BasicBlock block, int pos |
      result = block.getNode(pos) and
      Ssa::ssaDefReachesRead(this.getSourceVariable(), this, block, pos)
    )
  }

  /** Gets the identifier of the underlying source variable. */
  string getName() { this.getSourceVariable().getVariable().getId() = result }

  /**
   * Gets a non-phi definition that this definition ultimately stems from.
   *
   * A non-phi definition is its own ultimate definition; for a phi node
   * the ultimate definitions of all of its inputs are collected.
   */
  EssaVariable getAnUltimateDefinition() {
    this instanceof PhiNode and
    exists(Ssa::Definition incoming |
      Ssa::phiHasInputFromBlock(this, incoming, _) and
      result = incoming.(EssaVariable).getAnUltimateDefinition()
    )
    or
    not this instanceof PhiNode and
    result = this
  }
}
|
||||
|
||||
/**
 * Node-level wrappers around the shared SSA library's `adjacentUseUse`
 * and `firstUse` relations, which are expressed in terms of basic-block
 * positions. Intended to offer the same interface as legacy
 * `semmle.python.essa.SsaCompute::AdjacentUses`.
 */
module AdjacentUses {
  /**
   * Holds if `nodeFrom` and `nodeTo` are the nodes at two positions that
   * the shared SSA library reports as adjacent uses of one source variable.
   */
  predicate adjacentUseUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) {
    exists(
      SsaSourceVariable sourceVar, CfgImpl::BasicBlock fromBlock, int fromPos,
      CfgImpl::BasicBlock toBlock, int toPos
    |
      nodeFrom = fromBlock.getNode(fromPos) and
      nodeTo = toBlock.getNode(toPos) and
      Ssa::adjacentUseUse(fromBlock, fromPos, toBlock, toPos, sourceVar, _)
    )
  }

  /** Holds if `use` is the node at a position reported as a first use of `def`. */
  predicate firstUse(Ssa::Definition def, Cfg::NameNode use) {
    exists(CfgImpl::BasicBlock block, int pos |
      use = block.getNode(pos) and
      Ssa::firstUse(def, block, pos, _)
    )
  }
}
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
| def-only-old | $:0:0 |
|
||||
| def-only-old | GLOBAL:49:1 |
|
||||
| def-only-old | GLOBAL:52:1 |
|
||||
| def-only-old | __name__:0:0 |
|
||||
| def-only-old | __package__:0:0 |
|
||||
| def-only-old | closure:31:5 |
|
||||
| def-only-old | e:37:1 |
|
||||
| def-only-old | e:40:25 |
|
||||
| def-only-old | exception_binding:37:5 |
|
||||
| def-only-old | if_else_branch:12:5 |
|
||||
| def-only-old | kwargs:27:32 |
|
||||
| def-only-old | loop:20:5 |
|
||||
| def-only-old | parameter:27:5 |
|
||||
| def-only-old | read_global:52:5 |
|
||||
| def-only-old | reassignment:6:5 |
|
||||
| def-only-old | simple_assign:1:5 |
|
||||
| def-only-old | with_binding:44:5 |
|
||||
| def-only-old | x:20:1 |
|
||||
| def-only-old | x:31:13 |
|
||||
| def-only-old | x:32:5 |
|
||||
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Compares the new-CFG SSA against the legacy ESSA on the same Python
|
||||
* sources. Reports definitions present in one implementation but not
|
||||
* the other, identified by variable name + source position.
|
||||
*
|
||||
* The `.expected` file records the current diff as a snapshot: as the
|
||||
* new SSA matures (closing captured-variable gap, exception bindings,
|
||||
* etc.) and tracks more variables, the snapshot should monotonically
|
||||
* shrink.
|
||||
*
|
||||
* Known categories of `def-only-old` mismatches:
|
||||
* - Function / class / global definitions with no in-scope read
|
||||
* (intentional: SSA is liveness-pruned, write-only variables are
|
||||
* not tracked).
|
||||
* - Captured / closure variables (gap: new SSA does not yet model
|
||||
* closure captures).
|
||||
* - Module variables `__name__`, `__package__`, `$` (legacy ESSA
|
||||
* adds implicit bindings the new SSA does not).
|
||||
* - Exception-handler `as` bindings (depend on raise modelling).
|
||||
*
|
||||
* `def-only-new` mismatches would indicate the new SSA produces spurious
|
||||
* definitions; currently none are expected.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as NewSsa
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.essa.Essa
|
||||
|
||||
/**
 * Gets the signature `name:line:column` of the new-SSA definition `def`,
 * built from the variable's identifier and the start position of the
 * definition's defining CFG node.
 */
string newDefSig(NewSsa::EssaNodeDefinition def) {
  exists(Cfg::ControlFlowNode node, string name |
    node = def.getDefiningNode() and
    name = def.getVariable().getVariable().getId()
  |
    result =
      name + ":" + node.getLocation().getStartLine() + ":" + node.getLocation().getStartColumn()
  )
}
|
||||
|
||||
/**
 * Gets the signature `name:line:column` of the legacy ESSA definition
 * `def`, built from the source variable's name and the start position of
 * the definition's defining CFG node.
 */
string legacyDefSig(EssaNodeDefinition def) {
  exists(ControlFlowNode node, string name |
    node = def.getDefiningNode() and
    name = def.getSourceVariable().getName()
  |
    result =
      name + ":" + node.getLocation().getStartLine() + ":" + node.getLocation().getStartColumn()
  )
}
|
||||
|
||||
// Report every signature that only one of the two SSA implementations produces.
from string kind, string sig
where
  (
    // Present in the new SSA, absent from legacy ESSA.
    kind = "def-only-new" and
    sig = newDefSig(_) and
    not sig = legacyDefSig(_)
  )
  or
  (
    // Present in legacy ESSA, absent from the new SSA.
    kind = "def-only-old" and
    sig = legacyDefSig(_) and
    not sig = newDefSig(_)
  )
select kind, sig
|
||||
@@ -0,0 +1,53 @@
|
||||
def simple_assign():  # Fixture: a single assignment to a local, then a read of it.
|
||||
x = 1
|
||||
return x
|
||||
|
||||
|
||||
def reassignment():  # Fixture: the same local is assigned twice before being read.
|
||||
x = 1
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def if_else_branch(cond):  # Fixture: x is assigned in both branches and read after the join.
|
||||
if cond:
|
||||
x = 1
|
||||
else:
|
||||
x = 2
|
||||
return x
|
||||
|
||||
|
||||
def loop(xs):  # Fixture: an accumulator is reassigned inside a for loop over xs.
|
||||
total = 0
|
||||
for x in xs:
|
||||
total = total + x
|
||||
return total
|
||||
|
||||
|
||||
def parameter(a, b=2, *args, **kwargs):  # Fixture: positional, defaulted, *args and **kwargs parameters; kwargs is never read.
|
||||
return a + b + sum(args)
|
||||
|
||||
|
||||
def closure(x):  # Fixture: parameter x is read only by the nested function inner().
|
||||
def inner():
|
||||
return x
|
||||
return inner
|
||||
|
||||
|
||||
def exception_binding():  # Fixture: `except ... as e` binding that is then returned.
|
||||
try:
|
||||
compute()
|
||||
except Exception as e:
|
||||
return e
|
||||
|
||||
|
||||
def with_binding():  # Fixture: `with ... as f` binding that is then read.
|
||||
with open("file") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
GLOBAL = 1  # Fixture: module-level variable, read by read_global().
|
||||
|
||||
|
||||
def read_global():  # Fixture: reads the module-level GLOBAL without defining it locally.
|
||||
return GLOBAL
|
||||
Reference in New Issue
Block a user