diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll
index 11e7b9f4d3d..da8f34b8b52 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll
@@ -177,3 +177,209 @@ final class WriteDefinition = Ssa::WriteDefinition;
 final class UncertainWriteDefinition = Ssa::UncertainWriteDefinition;
 
 final class PhiNode = Ssa::PhiNode;
+
+// ===========================================================================
+// ESSA-shaped adapter layer
+//
+// The dataflow library (`python/ql/lib/semmle/python/dataflow/new/`) and
+// related modules (`ApiGraphs.qll`, etc.) consume the legacy ESSA API
+// (`EssaVariable`, `EssaDefinition`, `AssignmentDefinition`,
+// `ScopeEntryDefinition`, `ParameterDefinition`, `WithDefinition`,
+// `PhiFunction`, plus the `AdjacentUses` module). To migrate them off
+// the legacy CFG, we expose the same API surface on top of the
+// shared SSA built above.
+//
+// This adapter is intentionally narrow: it covers only the predicates
+// that new dataflow consumes. The richer legacy ESSA — refinement
+// nodes, attribute refinements, edge refinements — stays available
+// via `semmle.python.essa.Essa` for points-to / legacy code.
+// ===========================================================================
+/**
+ * Gets the CFG node at which a write definition's binding takes place.
+ *
+ * This is the `Cfg::ControlFlowNode` whose index in `def`'s basic block
+ * is the same as `def`'s defining index. Phi definitions have no
+ * defining CFG node and are excluded.
+ */
+private Cfg::ControlFlowNode writeDefNode(Ssa::WriteDefinition def) {
+  exists(CfgImpl::BasicBlock bb, int i |
+    def.definesAt(_, bb, i) and
+    result = bb.getNode(i)
+  )
+}
+
+/**
+ * A write definition whose binding has a corresponding CFG node — i.e.
+ * everything that's not a phi node. Mirrors legacy ESSA's
+ * `EssaNodeDefinition`.
+ */
+class EssaNodeDefinition extends Ssa::WriteDefinition {
+  /** Gets the CFG node where this definition's binding takes place. */
+  Cfg::ControlFlowNode getDefiningNode() { result = writeDefNode(this) }
+
+  /** Gets the variable defined here (legacy name). */
+  SsaSourceVariable getVariable() { result = this.getSourceVariable() }
+
+  /** Gets the enclosing scope. */
+  Py::Scope getScope() {
+    exists(Cfg::ControlFlowNode n | n = this.getDefiningNode() | result = n.getScope())
+  }
+}
+
+/**
+ * An assignment definition `x = e`. The defining node is `x`'s CFG
+ * node; the value is `e`'s CFG node.
+ */
+class AssignmentDefinition extends EssaNodeDefinition {
+  AssignmentDefinition() {
+    exists(Cfg::NameNode n | n = this.getDefiningNode() |
+      exists(Py::Assign a | a.getATarget() = n.getNode())
+      or
+      exists(Py::AnnAssign a | a.getTarget() = n.getNode() and exists(a.getValue()))
+      or
+      exists(Py::AssignExpr a | a.getTarget() = n.getNode())
+      or
+      exists(Py::AugAssign a | a.getTarget() = n.getNode())
+    )
+  }
+
+  /**
+   * Gets the CFG node for the value being assigned, if statically known.
+   *
+   * There is no case for augmented assignments, so a definition matched only
+   * through `Py::AugAssign` has no result here.
+   */
+  Cfg::ControlFlowNode getValue() {
+    exists(Cfg::NameNode target | target = this.getDefiningNode() |
+      exists(Py::Assign a |
+        a.getATarget() = target.getNode() and
+        result.getNode() = a.getValue()
+      )
+      or
+      exists(Py::AnnAssign a |
+        a.getTarget() = target.getNode() and
+        result.getNode() = a.getValue()
+      )
+      or
+      exists(Py::AssignExpr a |
+        a.getTarget() = target.getNode() and
+        result.getNode() = a.getValue()
+      )
+    )
+  }
+}
+
+/**
+ * A parameter definition — the binding of a parameter name in a
+ * function's scope.
+ */
+class ParameterDefinition extends EssaNodeDefinition {
+  ParameterDefinition() { this.getDefiningNode().isParameter() }
+
+  /** Gets the AST `Parameter` (a `Py::Name` in param context). */
+  Py::Name getParameter() { result = this.getDefiningNode().getNode() }
+}
+
+/**
+ * A definition introduced by a `with ... as x:` clause.
+ */
+class WithDefinition extends EssaNodeDefinition {
+  WithDefinition() {
+    exists(Cfg::NameNode n, Py::With w |
+      n = this.getDefiningNode() and
+      w.getOptionalVars() = n.getNode()
+    )
+  }
+}
+
+/**
+ * An implicit entry definition for a non-local / captured / global /
+ * builtin variable read in a scope but not defined there.
+ */
+class ScopeEntryDefinition extends Ssa::Definition {
+  ScopeEntryDefinition() {
+    exists(CfgImpl::BasicBlock bb |
+      this.definesAt(_, bb, -1) and
+      bb instanceof CfgImpl::Cfg::EntryBasicBlock
+    )
+  }
+
+  /** Gets the variable being entered. */
+  SsaSourceVariable getVariable() { result = this.getSourceVariable() }
+
+  /**
+   * Gets the scope whose entry block holds this definition.
+   *
+   * For a global or captured variable this is the scope that reads the
+   * variable, which differs from the scope that declares it.
+   */
+  Py::Scope getScope() {
+    exists(CfgImpl::BasicBlock bb, Cfg::ControlFlowNode n |
+      this.definesAt(_, bb, -1) and
+      // Take the scope from a node of the entry block, not from the variable.
+      n = bb.getNode(_) and
+      result = n.getScope()
+    )
+  }
+}
+
+/** A phi node (alias matching legacy naming). */
+class PhiFunction = PhiNode;
+
+/** Base class for all ESSA definitions (legacy-shaped). */
+class EssaDefinition = Ssa::Definition;
+
+/**
+ * An adapter representing a single SSA-defined "variable" — wrapping
+ * one `Ssa::Definition`. Mirrors legacy `EssaVariable` API.
+ */
+class EssaVariable extends Ssa::Definition {
+  /** Gets the underlying SSA definition (legacy name). */
+  Ssa::Definition getDefinition() { result = this }
+
+  /** Gets a CFG node where this definition is used. */
+  Cfg::NameNode getAUse() {
+    exists(CfgImpl::BasicBlock bb, int i |
+      Ssa::ssaDefReachesRead(this.getSourceVariable(), this, bb, i) and
+      bb.getNode(i) = result
+    )
+  }
+
+  /** Gets the (textual) name of the underlying variable. */
+  string getName() { result = this.getSourceVariable().getVariable().getId() }
+
+  /** Gets an ultimate non-phi ancestor of this definition. */
+  EssaVariable getAnUltimateDefinition() {
+    if this instanceof PhiNode
+    then
+      exists(Ssa::Definition input |
+        Ssa::phiHasInputFromBlock(this, input, _) and
+        result = input.(EssaVariable).getAnUltimateDefinition()
+      )
+    else result = this
+  }
+}
+
+/**
+ * Adjacent use-use and def-use relations exposed by the shared SSA
+ * library. Provides the same interface as legacy
+ * `semmle.python.essa.SsaCompute::AdjacentUses`.
+ */
+module AdjacentUses {
+  /** Holds if `nodeFrom` and `nodeTo` are adjacent uses of the same SSA variable. */
+  predicate adjacentUseUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) {
+    exists(SsaSourceVariable v, CfgImpl::BasicBlock bb1, int i1, CfgImpl::BasicBlock bb2, int i2 |
+      Ssa::adjacentUseUse(bb1, i1, bb2, i2, v, _) and
+      nodeFrom = bb1.getNode(i1) and
+      nodeTo = bb2.getNode(i2)
+    )
+  }
+
+  /** Holds if `use` is a first use of definition `def`. */
+  predicate firstUse(Ssa::Definition def, Cfg::NameNode use) {
+    exists(CfgImpl::BasicBlock bb, int i |
+      Ssa::firstUse(def, bb, i, _) and
+      use = bb.getNode(i)
+    )
+  }
+}
diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected
new file mode 100644
index 00000000000..ec2b8438c61
--- /dev/null
+++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected
@@ -0,0 +1,20 @@
+| def-only-old | $:0:0 |
+| def-only-old | GLOBAL:49:1 |
+| def-only-old | GLOBAL:52:1 |
+| def-only-old | __name__:0:0 |
+| def-only-old | __package__:0:0 |
+| def-only-old | closure:31:5 |
+| def-only-old | e:37:1 |
+| def-only-old | e:40:25 |
+| def-only-old | exception_binding:37:5 |
+| def-only-old | if_else_branch:12:5 |
+| def-only-old | kwargs:27:32 |
+| def-only-old | loop:20:5 |
+| def-only-old | parameter:27:5 |
+| def-only-old | read_global:52:5 |
+| def-only-old | reassignment:6:5 |
+| def-only-old | simple_assign:1:5 |
+| def-only-old | with_binding:44:5 |
+| def-only-old | x:20:1 |
+| def-only-old | x:31:13 |
+| def-only-old | x:32:5 |
diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql
new file mode 100644
index 00000000000..590f5ebed47
--- /dev/null
+++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql
@@ -0,0 +1,59 @@
+/**
+ * Compares the new-CFG SSA against the legacy ESSA on the same Python
+ * sources. Reports definitions present in one implementation but not
+ * the other, identified by variable name + source position.
+ *
+ * The `.expected` file records the current diff as a snapshot: as the
+ * new SSA matures (closing captured-variable gap, exception bindings,
+ * etc.) and tracks more variables, the snapshot should monotonically
+ * shrink.
+ *
+ * Known categories of `def-only-old` mismatches:
+ * - Function / class / global definitions with no in-scope read
+ *   (intentional: SSA is liveness-pruned, write-only variables are
+ *   not tracked).
+ * - Captured / closure variables (gap: new SSA does not yet model
+ *   closure captures).
+ * - Module variables `__name__`, `__package__`, `$` (legacy ESSA
+ *   adds implicit bindings the new SSA does not).
+ * - Exception-handler `as` bindings (depend on raise modelling).
+ *
+ * `def-only-new` mismatches would indicate the new SSA produces spurious
+ * definitions; currently none are expected.
+ */
+
+import python
+import semmle.python.dataflow.new.internal.SsaImpl as NewSsa
+import semmle.python.controlflow.internal.Cfg as Cfg
+import semmle.python.essa.Essa
+
+string newDefSig(NewSsa::EssaNodeDefinition def) {
+  exists(Cfg::ControlFlowNode n | n = def.getDefiningNode() |
+    result =
+      def.getVariable().getVariable().getId() + ":" + n.getLocation().getStartLine() + ":" +
+        n.getLocation().getStartColumn()
+  )
+}
+
+string legacyDefSig(EssaNodeDefinition def) {
+  exists(ControlFlowNode n | n = def.getDefiningNode() |
+    result =
+      def.getSourceVariable().getName() + ":" + n.getLocation().getStartLine() + ":" +
+        n.getLocation().getStartColumn()
+  )
+}
+
+from string kind, string sig
+where
+  kind = "def-only-new" and
+  exists(NewSsa::EssaNodeDefinition def |
+    sig = newDefSig(def) and
+    not exists(EssaNodeDefinition legacyDef | sig = legacyDefSig(legacyDef))
+  )
+  or
+  kind = "def-only-old" and
+  exists(EssaNodeDefinition legacyDef |
+    sig = legacyDefSig(legacyDef) and
+    not exists(NewSsa::EssaNodeDefinition def | sig = newDefSig(def))
+  )
+select kind, sig
diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py
new file mode 100644
index 00000000000..8b061109bf2
--- /dev/null
+++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py
@@ -0,0 +1,53 @@
+def simple_assign():
+    x = 1
+    return x
+
+
+def reassignment():
+    x = 1
+    x = 2
+    return x
+
+
+def if_else_branch(cond):
+    if cond:
+        x = 1
+    else:
+        x = 2
+    return x
+
+
+def loop(xs):
+    total = 0
+    for x in xs:
+        total = total + x
+    return total
+
+
+def parameter(a, b=2, *args, **kwargs):
+    return a + b + sum(args)
+
+
+def closure(x):
+    def inner():
+        return x
+    return inner
+
+
+def exception_binding():
+    try:
+        compute()
+    except Exception as e:
+        return e
+
+
+def with_binding():
+    with open("file") as f:
+        return f.read()
+
+
+GLOBAL = 1
+
+
+def read_global():
+    return GLOBAL