mirror of
https://github.com/github/codeql.git
synced 2026-05-27 01:21:23 +02:00
Python: introduce shared-SSA adapter on the new CFG
Adds 'python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll', a
minimal Python SSA implementation built on the shared SSA library
('codeql.ssa.Ssa::Make<Location, Cfg, Input>'). The structure mirrors
Java's adapter at 'java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll'.
Key design choices:
* 'SourceVariable' wraps 'Py::Variable'. Only variables that are read
or deleted somewhere are tracked - write-only variables don't
benefit from SSA construction.
* Variable references are positional ('BasicBlock', 'int') pairs
looked up via 'Cfg::NameNode.defines'/'.uses'/'.deletes' (which
themselves are one-line bridges to AST-level 'Name.defines' etc.).
* Parameter writes are not synthesised: parameter Name nodes are
already wired into the CFG (per the earlier C#-style parameter
extension in 'AstNodeImpl.qll'), so the regular 'variableWrite'
path handles them at their natural CFG index.
* Non-local / captured / global / builtin variables read in a scope
but not written in it receive a synthetic entry definition at
index '-1' of the scope's entry basic block. This matches Java's
'hasEntryDef'.
* 'del x' is modelled as a certain write at the deletion site.
Includes an inline-expectations test under
'python/ql/test/library-tests/dataflow-new-ssa/' covering:
plain parameter pass-through, simple assignment + read, reassignment
with dead-write pruning, if/else with phi insertion at the join, and
an undefined-name read (currently a known limitation - no SSA flow
without an enclosing definition).
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
179
python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll
Normal file
179
python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll
Normal file
@@ -0,0 +1,179 @@
|
||||
/**
|
||||
* Provides the Python SSA implementation built on the new (shared) CFG.
|
||||
*
|
||||
* Mirrors the Java SSA adapter at
|
||||
* `java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll`:
|
||||
* an `InputSig` is defined in terms of positional `(BasicBlock, int)`
|
||||
* variable references, and the shared
|
||||
* `codeql.ssa.Ssa::Make<Location, Cfg, Input>` module is then
|
||||
* instantiated.
|
||||
*
|
||||
* `SourceVariable` is the AST-level `Py::Variable`. Variable references
|
||||
* are looked up via the CFG facade's `NameNode.defines`/`uses`/`deletes`
|
||||
* predicates, which themselves are one-line bridges to AST-level
|
||||
* `Name.defines`/`uses`/`deletes`.
|
||||
*
|
||||
* Implicit-entry definitions are inserted for:
|
||||
* - non-local / global / builtin variables that are read in the scope
|
||||
* but never assigned (no enclosing CFG node defines them),
|
||||
* - captured variables (variables defined in an enclosing scope that
|
||||
* are read inside the scope), and
|
||||
* - parameters, but only if the corresponding parameter name is *not*
|
||||
* itself a CFG node. With the C#-style parameter wiring already
|
||||
* installed in `AstNodeImpl.qll`, parameter names *are* CFG nodes,
|
||||
* so the regular `variableWrite` path handles them — no `i = -1`
|
||||
* entry is needed for ordinary parameters.
|
||||
*/
|
||||
overlay[local?]
|
||||
module;
|
||||
|
||||
private import python as Py
|
||||
private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
private import codeql.ssa.Ssa as SsaImplCommon
|
||||
private import codeql.controlflow.BasicBlock as BB
|
||||
|
||||
/**
|
||||
* Adapts the Python `Cfg` facade to the shared SSA library's `CfgSig`.
|
||||
* All members are inherited from `Cfg::ControlFlowNode` and
|
||||
* `Cfg::BasicBlock`.
|
||||
*/
|
||||
private module CfgForSsa implements BB::CfgSig<Py::Location> {
|
||||
class ControlFlowNode = CfgImpl::ControlFlowNode;
|
||||
|
||||
class BasicBlock = CfgImpl::BasicBlock;
|
||||
|
||||
class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock;
|
||||
|
||||
predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2;
|
||||
}
|
||||
|
||||
/**
|
||||
* A source variable for SSA. Wraps a Python `Variable` (the AST-level
|
||||
* notion of a named binding within a scope) so that the shared SSA
|
||||
* implementation can use it as a `SourceVariable`.
|
||||
*
|
||||
* We only track variables that are read at least once in their scope —
|
||||
* tracking write-only variables is unnecessary work.
|
||||
*/
|
||||
private newtype TSsaSourceVariable =
|
||||
TPyVar(Py::Variable v) {
|
||||
// Has a use somewhere — read-relevant for SSA.
|
||||
exists(Cfg::NameNode n | n.uses(v))
|
||||
or
|
||||
// Or has a deletion (treated as a write that destroys the value).
|
||||
exists(Cfg::NameNode n | n.deletes(v))
|
||||
}
|
||||
|
||||
/**
|
||||
* A source variable for SSA, wrapping a Python AST `Variable`.
|
||||
*/
|
||||
class SsaSourceVariable extends TSsaSourceVariable {
|
||||
/** Gets the underlying Python AST variable. */
|
||||
Py::Variable getVariable() { this = TPyVar(result) }
|
||||
|
||||
/** Gets a textual representation of this source variable. */
|
||||
string toString() { result = this.getVariable().toString() }
|
||||
|
||||
/** Gets the location of this source variable. */
|
||||
Py::Location getLocation() { result = this.getVariable().getScope().getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `v` is a non-local read in scope `s`, in the sense that `s`
|
||||
* uses `v` but does not write it within `s`. This includes globals,
|
||||
* builtins, and variables captured from an enclosing function scope.
|
||||
*/
|
||||
private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) {
|
||||
exists(Cfg::NameNode n |
|
||||
n.uses(v) and
|
||||
n.getScope() = s and
|
||||
not exists(Cfg::NameNode def | def.defines(v) and def.getScope() = s)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `v` should have an implicit entry definition at the start of
|
||||
* scope `s`. This covers:
|
||||
* - non-local / global / builtin variables (defined outside `s`), and
|
||||
* - captured variables (defined in an enclosing scope but read here).
|
||||
*
|
||||
* Parameters are *not* included: their bound `Name` is itself a CFG
|
||||
* node (per the C#-style parameter wiring), so `variableWrite` fires at
|
||||
* the parameter's natural CFG index.
|
||||
*/
|
||||
private predicate hasEntryDef(SsaSourceVariable v, Py::Scope s) {
|
||||
nonLocalReadIn(v.getVariable(), s)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the entry basic block of scope `s`, where implicit entry
|
||||
* definitions are placed (at synthetic index `-1`).
|
||||
*/
|
||||
private CfgImpl::BasicBlock entryBlock(Py::Scope s) {
|
||||
exists(CfgImpl::ControlFlowNode entry |
|
||||
entry instanceof CfgImpl::ControlFlow::EntryNode and
|
||||
entry.getEnclosingCallable().asScope() = s and
|
||||
result = entry.getBasicBlock()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* The SSA `InputSig` for Python. References are positional
|
||||
* `(BasicBlock, int)` pairs into the new CFG.
|
||||
*/
|
||||
private module SsaImplInput implements SsaImplCommon::InputSig<Py::Location, CfgImpl::BasicBlock> {
|
||||
class SourceVariable = SsaSourceVariable;
|
||||
|
||||
predicate variableWrite(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
// Explicit binding at a CFG node — includes assignments,
|
||||
// parameter Names (wired in via the C# pattern), exception-handler
|
||||
// `as`-bindings, import aliases, and match-pattern captures.
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.defines(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
or
|
||||
// `del x` — removes the binding. Modelled as a certain write that
|
||||
// makes any subsequent read invalid.
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.deletes(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
or
|
||||
// Implicit entry definition for non-local / captured / global /
|
||||
// builtin variables read in the scope.
|
||||
bb = entryBlock(v.getVariable().getScope()) and
|
||||
hasEntryDef(v, v.getVariable().getScope()) and
|
||||
i = -1 and
|
||||
certain = true
|
||||
}
|
||||
|
||||
predicate variableRead(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) {
|
||||
exists(Cfg::NameNode n |
|
||||
bb.getNode(i) = n and
|
||||
n.uses(v.getVariable()) and
|
||||
certain = true
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The shared SSA instantiation for Python.
|
||||
*
|
||||
* Members:
|
||||
* - `Definition` — the union of explicit, uncertain, and phi definitions
|
||||
* - `WriteDefinition`, `UncertainWriteDefinition`, `PhiNode`
|
||||
* - the standard SSA predicates (`getAUse`, `getAnUltimateDefinition`, ...).
|
||||
*/
|
||||
module Ssa = SsaImplCommon::Make<Py::Location, CfgForSsa, SsaImplInput>;
|
||||
|
||||
final class Definition = Ssa::Definition;
|
||||
|
||||
final class WriteDefinition = Ssa::WriteDefinition;
|
||||
|
||||
final class UncertainWriteDefinition = Ssa::UncertainWriteDefinition;
|
||||
|
||||
final class PhiNode = Ssa::PhiNode;
|
||||
59
python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql
Normal file
59
python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Inline-expectations test for the new-CFG SSA adapter
|
||||
* (`semmle.python.dataflow.new.internal.SsaImpl`).
|
||||
*
|
||||
* Tags:
|
||||
* - `def=<var>`: there is an SSA write definition of `<var>` at this
|
||||
* line (parameter init, plain assignment, augmented assignment,
|
||||
* exception-handler binding, deletion, etc.).
|
||||
* - `use=<var>`: `<var>` is used at this line, and some SSA definition
|
||||
* of `<var>` reaches the read.
|
||||
* - `phi=<var>`: there is an SSA phi definition of `<var>` whose BB
|
||||
* starts on this line.
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl
|
||||
import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl
|
||||
import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import utils.test.InlineExpectationsTest
|
||||
|
||||
module SsaTest implements TestSig {
|
||||
string getARelevantTag() { result = ["def", "use", "phi"] }
|
||||
|
||||
predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
// A `def=<id>` fires when an SSA WriteDefinition is at a CFG node
|
||||
// on the given line.
|
||||
exists(SsaImpl::Ssa::WriteDefinition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n |
|
||||
def.definesAt(_, bb, i) and
|
||||
bb.getNode(i) = n and
|
||||
tag = "def" and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
value = n.getId()
|
||||
)
|
||||
or
|
||||
// A `use=<id>` fires when an SSA Definition reaches a read at this
|
||||
// CFG node.
|
||||
exists(SsaImpl::Ssa::Definition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n |
|
||||
SsaImpl::Ssa::ssaDefReachesRead(_, def, bb, i) and
|
||||
bb.getNode(i) = n and
|
||||
tag = "use" and
|
||||
location = n.getLocation() and
|
||||
element = n.toString() and
|
||||
value = n.getId()
|
||||
)
|
||||
or
|
||||
// A `phi=<id>` fires when there is a phi node whose BB's first
|
||||
// CFG node is on the given line.
|
||||
exists(SsaImpl::Ssa::PhiNode phi, CfgImpl::BasicBlock bb |
|
||||
phi.definesAt(_, bb, _) and
|
||||
tag = "phi" and
|
||||
location = bb.getNode(0).getLocation() and
|
||||
element = bb.toString() and
|
||||
value = phi.getSourceVariable().(SsaImpl::SsaSourceVariable).getVariable().getId()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
import MakeTest<SsaTest>
|
||||
40
python/ql/test/library-tests/dataflow-new-ssa/test.py
Normal file
40
python/ql/test/library-tests/dataflow-new-ssa/test.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Basic SSA tests for the new-CFG SSA adapter.
|
||||
#
|
||||
# The shared SSA implementation prunes its construction by liveness:
|
||||
# definitions of variables that are not read are never materialised.
|
||||
# This is by design — write-only variables would only bloat the SSA
|
||||
# graph. Tests therefore must always include a read of each variable
|
||||
# being verified.
|
||||
#
|
||||
# Annotations:
|
||||
# def=<var>: there is an SSA write definition of <var> at this line
|
||||
# use=<var>: <var> is used here and the read resolves to some def
|
||||
|
||||
|
||||
def basic_param(x): # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def basic_assign():
|
||||
y = 1 # $ def=y
|
||||
return y # $ use=y
|
||||
|
||||
|
||||
def reassignment():
|
||||
x = 1
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def if_else_phi(cond): # $ def=cond
|
||||
if cond: # $ use=cond phi=x
|
||||
x = 1 # $ def=x
|
||||
else:
|
||||
x = 2 # $ def=x
|
||||
return x # $ use=x
|
||||
|
||||
|
||||
def use_global():
|
||||
return some_undefined # known limitation: undefined globals not resolved here
|
||||
|
||||
|
||||
Reference in New Issue
Block a user