Python: unpin legacy CFG/ESSA from the AST cached stage

The legacy CFG (`Flow.qll`) and legacy ESSA (`Essa`/`SsaCompute`/
`SsaDefinitions`) were pinned into the always-on `Stages::AST` cached stage
via `Stages::AST::ref()` and the matching `backref()` disjuncts. Because a
cached stage is materialised as a unit once any of its predicates is demanded
(and every query demands e.g. `Expr.toString()`), this forced the legacy
CFG/ESSA to be computed for *every* query -- including the security/dataflow
queries, which after the shared-CFG dataflow flip no longer depend on the
legacy CFG at all.

Since `Stages::AST::ref()` is just `1 = 1`, removing the calls to it is
result-preserving; it only changes stage scheduling. After this change the
legacy CFG/ESSA is no longer materialised for queries that do not genuinely
reference it. Verified on the full `python-security-extended` suite and on
django: the number of legacy CFG/ESSA families materialised drops from ~165
to 0, with byte-identical results.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
yoff
2026-07-02 22:48:25 +00:00
parent c2f439a38f
commit bf1220463c
5 changed files with 3 additions and 50 deletions

View File

@@ -128,7 +128,6 @@ class ControlFlowNode extends @py_flow_node {
/** Gets a textual representation of this element. */
cached
string toString() {
Stages::AST::ref() and
// Since modules can have ambiguous names, entry nodes can too, if we do not collate them.
exists(Py::Scope s | s.getEntryNode() = this |
result = "Entry node for " + concat( | | s.toString(), ",")
@@ -152,7 +151,6 @@ class ControlFlowNode extends @py_flow_node {
/** Gets the scope containing this flow node */
cached
Py::Scope getScope() {
Stages::AST::ref() and
if this.getNode() instanceof Py::Scope
then
/* Entry or exit node */
@@ -554,7 +552,6 @@ class UnaryExprNode extends ControlFlowNode {
class DefinitionNode extends ControlFlowNode {
cached
DefinitionNode() {
Stages::AST::ref() and
exists(Py::Assign a | this.getNode() = a.getATarget())
or
exists(Py::AssignExpr a | this.getNode() = a.getTarget())
@@ -635,7 +632,6 @@ class TupleNode extends SequenceNode {
TupleNode() { toAst(this) instanceof Py::Tuple }
override ControlFlowNode getElement(int n) {
Stages::AST::ref() and
exists(Py::Tuple t | this.getNode() = t and result.getNode() = t.getElt(n)) and
(
result.getBasicBlock().dominates(this.getBasicBlock())
@@ -1015,10 +1011,7 @@ class BasicBlock extends @py_flow_node {
/** Whether this basic block strictly dominates the other */
cached
predicate strictlyDominates(BasicBlock other) {
Stages::AST::ref() and
other.getImmediateDominator+() = this
}
predicate strictlyDominates(BasicBlock other) { other.getImmediateDominator+() = this }
/** Whether this basic block dominates the other */
predicate dominates(BasicBlock other) {
@@ -1029,7 +1022,6 @@ class BasicBlock extends @py_flow_node {
cached
BasicBlock getImmediateDominator() {
Stages::AST::ref() and
this.firstNode().getImmediateDominator().getBasicBlock() = result
}
@@ -1075,10 +1067,7 @@ class BasicBlock extends @py_flow_node {
/** Gets a successor to this basic block */
cached
BasicBlock getASuccessor() {
Stages::AST::ref() and
result = this.getLastNode().getASuccessor().getBasicBlock()
}
BasicBlock getASuccessor() { result = this.getLastNode().getASuccessor().getBasicBlock() }
/** Gets a predecessor to this basic block */
BasicBlock getAPredecessor() { result.getASuccessor() = this }
@@ -1140,10 +1129,7 @@ class BasicBlock extends @py_flow_node {
/** Holds if this basic block strictly reaches the other. Is the start of other reachable from the end of this. */
cached
predicate strictlyReaches(BasicBlock other) {
Stages::AST::ref() and
this.getASuccessor+() = other
}
predicate strictlyReaches(BasicBlock other) { this.getASuccessor+() = other }
/** Holds if this basic block reaches the other. Is the start of other reachable from the end of this. */
predicate reaches(BasicBlock other) { this = other or this.strictlyReaches(other) }

View File

@@ -274,7 +274,6 @@ class PhiFunction extends EssaDefinition, TPhiFunction {
/** Gets the input variable for this phi node on the edge `pred` -> `this.getBasicBlock()`, if any. */
cached
EssaVariable getInput(BasicBlock pred) {
Stages::AST::ref() and
result.getDefinition() = this.reachingDefinition(pred)
or
result.getDefinition() = this.inputEdgeRefinement(pred)

View File

@@ -311,7 +311,6 @@ private module SsaComputeImpl {
*/
cached
predicate reachesEndOfBlock(SsaSourceVariable v, BasicBlock defbb, int defindex, BasicBlock b) {
Stages::AST::ref() and
Liveness::liveAtExit(v, b) and
(
defbb = b and

View File

@@ -20,7 +20,6 @@ module SsaSource {
/** Holds if `v` is used as the receiver in a method call. */
cached
predicate method_call_refinement(Variable v, ControlFlowNode use, CallNode call) {
Stages::AST::ref() and
use = v.getAUse() and
call.getFunction().(AttrNode).getObject() = use and
not test_contains(_, call)

View File

@@ -45,13 +45,9 @@ module Stages {
cached
predicate ref() { 1 = 1 }
private import semmle.python.essa.SsaDefinitions as SsaDefinitions
private import semmle.python.essa.SsaCompute as SsaCompute
private import semmle.python.essa.Essa as Essa
private import semmle.python.Module as PyModule
private import semmle.python.Exprs as Exprs
private import semmle.python.AstExtended as AstExtended
private import semmle.python.Flow as PyFlow
/**
* DONT USE!
@@ -61,12 +57,6 @@ module Stages {
predicate backref() {
1 = 1
or
SsaDefinitions::SsaSource::method_call_refinement(_, _, _)
or
SsaCompute::SsaDefinitions::reachesEndOfBlock(_, _, _, _)
or
exists(any(Essa::PhiFunction p).getInput(_))
or
exists(PyModule::moduleNameFromFile(_))
or
exists(any(Exprs::Expr e).toString())
@@ -76,26 +66,6 @@ module Stages {
exists(any(AstExtended::AstNode n).getAChildNode())
or
exists(any(AstExtended::AstNode n).getParentNode())
or
exists(PyFlow::ControlFlowNode cfg, AstExtended::AstNode n | cfg.getNode() = n)
or
exists(any(PyFlow::BasicBlock b).getImmediateDominator())
or
exists(any(PyFlow::BasicBlock b).getScope())
or
any(PyFlow::BasicBlock b).strictlyDominates(_)
or
any(PyFlow::BasicBlock b).strictlyReaches(_)
or
exists(any(PyFlow::BasicBlock b).getASuccessor())
or
exists(any(PyFlow::ControlFlowNode b).getScope())
or
exists(PyFlow::DefinitionNode b)
or
exists(any(PyFlow::SequenceNode n).getElement(_))
or
exists(any(PyFlow::ControlFlowNode c).toString())
}
}