Python: migrate dataflow library to new CFG + shared SSA

Switches the trunk dataflow library and all in-tree consumers
(frameworks, ApiGraphs, Concepts, regexp, security customisations,
test harness) from the legacy Flow.qll/ESSA stack to the new
shared-CFG facade (Cfg.qll) and the ESSA-shaped adapter on the
shared-SSA library (SsaImpl.qll).

Highlights:

  * DataFlowPublic/Private/Dispatch, Attributes, VariableCapture,
    IterableUnpacking, ImportResolution, ImportStar, LocalSources,
    TaintTrackingPrivate, MatchUnpacking, TypeTrackingImpl,
    SsaImpl, Builtins all now qualify CFG/SSA references with
    Cfg:: / SsaImpl:: and stop pulling in semmle.python.essa.*.

  * AstNodeImpl.qll/Cfg.qll: ImportMember exposes its inner
    ImportExpr, DefinitionNode.getValue covers Alias / AnnAssign /
    AugAssign / AssignExpr / For-target / Parameter-default,
    ForNode is treated as an expression node, AnnotatedExitNode is
    canonical, and BoolExprNode.getAnOperand drops the dominance
    constraint that did not hold for short-circuit basic blocks.

  * SsaImpl.qll: parameters always get a ParameterDefinition (so
    unused parameters still have SSA defs), scope-entry defs for
    module globals require an actual store somewhere, scope-exit
    has a synthetic use so that reaching defs survive to the module
    boundary, and the legacy SsaSourceVariable / EssaVariable
    surface (getName, getScope, getAUse, getASourceUse,
    getAnImplicitUse) is reinstated for downstream queries.

  * DataFlowPublic.qll: GuardNode redesigned around the new
    structural outcome nodes (isAfterTrue / isAfterFalse).  The
    legacy ConditionBlock + flipped indirection is gone;
    controlsBlock walks UP through 'not' / '==True' / 'is False'
    etc. via outcomeOfGuard, accumulating polarity cleanly.  Only
    BarrierGuard<...> is preserved as public API.

  * ModuleVariableNode.getAWrite and LocalFlow::definitionFlowStep
    bypass SSA and consult Cfg::NameNode.defines /
    Cfg::DefinitionNode.getValue directly, so that write defs
    pruned by shared SSA (because the variable has no in-scope
    read) still produce dataflow steps.

  * Frameworks + downstream consumers: replace
    EssaVariable.hasDefiningNode, getAReturnValueFlowNode,
    Parameter.getDefault, Scope.getEntryNode / getANormalExit etc.
    with CFG-side bridges through Cfg::ControlFlowNode.

The legacy Flow.qll / Essa.qll stack is untouched and remains
available for queries that import it directly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
yoff
2026-05-26 07:20:44 +00:00
parent cccd207ae7
commit ccfaa6ea7f
49 changed files with 824 additions and 471 deletions

View File

@@ -10,6 +10,7 @@
*/
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
@@ -19,14 +20,14 @@ private import semmle.python.Concepts
DataFlow::Node shouldBeTainted() {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_tainted" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_tainted" and
result in [call.getArg(_), call.getArgByName(_)]
)
}
DataFlow::Node shouldNotBeTainted() {
exists(DataFlow::CallCfgNode call |
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_not_tainted" and
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_not_tainted" and
result in [call.getArg(_), call.getArgByName(_)]
)
}
@@ -36,13 +37,13 @@ DataFlow::Node shouldNotBeTainted() {
module Conf {
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source.asCfgNode().(NameNode).getId() in [
source.asCfgNode().(Cfg::NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
// User defined sources
exists(CallNode call |
call.getFunction().(NameNode).getId() = "taint" and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "taint" and
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
or

View File

@@ -2,6 +2,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
@@ -17,7 +18,7 @@ module RecursionGuard {
RecursionGuard() { this = "RecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
@@ -33,7 +34,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
SummarizedCallableIdentity() { this = "identity" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -50,7 +51,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
SummarizedCallableApplyLambda() { this = "apply_lambda" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -70,7 +71,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
SummarizedCallableReversed() { this = "list_reversed" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -86,7 +87,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
SummarizedCallableMap() { this = "list_map" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -106,7 +107,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
SummarizedCallableAppend() { this = "append_to_list" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }

View File

@@ -1,4 +1,5 @@
import python
private import semmle.python.controlflow.internal.Cfg as Cfg
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.PrintNode
@@ -6,20 +7,20 @@ private import semmle.python.dataflow.new.internal.PrintNode
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
// Standard sources
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in [
source.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() in [
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
]
or
// User defined sources
exists(CallNode call |
call.getFunction().(NameNode).getId() = "taint" and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() = "taint" and
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}
predicate isSink(DataFlow::Node sink) {
exists(CallNode call |
call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
exists(Cfg::CallNode call |
call.getFunction().(Cfg::NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
}

View File

@@ -2,6 +2,7 @@ overlay[local?]
module;
private import python
private import semmle.python.controlflow.internal.Cfg as Cfg
private import semmle.python.dataflow.new.FlowSummary
private import semmle.python.ApiGraphs
@@ -17,7 +18,7 @@ module RecursionGuard {
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
override DataFlow::CallCfgNode getACall() {
result.getFunction().asCfgNode().(NameNode).getId() = this and
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
(TT::callStep(_, _) implies any())
}
@@ -41,7 +42,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -60,7 +61,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -82,7 +83,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -100,7 +101,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -122,7 +123,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -165,7 +166,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
@@ -183,7 +184,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable::Range {
override DataFlow::CallCfgNode getACall() { none() }
override DataFlow::CallCfgNode getACallSimple() {
result.getFunction().asCfgNode().(NameNode).getId() = this
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
}
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }