mirror of
https://github.com/github/codeql.git
synced 2026-05-27 09:31:30 +02:00
Python: migrate dataflow library to new CFG + shared SSA
Switches the trunk dataflow library and all in-tree consumers
(frameworks, ApiGraphs, Concepts, regexp, security customisations,
test harness) from the legacy Flow.qll/ESSA stack to the new
shared-CFG facade (Cfg.qll) and the ESSA-shaped adapter on the
shared-SSA library (SsaImpl.qll).
Highlights:
* DataFlowPublic/Private/Dispatch, Attributes, VariableCapture,
IterableUnpacking, ImportResolution, ImportStar, LocalSources,
TaintTrackingPrivate, MatchUnpacking, TypeTrackingImpl,
SsaImpl, Builtins all now qualify CFG/SSA references with
Cfg:: / SsaImpl:: and stop pulling in semmle.python.essa.*.
* AstNodeImpl.qll/Cfg.qll: ImportMember exposes its inner
ImportExpr, DefinitionNode.getValue covers Alias / AnnAssign /
AugAssign / AssignExpr / For-target / Parameter-default,
ForNode is treated as an expression node, AnnotatedExitNode is
canonical, and BoolExprNode.getAnOperand drops the dominance
constraint that did not hold for short-circuit basic blocks.
* SsaImpl.qll: parameters always get a ParameterDefinition (so
unused parameters still have SSA defs), scope-entry defs for
module globals require an actual store somewhere, scope-exit
has a synthetic use so that reaching definitions survive to the module
boundary, and the legacy SsaSourceVariable / EssaVariable
surface (getName, getScope, getAUse, getASourceUse,
getAnImplicitUse) is reinstated for downstream queries.
* DataFlowPublic.qll: GuardNode redesigned around the new
structural outcome nodes (isAfterTrue / isAfterFalse). The
legacy ConditionBlock + flipped indirection is gone;
controlsBlock walks UP through 'not' / '==True' / 'is False'
etc. via outcomeOfGuard, accumulating polarity cleanly. Only
BarrierGuard<...> is preserved as public API.
* ModuleVariableNode.getAWrite and LocalFlow::definitionFlowStep
bypass SSA and consult Cfg::NameNode.defines /
Cfg::DefinitionNode.getValue directly, so that write defs
pruned by shared SSA (because the variable has no in-scope
read) still produce dataflow steps.
* Frameworks + downstream consumers: replace
EssaVariable.hasDefiningNode, getAReturnValueFlowNode,
Parameter.getDefault, Scope.getEntryNode / getANormalExit etc.
with CFG-side bridges through Cfg::ControlFlowNode.
The legacy Flow.qll / Essa.qll stack is untouched and remains
available for queries that import it directly.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
*/
|
||||
|
||||
import python
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.RemoteFlowSources
|
||||
@@ -19,14 +20,14 @@ private import semmle.python.Concepts
|
||||
|
||||
DataFlow::Node shouldBeTainted() {
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_tainted" and
|
||||
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_tainted" and
|
||||
result in [call.getArg(_), call.getArgByName(_)]
|
||||
)
|
||||
}
|
||||
|
||||
DataFlow::Node shouldNotBeTainted() {
|
||||
exists(DataFlow::CallCfgNode call |
|
||||
call.getFunction().asCfgNode().(NameNode).getId() = "ensure_not_tainted" and
|
||||
call.getFunction().asCfgNode().(Cfg::NameNode).getId() = "ensure_not_tainted" and
|
||||
result in [call.getArg(_), call.getArgByName(_)]
|
||||
)
|
||||
}
|
||||
@@ -36,13 +37,13 @@ DataFlow::Node shouldNotBeTainted() {
|
||||
module Conf {
|
||||
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
|
||||
predicate isSource(DataFlow::Node source) {
|
||||
source.asCfgNode().(NameNode).getId() in [
|
||||
source.asCfgNode().(Cfg::NameNode).getId() in [
|
||||
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
|
||||
]
|
||||
or
|
||||
// User defined sources
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "taint" and
|
||||
exists(Cfg::CallNode call |
|
||||
call.getFunction().(Cfg::NameNode).getId() = "taint" and
|
||||
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
or
|
||||
|
||||
@@ -2,6 +2,7 @@ overlay[local?]
|
||||
module;
|
||||
|
||||
private import python
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
private import semmle.python.dataflow.new.FlowSummary
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
@@ -17,7 +18,7 @@ module RecursionGuard {
|
||||
RecursionGuard() { this = "RecursionGuard" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this and
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
|
||||
(TT::callStep(_, _) implies any())
|
||||
}
|
||||
|
||||
@@ -33,7 +34,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
|
||||
SummarizedCallableIdentity() { this = "identity" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -50,7 +51,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
|
||||
SummarizedCallableApplyLambda() { this = "apply_lambda" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -70,7 +71,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
|
||||
SummarizedCallableReversed() { this = "list_reversed" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -86,7 +87,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
|
||||
SummarizedCallableMap() { this = "list_map" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -106,7 +107,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
|
||||
SummarizedCallableAppend() { this = "append_to_list" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import python
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.internal.PrintNode
|
||||
@@ -6,20 +7,20 @@ private import semmle.python.dataflow.new.internal.PrintNode
|
||||
module TestTaintTrackingConfig implements DataFlow::ConfigSig {
|
||||
predicate isSource(DataFlow::Node source) {
|
||||
// Standard sources
|
||||
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in [
|
||||
source.(DataFlow::CfgNode).getNode().(Cfg::NameNode).getId() in [
|
||||
"TAINTED_STRING", "TAINTED_BYTES", "TAINTED_LIST", "TAINTED_DICT"
|
||||
]
|
||||
or
|
||||
// User defined sources
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "taint" and
|
||||
exists(Cfg::CallNode call |
|
||||
call.getFunction().(Cfg::NameNode).getId() = "taint" and
|
||||
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
}
|
||||
|
||||
predicate isSink(DataFlow::Node sink) {
|
||||
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
|
||||
exists(Cfg::CallNode call |
|
||||
call.getFunction().(Cfg::NameNode).getId() in ["ensure_tainted", "ensure_not_tainted"] and
|
||||
sink.(DataFlow::CfgNode).getNode() = call.getAnArg()
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ overlay[local?]
|
||||
module;
|
||||
|
||||
private import python
|
||||
private import semmle.python.controlflow.internal.Cfg as Cfg
|
||||
private import semmle.python.dataflow.new.FlowSummary
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
@@ -17,7 +18,7 @@ module RecursionGuard {
|
||||
RecursionGuard() { this = "TypeTrackingSummariesRecursionGuard" }
|
||||
|
||||
override DataFlow::CallCfgNode getACall() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this and
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this and
|
||||
(TT::callStep(_, _) implies any())
|
||||
}
|
||||
|
||||
@@ -41,7 +42,7 @@ private class SummarizedCallableIdentity extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -60,7 +61,7 @@ private class SummarizedCallableApplyLambda extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -82,7 +83,7 @@ private class SummarizedCallableReversed extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -100,7 +101,7 @@ private class SummarizedCallableMap extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -122,7 +123,7 @@ private class SummarizedCallableAppend extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -165,7 +166,7 @@ private class SummarizedCallableReadSecret extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
@@ -183,7 +184,7 @@ private class SummarizedCallableSetSecret extends SummarizedCallable::Range {
|
||||
override DataFlow::CallCfgNode getACall() { none() }
|
||||
|
||||
override DataFlow::CallCfgNode getACallSimple() {
|
||||
result.getFunction().asCfgNode().(NameNode).getId() = this
|
||||
result.getFunction().asCfgNode().(Cfg::NameNode).getId() = this
|
||||
}
|
||||
|
||||
override DataFlow::ArgumentNode getACallback() { result.asExpr().(Name).getId() = this }
|
||||
|
||||
Reference in New Issue
Block a user