mirror of
https://github.com/github/codeql.git
synced 2026-01-21 18:34:46 +01:00
Python: remove EssaNodes
This commit removes SSA nodes from the data flow graph. Specifically, for a definition and use such as ```python x = expr y = x + 2 ``` we used to have flow from `expr` to an SSA variable representing x and from that SSA variable to the use of `x` in the definition of `y`. Now we instead have flow from `expr` to the control flow node for `x` at line 1 and from there to the control flow node for `x` at line 2. Specific changes: - `EssaNode` from the data flow layer no longer exists. - Several glue steps between `EssaNode`s and `CfgNode`s have been deleted. - Entry nodes are now admitted as `CfgNodes` in the data flow layer (they were filtered out before). - Entry nodes now have a new `toString` taking into account that the module name may be ambigous. - Some tests have been rewritten to accomodate the changes, but only `python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll` should have semantic changes. - Comments have been updated - Test output has been updated, but apart from `python/ql/test/experimental/dataflow/basic/maximalFlows.expected` only `python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py` should have a semantic change. This is a bonus fix, probably meaning that something was never connected up correctly.
This commit is contained in:
@@ -126,7 +126,10 @@ class ControlFlowNode extends @py_flow_node {
|
||||
cached
|
||||
string toString() {
|
||||
Stages::AST::ref() and
|
||||
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
|
||||
// Since modules can have ambigous names, entry nodes can too, if we do not collate them.
|
||||
exists(Scope s | s.getEntryNode() = this |
|
||||
result = "Entry node for " + concat( | | s.toString(), ",")
|
||||
)
|
||||
or
|
||||
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
|
||||
or
|
||||
|
||||
@@ -281,28 +281,33 @@ class DataFlowExpr = Expr;
|
||||
/**
|
||||
* A module to compute local flow.
|
||||
*
|
||||
* Flow will generally go from control flow nodes into essa variables at definitions,
|
||||
* Flow will generally go from control flow nodes for expressions into
|
||||
* control flow nodes for variables at definitions,
|
||||
* and from there via use-use flow to other control flow nodes.
|
||||
*
|
||||
* Some syntaxtic constructs are handled separately.
|
||||
*/
|
||||
module LocalFlow {
|
||||
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
|
||||
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
|
||||
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
// Definition
|
||||
// `x = f(42)`
|
||||
// nodeFrom is `f(42)`, cfg node
|
||||
// nodeTo is `x`, essa var
|
||||
nodeFrom.(CfgNode).getNode() =
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
|
||||
// nodeFrom is `f(42)`
|
||||
// nodeTo is `x`
|
||||
exists(AssignmentDefinition def |
|
||||
nodeFrom.(CfgNode).getNode() = def.getValue() and
|
||||
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
|
||||
)
|
||||
or
|
||||
// With definition
|
||||
// `with f(42) as x:`
|
||||
// nodeFrom is `f(42)`, cfg node
|
||||
// nodeTo is `x`, essa var
|
||||
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
|
||||
// nodeFrom is `f(42)`
|
||||
// nodeTo is `x`
|
||||
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
|
||||
var = withDef.getDefiningNode()
|
||||
|
|
||||
nodeFrom.(CfgNode).getNode() = contextManager and
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
|
||||
nodeTo.(CfgNode).getNode() = var and
|
||||
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
|
||||
with.getContextExpr() = contextManager.getNode() and
|
||||
with.getOptionalVars() = var.getNode() and
|
||||
@@ -313,34 +318,6 @@ module LocalFlow {
|
||||
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
|
||||
// * `async with x.foo() as foo: await foo.async_method()`.
|
||||
)
|
||||
or
|
||||
// Async with var definition
|
||||
// `async with f(42) as x:`
|
||||
// nodeFrom is `x`, cfg node
|
||||
// nodeTo is `x`, essa var
|
||||
//
|
||||
// This makes the cfg node the local source of the awaited value.
|
||||
//
|
||||
// We have this step in addition to the step above, to handle cases where the QL
|
||||
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
|
||||
// using `async with`, so you can do both:
|
||||
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
|
||||
// * `async with x.foo() as foo: await foo.async_method()`.
|
||||
exists(With with, ControlFlowNode var |
|
||||
nodeFrom.(CfgNode).getNode() = var and
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
|
||||
with.getOptionalVars() = var.getNode() and
|
||||
with.isAsync()
|
||||
)
|
||||
or
|
||||
// Parameter definition
|
||||
// `def foo(x):`
|
||||
// nodeFrom is `x`, cfgNode
|
||||
// nodeTo is `x`, essa var
|
||||
exists(ParameterDefinition pd |
|
||||
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
|
||||
nodeTo.(EssaNode).getVar() = pd.getVariable()
|
||||
)
|
||||
}
|
||||
|
||||
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
@@ -372,9 +349,12 @@ module LocalFlow {
|
||||
// First use after definition
|
||||
// `y = 42`
|
||||
// `x = f(y)`
|
||||
// nodeFrom is `y` on first line, essa var
|
||||
// nodeTo is `y` on second line, cfg node
|
||||
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
|
||||
// nodeFrom is `y` on first line
|
||||
// nodeTo is `y` on second line
|
||||
exists(EssaDefinition def |
|
||||
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
|
||||
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
|
||||
)
|
||||
or
|
||||
// Next use after use
|
||||
// `x = f(y)`
|
||||
@@ -565,11 +545,7 @@ predicate neverSkipInPathGraph(Node n) {
|
||||
// ```
|
||||
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
|
||||
// annoying and doesn't help the path explanation become clearer.
|
||||
n.asVar() instanceof EssaDefinition and
|
||||
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
|
||||
// with use-use flow, and since the CFN is already part of the path graph, we don't
|
||||
// want to force showing the SSA node as well.
|
||||
not n.asVar() instanceof ParameterDefinition
|
||||
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -916,7 +892,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
|
||||
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
|
||||
exists(ForTarget target |
|
||||
nodeFrom.asExpr() = target.getSource() and
|
||||
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
|
||||
nodeTo.asCfgNode() = target
|
||||
) and
|
||||
(
|
||||
c instanceof ListElementContent
|
||||
|
||||
@@ -23,13 +23,13 @@ private import FlowSummaryImpl as FlowSummaryImpl
|
||||
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
|
||||
*/
|
||||
newtype TNode =
|
||||
/** A node corresponding to an SSA variable. */
|
||||
TEssaNode(EssaVariable var) or
|
||||
/** A node corresponding to a control flow node. */
|
||||
TCfgNode(ControlFlowNode node) {
|
||||
isExpressionNode(node)
|
||||
or
|
||||
node.getNode() instanceof Pattern
|
||||
or
|
||||
node = any(ScopeEntryDefinition def).getDefiningNode()
|
||||
} or
|
||||
/**
|
||||
* A synthetic node representing the value of an object before a state change.
|
||||
@@ -155,9 +155,6 @@ class Node extends TNode {
|
||||
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
|
||||
/** Gets the ESSA variable corresponding to this node, if any. */
|
||||
EssaVariable asVar() { none() }
|
||||
|
||||
/** Gets the control-flow node corresponding to this node, if any. */
|
||||
ControlFlowNode asCfgNode() { none() }
|
||||
|
||||
@@ -170,25 +167,6 @@ class Node extends TNode {
|
||||
LocalSourceNode getALocalSource() { result.flowsTo(this) }
|
||||
}
|
||||
|
||||
/** A data-flow node corresponding to an SSA variable. */
|
||||
class EssaNode extends Node, TEssaNode {
|
||||
EssaVariable var;
|
||||
|
||||
EssaNode() { this = TEssaNode(var) }
|
||||
|
||||
/** Gets the `EssaVariable` represented by this data-flow node. */
|
||||
EssaVariable getVar() { result = var }
|
||||
|
||||
override EssaVariable asVar() { result = var }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
override string toString() { result = var.toString() }
|
||||
|
||||
override Scope getScope() { result = var.getScope() }
|
||||
|
||||
override Location getLocation() { result = var.getLocation() }
|
||||
}
|
||||
|
||||
/** A data-flow node corresponding to a control-flow node. */
|
||||
class CfgNode extends Node, TCfgNode {
|
||||
ControlFlowNode node;
|
||||
@@ -411,8 +389,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
|
||||
}
|
||||
|
||||
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
|
||||
EssaNode getAWrite() {
|
||||
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
|
||||
Node getAWrite() {
|
||||
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
|
||||
}
|
||||
|
||||
/** Gets the possible values of the variable at the end of import time */
|
||||
|
||||
@@ -112,7 +112,7 @@ module ImportResolution {
|
||||
not allowedEssaImportStep(_, firstDef)
|
||||
|
|
||||
not LocalFlow::defToFirstUse(firstDef, _) and
|
||||
val.asVar() = firstDef
|
||||
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
|
||||
or
|
||||
exists(ControlFlowNode mid, ControlFlowNode end |
|
||||
LocalFlow::defToFirstUse(firstDef, mid) and
|
||||
@@ -320,11 +320,11 @@ module ImportResolution {
|
||||
// name as a submodule, we always consider that this attribute _could_ be a
|
||||
// reference to the submodule, even if we don't know that the submodule has been
|
||||
// imported yet.
|
||||
exists(string submodule, Module package |
|
||||
submodule = result.asVar().getName() and
|
||||
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
|
||||
package.getEntryNode()) and
|
||||
m = getModuleFromName(package.getPackageName() + "." + submodule)
|
||||
exists(string submodule, Module package, EssaVariable var |
|
||||
submodule = var.getName() and
|
||||
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
|
||||
m = getModuleFromName(package.getPackageName() + "." + submodule) and
|
||||
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -87,13 +87,13 @@
|
||||
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
|
||||
*
|
||||
* 5. [Read] Content is read from `sequence` to its elements.
|
||||
* a) If the element is a plain variable, the target is the corresponding essa node.
|
||||
* a) If the element is a plain variable, the target is the corresponding control flow node.
|
||||
*
|
||||
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
|
||||
*
|
||||
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
|
||||
*
|
||||
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
|
||||
* 6. [Store] Content is stored from `TIterableElement(v)` to the control flow node for variable `v`, with
|
||||
* content type `ListElementContent`.
|
||||
*
|
||||
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
|
||||
@@ -313,7 +313,7 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
|
||||
* Step 5
|
||||
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
|
||||
* three cases for what `toNode` should be:
|
||||
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
|
||||
* a) If the element is a plain variable, `toNode` is the corresponding control flow node.
|
||||
*
|
||||
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
|
||||
*
|
||||
@@ -351,20 +351,25 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
|
||||
nodeTo = TIterableElementNode(element)
|
||||
else
|
||||
// Step 5a
|
||||
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
|
||||
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
|
||||
nodeTo.(CfgNode).getNode() = element
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Step 6
|
||||
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
|
||||
* Data flows from `TIterableElement(v)` to the control flow node for variable `v`, with
|
||||
* content type `ListElementContent`.
|
||||
*/
|
||||
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
|
||||
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
|
||||
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
|
||||
starred.getNode() instanceof Starred and
|
||||
starred = mad.getDefiningNode()
|
||||
|
|
||||
nodeFrom = TIterableElementNode(starred) and
|
||||
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
|
||||
nodeTo.asCfgNode() = starred and
|
||||
c instanceof ListElementContent
|
||||
)
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ class LocalSourceNode extends Node {
|
||||
or
|
||||
// We include all scope entry definitions, as these act as the local source within the scope they
|
||||
// enter.
|
||||
this.asVar() instanceof ScopeEntryDefinition
|
||||
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
|
||||
}
|
||||
|
||||
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
|
||||
@@ -165,7 +165,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
|
||||
LocalSourceNodeNotModuleVariableNode() {
|
||||
this instanceof ExprNode
|
||||
or
|
||||
this.asVar() instanceof ScopeEntryDefinition
|
||||
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -89,8 +89,9 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
or
|
||||
// the interior pattern flows to the alias
|
||||
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
|
||||
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
|
||||
alias
|
||||
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
|
||||
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -123,13 +124,9 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
|
||||
nodeFrom.(CfgNode).getNode().getNode() = capture and
|
||||
nodeTo
|
||||
.(EssaNode)
|
||||
.getVar()
|
||||
.getDefinition()
|
||||
.(PatternCaptureDefinition)
|
||||
.getDefiningNode()
|
||||
.getNode() = var
|
||||
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
|
||||
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -216,8 +216,10 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
*/
|
||||
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
|
||||
var = any(WithDefinition wd).getDefiningNode()
|
||||
|
|
||||
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
|
||||
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
|
||||
nodeTo.(DataFlow::CfgNode).getNode() = var and
|
||||
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
|
||||
with.getContextExpr() = contextManager.getNode() and
|
||||
with.getOptionalVars() = var.getNode() and
|
||||
|
||||
@@ -50,8 +50,20 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
|
||||
}
|
||||
|
||||
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
|
||||
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
|
||||
// Jump into a capturing scope.
|
||||
//
|
||||
// var = expr
|
||||
// ...
|
||||
// def f():
|
||||
// ..var is used..
|
||||
//
|
||||
// nodeFrom is `expr`
|
||||
// nodeTo is entry node for `f`
|
||||
exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def |
|
||||
e.getSourceVariable() = var and
|
||||
var.hasDefiningNode(def)
|
||||
|
|
||||
nodeTo.asCfgNode() = e.getDefiningNode() and
|
||||
nodeFrom.asCfgNode() = def.getValue() and
|
||||
var.getScope().getScope*() = nodeFrom.getScope()
|
||||
)
|
||||
@@ -228,8 +240,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
|
||||
|
|
||||
param = FlowSummary::SummaryComponent::parameter(apos) and
|
||||
DataFlowDispatch::parameterMatch(ppos, apos) and
|
||||
// pick the SsaNode rather than the CfgNode
|
||||
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
|
||||
result.asCfgNode().getNode() = p and
|
||||
(
|
||||
exists(int i | ppos.isPositional(i) |
|
||||
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)
|
||||
|
||||
@@ -2740,7 +2740,7 @@ module PrivateDjango {
|
||||
this.asExpr() = list and
|
||||
// we look for an assignment to the `MIDDLEWARE` setting
|
||||
exists(DataFlow::Node mw |
|
||||
mw.asVar().getName() = "MIDDLEWARE" and
|
||||
mw.asExpr().(Name).getId() = "MIDDLEWARE" and
|
||||
DataFlow::localFlow(this, mw)
|
||||
|
|
||||
// To only include results where CSRF protection matters, we only care about CSRF
|
||||
|
||||
@@ -28,8 +28,8 @@ private class TracebackFunctionCall extends ExceptionInfo, DataFlow::CallCfgNode
|
||||
/** A caught exception. */
|
||||
private class CaughtException extends ExceptionInfo {
|
||||
CaughtException() {
|
||||
this.asVar().getDefinition().(EssaNodeDefinition).getDefiningNode().getNode() =
|
||||
any(ExceptStmt s).getName()
|
||||
this.asExpr() = any(ExceptStmt s).getName() and
|
||||
this.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user