Python: remove EssaNodes

This commit removes SSA nodes from the data flow graph. Specifically, for a definition and use such as
```python
  x = expr
  y = x + 2
```
we used to have flow from `expr` to an SSA variable representing x and from that SSA variable to the use of `x` in the definition of `y`. Now we instead have flow from `expr` to the control flow node for `x` at line 1 and from there to the control flow node for `x` at line 2.

Specific changes:
- `EssaNode` from the data flow layer no longer exists.
- Several glue steps between `EssaNode`s and `CfgNode`s have been deleted.
- Entry nodes are now admitted as `CfgNodes` in the data flow layer (they were filtered out before).
- Entry nodes now have a new `toString` taking into account that the module name may be ambigous.
- Some tests have been rewritten to accomodate the changes, but only `python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll` should have semantic changes.
- Comments have been updated
- Test output has been updated, but apart from `python/ql/test/experimental/dataflow/basic/maximalFlows.expected` only `python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py` should have a semantic change. This is a bonus fix, probably meaning that something was never connected up correctly.
This commit is contained in:
Rasmus Lerchedahl Petersen
2023-11-14 00:05:00 +01:00
parent 75f860595a
commit 11c71fdd18
83 changed files with 1924 additions and 1958 deletions

View File

@@ -126,7 +126,10 @@ class ControlFlowNode extends @py_flow_node {
cached
string toString() {
Stages::AST::ref() and
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
// Since modules can have ambigous names, entry nodes can too, if we do not collate them.
exists(Scope s | s.getEntryNode() = this |
result = "Entry node for " + concat( | | s.toString(), ",")
)
or
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
or

View File

@@ -281,28 +281,33 @@ class DataFlowExpr = Expr;
/**
* A module to compute local flow.
*
* Flow will generally go from control flow nodes into essa variables at definitions,
* Flow will generally go from control flow nodes for expressions into
* control flow nodes for variables at definitions,
* and from there via use-use flow to other control flow nodes.
*
* Some syntaxtic constructs are handled separately.
*/
module LocalFlow {
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
// Definition
// `x = f(42)`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
nodeFrom.(CfgNode).getNode() =
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(AssignmentDefinition def |
nodeFrom.(CfgNode).getNode() = def.getValue() and
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
)
or
// With definition
// `with f(42) as x:`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
// nodeFrom is `f(42)`
// nodeTo is `x`
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
var = withDef.getDefiningNode()
|
nodeFrom.(CfgNode).getNode() = contextManager and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
nodeTo.(CfgNode).getNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
@@ -313,34 +318,6 @@ module LocalFlow {
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
// * `async with x.foo() as foo: await foo.async_method()`.
)
or
// Async with var definition
// `async with f(42) as x:`
// nodeFrom is `x`, cfg node
// nodeTo is `x`, essa var
//
// This makes the cfg node the local source of the awaited value.
//
// We have this step in addition to the step above, to handle cases where the QL
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
// using `async with`, so you can do both:
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
// * `async with x.foo() as foo: await foo.async_method()`.
exists(With with, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = var and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
with.getOptionalVars() = var.getNode() and
with.isAsync()
)
or
// Parameter definition
// `def foo(x):`
// nodeFrom is `x`, cfgNode
// nodeTo is `x`, essa var
exists(ParameterDefinition pd |
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
nodeTo.(EssaNode).getVar() = pd.getVariable()
)
}
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
@@ -372,9 +349,12 @@ module LocalFlow {
// First use after definition
// `y = 42`
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
// nodeFrom is `y` on first line
// nodeTo is `y` on second line
exists(EssaDefinition def |
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
)
or
// Next use after use
// `x = f(y)`
@@ -565,11 +545,7 @@ predicate neverSkipInPathGraph(Node n) {
// ```
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
// annoying and doesn't help the path explanation become clearer.
n.asVar() instanceof EssaDefinition and
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
// with use-use flow, and since the CFN is already part of the path graph, we don't
// want to force showing the SSA node as well.
not n.asVar() instanceof ParameterDefinition
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
}
/**
@@ -916,7 +892,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
nodeTo.asCfgNode() = target
) and
(
c instanceof ListElementContent

View File

@@ -23,13 +23,13 @@ private import FlowSummaryImpl as FlowSummaryImpl
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
*/
newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(ControlFlowNode node) {
isExpressionNode(node)
or
node.getNode() instanceof Pattern
or
node = any(ScopeEntryDefinition def).getDefiningNode()
} or
/**
* A synthetic node representing the value of an object before a state change.
@@ -155,9 +155,6 @@ class Node extends TNode {
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
/** Gets the ESSA variable corresponding to this node, if any. */
EssaVariable asVar() { none() }
/** Gets the control-flow node corresponding to this node, if any. */
ControlFlowNode asCfgNode() { none() }
@@ -170,25 +167,6 @@ class Node extends TNode {
LocalSourceNode getALocalSource() { result.flowsTo(this) }
}
/** A data-flow node corresponding to an SSA variable. */
class EssaNode extends Node, TEssaNode {
EssaVariable var;
EssaNode() { this = TEssaNode(var) }
/** Gets the `EssaVariable` represented by this data-flow node. */
EssaVariable getVar() { result = var }
override EssaVariable asVar() { result = var }
/** Gets a textual representation of this element. */
override string toString() { result = var.toString() }
override Scope getScope() { result = var.getScope() }
override Location getLocation() { result = var.getLocation() }
}
/** A data-flow node corresponding to a control-flow node. */
class CfgNode extends Node, TCfgNode {
ControlFlowNode node;
@@ -411,8 +389,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
}
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
EssaNode getAWrite() {
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
Node getAWrite() {
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
}
/** Gets the possible values of the variable at the end of import time */

View File

@@ -112,7 +112,7 @@ module ImportResolution {
not allowedEssaImportStep(_, firstDef)
|
not LocalFlow::defToFirstUse(firstDef, _) and
val.asVar() = firstDef
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
or
exists(ControlFlowNode mid, ControlFlowNode end |
LocalFlow::defToFirstUse(firstDef, mid) and
@@ -320,11 +320,11 @@ module ImportResolution {
// name as a submodule, we always consider that this attribute _could_ be a
// reference to the submodule, even if we don't know that the submodule has been
// imported yet.
exists(string submodule, Module package |
submodule = result.asVar().getName() and
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule)
exists(string submodule, Module package, EssaVariable var |
submodule = var.getName() and
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
m = getModuleFromName(package.getPackageName() + "." + submodule) and
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
)
}

View File

@@ -87,13 +87,13 @@
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
*
* 5. [Read] Content is read from `sequence` to its elements.
* a) If the element is a plain variable, the target is the corresponding essa node.
* a) If the element is a plain variable, the target is the corresponding control flow node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
*
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
* 6. [Store] Content is stored from `TIterableElement(v)` to the control flow node for variable `v`, with
* content type `ListElementContent`.
*
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
@@ -313,7 +313,7 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
* Step 5
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
* three cases for what `toNode` should be:
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
* a) If the element is a plain variable, `toNode` is the corresponding control flow node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
*
@@ -351,20 +351,25 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
nodeTo = TIterableElementNode(element)
else
// Step 5a
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
nodeTo.(CfgNode).getNode() = element
)
)
)
}
/**
* Step 6
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
* Data flows from `TIterableElement(v)` to the control flow node for variable `v`, with
* content type `ListElementContent`.
*/
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
starred.getNode() instanceof Starred and
starred = mad.getDefiningNode()
|
nodeFrom = TIterableElementNode(starred) and
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
nodeTo.asCfgNode() = starred and
c instanceof ListElementContent
)
}

View File

@@ -71,7 +71,7 @@ class LocalSourceNode extends Node {
or
// We include all scope entry definitions, as these act as the local source within the scope they
// enter.
this.asVar() instanceof ScopeEntryDefinition
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
}
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -165,7 +165,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
LocalSourceNodeNotModuleVariableNode() {
this instanceof ExprNode
or
this.asVar() instanceof ScopeEntryDefinition
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
}
}

View File

@@ -89,8 +89,9 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
or
// the interior pattern flows to the alias
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
alias
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
)
)
}
@@ -123,13 +124,9 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.(CfgNode).getNode().getNode() = capture and
nodeTo
.(EssaNode)
.getVar()
.getDefinition()
.(PatternCaptureDefinition)
.getDefiningNode()
.getNode() = var
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
)
)
}

View File

@@ -216,8 +216,10 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
*/
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
var = any(WithDefinition wd).getDefiningNode()
|
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
nodeTo.(DataFlow::CfgNode).getNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and

View File

@@ -50,8 +50,20 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
}
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
// Jump into a capturing scope.
//
// var = expr
// ...
// def f():
// ..var is used..
//
// nodeFrom is `expr`
// nodeTo is entry node for `f`
exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def |
e.getSourceVariable() = var and
var.hasDefiningNode(def)
|
nodeTo.asCfgNode() = e.getDefiningNode() and
nodeFrom.asCfgNode() = def.getValue() and
var.getScope().getScope*() = nodeFrom.getScope()
)
@@ -228,8 +240,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
|
param = FlowSummary::SummaryComponent::parameter(apos) and
DataFlowDispatch::parameterMatch(ppos, apos) and
// pick the SsaNode rather than the CfgNode
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
result.asCfgNode().getNode() = p and
(
exists(int i | ppos.isPositional(i) |
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)

View File

@@ -2740,7 +2740,7 @@ module PrivateDjango {
this.asExpr() = list and
// we look for an assignment to the `MIDDLEWARE` setting
exists(DataFlow::Node mw |
mw.asVar().getName() = "MIDDLEWARE" and
mw.asExpr().(Name).getId() = "MIDDLEWARE" and
DataFlow::localFlow(this, mw)
|
// To only include results where CSRF protection matters, we only care about CSRF

View File

@@ -28,8 +28,8 @@ private class TracebackFunctionCall extends ExceptionInfo, DataFlow::CallCfgNode
/** A caught exception. */
private class CaughtException extends ExceptionInfo {
CaughtException() {
this.asVar().getDefinition().(EssaNodeDefinition).getDefiningNode().getNode() =
any(ExceptStmt s).getName()
this.asExpr() = any(ExceptStmt s).getName() and
this.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
}
}