Python: Add ModuleVariableNode to dataflow

This commit is contained in:
Taus Brock-Nannestad
2020-09-14 14:57:32 +02:00
parent e0f5b208da
commit 5fb33c90bc
6 changed files with 182 additions and 10 deletions

View File

@@ -119,6 +119,12 @@ module EssaFlow {
nodeTo.(EssaNode).getVar() = p.getVariable() and
nodeFrom.(EssaNode).getVar() = p.getAnInput()
)
or
// Module variable read
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
or
// Module variable write
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
}
}
@@ -131,8 +137,10 @@ module EssaFlow {
* excludes SSA flow through instance fields.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// NOTE(review): the next two pairs of conjuncts both exclude `GlobalSsaVariable`s, once via
// `.(EssaNode).getVar()` and once via `.asVar()`. This looks like the removed and the added
// lines of a diff shown together without +/- markers -- confirm which pair is current.
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeFrom.asVar() instanceof GlobalSsaVariable and
not nodeTo.asVar() instanceof GlobalSsaVariable and
// Steps that start or end at a `ModuleVariableNode` are excluded from local flow.
not nodeFrom instanceof ModuleVariableNode and
not nodeTo instanceof ModuleVariableNode and
// What remains is an ESSA flow step taken from `update(nodeFrom)` rather than `nodeFrom` itself.
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
}
@@ -365,15 +373,13 @@ string ppReprType(DataFlowType t) { none() }
* another. Additional steps specified by the configuration are *not*
* taken into account.
*/
predicate jumpStep(Node pred, Node succ) {
// NOTE(review): two versions of `jumpStep` appear interleaved below (two signatures, one closing
// brace), which looks like a diff rendered without +/- markers. One version keeps the
// `EssaFlow::essaFlowStep` steps where either end is a `GlobalSsaVariable`; the other keeps the
// steps where either end is a `ModuleVariableNode`. Confirm which lines are current.
// As we have ESSA variables for global variables,
// we include ESSA flow steps involving global variables.
predicate jumpStep(Node nodeFrom, Node nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
(
pred.(EssaNode).getVar() instanceof GlobalSsaVariable
nodeFrom instanceof ModuleVariableNode
or
succ.(EssaNode).getVar() instanceof GlobalSsaVariable
) and
EssaFlow::essaFlowStep(pred, succ)
nodeTo instanceof ModuleVariableNode
)
}
//--------

View File

@@ -23,7 +23,9 @@ newtype TNode =
/** A node corresponding to a control flow node. */
TCfgNode(DataFlowCfgNode node) or
/** A node representing the value of an object after a state change */
TPostUpdateNode(PreUpdateNode pre)
TPostUpdateNode(PreUpdateNode pre) or
/** A node representing a global (module-level) variable in a specific module */
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() }
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
@@ -148,6 +150,35 @@ class ParameterNode extends EssaNode {
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
}
/**
 * A data-flow node standing for the global variable `var` within the module `mod`.
 *
 * Loads of the variable are reachable through `getARead`, and assignments to it
 * through `getAWrite`.
 */
class ModuleVariableNode extends Node, TModuleVariableNode {
  Module mod;
  GlobalVariable var;

  ModuleVariableNode() { this = TModuleVariableNode(mod, var) }

  /** Gets the module that this node belongs to. */
  Module getModule() { result = mod }

  /** Gets the global variable that this node represents. */
  GlobalVariable getVariable() { result = var }

  override Scope getScope() { result = mod }

  override string toString() {
    result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
  }

  /** Gets a node whose control-flow node is a load of this variable. */
  Node getARead() { var.getALoad().getAFlowNode() = result.asCfgNode() }

  /**
   * Gets a node whose ESSA variable has a node definition that defines this
   * global variable at some `DefinitionNode`.
   */
  Node getAWrite() {
    exists(EssaNodeDefinition essaDef |
      essaDef = result.asVar().getDefinition() and
      essaDef.definedBy(var, any(DefinitionNode assignment))
    )
  }
}
/**
* A guard that validates some expression.
*

View File

@@ -0,0 +1,36 @@
import python
import experimental.dataflow.DataFlow
import TestUtilities.InlineExpectationsTest
/**
 * An inline-expectations test that reports, under the `reads` tag, each node that
 * reads a module variable, using the variable's id as the value.
 */
class GlobalReadTest extends InlineExpectationsTest {
  GlobalReadTest() { this = "GlobalReadTest" }

  override string getARelevantTag() { result = "reads" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "reads" and
    exists(DataFlow::ModuleVariableNode globalNode, DataFlow::Node readNode |
      readNode = globalNode.getARead()
    |
      value = globalNode.getVariable().getId() and
      // Reads of the variable named `print` are left out of the results.
      value != "print" and
      element = readNode.toString() and
      location = readNode.getLocation()
    )
  }
}
/**
 * An inline-expectations test that reports, under the `writes` tag, each node that
 * writes a module variable, using the variable's id as the value.
 */
class GlobalWriteTest extends InlineExpectationsTest {
  GlobalWriteTest() { this = "GlobalWriteTest" }

  override string getARelevantTag() { result = "writes" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    tag = "writes" and
    exists(DataFlow::ModuleVariableNode globalNode, DataFlow::Node writeNode |
      writeNode = globalNode.getAWrite()
    |
      value = globalNode.getVariable().getId() and
      element = writeNode.toString() and
      location = writeNode.getLocation()
    )
  }
}

View File

@@ -0,0 +1 @@
# Module-level list. Presumably this is the `known_attr` that the star-import test reads from a
# module named `known` -- confirm that this file is that module.
known_attr = [1000]

View File

@@ -0,0 +1,98 @@
### Tests of global flow
# Each trailing `reads=` / `writes=` annotation (the comments that begin with a dollar sign)
# names a global that the test queries are expected to report as read or written on that line.
# Simple assignment
g = [5] # $writes=g
# Multiple assignment
g1, g2 = [6], [7] # $writes=g1 $writes=g2
# Assignment that's only referenced in this scope. This one will not give rise to a `ModuleVariableNode`.
unreferenced_g = [8]
print(unreferenced_g)
# Testing modifications of globals
# Modification by reassignment
# Both assignments to `g_mod` are annotated, so each is expected as a separate write.
g_mod = [10] # $writes=g_mod
print(g_mod) # $reads=g_mod
g_mod = [100] # $writes=g_mod
# Modification by mutation
g_ins = [50] # $writes=g_ins
print(g_ins) # $reads=g_ins
# `append` mutates the list in place without rebinding the name, so this line is a read of
# `g_ins`, not a write. (`list.insert` takes an index and a value; calling it with a single
# argument raises TypeError.)
g_ins.append(75) # $reads=g_ins
# A global with multiple potential definitions
# The condition reads an attribute of `unknown_module`; the assignment under each branch is
# annotated as its own write of `g_mult`.
import unknown_module
if unknown_module.attr:
g_mult = [200] # $writes=g_mult
else:
g_mult = [300] # $writes=g_mult
# Reads six of the globals assigned earlier in this file. The assignment to `l` carries no
# annotation, so no read or write is expected for it.
def global_access():
l = 5
print(g) # $reads=g
print(g1) # $reads=g1
print(g2) # $reads=g2
print(g_mod) # $reads=g_mod
print(g_ins) # $reads=g_ins
print(g_mult) # $reads=g_mult
# The `def` line itself is annotated as a write of the global name `print_g_mod`; the `print`
# call reads `g_mod`.
def print_g_mod(): # $writes=print_g_mod
print(g_mod) # $reads=g_mod
# Declares `g_mod` as global, so the augmented assignment is annotated as both a read and a
# write of it. The call on the last line is annotated as a read of the global `print_g_mod`.
def global_mod():
global g_mod
g_mod += [150] # $reads,writes=g_mod
print_g_mod() # $reads=print_g_mod
# Defines `local_function` and then calls it; the only annotated line is the read of the
# global `g` in the `print` call.
def global_inside_local_function():
def local_function():
print(g) # $reads=g
local_function()
## Imports
# Direct imports
# The import is annotated as a write of the name `foo_module`; the attribute access in
# `use_foo` is annotated as a read of that name.
import foo_module # $writes=foo_module
def use_foo():
print(foo_module.attr) # $reads=foo_module
# Partial imports
# Each imported name is annotated as its own write, and each is annotated as read in the
# single `print` call.
from bar import baz_attr, quux_attr # $writes=baz_attr $writes=quux_attr
def use_partial_import():
print(baz_attr, quux_attr) # $reads=baz_attr $reads=quux_attr
# Aliased imports
# The annotations use the alias `eggs_attr`, not the imported name `ham_attr`.
from spam_module import ham_attr as eggs_attr # $writes=eggs_attr
def use_aliased_import():
print(eggs_attr) # $reads=eggs_attr
# Import star (unlikely to work unless we happen to extract/model the referenced module)
# Unknown modules
# `unknown_attr` is not assigned anywhere in the visible part of this file; the star import
# carries no write annotation, and only the read is annotated.
from unknown import *
def secretly_use_unknown():
print(unknown_attr) # $reads=unknown_attr
# Known modules
# `known_attr` is likewise not assigned in this file, and only the read is annotated.
# Presumably a module named `known` that defines it accompanies this test -- confirm.
from known import *
def secretly_use_known():
print(known_attr) # $reads=known_attr