Merge pull request #4235 from yoff/SharedDataflow_UseUseFlow

Python: Port use-use implementation from Java
This commit is contained in:
Rasmus Wriedt Larsen
2020-09-10 16:12:28 +02:00
committed by GitHub
5 changed files with 182 additions and 15 deletions

View File

@@ -1,6 +1,7 @@
private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute
//--------
// Data flow graph
@@ -31,7 +32,7 @@ class StorePreUpdateNode extends PreUpdateNode, CfgNode {
}
}
/** A node marking the state change of an object after a read */
/** A node marking the state change of an object after a read. */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
ReadPreUpdateNode() {
exists(Attribute a |
@@ -97,12 +98,19 @@ module EssaFlow {
contextManager.strictlyDominates(var)
)
or
// Use
// First use after definition
// `y = 42`
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
nodeFrom.(EssaNode).getVar().getASourceUse() = nodeTo.(CfgNode).getNode()
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
or
// Next use after use
// `x = f(y)`
// `z = y + 1`
// nodeFrom is 'y' on first line, cfg node
// nodeTo is `y` on second line, cfg node
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
or
// Refinements
exists(EssaEdgeRefinement r |
@@ -120,6 +128,14 @@ module EssaFlow {
nodeFrom.(EssaNode).getVar() = p.getAnInput()
)
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
AdjacentUses::adjacentUseUseSameVar(nodeFrom, nodeTo)
}
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
}
}
//--------
@@ -131,18 +147,25 @@ module EssaFlow {
* excludes SSA flow through instance fields.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
// If there is ESSA-flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
exists(Node node |
not node.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(node, nodeTo) and
nodeFrom = update(node)
)
}
/**
* Holds if `result` is either `node`, or the post-update node for `node`.
*/
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
result = node
}
@@ -365,15 +388,22 @@ string ppReprType(DataFlowType t) { none() }
* another. Additional steps specified by the configuration are *not*
* taken into account.
*/
predicate jumpStep(Node pred, Node succ) {
predicate jumpStep(Node nodeFrom, Node nodeTo) {
// As we have ESSA variables for global variables,
// we include ESSA flow steps involving global variables.
(
pred.(EssaNode).getVar() instanceof GlobalSsaVariable
nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
or
succ.(EssaNode).getVar() instanceof GlobalSsaVariable
nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable
) and
EssaFlow::essaFlowStep(pred, succ)
(
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
or
// As jump steps do not respect chronology,
// we add jump steps for each def-use pair.
nodeFrom.asVar() instanceof GlobalSsaVariable and
nodeTo.asCfgNode() = nodeFrom.asVar().getASourceUse()
)
}
//--------

View File

@@ -194,7 +194,7 @@ private module SsaComputeImpl {
defUseRank(v, b, rankix, i)
}
/** A `VarAccess` `use` of `v` in `b` at index `i`. */
/** A variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
(v.getAUse() = use or v.hasRefinement(use, _)) and
@@ -348,11 +348,147 @@ private module SsaComputeImpl {
)
}
}
cached
module AdjacentUsesImpl {
/**
* Holds if `rankix` is the rank the index `i` at which there is an SSA definition or explicit use of
* `v` in the basic block `b`.
*/
cached
predicate defSourceUseRank(SsaSourceVariable v, BasicBlock b, int rankix, int i) {
i = rank[rankix](int j | variableDefine(v, _, b, j) or variableSourceUse(v, _, b, j))
}
/** A variable access `use` of `v` in `b` at index `i`. */
cached
predicate variableSourceUse(SsaSourceVariable v, ControlFlowNode use, BasicBlock b, int i) {
v.getASourceUse() = use and
exists(int j |
b.getNode(j) = use and
i = 2 * j
)
}
/** Gets the maximum rank index for the given variable and basic block. */
private int lastSourceUseRank(SsaSourceVariable v, BasicBlock b) {
result = max(int rankix | defSourceUseRank(v, b, rankix, _))
}
/** Holds if `v` is defined or used in `b`. */
private predicate varOccursInBlock(SsaSourceVariable v, BasicBlock b) {
defSourceUseRank(v, b, _, _)
}
/** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
private predicate blockPrecedesVar(SsaSourceVariable v, BasicBlock b) {
varOccursInBlock(v, b.getASuccessor*())
}
/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* in `b2` or one of its transitive successors but not in any block on the path
* between `b1` and `b2`.
*/
private predicate varBlockReaches(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
varOccursInBlock(v, b1) and
b2 = b1.getASuccessor() and
blockPrecedesVar(v, b2)
or
exists(BasicBlock mid |
varBlockReaches(v, b1, mid) and
b2 = mid.getASuccessor() and
not varOccursInBlock(v, mid) and
blockPrecedesVar(v, b2)
)
}
/**
* Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
* `b2` but not in any block on the path between `b1` and `b2`.
*/
private predicate varBlockStep(SsaSourceVariable v, BasicBlock b1, BasicBlock b2) {
varBlockReaches(v, b1, b2) and
varOccursInBlock(v, b2)
}
/**
* Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
* there is a path between them without any occurrence of `v`.
*/
cached
predicate adjacentVarRefs(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2) {
exists(int rankix |
b1 = b2 and
defSourceUseRank(v, b1, rankix, i1) and
defSourceUseRank(v, b2, rankix + 1, i2)
)
or
defSourceUseRank(v, b1, lastSourceUseRank(v, b1), i1) and
varBlockStep(v, b1, b2) and
defSourceUseRank(v, b2, 1, i2)
}
/**
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
* variable, that is, the value read in `use1` can reach `use2` without passing
* through any other use or any SSA definition of the variable.
*/
cached
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableSourceUse(v, use1, b1, i1) and
variableSourceUse(v, use2, b2, i2)
)
}
/**
* Holds if the value defined at `def` can reach `use` without passing through
* any other uses, but possibly through phi nodes and uncertain implicit updates.
*/
cached
predicate firstUse(EssaDefinition def, ControlFlowNode use) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
definesAt(def, v, b1, i1) and
variableSourceUse(v, use, b2, i2)
)
or
exists(
SsaSourceVariable v, EssaDefinition redef, BasicBlock b1, int i1, BasicBlock b2, int i2
|
redef instanceof PhiFunction
|
adjacentVarRefs(v, b1, i1, b2, i2) and
definesAt(def, v, b1, i1) and
definesAt(redef, v, b2, i2) and
firstUse(redef, use)
)
}
/**
* Holds if `def` defines `v` at the specified position.
* Phi nodes are placed at index -1.
*/
cached
predicate definesAt(EssaDefinition def, SsaSourceVariable v, BasicBlock b, int i) {
exists(ControlFlowNode defNode |
def.(EssaNodeDefinition).definedBy(v, defNode) and
variableDefine(v, defNode, b, i)
)
or
v = def.(PhiFunction).getSourceVariable() and
b = def.(PhiFunction).getBasicBlock() and
i = -1
}
}
}
import SsaComputeImpl::SsaDefinitionsImpl as SsaDefinitions
import SsaComputeImpl::EssaDefinitionsImpl as EssaDefinitions
import SsaComputeImpl::LivenessImpl as Liveness
import SsaComputeImpl::AdjacentUsesImpl as AdjacentUses
/* This is exported primarily for testing */
/*

View File

@@ -5,6 +5,7 @@ uniqueEnclosingCallable
| module.py:2:8:2:13 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
| module.py:5:1:5:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
| module.py:5:5:5:18 | GSSA Variable dangerous_func | Node should have one enclosing callable but has 0. |
| module.py:9:9:9:14 | ControlFlowNode for SOURCE | Node should have one enclosing callable but has 0. |
| module.py:10:1:10:5 | GSSA Variable safe2 | Node should have one enclosing callable but has 0. |
| module.py:10:9:10:14 | ControlFlowNode for Str | Node should have one enclosing callable but has 0. |
| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |

View File

@@ -1,5 +1,5 @@
test_taint
| test.py:22 | fail | test_custom_sanitizer | s |
| test.py:22 | ok | test_custom_sanitizer | s |
| test.py:36 | fail | test_custom_sanitizer_guard | s |
| test.py:38 | ok | test_custom_sanitizer_guard | s |
| test.py:49 | ok | test_escape | s2 |

View File

@@ -50,7 +50,7 @@
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
| test_collections.py:159 | ok | list_append | my_list |
| test_collections.py:162 | fail | list_append | my_list |
| test_collections.py:162 | ok | list_append | my_list |
| test_collections.py:169 | ok | list_extend | my_list |
| test_collections.py:172 | fail | list_extend | my_list |
| test_collections.py:179 | ok | dict_update_dict | my_dict |
@@ -63,7 +63,7 @@
| test_collections.py:212 | fail | dict_manual_update | my_dict |
| test_collections.py:220 | fail | dict_merge | merged |
| test_collections.py:227 | ok | set_add | my_set |
| test_collections.py:230 | fail | set_add | my_set |
| test_collections.py:230 | ok | set_add | my_set |
| test_json.py:26 | ok | test | json.dumps(..) |
| test_json.py:27 | ok | test | json.loads(..) |
| test_json.py:34 | fail | test | tainted_filelike |