Merge pull request #11280 from RasmusWL/dict-dataflow-steps

Python: Support more dictionary read/store steps
This commit is contained in:
Rasmus Wriedt Larsen
2023-04-30 16:07:29 +02:00
committed by GitHub
19 changed files with 185 additions and 9 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added more content-flow/field-flow for dictionaries, by adding support for reads through `mydict.get("key")` and `mydict.setdefault("key", value)`, and store steps through `dict["key"] = value` and `mydict.setdefault("key", value)`.

View File

@@ -588,6 +588,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
or
dictStoreStep(nodeFrom, c, nodeTo)
or
moreDictStoreSteps(nodeFrom, c, nodeTo)
or
comprehensionStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
@@ -688,19 +690,48 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
}
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
// Dictionary
// `{..., "key" = 42, ...}`
// nodeFrom is `42`, cfg node
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
// c denotes element of dictionary and the key `"key"`
exists(KeyValuePair item |
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
nodeFrom.getNode().getNode() = item.getValue() and
c.getKey() = item.getKey().(StrConst).getS()
)
}
/**
* This has been made private since `dictStoreStep` is used by taint-tracking, and
* adding these extra steps made some alerts very noisy.
*
* TODO: Once TaintTracking no longer uses `dictStoreStep`, unify the two predicates.
*/
private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(SubscriptNode subscript |
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
or
// see https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeTo.(PostUpdateNode).getPreUpdateNode(), "setdefault") and
call.getArg(0).asExpr().(StrConst).getText() = c.getKey() and
nodeFrom = call.getArg(1)
)
}
predicate dictClearStep(Node node, DictionaryElementContent c) {
exists(SubscriptNode subscript |
subscript instanceof DefinitionNode and
node.asCfgNode() = subscript.getObject() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
}
/** Data flows from an element expression in a comprehension to the comprehension. */
predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// Comprehension
@@ -761,6 +792,8 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
dictReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
@@ -799,6 +832,17 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
)
}
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// see
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeFrom, ["get", "setdefault"]) and
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
nodeTo = call
)
}
/** Data flows from a sequence to a call to `pop` on the sequence. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
@@ -873,6 +917,8 @@ predicate clearsContent(Node n, Content c) {
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
or
dictSplatParameterNodeClearStep(n, c)

View File

@@ -3795,6 +3795,30 @@ private module StdlibPrivate {
preservesValue = true
}
}
/**
* A flow summary for `dict.setdefault`.
*
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
*/
class DictSetdefaultSummary extends SummarizedCallable {
DictSetdefaultSummary() { this = "dict.setdefault" }
override DataFlow::CallCfgNode getACall() {
result.(DataFlow::MethodCallNode).calls(_, "setdefault")
}
override DataFlow::ArgumentNode getACallback() {
result.(DataFlow::AttrRead).getAttributeName() = "setdefault"
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// store/read steps with dictionary content of this is modeled in DataFlowPrivate
input = "Argument[1]" and
output = "ReturnValue" and
preservesValue = true
}
}
}
// ---------------------------------------------------------------------------

View File

@@ -9,14 +9,17 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
override string getARelevantTag() { result = "unresolved_call" }
predicate unresolvedCall(CallNode call) {
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode()
}
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(location.getFile().getRelativePath()) and
exists(CallNode call |
not exists(DataFlowPrivate::DataFlowCall dfc |
exists(dfc.getCallable()) and dfc.getNode() = call
) and
not DataFlowPrivate::resolveClassCall(call, _) and
not call = API::builtin(_).getACall().asCfgNode() and
exists(CallNode call | this.unresolvedCall(call) |
location = call.getLocation() and
tag = "unresolved_call" and
value = prettyExpr(call.getNode()) and

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -1,3 +1,4 @@
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,8 +1,11 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |

View File

@@ -1,3 +1,4 @@
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |

View File

@@ -1,7 +1,9 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,7 +1,9 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,2 +1,12 @@
import python
import experimental.dataflow.TestUtil.UnresolvedCalls
private import semmle.python.dataflow.new.DataFlow
class IgnoreDictMethod extends UnresolvedCallExpectations {
override predicate unresolvedCall(CallNode call) {
super.unresolvedCall(call) and
not any(DataFlow::MethodCallNode methodCall |
methodCall.getMethodName() in ["get", "setdefault"]
).asCfgNode() = call
}
}

View File

@@ -0,0 +1,71 @@
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__)))) # $ unresolved_call=sys.path.append(..)
from testlib import expects
# These are defined so that we can evaluate the test code.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
if is_source(x):
print("OK")
else:
print("Unexpected flow", x)
def SINK_F(x):
if is_source(x):
print("Unexpected flow", x)
else:
print("OK")
# ------------------------------------------------------------------------------
# Actual tests
# ------------------------------------------------------------------------------
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_literal():
d = {"key": SOURCE}
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_update():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_setdefault():
d = {}
x = d.setdefault("key", SOURCE)
SINK(x) # $ flow="SOURCE, l:-1 -> x"
SINK(d["key"]) # $ flow="SOURCE, l:-2 -> d['key']"
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_override():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
d["key"] = NONSOURCE
SINK_F(d["key"])
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_nonstring_key():
d = {}
d[42] = SOURCE
SINK(d[42]) # $ MISSING: flow
SINK(d.get(42)) # $ MISSING: flow
SINK(d.setdefault(42, NONSOURCE)) # $ MISSING: flow

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:3:1:3:7 | GSSA Variable tainted | test.py:4:6:4:12 | ControlFlowNode for tainted |
| test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:3:1:3:7 | GSSA Variable tainted |
| test.py:6:1:6:11 | ControlFlowNode for FunctionExpr | test.py:6:5:6:8 | GSSA Variable func |

View File

@@ -72,6 +72,7 @@ if __name__ == "__main__":
check_tests_valid("variable-capture.collections")
check_tests_valid("module-initialization.multiphase")
check_tests_valid("fieldflow.test")
check_tests_valid("fieldflow.test_dict")
check_tests_valid_after_version("match.test", (3, 10))
check_tests_valid("exceptions.test")
check_tests_valid_after_version("exceptions.test_group", (3, 11))

View File

@@ -3,7 +3,8 @@ edges
| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:5:26:5:32 | GSSA Variable request |
| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request |
| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | UnsafeUnpack.py:11:18:11:49 | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:11:18:11:49 | ControlFlowNode for Attribute() | UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath |
| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path |
| UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file |
@@ -32,6 +33,7 @@ nodes
| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | semmle.label | GSSA Variable request |
| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:11:18:11:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |

View File

@@ -0,0 +1 @@
semmle-extractor-options: --lang=3 --max-import-depth=1