Python: Support more dictionary read/store steps

The `setdefault` behavior is kinda strange, but no reason not to support
it.
This commit is contained in:
Rasmus Wriedt Larsen
2022-11-15 20:26:42 +01:00
parent 6e31f64aaa
commit b56869551d
3 changed files with 49 additions and 9 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added more content-flow/field-flow for dictionaries, by adding support for reads through `mydict.get("key")` and `mydict.setdefault("key", value)`, and store steps through `dict["key"] = value` and `mydict.setdefault("key", value)`.

View File

@@ -688,17 +688,38 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
}
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
// Dictionary
// `{..., "key" = 42, ...}`
// nodeFrom is `42`, cfg node
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
// c denotes element of dictionary and the key `"key"`
exists(KeyValuePair item |
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
nodeFrom.getNode().getNode() = item.getValue() and
c.getKey() = item.getKey().(StrConst).getS()
)
or
exists(SubscriptNode subscript |
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
or
// see https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeTo.(PostUpdateNode).getPreUpdateNode(), ["setdefault"]) and
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
nodeFrom = call.getArg(1)
)
}
predicate dictClearStep(Node node, DictionaryElementContent c) {
exists(SubscriptNode subscript |
subscript instanceof DefinitionNode and
node.asCfgNode() = subscript.getObject() and
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
)
}
/** Data flows from an element expression in a comprehension to the comprehension. */
@@ -761,6 +782,8 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
dictReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
@@ -799,6 +822,17 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
)
}
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// see
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
exists(MethodCallNode call |
call.calls(nodeFrom, ["get", "setdefault"]) and
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
nodeTo = call
)
}
/** Data flows from a sequence to a call to `pop` on the sequence. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
@@ -873,6 +907,8 @@ predicate clearsContent(Node n, Content c) {
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
or
dictSplatParameterNodeClearStep(n, c)

View File

@@ -35,24 +35,24 @@ def SINK_F(x):
def test_dict_literal():
d = {"key": SOURCE}
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ MISSING: flow
SINK(d.setdefault("key", NONSOURCE)) # $ MISSING: flow
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_update():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ MISSING: flow
SINK(d.get("key")) # $ MISSING: flow
SINK(d.setdefault("key", NONSOURCE)) # $ MISSING: flow
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
def test_dict_override():
d = {}
d["key"] = SOURCE
SINK(d["key"]) # $ MISSING: flow
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
d["key"] = NONSOURCE
SINK_F(d["key"])
@@ -61,7 +61,7 @@ def test_dict_override():
def test_dict_setdefault():
d = {}
d.setdefault("key", SOURCE)
SINK(d["key"]) # $ MISSING: flow
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)