Merge pull request #13209 from yoff/python/container-summaries-2

python: Container summaries, part 2
This commit is contained in:
yoff
2023-06-13 18:17:09 +02:00
committed by GitHub
18 changed files with 407 additions and 146 deletions

View File

@@ -794,14 +794,10 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
dictReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
popReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
@@ -834,51 +830,6 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
)
}
/**
 * Holds if `nodeTo` is a method call `nodeFrom.get(...)` or
 * `nodeFrom.setdefault(...)` whose first argument is a string constant, and
 * `c` is the dictionary element content for the key spelled by that constant.
 *
 * See
 * - https://docs.python.org/3.10/library/stdtypes.html#dict.get
 * - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
 */
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
  exists(MethodCallNode lookup, string methodName, string key |
    methodName in ["get", "setdefault"] and
    lookup.calls(nodeFrom, methodName) and
    // Only constant string keys can be matched against precise content.
    key = lookup.getArg(0).asExpr().(StrConst).getText() and
    key = c.(DictionaryElementContent).getKey() and
    nodeTo = lookup
  )
}
/**
 * Data flows from a container to a call to `pop` on that container.
 *
 * Two shapes of call are recognised:
 * - `s.pop()` with no arguments: `nodeFrom` is `s`, `nodeTo` is `s.pop()`,
 *   and `c` denotes an element of a list or of a set.
 * - `d.pop("key")` with a string constant as first argument: `nodeFrom` is
 *   `d`, `nodeTo` is `d.pop("key")`, and `c` denotes the dictionary element
 *   stored under `"key"`.
 */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
  exists(CallNode popCall, AttrNode popAttr |
    // Matching on the attribute name alone should be enough to hit the
    // appropriate call, since a container of the right kind was tracked here.
    popCall.getFunction() = popAttr and
    popAttr.getName() = "pop" and
    nodeFrom.getNode() = popAttr.getObject() and
    nodeTo.getNode() = popCall
  |
    // `set.pop` or `list.pop`, called without arguments
    (
      not exists(popCall.getAnArg()) and
      (
        c instanceof ListElementContent
        or
        c instanceof SetElementContent
      )
    )
    or
    // `dict.pop`, called with a constant string key
    c.(DictionaryElementContent).getKey() = popCall.getArg(0).getNode().(StrConst).getS()
  )
}
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and

View File

@@ -190,14 +190,9 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
call.getArg(0) = nodeFrom
)
or
// methods
// dict methods
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
methodName in [
// general
"copy", "pop",
// dict
"values", "items", "get", "popitem"
] and
methodName in ["values", "items"] and
call.calls(nodeFrom, methodName)
)
or

View File

@@ -3900,6 +3900,176 @@ private module StdlibPrivate {
}
}
// ---------------------------------------------------------------------------
// Flow summaries for container methods
// ---------------------------------------------------------------------------
/**
 * A flow summary for `copy`.
 *
 * Applies to every method call named `copy`. The receiver flows to the
 * return value, both as a whole and per list, set, tuple and dictionary
 * element content.
 */
class CopySummary extends SummarizedCallable {
  CopySummary() { this = "collection.copy" }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode copyCall |
      copyCall.getMethodName() = "copy" and
      result = copyCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // The container itself flows to the result.
      input = "Argument[self]" and
      output = "ReturnValue"
      or
      // Each kind of content on the receiver is found unchanged on the result.
      exists(string component |
        component in ["ListElement", "SetElement"]
        or
        component =
          "TupleElement[" + any(DataFlow::TupleElementContent tc).getIndex().toString() + "]"
        or
        component =
          "DictionaryElement[" + any(DataFlow::DictionaryElementContent dc).getKey() + "]"
      |
        input = "Argument[self]." + component and
        output = "ReturnValue." + component
      )
    )
  }
}
/**
 * A flow summary for `pop`, covering lists and sets.
 * An index argument, if given, is ignored, since content is
 * imprecise anyway.
 *
 * It also handles the default value when `pop` is called
 * on a dictionary, since that also does not depend on the key.
 */
class PopSummary extends SummarizedCallable {
  PopSummary() { this = "collection.pop" }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode popCall |
      popCall.getMethodName() = "pop" and
      result = popCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // Every modelled flow ends in the value returned by `pop`.
    output = "ReturnValue" and
    (
      // Value flow: a list element, a set element, or the default value
      // given as second argument when popping from a dictionary.
      input in ["Argument[self].ListElement", "Argument[self].SetElement", "Argument[1]"] and
      preservesValue = true
      or
      // Taint flow: taint on the receiver transfers to the return value.
      input = "Argument[self]" and
      preservesValue = false
    )
  }
}
/**
 * A flow summary for `dict.pop` at specific content.
 *
 * There is one instance per known dictionary key. It applies to method calls
 * named `pop` whose first argument has a local source that is a string
 * constant spelling that key.
 */
class DictPopSummary extends SummarizedCallable {
  string key;

  DictPopSummary() {
    key = any(DataFlow::DictionaryElementContent dc).getKey() and
    this = "dict.pop(" + key + ")"
  }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode popCall |
      popCall.getMethodName() = "pop" and
      popCall.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key and
      result = popCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    // The value stored under `key` in the receiver is what the call returns.
    preservesValue = true and
    output = "ReturnValue" and
    input = "Argument[self].DictionaryElement[" + key + "]"
  }
}
/**
 * A flow summary for `dict.get` at specific content.
 *
 * There is one instance per known dictionary key. It applies to method calls
 * named `get` whose first argument has a local source that is a string
 * constant spelling that key.
 */
class DictGetSummary extends SummarizedCallable {
  string key;

  DictGetSummary() {
    key = any(DataFlow::DictionaryElementContent dc).getKey() and
    this = "dict.get(" + key + ")"
  }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode getCall |
      getCall.getMethodName() = "get" and
      getCall.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key and
      result = getCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    preservesValue = true and
    (
      // the value stored under `key` in the receiver
      input = "Argument[self].DictionaryElement[" + key + "]"
      or
      // optional default value
      input = "Argument[1]"
    )
  }
}
/**
 * A flow summary for `dict.get` disregarding content.
 *
 * Applies to every method call named `get`, whatever its key argument is.
 */
class DictGetAnySummary extends SummarizedCallable {
  DictGetAnySummary() { this = "dict.get" }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode getCall |
      getCall.getMethodName() = "get" and
      result = getCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    output = "ReturnValue" and
    (
      // Value flow: the default value is returned as is.
      input = "Argument[1]" and
      preservesValue = true
      or
      // Taint flow: taint on the receiver transfers to the return value.
      input = "Argument[self]" and
      preservesValue = false
    )
  }
}
/**
 * A flow summary for `dict.popitem`.
 *
 * Applies to every method call named `popitem`. Any dictionary element of
 * the receiver may end up at index 1 of the returned tuple.
 */
class DictPopitemSummary extends SummarizedCallable {
  DictPopitemSummary() { this = "dict.popitem" }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode popitemCall |
      popitemCall.getMethodName() = "popitem" and
      result = popitemCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    output = "ReturnValue.TupleElement[1]" and
    input =
      "Argument[self].DictionaryElement[" + any(DataFlow::DictionaryElementContent dc).getKey() +
        "]"
    // TODO: put `key` into "ReturnValue.TupleElement[0]"
  }
}
/**
* A flow summary for `dict.setdefault`.
*
@@ -3923,6 +4093,40 @@ private module StdlibPrivate {
preservesValue = true
}
}
/**
 * A flow summary for `dict.setdefault` at specific content.
 * See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
 *
 * There is one instance per known dictionary key, and it handles the read
 * and store steps for that key. See `DictSetdefaultSummary` for the
 * dataflow steps.
 */
class DictSetdefaultKeySummary extends SummarizedCallable {
  string key;

  DictSetdefaultKeySummary() {
    key = any(DataFlow::DictionaryElementContent dc).getKey() and
    this = "dict.setdefault(" + key + ")"
  }

  override DataFlow::CallCfgNode getACall() {
    exists(DataFlow::MethodCallNode setdefaultCall |
      setdefaultCall.getMethodName() = "setdefault" and
      setdefaultCall.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key and
      result = setdefaultCall
    )
  }

  override DataFlow::ArgumentNode getACallback() { none() }

  override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // If key is in the dictionary, return its value.
      input = "Argument[self].DictionaryElement[" + key + "]" and
      output = "ReturnValue"
      or
      // If not, insert key with a value of default.
      // NOTE(review): the Python docs say the default is inserted into the
      // receiver, yet this stores into content of the return value rather than
      // `Argument[self]` - confirm that this output specification is intended.
      input = "Argument[1]" and
      output = "ReturnValue.DictionaryElement[" + key + "]"
    )
  }
}
}
// ---------------------------------------------------------------------------