Merge branch 'main' into polyQhelp

This commit is contained in:
erik-krogh
2023-05-21 22:17:06 +02:00
191 changed files with 4780 additions and 3938 deletions

View File

@@ -1,6 +1,7 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
private import FlowSummaryImpl as FlowSummaryImpl
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
private import semmle.python.ApiGraphs
@@ -55,6 +56,8 @@ private module Cached {
awaitStep(nodeFrom, nodeTo)
or
asyncWithStep(nodeFrom, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
}
}
@@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
* `<tainted object>.get(<arg>)` will be tainted, whether it's true or not.
*/
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
//
// TODO: once we have proper flow-summary modeling, we might not need this step any
@@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// don't provide that right now.
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
call.getArg(0) = nodeFrom
// TODO: Properly handle defaultdict/namedtuple
)
or
// functions operating on collections
exists(DataFlow::CallCfgNode call | call = nodeTo |
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and

View File

@@ -613,8 +613,17 @@ class TypeBackTracker extends TTypeBackTracker {
* also flow to `sink`.
*/
TypeTracker getACompatibleTypeTracker() {
exists(boolean hasCall | result = MkTypeTracker(hasCall, content) |
hasCall = false or this.hasReturn() = false
exists(boolean hasCall, OptionalTypeTrackerContent c |
result = MkTypeTracker(hasCall, c) and
(
compatibleContents(c, content)
or
content = noContent() and c = content
)
|
hasCall = false
or
this.hasReturn() = false
)
}
}

View File

@@ -3790,6 +3790,138 @@ private module StdlibPrivate {
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
}
// ---------------------------------------------------------------------------
// Flow summaries for functions contructing containers
// ---------------------------------------------------------------------------
/** A flow summary for `dict`. */
class DictSummary extends SummarizedCallable {
DictSummary() { this = "builtins.dict" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("dict").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[0].DictionaryElement[" + key + "]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
input = "Argument[" + key + ":]" and
output = "ReturnValue.DictionaryElement[" + key + "]" and
preservesValue = true
)
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for `list`. */
class ListSummary extends SummarizedCallable {
ListSummary() { this = "builtins.list" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("list").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.ListElement" and
preservesValue = true
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for tuple */
class TupleSummary extends SummarizedCallable {
TupleSummary() { this = "builtins.tuple" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("tuple").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]" and
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
preservesValue = true
)
or
// TODO: We need to also translate iterable content such as list element
// but we currently lack TupleElementAny
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for set */
class SetSummary extends SummarizedCallable {
SetSummary() { this = "builtins.set" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("set").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
(
input = "Argument[0].ListElement"
or
input = "Argument[0].SetElement"
or
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
input = "Argument[0].TupleElement[" + i.toString() + "]"
)
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
) and
output = "ReturnValue.SetElement" and
preservesValue = true
or
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}
/** A flow summary for frozenset */
class FrozensetSummary extends SummarizedCallable {
FrozensetSummary() { this = "builtins.frozenset" }
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
override DataFlow::ArgumentNode getACallback() {
result = API::builtin("frozenset").getAValueReachableFromSource()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
any(SetSummary s).propagatesFlowExt(input, output, preservesValue)
}
}
/** A flow summary for `reversed`. */
class ReversedSummary extends SummarizedCallable {
ReversedSummary() { this = "builtins.reversed" }

View File

@@ -1,5 +1,8 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source, DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink))
where
exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
| test.py:1:19:1:19 | ControlFlowNode for x |
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |

View File

@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node sink
where exists(CallGraphConfig cfg | cfg.isSink(sink))
where
exists(CallGraphConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -1,4 +1,2 @@
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:7:19:7:19 | ControlFlowNode for a |

View File

@@ -1,5 +1,7 @@
import experimental.dataflow.callGraphConfig
from DataFlow::Node source
where exists(CallGraphConfig cfg | cfg.isSource(source))
where
exists(CallGraphConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -3,5 +3,7 @@ import allFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

View File

@@ -1,5 +1,8 @@
import allFlowsConfig
from DataFlow::PathNode fromNode, DataFlow::PathNode toNode
where toNode = fromNode.getASuccessor()
where
toNode = fromNode.getASuccessor() and
exists(fromNode.getNode().getLocation().getFile().getRelativePath()) and
exists(toNode.getNode().getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,11 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlow(fromNode, toNode)
where
DataFlow::localFlow(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |

View File

@@ -1,5 +1,8 @@
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node fromNode, DataFlow::Node toNode
where DataFlow::localFlowStep(fromNode, toNode)
where
DataFlow::localFlowStep(fromNode, toNode) and
exists(fromNode.getLocation().getFile().getRelativePath()) and
exists(toNode.getLocation().getFile().getRelativePath())
select fromNode, toNode

View File

@@ -1,4 +1,3 @@
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |

View File

@@ -3,5 +3,7 @@ import maximalFlowsConfig
from DataFlow::Node source, DataFlow::Node sink
where
source != sink and
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink))
exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) and
exists(source.getLocation().getFile().getRelativePath()) and
exists(sink.getLocation().getFile().getRelativePath())
select source, sink

View File

@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,7 @@
import allFlowsConfig
from DataFlow::Node sink
where exists(AllFlowsConfig cfg | cfg.isSink(sink))
where
exists(AllFlowsConfig cfg | cfg.isSink(sink)) and
exists(sink.getLocation().getFile().getRelativePath())
select sink

View File

@@ -1,9 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
| test.py:0:0:0:0 | GSSA Variable __name__ |
| test.py:0:0:0:0 | GSSA Variable __package__ |
| test.py:0:0:0:0 | GSSA Variable b |

View File

@@ -1,5 +1,7 @@
import allFlowsConfig
from DataFlow::Node source
where exists(AllFlowsConfig cfg | cfg.isSource(source))
where
exists(AllFlowsConfig cfg | cfg.isSource(source)) and
exists(source.getLocation().getFile().getRelativePath())
select source

View File

@@ -41,8 +41,8 @@ def SINK_F(x):
def test_list_from_list():
l1 = [SOURCE, NONSOURCE]
l2 = list(l1)
SINK(l2[0]) #$ MISSING: flow="SOURCE, l:-2 -> l2[0]"
SINK_F(l2[1]) # expecting FP due to imprecise flow
SINK(l2[0]) #$ flow="SOURCE, l:-2 -> l2[0]"
SINK_F(l2[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l2[1]"
# -- skip list_from_string
@@ -50,13 +50,13 @@ def test_list_from_list():
def test_list_from_tuple():
t = (SOURCE, NONSOURCE)
l = list(t)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) # expecting FP due to imprecise flow
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
def test_list_from_set():
s = {SOURCE}
l = list(s)
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
@expects(2)
def test_list_from_dict():
@@ -78,7 +78,7 @@ def test_tuple_from_list():
def test_tuple_from_tuple():
t0 = (SOURCE, NONSOURCE)
t = tuple(t0)
SINK(t[0]) #$ MISSING: flow="SOURCE, l:-2 -> t[0]"
SINK(t[0]) #$ flow="SOURCE, l:-2 -> t[0]"
SINK_F(t[1])
def test_tuple_from_set():
@@ -100,19 +100,19 @@ def test_set_from_list():
l = [SOURCE]
s = set(l)
v = s.pop()
SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v"
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_tuple():
t = (SOURCE,)
s = set(t)
v = s.pop()
SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v"
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_set():
s0 = {SOURCE}
s = set(s0)
v = s.pop()
SINK(v) #$ MISSING: flow="SOURCE, l:-3 -> v"
SINK(v) #$ flow="SOURCE, l:-3 -> v"
def test_set_from_dict():
d = {SOURCE: "val"}
@@ -126,7 +126,7 @@ def test_set_from_dict():
@expects(2)
def test_dict_from_keyword():
d = dict(k = SOURCE, k1 = NONSOURCE)
SINK(d["k"]) #$ MISSING: flow="SOURCE, l:-1 -> d[k]"
SINK(d["k"]) #$ flow="SOURCE, l:-1 -> d['k']"
SINK_F(d["k1"])
@expects(2)
@@ -139,7 +139,7 @@ def test_dict_from_list():
def test_dict_from_dict():
d1 = {'k': SOURCE, 'k1': NONSOURCE}
d2 = dict(d1)
SINK(d2["k"]) #$ MISSING: flow="SOURCE, l:-2 -> d[k]"
SINK(d2["k"]) #$ flow="SOURCE, l:-2 -> d2['k']"
SINK_F(d2["k1"])
## Container methods
@@ -278,8 +278,8 @@ def test_reversed_list():
l0 = [SOURCE, NONSOURCE]
r = reversed(l0)
l = list(r)
SINK_F(l[0])
SINK(l[1]) #$ MISSING: flow="SOURCE, l:-4 -> l[1]"
SINK_F(l[0]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[0]"
SINK(l[1]) #$ flow="SOURCE, l:-4 -> l[1]"
@expects(2)
def test_reversed_tuple():

View File

@@ -1,5 +1,3 @@
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
| test.py:3:1:3:7 | GSSA Variable tainted | test.py:4:6:4:12 | ControlFlowNode for tainted |
| test.py:3:11:3:16 | ControlFlowNode for SOURCE | test.py:3:1:3:7 | GSSA Variable tainted |
| test.py:6:1:6:11 | ControlFlowNode for FunctionExpr | test.py:6:5:6:8 | GSSA Variable func |

View File

@@ -3,5 +3,8 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
where TaintTracking::localTaintStep(nodeFrom, nodeTo)
where
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
exists(nodeFrom.getLocation().getFile().getRelativePath()) and
exists(nodeTo.getLocation().getFile().getRelativePath())
select nodeFrom, nodeTo

View File

@@ -38,8 +38,8 @@ def test_construction():
set(tainted_list), # $ tainted
frozenset(tainted_list), # $ tainted
dict(tainted_dict), # $ tainted
dict(k = tainted_string)["k"], # $ MISSING: tainted
dict(dict(k = tainted_string))["k"], # $ MISSING: tainted
dict(k = tainted_string)["k"], # $ tainted
dict(dict(k = tainted_string))["k"], # $ tainted
dict(["k", tainted_string]), # $ tainted
)