mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #18301 from joefarebrother/python-model-missing-builtins
Python: Add models for builtins `map`, `filter`, `zip`, and `enumerate`.
This commit is contained in:
4
python/ql/lib/change-notes/2025-01-15-builtin-model.md
Normal file
4
python/ql/lib/change-notes/2025-01-15-builtin-model.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Additional data flow models for the builtin functions `map`, `filter`, `zip`, and `enumerate` have been added.
|
||||
@@ -4523,6 +4523,124 @@ module StdlibPrivate {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A flow summary for the builtin `map`.
 *
 * Element content of the iterable arguments is passed on to the positional
 * parameters of the function given as the first argument, and whatever that
 * function returns ends up as element content of the result.
 */
class MapSummary extends SummarizedCallable {
  MapSummary() { this = "builtins.map" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("map").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("map").getAValueReachableFromSource()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    (
      // The return value of the mapped function becomes an element of the result.
      input = "Argument[0].ReturnValue" and
      output = "ReturnValue.ListElement"
      or
      // The iterable at argument `paramPos + 1` feeds parameter `paramPos` of the mapped function.
      // `paramPos` is bounded by the argument positions that occur in some call.
      exists(int paramPos, int iterablePos |
        exists(any(Call call).getArg(paramPos)) and
        iterablePos = paramPos + 1
      |
        output = "Argument[0].Parameter[" + paramPos.toString() + "]" and
        (
          input = "Argument[" + iterablePos.toString() + "].ListElement"
          or
          input = "Argument[" + iterablePos.toString() + "].SetElement"
          or
          // For performance, tuple contents are only tracked on the first iterable argument,
          // which costs a little generality.
          // TODO: Once we have TupleElementAny, this generality can be increased.
          paramPos = 0 and
          exists(DataFlow::TupleElementContent tupleContent |
            input = "Argument[1].TupleElement[" + tupleContent.getIndex().toString() + "]"
          )
          // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
        )
      )
    )
  }
}
|
||||
|
||||
/**
 * A flow summary for the builtin `filter`.
 *
 * Element content of the second argument is passed both to the first
 * parameter of the function given as the first argument and to the element
 * content of the result.
 */
class FilterSummary extends SummarizedCallable {
  FilterSummary() { this = "builtins.filter" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("filter").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("filter").getAValueReachableFromSource()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    // Each element is handed to the predicate function and may also appear in the result.
    output = ["Argument[0].Parameter[0]", "ReturnValue.ListElement"] and
    (
      input = ["Argument[1].ListElement", "Argument[1].SetElement"]
      or
      exists(DataFlow::TupleElementContent tupleContent |
        input = "Argument[1].TupleElement[" + tupleContent.getIndex().toString() + "]"
      )
      // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
    )
  }
}
|
||||
|
||||
/**
 * A flow summary for the builtin `enumerate`.
 *
 * Element content of the first argument is stored at tuple index 1 of the
 * elements of the result.
 */
class EnumerateSummary extends SummarizedCallable {
  EnumerateSummary() { this = "builtins.enumerate" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("enumerate").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("enumerate").getAValueReachableFromSource()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    preservesValue = true and
    // Each result element is a tuple; the original element sits at index 1.
    output = "ReturnValue.ListElement.TupleElement[1]" and
    (
      input = ["Argument[0].ListElement", "Argument[0].SetElement"]
      or
      exists(DataFlow::TupleElementContent tupleContent |
        input = "Argument[0].TupleElement[" + tupleContent.getIndex().toString() + "]"
      )
      // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
    )
  }
}
|
||||
|
||||
/**
 * A flow summary for the builtin `zip`.
 *
 * Element content of the argument at position `n` is stored at tuple index
 * `n` of the elements of the result.
 */
class ZipSummary extends SummarizedCallable {
  ZipSummary() { this = "builtins.zip" }

  override DataFlow::CallCfgNode getACall() { result = API::builtin("zip").getACall() }

  override DataFlow::ArgumentNode getACallback() {
    result = API::builtin("zip").getAValueReachableFromSource()
  }

  override predicate propagatesFlow(string input, string output, boolean preservesValue) {
    // `argPos` is bounded by the argument positions that occur in some call.
    exists(int argPos | exists(any(Call call).getArg(argPos)) |
      preservesValue = true and
      output = "ReturnValue.ListElement.TupleElement[" + argPos.toString() + "]" and
      (
        input = "Argument[" + argPos.toString() + "].ListElement"
        or
        input = "Argument[" + argPos.toString() + "].SetElement"
        or
        // For performance, tuple contents are only tracked on the first two arguments,
        // which costs a little generality.
        // TODO: Once we have TupleElementAny, this generality can be increased.
        argPos in [0 .. 1] and
        exists(DataFlow::TupleElementContent tupleContent |
          input =
            "Argument[" + argPos.toString() + "].TupleElement[" +
              tupleContent.getIndex().toString() + "]"
        )
        // TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
      )
    )
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Flow summaries for container methods
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -366,3 +366,246 @@ def test_next_dict():
|
||||
i = iter(d)
|
||||
n = next(i)
|
||||
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
|
||||
|
||||
### map
|
||||
|
||||
# `map` over two lists: per the inline annotations, the SOURCE stored in `l1`
# is expected to reach callback parameter `p1` and `rl[0][0]`, while `p2` and
# `rl[0][1]` (fed from the NONSOURCE list `l2`) are checked with SINK_F.
# NOTE(review): `expects`, `SINK` and `SINK_F` are defined outside this excerpt;
# presumably `@expects(4)` is the number of sink checks that must run - confirm.
# The `l:-N` offsets in the annotations are relative line numbers, so do not
# insert lines between a SOURCE and the sinks that refer to it.
@expects(4)
|
||||
def test_map_list():
|
||||
l1 = [SOURCE]
|
||||
l2 = [NONSOURCE]
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
|
||||
SINK_F(p2)
|
||||
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, l1, l2))
|
||||
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
|
||||
SINK_F(rl[0][1])
|
||||
|
||||
# `map` over two sets: per the inline annotations, the SOURCE stored in `s1`
# is expected to reach callback parameter `p1` and `rl[0][0]`, while `p2` and
# `rl[0][1]` (fed from the NONSOURCE set `s2`) are checked with SINK_F.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_map_set():
|
||||
s1 = {SOURCE}
|
||||
s2 = {NONSOURCE}
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
|
||||
SINK_F(p2)
|
||||
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, s1, s2))
|
||||
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
|
||||
SINK_F(rl[0][1])
|
||||
|
||||
# `map` over two tuples: per the inline annotations, the SOURCE stored in `t1`
# is expected to reach callback parameter `p1` and `rl[0][0]`, while `p2` and
# `rl[0][1]` (fed from the NONSOURCE tuple `t2`) are checked with SINK_F.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_map_tuple():
|
||||
t1 = (SOURCE,)
|
||||
t2 = (NONSOURCE,)
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
|
||||
SINK_F(p2)
|
||||
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, t1, t2))
|
||||
SINK(rl[0][0]) #$ flow="SOURCE, l:-10 -> rl[0][0]"
|
||||
SINK_F(rl[0][1])
|
||||
|
||||
|
||||
# `map` over two dicts whose keys are SOURCE / NONSOURCE. The flow from the
# SOURCE key of `d1` to `p1` and `rl[0][0]` is annotated as MISSING, i.e. it
# is a known gap that the analysis does not report yet.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_map_dict():
|
||||
d1 = {SOURCE: "v1"}
|
||||
d2 = {NONSOURCE: "v2"}
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ MISSING: flow="SOURCE, l:-4 -> p1"
|
||||
SINK_F(p2)
|
||||
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, d1, d2))
|
||||
SINK(rl[0][0]) #$ MISSING: flow="SOURCE, l:-10 -> rl[0][0]"
|
||||
SINK_F(rl[0][1])
|
||||
|
||||
# `map` over two lists that both contain SOURCE: per the inline annotations,
# flow is expected into both callback parameters (`p1`, `p2`) and into both
# components of the first result tuple (`rl[0][0]`, `rl[0][1]`).
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_map_multi_list():
|
||||
l1 = [SOURCE]
|
||||
l2 = [SOURCE]
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
|
||||
SINK(p2) #$ flow="SOURCE, l:-4 -> p2"
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, l1, l2))
|
||||
SINK(rl[0][0]) #$ flow="SOURCE, l:-9 -> rl[0][0]"
|
||||
SINK(rl[0][1]) #$ flow="SOURCE, l:-9 -> rl[0][1]"
|
||||
|
||||
# `map` over two tuples that both contain SOURCE (note the variables are named
# `l1`/`l2` although they hold tuples). Flow through the first tuple to `p1`
# and `rl[0][0]` is expected; flow through the second tuple to `p2` and
# `rl[0][1]` is annotated as MISSING because, as the trailing comment says,
# tuple contents are only tracked on the first iterable argument.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_map_multi_tuple():
|
||||
l1 = (SOURCE,)
|
||||
l2 = (SOURCE,)
|
||||
|
||||
def f(p1,p2):
|
||||
SINK(p1) #$ flow="SOURCE, l:-4 -> p1"
|
||||
SINK(p2) #$ MISSING: flow="SOURCE, l:-4 -> p2" # Tuples are not tracked beyond the first list argument for performance.
|
||||
return p1,p2
|
||||
|
||||
rl = list(map(f, l1, l2))
|
||||
SINK(rl[0][0]) #$ flow="SOURCE, l:-9 -> rl[0][0]"
|
||||
SINK(rl[0][1]) #$ MISSING: flow="SOURCE, l:-9 -> rl[0][1]"
|
||||
|
||||
### filter
|
||||
|
||||
# `filter` over a list: per the inline annotations, the SOURCE stored in `l`
# is expected to reach the predicate parameter `p` and the first element of
# the filtered result `rl`.
# NOTE(review): presumably `@expects(2)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(2)
|
||||
def test_filter_list():
|
||||
l = [SOURCE]
|
||||
|
||||
def f(p):
|
||||
SINK(p) #$ flow="SOURCE, l:-3 -> p"
|
||||
return True
|
||||
|
||||
rl = list(filter(f,l))
|
||||
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
|
||||
|
||||
# `filter` over a set: per the inline annotations, the SOURCE stored in `s`
# is expected to reach the predicate parameter `p` and the first element of
# the filtered result `rl`.
# NOTE(review): presumably `@expects(2)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(2)
|
||||
def test_filter_set():
|
||||
s = {SOURCE}
|
||||
|
||||
def f(p):
|
||||
SINK(p) #$ flow="SOURCE, l:-3 -> p"
|
||||
return True
|
||||
|
||||
rl = list(filter(f,s))
|
||||
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
|
||||
|
||||
# `filter` over a tuple: per the inline annotations, the SOURCE stored in `t`
# is expected to reach the predicate parameter `p` and the first element of
# the filtered result `rl`.
# NOTE(review): presumably `@expects(2)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(2)
|
||||
def test_filter_tuple():
|
||||
t = (SOURCE,)
|
||||
|
||||
def f(p):
|
||||
SINK(p) #$ flow="SOURCE, l:-3 -> p"
|
||||
return True
|
||||
|
||||
rl = list(filter(f,t))
|
||||
SINK(rl[0]) #$ flow="SOURCE, l:-7 -> rl[0]"
|
||||
|
||||
# `filter` over a dict whose key is SOURCE. Both expected flows (to the
# predicate parameter `p` and to `rl[0]`) are annotated as MISSING, i.e. they
# are a known gap that the analysis does not report yet.
# NOTE(review): presumably `@expects(2)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(2)
|
||||
def test_filter_dict():
|
||||
d = {SOURCE: "v"}
|
||||
|
||||
def f(p):
|
||||
SINK(p) #$ MISSING: flow="SOURCE, l:-3 -> p"
|
||||
return True
|
||||
|
||||
rl = list(filter(f,d))
|
||||
SINK(rl[0]) #$ MISSING: flow="SOURCE, l:-7 -> rl[0]"
|
||||
|
||||
### enumerate

# `enumerate` over a list: per the inline annotation, the SOURCE stored in `l`
# is expected at index 1 of the first tuple produced by `enumerate`.
# NOTE(review): presumably `@expects(1)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offset is a relative line number; keep the line layout intact.
@expects(1)
|
||||
def test_enumerate_list():
|
||||
l = [SOURCE]
|
||||
|
||||
e = list(enumerate(l))
|
||||
|
||||
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
|
||||
|
||||
# `enumerate` over a set: per the inline annotation, the SOURCE stored in `s`
# is expected at index 1 of the first tuple produced by `enumerate`.
# NOTE(review): presumably `@expects(1)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offset is a relative line number; keep the line layout intact.
@expects(1)
|
||||
def test_enumerate_set():
|
||||
s = {SOURCE}
|
||||
|
||||
e = list(enumerate(s))
|
||||
|
||||
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
|
||||
|
||||
# `enumerate` over a tuple: per the inline annotation, the SOURCE stored in
# `t` is expected at index 1 of the first tuple produced by `enumerate`.
# NOTE(review): presumably `@expects(1)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offset is a relative line number; keep the line layout intact.
@expects(1)
|
||||
def test_enumerate_tuple():
|
||||
t = (SOURCE,)
|
||||
|
||||
e = list(enumerate(t))
|
||||
|
||||
SINK(e[0][1]) #$ flow="SOURCE, l:-4 -> e[0][1]"
|
||||
|
||||
# `enumerate` consumed directly by `for` loops: the first loop unpacks each
# pair into `i, x` and expects the SOURCE at `x`; the second loop keeps the
# pair as `t` and expects the SOURCE at `t[1]`.
# NOTE(review): presumably `@expects(2)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(2)
|
||||
def test_enumerate_list_for():
|
||||
l = [SOURCE]
|
||||
|
||||
for i, x in enumerate(l):
|
||||
SINK(x) #$ flow="SOURCE, l:-3 -> x"
|
||||
|
||||
for t in enumerate(l):
|
||||
SINK(t[1]) #$ flow="SOURCE, l:-6 -> t[1]"
|
||||
|
||||
# `enumerate` over a dict whose key is SOURCE. The expected flow to `e[0][1]`
# is annotated as MISSING, i.e. it is a known gap that the analysis does not
# report yet.
# NOTE(review): presumably `@expects(1)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offset is a relative line number; keep the line layout intact.
@expects(1)
|
||||
def test_enumerate_dict():
|
||||
d = {SOURCE:"v"}
|
||||
|
||||
e = list(enumerate(d))
|
||||
|
||||
SINK(e[0][1]) # $ MISSING: flow="SOURCE, l:-4 -> e[0][1]"
|
||||
|
||||
### zip

# `zip` over four two-element lists with different SOURCE / NONSOURCE layouts.
# `z[r][c]` is element `r` of the list passed as argument `c`. The annotations
# expect flow wherever that element is SOURCE. The two SPURIOUS annotations
# (`z[0][2]`, `z[1][1]`) mark positions holding NONSOURCE for which flow is
# nevertheless reported; both come from lists that contain SOURCE at the other
# index.
# NOTE(review): this looks like list contents not being tracked per index -
# confirm. Presumably `@expects(8)` is the number of sink checks that must run.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(8)
|
||||
def test_zip_list():
|
||||
l1 = [SOURCE, SOURCE]
|
||||
l2 = [SOURCE, NONSOURCE]
|
||||
l3 = [NONSOURCE, SOURCE]
|
||||
l4 = [NONSOURCE, NONSOURCE]
|
||||
|
||||
z = list(zip(l1,l2,l3,l4))
|
||||
|
||||
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK(z[0][1]) #$ flow="SOURCE, l:-7 -> z[0][1]"
|
||||
SINK_F(z[0][2]) #$ SPURIOUS: flow="SOURCE, l:-7 -> z[0][2]"
|
||||
SINK_F(z[0][3])
|
||||
SINK(z[1][0]) #$ flow="SOURCE, l:-11 -> z[1][0]"
|
||||
SINK_F(z[1][1]) #$ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
||||
SINK(z[1][2]) #$ flow="SOURCE, l:-11 -> z[1][2]"
|
||||
SINK_F(z[1][3])
|
||||
|
||||
# `zip` over four single-element sets: per the inline annotations, flow is
# expected at tuple indices 0 and 2 (from the SOURCE sets `s1` and `s3`) and
# indices 1 and 3 (from the NONSOURCE sets `s2` and `s4`) are checked with
# SINK_F.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_zip_set():
|
||||
s1 = {SOURCE}
|
||||
s2 = {NONSOURCE}
|
||||
s3 = {SOURCE}
|
||||
s4 = {NONSOURCE}
|
||||
|
||||
z = list(zip(s1,s2,s3,s4))
|
||||
|
||||
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK_F(z[0][1])
|
||||
SINK(z[0][2]) #$ flow="SOURCE, l:-7 -> z[0][2]"
|
||||
SINK_F(z[0][3])
|
||||
|
||||
# `zip` over four two-element tuples with different SOURCE / NONSOURCE
# layouts. `z[r][c]` is element `r` of the tuple passed as argument `c`.
# Flow from the first two arguments is expected; `z[1][1]` is annotated as
# SPURIOUS (flow reported although that element is NONSOURCE), and `z[1][2]`
# is annotated as MISSING because, as the trailing comment says, tuple
# contents are only tracked on the first two arguments.
# NOTE(review): presumably `@expects(8)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(8)
|
||||
def test_zip_tuple():
|
||||
t1 = (SOURCE, SOURCE)
|
||||
t2 = (SOURCE, NONSOURCE)
|
||||
t3 = (NONSOURCE, SOURCE)
|
||||
t4 = (NONSOURCE, NONSOURCE)
|
||||
|
||||
z = list(zip(t1,t2,t3,t4))
|
||||
|
||||
SINK(z[0][0]) #$ flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK(z[0][1]) #$ flow="SOURCE, l:-7 -> z[0][1]"
|
||||
SINK_F(z[0][2])
|
||||
SINK_F(z[0][3])
|
||||
SINK(z[1][0]) #$ flow="SOURCE, l:-11 -> z[1][0]"
|
||||
SINK_F(z[1][1]) #$ SPURIOUS: flow="SOURCE, l:-11 -> z[1][1]"
|
||||
SINK(z[1][2]) #$ MISSING: flow="SOURCE, l:-11 -> z[1][2]" # Tuple contents are not tracked beyond the first two arguments for performance.
|
||||
SINK_F(z[1][3])
|
||||
|
||||
# `zip` over four dicts whose keys are SOURCE / NONSOURCE. The flows expected
# at tuple indices 0 and 2 (from the SOURCE keys of `d1` and `d3`) are
# annotated as MISSING, i.e. they are a known gap that the analysis does not
# report yet; indices 1 and 3 are checked with SINK_F.
# NOTE(review): presumably `@expects(4)` is the number of sink checks that
# must run - confirm against the test helpers, which are outside this excerpt.
# The `l:-N` offsets are relative line numbers; keep the line layout intact.
@expects(4)
|
||||
def test_zip_dict():
|
||||
d1 = {SOURCE: "v"}
|
||||
d2 = {NONSOURCE: "v"}
|
||||
d3 = {SOURCE: "v"}
|
||||
d4 = {NONSOURCE: "v"}
|
||||
|
||||
z = list(zip(d1,d2,d3,d4))
|
||||
|
||||
SINK(z[0][0]) #$ MISSING: flow="SOURCE, l:-7 -> z[0][0]"
|
||||
SINK_F(z[0][1])
|
||||
SINK(z[0][2]) #$ MISSING: flow="SOURCE, l:-7 -> z[0][2]"
|
||||
SINK_F(z[0][3])
|
||||
@@ -45,4 +45,4 @@ def test_library_call():
|
||||
for x in map(set, [1]):
|
||||
pass
|
||||
|
||||
SINK(captured["x"]) #$ MISSING: captured
|
||||
SINK(captured["x"]) #$ captured
|
||||
|
||||
Reference in New Issue
Block a user