Merge pull request #12537 from yoff/python/captured-variables-for-typetracking

Python: Captured variables for type tracking and the API graph
This commit is contained in:
yoff
2023-05-09 12:34:22 +02:00
committed by GitHub
22 changed files with 299 additions and 48 deletions

View File

@@ -726,15 +726,15 @@ def test_deep_callgraph():
return f5(arg)
x = f6(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f5(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f4(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f3(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f2(SOURCE)
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
SINK(x) #$ flow="SOURCE, l:-1 -> x"
x = f1(SOURCE)
SINK(x) #$ flow="SOURCE, l:-1 -> x"

View File

@@ -6,5 +6,6 @@ module_attr_tracker
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
| import_as_attr.py:3:1:3:1 | GSSA Variable x |
| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref |
| import_as_attr.py:5:1:5:10 | GSSA Variable attr_ref |
| import_as_attr.py:6:5:6:5 | SSA variable y |
| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref |

View File

@@ -60,10 +60,10 @@ def test_import():
def to_inner_scope():
x = tracked # $tracked
def foo():
y = x # $ MISSING: tracked
return y # $ MISSING: tracked
also_x = foo() # $ MISSING: tracked
print(also_x) # $ MISSING: tracked
y = x # $ tracked
return y # $ tracked
also_x = foo() # $ tracked
print(also_x) # $ tracked
# ------------------------------------------------------------------------------
# Function decorator

View File

@@ -24,6 +24,11 @@ class TrackedTest extends InlineExpectationsTest {
tracked(t).flowsTo(e) and
// Module variables have no sensible location, and hence can't be annotated.
not e instanceof DataFlow::ModuleVariableNode and
// Global variables on line 0 also cannot be annotated
not e.getLocation().getStartLine() = 0 and
// We do not wish to annotate scope entry definitions,
// as they do not appear in the source code.
not e.asVar() instanceof ScopeEntryDefinition and
tag = "tracked" and
location = e.getLocation() and
value = t.getAttr() and

View File

@@ -68,8 +68,10 @@ if __name__ == "__main__":
check_tests_valid("coverage-py3.classes")
check_tests_valid("variable-capture.in")
check_tests_valid("variable-capture.nonlocal")
check_tests_valid("variable-capture.global")
check_tests_valid("variable-capture.dict")
check_tests_valid("variable-capture.collections")
check_tests_valid("variable-capture.test_collections")
check_tests_valid("variable-capture.by_value")
check_tests_valid("module-initialization.multiphase")
check_tests_valid("fieldflow.test")
check_tests_valid("fieldflow.test_dict")

View File

@@ -0,0 +1,52 @@
# Here we test capturing the _value_ of a variable (by using it as the default value for a parameter)
# All functions starting with "test_" should run and execute `print("OK")` exactly once, unless decorated with `@expects(n)` (presumably: exactly `n` times).
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
# `SOURCE` is one of the values that `is_source` below recognises; `NONSOURCE` is not.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
    """Return whether ``x`` compares equal to one of the recognised source values.

    The recognised values are the ``str`` and ``bytes`` spellings of
    ``source`` and the literals ``42``, ``42.0`` and ``42j``.
    """
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
    """Check a value that is expected to be a source.

    Prints ``OK`` when ``is_source(x)`` holds; otherwise prints
    ``Unexpected flow`` followed by the offending value.
    """
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)
def SINK_F(x):
    """Check a value that is expected NOT to be a source.

    Mirror image of ``SINK``: prints ``Unexpected flow`` followed by the
    value when ``is_source(x)`` holds, and ``OK`` otherwise.
    """
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
def by_value1():
    """Capture ``SOURCE`` by value through a default argument.

    The default for ``a_val`` is evaluated when ``inner`` is defined, so it
    holds ``SOURCE``. The free variable ``a`` is looked up when ``inner``
    runs, by which time it has been rebound to ``NONSOURCE``.
    """
    a = SOURCE
    def inner(a_val=a):
        SINK(a_val) #$ captured
        SINK_F(a)
    # Rebinding `a` after the definition affects the closure read of `a`,
    # but not the already-evaluated default of `a_val`.
    a = NONSOURCE
    inner()
def by_value2():
    """Capture ``NONSOURCE`` by value while the closure variable becomes ``SOURCE``.

    Converse of ``by_value1``: the default for ``a_val`` is fixed to
    ``NONSOURCE`` at definition time, whereas the free variable ``a`` is
    ``SOURCE`` by the time ``inner`` is called.
    """
    a = NONSOURCE
    def inner(a_val=a):
        # NOTE(review): `MISSING` presumably marks a result the analysis does
        # not report yet -- confirm against the inline-expectations convention.
        SINK(a) #$ MISSING:captured
        SINK_F(a_val)
    # Rebinding `a` after the definition is what the closure read observes.
    a = SOURCE
    inner()
@expects(4)
def test_by_value():
    """Run both by-value scenarios; each one calls ``SINK`` once and ``SINK_F`` once."""
    by_value1()
    by_value2()

View File

@@ -1,10 +1,11 @@
uniqueEnclosingCallable
uniqueCallEnclosingCallable
| collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
| collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
| collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
| collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
| collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
| test_collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
| test_collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
| test_collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
| test_collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
| test_collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
| test_collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
uniqueType
uniqueNodeLocation
missingLocation

View File

@@ -77,16 +77,18 @@ def through(tainted):
captureOut2()
SINK(sinkO2["x"]) #$ MISSING:captured
nonSink0 = { "x": "" }
nonSink1 = { "x": "" }
def captureOut1NotCalled():
nonSink0["x"] = tainted
SINK_F(nonSink0["x"])
nonSink1["x"] = tainted
SINK_F(nonSink1["x"])
nonSink2 = { "x": "" }
def captureOut2NotCalled():
# notice that `m` is not called
def m():
nonSink0["x"] = tainted
nonSink2["x"] = tainted
captureOut2NotCalled()
SINK_F(nonSink0["x"])
SINK_F(nonSink2["x"])
@expects(4)
def test_through():

View File

@@ -0,0 +1,106 @@
# Here we test writing to a captured global variable via the `global` keyword (see `out`).
# We also test reading one captured variable and writing the value to another (see `through`).
# All functions starting with "test_" should run and execute `print("OK")` exactly once, unless decorated with `@expects(n)` (presumably: exactly `n` times).
# This can be checked by running validTest.py.
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from testlib import expects
# These are defined so that we can evaluate the test code.
# `SOURCE` is one of the values that `is_source` below recognises; `NONSOURCE` is not.
NONSOURCE = "not a source"
SOURCE = "source"
def is_source(x):
    """Return whether ``x`` compares equal to one of the recognised source values.

    The recognised values are the ``str`` and ``bytes`` spellings of
    ``source`` and the literals ``42``, ``42.0`` and ``42j``.
    """
    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
def SINK(x):
    """Check a value that is expected to be a source.

    Prints ``OK`` when ``is_source(x)`` holds; otherwise prints
    ``Unexpected flow`` followed by the offending value.
    """
    if is_source(x):
        print("OK")
    else:
        print("Unexpected flow", x)
def SINK_F(x):
    """Check a value that is expected NOT to be a source.

    Mirror image of ``SINK``: prints ``Unexpected flow`` followed by the
    value when ``is_source(x)`` holds, and ``OK`` otherwise.
    """
    if is_source(x):
        print("Unexpected flow", x)
    else:
        print("OK")
# Module-level globals that the functions nested inside `out` rebind through
# `global` declarations. All start out as the empty string (not a source value).
sinkO1 = ""
sinkO2 = ""
nonSink1 = ""
nonSink2 = ""
def out():
    """Write ``SOURCE`` into module globals from nested functions and check the result.

    Four scenarios are exercised, each with its own global:
    a directly nested writer that is called, a doubly nested writer that is
    called, a directly nested writer that is never called, and a doubly
    nested writer whose inner function is never called.
    """
    # Writer one level down, and it is called: the global holds SOURCE.
    def captureOut1():
        global sinkO1
        sinkO1 = SOURCE
    captureOut1()
    SINK(sinkO1) #$ captured

    # Writer two levels down, and both levels are called.
    def captureOut2():
        def m():
            global sinkO2
            sinkO2 = SOURCE
        m()
    captureOut2()
    SINK(sinkO2) #$ captured

    # Writer is defined but never called, so the global keeps its initial value.
    # NOTE(review): `SPURIOUS` presumably marks a result the analysis reports
    # although it does not happen at runtime -- confirm against the test framework.
    def captureOut1NotCalled():
        global nonSink1
        nonSink1 = SOURCE
    SINK_F(nonSink1) #$ SPURIOUS: captured

    def captureOut2NotCalled():
        # notice that `m` is not called
        def m():
            global nonSink2
            nonSink2 = SOURCE
    captureOut2NotCalled()
    SINK_F(nonSink2) #$ SPURIOUS: captured
@expects(4)
def test_out():
    """Run ``out``, which performs two ``SINK`` and two ``SINK_F`` checks."""
    out()
# Module-level globals that the functions nested inside `through` rebind through
# `global` declarations. All start out as the empty string (not a source value).
sinkT1 = ""
sinkT2 = ""
nonSinkT1 = ""
nonSinkT2 = ""
def through(tainted):
    """Copy the captured parameter ``tainted`` into module globals from nested functions.

    Same four scenarios as ``out``, except that the written value is read
    from the enclosing function's parameter instead of being ``SOURCE``
    directly.

    Args:
        tainted: the value the nested functions read and store in the globals.
    """
    # Writer one level down, and it is called: the global receives `tainted`.
    # NOTE(review): `MISSING` presumably marks a result the analysis does not
    # report yet -- confirm against the inline-expectations convention.
    def captureOut1():
        global sinkT1
        sinkT1 = tainted
    captureOut1()
    SINK(sinkT1) #$ MISSING:captured

    # Writer two levels down, and both levels are called.
    def captureOut2():
        def m():
            global sinkT2
            sinkT2 = tainted
        m()
    captureOut2()
    SINK(sinkT2) #$ MISSING:captured

    # Writer is defined but never called, so the global keeps its initial value.
    def captureOut1NotCalled():
        global nonSinkT1
        nonSinkT1 = tainted
    SINK_F(nonSinkT1)

    def captureOut2NotCalled():
        # notice that `m` is not called
        def m():
            global nonSinkT2
            nonSinkT2 = tainted
    captureOut2NotCalled()
    SINK_F(nonSinkT2)
@expects(4)
def test_through():
    """Run ``through`` with ``SOURCE``; it performs two ``SINK`` and two ``SINK_F`` checks."""
    through(SOURCE)

View File

@@ -48,13 +48,14 @@ def inParam(tainted):
captureIn3("")
def captureIn1NotCalled():
nonSink0 = tainted
SINK_F(nonSink0)
nonSink1 = tainted
SINK_F(nonSink1)
def captureIn2NotCalled():
# notice that `m` is not called
def m():
nonSink0 = tainted
SINK_F(nonSink0)
nonSink1 = tainted
SINK_F(nonSink1)
captureIn2NotCalled()
@expects(3)
@@ -81,13 +82,14 @@ def inLocal():
captureIn3("")
def captureIn1NotCalled():
nonSink0 = tainted
SINK_F(nonSink0)
nonSink1 = tainted
SINK_F(nonSink1)
def captureIn2NotCalled():
# notice that `m` is not called
def m():
nonSink0 = tainted
SINK_F(nonSink0)
nonSink2 = tainted
SINK_F(nonSink2)
captureIn2NotCalled()
@expects(3)

View File

@@ -49,18 +49,20 @@ def out():
captureOut2()
SINK(sinkO2) #$ MISSING:captured
nonSink0 = ""
nonSink1 = ""
def captureOut1NotCalled():
nonlocal nonSink0
nonSink0 = SOURCE
SINK_F(nonSink0)
nonlocal nonSink1
nonSink1 = SOURCE
SINK_F(nonSink1)
nonSink2 = ""
def captureOut2NotCalled():
# notice that `m` is not called
def m():
nonlocal nonSink0
nonSink0 = SOURCE
nonlocal nonSink2
nonSink2 = SOURCE
captureOut2NotCalled()
SINK_F(nonSink0)
SINK_F(nonSink2)
@expects(4)
def test_out():
@@ -83,18 +85,20 @@ def through(tainted):
captureOut2()
SINK(sinkO2) #$ MISSING:captured
nonSink0 = ""
nonSink1 = ""
def captureOut1NotCalled():
nonlocal nonSink0
nonSink0 = tainted
SINK_F(nonSink0)
nonlocal nonSink1
nonSink1 = tainted
SINK_F(nonSink1)
nonSink2 = ""
def captureOut2NotCalled():
# notice that `m` is not called
def m():
nonlocal nonSink0
nonSink0 = tainted
nonlocal nonSink2
nonSink2 = tainted
captureOut2NotCalled()
SINK_F(nonSink0)
SINK_F(nonSink2)
@expects(4)
def test_through():

View File

@@ -52,7 +52,7 @@ def mod_list(l):
return [mod_local(x) for x in l]
l_modded = mod_list(l)
SINK(l_modded[0]) #$ MISSING: captured
SINK(l_modded[0]) #$ captured
def mod_list_first(l):
def mod_local(x):