mirror of
https://github.com/github/codeql.git
synced 2025-12-21 03:06:31 +01:00
Merge pull request #12537 from yoff/python/captured-variables-for-typetracking
Python: Captured variables for type tracking and the API graph
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Type tracking is now aware of reads of captured variables (variables defined in an outer scope). This leads to a richer API graph, and may lead to more results in some queries.
|
||||
@@ -987,7 +987,7 @@ module API {
|
||||
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
|
||||
Stages::TypeTracking::ref() and
|
||||
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
|
||||
result instanceof DataFlow::ExprNode
|
||||
result instanceof DataFlow::LocalSourceNodeNotModuleVariableNode
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -51,6 +51,10 @@ class LocalSourceNode extends Node {
|
||||
// We explicitly include any read of a global variable, as some of these may have local flow going
|
||||
// into them.
|
||||
this = any(ModuleVariableNode mvn).getARead()
|
||||
or
|
||||
// We include all scope entry definitions, as these act as the local source within the scope they
|
||||
// enter.
|
||||
this.asVar() instanceof ScopeEntryDefinition
|
||||
}
|
||||
|
||||
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
|
||||
@@ -133,6 +137,21 @@ class LocalSourceNode extends Node {
|
||||
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A LocalSourceNode that is not a ModuleVariableNode
|
||||
* This class provides a positive formulation of that in its charpred.
|
||||
*
|
||||
* Aka FutureLocalSourceNode (see FutureWork below), but until the future is here...
|
||||
*/
|
||||
class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
|
||||
cached
|
||||
LocalSourceNodeNotModuleVariableNode() {
|
||||
this instanceof ExprNode
|
||||
or
|
||||
this.asVar() instanceof ScopeEntryDefinition
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A node that can be used for type tracking or type back-tracking.
|
||||
*
|
||||
|
||||
@@ -43,7 +43,19 @@ predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent
|
||||
|
||||
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
|
||||
|
||||
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
|
||||
predicate jumpStep(Node nodeFrom, Node nodeTo) {
|
||||
DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
|
||||
or
|
||||
capturedJumpStep(nodeFrom, nodeTo)
|
||||
}
|
||||
|
||||
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
|
||||
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
|
||||
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
|
||||
nodeFrom.asCfgNode() = def.getValue() and
|
||||
var.getScope().getScope*() = nodeFrom.getScope()
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
|
||||
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }
|
||||
|
||||
@@ -726,15 +726,15 @@ def test_deep_callgraph():
|
||||
return f5(arg)
|
||||
|
||||
x = f6(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = f5(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = f4(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = f3(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = f2(SOURCE)
|
||||
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
x = f1(SOURCE)
|
||||
SINK(x) #$ flow="SOURCE, l:-1 -> x"
|
||||
|
||||
|
||||
@@ -6,5 +6,6 @@ module_attr_tracker
|
||||
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
|
||||
| import_as_attr.py:3:1:3:1 | GSSA Variable x |
|
||||
| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref |
|
||||
| import_as_attr.py:5:1:5:10 | GSSA Variable attr_ref |
|
||||
| import_as_attr.py:6:5:6:5 | SSA variable y |
|
||||
| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref |
|
||||
|
||||
@@ -60,10 +60,10 @@ def test_import():
|
||||
def to_inner_scope():
|
||||
x = tracked # $tracked
|
||||
def foo():
|
||||
y = x # $ MISSING: tracked
|
||||
return y # $ MISSING: tracked
|
||||
also_x = foo() # $ MISSING: tracked
|
||||
print(also_x) # $ MISSING: tracked
|
||||
y = x # $ tracked
|
||||
return y # $ tracked
|
||||
also_x = foo() # $ tracked
|
||||
print(also_x) # $ tracked
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Function decorator
|
||||
|
||||
@@ -24,6 +24,11 @@ class TrackedTest extends InlineExpectationsTest {
|
||||
tracked(t).flowsTo(e) and
|
||||
// Module variables have no sensible location, and hence can't be annotated.
|
||||
not e instanceof DataFlow::ModuleVariableNode and
|
||||
// Global variables on line 0 also cannot be annotated
|
||||
not e.getLocation().getStartLine() = 0 and
|
||||
// We do not wish to annotate scope entry definitions,
|
||||
// as they do not appear in the source code.
|
||||
not e.asVar() instanceof ScopeEntryDefinition and
|
||||
tag = "tracked" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
|
||||
@@ -68,8 +68,10 @@ if __name__ == "__main__":
|
||||
check_tests_valid("coverage-py3.classes")
|
||||
check_tests_valid("variable-capture.in")
|
||||
check_tests_valid("variable-capture.nonlocal")
|
||||
check_tests_valid("variable-capture.global")
|
||||
check_tests_valid("variable-capture.dict")
|
||||
check_tests_valid("variable-capture.collections")
|
||||
check_tests_valid("variable-capture.test_collections")
|
||||
check_tests_valid("variable-capture.by_value")
|
||||
check_tests_valid("module-initialization.multiphase")
|
||||
check_tests_valid("fieldflow.test")
|
||||
check_tests_valid("fieldflow.test_dict")
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# Here we test capturing the _value_ of a variable (by using it as the default value for a parameter)
|
||||
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
|
||||
# This can be checked by running validTest.py.
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from testlib import expects
|
||||
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
|
||||
def by_value1():
|
||||
a = SOURCE
|
||||
def inner(a_val=a):
|
||||
SINK(a_val) #$ captured
|
||||
SINK_F(a)
|
||||
a = NONSOURCE
|
||||
inner()
|
||||
|
||||
def by_value2():
|
||||
a = NONSOURCE
|
||||
def inner(a_val=a):
|
||||
SINK(a) #$ MISSING:captured
|
||||
SINK_F(a_val)
|
||||
a = SOURCE
|
||||
inner()
|
||||
|
||||
@expects(4)
|
||||
def test_by_value():
|
||||
by_value1()
|
||||
by_value2()
|
||||
@@ -1,10 +1,11 @@
|
||||
uniqueEnclosingCallable
|
||||
uniqueCallEnclosingCallable
|
||||
| collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
|
||||
| collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
|
||||
| collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
|
||||
| collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
|
||||
| collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:39:17:39:38 | Lambda() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:45:19:45:24 | mod() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
|
||||
| test_collections.py:52:13:52:24 | mod_local() | Call should have one enclosing callable but has 0. |
|
||||
uniqueType
|
||||
uniqueNodeLocation
|
||||
missingLocation
|
||||
|
||||
@@ -77,16 +77,18 @@ def through(tainted):
|
||||
captureOut2()
|
||||
SINK(sinkO2["x"]) #$ MISSING:captured
|
||||
|
||||
nonSink0 = { "x": "" }
|
||||
nonSink1 = { "x": "" }
|
||||
def captureOut1NotCalled():
|
||||
nonSink0["x"] = tainted
|
||||
SINK_F(nonSink0["x"])
|
||||
nonSink1["x"] = tainted
|
||||
SINK_F(nonSink1["x"])
|
||||
|
||||
nonSink2 = { "x": "" }
|
||||
def captureOut2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
nonSink0["x"] = tainted
|
||||
nonSink2["x"] = tainted
|
||||
captureOut2NotCalled()
|
||||
SINK_F(nonSink0["x"])
|
||||
SINK_F(nonSink2["x"])
|
||||
|
||||
@expects(4)
|
||||
def test_through():
|
||||
|
||||
106
python/ql/test/experimental/dataflow/variable-capture/global.py
Normal file
106
python/ql/test/experimental/dataflow/variable-capture/global.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Here we test writing to a captured global variable via the `global` keyword (see `out`).
|
||||
# We also test reading one captured variable and writing the value to another (see `through`).
|
||||
|
||||
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
|
||||
# This can be checked by running validTest.py.
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
|
||||
from testlib import expects
|
||||
|
||||
# These are defined so that we can evaluate the test code.
|
||||
NONSOURCE = "not a source"
|
||||
SOURCE = "source"
|
||||
|
||||
def is_source(x):
|
||||
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
|
||||
|
||||
|
||||
def SINK(x):
|
||||
if is_source(x):
|
||||
print("OK")
|
||||
else:
|
||||
print("Unexpected flow", x)
|
||||
|
||||
|
||||
def SINK_F(x):
|
||||
if is_source(x):
|
||||
print("Unexpected flow", x)
|
||||
else:
|
||||
print("OK")
|
||||
|
||||
|
||||
sinkO1 = ""
|
||||
sinkO2 = ""
|
||||
nonSink1 = ""
|
||||
nonSink2 = ""
|
||||
|
||||
def out():
|
||||
def captureOut1():
|
||||
global sinkO1
|
||||
sinkO1 = SOURCE
|
||||
captureOut1()
|
||||
SINK(sinkO1) #$ captured
|
||||
|
||||
def captureOut2():
|
||||
def m():
|
||||
global sinkO2
|
||||
sinkO2 = SOURCE
|
||||
m()
|
||||
captureOut2()
|
||||
SINK(sinkO2) #$ captured
|
||||
|
||||
def captureOut1NotCalled():
|
||||
global nonSink1
|
||||
nonSink1 = SOURCE
|
||||
SINK_F(nonSink1) #$ SPURIOUS: captured
|
||||
|
||||
def captureOut2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
global nonSink2
|
||||
nonSink2 = SOURCE
|
||||
captureOut2NotCalled()
|
||||
SINK_F(nonSink2) #$ SPURIOUS: captured
|
||||
|
||||
@expects(4)
|
||||
def test_out():
|
||||
out()
|
||||
|
||||
sinkT1 = ""
|
||||
sinkT2 = ""
|
||||
nonSinkT1 = ""
|
||||
nonSinkT2 = ""
|
||||
def through(tainted):
|
||||
def captureOut1():
|
||||
global sinkT1
|
||||
sinkT1 = tainted
|
||||
captureOut1()
|
||||
SINK(sinkT1) #$ MISSING:captured
|
||||
|
||||
def captureOut2():
|
||||
def m():
|
||||
global sinkT2
|
||||
sinkT2 = tainted
|
||||
m()
|
||||
captureOut2()
|
||||
SINK(sinkT2) #$ MISSING:captured
|
||||
|
||||
def captureOut1NotCalled():
|
||||
global nonSinkT1
|
||||
nonSinkT1 = tainted
|
||||
SINK_F(nonSinkT1)
|
||||
|
||||
def captureOut2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
global nonSinkT2
|
||||
nonSinkT2 = tainted
|
||||
captureOut2NotCalled()
|
||||
SINK_F(nonSinkT2)
|
||||
|
||||
@expects(4)
|
||||
def test_through():
|
||||
through(SOURCE)
|
||||
@@ -48,13 +48,14 @@ def inParam(tainted):
|
||||
captureIn3("")
|
||||
|
||||
def captureIn1NotCalled():
|
||||
nonSink0 = tainted
|
||||
SINK_F(nonSink0)
|
||||
nonSink1 = tainted
|
||||
SINK_F(nonSink1)
|
||||
|
||||
def captureIn2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
nonSink0 = tainted
|
||||
SINK_F(nonSink0)
|
||||
nonSink1 = tainted
|
||||
SINK_F(nonSink1)
|
||||
captureIn2NotCalled()
|
||||
|
||||
@expects(3)
|
||||
@@ -81,13 +82,14 @@ def inLocal():
|
||||
captureIn3("")
|
||||
|
||||
def captureIn1NotCalled():
|
||||
nonSink0 = tainted
|
||||
SINK_F(nonSink0)
|
||||
nonSink1 = tainted
|
||||
SINK_F(nonSink1)
|
||||
|
||||
def captureIn2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
nonSink0 = tainted
|
||||
SINK_F(nonSink0)
|
||||
nonSink2 = tainted
|
||||
SINK_F(nonSink2)
|
||||
captureIn2NotCalled()
|
||||
|
||||
@expects(3)
|
||||
|
||||
@@ -49,18 +49,20 @@ def out():
|
||||
captureOut2()
|
||||
SINK(sinkO2) #$ MISSING:captured
|
||||
|
||||
nonSink0 = ""
|
||||
nonSink1 = ""
|
||||
def captureOut1NotCalled():
|
||||
nonlocal nonSink0
|
||||
nonSink0 = SOURCE
|
||||
SINK_F(nonSink0)
|
||||
nonlocal nonSink1
|
||||
nonSink1 = SOURCE
|
||||
SINK_F(nonSink1)
|
||||
|
||||
nonSink2 = ""
|
||||
def captureOut2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
nonlocal nonSink0
|
||||
nonSink0 = SOURCE
|
||||
nonlocal nonSink2
|
||||
nonSink2 = SOURCE
|
||||
captureOut2NotCalled()
|
||||
SINK_F(nonSink0)
|
||||
SINK_F(nonSink2)
|
||||
|
||||
@expects(4)
|
||||
def test_out():
|
||||
@@ -83,18 +85,20 @@ def through(tainted):
|
||||
captureOut2()
|
||||
SINK(sinkO2) #$ MISSING:captured
|
||||
|
||||
nonSink0 = ""
|
||||
nonSink1 = ""
|
||||
def captureOut1NotCalled():
|
||||
nonlocal nonSink0
|
||||
nonSink0 = tainted
|
||||
SINK_F(nonSink0)
|
||||
nonlocal nonSink1
|
||||
nonSink1 = tainted
|
||||
SINK_F(nonSink1)
|
||||
|
||||
nonSink2 = ""
|
||||
def captureOut2NotCalled():
|
||||
# notice that `m` is not called
|
||||
def m():
|
||||
nonlocal nonSink0
|
||||
nonSink0 = tainted
|
||||
nonlocal nonSink2
|
||||
nonSink2 = tainted
|
||||
captureOut2NotCalled()
|
||||
SINK_F(nonSink0)
|
||||
SINK_F(nonSink2)
|
||||
|
||||
@expects(4)
|
||||
def test_through():
|
||||
|
||||
@@ -52,7 +52,7 @@ def mod_list(l):
|
||||
return [mod_local(x) for x in l]
|
||||
|
||||
l_modded = mod_list(l)
|
||||
SINK(l_modded[0]) #$ MISSING: captured
|
||||
SINK(l_modded[0]) #$ captured
|
||||
|
||||
def mod_list_first(l):
|
||||
def mod_local(x):
|
||||
@@ -3,6 +3,7 @@ uniqueCallEnclosingCallable
|
||||
| test_captured.py:7:22:7:25 | p() | Call should have one enclosing callable but has 0. |
|
||||
| test_captured.py:7:22:7:25 | p() | Call should have one enclosing callable but has 0. |
|
||||
| test_captured.py:14:26:14:30 | pp() | Call should have one enclosing callable but has 0. |
|
||||
| test_captured.py:14:26:14:30 | pp() | Call should have one enclosing callable but has 0. |
|
||||
uniqueType
|
||||
uniqueNodeLocation
|
||||
missingLocation
|
||||
|
||||
@@ -89,7 +89,7 @@ def use_of_builtins():
|
||||
def imported_builtins():
|
||||
import builtins #$ use=moduleImport("builtins")
|
||||
def open(f):
|
||||
return builtins.open(f) #$ MISSING: use=moduleImport("builtins").getMember("open").getReturn()
|
||||
return builtins.open(f) #$ use=moduleImport("builtins").getMember("open").getReturn()
|
||||
|
||||
def redefine_print():
|
||||
def my_print(x):
|
||||
|
||||
@@ -11,4 +11,4 @@ def pp_list(l):
|
||||
return escape(x) #$ use=moduleImport("html").getMember("escape").getReturn()
|
||||
|
||||
def pp_list_inner(l):
|
||||
return ", ".join(pp(x) for x in l) #$ MISSING: use=moduleImport("html").getMember("escape").getReturn()
|
||||
return ", ".join(pp(x) for x in l) #$ use=moduleImport("html").getMember("escape").getReturn()
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
from flask import Flask
|
||||
from flask_sqlalchemy import SQLAlchemy
|
||||
from flask_user import UserMixin
|
||||
|
||||
def create_app():
|
||||
app = Flask(__name__)
|
||||
db = SQLAlchemy(app) #$ use=moduleImport("flask_sqlalchemy").getMember("SQLAlchemy").getReturn()
|
||||
|
||||
class Users(db.Model, UserMixin): #$ use=moduleImport("flask_sqlalchemy").getMember("SQLAlchemy").getReturn().getMember("Model").getASubclass()
|
||||
__tablename__ = 'users'
|
||||
|
||||
@app.route('/v2/user/<int:id>', methods=['GET','PUT'])
|
||||
def users(id):
|
||||
Users.query.filter_by(id=id).first() #$ use=moduleImport("flask_sqlalchemy").getMember("SQLAlchemy").getReturn().getMember("Model").getASubclass().getMember("query").getMember("filter_by")
|
||||
@@ -0,0 +1,26 @@
|
||||
from foo import A, B
|
||||
|
||||
def func():
|
||||
if cond():
|
||||
class Foo(A): pass
|
||||
else:
|
||||
class Foo(B): pass
|
||||
|
||||
class Bar(A): pass
|
||||
class Bar(B): pass
|
||||
|
||||
class Baz(A): pass
|
||||
|
||||
def other_func():
|
||||
print(Foo) #$ use=moduleImport("foo").getMember("A").getASubclass() use=moduleImport("foo").getMember("B").getASubclass()
|
||||
# On the next line, we wish to express that it is not possible for `Bar` to be a subclass of `A`.
|
||||
# However, we have no "true negative" annotation, so we use the MISSING annotation instead.
|
||||
# (Normally, "true negative" is not needed as all applicable annotations must be present,
|
||||
# but these API graph tests work differently, since having all results recorded in annotations
|
||||
# would be excessive)
|
||||
print(Bar) #$ use=moduleImport("foo").getMember("B").getASubclass() MISSING: use=moduleImport("foo").getMember("A").getASubclass()
|
||||
print(Baz) #$ use=moduleImport("foo").getMember("B").getASubclass() SPURIOUS: use=moduleImport("foo").getMember("A").getASubclass()
|
||||
|
||||
class Baz(B): pass
|
||||
|
||||
other_func()
|
||||
@@ -32,5 +32,5 @@ def func1():
|
||||
def func3():
|
||||
var2 = print #$ use=moduleImport("builtins").getMember("print")
|
||||
def func4():
|
||||
var2() #$ MISSING: use=moduleImport("builtins").getMember("print").getReturn()
|
||||
var2() #$ use=moduleImport("builtins").getMember("print").getReturn()
|
||||
func4()
|
||||
|
||||
Reference in New Issue
Block a user