Python: Add annotated call-graph tests

See the added README for in-depth details
This commit is contained in:
Rasmus Wriedt Larsen
2020-06-24 22:12:26 +02:00
parent 090a685d86
commit 155bbbdec9
21 changed files with 513 additions and 0 deletions

View File

@@ -0,0 +1 @@
../CallGraph/CallGraphTest.qll

View File

@@ -0,0 +1,18 @@
debug_missingAnnotationForCallable
| annotation_xfail.py:10:1:10:24 | callable_not_annotated() | This call is annotated with 'callable_not_annotated', but no callable with that annotation was extracted. Please fix. |
debug_nonUniqueAnnotationForCallable
| annotation_xfail.py:13:1:13:17 | Function non_unique | Multiple callables are annotated with 'non_unique'. Please fix. |
| annotation_xfail.py:17:1:17:26 | Function too_much_copy_paste | Multiple callables are annotated with 'non_unique'. Please fix. |
debug_missingAnnotationForCall
| annotation_xfail.py:2:1:2:24 | Function no_annotated_call | This callable is annotated with 'no_annotated_call', but no call with that annotation was extracted. Please fix. |
expectedCallEdgeNotFound
| call_edge_xfail.py:36:1:36:11 | xfail_foo() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar |
| call_edge_xfail.py:39:1:39:11 | xfail_baz() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar |
unexpectedCallEdgeFound
| call_edge_xfail.py:29:1:29:6 | func() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
| call_edge_xfail.py:29:1:29:6 | func() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar | Call resolved to the callable named 'xfail_bar' but was not annotated as such |
| call_edge_xfail.py:30:1:30:11 | xfail_foo() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
| call_edge_xfail.py:31:1:31:14 | xfail_lambda() | call_edge_xfail.py:15:16:15:44 | Function lambda | Call resolved to the callable named 'xfail_lambda' but was not annotated as such |
| call_edge_xfail.py:36:1:36:11 | xfail_foo() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
| call_edge_xfail.py:39:1:39:11 | xfail_baz() | call_edge_xfail.py:11:1:11:16 | Function xfail_baz | Annotated call resolved to unannotated callable |
| call_edge_xfail.py:43:1:43:6 | func() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar | Call resolved to the callable named 'xfail_bar' but was not annotated as such |

View File

@@ -0,0 +1 @@
../CallGraph/PointsTo.ql

View File

@@ -0,0 +1 @@
Tests that show that our failure handling in [CallGraph](../CallGraph/) works as expected.

View File

@@ -0,0 +1,21 @@
# name:no_annotated_call
def no_annotated_call():
pass
def callable_not_annotated():
pass
no_annotated_call()
# calls:callable_not_annotated
callable_not_annotated()
# name:non_unique
def non_unique():
pass
# name:non_unique
def too_much_copy_paste():
pass
# calls:non_unique
non_unique()

View File

@@ -0,0 +1,43 @@
import sys
# name:xfail_foo
def xfail_foo():
print('xfail_foo')
# name:xfail_bar
def xfail_bar():
print('xfail_bar')
def xfail_baz():
print('xfail_baz')
# name:xfail_lambda
xfail_lambda = lambda: print('xfail_lambda')
if len(sys.argv) >= 2 and not sys.argv[1] in ['0', 'False', 'false']:
func = xfail_foo
else:
func = xfail_bar
# Correct usage to suppress bad annotation errors
# calls:xfail_foo calls:xfail_bar
func()
# calls:xfail_lambda
xfail_lambda()
# These are not annotated, and will give rise to unexpectedCallEdgeFound
func()
xfail_foo()
xfail_lambda()
# These are annotated wrongly, and will give rise to both expectedCallEdgeNotFound and unexpectedCallEdgeFound
# calls:xfail_bar
xfail_foo()
# calls:xfail_bar
xfail_baz()
# The annotation is incomplete (does not include the call to xfail_bar)
# calls:xfail_foo
func()

View File

@@ -0,0 +1,145 @@
import python
/**
 * Gets a comment that sits on the line immediately above the first line of `ast_node`,
 * in the same file as `ast_node`.
 */
Comment comment_for(AstNode ast_node) {
  exists(string path, int line_above |
    path = ast_node.getLocation().getFile().getAbsolutePath() and
    line_above = ast_node.getLocation().getStartLine() - 1 and
    // The comment must start and end on that single line; columns are irrelevant.
    result.getLocation().hasLocationInfo(path, line_above, _, line_above, _)
  )
}
/**
 * Gets the `value` of a `tag:value` annotation written in the comment on the line
 * above `ast_node`.
 *
 * Every `tag:value` occurrence in the comment is considered, so one comment can carry
 * several annotations (for example `# calls:a calls:b`).
 */
string getAnnotation(AstNode ast_node, string tag) {
  exists(string pattern, string annotation |
    pattern = "([\\w]+):([\\w.]+)" and
    annotation = comment_for(ast_node).getText().regexpFind(pattern, _, _) and
    // Re-apply the pattern to the matched text to split it into tag and value.
    tag = annotation.regexpCapture(pattern, 1) and
    result = annotation.regexpCapture(pattern, 2)
  )
}
/** Gets a function whose preceding comment carries the annotation `name:<name>`. */
Function annotatedCallable(string name) {
  getAnnotation(result, "name") = name
}
/** Gets a call whose preceding comment carries the annotation `calls:<name>`. */
Call annotatedCall(string name) {
  getAnnotation(result, "calls") = name
}
/**
 * Holds if `call` is annotated with `calls:name`, but no callable is annotated with
 * the matching `name:name`.
 */
predicate missingAnnotationForCallable(string name, Call call) {
  annotatedCall(name) = call and
  not exists(Function callable | callable = annotatedCallable(name))
}
/**
 * Holds if `callable` is annotated with `name:name`, and at least one other callable
 * carries the same annotation.
 */
predicate nonUniqueAnnotationForCallable(string name, Function callable) {
  annotatedCallable(name) = callable and
  // More than one callable has this name exactly when a second, different one exists.
  exists(Function other |
    other = annotatedCallable(name) and
    other != callable
  )
}
/**
 * Holds if `callable` is annotated with `name:name`, but no call is annotated with
 * the matching `calls:name`.
 */
predicate missingAnnotationForCall(string name, Function callable) {
  annotatedCallable(name) = callable and
  not exists(Call call | call = annotatedCall(name))
}
/**
 * Holds if the annotation `name` is used inconsistently: a call refers to it without
 * any callable declaring it, several callables declare it, or a callable declares it
 * without any call referring to it.
 */
predicate name_in_error_state(string name) {
  exists(Call call | missingAnnotationForCallable(name, call))
  or
  exists(Function callable |
    nonUniqueAnnotationForCallable(name, callable)
    or
    missingAnnotationForCall(name, callable)
  )
}
/**
 * Holds if the source annotations state that `call` (annotated `calls:name`) calls
 * `callable` (annotated `name:name`), and `name` is not in an error state.
 */
predicate annotatedCallEdge(string name, Call call, Function callable) {
  annotatedCall(name) = call and
  annotatedCallable(name) = callable and
  not name_in_error_state(name)
}
// ------------------------- Annotation debug query predicates -------------------------
/** Reports each call whose `calls:` annotation names a callable that was never annotated. */
query predicate debug_missingAnnotationForCallable(Call call, string message) {
  exists(string name | missingAnnotationForCallable(name, call) |
    message =
      "This call is annotated with '" + name +
        "', but no callable with that annotation was extracted. Please fix."
  )
}
/** Reports each callable that shares its `name:` annotation with another callable. */
query predicate debug_nonUniqueAnnotationForCallable(Function callable, string message) {
  exists(string name | nonUniqueAnnotationForCallable(name, callable) |
    message = "Multiple callables are annotated with '" + name + "'. Please fix."
  )
}
/** Reports each annotated callable that no call refers to with a `calls:` annotation. */
query predicate debug_missingAnnotationForCall(Function callable, string message) {
  exists(string name | missingAnnotationForCall(name, callable) |
    message =
      "This callable is annotated with '" + name +
        "', but no call with that annotation was extracted. Please fix."
  )
}
// ------------------------- Call Graph resolution -------------------------
/**
 * The call graph resolution strategies under test. Each branch is the base type of one
 * `CallGraphResolver` subclass (`PointsToResolver` and `TypeTrackerResolver`).
 */
private newtype TCallGraphResolver =
TPointsToResolver() or
TTypeTrackerResolver()
/**
 * A strategy for call graph resolution, whose call edges are compared against the
 * `name:`/`calls:` annotations in the source code.
 */
abstract class CallGraphResolver extends TCallGraphResolver {
  /** Holds if this resolver claims that `call` may call `callable`. */
  abstract predicate callEdge(Call call, Function callable);

  /**
   * Holds if the annotations state that `call` calls `callable`, but this resolver
   * did not find that edge.
   */
  predicate expectedCallEdgeNotFound(Call call, Function callable) {
    exists(string name | annotatedCallEdge(name, call, callable)) and
    not this.callEdge(call, callable)
  }

  /**
   * Holds if this resolver claims that `call` calls `callable`, but no annotation
   * states so, and at least one of `call` and `callable` carries a usable annotation.
   * `message` explains which of the two situations applies.
   */
  predicate unexpectedCallEdgeFound(Call call, Function callable, string message) {
    this.callEdge(call, callable) and
    not annotatedCallEdge(_, call, callable) and
    (
      // The target is annotated, but the call does not name it.
      exists(string callable_name |
        callable = annotatedCallable(callable_name) and
        not name_in_error_state(callable_name) and
        message =
          "Call resolved to the callable named '" + callable_name +
            "' but was not annotated as such"
      )
      or
      // The call is annotated, but the target carries no `name:` annotation at all.
      exists(string call_name |
        call = annotatedCall(call_name) and
        not name_in_error_state(call_name) and
        not callable = annotatedCallable(_) and
        message = "Annotated call resolved to unannotated callable"
      )
    )
  }

  /** Gets a textual representation of this resolver. */
  string toString() { result = "CallGraphResolver" }
}
/** A call graph resolver backed by the points-to analysis. */
class PointsToResolver extends CallGraphResolver, TPointsToResolver {
  /**
   * Holds if points-to has a `PythonFunctionValue` whose scope is `callable` and which
   * has a call at `call`.
   */
  override predicate callEdge(Call call, Function callable) {
    call = any(PythonFunctionValue target | target.getScope() = callable).getACall().getNode()
  }

  override string toString() { result = "PointsToResolver" }
}
/**
 * The resolver for the `TTypeTrackerResolver` strategy. Its `callEdge` is `none()`,
 * so it currently reports no call edges at all.
 * NOTE(review): presumably a placeholder until type-tracking based resolution exists - confirm.
 */
class TypeTrackerResolver extends CallGraphResolver, TTypeTrackerResolver {
// Never holds: no call edge is ever produced by this resolver.
override predicate callEdge(Call call, Function callable) { none() }
override string toString() { result = "TypeTrackerResolver" }
}

View File

@@ -0,0 +1,7 @@
debug_missingAnnotationForCallable
debug_nonUniqueAnnotationForCallable
debug_missingAnnotationForCall
expectedCallEdgeNotFound
| code/class_simple.py:23:5:23:9 | A() | code/class_simple.py:4:5:4:28 | Function __init__ |
| code/underscore_prefix_func_name.py:16:5:16:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
unexpectedCallEdgeFound

View File

@@ -0,0 +1,10 @@
import python
import CallGraphTest
/** Reports annotated call edges that the points-to resolver did not find. */
query predicate expectedCallEdgeNotFound(Call call, Function callable) {
  exists(PointsToResolver resolver | resolver.expectedCallEdgeNotFound(call, callable))
}
/** Reports call edges found by the points-to resolver that the annotations do not state. */
query predicate unexpectedCallEdgeFound(Call call, Function callable, string message) {
  exists(PointsToResolver resolver |
    resolver.unexpectedCallEdgeFound(call, callable, message)
  )
}

View File

@@ -0,0 +1,37 @@
# Call Graph Tests
A small testing framework for our call graph resolution. It relies on manual annotation of calls and callables, **and will only include output if something is wrong**. For example, if we are not able to resolve that the `foo()` call will call the `foo` function, that should give an alert.
```py
# name:foo
def foo():
pass
# calls:foo
foo()
```
This is greatly inspired by [`CallGraphs/AnnotatedTest`](https://github.com/github/codeql/blob/696d19cb1440b6f6a75c6a2c1319e18860ceb436/javascript/ql/test/library-tests/CallGraphs/AnnotatedTest/Test.ql) from JavaScript.
IMPORTANT: Names used in annotations are not scoped, so they must be globally unique (this is a bit annoying, but it keeps things simple).
Important files:
- `CallGraphTest.qll`: main code to find annotated calls/callables and setting everything up.
- `PointsTo.ql`: results when using points-to for call graph resolution.
- `TypeTracker.ql`: results when using TypeTracking for call graph resolution.
- `Relative.ql`: differences between using points-to and TypeTracking.
- `code/` contains the actual Python code we test against (included by `test.py`).
All queries will also execute some `debug_*` predicates, which highlight any obvious problems with the annotation setup; no results from these predicates should ever be committed. To show that this works as expected, see [CallGraph-xfail](../CallGraph-xfail/), which uses symlinked versions of the files in this directory (it can't be included as a subdirectory, so it has to be a sibling).
## `options` file
If the value for `--max-import-depth` is set so `import random` will extract `random.py` from the standard library, BUT NO transitive imports are extracted, then points-to analysis will fail to handle the following snippet.
```py
import random
if random.random() < 0.5:
...
else:
...
```

View File

@@ -0,0 +1,20 @@
debug_missingAnnotationForCallable
debug_nonUniqueAnnotationForCallable
debug_missingAnnotationForCall
pointsTo_found_typeTracker_notFound
| code/class_simple.py:26:1:26:15 | Attribute() | code/class_simple.py:9:5:9:26 | Function some_method |
| code/class_simple.py:28:1:28:21 | Attribute() | code/class_simple.py:14:5:14:28 | Function some_staticmethod |
| code/class_simple.py:30:1:30:20 | Attribute() | code/class_simple.py:19:5:19:30 | Function some_classmethod |
| code/class_simple.py:33:1:33:21 | Attribute() | code/class_simple.py:14:5:14:28 | Function some_staticmethod |
| code/class_simple.py:35:1:35:20 | Attribute() | code/class_simple.py:19:5:19:30 | Function some_classmethod |
| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
| code/simple.py:19:1:19:5 | foo() | code/simple.py:2:1:2:10 | Function foo |
| code/simple.py:21:1:21:14 | indirect_foo() | code/simple.py:2:1:2:10 | Function foo |
| code/simple.py:23:1:23:5 | bar() | code/simple.py:10:1:10:10 | Function bar |
| code/simple.py:25:1:25:5 | lam() | code/simple.py:15:7:15:36 | Function lambda |
| code/underscore_prefix_func_name.py:21:5:21:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
| code/underscore_prefix_func_name.py:25:5:25:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
pointsTo_notFound_typeTracker_found

View File

@@ -0,0 +1,15 @@
import python
import CallGraphTest
/** Reports annotated call edges that points-to resolves but the type-tracker resolver does not. */
query predicate pointsTo_found_typeTracker_notFound(Call call, Function callable) {
  annotatedCallEdge(_, call, callable) and
  exists(PointsToResolver pointsTo | pointsTo.callEdge(call, callable)) and
  not exists(TypeTrackerResolver typeTracker | typeTracker.callEdge(call, callable))
}
/** Reports annotated call edges that the type-tracker resolver resolves but points-to does not. */
query predicate pointsTo_notFound_typeTracker_found(Call call, Function callable) {
  annotatedCallEdge(_, call, callable) and
  exists(TypeTrackerResolver typeTracker | typeTracker.callEdge(call, callable)) and
  not exists(PointsToResolver pointsTo | pointsTo.callEdge(call, callable))
}

View File

@@ -0,0 +1,22 @@
debug_missingAnnotationForCallable
debug_nonUniqueAnnotationForCallable
debug_missingAnnotationForCall
expectedCallEdgeNotFound
| code/class_simple.py:23:5:23:9 | A() | code/class_simple.py:4:5:4:28 | Function __init__ |
| code/class_simple.py:26:1:26:15 | Attribute() | code/class_simple.py:9:5:9:26 | Function some_method |
| code/class_simple.py:28:1:28:21 | Attribute() | code/class_simple.py:14:5:14:28 | Function some_staticmethod |
| code/class_simple.py:30:1:30:20 | Attribute() | code/class_simple.py:19:5:19:30 | Function some_classmethod |
| code/class_simple.py:33:1:33:21 | Attribute() | code/class_simple.py:14:5:14:28 | Function some_staticmethod |
| code/class_simple.py:35:1:35:20 | Attribute() | code/class_simple.py:19:5:19:30 | Function some_classmethod |
| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
| code/simple.py:19:1:19:5 | foo() | code/simple.py:2:1:2:10 | Function foo |
| code/simple.py:21:1:21:14 | indirect_foo() | code/simple.py:2:1:2:10 | Function foo |
| code/simple.py:23:1:23:5 | bar() | code/simple.py:10:1:10:10 | Function bar |
| code/simple.py:25:1:25:5 | lam() | code/simple.py:15:7:15:36 | Function lambda |
| code/underscore_prefix_func_name.py:16:5:16:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
| code/underscore_prefix_func_name.py:21:5:21:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
| code/underscore_prefix_func_name.py:25:5:25:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
unexpectedCallEdgeFound

View File

@@ -0,0 +1,10 @@
import python
import CallGraphTest
/** Reports annotated call edges that the type-tracker resolver did not find. */
query predicate expectedCallEdgeNotFound(Call call, Function callable) {
  exists(TypeTrackerResolver resolver | resolver.expectedCallEdgeNotFound(call, callable))
}
/** Reports call edges found by the type-tracker resolver that the annotations do not state. */
query predicate unexpectedCallEdgeFound(Call call, Function callable, string message) {
  exists(TypeTrackerResolver resolver |
    resolver.unexpectedCallEdgeFound(call, callable, message)
  )
}

View File

@@ -0,0 +1,40 @@
class B(object):
def __init__(self, arg):
print('B.__init__', arg)
self._arg = arg
def __str__(self):
print('B.__str__')
return 'B (arg={})'.format(self.arg)
def __add__(self, other):
print('B.__add__')
if isinstance(other, B):
return B(self.arg + other.arg)
return B(self.arg + other)
@property
def arg(self):
print('B.arg getter')
return self._arg
@arg.setter
def arg(self, value):
print('B.arg setter')
self._arg = value
b1 = B(1)
b2 = B(2)
b3 = b1 + b2
print('value printing:', str(b1))
print('value printing:', str(b2))
print('value printing:', str(b3))
b3.arg = 42
b4 = b3 + 100
# this calls `str(b4)` inside
print('value printing:', b4)

View File

@@ -0,0 +1,35 @@
class A(object):
# name:A.__init__
def __init__(self, arg):
print('A.__init__', arg)
self.arg = arg
# name:A.some_method
def some_method(self):
print('A.some_method', self)
@staticmethod
# name:A.some_staticmethod
def some_staticmethod():
print('A.some_staticmethod')
@classmethod
# name:A.some_classmethod
def some_classmethod(cls):
print('A.some_classmethod', cls)
# calls:A.__init__
a = A(42)
# calls:A.some_method
a.some_method()
# calls:A.some_staticmethod
a.some_staticmethod()
# calls:A.some_classmethod
a.some_classmethod()
# calls:A.some_staticmethod
A.some_staticmethod()
# calls:A.some_classmethod
A.some_classmethod()

View File

@@ -0,0 +1,30 @@
import sys
import random
# hmm, annoying that you have to keep names unique across files :|
# since I like to use foo and bar ALL the time :D
# name:rd_foo
def rd_foo():
print('rd_foo')
# name:rd_bar
def rd_bar():
print('rd_bar')
if len(sys.argv) >= 2 and not sys.argv[1] in ['0', 'False', 'false']:
func = rd_foo
else:
func = rd_bar
# calls:rd_foo calls:rd_bar
func()
# Random doesn't work with points-to :O
if random.random() < 0.5:
func2 = rd_foo
else:
func2 = rd_bar
# calls:rd_foo calls:rd_bar
func2()

View File

@@ -0,0 +1,27 @@
# name:foo
def foo():
print("foo called")
indirect_foo = foo
# name:bar
def bar():
print("bar called")
# name:lam
lam = lambda: print("lambda called")
# calls:foo
foo()
# calls:foo
indirect_foo()
# calls:bar
bar()
# calls:lam
lam()
# python -m trace --trackcalls simple.py

View File

@@ -0,0 +1,28 @@
# Points-to information seems to be missing if our analysis thinks the enclosing function
# is never called. However, as illustrated by the code below, it's easy to fool our
# analysis :(
# This was inspired by a problem in real code, where our analysis doesn't have any
# points-to information about the `open` call in
# https://google-gruyere.appspot.com/code/gruyere.py on line 227
# name:some_function
def some_function():
print('some_function')
def _ignored():
print('_ignored')
# calls:some_function
some_function()
def _works_since_called():
print('_works_since_called')
# calls:some_function
some_function()
def works_even_though_not_called():
# calls:some_function
some_function()
globals()['_ignored']()
_works_since_called()

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1000

View File

@@ -0,0 +1 @@
from code import *