mirror of
https://github.com/github/codeql.git
synced 2026-04-28 02:05:14 +02:00
Python: Add worked example of taint step modeling of external libs
This can't be seen on the example, but I went through quite a lot of iterations before arriving at this fairly simple solution.
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
edges
|
||||
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
|
||||
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
|
||||
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | test.py:41:10:41:12 | ControlFlowNode for val |
|
||||
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:40:11:40:25 | ControlFlowNode for Attribute() |
|
||||
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | test.py:54:10:54:12 | ControlFlowNode for val |
|
||||
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:53:11:53:25 | ControlFlowNode for Attribute() |
|
||||
| test.py:78:11:78:14 | ControlFlowNode for bm() | test.py:79:10:79:12 | ControlFlowNode for val |
|
||||
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:78:11:78:14 | ControlFlowNode for bm() |
|
||||
| test.py:90:11:90:14 | ControlFlowNode for bm() | test.py:91:10:91:12 | ControlFlowNode for val |
|
||||
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:90:11:90:14 | ControlFlowNode for bm() |
|
||||
nodes
|
||||
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
|
||||
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
|
||||
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
|
||||
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:78:11:78:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
|
||||
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:90:11:90:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
|
||||
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
#select
|
||||
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (naive): test_simple |
|
||||
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (naive): test_alias |
|
||||
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (naive): test_accross_functions |
|
||||
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested |
|
||||
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (naive): test_pass_bound_method |
|
||||
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested_bound_method |
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @kind path-problem
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
import SharedCode
|
||||
|
||||
/**
 * Naive model: one taint step from a `MyClass` instance directly to any call
 * whose callee is a `get_value` bound method tracked (via `myClassGetValue`)
 * from that instance. The tracked bound method does not have to be in the same
 * function as `nodeFrom`, so this single step can cross function boundaries.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// obj -> obj.get_value()
|
||||
// `bound_method` ranges over every node that `myClassGetValue` relates to `nodeFrom`.
exists(DataFlow::Node bound_method |
|
||||
bound_method = myClassGetValue(nodeFrom) and
|
||||
// `nodeTo` is the call node whose function is that tracked bound method.
nodeTo.asCfgNode().(CallNode).getFunction() = bound_method.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Report every source-to-sink path found by `SharedConfig`. The message is the
// fixed "naive" label followed by the name of the scope containing the source.
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink,
|
||||
"test flow (naive): " + source.getNode().asCfgNode().getScope().getName()
|
||||
@@ -0,0 +1,67 @@
|
||||
edges
|
||||
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
|
||||
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
|
||||
| test.py:39:15:39:17 | SSA variable arg | test.py:41:10:41:12 | ControlFlowNode for val |
|
||||
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:46:15:46:17 | ControlFlowNode for src |
|
||||
| test.py:46:15:46:17 | ControlFlowNode for src | test.py:39:15:39:17 | SSA variable arg |
|
||||
| test.py:52:24:52:26 | SSA variable arg | test.py:54:10:54:12 | ControlFlowNode for val |
|
||||
| test.py:57:33:57:35 | SSA variable arg | test.py:58:24:58:26 | ControlFlowNode for arg |
|
||||
| test.py:58:24:58:26 | ControlFlowNode for arg | test.py:52:24:52:26 | SSA variable arg |
|
||||
| test.py:61:33:61:35 | SSA variable arg | test.py:62:33:62:35 | ControlFlowNode for arg |
|
||||
| test.py:62:33:62:35 | ControlFlowNode for arg | test.py:57:33:57:35 | SSA variable arg |
|
||||
| test.py:65:33:65:35 | SSA variable arg | test.py:66:33:66:35 | ControlFlowNode for arg |
|
||||
| test.py:66:33:66:35 | ControlFlowNode for arg | test.py:61:33:61:35 | SSA variable arg |
|
||||
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:71:33:71:35 | ControlFlowNode for src |
|
||||
| test.py:71:33:71:35 | ControlFlowNode for src | test.py:65:33:65:35 | SSA variable arg |
|
||||
| test.py:77:23:77:24 | SSA variable bm | test.py:79:10:79:12 | ControlFlowNode for val |
|
||||
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:84:23:84:35 | ControlFlowNode for Attribute |
|
||||
| test.py:84:23:84:35 | ControlFlowNode for Attribute | test.py:77:23:77:24 | SSA variable bm |
|
||||
| test.py:89:37:89:38 | SSA variable bm | test.py:91:10:91:12 | ControlFlowNode for val |
|
||||
| test.py:94:46:94:47 | SSA variable bm | test.py:95:37:95:38 | ControlFlowNode for bm |
|
||||
| test.py:95:37:95:38 | ControlFlowNode for bm | test.py:89:37:89:38 | SSA variable bm |
|
||||
| test.py:98:46:98:47 | SSA variable bm | test.py:99:46:99:47 | ControlFlowNode for bm |
|
||||
| test.py:99:46:99:47 | ControlFlowNode for bm | test.py:94:46:94:47 | SSA variable bm |
|
||||
| test.py:102:46:102:47 | SSA variable bm | test.py:103:46:103:47 | ControlFlowNode for bm |
|
||||
| test.py:103:46:103:47 | ControlFlowNode for bm | test.py:98:46:98:47 | SSA variable bm |
|
||||
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:108:46:108:58 | ControlFlowNode for Attribute |
|
||||
| test.py:108:46:108:58 | ControlFlowNode for Attribute | test.py:102:46:102:47 | SSA variable bm |
|
||||
nodes
|
||||
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
|
||||
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:39:15:39:17 | SSA variable arg | semmle.label | SSA variable arg |
|
||||
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:46:15:46:17 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
|
||||
| test.py:52:24:52:26 | SSA variable arg | semmle.label | SSA variable arg |
|
||||
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:57:33:57:35 | SSA variable arg | semmle.label | SSA variable arg |
|
||||
| test.py:58:24:58:26 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
|
||||
| test.py:61:33:61:35 | SSA variable arg | semmle.label | SSA variable arg |
|
||||
| test.py:62:33:62:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
|
||||
| test.py:65:33:65:35 | SSA variable arg | semmle.label | SSA variable arg |
|
||||
| test.py:66:33:66:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
|
||||
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:71:33:71:35 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
|
||||
| test.py:77:23:77:24 | SSA variable bm | semmle.label | SSA variable bm |
|
||||
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:84:23:84:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| test.py:89:37:89:38 | SSA variable bm | semmle.label | SSA variable bm |
|
||||
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
|
||||
| test.py:94:46:94:47 | SSA variable bm | semmle.label | SSA variable bm |
|
||||
| test.py:95:37:95:38 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
|
||||
| test.py:98:46:98:47 | SSA variable bm | semmle.label | SSA variable bm |
|
||||
| test.py:99:46:99:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
|
||||
| test.py:102:46:102:47 | SSA variable bm | semmle.label | SSA variable bm |
|
||||
| test.py:103:46:103:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
|
||||
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
|
||||
| test.py:108:46:108:58 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
#select
|
||||
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (proper): test_simple |
|
||||
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (proper): test_alias |
|
||||
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (proper): test_accross_functions |
|
||||
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested |
|
||||
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (proper): test_pass_bound_method |
|
||||
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested_bound_method |
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* @kind path-problem
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
import SharedCode
|
||||
|
||||
/**
 * Proper model: the taint step is split in two, and each half is anchored to
 * one syntactic site (the attribute access, then the call). `myClassGetValue(_)`
 * is only used to check that a node is a tracked `get_value` bound method; it
 * is never used to connect `nodeFrom` to `nodeTo`.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// obj -> obj.get_value
|
||||
// `nodeTo` is the attribute access `<nodeFrom>.get_value` itself ...
nodeTo.asCfgNode().(AttrNode).getObject("get_value") = nodeFrom.asCfgNode() and
|
||||
// ... and that access must be a tracked `get_value` of some `MyClass` qualifier.
nodeTo = myClassGetValue(_)
|
||||
or
|
||||
// get_value -> get_value()
|
||||
// `nodeFrom` is a tracked `get_value` bound method ...
nodeFrom = myClassGetValue(_) and
|
||||
// ... and `nodeTo` is the call whose function is exactly that node.
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
|
||||
}
|
||||
}
|
||||
|
||||
// Report every source-to-sink path found by `SharedConfig`. The message is the
// fixed "proper" label followed by the name of the scope containing the source.
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink,
|
||||
"test flow (proper): " + source.getNode().asCfgNode().getScope().getName()
|
||||
@@ -0,0 +1,29 @@
|
||||
This test illustrates that you need to be very careful when adding additional taint-steps or dataflow steps using `TypeTracker`.
|
||||
|
||||
The basic setup is that we're modeling the behavior of a (fictitious) external library class `MyClass`, and a (fictitious) source of instances of that class (the `source` function).
|
||||
|
||||
```py3
|
||||
class MyClass:
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
|
||||
def get_value(self):
|
||||
return self.value
|
||||
```
|
||||
|
||||
We want to extend our analysis so that `obj.get_value()` is also tainted if `obj` is a tainted instance of `MyClass`.
|
||||
|
||||
The actual type-tracking is done in `SharedCode.qll`, but it's the _way_ we use it that matters.
|
||||
|
||||
In `NaiveModel.ql` we add an additional taint step from an instance of `MyClass` to calls of the bound method `get_value` (that we have tracked). It provides us with the correct results, but the path explanations are not very useful, since we are now able to cross functions in _one step_.
|
||||
|
||||
In `ProperModel.ql` we split the additional taint step in two:
|
||||
|
||||
1. from a tracked `obj` that is an instance of `MyClass`, to `obj.get_value`, **but only** exactly where the attribute is accessed (by an `AttrNode`). This is important, since if we allowed `<any tracked qualifier>.get_value` we would again be able to cross functions in one step.
|
||||
2. from a tracked `get_value` bound method to calls of it, **but only** exactly where the call is (by a `CallNode`), for the same reason as above.
|
||||
|
||||
**Try running the queries in VS Code to see the difference**
|
||||
|
||||
### Possible improvements
|
||||
|
||||
Using `AttrNode` directly in the code here means there is no easy way to add `getattr` support to all such predicates. Not really sure how to handle this in a generalized way though :|
|
||||
@@ -0,0 +1,36 @@
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.TaintTracking
|
||||
|
||||
// Helpers modeling MyClass
|
||||
/**
 * A data-flow node representing an instance of `MyClass`.
 *
 * Abstract: concrete subclasses (`SourceCall` in this file) decide which nodes
 * are treated as instances.
 */
|
||||
abstract class MyClass extends DataFlow::Node { }
|
||||
|
||||
/**
 * Gets a node related to the `get_value` attribute of `qualifier`, as seen by
 * type tracker `t`. Private helper; callers use the one-argument overload.
 */
private DataFlow::Node myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
|
||||
// Base case: the result is the qualifier itself, with `t` started in attribute `get_value`.
t.startInAttr("get_value") and
|
||||
result = qualifier
|
||||
or
|
||||
// Recursive case: take one tracking step (`t2` -> `t`) from an already-tracked node.
exists(DataFlow::TypeTracker t2 | result = myClassGetValue(qualifier, t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
 * Gets a node that the two-argument `myClassGetValue` reaches for `qualifier`
 * with the type tracker in its end state (`DataFlow::TypeTracker::end()`).
 * The queries use this as "a tracked `get_value` bound method of `qualifier`".
 */
DataFlow::Node myClassGetValue(MyClass qualifier) {
|
||||
result = myClassGetValue(qualifier, DataFlow::TypeTracker::end())
|
||||
}
|
||||
|
||||
// Config
|
||||
/**
 * A call to a function referred to by the plain name `source`, treated as an
 * instance of `MyClass`. Matching is by name only.
 */
class SourceCall extends DataFlow::Node, MyClass {
|
||||
SourceCall() { this.asCfgNode().(CallNode).getFunction().(NameNode).getId() = "source" }
|
||||
}
|
||||
|
||||
/**
 * Taint-tracking configuration shared by both queries: sources are
 * `SourceCall` nodes, sinks are the first argument of calls to `sink`.
 */
class SharedConfig extends TaintTracking::Configuration {
|
||||
SharedConfig() { this = "SharedConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof SourceCall }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
// A sink is argument 0 of any call whose callee is the plain name `sink`.
exists(CallNode call |
|
||||
call.getFunction().(NameNode).getId() = "sink" and
|
||||
call.getArg(0) = sink.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
class MyClass:  # stand-in for the external-library class that the QL queries model
|
||||
def __init__(self, value):
|
||||
self.value = value  # stored as-is; handed back by get_value()
|
||||
|
||||
def get_value(self):  # calls to this method are what the additional taint steps target
|
||||
return self.value
|
||||
|
||||
|
||||
def source():  # calls to the name `source` are the taint sources (SourceCall in SharedCode.qll)
|
||||
return MyClass("tainted")
|
||||
|
||||
|
||||
def sink(obj):  # argument 0 of calls to the name `sink` is the taint sink (SharedConfig.isSink)
|
||||
print("sink", obj)
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
def test_simple():  # case: get_value() is called directly on the source result
|
||||
src = source()
|
||||
sink(src.get_value())  # attribute access, call and sink all in one expression
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
def test_alias():  # case: both the object and the bound method go through local aliases
|
||||
src = source()
|
||||
foo = src  # alias of the tainted instance
|
||||
bound_method = foo.get_value  # bound method stored without being called
|
||||
val = bound_method()  # call happens through the alias
|
||||
sink(val)
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
def sink_func(arg):  # receives the instance as a parameter and calls get_value() here
|
||||
val = arg.get_value()
|
||||
sink(val)
|
||||
|
||||
|
||||
def test_accross_functions():  # case: instance crosses one call boundary (name is asserted in the .expected files)
|
||||
src = source()
|
||||
sink_func(src)
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
def deeply_nested_sink(arg):  # innermost function of the nested chain: get_value() and sink() happen here
|
||||
val = arg.get_value()
|
||||
sink(val)
|
||||
|
||||
|
||||
def deeply_nested_passthrough_1(arg):  # only forwards `arg` one level down
|
||||
deeply_nested_sink(arg)
|
||||
|
||||
|
||||
def deeply_nested_passthrough_2(arg):  # only forwards `arg` one level down
|
||||
deeply_nested_passthrough_1(arg)
|
||||
|
||||
|
||||
def deeply_nested_passthrough_3(arg):  # only forwards `arg` one level down
|
||||
deeply_nested_passthrough_2(arg)
|
||||
|
||||
|
||||
def test_deeply_nested():  # case: instance is passed through three pass-through functions before use
|
||||
src = source()
|
||||
deeply_nested_passthrough_3(src)
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
def recv_bound_method(bm):  # receives an already-bound method and calls it here
|
||||
val = bm()
|
||||
sink(val)
|
||||
|
||||
|
||||
def test_pass_bound_method():  # case: the bound method, not the instance, crosses the call boundary
|
||||
src = source()
|
||||
recv_bound_method(src.get_value)  # attribute accessed here, called in the callee
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
def deeply_nested_bound_method_sink(bm):  # innermost function of the bound-method chain: call and sink happen here
|
||||
val = bm()
|
||||
sink(val)
|
||||
|
||||
|
||||
def deeply_nested_bound_method_passthrough_1(bm):  # only forwards `bm` one level down
|
||||
deeply_nested_bound_method_sink(bm)
|
||||
|
||||
|
||||
def deeply_nested_bound_method_passthrough_2(bm):  # only forwards `bm` one level down
|
||||
deeply_nested_bound_method_passthrough_1(bm)
|
||||
|
||||
|
||||
def deeply_nested_bound_method_passthrough_3(bm):  # only forwards `bm` one level down
|
||||
deeply_nested_bound_method_passthrough_2(bm)
|
||||
|
||||
|
||||
def test_deeply_nested_bound_method():  # case: bound method is passed through three pass-through functions before the call
|
||||
src = source()
|
||||
deeply_nested_bound_method_passthrough_3(src.get_value)
|
||||
Reference in New Issue
Block a user