Python: Add worked example of taint step modeling of external libs

This can't be seen on the example, but I went through quite a lot of iterations
before arriving at this fairly simple solution.
This commit is contained in:
Rasmus Wriedt Larsen
2020-09-22 14:41:07 +02:00
parent 00ea0cebc3
commit a82fa04d8a
7 changed files with 325 additions and 0 deletions

View File

@@ -0,0 +1,35 @@
edges
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | test.py:41:10:41:12 | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:40:11:40:25 | ControlFlowNode for Attribute() |
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | test.py:54:10:54:12 | ControlFlowNode for val |
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:53:11:53:25 | ControlFlowNode for Attribute() |
| test.py:78:11:78:14 | ControlFlowNode for bm() | test.py:79:10:79:12 | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:78:11:78:14 | ControlFlowNode for bm() |
| test.py:90:11:90:14 | ControlFlowNode for bm() | test.py:91:10:91:12 | ControlFlowNode for val |
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:90:11:90:14 | ControlFlowNode for bm() |
nodes
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:78:11:78:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:90:11:90:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
#select
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (naive): test_simple |
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (naive): test_alias |
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (naive): test_accross_functions |
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested |
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (naive): test_pass_bound_method |
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested_bound_method |

View File

@@ -0,0 +1,24 @@
/**
* @kind path-problem
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
import DataFlow::PathGraph
import SharedCode
/**
 * The naive model of `MyClass.get_value`: a single additional taint step that goes
 * directly from an instance of `MyClass` to any call of the `get_value` bound method
 * that was tracked from that instance.
 *
 * The tracked bound method may be called in a different function than the one holding
 * the instance, so this one step can cross function boundaries.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // obj -> obj.get_value()
    exists(CallNode call |
      call = nodeTo.asCfgNode() and
      // the callee is a node that the `get_value` bound method of `nodeFrom` reaches
      call.getFunction() = myClassGetValue(nodeFrom).asCfgNode()
    )
  }
}
// Report every source-to-sink path found by the shared configuration. The message is
// suffixed with the name of the scope that contains the source node.
from SharedConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, string scopeName
where
  cfg.hasFlowPath(source, sink) and
  scopeName = source.getNode().asCfgNode().getScope().getName()
select sink.getNode(), source, sink, "test flow (naive): " + scopeName

View File

@@ -0,0 +1,67 @@
edges
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
| test.py:39:15:39:17 | SSA variable arg | test.py:41:10:41:12 | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:46:15:46:17 | ControlFlowNode for src |
| test.py:46:15:46:17 | ControlFlowNode for src | test.py:39:15:39:17 | SSA variable arg |
| test.py:52:24:52:26 | SSA variable arg | test.py:54:10:54:12 | ControlFlowNode for val |
| test.py:57:33:57:35 | SSA variable arg | test.py:58:24:58:26 | ControlFlowNode for arg |
| test.py:58:24:58:26 | ControlFlowNode for arg | test.py:52:24:52:26 | SSA variable arg |
| test.py:61:33:61:35 | SSA variable arg | test.py:62:33:62:35 | ControlFlowNode for arg |
| test.py:62:33:62:35 | ControlFlowNode for arg | test.py:57:33:57:35 | SSA variable arg |
| test.py:65:33:65:35 | SSA variable arg | test.py:66:33:66:35 | ControlFlowNode for arg |
| test.py:66:33:66:35 | ControlFlowNode for arg | test.py:61:33:61:35 | SSA variable arg |
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:71:33:71:35 | ControlFlowNode for src |
| test.py:71:33:71:35 | ControlFlowNode for src | test.py:65:33:65:35 | SSA variable arg |
| test.py:77:23:77:24 | SSA variable bm | test.py:79:10:79:12 | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:84:23:84:35 | ControlFlowNode for Attribute |
| test.py:84:23:84:35 | ControlFlowNode for Attribute | test.py:77:23:77:24 | SSA variable bm |
| test.py:89:37:89:38 | SSA variable bm | test.py:91:10:91:12 | ControlFlowNode for val |
| test.py:94:46:94:47 | SSA variable bm | test.py:95:37:95:38 | ControlFlowNode for bm |
| test.py:95:37:95:38 | ControlFlowNode for bm | test.py:89:37:89:38 | SSA variable bm |
| test.py:98:46:98:47 | SSA variable bm | test.py:99:46:99:47 | ControlFlowNode for bm |
| test.py:99:46:99:47 | ControlFlowNode for bm | test.py:94:46:94:47 | SSA variable bm |
| test.py:102:46:102:47 | SSA variable bm | test.py:103:46:103:47 | ControlFlowNode for bm |
| test.py:103:46:103:47 | ControlFlowNode for bm | test.py:98:46:98:47 | SSA variable bm |
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:108:46:108:58 | ControlFlowNode for Attribute |
| test.py:108:46:108:58 | ControlFlowNode for Attribute | test.py:102:46:102:47 | SSA variable bm |
nodes
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:39:15:39:17 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:46:15:46:17 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
| test.py:52:24:52:26 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:57:33:57:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:58:24:58:26 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:61:33:61:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:62:33:62:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:65:33:65:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:66:33:66:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:71:33:71:35 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
| test.py:77:23:77:24 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:84:23:84:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:89:37:89:38 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:94:46:94:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:95:37:95:38 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:98:46:98:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:99:46:99:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:102:46:102:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:103:46:103:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:108:46:108:58 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
#select
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (proper): test_simple |
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (proper): test_alias |
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (proper): test_accross_functions |
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested |
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (proper): test_pass_bound_method |
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested_bound_method |

View File

@@ -0,0 +1,26 @@
/**
* @kind path-problem
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
import DataFlow::PathGraph
import SharedCode
/**
 * The proper model of `MyClass.get_value`, split into two local taint steps:
 *
 * 1. from the object to the attribute read `obj.get_value`, only at the `AttrNode`
 *    where the attribute is accessed, and
 * 2. from the bound method to the call `get_value()`, only at the `CallNode` where
 *    the call happens.
 *
 * Both steps are additionally restricted to nodes reported by `myClassGetValue`.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // obj -> obj.get_value
    exists(AttrNode attrRead |
      attrRead = nodeTo.asCfgNode() and
      attrRead.getObject("get_value") = nodeFrom.asCfgNode()
    ) and
    nodeTo = myClassGetValue(_)
    or
    // get_value -> get_value()
    exists(CallNode call |
      call = nodeTo.asCfgNode() and
      call.getFunction() = nodeFrom.asCfgNode()
    ) and
    nodeFrom = myClassGetValue(_)
  }
}
// Report every source-to-sink path found by the shared configuration. The message is
// suffixed with the name of the scope that contains the source node.
from SharedConfig cfg, DataFlow::PathNode source, DataFlow::PathNode sink, string scopeName
where
  cfg.hasFlowPath(source, sink) and
  scopeName = source.getNode().asCfgNode().getScope().getName()
select sink.getNode(), source, sink, "test flow (proper): " + scopeName

View File

@@ -0,0 +1,29 @@
This test illustrates that you need to be very careful when adding additional taint-steps or dataflow steps using `TypeTracker`.
The basic setup is that we're modeling the behavior of a (fictitious) external library class `MyClass`, and a (fictitious) source of such an instance (the `source` function).
```py3
class MyClass:
def __init__(self, value):
self.value = value
def get_value(self):
return self.value
```
We want to extend our analysis so that `obj.get_value()` is also considered tainted if `obj` is a tainted instance of `MyClass`.
The actual type-tracking is done in `SharedCode.qll`, but it's the _way_ we use it that matters.
In `NaiveModel.ql` we add an additional taint step from an instance of `MyClass` to calls of the bound method `get_value` (that we have tracked). It provides us with the correct results, but the path explanations are not very useful, since we are now able to cross functions in _one step_.
In `ProperModel.ql` we split the additional taint step in two:
1. from tracked `obj` that is an instance of `MyClass`, to `obj.get_value` **but only** exactly where the attribute is accessed (by an `AttrNode`). This is important, since if we allowed `<any tracked qualifier>.get_value` we would again be able to cross functions in one step.
2. from tracked `get_value` bound method to calls of it, **but only** exactly where the call is (by a `CallNode`), for the same reason as above.
**Try running the queries in VS Code to see the difference**
### Possible improvements
Using `AttrNode` directly in the code here means there is no easy way to add `getattr` support to all such predicates. Not really sure how to handle this in a generalized way though :|

View File

@@ -0,0 +1,36 @@
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// Helpers modeling MyClass
/**
 * A data-flow node representing an instance of MyClass.
 *
 * This class is abstract and has an empty body, so which nodes count as instances is
 * decided entirely by its concrete subclasses (in this file: `SourceCall`).
 */
abstract class MyClass extends DataFlow::Node { }
/**
 * Gets a node reached when type-tracking the `get_value` attribute of `qualifier`,
 * where `t` describes the state of the tracking at that node.
 *
 * NOTE(review): this relies on `TypeTracker::startInAttr` meaning that tracking begins
 * at the qualifier "inside" the named attribute and can only end once that attribute
 * has been read -- confirm against the TypeTracker library documentation.
 */
private DataFlow::Node myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
  // Base case: tracking starts at the instance itself, in its `get_value` attribute.
  result = qualifier and
  t.startInAttr("get_value")
  or
  // Recursive case: extend an already tracked node by one type-tracking step.
  exists(DataFlow::TypeTracker prev |
    result = myClassGetValue(qualifier, prev).track(prev, t)
  )
}
/**
 * Gets a node that the `get_value` attribute of `qualifier` has been tracked to,
 * i.e. a result of the type-tracking predicate above whose tracker is in the
 * `DataFlow::TypeTracker::end()` state.
 */
DataFlow::Node myClassGetValue(MyClass qualifier) {
  exists(DataFlow::TypeTracker done |
    done = DataFlow::TypeTracker::end() and
    result = myClassGetValue(qualifier, done)
  )
}
// Config
/**
 * A call whose callee is a plain name with the identifier `source`.
 *
 * Such a call is both the taint source of the test configuration and, through the
 * `MyClass` supertype, a known instance of `MyClass`.
 */
class SourceCall extends DataFlow::Node, MyClass {
  SourceCall() {
    exists(CallNode call, NameNode callee |
      call = this.asCfgNode() and
      callee = call.getFunction() and
      callee.getId() = "source"
    )
  }
}
/**
 * The taint-tracking configuration shared by the naive and the proper model:
 * sources are `SourceCall` nodes, sinks are the first argument of calls to a
 * function referred to by the plain name `sink`.
 */
class SharedConfig extends TaintTracking::Configuration {
  SharedConfig() { this = "SharedConfig" }

  override predicate isSource(DataFlow::Node source) { source = any(SourceCall s) }

  override predicate isSink(DataFlow::Node sink) {
    exists(CallNode call, NameNode callee |
      callee = call.getFunction() and
      callee.getId() = "sink" and
      // only the first positional argument is a sink
      sink.asCfgNode() = call.getArg(0)
    )
  }
}

View File

@@ -0,0 +1,108 @@
class MyClass:  # stand-in for the (fictitious) external library class being modeled
def __init__(self, value):
self.value = value  # kept so that get_value() can return it
def get_value(self):
return self.value  # returns exactly what was passed to __init__
def source():  # calls to the name `source` are what the queries treat as taint sources
return MyClass("tainted")
def sink(obj):  # the first argument of a `sink(...)` call is what the queries treat as a sink
print("sink", obj)
################################################################################
def test_simple():  # scenario: attribute read and call happen in one expression
src = source()
sink(src.get_value())  # get_value() is called directly on the source instance
################################################################################
def test_alias():  # scenario: instance and bound method are both aliased before the call
src = source()
foo = src  # plain alias of the instance
bound_method = foo.get_value  # attribute is read here without being called
val = bound_method()  # the call happens later, through the stored bound method
sink(val)
################################################################################
def sink_func(arg):  # receives the instance and calls get_value() on it
val = arg.get_value()
sink(val)
def test_accross_functions():  # scenario: the instance crosses one function boundary
src = source()
sink_func(src)  # the instance itself, not the bound method, is passed along
################################################################################
def deeply_nested_sink(arg):  # end of the chain: calls get_value() on the forwarded instance
val = arg.get_value()
sink(val)
def deeply_nested_passthrough_1(arg):  # only forwards `arg`
deeply_nested_sink(arg)
def deeply_nested_passthrough_2(arg):  # only forwards `arg`
deeply_nested_passthrough_1(arg)
def deeply_nested_passthrough_3(arg):  # only forwards `arg`
deeply_nested_passthrough_2(arg)
def test_deeply_nested():  # scenario: the instance is forwarded through three functions
src = source()
deeply_nested_passthrough_3(src)
################################################################################
def recv_bound_method(bm):  # receives an already-bound method and calls it
val = bm()
sink(val)
def test_pass_bound_method():  # scenario: the bound method crosses one function boundary
src = source()
recv_bound_method(src.get_value)  # attribute is read here, the call happens in the callee
################################################################################
def deeply_nested_bound_method_sink(bm):  # end of the chain: calls the forwarded bound method
val = bm()
sink(val)
def deeply_nested_bound_method_passthrough_1(bm):  # only forwards `bm`
deeply_nested_bound_method_sink(bm)
def deeply_nested_bound_method_passthrough_2(bm):  # only forwards `bm`
deeply_nested_bound_method_passthrough_1(bm)
def deeply_nested_bound_method_passthrough_3(bm):  # only forwards `bm`
deeply_nested_bound_method_passthrough_2(bm)
def test_deeply_nested_bound_method():  # scenario: the bound method is forwarded through three functions
src = source()
deeply_nested_bound_method_passthrough_3(src.get_value)  # attribute is read here, called at the end of the chain