Merge pull request #3961 from tausbn/python-add-typetracker

Python: Add type tracker and step summary implementation.
This commit is contained in:
Calum Grant
2020-09-02 09:42:14 +01:00
committed by GitHub
6 changed files with 455 additions and 0 deletions

View File

@@ -0,0 +1,282 @@
/** Step Summaries and Type Tracking */
import python
import internal.DataFlowPublic
import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName extends string {
OptionalAttributeName() { this instanceof AttributeName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(AttributeName attr) or
LoadStep(AttributeName attr)
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
or
this instanceof CallStep and result = "call"
or
this instanceof ReturnStep and result = "return"
or
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
or
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
}
}
module StepSummary {
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string attr |
basicStoreStep(nodeFrom, nodeTo, attr) and
summary = StoreStep(attr)
or
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
)
}
}
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
// TODO: Support special methods?
exists(DataFlowCall call, int i |
nodeFrom.argumentOf(call, i) and nodeTo.isParameterOf(call.getCallable(), i)
)
}
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowCall call |
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
)
}
/**
* Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
*
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
* All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
* and the assumption is that if a specific type appears here, then any access of that
* particular attribute can yield something of that particular type.
*
* Thus, in an example such as
*
* ```python
* def foo(y):
* x = Foo()
* bar(x)
* x.attr = y
* baz(x)
*
* def bar(x):
* z = x.attr
* ```
* for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
EssaFlow::essaFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
)
}
/**
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
*/
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
private class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```
* Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::SourceNode myType() { result = myType(DataFlow::TypeTracker::end()) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
Boolean hasCall;
OptionalAttributeName attr;
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
step = LevelStep() and result = this
or
step = CallStep() and result = MkTypeTracker(true, attr)
or
step = ReturnStep() and hasCall = false and result = this
or
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
or
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withAttr |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
result = "type tracker " + withCall + " call steps" + withAttr
)
}
/**
* Holds if this is the starting point of type tracking.
*/
predicate start() { hasCall = false and attr = "" }
/**
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
*/
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
/**
* Holds if this is the starting point of type tracking
* when tracking a parameter into a call, but not out of it.
*/
predicate call() { hasCall = true and attr = "" }
/**
* Holds if this is the end point of type tracking.
*/
predicate end() { attr = "" }
/**
* INTERNAL. DO NOT USE.
*
* Holds if this type has been tracked into a call.
*/
boolean hasCall() { result = hasCall }
/**
* INTERNAL. DO NOT USE.
*
* Gets the attribute associated with this type tracker.
*/
string getAttr() { result = attr }
/**
* Gets a type tracker that starts where this one has left off to allow continued
* tracking.
*
* This predicate is only defined if the type has not been tracked into an attribute.
*/
TypeTracker continue() { attr = "" and result = this }
/**
* Gets the summary that corresponds to having taken a forwards
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*/
pragma[inline]
TypeTracker step(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
}
/**
* Gets the summary that corresponds to having taken a forwards
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
*
* Unlike `TypeTracker::step`, this predicate exposes all edges
* in the flow graph, and not just the edges between `Node`s.
* It may therefore be less performant.
*
* Type tracking predicates using small steps typically take the following form:
* ```ql
* DataFlow::Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (DataFlow::TypeTracker t2 |
* t = t2.smallstep(myType(t2), result)
* )
* }
*
* DataFlow::Node myType() {
* result = myType(DataFlow::TypeTracker::end())
* }
* ```
*/
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
result = this
}
}

View File

@@ -4,6 +4,7 @@
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
/**
* IPA type for data flow nodes.
@@ -69,6 +70,14 @@ class Node extends TNode {
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
Expr asExpr() { none() }
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
class EssaNode extends Node, TEssaNode {

View File

@@ -0,0 +1,31 @@
class SomeClass:
pass
def simple_read_write():
x = SomeClass() # $tracked=foo
x.foo = tracked # $tracked $tracked=foo
y = x.foo # $tracked=foo $tracked
do_stuff(y) # $tracked
def foo():
x = SomeClass() # $tracked=attr
bar(x) # $tracked=attr
x.attr = tracked # $tracked=attr $tracked
baz(x) # $tracked=attr
def bar(x): # $tracked=attr
z = x.attr # $tracked $tracked=attr
do_stuff(z) # $tracked
def expects_int(x): # $int=field $f+:str=field
do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field
def expects_string(x): # $f+:int=field $str=field
do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field
def test_incompatible_types():
x = SomeClass() # $int,str=field
x.field = int(5) # $int=field $f+:str=field $int $f+:str
expects_int(x) # $int=field $f+:str=field
x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
expects_string(x) # $f+:int=field $str=field

View File

@@ -0,0 +1,61 @@
def get_tracked():
x = tracked # $tracked
return x # $tracked
def use_tracked_foo(x): # $tracked
do_stuff(x) # $tracked
def foo():
use_tracked_foo(
get_tracked() # $tracked
)
def use_tracked_bar(x): # $tracked
do_stuff(x) # $tracked
def bar():
x = get_tracked() # $tracked
use_tracked_bar(x) # $tracked
def use_tracked_baz(x): # $tracked
do_stuff(x) # $tracked
def baz():
x = tracked # $tracked
use_tracked_baz(x) # $tracked
def id(x): # $tracked
return x # $tracked
def use_tracked_quux(x): # $f-:tracked
do_stuff(y) # call after return -- not tracked in here.
def quux():
x = tracked # $tracked
y = id(x) # $tracked
use_tracked_quux(y) # not tracked out of call to id.
g = None
def write_g(x): # $tracked
g = x # $tracked
def use_g():
do_stuff(g) # $f-:tracked // no global flow for now.
def global_var_write_test():
x = tracked # $tracked
write_g(x) # $tracked
use_g()
def expects_int(x): # $int
do_int_stuff(x) # $int
def expects_string(x): # $str
do_string_stuff(x) # $str
def redefine_test():
x = int(5) # $int
expects_int(x) # $int
x = str("Hello") # $str
expects_string(x) # $str

View File

@@ -0,0 +1,72 @@
import python
import experimental.dataflow.TypeTracker
import TestUtilities.InlineExpectationsTest
Node tracked(TypeTracker t) {
t.start() and
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
or
exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
}
class TrackedTest extends InlineExpectationsTest {
TrackedTest() { this = "TrackedTest" }
override string getARelevantTag() { result = "tracked" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Node e, TypeTracker t |
e = tracked(t) and
tag = "tracked" and
location = e.getLocation() and
value = t.getAttr() and
element = e.toString()
)
}
}
Node int_type(TypeTracker t) {
t.start() and
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
or
exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
}
Node string_type(TypeTracker t) {
t.start() and
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
or
exists(TypeTracker t2 | result = string_type(t2).track(t2, t))
}
class TrackedIntTest extends InlineExpectationsTest {
TrackedIntTest() { this = "TrackedIntTest" }
override string getARelevantTag() { result = "int" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Node e, TypeTracker t |
e = int_type(t) and
tag = "int" and
location = e.getLocation() and
value = t.getAttr() and
element = e.toString()
)
}
}
class TrackedStringTest extends InlineExpectationsTest {
TrackedStringTest() { this = "TrackedStringTest" }
override string getARelevantTag() { result = "str" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(Node e, TypeTracker t |
e = string_type(t) and
tag = "str" and
location = e.getLocation() and
value = t.getAttr() and
element = e.toString()
)
}
}