mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Merge pull request #3961 from tausbn/python-add-typetracker
Python: Add type tracker and step summary implementation.
This commit is contained in:
282
python/ql/src/experimental/dataflow/TypeTracker.qll
Normal file
282
python/ql/src/experimental/dataflow/TypeTracker.qll
Normal file
@@ -0,0 +1,282 @@
|
||||
/** Step Summaries and Type Tracking */
|
||||
|
||||
import python
|
||||
import internal.DataFlowPublic
|
||||
import internal.DataFlowPrivate
|
||||
|
||||
/** Any string that may appear as the name of an attribute or access path. */
|
||||
class AttributeName extends string {
|
||||
AttributeName() { this = any(Attribute a).getName() }
|
||||
}
|
||||
|
||||
/** Either an attribute name, or the empty string (representing no attribute). */
|
||||
class OptionalAttributeName extends string {
|
||||
OptionalAttributeName() { this instanceof AttributeName or this = "" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
private newtype TStepSummary =
|
||||
LevelStep() or
|
||||
CallStep() or
|
||||
ReturnStep() or
|
||||
StoreStep(AttributeName attr) or
|
||||
LoadStep(AttributeName attr)
|
||||
|
||||
/**
|
||||
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
|
||||
*
|
||||
* A description of a step on an inter-procedural data flow path.
|
||||
*/
|
||||
class StepSummary extends TStepSummary {
|
||||
/** Gets a textual representation of this step summary. */
|
||||
string toString() {
|
||||
this instanceof LevelStep and result = "level"
|
||||
or
|
||||
this instanceof CallStep and result = "call"
|
||||
or
|
||||
this instanceof ReturnStep and result = "return"
|
||||
or
|
||||
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
|
||||
or
|
||||
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
|
||||
}
|
||||
}
|
||||
|
||||
module StepSummary {
|
||||
cached
|
||||
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
|
||||
}
|
||||
|
||||
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
|
||||
summary = LevelStep()
|
||||
or
|
||||
callStep(nodeFrom, nodeTo) and summary = CallStep()
|
||||
or
|
||||
returnStep(nodeFrom, nodeTo) and
|
||||
summary = ReturnStep()
|
||||
or
|
||||
exists(string attr |
|
||||
basicStoreStep(nodeFrom, nodeTo, attr) and
|
||||
summary = StoreStep(attr)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
|
||||
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
|
||||
// TODO: Support special methods?
|
||||
exists(DataFlowCall call, int i |
|
||||
nodeFrom.argumentOf(call, i) and nodeTo.isParameterOf(call.getCallable(), i)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `nodeFrom` steps to `nodeTo` by being returned from a call. */
|
||||
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
|
||||
exists(DataFlowCall call |
|
||||
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
|
||||
*
|
||||
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
|
||||
* All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
|
||||
* and the assumption is that if a specific type appears here, then any access of that
|
||||
* particular attribute can yield something of that particular type.
|
||||
*
|
||||
* Thus, in an example such as
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttributeAssignment a, Node var |
|
||||
a.getName() = attr and
|
||||
EssaFlow::essaFlowStep*(nodeTo, var) and
|
||||
var.asVar() = a.getInput() and
|
||||
nodeFrom.asCfgNode() = a.getValue()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
|
||||
*/
|
||||
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
|
||||
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility class that is equivalent to `boolean` but does not require type joining.
|
||||
*/
|
||||
private class Boolean extends boolean {
|
||||
Boolean() { this = true or this = false }
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
|
||||
|
||||
/**
|
||||
* Summary of the steps needed to track a value to a given dataflow node.
|
||||
*
|
||||
* This can be used to track objects that implement a certain API in order to
|
||||
* recognize calls to that API. Note that type-tracking does not by itself provide a
|
||||
* source/sink relation, that is, it may determine that a node has a given type,
|
||||
* but it won't determine where that type came from.
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```
|
||||
* Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::SourceNode myType() { result = myType(DataFlow::TypeTracker::end()) }
|
||||
* ```
|
||||
*
|
||||
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
|
||||
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
|
||||
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
|
||||
*/
|
||||
class TypeTracker extends TTypeTracker {
|
||||
Boolean hasCall;
|
||||
OptionalAttributeName attr;
|
||||
|
||||
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
|
||||
|
||||
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
|
||||
cached
|
||||
TypeTracker append(StepSummary step) {
|
||||
step = LevelStep() and result = this
|
||||
or
|
||||
step = CallStep() and result = MkTypeTracker(true, attr)
|
||||
or
|
||||
step = ReturnStep() and hasCall = false and result = this
|
||||
or
|
||||
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
|
||||
or
|
||||
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
|
||||
}
|
||||
|
||||
/** Gets a textual representation of this summary. */
|
||||
string toString() {
|
||||
exists(string withCall, string withAttr |
|
||||
(if hasCall = true then withCall = "with" else withCall = "without") and
|
||||
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
|
||||
result = "type tracker " + withCall + " call steps" + withAttr
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking.
|
||||
*/
|
||||
predicate start() { hasCall = false and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
|
||||
* The type tracking only ends after the attribute has been loaded.
|
||||
*/
|
||||
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
|
||||
|
||||
/**
|
||||
* Holds if this is the starting point of type tracking
|
||||
* when tracking a parameter into a call, but not out of it.
|
||||
*/
|
||||
predicate call() { hasCall = true and attr = "" }
|
||||
|
||||
/**
|
||||
* Holds if this is the end point of type tracking.
|
||||
*/
|
||||
predicate end() { attr = "" }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Holds if this type has been tracked into a call.
|
||||
*/
|
||||
boolean hasCall() { result = hasCall }
|
||||
|
||||
/**
|
||||
* INTERNAL. DO NOT USE.
|
||||
*
|
||||
* Gets the attribute associated with this type tracker.
|
||||
*/
|
||||
string getAttr() { result = attr }
|
||||
|
||||
/**
|
||||
* Gets a type tracker that starts where this one has left off to allow continued
|
||||
* tracking.
|
||||
*
|
||||
* This predicate is only defined if the type has not been tracked into an attribute.
|
||||
*/
|
||||
TypeTracker continue() { attr = "" and result = this }
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker step(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::step(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the summary that corresponds to having taken a forwards
|
||||
* local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
|
||||
*
|
||||
* Unlike `TypeTracker::step`, this predicate exposes all edges
|
||||
* in the flow graph, and not just the edges between `Node`s.
|
||||
* It may therefore be less performant.
|
||||
*
|
||||
* Type tracking predicates using small steps typically take the following form:
|
||||
* ```ql
|
||||
* DataFlow::Node myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* t = t2.smallstep(myType(t2), result)
|
||||
* )
|
||||
* }
|
||||
*
|
||||
* DataFlow::Node myType() {
|
||||
* result = myType(DataFlow::TypeTracker::end())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
pragma[inline]
|
||||
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
|
||||
exists(StepSummary summary |
|
||||
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
|
||||
result = this.append(summary)
|
||||
)
|
||||
or
|
||||
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
|
||||
result = this
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
private import python
|
||||
private import DataFlowPrivate
|
||||
import experimental.dataflow.TypeTracker
|
||||
|
||||
/**
|
||||
* IPA type for data flow nodes.
|
||||
@@ -69,6 +70,14 @@ class Node extends TNode {
|
||||
|
||||
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
|
||||
Expr asExpr() { none() }
|
||||
|
||||
/**
|
||||
* Gets a node that this node may flow to using one heap and/or interprocedural step.
|
||||
*
|
||||
* See `TypeTracker` for more details about how to use this.
|
||||
*/
|
||||
pragma[inline]
|
||||
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
|
||||
}
|
||||
|
||||
class EssaNode extends Node, TEssaNode {
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
class SomeClass:
|
||||
pass
|
||||
|
||||
def simple_read_write():
|
||||
x = SomeClass() # $tracked=foo
|
||||
x.foo = tracked # $tracked $tracked=foo
|
||||
y = x.foo # $tracked=foo $tracked
|
||||
do_stuff(y) # $tracked
|
||||
|
||||
def foo():
|
||||
x = SomeClass() # $tracked=attr
|
||||
bar(x) # $tracked=attr
|
||||
x.attr = tracked # $tracked=attr $tracked
|
||||
baz(x) # $tracked=attr
|
||||
|
||||
def bar(x): # $tracked=attr
|
||||
z = x.attr # $tracked $tracked=attr
|
||||
do_stuff(z) # $tracked
|
||||
|
||||
def expects_int(x): # $int=field $f+:str=field
|
||||
do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field
|
||||
|
||||
def expects_string(x): # $f+:int=field $str=field
|
||||
do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field
|
||||
|
||||
def test_incompatible_types():
|
||||
x = SomeClass() # $int,str=field
|
||||
x.field = int(5) # $int=field $f+:str=field $int $f+:str
|
||||
expects_int(x) # $int=field $f+:str=field
|
||||
x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
|
||||
expects_string(x) # $f+:int=field $str=field
|
||||
61
python/ql/test/experimental/dataflow/typetracking/test.py
Normal file
61
python/ql/test/experimental/dataflow/typetracking/test.py
Normal file
@@ -0,0 +1,61 @@
|
||||
def get_tracked():
|
||||
x = tracked # $tracked
|
||||
return x # $tracked
|
||||
|
||||
def use_tracked_foo(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def foo():
|
||||
use_tracked_foo(
|
||||
get_tracked() # $tracked
|
||||
)
|
||||
|
||||
def use_tracked_bar(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def bar():
|
||||
x = get_tracked() # $tracked
|
||||
use_tracked_bar(x) # $tracked
|
||||
|
||||
def use_tracked_baz(x): # $tracked
|
||||
do_stuff(x) # $tracked
|
||||
|
||||
def baz():
|
||||
x = tracked # $tracked
|
||||
use_tracked_baz(x) # $tracked
|
||||
|
||||
def id(x): # $tracked
|
||||
return x # $tracked
|
||||
|
||||
def use_tracked_quux(x): # $f-:tracked
|
||||
do_stuff(y) # call after return -- not tracked in here.
|
||||
|
||||
def quux():
|
||||
x = tracked # $tracked
|
||||
y = id(x) # $tracked
|
||||
use_tracked_quux(y) # not tracked out of call to id.
|
||||
|
||||
g = None
|
||||
|
||||
def write_g(x): # $tracked
|
||||
g = x # $tracked
|
||||
|
||||
def use_g():
|
||||
do_stuff(g) # $f-:tracked // no global flow for now.
|
||||
|
||||
def global_var_write_test():
|
||||
x = tracked # $tracked
|
||||
write_g(x) # $tracked
|
||||
use_g()
|
||||
|
||||
def expects_int(x): # $int
|
||||
do_int_stuff(x) # $int
|
||||
|
||||
def expects_string(x): # $str
|
||||
do_string_stuff(x) # $str
|
||||
|
||||
def redefine_test():
|
||||
x = int(5) # $int
|
||||
expects_int(x) # $int
|
||||
x = str("Hello") # $str
|
||||
expects_string(x) # $str
|
||||
72
python/ql/test/experimental/dataflow/typetracking/tracked.ql
Normal file
72
python/ql/test/experimental/dataflow/typetracking/tracked.ql
Normal file
@@ -0,0 +1,72 @@
|
||||
import python
|
||||
import experimental.dataflow.TypeTracker
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
|
||||
Node tracked(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
|
||||
or
|
||||
exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
|
||||
}
|
||||
|
||||
class TrackedTest extends InlineExpectationsTest {
|
||||
TrackedTest() { this = "TrackedTest" }
|
||||
|
||||
override string getARelevantTag() { result = "tracked" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = tracked(t) and
|
||||
tag = "tracked" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Node int_type(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
|
||||
or
|
||||
exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
|
||||
}
|
||||
|
||||
Node string_type(TypeTracker t) {
|
||||
t.start() and
|
||||
result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
|
||||
or
|
||||
exists(TypeTracker t2 | result = string_type(t2).track(t2, t))
|
||||
}
|
||||
|
||||
class TrackedIntTest extends InlineExpectationsTest {
|
||||
TrackedIntTest() { this = "TrackedIntTest" }
|
||||
|
||||
override string getARelevantTag() { result = "int" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = int_type(t) and
|
||||
tag = "int" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class TrackedStringTest extends InlineExpectationsTest {
|
||||
TrackedStringTest() { this = "TrackedStringTest" }
|
||||
|
||||
override string getARelevantTag() { result = "str" }
|
||||
|
||||
override predicate hasActualResult(Location location, string element, string tag, string value) {
|
||||
exists(Node e, TypeTracker t |
|
||||
e = string_type(t) and
|
||||
tag = "str" and
|
||||
location = e.getLocation() and
|
||||
value = t.getAttr() and
|
||||
element = e.toString()
|
||||
)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user