mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Python: Clean up basicStoreStep
Moves the `flowsTo` logic into the shared implementation, so that `TypeTrackingPrivate` only has to define the shape of immediate store steps. Also cleans up the documentation to talk a bit more about what `content` can represent, and what caveats there are.
This commit is contained in:
@@ -2,7 +2,18 @@
|
||||
|
||||
private import TypeTrackerPrivate
|
||||
|
||||
/** Any string that may appear as the name of a piece of content. */
|
||||
/**
|
||||
* Any string that may appear as the name of a piece of content. This will usually include things like:
|
||||
* - Attribute names (in Python)
|
||||
* - Property names (in JavaScript)
|
||||
*
|
||||
* In general, this can also be used to model things like stores to specific list indices. To ensure
|
||||
* correctness, it is important that
|
||||
*
|
||||
* - different types of content do not have overlapping names, and
|
||||
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
|
||||
* content instead.
|
||||
*/
|
||||
class ContentName extends string {
|
||||
ContentName() { this = getPossibleContentName() }
|
||||
}
|
||||
@@ -70,14 +81,41 @@ module StepSummary {
|
||||
summary = ReturnStep()
|
||||
or
|
||||
exists(string content |
|
||||
basicStoreStep(nodeFrom, nodeTo, content) and
|
||||
localSourceStoreStep(nodeFrom, nodeTo, content) and
|
||||
summary = StoreStep(content)
|
||||
or
|
||||
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*
|
||||
* Note that `nodeTo` will always be a local source node that flows to the place where the content
|
||||
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
|
||||
* from the point of view of the execution of the program.
|
||||
*
|
||||
* For instance, if we interpret attribute writes in Python as writing to content with the same
|
||||
* name as the attribute and consider the following snippet
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.attr = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.attr
|
||||
* ```
|
||||
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
|
||||
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
|
||||
}
|
||||
}
|
||||
|
||||
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
|
||||
@@ -92,7 +130,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentNam
|
||||
*
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for tracking some type `myType`:
|
||||
* ```
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
@@ -253,7 +291,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
|
||||
* It is recommended that all uses of this type are written in the following form,
|
||||
* for back-tracking some callback type `myCallback`:
|
||||
*
|
||||
* ```
|
||||
* ```ql
|
||||
* DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
|
||||
* t.start() and
|
||||
* result = (< some API call >).getArgument(< n >).getALocalSource()
|
||||
|
||||
@@ -13,10 +13,8 @@ predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of a possible piece of content. This will usually include things like
|
||||
*
|
||||
* - Attribute names (in Python)
|
||||
* - Property names (in JavaScript)
|
||||
* Gets the name of a possible piece of content. For Python, this is currently only attribute names,
|
||||
* using the name of the attribute for the corresponding content.
|
||||
*/
|
||||
string getPossibleContentName() { result = any(DataFlowPublic::AttrRef a).getAttributeName() }
|
||||
|
||||
@@ -54,34 +52,12 @@ predicate returnStep(DataFlowPrivate::ReturnNode nodeFrom, Node nodeTo) {
|
||||
|
||||
/**
|
||||
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
|
||||
*
|
||||
* Note that the choice of `nodeTo` does not have to make sense "chronologically".
|
||||
* All we care about is whether the `content` content of `nodeTo` can have a specific type,
|
||||
* and the assumption is that if a specific type appears here, then any access of that
|
||||
* particular content can yield something of that particular type.
|
||||
*
|
||||
* Thus, in an example such as
|
||||
*
|
||||
* ```python
|
||||
* def foo(y):
|
||||
* x = Foo()
|
||||
* bar(x)
|
||||
* x.content = y
|
||||
* baz(x)
|
||||
*
|
||||
* def bar(x):
|
||||
* z = x.content
|
||||
* ```
|
||||
* for the content write `x.content = y`, we will have `content` being the literal string `"content"`,
|
||||
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
|
||||
* function. This means we will track the fact that `x.content` can have the type of `y` into the
|
||||
* assignment to `z` inside `bar`, even though this content write happens _after_ `bar` is called.
|
||||
*/
|
||||
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
|
||||
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string content) {
|
||||
exists(DataFlowPublic::AttrWrite a |
|
||||
a.mayHaveAttributeName(content) and
|
||||
nodeFrom = a.getValue() and
|
||||
nodeTo.flowsTo(a.getObject())
|
||||
nodeTo = a.getObject()
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user