Python: Add LocalSourceNode and flowsTo

This fixes the major performance problem with type tracking on
some (pathological) databases.

The interface could probably be improved a bit. In particular, I'm
thinking that we might want to have `DataFlow::exprNode` return a
`LocalSourceNode` so that a cast isn't necessary in order to use
`flowsTo`.

I have added two `cached` annotations. The one on `flowsTo` is
crucial, as performance regresses without it. The one on
`simpleLocalFlowStep` may not be needed, but Java has a similar
annotation, and to me it makes sense to have this relation cached.
This commit is contained in:
Taus Brock-Nannestad
2020-11-05 16:26:03 +01:00
parent 104ff5d217
commit 83ba8c9bf5
7 changed files with 28 additions and 13 deletions

View File

@@ -336,7 +336,7 @@ module HTTP {
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
result = str.getText()
)
}
@@ -403,7 +403,9 @@ module HTTP {
/** Gets the mimetype of this HTTP response, if it can be statically determined. */
string getMimetype() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getMimetypeOrContentTypeArg()) and
DataFlow::exprNode(str)
.(DataFlow::LocalSourceNode)
.flowsTo(this.getMimetypeOrContentTypeArg()) and
result = str.getText().splitAt(";", 0)
)
or

View File

@@ -46,7 +46,7 @@ class StepSummary extends TStepSummary {
module StepSummary {
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
@@ -70,9 +70,8 @@ module StepSummary {
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo) or
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
simpleLocalFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo)
}
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
@@ -115,11 +114,11 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
predicate basicStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string attr) {
exists(AttrWrite a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getValue() and
simpleLocalFlowStep*(nodeTo, a.getObject())
nodeTo.flowsTo(a.getObject())
)
}

View File

@@ -30,8 +30,8 @@ abstract class AttrRef extends Node {
predicate mayHaveAttributeName(string attrName) {
attrName = this.getAttributeName()
or
exists(Node nodeFrom |
localFlow(nodeFrom, this.getAttributeNameExpr()) and
exists(LocalSourceNode nodeFrom |
nodeFrom.flowsTo(this.getAttributeNameExpr()) and
attrName = nodeFrom.asExpr().(StrConst).getText()
)
}

View File

@@ -181,6 +181,7 @@ module EssaFlow {
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
* excludes SSA flow through instance fields.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is ESSA-flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.

View File

@@ -351,6 +351,19 @@ class BarrierGuard extends GuardNode {
}
}
/**
* A data flow node that is a source of local flow, that is, a node that has
* no incoming `simpleLocalFlowStep`. This includes things like
* - Expressions
* - Function parameters
*/
class LocalSourceNode extends Node {
LocalSourceNode() { not simpleLocalFlowStep(_, this) }
/**
* Holds if this `LocalSourceNode` can flow to `nodeTo` in zero or more local flow steps.
*
* The underlying closure is reflexive, so this predicate also holds when
* `nodeTo` is this node itself.
*/
cached
predicate flowsTo(Node nodeTo) { simpleLocalFlowStep*(this, nodeTo) }
}
/**
* A reference contained in an object. This is either a field or a property.
*/

View File

@@ -1641,7 +1641,7 @@ private module Django {
DjangoRouteRegex() {
this instanceof StrConst and
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
DataFlow::exprNode(this).(DataFlow::LocalSourceNode).flowsTo(rePathCall.getUrlPatternArg())
}
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }

View File

@@ -319,9 +319,9 @@ private module FlaskModel {
}
override Function getARouteHandler() {
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
exists(DataFlow::Node view_func_arg, DataFlow::LocalSourceNode func_src |
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
DataFlow::localFlow(func_src, view_func_arg) and
func_src.flowsTo(view_func_arg) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
)
}