Merge pull request #10539 from yoff/python/improve-API-graphs

Python: add subscript to API graphs
This commit is contained in:
yoff
2022-09-29 21:05:22 +02:00
committed by GitHub
7 changed files with 121 additions and 147 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added the ability to refer to subscript operations in the API graph. It is now possible to write `response().getMember("cookies").getASubscript()` to find code like `resp.cookies["key"]` (assuming `response` returns an API node for response objects).

View File

@@ -243,6 +243,12 @@ module API {
*/
Node getAwaited() { result = this.getASuccessor(Label::await()) }
/**
* Gets a node representing a subscript of this node.
* For example `obj[x]` is a subscript of `obj`.
*
* The result is any successor of this node reached via an edge labeled
* `Label::subscript()`.
*/
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
@@ -570,8 +576,6 @@ module API {
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
*/
private import semmle.python.internal.Awaited
cached
newtype TApiNode =
/** The root of the API graph. */
@@ -747,6 +751,14 @@ module API {
lbl = Label::return() and
ref = pred.getACall()
or
// Awaiting a node that is a use of `base`
lbl = Label::await() and
ref = pred.getAnAwaited()
or
// Subscripting a node that is a use of `base`
lbl = Label::subscript() and
ref = pred.getASubscript()
or
// Subclassing a node
lbl = Label::subclass() and
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
@@ -760,13 +772,6 @@ module API {
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
)
)
or
// awaiting
exists(DataFlow::Node awaitedValue |
lbl = Label::await() and
ref = awaited(awaitedValue) and
pred.flowsTo(awaitedValue)
)
)
or
exists(DataFlow::Node def, PY::CallableExpr fn |
@@ -986,6 +991,7 @@ module API {
MkLabelReturn() or
MkLabelSubclass() or
MkLabelAwait() or
MkLabelSubscript() or
MkLabelEntryPoint(EntryPoint ep)
/** A label for a module. */
@@ -1061,6 +1067,11 @@ module API {
override string toString() { result = "getAwaited()" }
}
/**
* A label that gets the subscript of a sequence/mapping.
*
* Its string representation is `getASubscript()`.
*/
class LabelSubscript extends ApiLabel, MkLabelSubscript {
override string toString() { result = "getASubscript()" }
}
/** A label for entry points. */
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
private EntryPoint entry;
@@ -1106,6 +1117,9 @@ module API {
/** Gets the `await` edge label, that is, any value of type `LabelAwait`. */
LabelAwait await() { any() }
/** Gets the `subscript` edge label, that is, any value of type `LabelSubscript`. */
LabelSubscript subscript() { any() }
/**
* Gets the label going from the root node to the nodes associated with the given entry point.
*
* The label is the `MkLabelEntryPoint` value constructed from `ep`.
*/
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
}

View File

@@ -10,6 +10,7 @@ private import python
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.internal.CachedStages
private import semmle.python.internal.Awaited
/**
* A data flow node that is a source of local flow. This includes things like
@@ -95,6 +96,16 @@ class LocalSourceNode extends Node {
*/
CallCfgNode getACall() { Cached::call(this, result) }
/**
* Gets an awaited value from this node, that is, a node for the result of
* awaiting a value that this node flows to.
*/
Node getAnAwaited() { Cached::await(this, result) }
/**
* Gets a subscript of this node, that is, a node for a subscript operation
* whose subscripted object is a value that this node flows to.
*/
Node getASubscript() { Cached::subscript(this, result) }
/**
* Gets a call to the method `methodName` on this node.
*
@@ -225,4 +236,26 @@ private module Cached {
n = call.getFunction()
)
}
/**
* Holds if `awaited` is the result of awaiting some value that `node` flows to.
*/
cached
predicate await(LocalSourceNode node, Node awaited) {
  exists(Node operand |
    awaited = awaited(operand) and
    node.flowsTo(operand)
  )
}
/**
* Holds if `subscript` is a subscript operation whose subscripted object is a
* sequence/mapping that `node` flows to.
*/
cached
predicate subscript(LocalSourceNode node, CfgNode subscript) {
  exists(CfgNode container |
    container.getNode() = subscript.getNode().(SubscriptNode).getObject() and
    node.flowsTo(container)
  )
}
}

View File

@@ -424,22 +424,20 @@ module Flask {
}
}
private API::Node requestFileStorage() {
// TODO: This approach for identifying member-access is very adhoc, and we should
// be able to do something more structured for providing modeling of the members
// of a container-object.
result = request().getMember("files").getASubscript()
or
result = request().getMember("files").getMember("get").getReturn()
or
result = request().getMember("files").getMember("getlist").getReturn().getASubscript()
}
/** A `FileStorage` instance that originates from a flask request. */
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
FlaskRequestFileStorageInstances() {
// TODO: This approach for identifying member-access is very adhoc, and we should
// be able to do something more structured for providing modeling of the members
// of a container-object.
exists(API::Node files | files = request().getMember("files") |
this.asCfgNode().(SubscriptNode).getObject() =
files.getAValueReachableFromSource().asCfgNode()
or
this = files.getMember("get").getACall()
or
this.asCfgNode().(SubscriptNode).getObject() =
files.getMember("getlist").getReturn().getAValueReachableFromSource().asCfgNode()
)
}
FlaskRequestFileStorageInstances() { this = requestFileStorage().asSource() }
}
/** A `Headers` instance that originates from a flask request. */

View File

@@ -1725,39 +1725,21 @@ private module StdlibPrivate {
API::Node getlistResult() { result = getlistRef().getReturn() }
/** Gets a reference to a list of fields. */
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() =
instance().getAValueReachableFromSource().asCfgNode()
API::Node fieldList() {
result = getlistResult()
or
exists(DataFlow::TypeTracker t2 | result = fieldList(t2).track(t2, t))
}
/** Gets a reference to a list of fields. */
DataFlow::Node fieldList() {
result = getlistResult().getAValueReachableFromSource() or
result = getvalueResult().getAValueReachableFromSource() or
fieldList(DataFlow::TypeTracker::end()).flowsTo(result)
result = getvalueResult()
or
result = instance().getASubscript()
}
/** Gets a reference to a field. */
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() =
[instance().getAValueReachableFromSource(), fieldList()].asCfgNode()
API::Node field() {
result = getfirstResult()
or
exists(DataFlow::TypeTracker t2 | result = field(t2).track(t2, t))
}
/** Gets a reference to a field. */
DataFlow::Node field() {
result = getfirstResult().getAValueReachableFromSource()
result = getvalueResult()
or
result = getvalueResult().getAValueReachableFromSource()
or
field(DataFlow::TypeTracker::end()).flowsTo(result)
result = [instance(), fieldList()].getASubscript()
}
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
@@ -1780,11 +1762,13 @@ private module StdlibPrivate {
)
or
// Indexing
nodeFrom in [instance().getAValueReachableFromSource(), fieldList()] and
nodeFrom in [
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
] and
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
or
// Attributes on Field
nodeFrom = field() and
nodeFrom = field().getAValueReachableFromSource() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in ["value", "file", "filename"]
)