Merge pull request #10539 from yoff/python/improve-API-graphs

Python: add subscript to API graphs
This commit is contained in:
yoff
2022-09-29 21:05:22 +02:00
committed by GitHub
7 changed files with 121 additions and 147 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added the ability to refer to subscript operations in the API graph. It is now possible to write `response().getMember("cookies").getASubscript()` to find code like `resp.cookies["key"]` (assuming `response` returns an API node for response objects).

View File

@@ -243,6 +243,12 @@ module API {
*/
Node getAwaited() { result = this.getASuccessor(Label::await()) }
/**
* Gets a node representing a subscript of this node, that is, a successor
* of this node along an edge labeled `Label::subscript()`.
* For example `obj[x]` is a subscript of `obj`.
*/
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
@@ -570,8 +576,6 @@ module API {
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
*/
private import semmle.python.internal.Awaited
cached
newtype TApiNode =
/** The root of the API graph. */
@@ -747,6 +751,14 @@ module API {
lbl = Label::return() and
ref = pred.getACall()
or
// Awaiting a node that is a use of `base`
lbl = Label::await() and
ref = pred.getAnAwaited()
or
// Subscripting a node that is a use of `base`
lbl = Label::subscript() and
ref = pred.getASubscript()
or
// Subclassing a node
lbl = Label::subclass() and
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
@@ -760,13 +772,6 @@ module API {
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
)
)
or
// awaiting
exists(DataFlow::Node awaitedValue |
lbl = Label::await() and
ref = awaited(awaitedValue) and
pred.flowsTo(awaitedValue)
)
)
or
exists(DataFlow::Node def, PY::CallableExpr fn |
@@ -986,6 +991,7 @@ module API {
MkLabelReturn() or
MkLabelSubclass() or
MkLabelAwait() or
MkLabelSubscript() or
MkLabelEntryPoint(EntryPoint ep)
/** A label for a module. */
@@ -1061,6 +1067,11 @@ module API {
override string toString() { result = "getAwaited()" }
}
/**
* A label that gets the subscript of a sequence/mapping.
* Its string form is `getASubscript()`.
*/
class LabelSubscript extends ApiLabel, MkLabelSubscript {
override string toString() { result = "getASubscript()" }
}
/** A label for entry points. */
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
private EntryPoint entry;
@@ -1106,6 +1117,9 @@ module API {
/** Gets the `await` edge label. */
LabelAwait await() { any() }
/** Gets the `subscript` edge label, that is, the label of type `LabelSubscript`. */
LabelSubscript subscript() { any() }
/** Gets the label going from the root node to the nodes associated with the given entry point. */
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
}

View File

@@ -10,6 +10,7 @@ private import python
import DataFlowPublic
private import DataFlowPrivate
private import semmle.python.internal.CachedStages
private import semmle.python.internal.Awaited
/**
* A data flow node that is a source of local flow. This includes things like
@@ -95,6 +96,16 @@ class LocalSourceNode extends Node {
*/
CallCfgNode getACall() { Cached::call(this, result) }
/**
* Gets an awaited value from this node, that is, the result of awaiting
* a value that this node flows to.
*/
Node getAnAwaited() { Cached::await(this, result) }
/**
* Gets a subscript of this node, that is, a subscript operation whose
* subscripted object is a value that this node flows to.
*/
Node getASubscript() { Cached::subscript(this, result) }
/**
* Gets a call to the method `methodName` on this node.
*
@@ -225,4 +236,26 @@ private module Cached {
n = call.getFunction()
)
}
/**
* Holds if `awaited` is the result of awaiting some value that `node` flows to.
*/
cached
predicate await(LocalSourceNode node, Node awaited) {
  exists(Node operand |
    awaited(operand) = awaited and
    node.flowsTo(operand)
  )
}
/**
* Holds if `subscript` is a subscript operation whose subscripted object
* (a sequence/mapping) is a value that `node` flows to.
*/
cached
predicate subscript(LocalSourceNode node, CfgNode subscript) {
  exists(CfgNode container |
    container.getNode() = subscript.getNode().(SubscriptNode).getObject() and
    node.flowsTo(container)
  )
}
}

View File

@@ -424,22 +424,20 @@ module Flask {
}
}
/**
* Gets an API node obtained from the `files` member of a flask request:
* a subscript of it, the return value of its `get` member, or a subscript
* of the return value of its `getlist` member.
*/
private API::Node requestFileStorage() {
  // TODO: This approach for identifying member-access is very adhoc, and we should
  // be able to do something more structured for providing modeling of the members
  // of a container-object.
  exists(API::Node files | files = request().getMember("files") |
    result = files.getASubscript()
    or
    result = files.getMember("get").getReturn()
    or
    result = files.getMember("getlist").getReturn().getASubscript()
  )
}
/** A `FileStorage` instance that originates from a flask request. */
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
FlaskRequestFileStorageInstances() {
// TODO: This approach for identifying member-access is very adhoc, and we should
// be able to do something more structured for providing modeling of the members
// of a container-object.
exists(API::Node files | files = request().getMember("files") |
this.asCfgNode().(SubscriptNode).getObject() =
files.getAValueReachableFromSource().asCfgNode()
or
this = files.getMember("get").getACall()
or
this.asCfgNode().(SubscriptNode).getObject() =
files.getMember("getlist").getReturn().getAValueReachableFromSource().asCfgNode()
)
}
FlaskRequestFileStorageInstances() { this = requestFileStorage().asSource() }
}
/** A `Headers` instance that originates from a flask request. */

View File

@@ -1725,39 +1725,21 @@ private module StdlibPrivate {
API::Node getlistResult() { result = getlistRef().getReturn() }
/** Gets a reference to a list of fields. */
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() =
instance().getAValueReachableFromSource().asCfgNode()
API::Node fieldList() {
result = getlistResult()
or
exists(DataFlow::TypeTracker t2 | result = fieldList(t2).track(t2, t))
}
/** Gets a reference to a list of fields. */
DataFlow::Node fieldList() {
result = getlistResult().getAValueReachableFromSource() or
result = getvalueResult().getAValueReachableFromSource() or
fieldList(DataFlow::TypeTracker::end()).flowsTo(result)
result = getvalueResult()
or
result = instance().getASubscript()
}
/** Gets a reference to a field. */
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
t.start() and
// TODO: Should have better handling of subscripting
result.asCfgNode().(SubscriptNode).getObject() =
[instance().getAValueReachableFromSource(), fieldList()].asCfgNode()
API::Node field() {
result = getfirstResult()
or
exists(DataFlow::TypeTracker t2 | result = field(t2).track(t2, t))
}
/** Gets a reference to a field. */
DataFlow::Node field() {
result = getfirstResult().getAValueReachableFromSource()
result = getvalueResult()
or
result = getvalueResult().getAValueReachableFromSource()
or
field(DataFlow::TypeTracker::end()).flowsTo(result)
result = [instance(), fieldList()].getASubscript()
}
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
@@ -1780,11 +1762,13 @@ private module StdlibPrivate {
)
or
// Indexing
nodeFrom in [instance().getAValueReachableFromSource(), fieldList()] and
nodeFrom in [
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
] and
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
or
// Attributes on Field
nodeFrom = field() and
nodeFrom = field().getAValueReachableFromSource() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in ["value", "file", "filename"]
)

View File

@@ -85,35 +85,20 @@ private module ExperimentalPrivateDjango {
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to a header instance. */
private DataFlow::LocalSourceNode headerInstance(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript |
subscript.getObject() =
baseClassRef().getReturn().getAValueReachableFromSource().asCfgNode() and
result.asCfgNode() = subscript
)
or
result.(DataFlow::AttrRead).getObject() =
baseClassRef().getReturn().getAValueReachableFromSource()
)
API::Node headerInstance() {
result = baseClassRef().getReturn().getASubscript()
or
exists(DataFlow::TypeTracker t2 | result = headerInstance(t2).track(t2, t))
}
/** Gets a reference to a header instance use. */
private DataFlow::Node headerInstance() {
headerInstance(DataFlow::TypeTracker::end()).flowsTo(result)
result = baseClassRef().getReturn().getAMember()
}
/** Gets a reference to a header instance call with `__setitem__`. */
private DataFlow::Node headerSetItemCall() {
API::Node headerSetItem() {
result = headerInstance() and
result.(DataFlow::AttrRead).getAttributeName() = "__setitem__"
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
}
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
DjangoResponseSetItemCall() { this.getFunction() = headerSetItemCall() }
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
override DataFlow::Node getNameArg() { result = this.getArg(0) }
@@ -124,7 +109,8 @@ private module ExperimentalPrivateDjango {
DataFlow::Node headerInput;
DjangoResponseDefinition() {
this.asCfgNode().(DefinitionNode) = headerInstance().asCfgNode() and
this.asCfgNode().(DefinitionNode) =
headerInstance().getAValueReachableFromSource().asCfgNode() and
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
}

View File

@@ -45,33 +45,6 @@ private module NoSql {
/**
* Gets a reference to a `Mongo` DB instance.
*/
private DataFlow::LocalSourceNode mongoDBInstance(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript |
subscript.getObject() = mongoClientInstance().getAValueReachableFromSource().asCfgNode() and
result.asCfgNode() = subscript
)
or
result.(DataFlow::AttrRead).getObject() = mongoClientInstance().getAValueReachableFromSource()
or
result = mongoEngine().getMember(["get_db", "connect"]).getACall()
or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getACall()
or
result = flask_MongoEngine().getMember("get_db").getACall()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getACall()
)
or
exists(DataFlow::TypeTracker t2 | result = mongoDBInstance(t2).track(t2, t))
}
/**
* Gets a reference to a `Mongo` DB use.
*
* ```py
* from flask_pymongo import PyMongo
@@ -79,36 +52,26 @@ private module NoSql {
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
* `mongo.db` would be a `Mongo` instance.
*/
private DataFlow::Node mongoDBInstance() {
mongoDBInstance(DataFlow::TypeTracker::end()).flowsTo(result)
}
/**
* Gets a reference to a `Mongo` collection use.
*/
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
t.start() and
(
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
subscript.getObject() = mongoDBInstance().asCfgNode()
)
or
result.(DataFlow::AttrRead).getObject() = mongoDBInstance()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
result
.(DataFlow::MethodCallNode)
.calls(mongoDBInstance(), ["get_collection", "create_collection"])
)
private API::Node mongoDBInstance() {
result = mongoClientInstance().getASubscript()
or
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
result = mongoClientInstance().getAMember()
or
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
or
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
or
result = flask_MongoEngine().getMember("get_db").getReturn()
or
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
}
/**
* Gets a reference to a `Mongo` collection use.
* Gets a reference to a `Mongo` collection.
*
* ```py
* from flask_pymongo import PyMongo
@@ -116,10 +79,16 @@ private module NoSql {
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
* `mongo.db.user` would be a `Mongo` collection.
*/
private DataFlow::Node mongoCollection() {
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
private API::Node mongoCollection() {
  // A collection is reached from a DB instance by subscripting it, by reading
  // any member of it, or through one of the collection-returning methods.
  exists(API::Node db | db = mongoDBInstance() |
    result = db.getASubscript()
    or
    result = db.getAMember()
    or
    // see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
    // see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
    result = db.getMember(["get_collection", "create_collection"]).getReturn()
  )
}
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
@@ -132,22 +101,6 @@ private module NoSql {
}
}
/**
* Gets a reference to a `Mongo` collection method.
*
* ```py
* from flask_pymongo import PyMongo
* mongo = PyMongo(app)
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find` would be a collection method, and so the result.
*/
private DataFlow::Node mongoCollectionMethod() {
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
}
/**
* Gets a reference to a `Mongo` collection method call
*
@@ -157,10 +110,12 @@ private module NoSql {
* mongo.db.user.find({'name': safe_search})
* ```
*
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
*/
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
MongoCollectionCall() {
this = mongoCollection().getMember(any(MongoCollectionMethodNames m)).getACall()
}
override DataFlow::Node getQuery() { result = this.getArg(0) }
}