mirror of
https://github.com/github/codeql.git
synced 2025-12-21 19:26:31 +01:00
Merge pull request #10539 from yoff/python/improve-API-graphs
Python: add subscript to API graphs
This commit is contained in:
4
python/ql/lib/change-notes/2022-09-28-api-subscript.md
Normal file
4
python/ql/lib/change-notes/2022-09-28-api-subscript.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Added the ability to refer to subscript operations in the API graph. It is now possible to write `response().getMember("cookies").getASubscript()` to find code like `resp.cookies["key"]` (assuming `response` returns an API node for response objects).
|
||||
@@ -243,6 +243,12 @@ module API {
|
||||
*/
|
||||
Node getAwaited() { result = this.getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a subscript of this node.
|
||||
* For example `obj[x]` is a subscript of `obj`.
|
||||
*/
|
||||
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
@@ -570,8 +576,6 @@ module API {
|
||||
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
|
||||
*/
|
||||
|
||||
private import semmle.python.internal.Awaited
|
||||
|
||||
cached
|
||||
newtype TApiNode =
|
||||
/** The root of the API graph. */
|
||||
@@ -747,6 +751,14 @@ module API {
|
||||
lbl = Label::return() and
|
||||
ref = pred.getACall()
|
||||
or
|
||||
// Awaiting a node that is a use of `base`
|
||||
lbl = Label::await() and
|
||||
ref = pred.getAnAwaited()
|
||||
or
|
||||
// Subscripting a node that is a use of `base`
|
||||
lbl = Label::subscript() and
|
||||
ref = pred.getASubscript()
|
||||
or
|
||||
// Subclassing a node
|
||||
lbl = Label::subclass() and
|
||||
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
@@ -760,13 +772,6 @@ module API {
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
|
||||
)
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref = awaited(awaitedValue) and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
exists(DataFlow::Node def, PY::CallableExpr fn |
|
||||
@@ -986,6 +991,7 @@ module API {
|
||||
MkLabelReturn() or
|
||||
MkLabelSubclass() or
|
||||
MkLabelAwait() or
|
||||
MkLabelSubscript() or
|
||||
MkLabelEntryPoint(EntryPoint ep)
|
||||
|
||||
/** A label for a module. */
|
||||
@@ -1061,6 +1067,11 @@ module API {
|
||||
override string toString() { result = "getAwaited()" }
|
||||
}
|
||||
|
||||
/** A label that gets the subscript of a sequence/mapping. */
|
||||
class LabelSubscript extends ApiLabel, MkLabelSubscript {
|
||||
override string toString() { result = "getASubscript()" }
|
||||
}
|
||||
|
||||
/** A label for entry points. */
|
||||
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
|
||||
private EntryPoint entry;
|
||||
@@ -1106,6 +1117,9 @@ module API {
|
||||
/** Gets the `await` edge label. */
|
||||
LabelAwait await() { any() }
|
||||
|
||||
/** Gets the `subscript` edge label. */
|
||||
LabelSubscript subscript() { any() }
|
||||
|
||||
/** Gets the label going from the root node to the nodes associated with the given entry point. */
|
||||
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ private import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
private import semmle.python.internal.CachedStages
|
||||
private import semmle.python.internal.Awaited
|
||||
|
||||
/**
|
||||
* A data flow node that is a source of local flow. This includes things like
|
||||
@@ -95,6 +96,16 @@ class LocalSourceNode extends Node {
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets an awaited value from this node.
|
||||
*/
|
||||
Node getAnAwaited() { Cached::await(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a subscript of this node.
|
||||
*/
|
||||
Node getASubscript() { Cached::subscript(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a call to the method `methodName` on this node.
|
||||
*
|
||||
@@ -225,4 +236,26 @@ private module Cached {
|
||||
n = call.getFunction()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` flows to a value that, when awaited, results in `awaited`.
|
||||
*/
|
||||
cached
|
||||
predicate await(LocalSourceNode node, Node awaited) {
|
||||
exists(Node awaitedValue |
|
||||
node.flowsTo(awaitedValue) and
|
||||
awaited = awaited(awaitedValue)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
|
||||
*/
|
||||
cached
|
||||
predicate subscript(LocalSourceNode node, CfgNode subscript) {
|
||||
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
|
||||
node.flowsTo(seq) and
|
||||
seq.getNode() = subscriptNode.getObject()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,22 +424,20 @@ module Flask {
|
||||
}
|
||||
}
|
||||
|
||||
private API::Node requestFileStorage() {
|
||||
// TODO: This approach for identifying member-access is very adhoc, and we should
|
||||
// be able to do something more structured for providing modeling of the members
|
||||
// of a container-object.
|
||||
result = request().getMember("files").getASubscript()
|
||||
or
|
||||
result = request().getMember("files").getMember("get").getReturn()
|
||||
or
|
||||
result = request().getMember("files").getMember("getlist").getReturn().getASubscript()
|
||||
}
|
||||
|
||||
/** A `FileStorage` instance that originates from a flask request. */
|
||||
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
|
||||
FlaskRequestFileStorageInstances() {
|
||||
// TODO: This approach for identifying member-access is very adhoc, and we should
|
||||
// be able to do something more structured for providing modeling of the members
|
||||
// of a container-object.
|
||||
exists(API::Node files | files = request().getMember("files") |
|
||||
this.asCfgNode().(SubscriptNode).getObject() =
|
||||
files.getAValueReachableFromSource().asCfgNode()
|
||||
or
|
||||
this = files.getMember("get").getACall()
|
||||
or
|
||||
this.asCfgNode().(SubscriptNode).getObject() =
|
||||
files.getMember("getlist").getReturn().getAValueReachableFromSource().asCfgNode()
|
||||
)
|
||||
}
|
||||
FlaskRequestFileStorageInstances() { this = requestFileStorage().asSource() }
|
||||
}
|
||||
|
||||
/** A `Headers` instance that originates from a flask request. */
|
||||
|
||||
@@ -1725,39 +1725,21 @@ private module StdlibPrivate {
|
||||
API::Node getlistResult() { result = getlistRef().getReturn() }
|
||||
|
||||
/** Gets a reference to a list of fields. */
|
||||
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
// TODO: Should have better handling of subscripting
|
||||
result.asCfgNode().(SubscriptNode).getObject() =
|
||||
instance().getAValueReachableFromSource().asCfgNode()
|
||||
API::Node fieldList() {
|
||||
result = getlistResult()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = fieldList(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a list of fields. */
|
||||
DataFlow::Node fieldList() {
|
||||
result = getlistResult().getAValueReachableFromSource() or
|
||||
result = getvalueResult().getAValueReachableFromSource() or
|
||||
fieldList(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
result = getvalueResult()
|
||||
or
|
||||
result = instance().getASubscript()
|
||||
}
|
||||
|
||||
/** Gets a reference to a field. */
|
||||
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
// TODO: Should have better handling of subscripting
|
||||
result.asCfgNode().(SubscriptNode).getObject() =
|
||||
[instance().getAValueReachableFromSource(), fieldList()].asCfgNode()
|
||||
API::Node field() {
|
||||
result = getfirstResult()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = field(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a field. */
|
||||
DataFlow::Node field() {
|
||||
result = getfirstResult().getAValueReachableFromSource()
|
||||
result = getvalueResult()
|
||||
or
|
||||
result = getvalueResult().getAValueReachableFromSource()
|
||||
or
|
||||
field(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
result = [instance(), fieldList()].getASubscript()
|
||||
}
|
||||
|
||||
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
@@ -1780,11 +1762,13 @@ private module StdlibPrivate {
|
||||
)
|
||||
or
|
||||
// Indexing
|
||||
nodeFrom in [instance().getAValueReachableFromSource(), fieldList()] and
|
||||
nodeFrom in [
|
||||
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
|
||||
] and
|
||||
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
|
||||
or
|
||||
// Attributes on Field
|
||||
nodeFrom = field() and
|
||||
nodeFrom = field().getAValueReachableFromSource() and
|
||||
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
|
||||
read.getAttributeName() in ["value", "file", "filename"]
|
||||
)
|
||||
|
||||
@@ -85,35 +85,20 @@ private module ExperimentalPrivateDjango {
|
||||
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
|
||||
|
||||
/** Gets a reference to a header instance. */
|
||||
private DataFlow::LocalSourceNode headerInstance(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
(
|
||||
exists(SubscriptNode subscript |
|
||||
subscript.getObject() =
|
||||
baseClassRef().getReturn().getAValueReachableFromSource().asCfgNode() and
|
||||
result.asCfgNode() = subscript
|
||||
)
|
||||
or
|
||||
result.(DataFlow::AttrRead).getObject() =
|
||||
baseClassRef().getReturn().getAValueReachableFromSource()
|
||||
)
|
||||
API::Node headerInstance() {
|
||||
result = baseClassRef().getReturn().getASubscript()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = headerInstance(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a header instance use. */
|
||||
private DataFlow::Node headerInstance() {
|
||||
headerInstance(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
result = baseClassRef().getReturn().getAMember()
|
||||
}
|
||||
|
||||
/** Gets a reference to a header instance call with `__setitem__`. */
|
||||
private DataFlow::Node headerSetItemCall() {
|
||||
API::Node headerSetItem() {
|
||||
result = headerInstance() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() = "__setitem__"
|
||||
result.asSource().(DataFlow::AttrRead).getAttributeName() = "__setitem__"
|
||||
}
|
||||
|
||||
class DjangoResponseSetItemCall extends DataFlow::CallCfgNode, HeaderDeclaration::Range {
|
||||
DjangoResponseSetItemCall() { this.getFunction() = headerSetItemCall() }
|
||||
DjangoResponseSetItemCall() { this = headerSetItem().getACall() }
|
||||
|
||||
override DataFlow::Node getNameArg() { result = this.getArg(0) }
|
||||
|
||||
@@ -124,7 +109,8 @@ private module ExperimentalPrivateDjango {
|
||||
DataFlow::Node headerInput;
|
||||
|
||||
DjangoResponseDefinition() {
|
||||
this.asCfgNode().(DefinitionNode) = headerInstance().asCfgNode() and
|
||||
this.asCfgNode().(DefinitionNode) =
|
||||
headerInstance().getAValueReachableFromSource().asCfgNode() and
|
||||
headerInput.asCfgNode() = this.asCfgNode().(DefinitionNode).getValue()
|
||||
}
|
||||
|
||||
|
||||
@@ -45,33 +45,6 @@ private module NoSql {
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB instance.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode mongoDBInstance(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
(
|
||||
exists(SubscriptNode subscript |
|
||||
subscript.getObject() = mongoClientInstance().getAValueReachableFromSource().asCfgNode() and
|
||||
result.asCfgNode() = subscript
|
||||
)
|
||||
or
|
||||
result.(DataFlow::AttrRead).getObject() = mongoClientInstance().getAValueReachableFromSource()
|
||||
or
|
||||
result = mongoEngine().getMember(["get_db", "connect"]).getACall()
|
||||
or
|
||||
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getACall()
|
||||
or
|
||||
result = flask_MongoEngine().getMember("get_db").getACall()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
|
||||
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getACall()
|
||||
)
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = mongoDBInstance(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` DB use.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
@@ -79,36 +52,26 @@ private module NoSql {
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
|
||||
* `mongo.db` would be a `Mongo` instance.
|
||||
*/
|
||||
private DataFlow::Node mongoDBInstance() {
|
||||
mongoDBInstance(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection use.
|
||||
*/
|
||||
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
(
|
||||
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
|
||||
subscript.getObject() = mongoDBInstance().asCfgNode()
|
||||
)
|
||||
or
|
||||
result.(DataFlow::AttrRead).getObject() = mongoDBInstance()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
|
||||
result
|
||||
.(DataFlow::MethodCallNode)
|
||||
.calls(mongoDBInstance(), ["get_collection", "create_collection"])
|
||||
)
|
||||
private API::Node mongoDBInstance() {
|
||||
result = mongoClientInstance().getASubscript()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
|
||||
result = mongoClientInstance().getAMember()
|
||||
or
|
||||
result = mongoEngine().getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn()
|
||||
or
|
||||
result = flask_MongoEngine().getMember("get_db").getReturn()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_default_database
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
|
||||
result = mongoClientInstance().getMember(["get_default_database", "get_database"]).getReturn()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection use.
|
||||
* Gets a reference to a `Mongo` collection.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
@@ -116,10 +79,16 @@ private module NoSql {
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
|
||||
* `mongo.db.user` would be a `Mongo` collection.
|
||||
*/
|
||||
private DataFlow::Node mongoCollection() {
|
||||
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
private API::Node mongoCollection() {
|
||||
result = mongoDBInstance().getASubscript()
|
||||
or
|
||||
result = mongoDBInstance().getAMember()
|
||||
or
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.get_collection
|
||||
// see https://pymongo.readthedocs.io/en/stable/api/pymongo/database.html#pymongo.database.Database.create_collection
|
||||
result = mongoDBInstance().getMember(["get_collection", "create_collection"]).getReturn()
|
||||
}
|
||||
|
||||
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
|
||||
@@ -132,22 +101,6 @@ private module NoSql {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method.
|
||||
*
|
||||
* ```py
|
||||
* from flask_pymongo import PyMongo
|
||||
* mongo = PyMongo(app)
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find` would be a collection method, and so the result.
|
||||
*/
|
||||
private DataFlow::Node mongoCollectionMethod() {
|
||||
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
|
||||
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to a `Mongo` collection method call
|
||||
*
|
||||
@@ -157,10 +110,12 @@ private module NoSql {
|
||||
* mongo.db.user.find({'name': safe_search})
|
||||
* ```
|
||||
*
|
||||
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
|
||||
* `mongo.db.user.find({'name': safe_search})` would be a collection method call.
|
||||
*/
|
||||
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSqlQuery::Range {
|
||||
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
|
||||
MongoCollectionCall() {
|
||||
this = mongoCollection().getMember(any(MongoCollectionMethodNames m)).getACall()
|
||||
}
|
||||
|
||||
override DataFlow::Node getQuery() { result = this.getArg(0) }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user