mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Merge pull request #10539 from yoff/python/improve-API-graphs
Python: add subscript to API graphs
This commit is contained in:
4
python/ql/lib/change-notes/2022-09-28-api-subscript.md
Normal file
4
python/ql/lib/change-notes/2022-09-28-api-subscript.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Added the ability to refer to subscript operations in the API graph. It is now possible to write `response().getMember("cookies").getASubscript()` to find code like `resp.cookies["key"]` (assuming `response` returns an API node for response objects).
|
||||
@@ -243,6 +243,12 @@ module API {
|
||||
*/
|
||||
Node getAwaited() { result = this.getASuccessor(Label::await()) }
|
||||
|
||||
/**
|
||||
* Gets a node representing a subscript of this node.
|
||||
* For example `obj[x]` is a subscript of `obj`.
|
||||
*/
|
||||
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
|
||||
|
||||
/**
|
||||
* Gets a string representation of the lexicographically least among all shortest access paths
|
||||
* from the root to this node.
|
||||
@@ -570,8 +576,6 @@ module API {
|
||||
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
|
||||
*/
|
||||
|
||||
private import semmle.python.internal.Awaited
|
||||
|
||||
cached
|
||||
newtype TApiNode =
|
||||
/** The root of the API graph. */
|
||||
@@ -747,6 +751,14 @@ module API {
|
||||
lbl = Label::return() and
|
||||
ref = pred.getACall()
|
||||
or
|
||||
// Awaiting a node that is a use of `base`
|
||||
lbl = Label::await() and
|
||||
ref = pred.getAnAwaited()
|
||||
or
|
||||
// Subscripting a node that is a use of `base`
|
||||
lbl = Label::subscript() and
|
||||
ref = pred.getASubscript()
|
||||
or
|
||||
// Subclassing a node
|
||||
lbl = Label::subclass() and
|
||||
exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) |
|
||||
@@ -760,13 +772,6 @@ module API {
|
||||
ref.(DataFlow::ExprNode).getNode().getNode() = clsExpr.getADecoratorCall()
|
||||
)
|
||||
)
|
||||
or
|
||||
// awaiting
|
||||
exists(DataFlow::Node awaitedValue |
|
||||
lbl = Label::await() and
|
||||
ref = awaited(awaitedValue) and
|
||||
pred.flowsTo(awaitedValue)
|
||||
)
|
||||
)
|
||||
or
|
||||
exists(DataFlow::Node def, PY::CallableExpr fn |
|
||||
@@ -986,6 +991,7 @@ module API {
|
||||
MkLabelReturn() or
|
||||
MkLabelSubclass() or
|
||||
MkLabelAwait() or
|
||||
MkLabelSubscript() or
|
||||
MkLabelEntryPoint(EntryPoint ep)
|
||||
|
||||
/** A label for a module. */
|
||||
@@ -1061,6 +1067,11 @@ module API {
|
||||
override string toString() { result = "getAwaited()" }
|
||||
}
|
||||
|
||||
/** A label that gets the subscript of a sequence/mapping. */
|
||||
class LabelSubscript extends ApiLabel, MkLabelSubscript {
|
||||
override string toString() { result = "getASubscript()" }
|
||||
}
|
||||
|
||||
/** A label for entry points. */
|
||||
class LabelEntryPoint extends ApiLabel, MkLabelEntryPoint {
|
||||
private EntryPoint entry;
|
||||
@@ -1106,6 +1117,9 @@ module API {
|
||||
/** Gets the `await` edge label. */
|
||||
LabelAwait await() { any() }
|
||||
|
||||
/** Gets the `subscript` edge label. */
|
||||
LabelSubscript subscript() { any() }
|
||||
|
||||
/** Gets the label going from the root node to the nodes associated with the given entry point. */
|
||||
LabelEntryPoint entryPoint(EntryPoint ep) { result = MkLabelEntryPoint(ep) }
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ private import python
|
||||
import DataFlowPublic
|
||||
private import DataFlowPrivate
|
||||
private import semmle.python.internal.CachedStages
|
||||
private import semmle.python.internal.Awaited
|
||||
|
||||
/**
|
||||
* A data flow node that is a source of local flow. This includes things like
|
||||
@@ -95,6 +96,16 @@ class LocalSourceNode extends Node {
|
||||
*/
|
||||
CallCfgNode getACall() { Cached::call(this, result) }
|
||||
|
||||
/**
|
||||
* Gets an awaited value from this node.
|
||||
*/
|
||||
Node getAnAwaited() { Cached::await(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a subscript of this node.
|
||||
*/
|
||||
Node getASubscript() { Cached::subscript(this, result) }
|
||||
|
||||
/**
|
||||
* Gets a call to the method `methodName` on this node.
|
||||
*
|
||||
@@ -225,4 +236,26 @@ private module Cached {
|
||||
n = call.getFunction()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` flows to a value that, when awaited, results in `awaited`.
|
||||
*/
|
||||
cached
|
||||
predicate await(LocalSourceNode node, Node awaited) {
|
||||
exists(Node awaitedValue |
|
||||
node.flowsTo(awaitedValue) and
|
||||
awaited = awaited(awaitedValue)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `node` flows to a sequence/mapping of which `subscript` is a subscript.
|
||||
*/
|
||||
cached
|
||||
predicate subscript(LocalSourceNode node, CfgNode subscript) {
|
||||
exists(CfgNode seq, SubscriptNode subscriptNode | subscriptNode = subscript.getNode() |
|
||||
node.flowsTo(seq) and
|
||||
seq.getNode() = subscriptNode.getObject()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,22 +424,20 @@ module Flask {
|
||||
}
|
||||
}
|
||||
|
||||
private API::Node requestFileStorage() {
|
||||
// TODO: This approach for identifying member-access is very ad hoc, and we should
|
||||
// be able to do something more structured for providing modeling of the members
|
||||
// of a container-object.
|
||||
result = request().getMember("files").getASubscript()
|
||||
or
|
||||
result = request().getMember("files").getMember("get").getReturn()
|
||||
or
|
||||
result = request().getMember("files").getMember("getlist").getReturn().getASubscript()
|
||||
}
|
||||
|
||||
/** A `FileStorage` instance that originates from a flask request. */
|
||||
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
|
||||
FlaskRequestFileStorageInstances() {
|
||||
// TODO: This approach for identifying member-access is very adhoc, and we should
|
||||
// be able to do something more structured for providing modeling of the members
|
||||
// of a container-object.
|
||||
exists(API::Node files | files = request().getMember("files") |
|
||||
this.asCfgNode().(SubscriptNode).getObject() =
|
||||
files.getAValueReachableFromSource().asCfgNode()
|
||||
or
|
||||
this = files.getMember("get").getACall()
|
||||
or
|
||||
this.asCfgNode().(SubscriptNode).getObject() =
|
||||
files.getMember("getlist").getReturn().getAValueReachableFromSource().asCfgNode()
|
||||
)
|
||||
}
|
||||
FlaskRequestFileStorageInstances() { this = requestFileStorage().asSource() }
|
||||
}
|
||||
|
||||
/** A `Headers` instance that originates from a flask request. */
|
||||
|
||||
@@ -1725,39 +1725,21 @@ private module StdlibPrivate {
|
||||
API::Node getlistResult() { result = getlistRef().getReturn() }
|
||||
|
||||
/** Gets a reference to a list of fields. */
|
||||
private DataFlow::TypeTrackingNode fieldList(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
// TODO: Should have better handling of subscripting
|
||||
result.asCfgNode().(SubscriptNode).getObject() =
|
||||
instance().getAValueReachableFromSource().asCfgNode()
|
||||
API::Node fieldList() {
|
||||
result = getlistResult()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = fieldList(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a list of fields. */
|
||||
DataFlow::Node fieldList() {
|
||||
result = getlistResult().getAValueReachableFromSource() or
|
||||
result = getvalueResult().getAValueReachableFromSource() or
|
||||
fieldList(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
result = getvalueResult()
|
||||
or
|
||||
result = instance().getASubscript()
|
||||
}
|
||||
|
||||
/** Gets a reference to a field. */
|
||||
private DataFlow::TypeTrackingNode field(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
// TODO: Should have better handling of subscripting
|
||||
result.asCfgNode().(SubscriptNode).getObject() =
|
||||
[instance().getAValueReachableFromSource(), fieldList()].asCfgNode()
|
||||
API::Node field() {
|
||||
result = getfirstResult()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = field(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to a field. */
|
||||
DataFlow::Node field() {
|
||||
result = getfirstResult().getAValueReachableFromSource()
|
||||
result = getvalueResult()
|
||||
or
|
||||
result = getvalueResult().getAValueReachableFromSource()
|
||||
or
|
||||
field(DataFlow::TypeTracker::end()).flowsTo(result)
|
||||
result = [instance(), fieldList()].getASubscript()
|
||||
}
|
||||
|
||||
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
@@ -1780,11 +1762,13 @@ private module StdlibPrivate {
|
||||
)
|
||||
or
|
||||
// Indexing
|
||||
nodeFrom in [instance().getAValueReachableFromSource(), fieldList()] and
|
||||
nodeFrom in [
|
||||
instance().getAValueReachableFromSource(), fieldList().getAValueReachableFromSource()
|
||||
] and
|
||||
nodeTo.asCfgNode().(SubscriptNode).getObject() = nodeFrom.asCfgNode()
|
||||
or
|
||||
// Attributes on Field
|
||||
nodeFrom = field() and
|
||||
nodeFrom = field().getAValueReachableFromSource() and
|
||||
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
|
||||
read.getAttributeName() in ["value", "file", "filename"]
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user