Python: Model requests Responses

This commit is contained in:
Rasmus Wriedt Larsen
2021-12-13 15:09:27 +01:00
parent 35cba17642
commit cf2ee0672f
2 changed files with 95 additions and 15 deletions

View File

@@ -10,6 +10,8 @@ private import python
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
private import semmle.python.frameworks.Stdlib
/**
* INTERNAL: Do not use.
@@ -83,4 +85,78 @@ private module Requests {
private DataFlow::LocalSourceNode verifyArgBacktracker(DataFlow::Node arg) {
// Convenience overload: delegates to the two-argument `verifyArgBacktracker`,
// passing `DataFlow::TypeBackTracker::end()` as the back-tracker state.
result = verifyArgBacktracker(DataFlow::TypeBackTracker::end(), arg)
}
// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------
/**
 * Models of the `requests.models.Response` class (also reachable as `requests.Response`).
 *
 * See https://docs.python-requests.org/en/latest/api/#requests.Response.
 */
module Response {
  /**
   * Gets an API graph node for the `requests.models.Response` class, looked up either
   * as `requests.models.Response` or as `requests.Response`.
   */
  private API::Node classRef() {
    exists(API::Node requestsModule | requestsModule = API::moduleImport("requests") |
      result = requestsModule.getMember("models").getMember("Response")
      or
      result = requestsModule.getMember("Response")
    )
  }

  /**
   * A node where an instance of `requests.models.Response` originates. Extend this
   * class to model additional origins.
   *
   * Typical origins are instantiations of the class, return values of function calls,
   * and special parameters that an external library sets when it calls a function.
   *
   * To get references to instances of `requests.models.Response`, use the predicate
   * `Response::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A call to the `requests.models.Response` class itself, creating a new instance. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** The response node of an outgoing request call (`OutgoingRequestCall.getResponse()`). */
  private class RequestReturnValue extends InstanceSource, DataFlow::Node {
    RequestReturnValue() {
      exists(OutgoingRequestCall requestCall | this = requestCall.getResponse())
    }
  }

  /**
   * Gets a reference to an instance of `requests.models.Response`, where `t` is the
   * type-tracker state for that reference.
   */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // Base case: tracking begins at every `InstanceSource`.
    result instanceof InstanceSource and
    t.start()
    or
    // Step case: extend an already tracked reference by one type-tracking step.
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a data-flow node that refers to an instance of `requests.models.Response`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end())
    |
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint steps for `requests.models.Response` instances: lists the attribute and
   * method names handed to `InstanceTaintStepsHelper`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "requests.models.Response" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      result = ["text", "content", "raw", "links", "cookies", "headers"]
    }

    override string getMethodName() { result = ["json", "iter_content", "iter_lines"] }

    // No async methods are modeled for this class.
    override string getAsyncMethodName() { none() }
  }

  /** A read of the `raw` attribute on a response instance, modeled as a file-like object. */
  private class FileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
    FileLikeInstances() {
      exists(DataFlow::AttrRead rawRead | rawRead = this |
        rawRead.getAttributeName() = "raw" and
        rawRead.getObject() = instance()
      )
    }
  }
}
}

View File

@@ -13,34 +13,38 @@ def test_taint(): # $ requestHandler
# user-controlled as well.
resp = requests.get(url) # $ clientRequestUrl=url
requests.Response
requests.models.Response
ensure_tainted(
# see https://docs.python-requests.org/en/latest/api/#requests.Response
resp, # $ tainted
resp.text, # $ MISSING: tainted
resp.content, # $ MISSING: tainted
resp.json(), # $ MISSING: tainted
resp.text, # $ tainted
resp.content, # $ tainted
resp.json(), # $ tainted
# file-like
resp.raw, # $ MISSING: tainted
resp.raw, # $ tainted
resp.raw.read(), # $ tainted
resp.links, # $ MISSING: tainted
resp.links['key'], # $ MISSING: tainted
resp.links.get('key'), # $ MISSING: tainted
resp.links, # $ tainted
resp.links['key'], # $ tainted
resp.links.get('key'), # $ tainted
resp.cookies, # $ MISSING: tainted
resp.cookies['key'], # $ MISSING: tainted
resp.cookies.get('key'), # $ MISSING: tainted
resp.cookies, # $ tainted
resp.cookies['key'], # $ tainted
resp.cookies.get('key'), # $ tainted
resp.headers, # $ MISSING: tainted
resp.headers['key'], # $ MISSING: tainted
resp.headers.get('key'), # $ MISSING: tainted
resp.headers, # $ tainted
resp.headers['key'], # $ tainted
resp.headers.get('key'), # $ tainted
)
for content_chunk in resp.iter_content():
ensure_tainted(content_chunk) # $ MISSING: tainted
ensure_tainted(content_chunk) # $ tainted
for line in resp.iter_lines():
ensure_tainted(line) # $ MISSING: tainted
ensure_tainted(line) # $ tainted
# for now, we don't assume that the response to ANY outgoing request is a remote
# flow source, since this could lead to FPs.