Python: Consider taint of client HTTP requests

This commit is contained in:
Rasmus Wriedt Larsen
2021-12-13 14:54:14 +01:00
parent b68d280129
commit 35cba17642
3 changed files with 67 additions and 12 deletions

View File

@@ -822,8 +822,8 @@ module HTTP {
* extend `Request::Range` instead.
*/
class Request extends DataFlow::Node instanceof Request::Range {
/** Gets a node which returns the body of the response */
DataFlow::Node getResponseBody() { result = super.getResponseBody() }
/** Gets a node that provides the response to this request. */
DataFlow::Node getResponse() { result = super.getResponse() }
/**
* Gets a node that contributes to the URL of the request.
@@ -857,8 +857,8 @@ module HTTP {
* extend `Request` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets a node which returns the body of the response */
abstract DataFlow::Node getResponseBody();
/** Gets a node that provides the response to this request. */
abstract DataFlow::Node getResponse();
/**
* Gets a node that contributes to the URL of the request.
@@ -882,14 +882,19 @@ module HTTP {
}
}
/** The response body from an outgoing HTTP request, considered as a remote flow source */
private class RequestResponseBody extends RemoteFlowSource::Range, DataFlow::Node {
Request request;
RequestResponseBody() { this = request.getResponseBody() }
override string getSourceType() { result = request.getFramework() + " response body" }
/**
 * A taint step that carries taint from the URL of an outgoing client request
 * to the response of that same request.
 *
 * As a result, the response is tainted whenever the URL is tainted, i.e. when
 * the request is made to a user-controlled URL.
 */
private class HttpClientRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Both ends of the step must belong to the same client request.
    exists(Request clientRequest |
      clientRequest.getUrl() = nodeFrom and
      clientRequest.getResponse() = nodeTo
    )
  }
}
// TODO: investigate whether we should treat responses to client requests as
// remote-flow-sources in general.
}
}

View File

@@ -51,7 +51,7 @@ private module Requests {
result = this.getArg(1)
}
override DataFlow::Node getResponseBody() { none() }
/** Gets the node that provides the response; the node for this call is itself the response. */
override DataFlow::Node getResponse() { this = result }
/** Gets the node passed as the `verify` keyword argument of this outgoing requests call. */
DataFlow::Node getVerifyArg() { this.getArgByName("verify") = result }

View File

@@ -0,0 +1,50 @@
import requests
from flask import Flask, request
app = Flask(__name__)
# Taint-tracking test for responses of outgoing `requests` calls.
#
# NOTE(review): `ensure_tainted` / `ensure_not_tainted` are not defined or
# imported in this file; presumably they are recognised by name by the test
# harness. The `# $ ...` trailing comments are inline test expectations and
# must be kept verbatim.
@app.route("/taint_test") # $ routeSetup="/taint_test"
def test_taint(): # $ requestHandler
    url = request.args['untrusted_input']

    # response from a request to a user-controlled URL should be considered
    # user-controlled as well.
    resp = requests.get(url) # $ clientRequestUrl=url
    ensure_tainted(
        # see https://docs.python-requests.org/en/latest/api/#requests.Response
        resp, # $ tainted
        resp.text, # $ MISSING: tainted
        resp.content, # $ MISSING: tainted
        resp.json(), # $ MISSING: tainted

        # file-like
        resp.raw, # $ MISSING: tainted

        resp.links, # $ MISSING: tainted
        resp.links['key'], # $ MISSING: tainted
        resp.links.get('key'), # $ MISSING: tainted

        resp.cookies, # $ MISSING: tainted
        resp.cookies['key'], # $ MISSING: tainted
        resp.cookies.get('key'), # $ MISSING: tainted

        resp.headers, # $ MISSING: tainted
        resp.headers['key'], # $ MISSING: tainted
        resp.headers.get('key'), # $ MISSING: tainted
    )

    for content_chunk in resp.iter_content():
        ensure_tainted(content_chunk) # $ MISSING: tainted

    for line in resp.iter_lines():
        ensure_tainted(line) # $ MISSING: tainted

    # for now, we don't assume that the response to ANY outgoing request is a remote
    # flow source, since this could lead to FPs.
    # TODO: investigate whether we should consider this a remote flow source.
    trusted_url = "https://internal-api-that-i-trust.com"
    resp = requests.get(trusted_url) # $ clientRequestUrl=trusted_url
    # Helper name must be spelled `ensure_not_tainted` (single underscore after
    # `ensure`), matching `ensure_tainted` above, so that this negative check is
    # actually evaluated.
    ensure_not_tainted(resp)