From 7619d0fc33d8994470b21d45fa2c2d5fc8557ade Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 25 Oct 2021 15:22:24 +0200 Subject: [PATCH] Python: FastAPI: Model WebSocket usage --- docs/codeql/support/reusables/frameworks.rst | 1 + python/ql/lib/semmle/python/Frameworks.qll | 1 + .../lib/semmle/python/frameworks/FastApi.qll | 14 +- .../semmle/python/frameworks/Starlette.qll | 162 ++++++++++++++++++ .../lib/semmle/python/frameworks/Stdlib.qll | 92 ++++++++++ .../frameworks/fastapi/taint_test.py | 68 ++++---- 6 files changed, 303 insertions(+), 35 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/Starlette.qll diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index ad629d61604..7a7db2b25af 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -159,6 +159,7 @@ Python built-in support Flask, Web framework Tornado, Web framework Twisted, Web framework + starlette, Asynchronous Server Gateway Interface (ASGI) PyYAML, Serialization dill, Serialization simplejson, Serialization diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index 74cc9e9637d..f32675f6f47 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -29,6 +29,7 @@ private import semmle.python.frameworks.PyMySQL private import semmle.python.frameworks.Rsa private import semmle.python.frameworks.Simplejson private import semmle.python.frameworks.SqlAlchemy +private import semmle.python.frameworks.Starlette private import semmle.python.frameworks.Stdlib private import semmle.python.frameworks.Tornado private import semmle.python.frameworks.Twisted diff --git a/python/ql/lib/semmle/python/frameworks/FastApi.qll b/python/ql/lib/semmle/python/frameworks/FastApi.qll index ed40dee82ac..a6d9e52efad 100644 --- a/python/ql/lib/semmle/python/frameworks/FastApi.qll +++ b/python/ql/lib/semmle/python/frameworks/FastApi.qll @@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.frameworks.Pydantic +private import semmle.python.frameworks.Starlette /** * Provides models for the `fastapi` PyPI package. @@ -49,7 +50,7 @@ private module FastApi { exists(string routeAddingMethod | routeAddingMethod = HTTP::httpVerbLower() or - routeAddingMethod = "api_route" + routeAddingMethod in ["api_route", "websocket"] | this = App::instance().getMember(routeAddingMethod).getACall() or @@ -94,6 +95,17 @@ private module FastApi { // --------------------------------------------------------------------------- // Response modeling // --------------------------------------------------------------------------- + /** + * A parameter to a request handler that has a WebSocket type-annotation. + */ + private class WebSocketRequestHandlerParam extends Starlette::WebSocket::InstanceSource, + DataFlow::ParameterNode { + WebSocketRequestHandlerParam() { + this.getParameter().getAnnotation() = Starlette::WebSocket::classRef().getAUse().asExpr() and + any(FastApiRouteSetup rs).getARequestHandler().getArgByName(_) = this.getParameter() + } + } + /** * Provides models for the `fastapi.Response` class and subclasses. * diff --git a/python/ql/lib/semmle/python/frameworks/Starlette.qll b/python/ql/lib/semmle/python/frameworks/Starlette.qll new file mode 100644 index 00000000000..7fc46209760 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Starlette.qll @@ -0,0 +1,162 @@ +/** + * Provides classes modeling security-relevant aspects of the `starlette` PyPI package. + * + * See + * - https://pypi.org/project/starlette/ + * - https://www.starlette.io/ + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking +private import semmle.python.Concepts +private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper +private import semmle.python.frameworks.Stdlib + +/** + * INTERNAL: Do not use. + * + * Provides models for `starlette` PyPI package. + * + * See + * - https://pypi.org/project/starlette/ + * - https://www.starlette.io/ + */ +module Starlette { + /** + * Provides models for the `starlette.websockets.WebSocket` class + * + * See https://www.starlette.io/websockets/. + */ + module WebSocket { + /** Gets a reference to the `starlette.websockets.WebSocket` class. */ + API::Node classRef() { + result = API::moduleImport("starlette").getMember("websockets").getMember("WebSocket") + or + result = API::moduleImport("fastapi").getMember("WebSocket") + } + + /** + * A source of instances of `starlette.websockets.WebSocket`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `WebSocket::instance()` to get references to instances of `starlette.websockets.WebSocket`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `starlette.websockets.WebSocket`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + } + + /** Gets a reference to an instance of `starlette.websockets.WebSocket`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `starlette.websockets.WebSocket`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Taint propagation for `starlette.websockets.WebSocket`. + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "starlette.websockets.WebSocket" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { result in ["url", "headers", "query_params", "cookies"] } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { + result in [ + "receive", "receive_bytes", "receive_text", "receive_json", "iter_bytes", "iter_text", + "iter_json" + ] + } + } + + /** An attribute read on a `starlette.websockets.WebSocket` instance that is a `starlette.requests.URL` instance. */ + private class UrlInstances extends URL::InstanceSource { + UrlInstances() { + this.(DataFlow::AttrRead).getObject() = instance() and + this.(DataFlow::AttrRead).getAttributeName() = "url" + } + } + } + + /** + * Provides models for the `starlette.requests.URL` class + * + * See the URL part of https://www.starlette.io/websockets/. + */ + module URL { + /** Gets a reference to the `starlette.requests.URL` class. */ + private API::Node classRef() { + result = API::moduleImport("starlette").getMember("requests").getMember("URL") + } + + /** + * A source of instances of `starlette.requests.URL`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `URL::instance()` to get references to instances of `starlette.requests.URL`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `starlette.requests.URL`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + } + + /** Gets a reference to an instance of `starlette.requests.URL`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `starlette.requests.URL`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Taint propagation for `starlette.requests.URL`. + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "starlette.requests.URL" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + "components", "netloc", "path", "query", "fragment", "username", "password", "hostname", + "port" + ] + } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } + } + + /** An attribute read on a `starlette.requests.URL` instance that is a `urllib.parse.SplitResult` instance. */ + private class UrlSplitInstances extends Stdlib::SplitResult::InstanceSource { + UrlSplitInstances() { + this.(DataFlow::AttrRead).getObject() = instance() and + this.(DataFlow::AttrRead).getAttributeName() = "components" + } + } + } +} diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 973af899896..abe9afe3a39 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -167,6 +167,74 @@ module Stdlib { override string getAsyncMethodName() { none() } } } + + /** + * Provides models for the `urllib.parse.SplitResult` class + * + * See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.SplitResult. + */ + module SplitResult { + /** Gets a reference to the `urllib.parse.SplitResult` class. */ + private API::Node classRef() { + result = API::moduleImport("urllib").getMember("parse").getMember("SplitResult") + } + + /** + * A source of instances of `urllib.parse.SplitResult`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `SplitResult::instance()` to get references to instances of `urllib.parse.SplitResult`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `urllib.parse.SplitResult`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + } + + /** Gets a reference to an instance of `urllib.parse.SplitResult`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `urllib.parse.SplitResult`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Taint propagation for `urllib.parse.SplitResult`. + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "urllib.parse.SplitResult" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + "netloc", "path", "query", "fragment", "username", "password", "hostname", "port" + ] + } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for `urllib.parse.SplitResult`, not covered by `InstanceTaintSteps`. + */ + private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // TODO + none() + } + } + } } /** @@ -1749,6 +1817,30 @@ private module StdlibPrivate { override string getKind() { result = Escaping::getRegexKind() } } + + // --------------------------------------------------------------------------- + // urllib + // --------------------------------------------------------------------------- + /** + * A call to `urllib.parse.urlsplit` + * + * See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.urlsplit + */ + class UrllibParseUrlsplitCall extends Stdlib::SplitResult::InstanceSource, DataFlow::CallCfgNode { + UrllibParseUrlsplitCall() { + this = API::moduleImport("urllib").getMember("parse").getMember("urlsplit").getACall() + } + + /** Gets the argument that specifies the URL. */ + DataFlow::Node getUrl() { result in [this.getArg(0), this.getArgByName("url")] } + } + + /** Extra taint-step such that the result of `urllib.parse.urlsplit(tainted_string)` is tainted. */ + private class UrllibParseUrlsplitCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + nodeTo.(UrllibParseUrlsplitCall).getUrl() = nodeFrom + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/fastapi/taint_test.py b/python/ql/test/library-tests/frameworks/fastapi/taint_test.py index b1a43198277..591d74a887b 100644 --- a/python/ql/test/library-tests/frameworks/fastapi/taint_test.py +++ b/python/ql/test/library-tests/frameworks/fastapi/taint_test.py @@ -104,47 +104,47 @@ from fastapi import WebSocket assert WebSocket == starlette.websockets.WebSocket -@app.websocket("/ws") -async def websocket_test(websocket: WebSocket): +@app.websocket("/ws") # $ routeSetup="/ws" +async def websocket_test(websocket: WebSocket): # $ requestHandler routedParameter=websocket await websocket.accept() ensure_tainted( - websocket, # $ MISSING: tainted + websocket, # $ tainted - websocket.url, # $ MISSING: tainted + websocket.url, # $ tainted - websocket.url.netloc, # $ MISSING: tainted - websocket.url.path, # $ MISSING: tainted - websocket.url.query, # $ MISSING: tainted - websocket.url.fragment, # $ MISSING: tainted - websocket.url.username, # $ MISSING: tainted - websocket.url.password, # $ MISSING: tainted - websocket.url.hostname, # $ MISSING: tainted - websocket.url.port, # $ MISSING: tainted + websocket.url.netloc, # $ tainted + websocket.url.path, # $ tainted + websocket.url.query, # $ tainted + websocket.url.fragment, # $ tainted + websocket.url.username, # $ tainted + websocket.url.password, # $ tainted + websocket.url.hostname, # $ tainted + websocket.url.port, # $ tainted - websocket.url.components, # $ MISSING: tainted - websocket.url.components.netloc, # $ MISSING: tainted - websocket.url.components.path, # $ MISSING: tainted - websocket.url.components.query, # $ MISSING: tainted - websocket.url.components.fragment, # $ MISSING: tainted - websocket.url.components.username, # $ MISSING: tainted - websocket.url.components.password, # $ MISSING: tainted - websocket.url.components.hostname, # $ MISSING: tainted - websocket.url.components.port, # $ MISSING: tainted + websocket.url.components, # $ tainted + websocket.url.components.netloc, # $ tainted + websocket.url.components.path, # $ tainted + websocket.url.components.query, # $ tainted + websocket.url.components.fragment, # $ tainted + websocket.url.components.username, # $ tainted + websocket.url.components.password, # $ tainted + websocket.url.components.hostname, # $ tainted + websocket.url.components.port, # $ tainted - websocket.headers, # $ MISSING: tainted - websocket.headers["key"], # $ MISSING: tainted + websocket.headers, # $ tainted + websocket.headers["key"], # $ tainted - websocket.query_params, # $ MISSING: tainted - websocket.query_params["key"], # $ MISSING: tainted + websocket.query_params, # $ tainted + websocket.query_params["key"], # $ tainted - websocket.cookies, # $ MISSING: tainted - websocket.cookies["key"], # $ MISSING: tainted + websocket.cookies, # $ tainted + websocket.cookies["key"], # $ tainted - await websocket.receive(), # $ MISSING: tainted - await websocket.receive_bytes(), # $ MISSING: tainted - await websocket.receive_text(), # $ MISSING: tainted - await websocket.receive_json(), # $ MISSING: tainted + await websocket.receive(), # $ tainted + await websocket.receive_bytes(), # $ tainted + await websocket.receive_text(), # $ tainted + await websocket.receive_json(), # $ tainted ) # scheme seems very unlikely to give interesting results, but very likely to give FPs. @@ -154,10 +154,10 @@ async def websocket_test(websocket: WebSocket): ) async for data in websocket.iter_bytes(): - ensure_tainted(data) # $ MISSING: tainted + ensure_tainted(data) # $ tainted async for data in websocket.iter_text(): - ensure_tainted(data) # $ MISSING: tainted + ensure_tainted(data) # $ tainted async for data in websocket.iter_json(): - ensure_tainted(data) # $ MISSING: tainted + ensure_tainted(data) # $ tainted