diff --git a/python/.vscode/ql.code-snippets b/python/.vscode/ql.code-snippets index bb36cf701a3..862d43291e3 100644 --- a/python/.vscode/ql.code-snippets +++ b/python/.vscode/ql.code-snippets @@ -199,31 +199,51 @@ " /**", " * Taint propagation for `${TM_SELECTED_TEXT}`.", " */", + " private class InstanceTaintSteps extends InstanceTaintStepsHelper {", + " InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }", + " ", + " override DataFlow::Node getInstance() { result = instance() }", + " ", + " override string getAttributeName() { none() }", + " ", + " override string getMethodName() { none() }", + " ", + " override string getAsyncMethodName() { none() }", + " }", + "", + " /**", + " * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.", + " */", " private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {", " override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {", - " // normal (non-async) methods", - " nodeFrom = instance() and", - " nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, [\"TODO\"])", - " or", - " // async methods", - " exists(DataFlow::MethodCallNode call, Await await |", - " nodeTo.asExpr() = await and", - " nodeFrom = instance()", - " |", - " await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and", - " call.calls(nodeFrom, [\"TODO\"])", - " )", - " or", - " // Attributes", - " nodeFrom = instance() and", - " nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and", - " nodeTo.(DataFlow::AttrRead).getAttributeName() in [\"TODO\"]", + " // TODO", + " none()", " }", " }", "}", ], "description": "Type tracking class (select full class path before inserting)", }, + "foo": { + "scope": "ql", + "prefix": "foo", + "body": [ + " /**", + " * Taint propagation for `$1`.", + " */", + " private class InstanceTaintSteps extends InstanceTaintStepsHelper {", + " InstanceTaintSteps() { this = \"$1\" }", + "", + " override DataFlow::Node getInstance() { 
result = instance() }", + "", + " override string getAttributeName() { none() }", + "", + " override string getMethodName() { none() }", + "", + " override string getAsyncMethodName() { none() }", + " }", + ], + }, "API graph .getMember chain": { "scope": "ql", "prefix": "api graph .getMember chain", diff --git a/python/ql/src/semmle/python/frameworks/Aiohttp.qll b/python/ql/src/semmle/python/frameworks/Aiohttp.qll index 5d1b6bd222d..46bcf3e554c 100644 --- a/python/ql/src/semmle/python/frameworks/Aiohttp.qll +++ b/python/ql/src/semmle/python/frameworks/Aiohttp.qll @@ -13,6 +13,7 @@ private import semmle.python.frameworks.internal.PoorMansFunctionResolution private import semmle.python.frameworks.internal.SelfRefMixin private import semmle.python.frameworks.Multidict private import semmle.python.frameworks.Yarl +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * INTERNAL: Do not use. @@ -296,33 +297,25 @@ module AiohttpWebModel { /** * Taint propagation for `aiohttp.web.Request`. 
- * - * See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request */ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = Request::instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["clone", "get_extra_info"]) - or - // async methods - exists(DataFlow::MethodCallNode call, Await await | - nodeTo.asExpr() = await and - nodeFrom = Request::instance() - | - await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and - call.calls(nodeFrom, ["read", "text", "json", "multipart", "post"]) - ) - or - // Attributes - nodeFrom = Request::instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "aiohttp.web.Request" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ "url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query", "headers", "transport", "cookies", "content", "_payload", "content_type", "charset", "http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info" ] } + + override string getMethodName() { result in ["clone", "get_extra_info"] } + + override string getAsyncMethodName() { + result in ["read", "text", "json", "multipart", "post"] + } } /** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */ @@ -424,24 +417,20 @@ module AiohttpWebModel { /** * Taint propagation for `aiohttp.StreamReader`. 
*/ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read_nowait"]) - or - // async methods - exists(DataFlow::MethodCallNode call, Await await | - nodeTo.asExpr() = await and - nodeFrom = instance() - | - await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and - call.calls(nodeFrom, - [ - "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any", - "iter_chunks" - ]) - ) + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "aiohttp.StreamReader" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["read_nowait"] } + + override string getAsyncMethodName() { + result in [ + "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any", + "iter_chunks" + ] } } } diff --git a/python/ql/src/semmle/python/frameworks/Django.qll b/python/ql/src/semmle/python/frameworks/Django.qll index 14008d66c6a..cb2c57101c7 100644 --- a/python/ql/src/semmle/python/frameworks/Django.qll +++ b/python/ql/src/semmle/python/frameworks/Django.qll @@ -14,6 +14,7 @@ private import semmle.python.frameworks.Stdlib private import semmle.python.regex private import semmle.python.frameworks.internal.PoorMansFunctionResolution private import semmle.python.frameworks.internal.SelfRefMixin +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `django` PyPI package. @@ -340,6 +341,23 @@ private module Django { /** * Taint propagation for `django.utils.datastructures.MultiValueDict`. 
*/ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "django.utils.datastructures.MultiValueDict" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { + result in ["getlist", "lists", "popitem", "dict", "urlencode"] + } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for `django.utils.datastructures.MultiValueDict`, not covered by `InstanceTaintSteps`. + */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { // class instantiation @@ -347,12 +365,6 @@ private module Django { nodeFrom = call.getArg(0) and nodeTo = call ) - or - // normal (non-async) methods - nodeFrom = instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, ["getlist", "lists", "popitem", "dict", "urlencode"]) } } } @@ -388,15 +400,20 @@ private module Django { /** * Taint propagation for `django.core.files.uploadedfile.UploadedFile`. */ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "django.core.files.uploadedfile.UploadedFile" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ - "content_type", "content_type_extra", "content_type_extra", "charset", "name", "file" + "content_type", "content_type_extra", "charset", "name", "file" ] } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } } /** A file-like object instance that originates from a `UploadedFile`.
*/ @@ -436,13 +453,16 @@ private module Django { /** * Taint propagation for `django.urls.ResolverMatch`. */ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in ["args", "kwargs"] - } + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "django.urls.ResolverMatch" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { result in ["args", "kwargs"] } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } } } } @@ -747,15 +767,43 @@ private module PrivateDjango { /** * Taint propagation for `django.http.request.HttpRequest`. */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "django.http.request.HttpRequest" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + // str / bytes + "body", "path", "path_info", "method", "encoding", "content_type", + // django.http.QueryDict + "GET", "POST", + // dict[str, str] + "content_params", "COOKIES", + // dict[str, Any] + "META", + // HttpHeaders (case insensitive dict-like) + "headers", + // MultiValueDict[str, UploadedFile] + "FILES", + // django.urls.ResolverMatch + "resolver_match" + ] + // TODO: Handle that a HttpRequest is iterable + } + + override string getMethodName() { + result in ["get_full_path", "get_full_path_info", "read", "readline", "readlines"] + } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for `django.http.request.HttpRequest`, not covered by `InstanceTaintSteps`. 
+ */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = django::http::request::HttpRequest::instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, - ["get_full_path", "get_full_path_info", "read", "readline", "readlines"]) - or // special handling of the `build_absolute_uri` method, see // https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri exists(DataFlow::AttrRead attr, DataFlow::CallCfgNode call, DataFlow::Node instance | @@ -775,27 +823,6 @@ private module PrivateDjango { nodeFrom = call.getArgByName("location") ) ) - or - // Attributes - nodeFrom = django::http::request::HttpRequest::instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ - // str / bytes - "body", "path", "path_info", "method", "encoding", "content_type", - // django.http.QueryDict - "GET", "POST", - // dict[str, str] - "content_params", "COOKIES", - // dict[str, Any] - "META", - // HttpHeaders (case insensitive dict-like) - "headers", - // MultiValueDict[str, UploadedFile] - "FILES", - // django.urls.ResolverMatch - "resolver_match" - ] - // TODO: Handle that a HttpRequest is iterable } } diff --git a/python/ql/src/semmle/python/frameworks/Flask.qll b/python/ql/src/semmle/python/frameworks/Flask.qll index ce90d162001..e854e07658b 100644 --- a/python/ql/src/semmle/python/frameworks/Flask.qll +++ b/python/ql/src/semmle/python/frameworks/Flask.qll @@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.frameworks.Werkzeug private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `flask` PyPI package. 
@@ -341,56 +342,56 @@ module Flask { } /** - * Taint propagation for a flask request. + * Taint propagation for `flask.Request`. * * See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request */ - private class FlaskRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = request().getAUse() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["get_data", "get_json"]) - or - // Attributes - nodeFrom = request().getAUse() and - exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom | - read.getAttributeName() in [ - // str - "path", "full_path", "base_url", "url", "access_control_request_method", - "content_encoding", "content_md5", "content_type", "data", "method", "mimetype", - "origin", "query_string", "referrer", "remote_addr", "remote_user", "user_agent", - // dict - "environ", "cookies", "mimetype_params", "view_args", - // json - "json", - // List[str] - "access_route", - // file-like - "stream", "input_stream", - // MultiDict[str, str] - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict - "args", "values", "form", - // MultiDict[str, FileStorage] - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage - // TODO: FileStorage needs extra taint steps - "files", - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet - "access_control_request_headers", "pragma", - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept - // TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods - "accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes", - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization - // TODO: 
dict subclass with extra attributes like `username` and `password` - "authorization", - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl - // TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do) - "cache_control", - // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers - // TODO: dict-like with wsgiref.headers.Header compatibility methods - "headers" - ] - ) + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "flask.Request" } + + override DataFlow::Node getInstance() { result = request().getAUse() } + + override string getAttributeName() { + result in [ + // str + "path", "full_path", "base_url", "url", "access_control_request_method", + "content_encoding", "content_md5", "content_type", "data", "method", "mimetype", "origin", + "query_string", "referrer", "remote_addr", "remote_user", "user_agent", + // dict + "environ", "cookies", "mimetype_params", "view_args", + // json + "json", + // List[str] + "access_route", + // file-like + "stream", "input_stream", + // MultiDict[str, str] + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict + "args", "values", "form", + // MultiDict[str, FileStorage] + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage + // TODO: FileStorage needs extra taint steps + "files", + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet + "access_control_request_headers", "pragma", + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept + // TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods + "accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes", + // 
https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization + // TODO: dict subclass with extra attributes like `username` and `password` + "authorization", + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl + // TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do) + "cache_control", + // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers + // TODO: dict-like with wsgiref.headers.Header compatibility methods + "headers" + ] } + + override string getMethodName() { result in ["get_data", "get_json"] } + + override string getAsyncMethodName() { none() } } private class RequestAttrMultiDict extends Werkzeug::MultiDict::InstanceSource { diff --git a/python/ql/src/semmle/python/frameworks/MarkupSafe.qll b/python/ql/src/semmle/python/frameworks/MarkupSafe.qll index 590c3238b9b..5679a23910f 100644 --- a/python/ql/src/semmle/python/frameworks/MarkupSafe.qll +++ b/python/ql/src/semmle/python/frameworks/MarkupSafe.qll @@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `MarkupSafe` PyPI package. 
diff --git a/python/ql/src/semmle/python/frameworks/Multidict.qll b/python/ql/src/semmle/python/frameworks/Multidict.qll index 2864bfcd562..06700e28347 100644 --- a/python/ql/src/semmle/python/frameworks/Multidict.qll +++ b/python/ql/src/semmle/python/frameworks/Multidict.qll @@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * INTERNAL: Do not use. @@ -60,8 +61,21 @@ module Multidict { /** * Taint propagation for `multidict.MultiDictProxy`. - * - * See https://multidict.readthedocs.io/en/stable/multidict.html#multidictproxy + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "multidict.MultiDictProxy" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["getone", "getall"] } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for `multidict.MultiDictProxy`, not covered by `InstanceTaintSteps`. 
*/ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { @@ -70,10 +84,6 @@ module Multidict { nodeFrom = call.getArg(0) and nodeTo = call ) - or - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["getone", "getall"]) } } } diff --git a/python/ql/src/semmle/python/frameworks/Stdlib.qll b/python/ql/src/semmle/python/frameworks/Stdlib.qll index 637eed70089..dc7060d3da7 100644 --- a/python/ql/src/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/semmle/python/frameworks/Stdlib.qll @@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.frameworks.PEP249 +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** Provides models for the Python standard library. */ module Stdlib { @@ -47,12 +48,23 @@ module Stdlib { /** * Taint propagation for file-like objects. 
*/ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "file-like object" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["read", "readline", "readlines"] } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for file-like objects, not covered by `InstanceTaintSteps`. + */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // result of method call is tainted - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read", "readline", "readlines"]) - or // taint-propagation back to instance from `foo.write(tainted_data)` exists(DataFlow::AttrRead write, DataFlow::CallCfgNode call, DataFlow::Node instance_ | instance_ = instance() and @@ -99,14 +111,16 @@ module Stdlib { /** * Taint propagation for `http.client.HTTPMessage`. */ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, ["get_all", "as_bytes", "as_string", "keys"]) - } + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "http.client.HTTPMessage" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["get_all", "as_bytes", "as_string", "keys"] } + + override string getAsyncMethodName() { none() } } } @@ -141,17 +155,16 @@ module Stdlib { /** * Taint propagation for `http.cookies.Morsel`.
*/ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["output", "js_output"]) - or - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in ["key", "value", "coded_value"] - } + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "http.cookies.Morsel" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { result in ["key", "value", "coded_value"] } + + override string getMethodName() { result in ["output", "js_output"] } + + override string getAsyncMethodName() { none() } } } } diff --git a/python/ql/src/semmle/python/frameworks/Tornado.qll b/python/ql/src/semmle/python/frameworks/Tornado.qll index 1c09cd89250..ba4898facc8 100644 --- a/python/ql/src/semmle/python/frameworks/Tornado.qll +++ b/python/ql/src/semmle/python/frameworks/Tornado.qll @@ -11,6 +11,7 @@ private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.regex private import semmle.python.frameworks.Stdlib +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `tornado` PyPI package. @@ -48,12 +49,16 @@ private module Tornado { /** * Taint propagation for `tornado.httputil.HTTPHeaders`. 
*/ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["get_list", "get_all"]) - } + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "tornado.httputil.HTTPHeaders" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["get_list", "get_all"] } + + override string getAsyncMethodName() { none() } } } @@ -162,31 +167,33 @@ private module Tornado { /** Gets a reference to the `write` method. */ DataFlow::Node writeMethod() { writeMethod(DataFlow::TypeTracker::end()).flowsTo(result) } - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, - [ - "get_argument", "get_body_argument", "get_query_argument", "get_arguments", - "get_body_arguments", "get_query_arguments" - ]) - or - // Attributes - nodeFrom = instance() and - exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom | - read.getAttributeName() in [ - // List[str] - "path_args", - // Dict[str, str] - "path_kwargs", - // tornado.httputil.HTTPServerRequest - "request" - ] - ) + /** + * Taint propagation for `tornado.web.RequestHandler`. 
+ */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "tornado.web.RequestHandler" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + // List[str] + "path_args", + // Dict[str, str] + "path_kwargs", + // tornado.httputil.HTTPServerRequest + "request" + ] } + + override string getMethodName() { + result in [ + "get_argument", "get_body_argument", "get_query_argument", "get_arguments", + "get_body_arguments", "get_query_arguments" + ] + } + + override string getAsyncMethodName() { none() } } private class RequestAttrAccess extends tornado::httputil::HttpServerRequest::InstanceSource { @@ -290,27 +297,30 @@ private module Tornado { /** Gets a reference to an instance of `tornado.httputil.HttpServerRequest`. */ DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["full_url"]) - or - // Attributes - nodeFrom = instance() and - exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom | - read.getAttributeName() in [ - // str / bytes - "uri", "path", "query", "remote_ip", "body", - // Dict[str, List[bytes]] - "arguments", "query_arguments", "body_arguments", - // dict-like, https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders - "headers", - // Dict[str, http.cookies.Morsel] - "cookies" - ] - ) + /** + * Taint propagation for `tornado.httputil.HttpServerRequest`. 
+ */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "tornado.httputil.HttpServerRequest" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + // str / bytes + "uri", "path", "query", "remote_ip", "body", + // Dict[str, List[bytes]] + "arguments", "query_arguments", "body_arguments", + // dict-like, https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders + "headers", + // Dict[str, http.cookies.Morsel] + "cookies" + ] } + + override string getMethodName() { result in ["full_url"] } + + override string getAsyncMethodName() { none() } } /** An `HTTPHeaders` instance that originates from a Tornado request. */ diff --git a/python/ql/src/semmle/python/frameworks/Twisted.qll b/python/ql/src/semmle/python/frameworks/Twisted.qll index c17a6c5b2f6..2ac9bb5eb40 100644 --- a/python/ql/src/semmle/python/frameworks/Twisted.qll +++ b/python/ql/src/semmle/python/frameworks/Twisted.qll @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `twisted` PyPI package. @@ -114,26 +115,26 @@ private module Twisted { /** * Taint propagation for `twisted.web.server.Request`. 
*/ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, - [ - "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", - "getRequestHostname" - ]) - or - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "twisted.web.server.Request" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ "uri", "path", "prepath", "postpath", "content", "args", "received_cookies", "requestHeaders", "user", "password", "host" ] } + + override string getMethodName() { + result in [ + "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", + "getRequestHostname" + ] + } + + override string getAsyncMethodName() { none() } } } diff --git a/python/ql/src/semmle/python/frameworks/Werkzeug.qll b/python/ql/src/semmle/python/frameworks/Werkzeug.qll index 5ec27d3cf76..039481f8522 100644 --- a/python/ql/src/semmle/python/frameworks/Werkzeug.qll +++ b/python/ql/src/semmle/python/frameworks/Werkzeug.qll @@ -11,6 +11,7 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.ApiGraphs private import semmle.python.frameworks.Stdlib private import semmle.python.Concepts +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * Provides models for the `Werkzeug` PyPI package. @@ -47,11 +48,19 @@ module Werkzeug { /** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. 
*/ DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, "getlist") - } + /** + * Taint propagation for `werkzeug.datastructures.MultiDict`. + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "werkzeug.datastructures.MultiDict" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { result in ["getlist"] } + + override string getAsyncMethodName() { none() } } } @@ -87,23 +96,30 @@ module Werkzeug { /** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */ DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - nodeFrom = instance() and - exists(DataFlow::AttrRead read | nodeTo = read | - read.getAttributeName() in [ - // str - "filename", "name", "content_type", "mimetype", - // file-like - "stream", - // TODO: werkzeug.datastructures.Headers - "headers", - // dict[str, str] - "mimetype_params" - ] and - read.getObject() = nodeFrom - ) + /** + * Taint propagation for `werkzeug.datastructures.FileStorage`. 
+ */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "werkzeug.datastructures.FileStorage" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + // str + "filename", "name", "content_type", "mimetype", + // file-like + "stream", + // TODO: werkzeug.datastructures.Headers + "headers", + // dict[str, str] + "mimetype_params" + ] } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } } /** A file-like object instance that originates from a `FileStorage`. */ @@ -152,14 +168,18 @@ module Werkzeug { /** * Taint propagation for `werkzeug.datastructures.Headers`. */ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // normal (non-async) methods - nodeFrom = instance() and - nodeTo - .(DataFlow::MethodCallNode) - .calls(nodeFrom, ["getlist", "get_all", "popitem", "to_wsgi_list"]) + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "werkzeug.datastructures.Headers" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { none() } + + override string getMethodName() { + result in ["getlist", "get_all", "popitem", "to_wsgi_list"] } + + override string getAsyncMethodName() { none() } } } @@ -194,16 +214,21 @@ module Werkzeug { /** * Taint propagation for `werkzeug.datastructures.Authorization`. 
*/ - private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "werkzeug.datastructures.Authorization" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ "username", "password", "realm", "nonce", "uri", "nc", "cnonce", "response", "opaque", "qop" ] } + + override string getMethodName() { none() } + + override string getAsyncMethodName() { none() } } } diff --git a/python/ql/src/semmle/python/frameworks/Yarl.qll b/python/ql/src/semmle/python/frameworks/Yarl.qll index 9ff62bb7524..00b0911471b 100644 --- a/python/ql/src/semmle/python/frameworks/Yarl.qll +++ b/python/ql/src/semmle/python/frameworks/Yarl.qll @@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.frameworks.Multidict +private import semmle.python.frameworks.internal.InstanceTaintStepsHelper /** * INTERNAL: Do not use. @@ -52,8 +53,28 @@ module Yarl { /** * Taint propagation for `yarl.URL`. 
- * - * See https://yarl.readthedocs.io/en/stable/api.html#yarl.URL + */ + private class InstanceTaintSteps extends InstanceTaintStepsHelper { + InstanceTaintSteps() { this = "yarl.URL" } + + override DataFlow::Node getInstance() { result = instance() } + + override string getAttributeName() { + result in [ + "user", "raw_user", "password", "raw_password", "host", "raw_host", "port", + "explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs", + "raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts", + "raw_parts", "name", "raw_name", "query" + ] + } + + override string getMethodName() { result in ["human_repr"] } + + override string getAsyncMethodName() { none() } + } + + /** + * Extra taint propagation for `yarl.URL`, not covered by `InstanceTaintSteps`. */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { @@ -63,10 +84,6 @@ module Yarl { nodeTo = call ) or - // normal (non-async) methods - nodeFrom = instance() and - nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["human_repr"]) - or // methods that give an altered URL. 
taint both from object, and form argument // (to result of call) exists(DataFlow::MethodCallNode call | @@ -81,16 +98,6 @@ module Yarl { nodeTo = call and nodeFrom in [call.getObject(), call.getArg(_), call.getArgByName(_)] ) - or - // Attributes - nodeFrom = instance() and - nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and - nodeTo.(DataFlow::AttrRead).getAttributeName() in [ - "user", "raw_user", "password", "raw_password", "host", "raw_host", "port", - "explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs", - "raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts", - "raw_parts", "name", "raw_name", "query" - ] } } diff --git a/python/ql/src/semmle/python/frameworks/internal/InstanceTaintStepsHelper.qll b/python/ql/src/semmle/python/frameworks/internal/InstanceTaintStepsHelper.qll new file mode 100644 index 00000000000..d87de463bfb --- /dev/null +++ b/python/ql/src/semmle/python/frameworks/internal/InstanceTaintStepsHelper.qll @@ -0,0 +1,50 @@ +/** + * INTERNAL: Do not use. + * + * Provides a helper class for defining additional taint steps. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.TaintTracking + +/** + * A helper class for defining additional taint steps. + */ +bindingset[this] +abstract class InstanceTaintStepsHelper extends string { + /** Gets an instance that the additional taint steps should be applied to. */ + abstract DataFlow::Node getInstance(); + + /** Gets the name of an attribute that should be tainted. */ + abstract string getAttributeName(); + + /** Gets the name of a method whose results should be tainted. */ + abstract string getMethodName(); + + /** Gets the name of an async method whose results should be tainted. 
*/ + abstract string getAsyncMethodName(); +} + +private class InstanceAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(InstanceTaintStepsHelper helper | + // normal (non-async) methods + nodeFrom = helper.getInstance() and + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, helper.getMethodName()) + or + // async methods + exists(DataFlow::MethodCallNode call, Await await | + nodeTo.asExpr() = await and + nodeFrom = helper.getInstance() + | + await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and + call.calls(nodeFrom, helper.getAsyncMethodName()) + ) + or + // Attributes + nodeFrom = helper.getInstance() and + nodeTo.(DataFlow::AttrRead).accesses(nodeFrom, helper.getAttributeName()) + ) + } +}