From be1cad864bca209285212e51fb847af42b7e126b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 21 Jul 2021 13:25:53 +0200 Subject: [PATCH] Python: Resolve all `meth = obj.meth; meth()` TODOs It would probably have been easier to do this as the _first_ thing... but that's too late now :sweat: --- python/.vscode/ql.code-snippets | 27 +++----- .../src/semmle/python/frameworks/Aiohttp.qll | 63 ++++++++----------- .../src/semmle/python/frameworks/Django.qll | 29 +++------ .../semmle/python/frameworks/Multidict.qll | 12 +--- .../src/semmle/python/frameworks/Stdlib.qll | 26 ++------ .../src/semmle/python/frameworks/Tornado.qll | 12 +--- .../src/semmle/python/frameworks/Twisted.qll | 47 ++++---------- .../src/semmle/python/frameworks/Werkzeug.qll | 14 ++--- .../ql/src/semmle/python/frameworks/Yarl.qll | 41 ++++-------- 9 files changed, 81 insertions(+), 190 deletions(-) diff --git a/python/.vscode/ql.code-snippets b/python/.vscode/ql.code-snippets index cebd4c1533a..bb36cf701a3 100644 --- a/python/.vscode/ql.code-snippets +++ b/python/.vscode/ql.code-snippets @@ -201,24 +201,17 @@ " */", " private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {", " override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {", - " // Methods", - " //", - " // TODO: When we have tools that make it easy, model these properly to handle", - " // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach", - " // (since it allows us to at least capture the most common cases).", + " // normal (non-async) methods", " nodeFrom = instance() and", - " exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |", - " // normal (non-async) methods", - " attr.getAttributeName() in [\"TODO\"] and", - " nodeTo.(DataFlow::CallCfgNode).getFunction() = attr", - " or", - " // async methods", - " exists(Await await, DataFlow::CallCfgNode call |", - " attr.getAttributeName() in [\"TODO\"] and", - " call.getFunction() = attr and", - " await.getValue() = call.asExpr() and", - " nodeTo.asExpr() = await", - " )", + " nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, [\"TODO\"])", + " or", + " // async methods", + " exists(DataFlow::MethodCallNode call, Await await |", + " nodeTo.asExpr() = await and", + " nodeFrom = instance()", + " |", + " await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and", + " call.calls(nodeFrom, [\"TODO\"])", " )", " or", " // Attributes", diff --git a/python/ql/src/semmle/python/frameworks/Aiohttp.qll b/python/ql/src/semmle/python/frameworks/Aiohttp.qll index eaf4043a64e..383c72ff952 100644 --- a/python/ql/src/semmle/python/frameworks/Aiohttp.qll +++ b/python/ql/src/semmle/python/frameworks/Aiohttp.qll @@ -359,27 +359,21 @@ module AiohttpWebModel { */ private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). - nodeFrom = StreamReader::instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal methods - attr.getAttributeName() in ["read_nowait"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - or - // async methods - exists(Await await, DataFlow::CallCfgNode call | - attr.getAttributeName() in [ - "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", - "iter_any", "iter_chunks" - ] and - call.getFunction() = attr and - await.getValue() = call.asExpr() and - nodeTo.asExpr() = await - ) + // normal (non-async) methods + nodeFrom = instance() and + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read_nowait"]) + or + // async methods + exists(DataFlow::MethodCallNode call, Await await | + nodeTo.asExpr() = await and + nodeFrom = instance() + | + await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and + call.calls(nodeFrom, + [ + "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any", + "iter_chunks" + ]) ) } } @@ -438,24 +432,17 @@ module AiohttpWebModel { */ private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = Request::instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal methods - attr.getAttributeName() in ["clone", "get_extra_info"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - or - // async methods - exists(Await await, DataFlow::CallCfgNode call | - attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and - call.getFunction() = attr and - await.getValue() = call.asExpr() and - nodeTo.asExpr() = await - ) + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["clone", "get_extra_info"]) + or + // async methods + exists(DataFlow::MethodCallNode call, Await await | + nodeTo.asExpr() = await and + nodeFrom = Request::instance() + | + await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and + call.calls(nodeFrom, ["read", "text", "json", "multipart", "post"]) ) or // Attributes diff --git a/python/ql/src/semmle/python/frameworks/Django.qll b/python/ql/src/semmle/python/frameworks/Django.qll index 61a307358cc..503eec2b8a3 100644 --- a/python/ql/src/semmle/python/frameworks/Django.qll +++ b/python/ql/src/semmle/python/frameworks/Django.qll @@ -348,17 +348,11 @@ private module Django { nodeTo = call ) or - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // methods (non-async) - attr.getAttributeName() in ["getlist", "lists", "popitem", "dict", "urlencode"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo + .(DataFlow::MethodCallNode) + .calls(nodeFrom, ["getlist", "lists", "popitem", "dict", "urlencode"]) } } } @@ -2044,18 +2038,11 @@ private module PrivateDjango { private class DjangoHttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = django::http::request::HttpRequest::instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - attr.getAttributeName() in [ - "get_full_path", "get_full_path_info", "read", "readline", "readlines" - ] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo + .(DataFlow::MethodCallNode) + .calls(nodeFrom, ["get_full_path", "get_full_path_info", "read", "readline", "readlines"]) or // special handling of the `build_absolute_uri` method, see // https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri diff --git a/python/ql/src/semmle/python/frameworks/Multidict.qll b/python/ql/src/semmle/python/frameworks/Multidict.qll index 038739176e0..2dae5d3dd68 100644 --- a/python/ql/src/semmle/python/frameworks/Multidict.qll +++ b/python/ql/src/semmle/python/frameworks/Multidict.qll @@ -71,17 +71,9 @@ module Multidict { nodeTo = call ) or - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // methods (non-async) - attr.getAttributeName() in ["getone", "getall"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["getone", "getall"]) } } } diff --git a/python/ql/src/semmle/python/frameworks/Stdlib.qll b/python/ql/src/semmle/python/frameworks/Stdlib.qll index 895efed8508..8e12f1a8d9a 100644 --- a/python/ql/src/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/semmle/python/frameworks/Stdlib.qll @@ -101,17 +101,11 @@ module Stdlib { */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal (non-async) methods - attr.getAttributeName() in ["get_all", "as_bytes", "as_string", "keys"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo + .(DataFlow::MethodCallNode) + .calls(nodeFrom, ["get_all", "as_bytes", "as_string", "keys"]) } } } @@ -149,17 +143,9 @@ module Stdlib { */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal (non-async) methods - attr.getAttributeName() in ["output", "js_output"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["output", "js_output"]) or // Attributes nodeFrom = instance() and diff --git a/python/ql/src/semmle/python/frameworks/Tornado.qll b/python/ql/src/semmle/python/frameworks/Tornado.qll index 6def64c5264..4f17109ff15 100644 --- a/python/ql/src/semmle/python/frameworks/Tornado.qll +++ b/python/ql/src/semmle/python/frameworks/Tornado.qll @@ -50,17 +50,9 @@ private module Tornado { */ private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal (non-async) methods - attr.getAttributeName() in ["get_list", "get_all"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["get_list", "get_all"]) } } } diff --git a/python/ql/src/semmle/python/frameworks/Twisted.qll b/python/ql/src/semmle/python/frameworks/Twisted.qll index 0b85a66e6f9..d6aa7f2594e 100644 --- a/python/ql/src/semmle/python/frameworks/Twisted.qll +++ b/python/ql/src/semmle/python/frameworks/Twisted.qll @@ -130,20 +130,15 @@ private module Twisted { */ private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = Request::instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // normal (non-async) methods - attr.getAttributeName() in [ - "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", - "getRequestHostname" - ] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo + .(DataFlow::MethodCallNode) + .calls(nodeFrom, + [ + "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", + "getRequestHostname" + ]) or // Attributes nodeFrom = Request::instance() and @@ -198,17 +193,8 @@ private module Twisted { * * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write */ - class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode { - TwistedRequestWriteCall() { - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). - exists(DataFlow::AttrRead read | - this.getFunction() = read and - read.getObject() = Request::instance() and - read.getAttributeName() = "write" - ) - } + class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::MethodCallNode { + TwistedRequestWriteCall() { this.calls(Request::instance(), "write") } override DataFlow::Node getBody() { result.asCfgNode() in [node.getArg(0), node.getArgByName("data")] @@ -225,17 +211,8 @@ private module Twisted { * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect */ class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range, - DataFlow::CallCfgNode { - TwistedRequestRedirectCall() { - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). - exists(DataFlow::AttrRead read | - this.getFunction() = read and - read.getObject() = Request::instance() and - read.getAttributeName() = "redirect" - ) - } + DataFlow::MethodCallNode { + TwistedRequestRedirectCall() { this.calls(Request::instance(), "redirect") } override DataFlow::Node getBody() { none() } diff --git a/python/ql/src/semmle/python/frameworks/Werkzeug.qll b/python/ql/src/semmle/python/frameworks/Werkzeug.qll index c5f1c408f4d..21560d1e8c8 100644 --- a/python/ql/src/semmle/python/frameworks/Werkzeug.qll +++ b/python/ql/src/semmle/python/frameworks/Werkzeug.qll @@ -154,17 +154,11 @@ module Werkzeug { */ class HeadersAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). + // normal (non-async) methods nodeFrom = instance() and - exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | - // methods (non-async) - attr.getAttributeName() in ["getlist", "get_all", "popitem", "to_wsgi_list"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - ) + nodeTo + .(DataFlow::MethodCallNode) + .calls(nodeFrom, ["getlist", "get_all", "popitem", "to_wsgi_list"]) } } } diff --git a/python/ql/src/semmle/python/frameworks/Yarl.qll b/python/ql/src/semmle/python/frameworks/Yarl.qll index c3e8e89e172..02d1f886391 100644 --- a/python/ql/src/semmle/python/frameworks/Yarl.qll +++ b/python/ql/src/semmle/python/frameworks/Yarl.qll @@ -63,40 +63,23 @@ module Yarl { nodeTo = call ) or - // Methods - // - // TODO: When we have tools that make it easy, model these properly to handle - // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach - // (since it allows us to at least capture the most common cases). - exists(DataFlow::AttrRead attr | - // methods (that replaces part of URL, taken as only arguments) - attr.getAttributeName() in [ + // normal (non-async) methods + nodeFrom = instance() and + nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["human_repr"]) + or + // methods that give an altered URL. taint both from object, and form argument + // (to result of call) + exists(DataFlow::MethodCallNode call | + call.calls(instance(), + [ "with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path", "with_query", "with_query", "update_query", "update_query", "with_fragment", "with_name", // join is a bit different, but is still correct to add here :+1: "join" - ] and - ( - // obj -> obj.meth() - nodeFrom = instance() and - attr.getObject() = nodeFrom and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr - or - // argument of obj.meth() -> obj.meth() - attr.getObject() = instance() and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr and - nodeFrom in [ - nodeTo.(DataFlow::CallCfgNode).getArg(_), - nodeTo.(DataFlow::CallCfgNode).getArgByName(_) - ] - ) - or - // other methods - nodeFrom = instance() and - attr.getObject() = nodeFrom and - attr.getAttributeName() in ["human_repr"] and - nodeTo.(DataFlow::CallCfgNode).getFunction() = attr + ]) and + nodeTo = call and + nodeFrom in [call.getObject(), call.getArg(_), call.getArgByName(_)] ) or // Attributes