Python: Resolve all `meth = obj.meth; meth()` TODOs

It would probably have been easier to do this as the _first_ thing...
but that's too late now 😓
This commit is contained in:
Rasmus Wriedt Larsen
2021-07-21 13:25:53 +02:00
parent 6f63c03558
commit be1cad864b
9 changed files with 81 additions and 190 deletions

View File

@@ -201,24 +201,17 @@
" */", " */",
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {", " private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {", " override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
" // Methods", " // normal (non-async) methods",
" //",
" // TODO: When we have tools that make it easy, model these properly to handle",
" // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach",
" // (since it allows us to at least capture the most common cases).",
" nodeFrom = instance() and", " nodeFrom = instance() and",
" exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |", " nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, [\"TODO\"])",
" // normal (non-async) methods", " or",
" attr.getAttributeName() in [\"TODO\"] and", " // async methods",
" nodeTo.(DataFlow::CallCfgNode).getFunction() = attr", " exists(DataFlow::MethodCallNode call, Await await |",
" or", " nodeTo.asExpr() = await and",
" // async methods", " nodeFrom = instance()",
" exists(Await await, DataFlow::CallCfgNode call |", " |",
" attr.getAttributeName() in [\"TODO\"] and", " await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and",
" call.getFunction() = attr and", " call.calls(nodeFrom, [\"TODO\"])",
" await.getValue() = call.asExpr() and",
" nodeTo.asExpr() = await",
" )",
" )", " )",
" or", " or",
" // Attributes", " // Attributes",

View File

@@ -359,27 +359,21 @@ module AiohttpWebModel {
*/ */
private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
// nodeFrom = instance() and
// TODO: When we have tools that make it easy, model these properly to handle nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read_nowait"])
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach or
// (since it allows us to at least capture the most common cases). // async methods
nodeFrom = StreamReader::instance() and exists(DataFlow::MethodCallNode call, Await await |
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo.asExpr() = await and
// normal methods nodeFrom = instance()
attr.getAttributeName() in ["read_nowait"] and |
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and
or call.calls(nodeFrom,
// async methods [
exists(Await await, DataFlow::CallCfgNode call | "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any",
attr.getAttributeName() in [ "iter_chunks"
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", ])
"iter_any", "iter_chunks"
] and
call.getFunction() = attr and
await.getValue() = call.asExpr() and
nodeTo.asExpr() = await
)
) )
} }
} }
@@ -438,24 +432,17 @@ module AiohttpWebModel {
*/ */
private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = Request::instance() and nodeFrom = Request::instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["clone", "get_extra_info"])
// normal methods or
attr.getAttributeName() in ["clone", "get_extra_info"] and // async methods
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr exists(DataFlow::MethodCallNode call, Await await |
or nodeTo.asExpr() = await and
// async methods nodeFrom = Request::instance()
exists(Await await, DataFlow::CallCfgNode call | |
attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and await.getValue() = any(DataFlow::Node awaitable | call.flowsTo(awaitable)).asExpr() and
call.getFunction() = attr and call.calls(nodeFrom, ["read", "text", "json", "multipart", "post"])
await.getValue() = call.asExpr() and
nodeTo.asExpr() = await
)
) )
or or
// Attributes // Attributes

View File

@@ -348,17 +348,11 @@ private module Django {
nodeTo = call nodeTo = call
) )
or or
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo
// methods (non-async) .(DataFlow::MethodCallNode)
attr.getAttributeName() in ["getlist", "lists", "popitem", "dict", "urlencode"] and .calls(nodeFrom, ["getlist", "lists", "popitem", "dict", "urlencode"])
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
} }
} }
} }
@@ -2044,18 +2038,11 @@ private module PrivateDjango {
private class DjangoHttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class DjangoHttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = django::http::request::HttpRequest::instance() and nodeFrom = django::http::request::HttpRequest::instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo
attr.getAttributeName() in [ .(DataFlow::MethodCallNode)
"get_full_path", "get_full_path_info", "read", "readline", "readlines" .calls(nodeFrom, ["get_full_path", "get_full_path_info", "read", "readline", "readlines"])
] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
or or
// special handling of the `build_absolute_uri` method, see // special handling of the `build_absolute_uri` method, see
// https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri // https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri

View File

@@ -71,17 +71,9 @@ module Multidict {
nodeTo = call nodeTo = call
) )
or or
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["getone", "getall"])
// methods (non-async)
attr.getAttributeName() in ["getone", "getall"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
} }
} }
} }

View File

@@ -101,17 +101,11 @@ module Stdlib {
*/ */
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo
// normal (non-async) methods .(DataFlow::MethodCallNode)
attr.getAttributeName() in ["get_all", "as_bytes", "as_string", "keys"] and .calls(nodeFrom, ["get_all", "as_bytes", "as_string", "keys"])
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
} }
} }
} }
@@ -149,17 +143,9 @@ module Stdlib {
*/ */
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["output", "js_output"])
// normal (non-async) methods
attr.getAttributeName() in ["output", "js_output"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
or or
// Attributes // Attributes
nodeFrom = instance() and nodeFrom = instance() and

View File

@@ -50,17 +50,9 @@ private module Tornado {
*/ */
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["get_list", "get_all"])
// normal (non-async) methods
attr.getAttributeName() in ["get_list", "get_all"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
} }
} }
} }

View File

@@ -130,20 +130,15 @@ private module Twisted {
*/ */
private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = Request::instance() and nodeFrom = Request::instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo
// normal (non-async) methods .(DataFlow::MethodCallNode)
attr.getAttributeName() in [ .calls(nodeFrom,
"getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost", [
"getRequestHostname" "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost",
] and "getRequestHostname"
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr ])
)
or or
// Attributes // Attributes
nodeFrom = Request::instance() and nodeFrom = Request::instance() and
@@ -198,17 +193,8 @@ private module Twisted {
* *
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write
*/ */
class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode { class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::MethodCallNode {
TwistedRequestWriteCall() { TwistedRequestWriteCall() { this.calls(Request::instance(), "write") }
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
exists(DataFlow::AttrRead read |
this.getFunction() = read and
read.getObject() = Request::instance() and
read.getAttributeName() = "write"
)
}
override DataFlow::Node getBody() { override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("data")] result.asCfgNode() in [node.getArg(0), node.getArgByName("data")]
@@ -225,17 +211,8 @@ private module Twisted {
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect * See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect
*/ */
class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range, class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode { DataFlow::MethodCallNode {
TwistedRequestRedirectCall() { TwistedRequestRedirectCall() { this.calls(Request::instance(), "redirect") }
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
exists(DataFlow::AttrRead read |
this.getFunction() = read and
read.getObject() = Request::instance() and
read.getAttributeName() = "redirect"
)
}
override DataFlow::Node getBody() { none() } override DataFlow::Node getBody() { none() }

View File

@@ -154,17 +154,11 @@ module Werkzeug {
*/ */
class HeadersAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { class HeadersAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods // normal (non-async) methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = instance() and nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | nodeTo
// methods (non-async) .(DataFlow::MethodCallNode)
attr.getAttributeName() in ["getlist", "get_all", "popitem", "to_wsgi_list"] and .calls(nodeFrom, ["getlist", "get_all", "popitem", "to_wsgi_list"])
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
} }
} }
} }

View File

@@ -63,40 +63,23 @@ module Yarl {
nodeTo = call nodeTo = call
) )
or or
// Methods // normal (non-async) methods
// nodeFrom = instance() and
// TODO: When we have tools that make it easy, model these properly to handle nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["human_repr"])
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach or
// (since it allows us to at least capture the most common cases). // methods that give an altered URL. taint both from object, and from argument
exists(DataFlow::AttrRead attr | // (to result of call)
// methods (that replaces part of URL, taken as only arguments) exists(DataFlow::MethodCallNode call |
attr.getAttributeName() in [ call.calls(instance(),
[
"with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path", "with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path",
"with_query", "with_query", "update_query", "update_query", "with_fragment", "with_query", "with_query", "update_query", "update_query", "with_fragment",
"with_name", "with_name",
// join is a bit different, but is still correct to add here :+1: // join is a bit different, but is still correct to add here :+1:
"join" "join"
] and ]) and
( nodeTo = call and
// obj -> obj.meth() nodeFrom in [call.getObject(), call.getArg(_), call.getArgByName(_)]
nodeFrom = instance() and
attr.getObject() = nodeFrom and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
or
// argument of obj.meth() -> obj.meth()
attr.getObject() = instance() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr and
nodeFrom in [
nodeTo.(DataFlow::CallCfgNode).getArg(_),
nodeTo.(DataFlow::CallCfgNode).getArgByName(_)
]
)
or
// other methods
nodeFrom = instance() and
attr.getObject() = nodeFrom and
attr.getAttributeName() in ["human_repr"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
) )
or or
// Attributes // Attributes