Merge pull request #6349 from RasmusWL/more-modeling

Python: Improve various library modeling
This commit is contained in:
yoff
2021-09-06 17:01:45 +02:00
committed by GitHub
30 changed files with 1446 additions and 560 deletions

View File

@@ -180,8 +180,6 @@
"",
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
" override CallNode node;",
"",
" ClassInstantiation() { this = classRef().getACall() }",
" }",
"",
@@ -195,11 +193,55 @@
"",
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
"",
" /**",
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
" ",
" override DataFlow::Node getInstance() { result = instance() }",
" ",
" override string getAttributeName() { none() }",
" ",
" override string getMethodName() { none() }",
" ",
" override string getAsyncMethodName() { none() }",
" }",
"",
" /**",
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
" */",
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
" // TODO",
" none()",
" }",
" }",
"}",
],
"description": "Type tracking class (select full class path before inserting)",
},
"foo": {
"scope": "ql",
"prefix": "foo",
"body": [
" /**",
" * Taint propagation for `$1`.",
" */",
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
" InstanceTaintSteps() { this = \"$1\" }",
"",
" override DataFlow::Node getInstance() { result = instance() }",
"",
" override string getAttributeName() { none() }",
"",
" override string getMethodName() { none() }",
"",
" override string getAsyncMethodName() { none() }",
" }",
],
},
"API graph .getMember chain": {
"scope": "ql",
"prefix": "api graph .getMember chain",

View File

@@ -1,6 +1,20 @@
import python
import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
*
* Since these have not been performance optimized, please only use them for
* debug-queries or in tests.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
/**
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the Expr `e`.
*/
string prettyExpr(Expr e) {
not e instanceof Num and
not e instanceof StrConst and
@@ -27,7 +41,9 @@ string prettyExpr(Expr e) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`
*/
bindingset[node]
string prettyNode(DataFlow::Node node) {
@@ -35,7 +51,9 @@ string prettyNode(DataFlow::Node node) {
}
/**
* Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* INTERNAL: Do not use.
*
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
*/
bindingset[node]

View File

@@ -46,9 +46,13 @@ private module Cached {
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
or
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
or
awaitStep(nodeFrom, nodeTo)
}
}
@@ -201,26 +205,9 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
* for example `for x in xs`, or `for x,y in points`.
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
* such that the whole expression `await x` is tainted if `x` is tainted.
*/
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
*/
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // `nodeTo` is the `await` expression itself, `nodeFrom` is the awaited operand.
  exists(Await awaitExpr |
    awaitExpr = nodeTo.asExpr() and
    awaitExpr.getValue() = nodeFrom.asExpr()
  )
}

View File

@@ -13,6 +13,7 @@ private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* INTERNAL: Do not use.
@@ -293,6 +294,65 @@ module AiohttpWebModel {
/** Gets a reference to an instance of `aiohttp.web.Request`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
 * Taint propagation for `aiohttp.web.Request`.
 *
 * The attribute and method names are handed to `InstanceTaintStepsHelper`
 * (defined outside this file), which presumably derives the actual taint steps
 * from instances returned by `getInstance()`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "aiohttp.web.Request" }

  override DataFlow::Node getInstance() { result = instance() }

  override string getMethodName() { result = ["clone", "get_extra_info"] }

  override string getAsyncMethodName() { result = ["read", "text", "json", "multipart", "post"] }

  override string getAttributeName() {
    result =
      [
        "url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
        "headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
        "http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
      ]
  }
}
/**
 * A `MultiDictProxy` instance obtained from an `aiohttp.web.Request`: either a read
 * of the `query`/`headers` attribute, or the result of `await request.post()`.
 */
class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
  AiohttpRequestMultiDictProxyInstances() {
    // `request.query` / `request.headers`
    this.(DataFlow::AttrRead).accesses(Request::instance(), ["query", "headers"])
    or
    // Only the direct form `x = await request.post()` is recognized; nothing more
    // general is attempted, since there is no easy way to do that yet.
    // TODO: more complete handling of `await request.post()`
    exists(DataFlow::AttrRead postRead, DataFlow::CallCfgNode postCall |
      postRead.accesses(Request::instance(), "post") and
      postCall.getFunction() = postRead and
      this.asExpr().(Await).getValue() = postCall.asExpr()
    )
  }
}
/** A read of `url` or `rel_url` on an `aiohttp.web.Request`, which yields a `yarl.URL` instance. */
class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
  AiohttpRequestYarlUrlInstances() {
    this.(DataFlow::AttrRead).accesses(Request::instance(), ["url", "rel_url"])
  }
}
/**
 * A read of `content` or `_payload` on an `aiohttp.web.Request`, which yields an
 * `aiohttp.StreamReader` instance.
 */
class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
  AiohttpRequestStreamReaderInstances() {
    this.(DataFlow::AttrRead).accesses(Request::instance(), ["content", "_payload"])
  }
}
}
/**
@@ -357,30 +417,20 @@ module AiohttpWebModel {
/**
* Taint propagation for `aiohttp.StreamReader`.
*/
private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = StreamReader::instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
// normal methods
attr.getAttributeName() in ["read_nowait"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
or
// async methods
exists(Await await, DataFlow::CallCfgNode call |
attr.getAttributeName() in [
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked",
"iter_any", "iter_chunks"
] and
call.getFunction() = attr and
await.getValue() = call.asExpr() and
nodeTo.asExpr() = await
)
)
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "aiohttp.StreamReader" }

  override DataFlow::Node getInstance() { result = instance() }

  // No attributes are listed for `aiohttp.StreamReader`.
  override string getAttributeName() { none() }

  // The only non-async method listed.
  override string getMethodName() { result = "read_nowait" }

  override string getAsyncMethodName() {
    result =
      [
        "read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any",
        "iter_chunks"
      ]
  }
}
}
@@ -431,80 +481,6 @@ module AiohttpWebModel {
}
}
/**
* Taint propagation for `aiohttp.web.Request`.
*
* See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
*/
private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
nodeFrom = Request::instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
// normal methods
attr.getAttributeName() in ["clone", "get_extra_info"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
or
// async methods
exists(Await await, DataFlow::CallCfgNode call |
attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and
call.getFunction() = attr and
await.getValue() = call.asExpr() and
nodeTo.asExpr() = await
)
)
or
// Attributes
nodeFrom = Request::instance() and
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
"url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
"headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
"http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
]
}
}
/** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
AiohttpRequestMultiDictProxyInstances() {
this.(DataFlow::AttrRead).getObject() = Request::instance() and
this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
or
// Handle the common case of `x = await request.post()`
// but don't try to handle anything else, since we don't have an easy way to do this yet.
// TODO: more complete handling of `await request.post()`
exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
this.asExpr() = await
|
read.(DataFlow::AttrRead).getObject() = Request::instance() and
read.(DataFlow::AttrRead).getAttributeName() = "post" and
call.getFunction() = read and
await.getValue() = call.asExpr()
)
}
}
/** An attribute read on an `aiohttp.web.Request` that is a `yarl.URL` instance. */
class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
AiohttpRequestYarlUrlInstances() {
this.(DataFlow::AttrRead).getObject() = Request::instance() and
this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
}
}
/** An attribute read on an `aiohttp.web.Request` that is a `aiohttp.StreamReader` instance. */
class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
AiohttpRequestStreamReaderInstances() {
this.(DataFlow::AttrRead).getObject() = Request::instance() and
this.(DataFlow::AttrRead).getAttributeName() in ["content", "_payload"]
}
}
// ---------------------------------------------------------------------------
// aiohttp.web Response modeling
// ---------------------------------------------------------------------------

View File

@@ -10,9 +10,11 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.Stdlib
private import semmle.python.regex
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
private import semmle.python.frameworks.internal.SelfRefMixin
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `django` PyPI package.
@@ -289,6 +291,178 @@ private module Django {
API::Node subclassRef() { result = any(ModeledSubclass subclass).getASubclass*() }
}
}
/**
 * Provides models for the `django.utils.datastructures.MultiValueDict` class
 *
 * See
 * - https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.QueryDict (subclass that has proper docs)
 * - https://www.kite.com/python/docs/django.utils.datastructures.MultiValueDict
 */
module MultiValueDict {
  /** Gets a reference to the `django.utils.datastructures.MultiValueDict` class. */
  private API::Node classRef() {
    exists(API::Node datastructures |
      datastructures = API::moduleImport("django").getMember("utils").getMember("datastructures")
    |
      result = datastructures.getMember("MultiValueDict")
    )
  }

  /**
   * A source of instances of `django.utils.datastructures.MultiValueDict`, extend this class to model new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a
   * function call, or a special parameter that an external library sets when it
   * calls a function.
   *
   * Use the predicate `MultiValueDict::instance()` to get references to instances of `django.utils.datastructures.MultiValueDict`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** A direct instantiation of `django.utils.datastructures.MultiValueDict`. */
  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
    ClassInstantiation() { classRef().getACall() = this }
  }

  /** Gets a reference to an instance of `django.utils.datastructures.MultiValueDict`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // base case: tracking starts at any modeled instance source
    result instanceof InstanceSource and
    t.start()
    or
    // recursive case: follow one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `django.utils.datastructures.MultiValueDict`. */
  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

  /**
   * Taint propagation for `django.utils.datastructures.MultiValueDict`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "django.utils.datastructures.MultiValueDict" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getMethodName() {
      result = ["getlist", "lists", "popitem", "dict", "urlencode"]
    }

    override string getAttributeName() { none() }

    override string getAsyncMethodName() { none() }
  }

  /**
   * Extra taint propagation for `django.utils.datastructures.MultiValueDict`, not covered by `InstanceTaintSteps`.
   */
  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
      // class instantiation: the first positional argument taints the new instance
      nodeFrom = nodeTo.(ClassInstantiation).getArg(0)
    }
  }
}
/**
 * Provides models for the `django.core.files.uploadedfile.UploadedFile` class
 *
 * See https://docs.djangoproject.com/en/3.0/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.
 */
module UploadedFile {
  /**
   * A source of instances of `django.core.files.uploadedfile.UploadedFile`, extend this class to model new instances.
   *
   * This can include instantiations of the class, return values from function
   * calls, or a special parameter that will be set when functions are called by an external
   * library.
   *
   * Use the predicate `UploadedFile::instance()` to get references to instances of `django.core.files.uploadedfile.UploadedFile`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `django.core.files.uploadedfile.UploadedFile`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    t.start() and
    result instanceof InstanceSource
    or
    exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
  }

  /** Gets a reference to an instance of `django.core.files.uploadedfile.UploadedFile`. */
  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

  /**
   * Taint propagation for `django.core.files.uploadedfile.UploadedFile`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "django.core.files.uploadedfile.UploadedFile" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() {
      // Each attribute is listed exactly once (`content_type_extra` used to appear twice).
      result in ["content_type", "content_type_extra", "charset", "name", "file"]
    }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }

  /** A file-like object instance that originates from an `UploadedFile` (its `file` attribute). */
  class UploadedFileFileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
    UploadedFileFileLikeInstances() { this.(DataFlow::AttrRead).accesses(instance(), "file") }
  }
}
/**
 * Provides models for the `django.urls.ResolverMatch` class
 *
 * See https://docs.djangoproject.com/en/3.0/ref/urlresolvers/#django.urls.ResolverMatch.
 */
module ResolverMatch {
  /**
   * A source of instances of `django.urls.ResolverMatch`, extend this class to model new instances.
   *
   * Such a source can be an instantiation of the class, the return value of a
   * function call, or a special parameter that an external library sets when it
   * calls a function.
   *
   * Use the predicate `ResolverMatch::instance()` to get references to instances of `django.urls.ResolverMatch`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `django.urls.ResolverMatch`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // base case: tracking starts at any modeled instance source
    result instanceof InstanceSource and
    t.start()
    or
    // recursive case: follow one more type-tracking step
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
  }

  /** Gets a reference to an instance of `django.urls.ResolverMatch`. */
  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }

  /**
   * Taint propagation for `django.urls.ResolverMatch`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "django.urls.ResolverMatch" }

    override DataFlow::Node getInstance() { result = instance() }

    override string getAttributeName() { result = ["args", "kwargs"] }

    override string getMethodName() { none() }

    override string getAsyncMethodName() { none() }
  }
}
}
/**
@@ -587,6 +761,118 @@ private module PrivateDjango {
/** Gets a reference to an instance of `django.http.request.HttpRequest`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
 * Taint propagation for `django.http.request.HttpRequest`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "django.http.request.HttpRequest" }

  override DataFlow::Node getInstance() { result = instance() }

  override string getAsyncMethodName() { none() }

  override string getMethodName() {
    result = ["get_full_path", "get_full_path_info", "read", "readline", "readlines"]
  }

  override string getAttributeName() {
    result =
      [
        // str / bytes
        "body", "path", "path_info", "method", "encoding", "content_type",
        // django.http.QueryDict
        "GET", "POST",
        // dict[str, str]
        "content_params", "COOKIES",
        // dict[str, Any]
        "META",
        // HttpHeaders (case insensitive dict-like)
        "headers",
        // MultiValueDict[str, UploadedFile]
        "FILES",
        // django.urls.ResolverMatch
        "resolver_match"
      ]
    // TODO: Handle that a HttpRequest is iterable
  }
}
/**
 * Extra taint propagation for `django.http.request.HttpRequest`, not covered by `InstanceTaintSteps`.
 */
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // special handling of the `build_absolute_uri` method, see
    // https://docs.djangoproject.com/en/3.0/ref/request-response/#django.http.HttpRequest.build_absolute_uri
    exists(DataFlow::Node req, DataFlow::AttrRead methodRead, DataFlow::CallCfgNode call |
      req = django::http::request::HttpRequest::instance() and
      methodRead.getObject() = req and
      methodRead.getAttributeName() = "build_absolute_uri" and
      call.getFunction() = methodRead and
      nodeTo = call
    |
      // called without any arguments: taint comes from the request itself
      nodeFrom = req and
      not exists(call.getArg(_)) and
      not exists(call.getArgByName(_))
      or
      // otherwise taint comes from the `location` argument (positional or by keyword)
      nodeFrom = call.getArg(0)
      or
      nodeFrom = call.getArgByName("location")
    )
  }
}
/** A read of `GET`, `POST` or `FILES` on a django request, which yields a `MultiValueDict` instance. */
private class DjangoHttpRequestMultiValueDictInstances extends Django::MultiValueDict::InstanceSource {
  DjangoHttpRequestMultiValueDictInstances() {
    this.(DataFlow::AttrRead).accesses(instance(), ["GET", "POST", "FILES"])
  }
}
/** A read of `resolver_match` on a django request, which yields a `ResolverMatch` instance. */
private class DjangoHttpRequestResolverMatchInstances extends Django::ResolverMatch::InstanceSource {
  DjangoHttpRequestResolverMatchInstances() {
    this.(DataFlow::AttrRead).accesses(instance(), "resolver_match")
  }
}
/** An `UploadedFile` instance that originates from the `FILES` attribute of a django request. */
private class DjangoHttpRequestUploadedFileInstances extends Django::UploadedFile::InstanceSource {
  DjangoHttpRequestUploadedFileInstances() {
    // TODO: this currently only works in local-scope, since writing type-trackers for
    // this is a little too much effort. Once API-graphs are available for more
    // things, we can rewrite this.
    //
    // TODO: This approach for identifying member-access is very adhoc, and we should
    // be able to do something more structured for providing modeling of the members
    // of a container-object.
    exists(DataFlow::AttrRead files | files.accesses(instance(), "FILES") |
      // dicts: `FILES` itself or `FILES.dict()`, accessed by subscript or `.get(...)`
      exists(DataFlow::Node dict |
        dict = files
        or
        dict.(DataFlow::MethodCallNode).calls(files, "dict")
      |
        this.asCfgNode().(SubscriptNode).getObject() = dict.asCfgNode()
        or
        this.(DataFlow::MethodCallNode).calls(dict, "get")
      )
      or
      // getlist: a subscript on the result of `FILES.getlist(...)`
      exists(DataFlow::MethodCallNode getlistCall |
        getlistCall.calls(files, "getlist") and
        this.asCfgNode().(SubscriptNode).getObject() = getlistCall.asCfgNode()
      )
    )
  }
}
}
}
@@ -1455,9 +1741,6 @@ private module PrivateDjango {
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Form and form field modeling
// ---------------------------------------------------------------------------
@@ -1883,36 +2166,6 @@ private module PrivateDjango {
}
}
private class DjangoHttpRequstAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom = django::http::request::HttpRequest::instance() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in [
// str / bytes
"body", "path", "path_info", "method", "encoding", "content_type",
// django.http.QueryDict
// TODO: Model QueryDict
"GET", "POST",
// dict[str, str]
"content_params", "COOKIES",
// dict[str, Any]
"META",
// HttpHeaders (case insensitive dict-like)
"headers",
// MultiValueDict[str, UploadedFile]
// TODO: Model MultiValueDict
// TODO: Model UploadedFile
"FILES",
// django.urls.ResolverMatch
// TODO: Model ResolverMatch
"resolver_match"
]
// TODO: Handle calls to methods
// TODO: Handle that a HttpRequest is iterable
)
}
}
// ---------------------------------------------------------------------------
// django.shortcuts.redirect
// ---------------------------------------------------------------------------

View File

@@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.frameworks.Werkzeug
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `flask` PyPI package.
@@ -341,83 +342,101 @@ module Flask {
}
/**
* Taint propagation for a flask request.
* Taint propagation for `flask.Request`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
*/
private class FlaskRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Methods
exists(string method_name | method_name in ["get_data", "get_json"] |
// Method access
nodeFrom = request().getAUse() and
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
nodeTo.(DataFlow::AttrRead).getAttributeName() = method_name
or
// Method call
nodeFrom = request().getMember(method_name).getAUse() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = nodeFrom
)
or
// Attributes
nodeFrom = request().getAUse() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in [
// str
"path", "full_path", "base_url", "url", "access_control_request_method",
"content_encoding", "content_md5", "content_type", "data", "method", "mimetype",
"origin", "query_string", "referrer", "remote_addr", "remote_user", "user_agent",
// dict
"environ", "cookies", "mimetype_params", "view_args",
// json
"json",
// List[str]
"access_route",
// file-like
"stream", "input_stream",
// MultiDict[str, str]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
"args", "values", "form",
// MultiDict[str, FileStorage]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
// TODO: FileStorage needs extra taint steps
"files",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
"access_control_request_headers", "pragma",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
// TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
"accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
// TODO: dict subclass with extra attributes like `username` and `password`
"authorization",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
"cache_control",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
// TODO: dict-like with wsgiref.headers.Header compatibility methods
"headers"
]
)
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "flask.Request" }

  // Instances are the uses of the API-graph node returned by `request()`.
  override DataFlow::Node getInstance() { result = request().getAUse() }

  override string getMethodName() { result = ["get_data", "get_json"] }

  override string getAsyncMethodName() { none() }

  override string getAttributeName() {
    result =
      [
        // str
        "path", "full_path", "base_url", "url", "access_control_request_method",
        "content_encoding", "content_md5", "content_type", "data", "method", "mimetype", "origin",
        "query_string", "referrer", "remote_addr", "remote_user", "user_agent",
        // dict
        "environ", "cookies", "mimetype_params", "view_args",
        // json
        "json",
        // List[str]
        "access_route",
        // file-like
        "stream", "input_stream",
        // MultiDict[str, str]
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
        "args", "values", "form",
        // MultiDict[str, FileStorage]
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
        // TODO: FileStorage needs extra taint steps
        "files",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
        "access_control_request_headers", "pragma",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
        // TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
        "accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
        // TODO: dict subclass with extra attributes like `username` and `password`
        "authorization",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
        // TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
        "cache_control",
        // https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
        // TODO: dict-like with wsgiref.headers.Header compatibility methods
        "headers"
      ]
  }
}
private class RequestAttrMultiDict extends Werkzeug::werkzeug::datastructures::MultiDict::InstanceSourceApiNode {
private class RequestAttrMultiDict extends Werkzeug::MultiDict::InstanceSource {
string attr_name;
RequestAttrMultiDict() {
attr_name in ["args", "values", "form", "files"] and
this = request().getMember(attr_name)
this.(DataFlow::AttrRead).accesses(request().getAUse(), attr_name)
}
override string toString() { result = this.(API::Node).toString() }
}
private class RequestAttrFiles extends RequestAttrMultiDict {
// TODO: Somehow specify that elements of `RequestAttrFiles` are
// Werkzeug::werkzeug::datastructures::FileStorage and should have those additional taint steps
// AND that the 0-indexed argument to its' save method is a sink for path-injection.
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
RequestAttrFiles() { attr_name = "files" }
/** A `FileStorage` instance that originates from the `files` attribute of a flask request. */
private class FlaskRequestFileStorageInstances extends Werkzeug::FileStorage::InstanceSource {
  FlaskRequestFileStorageInstances() {
    // TODO: this currently only works in local-scope, since writing type-trackers for
    // this is a little too much effort. Once API-graphs are available for more
    // things, we can rewrite this.
    //
    // TODO: This approach for identifying member-access is very adhoc, and we should
    // be able to do something more structured for providing modeling of the members
    // of a container-object.
    exists(DataFlow::AttrRead files | files.accesses(request().getAUse(), "files") |
      // `files.get(...)`
      this.(DataFlow::MethodCallNode).calls(files, "get")
      or
      // a subscript on either `files` itself or on the result of `files.getlist(...)`
      exists(DataFlow::Node indexed |
        indexed = files
        or
        indexed.(DataFlow::MethodCallNode).calls(files, "getlist")
      |
        this.asCfgNode().(SubscriptNode).getObject() = indexed.asCfgNode()
      )
    )
  }
}
/** A `Headers` instance that originates from a flask request (a read of its `headers` attribute). */
private class FlaskRequestHeadersInstances extends Werkzeug::Headers::InstanceSource {
  FlaskRequestHeadersInstances() {
    exists(DataFlow::AttrRead headersRead | headersRead = this |
      headersRead.accesses(request().getAUse(), "headers")
    )
  }
}
/**
 * An `Authorization` instance that originates from a flask request (a read of its
 * `authorization` attribute).
 */
private class FlaskRequestAuthorizationInstances extends Werkzeug::Authorization::InstanceSource {
  FlaskRequestAuthorizationInstances() {
    exists(DataFlow::AttrRead authRead | authRead = this |
      authRead.accesses(request().getAUse(), "authorization")
    )
  }
}
// ---------------------------------------------------------------------------

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `MarkupSafe` PyPI package.
@@ -82,7 +83,7 @@ private module MarkupSafeModel {
}
/** Taint propagation for `markupsafe.Markup`. */
class AddtionalTaintSteps extends TaintTracking::AdditionalTaintStep {
private class AddtionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeTo.(ClassInstantiation).getArg(0) = nodeFrom
}

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* INTERNAL: Do not use.
@@ -60,28 +61,29 @@ module Multidict {
/**
* Taint propagation for `multidict.MultiDictProxy`.
*
* See https://multidict.readthedocs.io/en/stable/multidict.html#multidictproxy
*/
class MultiDictProxyAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "multidict.MultiDictProxy" }

  override DataFlow::Node getInstance() { instance() = result }

  // no attributes are modeled
  override string getAttributeName() { none() }

  override string getMethodName() { result = ["getone", "getall"] }

  // no async methods are modeled
  override string getAsyncMethodName() { none() }
}
/**
* Extra taint propagation for `multidict.MultiDictProxy`, not covered by `InstanceTaintSteps`.
*
* Two kinds of steps are written out below: from the first positional argument of a
* class instantiation to the instantiation itself, and from an instance to the result
* of calling `getone` or `getall` on an attribute read of that instance.
*/
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// class instantiation
exists(ClassInstantiation call |
nodeFrom = call.getArg(0) and
nodeTo = call
)
or
// Methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
//
// NOTE(review): `getone` and `getall` are also returned by
// `InstanceTaintSteps.getMethodName()`, so this disjunct looks redundant with
// that class -- confirm against `InstanceTaintStepsHelper` before removing it.
nodeFrom = instance() and
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
// methods (non-async)
attr.getAttributeName() in ["getone", "getall"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
)
}
}
}

View File

@@ -10,9 +10,173 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.PEP249
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/** Provides models for the Python standard library. */
private module Stdlib {
module Stdlib {
/**
* Provides models for file-like objects,
* mostly to define a standard set of extra taint-steps.
*
* See
* - https://docs.python.org/3.9/glossary.html#term-file-like-object
* - https://docs.python.org/3.9/library/io.html#io.IOBase
*/
module FileLikeObject {
/**
* A source of a file-like object, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `FileLikeObject::instance()` to get references to file-like objects.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to a file-like object, tracked with the type tracker `t`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to a file-like object. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Taint propagation for file-like objects.
*
* Models the methods `read`, `readline` and `readlines`; no attributes and no
* async methods are modeled.
*/
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "<file-like object>" }
override DataFlow::Node getInstance() { result = instance() }
override string getAttributeName() { none() }
override string getMethodName() { result in ["read", "readline", "readlines"] }
override string getAsyncMethodName() { none() }
}
/**
* Extra taint propagation for file-like objects, not covered by `InstanceTaintSteps`.
*/
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// taint-propagation back to instance from `foo.write(tainted_data)`
exists(DataFlow::AttrRead write, DataFlow::CallCfgNode call, DataFlow::Node instance_ |
instance_ = instance() and
write.accesses(instance_, "write")
|
// the step ends at the post-update node of the instance, not at the call result
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() = instance_ and
call.getFunction() = write and
// only the first positional argument of the `write` call is considered
nodeFrom = call.getArg(0)
)
}
}
}
/**
 * Models for the `http.client.HTTPMessage` class.
 *
 * There are no official docs for this class, but see
 * https://github.com/python/cpython/blob/64f54b7ccd49764b0304e076bfd79b5482988f53/Lib/http/client.py#L175
 * and https://docs.python.org/3.9/library/email.compat32-message.html#email.message.Message
 */
module HTTPMessage {
  /**
   * A source of `http.client.HTTPMessage` instances. Extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values from function
   * calls, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `http.client.HTTPMessage`, use the predicate
   * `HTTPMessage::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `http.client.HTTPMessage`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive case: continue from an already tracked reference
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // base case: tracking starts at any `InstanceSource`
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `http.client.HTTPMessage`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `http.client.HTTPMessage`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "http.client.HTTPMessage" }

    override DataFlow::Node getInstance() { instance() = result }

    override string getAttributeName() { none() }

    override string getMethodName() { result = ["get_all", "as_bytes", "as_string", "keys"] }

    override string getAsyncMethodName() { none() }
  }
}
/**
 * Models for the `http.cookies.Morsel` class.
 *
 * See https://docs.python.org/3.9/library/http.cookies.html#http.cookies.Morsel.
 */
module Morsel {
  /**
   * A source of `http.cookies.Morsel` instances. Extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values from function
   * calls, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `http.cookies.Morsel`, use the predicate
   * `Morsel::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `http.cookies.Morsel`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive case: continue from an already tracked reference
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // base case: tracking starts at any `InstanceSource`
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `http.cookies.Morsel`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `http.cookies.Morsel`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "http.cookies.Morsel" }

    override DataFlow::Node getInstance() { instance() = result }

    override string getAttributeName() { result = ["key", "value", "coded_value"] }

    override string getMethodName() { result = ["output", "js_output"] }

    override string getAsyncMethodName() { none() }
  }
}
}
/**
* Provides models for the Python standard library.
*
* This module is marked private as exposing it means committing to 1-year deprecation
* policy, and the code is not in a polished enough state that we want to do so -- at
* least not without having convincing use-cases for it :)
*/
private module StdlibPrivate {
// ---------------------------------------------------------------------------
// os
// ---------------------------------------------------------------------------
@@ -395,7 +559,8 @@ private module Stdlib {
* A call to the builtin `open` function.
* See https://docs.python.org/3/library/functions.html#open
*/
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource,
DataFlow::CallCfgNode {
OpenCall() { this = getOpenFunctionRef().getACall() }
override DataFlow::Node getAPathArgument() {
@@ -911,6 +1076,20 @@ private module Stdlib {
}
}
/** An `HTTPMessage` instance that originates from a `BaseHTTPRequestHandler` instance. */
private class BaseHTTPRequestHandlerHeadersInstances extends Stdlib::HTTPMessage::InstanceSource {
  BaseHTTPRequestHandlerHeadersInstances() {
    // any read of the `headers` attribute on an instance
    exists(DataFlow::AttrRead headersRead | headersRead.accesses(instance(), "headers") |
      this = headersRead
    )
  }
}
/** A file-like object that originates from a `BaseHTTPRequestHandler` instance. */
private class BaseHTTPRequestHandlerFileLikeObjectInstances extends Stdlib::FileLikeObject::InstanceSource {
  BaseHTTPRequestHandlerFileLikeObjectInstances() {
    // any read of the `rfile` attribute on an instance
    exists(DataFlow::AttrRead rfileRead | rfileRead.accesses(instance(), "rfile") |
      this = rfileRead
    )
  }
}
/**
* The entry-point for handling a request with a `BaseHTTPRequestHandler` subclass.
*
@@ -1081,7 +1260,7 @@ private module Stdlib {
}
/** A call to the `open` method on a `pathlib.Path` instance. */
private class PathLibOpenCall extends PathlibFileAccess {
private class PathLibOpenCall extends PathlibFileAccess, Stdlib::FileLikeObject::InstanceSource {
PathLibOpenCall() { attrbuteName = "open" }
}

View File

@@ -10,12 +10,58 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.regex
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `tornado` PyPI package.
* See https://www.tornadoweb.org/en/stable/.
*/
private module Tornado {
/**
 * Models for the `tornado.httputil.HTTPHeaders` class.
 *
 * See https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders.
 */
module HTTPHeaders {
  /**
   * A source of `tornado.httputil.HTTPHeaders` instances. Extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values from function
   * calls, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `tornado.httputil.HTTPHeaders`, use the predicate
   * `HTTPHeaders::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `tornado.httputil.HTTPHeaders`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive case: continue from an already tracked reference
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // base case: tracking starts at any `InstanceSource`
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `tornado.httputil.HTTPHeaders`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `tornado.httputil.HTTPHeaders`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "tornado.httputil.HTTPHeaders" }

    override DataFlow::Node getInstance() { instance() = result }

    override string getAttributeName() { none() }

    override string getMethodName() { result = ["get_list", "get_all"] }

    override string getAsyncMethodName() { none() }
  }
}
// ---------------------------------------------------------------------------
// tornado
// ---------------------------------------------------------------------------
@@ -97,32 +143,6 @@ private module Tornado {
/**
* Gets a reference to an instance of the `tornado.web.RequestHandler` class or any subclass.
*
* Holds for every node that the type-tracked `instance(t)` nodes flow to,
* with `t` being the end state of the type tracker.
*/
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
 * Gets a reference to one of the methods `get_argument`, `get_body_argument`,
 * `get_query_argument`, tracked with type tracker `t`.
 */
private DataFlow::TypeTrackingNode argumentMethod(DataFlow::TypeTracker t) {
  // recursive case: continue from an already tracked reference
  exists(DataFlow::TypeTracker prev | result = argumentMethod(prev).track(prev, t))
  or
  // base case: `result` is an instance and `t` starts in one of the named attributes
  result = instance() and
  t.startInAttr(["get_argument", "get_body_argument", "get_query_argument"])
}
/** Gets a reference to one of the methods `get_argument`, `get_body_argument`, `get_query_argument`. */
DataFlow::Node argumentMethod() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = argumentMethod(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/**
 * Gets a reference to one of the methods `get_arguments`, `get_body_arguments`,
 * `get_query_arguments`, tracked with type tracker `t`.
 */
private DataFlow::TypeTrackingNode argumentsMethod(DataFlow::TypeTracker t) {
  // recursive case: continue from an already tracked reference
  exists(DataFlow::TypeTracker prev | result = argumentsMethod(prev).track(prev, t))
  or
  // base case: `result` is an instance and `t` starts in one of the named attributes
  result = instance() and
  t.startInAttr(["get_arguments", "get_body_arguments", "get_query_arguments"])
}
/** Gets a reference to one of the methods `get_arguments`, `get_body_arguments`, `get_query_arguments`. */
DataFlow::Node argumentsMethod() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = argumentsMethod(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/** Gets a reference to the `redirect` method. */
private DataFlow::TypeTrackingNode redirectMethod(DataFlow::TypeTracker t) {
t.startInAttr("redirect") and
@@ -147,30 +167,33 @@ private module Tornado {
/**
* Gets a reference to the `write` method.
*
* Holds for every node that the type-tracked `writeMethod(t)` nodes flow to,
* with `t` being the end state of the type tracker.
*/
DataFlow::Node writeMethod() { writeMethod(DataFlow::TypeTracker::end()).flowsTo(result) }
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Method access
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
nodeFrom = instance() and
nodeTo in [argumentMethod(), argumentsMethod()]
or
// Method call
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode() and
nodeFrom in [argumentMethod(), argumentsMethod()]
or
// Attributes
nodeFrom = instance() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in [
// List[str]
"path_args",
// Dict[str, str]
"path_kwargs",
// tornado.httputil.HTTPServerRequest
"request"
]
)
/**
 * Taint propagation for `tornado.web.RequestHandler`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "tornado.web.RequestHandler" }

  override DataFlow::Node getInstance() { instance() = result }

  override string getAttributeName() {
    // List[str]
    result = "path_args"
    or
    // Dict[str, str]
    result = "path_kwargs"
    or
    // tornado.httputil.HTTPServerRequest
    result = "request"
  }

  override string getMethodName() {
    // single-value accessors
    result = ["get_argument", "get_body_argument", "get_query_argument"]
    or
    // multi-value accessors
    result = ["get_arguments", "get_body_arguments", "get_query_arguments"]
  }

  override string getAsyncMethodName() { none() }
}
private class RequestAttrAccess extends tornado::httputil::HttpServerRequest::InstanceSource {
@@ -274,41 +297,53 @@ private module Tornado {
/**
* Gets a reference to an instance of `tornado.httputil.HttpServerRequest`.
*
* Holds for every node that the type-tracked `instance(t)` nodes flow to,
* with `t` being the end state of the type tracker.
*/
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/** Gets a reference to the `full_url` method. */
private DataFlow::TypeTrackingNode full_url(DataFlow::TypeTracker t) {
t.startInAttr("full_url") and
result = instance()
or
exists(DataFlow::TypeTracker t2 | result = full_url(t2).track(t2, t))
/**
 * Taint propagation for `tornado.httputil.HttpServerRequest`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "tornado.httputil.HttpServerRequest" }

  override DataFlow::Node getInstance() { instance() = result }

  override string getAttributeName() {
    // str / bytes
    result = ["uri", "path", "query", "remote_ip", "body"]
    or
    // Dict[str, List[bytes]]
    result = ["arguments", "query_arguments", "body_arguments"]
    or
    // dict-like, https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders
    result = "headers"
    or
    // Dict[str, http.cookies.Morsel]
    result = "cookies"
  }

  override string getMethodName() { result = "full_url" }

  override string getAsyncMethodName() { none() }
}
/**
* Gets a reference to the `full_url` method.
*
* Holds for every node that the type-tracked `full_url(t)` nodes flow to,
* with `t` being the end state of the type tracker.
*/
DataFlow::Node full_url() { full_url(DataFlow::TypeTracker::end()).flowsTo(result) }
/** An `HTTPHeaders` instance that originates from a Tornado request. */
private class TornadoRequestHTTPHeadersInstances extends HTTPHeaders::InstanceSource {
  TornadoRequestHTTPHeadersInstances() {
    // any read of the `headers` attribute on a request instance
    exists(DataFlow::AttrRead headersRead | headersRead.accesses(instance(), "headers") |
      this = headersRead
    )
  }
}
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Method access
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
nodeFrom = instance() and
nodeTo in [full_url()]
or
// Method call
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode() and
nodeFrom in [full_url()]
or
// Attributes
nodeFrom = instance() and
exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
read.getAttributeName() in [
// str / bytes
"uri", "path", "query", "remote_ip", "body",
// Dict[str, List[bytes]]
"arguments", "query_arguments", "body_arguments",
// dict-like, https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders
"headers",
// Dict[str, http.cookies.Morsel]
"cookies"
]
/** A `Morsel` instance that originates from a Tornado request. */
private class TornadoRequestMorselInstances extends Stdlib::Morsel::InstanceSource {
  TornadoRequestMorselInstances() {
    // TODO: this currently only works in local-scope, since writing type-trackers for
    // this is a little too much effort. Once API-graphs are available for more
    // things, we can rewrite this.
    //
    // TODO: This approach for identifying member-access is very adhoc, and we should
    // be able to do something more structured for providing modeling of the members
    // of a container-object.
    exists(DataFlow::AttrRead cookies | cookies.accesses(instance(), "cookies") |
      // result of calling `get` on the `cookies` attribute
      this.(DataFlow::MethodCallNode).calls(cookies, "get")
      or
      // subscript on the `cookies` attribute
      this.asCfgNode().(SubscriptNode).getObject() = cookies.asCfgNode()
    )
  }
}

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `twisted` PyPI package.
@@ -110,6 +111,31 @@ private module Twisted {
/**
* Gets a reference to an instance of `twisted.web.server.Request`.
*
* Holds for every node that the type-tracked `instance(t)` nodes flow to,
* with `t` being the end state of the type tracker.
*/
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
 * Taint propagation for `twisted.web.server.Request`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "twisted.web.server.Request" }

  override DataFlow::Node getInstance() { instance() = result }

  override string getAttributeName() {
    result =
      [
        "uri", "path", "prepath", "postpath", "content", "args", "received_cookies",
        "requestHeaders", "user", "password", "host"
      ]
  }

  override string getMethodName() {
    result =
      [
        "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost",
        "getRequestHostname"
      ]
  }

  override string getAsyncMethodName() { none() }
}
}
/**
@@ -125,36 +151,6 @@ private module Twisted {
override string getSourceType() { result = "twisted.web.server.Request" }
}
/**
 * Taint propagation for `twisted.web.server.Request`.
 */
private class TwistedRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // Both kinds of step start at a request instance and go through an attribute
    // read on that instance.
    nodeFrom = Request::instance() and
    exists(DataFlow::AttrRead read | read.getObject() = nodeFrom |
      // Attributes: the attribute read itself is the target of the step
      nodeTo = read and
      read.getAttributeName() in [
          "uri", "path", "prepath", "postpath", "content", "args", "received_cookies",
          "requestHeaders", "user", "password", "host"
        ]
      or
      // Methods
      //
      // TODO: When we have tools that make it easy, model these properly to handle
      // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
      // (since it allows us to at least capture the most common cases).
      //
      // normal (non-async) methods: the call of the attribute read is the target
      nodeTo.(DataFlow::CallCfgNode).getFunction() = read and
      read.getAttributeName() in [
          "getCookie", "getHeader", "getAllHeaders", "getUser", "getPassword", "getHost",
          "getRequestHostname"
        ]
    )
  }
}
/**
* A parameter of a request handler method (on a `twisted.web.resource.Resource` subclass)
* that is also given remote user input. (a bit like RoutedParameter).
@@ -198,17 +194,8 @@ private module Twisted {
*
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.server.Request.html#write
*/
class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::CallCfgNode {
TwistedRequestWriteCall() {
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
exists(DataFlow::AttrRead read |
this.getFunction() = read and
read.getObject() = Request::instance() and
read.getAttributeName() = "write"
)
}
class TwistedRequestWriteCall extends HTTP::Server::HttpResponse::Range, DataFlow::MethodCallNode {
TwistedRequestWriteCall() { this.calls(Request::instance(), "write") }
override DataFlow::Node getBody() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("data")]
@@ -225,17 +212,8 @@ private module Twisted {
* See https://twistedmatrix.com/documents/21.2.0/api/twisted.web.http.Request.html#redirect
*/
class TwistedRequestRedirectCall extends HTTP::Server::HttpRedirectResponse::Range,
DataFlow::CallCfgNode {
TwistedRequestRedirectCall() {
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
exists(DataFlow::AttrRead read |
this.getFunction() = read and
read.getObject() = Request::instance() and
read.getAttributeName() = "redirect"
)
}
DataFlow::MethodCallNode {
TwistedRequestRedirectCall() { this.calls(Request::instance(), "redirect") }
override DataFlow::Node getBody() { none() }

View File

@@ -9,6 +9,9 @@ private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Stdlib
private import semmle.python.Concepts
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* Provides models for the `Werkzeug` PyPI package.
@@ -17,86 +20,92 @@ private import semmle.python.ApiGraphs
* - https://werkzeug.palletsprojects.com/en/1.0.x/#werkzeug
*/
module Werkzeug {
/** Provides models for the `werkzeug` module. */
module werkzeug {
/** Provides models for the `werkzeug.datastructures` module. */
module datastructures {
/**
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
module MultiDict {
/** DEPRECATED. Use `InstanceSourceApiNode` instead. */
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
module MultiDict {
/**
* A source of instances of `werkzeug.datastructures.MultiDict`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `MultiDict::instance()` to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/**
* A source of instances of `werkzeug.datastructures.MultiDict`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `MultiDict::instance()` to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract class InstanceSourceApiNode extends API::Node { }
/**
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
DataFlow::Node getlist() {
// a use of the `getlist` member of any `InstanceSourceApiNode`
result = any(InstanceSourceApiNode a).getMember("getlist").getAUse()
}
}
/**
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
module FileStorage {
/** DEPRECATED. Use `InstanceSourceApiNode` instead. */
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* A source of instances of `werkzeug.datastructures.FileStorage`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `FileStorage::instance()` to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
abstract class InstanceSourceApiNode extends API::Node { }
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
DataFlow::Node instance() { result = any(InstanceSourceApiNode a).getAUse() }
}
}
}
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
exists(DataFlow::AttrRead read |
read.getObject() = nodeFrom and
nodeTo = read and
nodeTo = werkzeug::datastructures::MultiDict::getlist()
)
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
// getlist -> getlist()
nodeFrom = werkzeug::datastructures::MultiDict::getlist() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = nodeFrom
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
 * Taint propagation for `werkzeug.datastructures.MultiDict`.
 */
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "werkzeug.datastructures.MultiDict" }

  override DataFlow::Node getInstance() { instance() = result }

  override string getAttributeName() { none() }

  override string getMethodName() { result = "getlist" }

  override string getAsyncMethodName() { none() }
}
}
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom = werkzeug::datastructures::FileStorage::instance() and
exists(DataFlow::AttrRead read | nodeTo = read |
read.getAttributeName() in [
/**
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
module FileStorage {
/**
* A source of instances of `werkzeug.datastructures.FileStorage`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `FileStorage::instance()` to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
// All the attributes of the wrapper stream are proxied by the file storage so it's
// possible to do storage.read() instead of the long form storage.stream.read(). So
// that's why InstanceSource also extends `Stdlib::FileLikeObject::InstanceSource`
abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource,
DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Taint propagation for `werkzeug.datastructures.FileStorage`.
*/
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "werkzeug.datastructures.FileStorage" }
override DataFlow::Node getInstance() { result = instance() }
override string getAttributeName() {
result in [
// str
"filename", "name", "content_type", "mimetype",
// file-like
@@ -105,9 +114,244 @@ module Werkzeug {
"headers",
// dict[str, str]
"mimetype_params"
] and
read.getObject() = nodeFrom
)
]
}
override string getMethodName() { none() }
override string getAsyncMethodName() { none() }
}
/** A file-like object instance that originates from a `FileStorage`. */
private class FileStorageFileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
  FileStorageFileLikeInstances() {
    // any read of the `stream` attribute on a `FileStorage` instance
    exists(DataFlow::AttrRead streamRead | streamRead.accesses(instance(), "stream") |
      this = streamRead
    )
  }
}
/** A call to the `save` method of a `FileStorage`. */
private class FileStorageSaveCall extends FileSystemAccess::Range, DataFlow::MethodCallNode {
  FileStorageSaveCall() { this.calls(instance(), "save") }

  override DataFlow::Node getAPathArgument() {
    // first positional argument
    result = this.getArg(0)
    or
    // or the same argument passed by keyword
    result = this.getArgByName("dst")
  }
}
}
/**
 * Models for the `werkzeug.datastructures.Headers` class.
 *
 * See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers.
 */
module Headers {
  /**
   * A source of `werkzeug.datastructures.Headers` instances. Extend this class to model new instances.
   *
   * Typical sources are instantiations of the class, return values from function
   * calls, or a special parameter that an external library sets when it calls a function.
   *
   * To get references to instances of `werkzeug.datastructures.Headers`, use the predicate
   * `Headers::instance()`.
   */
  abstract class InstanceSource extends DataFlow::LocalSourceNode { }

  /** Gets a reference to an instance of `werkzeug.datastructures.Headers`, tracked with type tracker `t`. */
  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
    // recursive case: continue from an already tracked reference
    exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
    or
    // base case: tracking starts at any `InstanceSource`
    result instanceof InstanceSource and
    t.start()
  }

  /** Gets a reference to an instance of `werkzeug.datastructures.Headers`. */
  DataFlow::Node instance() {
    exists(DataFlow::TypeTrackingNode tracked |
      tracked = instance(DataFlow::TypeTracker::end()) and
      tracked.flowsTo(result)
    )
  }

  /**
   * Taint propagation for `werkzeug.datastructures.Headers`.
   */
  private class InstanceTaintSteps extends InstanceTaintStepsHelper {
    InstanceTaintSteps() { this = "werkzeug.datastructures.Headers" }

    override DataFlow::Node getInstance() { instance() = result }

    override string getAttributeName() { none() }

    override string getMethodName() {
      result = ["getlist", "get_all", "popitem", "to_wsgi_list"]
    }

    override string getAsyncMethodName() { none() }
  }
}
/**
* Provides models for the `werkzeug.datastructures.Authorization` class
*
* Instances are introduced through `InstanceSource`, tracked with type tracking, and
* exposed through `Authorization::instance()`. Taint propagation from an instance is
* declared in `InstanceTaintSteps`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization.
*/
module Authorization {
/**
* A source of instances of `werkzeug.datastructures.Authorization`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Authorization::instance()` to get references to instances of `werkzeug.datastructures.Authorization`.
*/
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
/** Gets a reference to an instance of `werkzeug.datastructures.Authorization`. */
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
// base case: tracking starts at any node modeled as an `InstanceSource`
t.start() and
result instanceof InstanceSource
or
// recursive case: extend an already tracked reference by one type-tracking step
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `werkzeug.datastructures.Authorization`. */
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
/**
* Taint propagation for `werkzeug.datastructures.Authorization`.
*/
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
InstanceTaintSteps() { this = "werkzeug.datastructures.Authorization" }
override DataFlow::Node getInstance() { result = instance() }
// reads of these attributes on an instance are declared as tainted
override string getAttributeName() {
result in [
"username", "password", "realm", "nonce", "uri", "nc", "cnonce", "response", "opaque",
"qop"
]
}
// no (async) method results are declared as tainted for this class
override string getMethodName() { none() }
override string getAsyncMethodName() { none() }
}
}
import WerkzeugOld
}
/**
* Old version that contains the deprecated modules.
*/
private module WerkzeugOld {
/**
* DEPRECATED: Use the modeling available directly in the `Werkzeug` module instead.
*
* Provides models for the `werkzeug` module.
*/
deprecated module werkzeug {
/**
* DEPRECATED: Use the modeling available directly in the `Werkzeug` module instead.
*
* Provides models for the `werkzeug.datastructures` module.
*/
deprecated module datastructures {
/**
* DEPRECATED: Use `Werkzeug::MultiDict` instead.
*
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
deprecated module MultiDict {
/**
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
*/
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* DEPRECATED. Use `Werkzeug::MultiDict::InstanceSource` instead.
*
* A source of instances of `werkzeug.datastructures.MultiDict`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `MultiDict::instance()` to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract deprecated class InstanceSourceApiNode extends API::Node { }
/**
* DEPRECATED
*
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* Only instances modeled as `InstanceSourceApiNode` are considered.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
deprecated DataFlow::Node getlist() {
result = any(InstanceSourceApiNode a).getMember("getlist").getAUse()
}
/**
* Two-step taint propagation for `getlist`: from an object to a `getlist` attribute
* read on it, and from a `getlist` reference to the result of calling it.
*/
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
exists(DataFlow::AttrRead read |
read.getObject() = nodeFrom and
nodeTo = read and
nodeTo = getlist()
)
or
// getlist -> getlist()
nodeFrom = getlist() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = nodeFrom
}
}
}
/**
* DEPRECATED: Use `Werkzeug::FileStorage` instead.
*
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
deprecated module FileStorage {
/**
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
*/
abstract deprecated class InstanceSource extends DataFlow::Node { }
/**
* DEPRECATED. Use `Werkzeug::FileStorage::InstanceSource` instead.
*
* A source of instances of `werkzeug.datastructures.FileStorage`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `FileStorage::instance()` to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
abstract deprecated class InstanceSourceApiNode extends API::Node { }
/**
* DEPRECATED: Use `Werkzeug::FileStorage::instance()` instead.
*
* Gets a reference to an instance of `werkzeug.datastructures.FileStorage`.
*
* Only uses of nodes modeled as `InstanceSourceApiNode` are considered.
*/
deprecated DataFlow::Node instance() { result = any(InstanceSourceApiNode a).getAUse() }
/**
* Taint propagation from a `FileStorage` instance to reads of the attributes listed
* below on that instance.
*/
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom = instance() and
exists(DataFlow::AttrRead read | nodeTo = read |
read.getAttributeName() in [
// str
"filename", "name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"
] and
read.getObject() = nodeFrom
)
}
}
}
}
}
}

View File

@@ -9,6 +9,7 @@ private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
/**
* INTERNAL: Do not use.
@@ -52,10 +53,30 @@ module Yarl {
/**
* Taint propagation for `yarl.URL`.
*
* See https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
*/
class YarlUrlAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
  InstanceTaintSteps() { this = "yarl.URL" }

  override DataFlow::Node getInstance() { result = instance() }

  // attributes whose reads on a `yarl.URL` instance are declared as tainted
  override string getAttributeName() {
    result =
      [
        "user", "raw_user", "password", "raw_password", "host", "raw_host", "port",
        "explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs",
        "raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts",
        "raw_parts", "name", "raw_name", "query"
      ]
  }

  // the single method whose call result is declared as tainted here
  override string getMethodName() { result = "human_repr" }

  override string getAsyncMethodName() { none() }
}
/**
* Extra taint propagation for `yarl.URL`, not covered by `InstanceTaintSteps`.
*/
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// class instantiation
exists(ClassInstantiation call |
@@ -63,51 +84,20 @@ module Yarl {
nodeTo = call
)
or
// Methods
//
// TODO: When we have tools that make it easy, model these properly to handle
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
// (since it allows us to at least capture the most common cases).
exists(DataFlow::AttrRead attr |
// methods (that replaces part of URL, taken as only arguments)
attr.getAttributeName() in [
// methods that give an altered URL. taint both from object, and from argument
// (to result of call)
exists(DataFlow::MethodCallNode call |
call.calls(instance(),
[
"with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path",
"with_query", "with_query", "update_query", "update_query", "with_fragment",
"with_name",
// join is a bit different, but is still correct to add here :+1:
"join"
] and
(
// obj -> obj.meth()
nodeFrom = instance() and
attr.getObject() = nodeFrom and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
or
// argument of obj.meth() -> obj.meth()
attr.getObject() = instance() and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr and
nodeFrom in [
nodeTo.(DataFlow::CallCfgNode).getArg(_),
nodeTo.(DataFlow::CallCfgNode).getArgByName(_)
]
)
or
// other methods
nodeFrom = instance() and
attr.getObject() = nodeFrom and
attr.getAttributeName() in ["human_repr"] and
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
]) and
nodeTo = call and
nodeFrom in [call.getObject(), call.getArg(_), call.getArgByName(_)]
)
or
// Attributes
nodeFrom = instance() and
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
"user", "raw_user", "password", "raw_password", "host", "raw_host", "port",
"explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs",
"raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts",
"raw_parts", "name", "raw_name", "query"
]
}
}

View File

@@ -0,0 +1,51 @@
/**
* INTERNAL: Do not use.
*
* Provides helper class for defining additional taint step.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
/**
* A helper class for defining additional taint steps.
*
* Each subclass is identified by a string (its characteristic predicate binds `this`)
* and declares, for the instances returned by `getInstance`, which attribute reads and
* which (async) method call results should be tainted. The declarations are turned into
* actual taint steps by `InstanceAdditionalTaintStep`.
*
* A member predicate that does not apply should be overridden with `none()`.
*/
bindingset[this]
abstract class InstanceTaintStepsHelper extends string {
/** Gets an instance that the additional taint steps should be applied to. */
abstract DataFlow::Node getInstance();
/** Gets the name of an attribute that should be tainted. */
abstract string getAttributeName();
/** Gets the name of a method, whose results should be tainted. */
abstract string getMethodName();
/** Gets the name of an async method, whose results should be tainted. */
abstract string getAsyncMethodName();
}
/**
 * Turns the declarations of every `InstanceTaintStepsHelper` into taint steps: from an
 * instance to the result of a listed method call, and to a read of a listed attribute.
 */
private class InstanceAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // every step starts at an instance reported by the helper
    exists(InstanceTaintStepsHelper helper | nodeFrom = helper.getInstance() |
      // instance -> result of calling a normal (non-async) method on it
      exists(DataFlow::MethodCallNode syncCall | nodeTo = syncCall |
        syncCall.calls(nodeFrom, helper.getMethodName())
      )
      or
      // instance -> awaitable produced by "calling" an async method on it.
      //
      // There is a general taint step from `foo` in `await foo` to the whole expression,
      // so tainting the awaitable is enough. It also means the awaitable can be stored
      // in a list (for example for use with `asyncio.gather`) and thereby taint that
      // list.
      exists(DataFlow::MethodCallNode asyncCall | nodeTo = asyncCall |
        asyncCall.calls(nodeFrom, helper.getAsyncMethodName())
      )
      or
      // instance -> read of a listed attribute on it
      exists(DataFlow::AttrRead attrRead | nodeTo = attrRead |
        attrRead.accesses(nodeFrom, helper.getAttributeName())
      )
    )
  }
}

View File

@@ -0,0 +1,18 @@
/**
* @name Remote flow sources
* @description Sources of remote user input.
* @kind problem
* @problem.severity recommendation
* @id py/meta/alerts/remote-flow-sources
* @tags meta
* @precision very-low
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import meta.MetaMetrics
// Report every remote flow source that is not located in a file classified as
// `IgnoredFile`, labelled with its source type.
from RemoteFlowSource source
where not source.getLocation().getFile() instanceof IgnoredFile
select source, "RemoteFlowSource: " + source.getSourceType()

View File

@@ -0,0 +1,55 @@
/**
* @name Remote flow sources reach
* @description Nodes that can be reached with taint tracking from sources of
* remote user input.
* @kind problem
* @problem.severity recommendation
* @id py/meta/alerts/remote-flow-sources-reach
* @tags meta
* @precision very-low
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.dataflow.new.RemoteFlowSources
private import meta.MetaMetrics
private import semmle.python.dataflow.new.internal.PrintNode
/**
* A taint-tracking configuration used to measure how far taint from remote flow sources
* reaches.
*
* Sources are remote flow sources outside ignored files. Sinks are nodes outside ignored
* files that are either remote flow sources themselves or the target of an additional
* flow step, minus the intermediate `obj.meth` nodes described in `isSink`.
*/
class RemoteFlowSourceReach extends TaintTracking::Configuration {
RemoteFlowSourceReach() { this = "RemoteFlowSourceReach" }
override predicate isSource(DataFlow::Node node) {
node instanceof RemoteFlowSource and
not node.getLocation().getFile() instanceof IgnoredFile
}
override predicate isSink(DataFlow::Node node) {
not node.getLocation().getFile() instanceof IgnoredFile and
(
node instanceof RemoteFlowSource
or
this.isAdditionalFlowStep(_, node)
) and
// In September 2021 we changed how we do taint-propagation for method calls (mostly
// relating to modeled frameworks/libraries). We used to do `obj -> obj.meth` and
// `obj.meth -> obj.meth()` in two separate steps, and now do them in one
// `obj -> obj.meth()`. To be able to compare the overall reach between these two
// versions, we don't want this query to alert us to the fact that we no longer taint
// the node in the middle (since that is just noise).
// see https://github.com/github/codeql/pull/6349
//
// We should be able to remove the following few lines of code once we don't care to
// compare with the old (before September 2021) way of doing taint-propagation for
// method calls.
not exists(DataFlow::MethodCallNode c |
// `node` is the `obj.meth` part of the call `c` ...
node = c.getFunction() and
// ... reached by a step from the object, and with a step onwards to the call
this.isAdditionalFlowStep(c.getObject(), node) and
this.isAdditionalFlowStep(node, c)
)
}
}
// Report every node the configuration finds flow to (from any source), using the
// pretty-printed form of the node as the message.
from RemoteFlowSourceReach cfg, DataFlow::Node reachable
where cfg.hasFlow(_, reachable)
select reachable, prettyNode(reachable)

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
abstract class FlowTest extends InlineExpectationsTest {
bindingset[this]

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.dataflow.new.DataFlow
import TestUtilities.InlineExpectationsTest
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
/**
* A routing test is designed to test that values are routed to the

View File

@@ -1,6 +1,6 @@
import python
import semmle.python.dataflow.new.DataFlow
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
query predicate conjunctive_lookup(
DataFlow::MethodCallNode methCall, string call, string object, string methodName

View File

@@ -1,7 +1,7 @@
import python
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.DataFlow
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }

View File

@@ -16,6 +16,15 @@ def test_access():
tainted_list.copy(), # $ tainted
)
for ((x, y, *z), a, b) in tainted_list:
ensure_tainted(
x, # $ tainted
y, # $ tainted
z, # $ tainted
a, # $ tainted
b, # $ tainted
)
def list_clear():
tainted_string = TAINTED_STRING

View File

@@ -52,6 +52,8 @@ def test_access(x, y, z):
reversed(tainted_list), # $ tainted
iter(tainted_list), # $ tainted
next(iter(tainted_list)), # $ tainted
[i for i in tainted_list], # $ tainted
[tainted_list for _i in [1,2,3]], # $ MISSING: tainted
)
a, b, c = tainted_list[0:3]

View File

@@ -2,7 +2,7 @@ import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.Concepts
import TestUtilities.InlineExpectationsTest
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
class SystemCommandExecutionTest extends InlineExpectationsTest {
SystemCommandExecutionTest() { this = "SystemCommandExecutionTest" }

View File

@@ -14,7 +14,7 @@ import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import TestUtilities.InlineExpectationsTest
import experimental.dataflow.TestUtil.PrintNode
private import semmle.python.dataflow.new.internal.PrintNode
DataFlow::Node shouldBeTainted() {
exists(DataFlow::CallCfgNode call |

View File

@@ -55,10 +55,10 @@ async def test_taint(request: web.Request): # $ requestHandler
await request.content.readline(), # $ tainted
await request.content.readchunk(), # $ tainted
(await request.content.readchunk())[0], # $ tainted
[line async for line in request.content], # $ MISSING: tainted
[data async for data in request.content.iter_chunked(1024)], # $ MISSING: tainted
[data async for data in request.content.iter_any()], # $ MISSING: tainted
[data async for data, _ in request.content.iter_chunks()], # $ MISSING: tainted
[line async for line in request.content], # $ tainted
[data async for data in request.content.iter_chunked(1024)], # $ tainted
[data async for data in request.content.iter_any()], # $ tainted
[data async for data, _ in request.content.iter_chunks()], # $ tainted
request.content.read_nowait(), # $ tainted
# aiohttp.StreamReader

View File

@@ -11,6 +11,9 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
# Manually inspected all fields of the HttpRequest object
# https://docs.djangoproject.com/en/3.0/ref/request-response/#httprequest-objects
import django.urls
django.urls.ResolverMatch
ensure_tainted(
request, # $ tainted
@@ -35,8 +38,8 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
request.GET, # $ tainted
request.GET["key"], # $ tainted
request.GET.get("key"), # $ tainted
request.GET.getlist("key"), # $ MISSING: tainted
request.GET.getlist("key")[0], # $ MISSING: tainted
request.GET.getlist("key"), # $ tainted
request.GET.getlist("key")[0], # $ tainted
request.GET.pop("key"), # $ tainted
request.GET.pop("key")[0], # $ tainted
# key
@@ -45,9 +48,10 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
request.GET.popitem()[1], # $ tainted
# values[0]
request.GET.popitem()[1][0], # $ tainted
request.GET.dict(), # $ MISSING: tainted
request.GET.dict()["key"], # $ MISSING: tainted
request.GET.urlencode(), # $ MISSING: tainted
request.GET.lists(), # $ tainted
request.GET.dict(), # $ tainted
request.GET.dict()["key"], # $ tainted
request.GET.urlencode(), # $ tainted
# django.http.QueryDict (same as above, did not duplicate tests)
request.POST, # $ tainted
@@ -60,22 +64,23 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
# MultiValueDict[str, UploadedFile]
request.FILES, # $ tainted
request.FILES["key"], # $ tainted
request.FILES["key"].content_type, # $ MISSING: tainted
request.FILES["key"].content_type_extra, # $ MISSING: tainted
request.FILES["key"].content_type_extra["key"], # $ MISSING: tainted
request.FILES["key"].charset, # $ MISSING: tainted
request.FILES["key"].name, # $ MISSING: tainted
request.FILES["key"].file, # $ MISSING: tainted
request.FILES["key"].file.read(), # $ MISSING: tainted
request.FILES["key"].content_type, # $ tainted
request.FILES["key"].content_type_extra, # $ tainted
request.FILES["key"].content_type_extra["key"], # $ tainted
request.FILES["key"].charset, # $ tainted
request.FILES["key"].name, # $ tainted
request.FILES["key"].file, # $ tainted
request.FILES["key"].file.read(), # $ tainted
request.FILES.get("key"), # $ tainted
request.FILES.get("key").name, # $ MISSING: tainted
request.FILES.getlist("key"), # $ MISSING: tainted
request.FILES.getlist("key")[0], # $ MISSING: tainted
request.FILES.getlist("key")[0].name, # $ MISSING: tainted
request.FILES.dict(), # $ MISSING: tainted
request.FILES.dict()["key"], # $ MISSING: tainted
request.FILES.dict()["key"].name, # $ MISSING: tainted
request.FILES.get("key").name, # $ tainted
request.FILES.getlist("key"), # $ tainted
request.FILES.getlist("key")[0], # $ tainted
request.FILES.getlist("key")[0].name, # $ tainted
request.FILES.dict(), # $ tainted
request.FILES.dict()["key"], # $ tainted
request.FILES.dict()["key"].name, # $ tainted
request.FILES.dict().get("key").name, # $ tainted
# Dict[str, Any]
request.META, # $ tainted
@@ -89,21 +94,21 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
# django.urls.ResolverMatch
request.resolver_match, # $ tainted
request.resolver_match.args, # $ MISSING: tainted
request.resolver_match.args[0], # $ MISSING: tainted
request.resolver_match.kwargs, # $ MISSING: tainted
request.resolver_match.kwargs["key"], # $ MISSING: tainted
request.resolver_match.args, # $ tainted
request.resolver_match.args[0], # $ tainted
request.resolver_match.kwargs, # $ tainted
request.resolver_match.kwargs["key"], # $ tainted
request.get_full_path(), # $ MISSING: tainted
request.get_full_path_info(), # $ MISSING: tainted
request.get_full_path(), # $ tainted
request.get_full_path_info(), # $ tainted
# build_absolute_uri handled below
# get_signed_cookie handled below
request.read(), # $ MISSING: tainted
request.readline(), # $ MISSING: tainted
request.readlines(), # $ MISSING: tainted
request.readlines()[0], # $ MISSING: tainted
[line for line in request], # $ MISSING: tainted
request.read(), # $ tainted
request.readline(), # $ tainted
request.readlines(), # $ tainted
request.readlines()[0], # $ tainted
[line for line in request], # $ tainted
)
# django.urls.ResolverMatch also supports iterable unpacking
@@ -129,9 +134,9 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
# build_absolute_uri
####################################
ensure_tainted(
request.build_absolute_uri(), # $ MISSING: tainted
request.build_absolute_uri(request.GET["key"]), # $ MISSING: tainted
request.build_absolute_uri(location=request.GET["key"]), # $ MISSING: tainted
request.build_absolute_uri(), # $ tainted
request.build_absolute_uri(request.GET["key"]), # $ tainted
request.build_absolute_uri(location=request.GET["key"]), # $ tainted
)
ensure_not_tainted(
request.build_absolute_uri("/hardcoded/"),

View File

@@ -0,0 +1,6 @@
from flask import Flask, request
app = Flask(__name__)
@app.route("/save-uploaded-file") # $routeSetup="/save-uploaded-file"
def test_taint(): # $requestHandler
request.files['key'].save("path") # $ getAPathArgument="path"

View File

@@ -44,7 +44,16 @@ def test_taint(name = "World!", number="0", foo="foo"): # $requestHandler route
# werkzeug.datastructures.Authorization (a dict, with some properties)
request.authorization, # $ tainted
request.authorization['username'], # $ tainted
request.authorization.username, # $ MISSING: tainted
request.authorization.username, # $ tainted
request.authorization.password, # $ tainted
request.authorization.realm, # $ tainted
request.authorization.nonce, # $ tainted
request.authorization.uri, # $ tainted
request.authorization.nc, # $ tainted
request.authorization.cnonce, # $ tainted
request.authorization.response, # $ tainted
request.authorization.opaque, # $ tainted
request.authorization.qop, # $ tainted
# werkzeug.datastructures.RequestCacheControl
request.cache_control, # $ tainted
@@ -68,14 +77,16 @@ def test_taint(name = "World!", number="0", foo="foo"): # $requestHandler route
# a werkzeug.datastructures.MultiDict, mapping [str, werkzeug.datastructures.FileStorage]
request.files, # $ tainted
request.files['key'], # $ tainted
request.files['key'].filename, # $ MISSING: tainted
request.files['key'].stream, # $ MISSING: tainted
request.files['key'].filename, # $ tainted
request.files['key'].stream, # $ tainted
request.files['key'].read(), # $ tainted
request.files['key'].stream.read(), # $ tainted
request.files.get('key'), # $ tainted
request.files.get('key').filename, # $ MISSING: tainted
request.files.get('key').stream, # $ MISSING: tainted
request.files.get('key').filename, # $ tainted
request.files.get('key').stream, # $ tainted
request.files.getlist('key'), # $ tainted
request.files.getlist('key')[0].filename, # $ MISSING: tainted
request.files.getlist('key')[0].stream, # $ MISSING: tainted
request.files.getlist('key')[0].filename, # $ tainted
request.files.getlist('key')[0].stream, # $ tainted
# By default werkzeug.datastructures.ImmutableMultiDict -- although can be changed :\
request.form, # $ tainted
@@ -94,11 +105,15 @@ def test_taint(name = "World!", number="0", foo="foo"): # $requestHandler route
request.headers, # $ tainted
request.headers['key'], # $ tainted
request.headers.get('key'), # $ tainted
request.headers.get_all('key'), # $ MISSING: tainted
request.headers.getlist('key'), # $ MISSING: tainted
request.headers.get_all('key'), # $ tainted
request.headers.getlist('key'), # $ tainted
# popitem returns `(key, value)`
request.headers.popitem(), # $ tainted
request.headers.popitem()[0], # $ tainted
request.headers.popitem()[1], # $ tainted
# two ways to get (k, v) lists
list(request.headers), # $ tainted
request.headers.to_wsgi_list(), # $ MISSING: tainted
request.headers.to_wsgi_list(), # $ tainted
request.json, # $ tainted
request.json['foo'], # $ tainted

View File

@@ -58,17 +58,17 @@ class MyHandler(BaseHTTPRequestHandler):
self.headers, # $ tainted
self.headers['Foo'], # $ tainted
self.headers.get('Foo'), # $ tainted
self.headers.get_all('Foo'), # $ MISSING: tainted
self.headers.keys(), # $ MISSING: tainted
self.headers.get_all('Foo'), # $ tainted
self.headers.keys(), # $ tainted
self.headers.values(), # $ tainted
self.headers.items(), # $ tainted
self.headers.as_bytes(), # $ MISSING: tainted
self.headers.as_string(), # $ MISSING: tainted
self.headers.as_bytes(), # $ tainted
self.headers.as_string(), # $ tainted
str(self.headers), # $ tainted
bytes(self.headers), # $ tainted
self.rfile, # $ tainted
self.rfile.read(), # $ MISSING: tainted
self.rfile.read(), # $ tainted
)
form = cgi.FieldStorage(

View File

@@ -61,15 +61,16 @@ class TaintTest(tornado.web.RequestHandler):
# dict-like, see https://www.tornadoweb.org/en/stable/httputil.html#tornado.httputil.HTTPHeaders
request.headers, # $ tainted
request.headers["header-name"], # $ tainted
request.headers.get_list("header-name"), # $ MISSING: tainted
request.headers.get_all(), # $ MISSING: tainted
[(k, v) for (k, v) in request.headers.get_all()], # $ MISSING: tainted
request.headers.get_list("header-name"), # $ tainted
request.headers.get_all(), # $ tainted
[(k, v) for (k, v) in request.headers.get_all()], # $ tainted
# Dict[str, http.cookies.Morsel]
request.cookies, # $ tainted
request.cookies["cookie-name"], # $ tainted
request.cookies["cookie-name"].key, # $ MISSING: tainted
request.cookies["cookie-name"].value, # $ MISSING: tainted
request.cookies["cookie-name"].key, # $ tainted
request.cookies["cookie-name"].value, # $ tainted
request.cookies["cookie-name"].coded_value, # $ tainted
)