Merge pull request #4490 from RasmusWL/python-model-django-sources

Python: model Django HttpRequest as RemoteFlowSource
This commit is contained in:
yoff
2020-10-21 13:46:51 +02:00
committed by GitHub
28 changed files with 1092 additions and 14 deletions

View File

@@ -163,8 +163,16 @@ module HTTP {
* extend `RouteSetup` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument used to set the URL pattern. */
abstract DataFlow::Node getUrlPatternArg();
/** Gets the URL pattern for this route, if it can be statically determined. */
abstract string getUrlPattern();
string getUrlPattern() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
result = str.getText()
)
}
/** Gets a function that will handle incoming requests for this route, if any. */
abstract Function getARouteHandler();

View File

@@ -1,10 +1,487 @@
/**
* Provides classes modeling security-relevant aspects of the `django` package.
* Provides classes modeling security-relevant aspects of the `django` PyPI package.
* See https://www.djangoproject.com/.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import semmle.python.regex
private module Django { }
/**
* Provides models for the `django` PyPI package.
* See https://www.djangoproject.com/.
*/
private module Django {
// ---------------------------------------------------------------------------
// django
// ---------------------------------------------------------------------------
/** Gets a reference to the `django` module. */
private DataFlow::Node django(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("django")
or
exists(DataFlow::TypeTracker t2 | result = django(t2).track(t2, t))
}
/** Gets a reference to the `django` module. */
DataFlow::Node django() { result = django(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `django` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node django_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["urls", "http"] and
(
t.start() and
result = DataFlow::importNode("django" + "." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode("django")
)
or
// Due to bad performance when using normal setup with `django_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
django_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
 * Holds if `res` is reached by taking the single type-tracking step `summary`
 * from a node given by `django_attr(t2, attr_name)`.
 *
 * Helper for `django_attr`: the comment there explains that the usual
 * `.track(t2, t)` formulation performed badly, so the step is joined here
 * explicitly. `pragma[nomagic]` is presumably what keeps this join from being
 * re-ordered by the optimizer -- confirm before removing it.
 */
pragma[nomagic]
private predicate django_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
  DataFlow::StepSummary::step(django_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `django` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node django_attr(string attr_name) {
result = django_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Provides models for the `django` module. */
module django {
// -------------------------------------------------------------------------
// django.urls
// -------------------------------------------------------------------------
/** Gets a reference to the `django.urls` module. */
DataFlow::Node urls() { result = django_attr("urls") }
/** Provides models for the `django.urls` module */
module urls {
/**
* Gets a reference to the attribute `attr_name` of the `urls` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node urls_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["path", "re_path"] and
(
t.start() and
result = DataFlow::importNode("django.urls" + "." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode("django.urls")
or
t.startInAttr(attr_name) and
result = django::urls()
)
or
// Due to bad performance when using normal setup with `urls_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
urls_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
 * Holds if `res` is reached by taking the single type-tracking step `summary`
 * from a node given by `urls_attr(t2, attr_name)`.
 *
 * Helper for `urls_attr`: the comment there explains that the usual
 * `.track(t2, t)` formulation performed badly, so the step is joined here
 * explicitly. `pragma[nomagic]` is presumably what keeps this join from being
 * re-ordered by the optimizer -- confirm before removing it.
 */
pragma[nomagic]
private predicate urls_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
  DataFlow::StepSummary summary
) {
  DataFlow::StepSummary::step(urls_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `urls` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node urls_attr(string attr_name) {
result = urls_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
* Gets a reference to the `django.urls.path` function.
* See https://docs.djangoproject.com/en/3.0/ref/urls/#path
*/
DataFlow::Node path() { result = urls_attr("path") }
/**
* Gets a reference to the `django.urls.re_path` function.
* See https://docs.djangoproject.com/en/3.0/ref/urls/#re_path
*/
DataFlow::Node re_path() { result = urls_attr("re_path") }
}
// -------------------------------------------------------------------------
// django.http
// -------------------------------------------------------------------------
/** Gets a reference to the `django.http` module. */
DataFlow::Node http() { result = django_attr("http") }
/** Provides models for the `django.http` module */
module http {
/**
* Gets a reference to the attribute `attr_name` of the `django.http` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node http_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["request", "HttpRequest"] and
(
t.start() and
result = DataFlow::importNode("django.http" + "." + attr_name)
or
t.startInAttr(attr_name) and
result = django::http()
)
or
// Due to bad performance when using normal setup with `http_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
http_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
 * Holds if `res` is reached by taking the single type-tracking step `summary`
 * from a node given by `http_attr(t2, attr_name)`.
 *
 * Helper for `http_attr`: the comment there explains that the usual
 * `.track(t2, t)` formulation performed badly, so the step is joined here
 * explicitly. `pragma[nomagic]` is presumably what keeps this join from being
 * re-ordered by the optimizer -- confirm before removing it.
 */
pragma[nomagic]
private predicate http_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
  DataFlow::StepSummary summary
) {
  DataFlow::StepSummary::step(http_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `django.http` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node http_attr(string attr_name) {
result = http_attr(DataFlow::TypeTracker::end(), attr_name)
}
// ---------------------------------------------------------------------------
// django.http.request
// ---------------------------------------------------------------------------
/** Gets a reference to the `django.http.request` module. */
DataFlow::Node request() { result = http_attr("request") }
/** Provides models for the `django.http.request` module. */
module request {
/**
* Gets a reference to the attribute `attr_name` of the `django.http.request` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node request_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["HttpRequest"] and
(
t.start() and
result = DataFlow::importNode("django.http.request" + "." + attr_name)
or
t.startInAttr(attr_name) and
result = django::http::request()
)
or
// Due to bad performance when using normal setup with `request_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
request_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
 * Holds if `res` is reached by taking the single type-tracking step `summary`
 * from a node given by `request_attr(t2, attr_name)`.
 *
 * Helper for `request_attr`: the comment there explains that the usual
 * `.track(t2, t)` formulation performed badly, so the step is joined here
 * explicitly. `pragma[nomagic]` is presumably what keeps this join from being
 * re-ordered by the optimizer -- confirm before removing it.
 */
pragma[nomagic]
private predicate request_attr_first_join(
  DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
  DataFlow::StepSummary summary
) {
  DataFlow::StepSummary::step(request_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `django.http.request` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node request_attr(string attr_name) {
result = request_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
* Provides models for the `django.http.request.HttpRequest` class
*
* See https://docs.djangoproject.com/en/3.0/ref/request-response/#httprequest-objects
*/
module HttpRequest {
/** Gets a reference to the `django.http.request.HttpRequest` class. */
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
t.start() and
result = request_attr("HttpRequest")
or
// handle django.http.HttpRequest alias
t.start() and
result = http_attr("HttpRequest")
or
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
}
/** Gets a reference to the `django.http.request.HttpRequest` class. */
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
/**
* A source of an instance of `django.http.request.HttpRequest`.
*
* This can include instantiation of the class, the return value of a function
* call, or a special parameter that is set when a function is called by an external
* library.
*
* Use `django::http::request::HttpRequest::instance()` predicate to get
* references to instances of `django.http.request.HttpRequest`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/** Gets a reference to an instance of `django.http.request.HttpRequest`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `django.http.request.HttpRequest`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
}
}
}
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
* Gets a reference to the Function `func`.
*
* The idea is that this function should be used as a route handler when setting up a
* route, but currently it just tracks all functions, since we can't do type-tracking
* backwards yet (TODO).
*/
private DataFlow::Node djangoRouteHandlerFunctionTracker(DataFlow::TypeTracker t, Function func) {
t.start() and
result = DataFlow::exprNode(func.getDefinition())
or
exists(DataFlow::TypeTracker t2 |
result = djangoRouteHandlerFunctionTracker(t2, func).track(t2, t)
)
}
/**
* Gets a reference to the Function `func`.
*
* The idea is that this function should be used as a route handler when setting up a
* route, but currently it just tracks all functions, since we can't do type-tracking
* backwards yet (TODO).
*/
private DataFlow::Node djangoRouteHandlerFunctionTracker(Function func) {
result = djangoRouteHandlerFunctionTracker(DataFlow::TypeTracker::end(), func)
}
/**
 * A `Function` that `djangoRouteHandlerFunctionTracker` yields at least one
 * reference to, and that is therefore treated as a django route handler.
 */
private class DjangoRouteHandler extends Function {
  DjangoRouteHandler() {
    exists(DataFlow::Node ref | ref = djangoRouteHandlerFunctionTracker(this))
  }

  /**
   * Gets the position of the request parameter: 1 when `isMethod()` holds
   * (the first parameter is skipped), 0 otherwise.
   */
  int getRequestParamIndex() { if this.isMethod() then result = 1 else result = 0 }

  /** Gets the parameter found at position `getRequestParamIndex()`. */
  Parameter getRequestParam() {
    exists(int idx | idx = this.getRequestParamIndex() | result = this.getArg(idx))
  }
}
/**
 * Common base class for the django route setups modelled in this module
 * (calls to `django.urls.path` and `django.urls.re_path`).
 *
 * It narrows `getARouteHandler` so that every concrete subclass has to yield
 * `DjangoRouteHandler`s rather than arbitrary `Function`s.
 */
abstract private class DjangoRouteSetup extends HTTP::Server::RouteSetup::Range, DataFlow::CfgNode {
  abstract override DjangoRouteHandler getARouteHandler();
}
/**
* Gets the regex that is used by django to find routed parameters when using `django.urls.path`.
*
* Taken from https://github.com/django/django/blob/7d1bf29977bb368d7c28e7c6eb146db3b3009ae7/django/urls/resolvers.py#L199
*/
private string pathRoutedParameterRegex() {
result = "<(?:(?<converter>[^>:]+):)?(?<parameter>\\w+)>"
}
/**
 * A call to `django.urls.path`.
 *
 * See https://docs.djangoproject.com/en/3.0/ref/urls/#path
 */
private class DjangoUrlsPathCall extends DjangoRouteSetup {
  override CallNode node;

  DjangoUrlsPathCall() { node.getFunction() = django::urls::path().asCfgNode() }

  /** Gets the URL pattern argument: the first positional argument or the `route` keyword argument. */
  override DataFlow::Node getUrlPatternArg() {
    result.asCfgNode() = [node.getArg(0), node.getArgByName("route")]
  }

  /**
   * Gets a route handler that `djangoRouteHandlerFunctionTracker` tracks to the view
   * argument (second positional argument or the `view` keyword argument).
   */
  override DjangoRouteHandler getARouteHandler() {
    exists(DataFlow::Node viewArg |
      viewArg.asCfgNode() in [node.getArg(1), node.getArgByName("view")] and
      djangoRouteHandlerFunctionTracker(result) = viewArg
    )
  }

  /**
   * Gets a parameter of a route handler that is filled in from the URL.
   *
   * Without a statically known URL pattern this is every handler parameter after
   * the request parameter; with a known pattern it is each parameter whose name
   * appears as the `parameter` part of a `<converter:parameter>` component.
   */
  override Parameter getARoutedParameter() {
    // If we don't know the URL pattern, we simply mark all parameters as a routed
    // parameter. This should give us more RemoteFlowSources but could also lead to
    // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
    exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
      not exists(this.getUrlPattern()) and
      result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
      // exclude the request parameter and anything positioned before it
      not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
    )
    or
    // URL pattern is known: look up handler parameters by the names found in the pattern.
    exists(string name |
      result = this.getARouteHandler().getArgByName(name) and
      exists(string match |
        match = this.getUrlPattern().regexpFind(pathRoutedParameterRegex(), _, _) and
        // capture group 2 is the `parameter` group of `pathRoutedParameterRegex()`
        name = match.regexpCapture(pathRoutedParameterRegex(), 2)
      )
    )
  }
}
/**
 * A regex that is used in a call to `django.urls.re_path`.
 *
 * Needs this subclass to be considered a RegexString.
 */
private class DjangoUrlsRePathRegex extends RegexString {
  // the `re_path` call whose URL pattern argument this string flows to
  DjangoUrlsRePathCall rePathCall;

  DjangoUrlsRePathRegex() {
    this instanceof StrConst and
    DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
  }

  /** Gets the `re_path` call whose URL pattern argument this string constant flows to. */
  DjangoUrlsRePathCall getRePathCall() { result = rePathCall }
}
/**
 * A call to `django.urls.re_path`.
 *
 * See https://docs.djangoproject.com/en/3.0/ref/urls/#re_path
 */
private class DjangoUrlsRePathCall extends DjangoRouteSetup {
  override CallNode node;

  DjangoUrlsRePathCall() { node.getFunction() = django::urls::re_path().asCfgNode() }

  /** Gets the URL pattern argument: the first positional argument or the `route` keyword argument. */
  override DataFlow::Node getUrlPatternArg() {
    result.asCfgNode() = [node.getArg(0), node.getArgByName("route")]
  }

  /**
   * Gets a route handler that `djangoRouteHandlerFunctionTracker` tracks to the view
   * argument (second positional argument or the `view` keyword argument).
   */
  override DjangoRouteHandler getARouteHandler() {
    exists(DataFlow::Node viewArg |
      viewArg.asCfgNode() in [node.getArg(1), node.getArgByName("view")] and
      djangoRouteHandlerFunctionTracker(result) = viewArg
    )
  }

  /**
   * Gets a parameter of a route handler that is filled in from the URL.
   *
   * Without a statically known URL pattern this is every handler parameter after
   * the request parameter. With a known regex, named groups select parameters by
   * name, and -- only when the regex has no named group at all -- numbered groups
   * select parameters by position after the request parameter.
   */
  override Parameter getARoutedParameter() {
    // If we don't know the URL pattern, we simply mark all parameters as a routed
    // parameter. This should give us more RemoteFlowSources but could also lead to
    // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
    exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
      not exists(this.getUrlPattern()) and
      result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
      // exclude the request parameter and anything positioned before it
      not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
    )
    or
    exists(DjangoRouteHandler routeHandler, DjangoUrlsRePathRegex regex |
      routeHandler = this.getARouteHandler() and
      regex.getRePathCall() = this
    |
      // either using named capture groups (passed as keyword arguments) or using
      // unnamed capture groups (passed as positional arguments)
      //
      // NOTE: `and` binds tighter than `or`, so the `not exists(...)` guard below
      // applies to the positional case only.
      not exists(regex.getGroupName(_, _)) and
      // first group will have group number 1
      result =
        routeHandler.getArg(routeHandler.getRequestParamIndex() + regex.getGroupNumber(_, _))
      or
      result = routeHandler.getArgByName(regex.getGroupName(_, _))
    )
  }
}
// ---------------------------------------------------------------------------
// HttpRequest taint modeling
// ---------------------------------------------------------------------------
/**
 * The request parameter of a route handler registered by some `DjangoRouteSetup`.
 *
 * Such a parameter is both a source of `django.http.request.HttpRequest`
 * instances and a remote flow source.
 */
class DjangoRouteHandlerRequestParam extends django::http::request::HttpRequest::InstanceSource,
  RemoteFlowSource::Range, DataFlow::ParameterNode {
  DjangoRouteHandlerRequestParam() {
    this.getParameter() = any(DjangoRouteSetup setup).getARouteHandler().getRequestParam()
  }

  override string getSourceType() { result = "django.http.request.HttpRequest" }
}
/**
 * An additional taint step from a reference to a `django.http.request.HttpRequest`
 * instance to a read of one of the attributes listed below on that reference.
 *
 * Method calls on the request and iteration over it are not covered yet (see the
 * TODOs at the end of the predicate).
 */
private class DjangoHttpRequstAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    nodeFrom = django::http::request::HttpRequest::instance() and
    exists(DataFlow::AttrRead read | nodeTo = read and read.getObject() = nodeFrom |
      // the comments inside the list give the Python type of the attributes that follow
      read.getAttributeName() in ["body",
            // str / bytes
            "path", "path_info", "method", "encoding", "content_type",
            // django.http.QueryDict
            // TODO: Model QueryDict
            "GET", "POST",
            // dict[str, str]
            "content_params", "COOKIES",
            // dict[str, Any]
            "META",
            // HttpHeaders (case insensitive dict-like)
            "headers",
            // MultiValueDict[str, UploadedFile]
            // TODO: Model MultiValueDict
            // TODO: Model UploadedFile
            "FILES",
            // django.urls.ResolverMatch
            // TODO: Model ResolverMatch
            "resolver_match"]
      // TODO: Handle calls to methods
      // TODO: Handle that a HttpRequest is iterable
    )
  }
}
}

View File

@@ -131,16 +131,6 @@ private module Flask {
)
)
}
/** Gets the argument used to pass in the URL pattern. */
abstract DataFlow::Node getUrlPatternArg();
override string getUrlPattern() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
result = str.getText()
)
}
}
/**

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,17 @@
Tests for Django in version 2.x and 3.x.
This folder contains a runnable django application generated with `django-admin startproject testproj` and `django-admin startapp testapp`.
To run the development server, install django (in venv), and run `python manage.py runserver`
To understand how things work, see
- https://docs.djangoproject.com/en/3.1/intro/tutorial01/#creating-a-project
- https://docs.djangoproject.com/en/3.1/intro/tutorial02/#activating-models
---
Note that from [Django 2.0 only Python 3 is supported](https://docs.djangoproject.com/en/stable/releases/2.0/#python-compatibility) (enforced by `options` file).
As I see it, from a QL modeling perspective, the important part of [Django 3.0](https://docs.djangoproject.com/en/stable/releases/3.0/) was the added support for ASGI (Asynchronous Server Gateway Interface), and [Django 3.1](https://docs.djangoproject.com/en/stable/releases/3.1/) added support for async views and async middleware.
We currently don't have any tests specific to Django 3.0, since it's very compatible with Django 2.0 in general, but we could split the tests in the future.

View File

@@ -0,0 +1,82 @@
| taint_test.py:7 | ok | test_taint | bar |
| taint_test.py:7 | ok | test_taint | foo |
| taint_test.py:8 | ok | test_taint | baz |
| taint_test.py:14 | ok | test_taint | request |
| taint_test.py:16 | ok | test_taint | request.body |
| taint_test.py:17 | ok | test_taint | request.path |
| taint_test.py:18 | ok | test_taint | request.path_info |
| taint_test.py:22 | ok | test_taint | request.method |
| taint_test.py:24 | ok | test_taint | request.encoding |
| taint_test.py:25 | ok | test_taint | request.content_type |
| taint_test.py:28 | ok | test_taint | request.content_params |
| taint_test.py:29 | ok | test_taint | request.content_params["key"] |
| taint_test.py:30 | ok | test_taint | request.content_params.get(..) |
| taint_test.py:34 | ok | test_taint | request.GET |
| taint_test.py:35 | ok | test_taint | request.GET["key"] |
| taint_test.py:36 | ok | test_taint | request.GET.get(..) |
| taint_test.py:37 | fail | test_taint | request.GET.getlist(..) |
| taint_test.py:38 | fail | test_taint | request.GET.getlist(..)[0] |
| taint_test.py:39 | ok | test_taint | request.GET.pop(..) |
| taint_test.py:40 | ok | test_taint | request.GET.pop(..)[0] |
| taint_test.py:41 | ok | test_taint | request.GET.popitem()[0] |
| taint_test.py:42 | ok | test_taint | request.GET.popitem()[1] |
| taint_test.py:43 | ok | test_taint | request.GET.popitem()[1][0] |
| taint_test.py:44 | fail | test_taint | request.GET.dict() |
| taint_test.py:45 | fail | test_taint | request.GET.dict()["key"] |
| taint_test.py:46 | fail | test_taint | request.GET.urlencode() |
| taint_test.py:49 | ok | test_taint | request.POST |
| taint_test.py:52 | ok | test_taint | request.COOKIES |
| taint_test.py:53 | ok | test_taint | request.COOKIES["key"] |
| taint_test.py:54 | ok | test_taint | request.COOKIES.get(..) |
| taint_test.py:57 | ok | test_taint | request.FILES |
| taint_test.py:58 | ok | test_taint | request.FILES["key"] |
| taint_test.py:59 | fail | test_taint | request.FILES["key"].content_type |
| taint_test.py:60 | fail | test_taint | request.FILES["key"].content_type_extra |
| taint_test.py:61 | fail | test_taint | request.FILES["key"].content_type_extra["key"] |
| taint_test.py:62 | fail | test_taint | request.FILES["key"].charset |
| taint_test.py:63 | fail | test_taint | request.FILES["key"].name |
| taint_test.py:64 | fail | test_taint | request.FILES["key"].file |
| taint_test.py:65 | fail | test_taint | request.FILES["key"].file.read() |
| taint_test.py:67 | ok | test_taint | request.FILES.get(..) |
| taint_test.py:68 | fail | test_taint | request.FILES.get(..).name |
| taint_test.py:69 | fail | test_taint | request.FILES.getlist(..) |
| taint_test.py:70 | fail | test_taint | request.FILES.getlist(..)[0] |
| taint_test.py:71 | fail | test_taint | request.FILES.getlist(..)[0].name |
| taint_test.py:72 | fail | test_taint | request.FILES.dict() |
| taint_test.py:73 | fail | test_taint | request.FILES.dict()["key"] |
| taint_test.py:74 | fail | test_taint | request.FILES.dict()["key"].name |
| taint_test.py:77 | ok | test_taint | request.META |
| taint_test.py:78 | ok | test_taint | request.META["HTTP_USER_AGENT"] |
| taint_test.py:79 | ok | test_taint | request.META.get(..) |
| taint_test.py:82 | ok | test_taint | request.headers |
| taint_test.py:83 | ok | test_taint | request.headers["user-agent"] |
| taint_test.py:84 | ok | test_taint | request.headers["USER_AGENT"] |
| taint_test.py:87 | ok | test_taint | request.resolver_match |
| taint_test.py:88 | fail | test_taint | request.resolver_match.args |
| taint_test.py:89 | fail | test_taint | request.resolver_match.args[0] |
| taint_test.py:90 | fail | test_taint | request.resolver_match.kwargs |
| taint_test.py:91 | fail | test_taint | request.resolver_match.kwargs["key"] |
| taint_test.py:93 | fail | test_taint | request.get_full_path() |
| taint_test.py:94 | fail | test_taint | request.get_full_path_info() |
| taint_test.py:98 | fail | test_taint | request.read() |
| taint_test.py:99 | fail | test_taint | request.readline() |
| taint_test.py:100 | fail | test_taint | request.readlines() |
| taint_test.py:101 | fail | test_taint | request.readlines()[0] |
| taint_test.py:102 | fail | test_taint | ListComp |
| taint_test.py:108 | ok | test_taint | args |
| taint_test.py:109 | ok | test_taint | args[0] |
| taint_test.py:110 | ok | test_taint | kwargs |
| taint_test.py:111 | ok | test_taint | kwargs["key"] |
| taint_test.py:115 | ok | test_taint | request.current_app |
| taint_test.py:120 | ok | test_taint | request.get_host() |
| taint_test.py:121 | ok | test_taint | request.get_port() |
| taint_test.py:128 | fail | test_taint | request.build_absolute_uri() |
| taint_test.py:129 | fail | test_taint | request.build_absolute_uri(..) |
| taint_test.py:130 | fail | test_taint | request.build_absolute_uri(..) |
| taint_test.py:133 | ok | test_taint | request.build_absolute_uri(..) |
| taint_test.py:134 | ok | test_taint | request.build_absolute_uri(..) |
| taint_test.py:142 | ok | test_taint | request.get_signed_cookie(..) |
| taint_test.py:143 | ok | test_taint | request.get_signed_cookie(..) |
| taint_test.py:144 | ok | test_taint | request.get_signed_cookie(..) |
| taint_test.py:148 | fail | test_taint | request.get_signed_cookie(..) |
| taint_test.py:149 | fail | test_taint | request.get_signed_cookie(..) |

View File

@@ -0,0 +1,6 @@
import experimental.dataflow.tainttracking.TestTaintLib
import experimental.dataflow.RemoteFlowSources
class RemoteFlowTestTaintConfiguration extends TestTaintTrackingConfiguration {
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
}

View File

@@ -0,0 +1,10 @@
# To force the extractor to see the files: since we use `--max-import-depth=1`, we use
# this "fake" import that doesn't actually work, but tricks the Python extractor into
# looking at all the files.
from testproj import *
from testapp import *
import os.path as pth
pth.join("foo", "bar")

View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Point Django at the `testproj` settings and dispatch the command-line arguments.

    Raises:
        ImportError: if Django cannot be imported; the original import failure is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as import_failure:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_failure
    else:
        # Only reached when the import succeeded; errors raised while running the
        # command are deliberately not translated.
        execute_from_command_line(sys.argv)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3

View File

@@ -0,0 +1,99 @@
"""testing views for Django 2.x and 3.x"""
from django.urls import path, re_path
from django.http import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound
from django.views import View
def url_match_xss(request, foo, bar, no_taint=None): # $routeHandler $routedParameter=foo $routedParameter=bar
return HttpResponse('url_match_xss: {} {}'.format(foo, bar))
def get_params_xss(request): # $routeHandler
return HttpResponse(request.GET.get("untrusted"))
def post_params_xss(request): # $routeHandler
return HttpResponse(request.POST.get("untrusted"))
def http_resp_write(request): # $routeHandler
rsp = HttpResponse()
rsp.write(request.GET.get("untrusted"))
return rsp
class Foo(object):
# Note: since Foo is used as the super type in a class view, it will be able to handle requests.
def post(self, request, untrusted): # $f-:routeHandler $f-:routedParameter=untrusted
return HttpResponse('Foo post: {}'.format(untrusted))
class ClassView(View, Foo):
def get(self, request, untrusted): # $f-:routeHandler $f-:routedParameter=untrusted
return HttpResponse('ClassView get: {}'.format(untrusted))
def show_articles(request, page_number=1): # $routeHandler $routedParameter=page_number
page_number = int(page_number)
return HttpResponse('articles page: {}'.format(page_number))
def xxs_positional_arg(request, arg0, arg1, no_taint=None): # $routeHandler $routedParameter=arg0 $routedParameter=arg1
return HttpResponse('xxs_positional_arg: {} {}'.format(arg0, arg1))
urlpatterns = [
re_path(r"^url_match/(?P<foo>[^/]+)/(?P<bar>[^/]+)", url_match_xss), # $routeSetup="^url_match/(?P<foo>[^/]+)/(?P<bar>[^/]+)"
re_path(r"^get_params", get_params_xss), # $routeSetup="^get_params"
re_path(r"^post_params", post_params_xss), # $routeSetup="^post_params"
re_path(r"^http_resp_write", http_resp_write), # $routeSetup="^http_resp_write"
re_path(r"^class_view/(?P<untrusted>.+)", ClassView.as_view()), # $routeSetup="^class_view/(?P<untrusted>.+)"
# one pattern to support `articles/page-<n>` and ensuring that articles/ goes to page-1
re_path(r"articles/^(?:page-(?P<page_number>\d+)/)?", show_articles), # $routeSetup="articles/^(?:page-(?P<page_number>\d+)/)?"
# passing as positional argument is not the recommended way of doing things, but it is certainly
# possible
re_path(r"^([^/]+)/(?:foo|bar)/([^/]+)", xxs_positional_arg, name='xxs_positional_arg'), # $routeSetup="^([^/]+)/(?:foo|bar)/([^/]+)"
]
# Show we understand the keyword arguments to django.urls.re_path
def re_path_kwargs(request): # $routeHandler
return HttpResponse('re_path_kwargs')
urlpatterns = [
re_path(view=re_path_kwargs, route=r"^specifying-as-kwargs-is-not-a-problem") # $routeSetup="^specifying-as-kwargs-is-not-a-problem"
]
################################################################################
# Using path
################################################################################
# saying page_number is an externally controlled *string* is a bit strange, when we have an int converter :O
def page_number(request, page_number=1): # $routeHandler $routedParameter=page_number
return HttpResponse('page_number: {}'.format(page_number))
def foo_bar_baz(request, foo, bar, baz): # $routeHandler $routedParameter=foo $routedParameter=bar $routedParameter=baz
return HttpResponse('foo_bar_baz: {} {} {}'.format(foo, bar, baz))
def path_kwargs(request, foo, bar): # $routeHandler $routedParameter=foo $routedParameter=bar
    # Exactly one placeholder per routed parameter: a third `{}` with only two
    # arguments makes `str.format` raise IndexError as soon as the view is called.
    return HttpResponse('path_kwargs: {} {}'.format(foo, bar))
def not_valid_identifier(request): # $routeHandler
return HttpResponse('<foo!>')
urlpatterns = [
path("articles/", page_number), # $routeSetup="articles/"
path("articles/page-<int:page_number>", page_number), # $routeSetup="articles/page-<int:page_number>"
path("<int:foo>/<str:bar>/<baz>", foo_bar_baz, name='foo-bar-baz'), # $routeSetup="<int:foo>/<str:bar>/<baz>"
path(view=path_kwargs, route="<foo>/<bar>"), # $routeSetup="<foo>/<bar>"
# We should not report there is a request parameter called `not_valid!`
path("not_valid/<not_valid!>", not_valid_identifier), # $routeSetup="not_valid/<not_valid!>"
]

View File

@@ -0,0 +1,156 @@
"""testing views for Django 2.x and 3.x"""
from django.urls import path
from django.http import HttpRequest
def test_taint(request: HttpRequest, foo, bar, baz=None): # $routeHandler $routedParameter=foo $routedParameter=bar
ensure_tainted(foo, bar)
ensure_not_tainted(baz)
# Manually inspected all fields of the HttpRequest object
# https://docs.djangoproject.com/en/3.0/ref/request-response/#httprequest-objects
ensure_tainted(
request,
request.body,
request.path,
request.path_info,
# With CSRF middleware disabled, it's possible to use custom methods,
# for example by `curl -X FOO <url>`
request.method,
request.encoding,
request.content_type,
# Dict[str, str]
request.content_params,
request.content_params["key"],
request.content_params.get("key"),
# django.http.QueryDict
# see https://docs.djangoproject.com/en/3.0/ref/request-response/#querydict-objects
request.GET,
request.GET["key"],
request.GET.get("key"),
request.GET.getlist("key"),
request.GET.getlist("key")[0],
request.GET.pop("key"),
request.GET.pop("key")[0],
request.GET.popitem()[0], # key
request.GET.popitem()[1], # values
request.GET.popitem()[1][0], # values[0]
request.GET.dict(),
request.GET.dict()["key"],
request.GET.urlencode(),
# django.http.QueryDict (same as above, did not duplicate tests)
request.POST,
# Dict[str, str]
request.COOKIES,
request.COOKIES["key"],
request.COOKIES.get("key"),
# MultiValueDict[str, UploadedFile]
request.FILES,
request.FILES["key"],
request.FILES["key"].content_type,
request.FILES["key"].content_type_extra,
request.FILES["key"].content_type_extra["key"],
request.FILES["key"].charset,
request.FILES["key"].name,
request.FILES["key"].file,
request.FILES["key"].file.read(),
request.FILES.get("key"),
request.FILES.get("key").name,
request.FILES.getlist("key"),
request.FILES.getlist("key")[0],
request.FILES.getlist("key")[0].name,
request.FILES.dict(),
request.FILES.dict()["key"],
request.FILES.dict()["key"].name,
# Dict[str, Any]
request.META,
request.META["HTTP_USER_AGENT"],
request.META.get("HTTP_USER_AGENT"),
# HttpHeaders (case insensitive dict-like)
request.headers,
request.headers["user-agent"],
request.headers["USER_AGENT"],
# django.urls.ResolverMatch
request.resolver_match,
request.resolver_match.args,
request.resolver_match.args[0],
request.resolver_match.kwargs,
request.resolver_match.kwargs["key"],
request.get_full_path(),
request.get_full_path_info(),
# build_absolute_uri handled below
# get_signed_cookie handled below
request.read(),
request.readline(),
request.readlines(),
request.readlines()[0],
[line for line in request],
)
# django.urls.ResolverMatch also supports iterable unpacking
_view, args, kwargs = request.resolver_match
ensure_tainted(
args,
args[0],
kwargs,
kwargs["key"],
)
ensure_not_tainted(
request.current_app,
# Django has `ALLOWED_HOSTS` to ensure the HOST value cannot be tampered with.
# It is possible to remove this protection, but it seems reasonable to assume
# people don't do this by default.
request.get_host(),
request.get_port(),
)
####################################
# build_absolute_uri
####################################
ensure_tainted(
request.build_absolute_uri(),
request.build_absolute_uri(request.GET["key"]),
request.build_absolute_uri(location=request.GET["key"]),
)
ensure_not_tainted(
request.build_absolute_uri("/hardcoded/"),
request.build_absolute_uri("https://example.com"),
)
####################################
# get_signed_cookie
####################################
# We don't consider user to be able to tamper with cookies that are signed
ensure_not_tainted(
request.get_signed_cookie("key"),
request.get_signed_cookie("key", salt="salt"),
request.get_signed_cookie("key", max_age=60),
)
# However, providing tainted default value might result in taint
ensure_tainted(
request.get_signed_cookie("key", request.COOKIES["key"]),
request.get_signed_cookie("key", default=request.COOKIES["key"]),
)
# fake setup, you can't actually run this
urlpatterns = [
path("test-taint/<foo>/<bar>", test_taint), # $routeSetup="test-taint/<foo>/<bar>"
]

View File

@@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View File

@@ -0,0 +1,5 @@
from django.apps import AppConfig
class TestappConfig(AppConfig):
    """App configuration class for the `testapp` application."""

    # Application name; the project's settings list this class in INSTALLED_APPS
    # as 'testapp.apps.TestappConfig'.
    name = 'testapp'

View File

@@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View File

@@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@@ -0,0 +1,11 @@
from django.urls import path, re_path
from . import views
# Routes for this app: one plain `path` route and one regex-based `re_path` route.
# The trailing annotation comments record the URL pattern expected for each route.
urlpatterns = [
    path("foo/", views.foo), # $routeSetup="foo/"
    # TODO: Doesn't include standard `$` to mark end of string, due to problems with
    # inline expectation tests (which think the `$` would mark the beginning of a new
    # line)
    re_path(r"^ba[rz]/", views.bar_baz), # $routeSetup="^ba[rz]/"
]

View File

@@ -0,0 +1,7 @@
from django.http import HttpRequest, HttpResponse
def foo(request: HttpRequest): # $routeHandler
    """Return `HttpResponse("foo")`; the `request` argument is not read."""
    return HttpResponse("foo")
def bar_baz(request: HttpRequest): # $routeHandler
    """Return `HttpResponse("bar_baz")`; the `request` argument is not read."""
    return HttpResponse("bar_baz")

View File

@@ -0,0 +1,16 @@
"""
ASGI config for testproj project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/3.1/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Point Django at 'testproj.settings' unless DJANGO_SETTINGS_MODULE is already set
# in the environment (`setdefault` never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')

# Module-level ASGI callable described in the module docstring.
application = get_asgi_application()

View File

@@ -0,0 +1,121 @@
"""
Django settings for testproj project.
Generated by 'django-admin startproject' using Django 3.1.2.
For more information on this file, see
https://docs.djangoproject.com/en/3.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.1/ref/settings/
"""
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
# Computed as the resolved path of this file, taken two `.parent` steps up.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): the key is hard-coded here; presumably acceptable because this
# project is only a test fixture -- confirm it is never deployed.
SECRET_KEY = '!vwm^sd$9#=ebollrn--dd3_x8-b=aj!c@lp8x)ha8r()^51^f'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# No host names are listed explicitly.
ALLOWED_HOSTS = []
# Application definition

# The project's own app config comes first, followed by Django's `contrib` apps.
INSTALLED_APPS = [
    'testapp.apps.TestappConfig',
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
]

# Middleware, given as dotted paths to classes.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
# Dotted path of the project's root URL configuration module.
ROOT_URLCONF = 'testproj.urls'

# A single template engine entry: `DIRS` is empty, `APP_DIRS` is enabled, and four
# context processors are registered under `OPTIONS`.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

# Dotted path to the `application` object of the project's WSGI module.
WSGI_APPLICATION = 'testproj.wsgi.application'
# Database
# https://docs.djangoproject.com/en/3.1/ref/settings/#databases
# DATABASES = {
# 'default': {
# 'ENGINE': 'django.db.backends.sqlite3',
# 'NAME': BASE_DIR / 'db.sqlite3',
# }
# }
# Password validation
# https://docs.djangoproject.com/en/3.1/ref/settings/#auth-password-validators

# Four validators, each identified by the dotted class path under its 'NAME' key.
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]
# Internationalization
# https://docs.djangoproject.com/en/3.1/topics/i18n/

# Language and time zone settings, plus the three i18n/l10n/tz switches (all enabled).
LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.1/howto/static-files/

STATIC_URL = '/static/'

View File

@@ -0,0 +1,22 @@
"""testproj URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/3.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path, include
# Project-level routes: the admin site under "admin/" and the URLconf named
# "testapp.urls" included under "app/".
# The trailing annotation comments record the URL pattern expected for each route.
urlpatterns = [
    path("admin/", admin.site.urls), # $routeSetup="admin/"
    path("app/", include("testapp.urls")), # $routeSetup="app/"
]

View File

@@ -0,0 +1,16 @@
"""
WSGI config for testproj project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/3.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Point Django at 'testproj.settings' unless DJANGO_SETTINGS_MODULE is already set
# in the environment (`setdefault` never overwrites an existing value).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings')

# Module-level WSGI callable described in the module docstring.
application = get_wsgi_application()

View File

@@ -39,6 +39,7 @@
| views_2x_3x.py:44:24:44:30 | request | django.request.HttpRequest |
| views_2x_3x.py:44:33:44:36 | arg0 | externally controlled string |
| views_2x_3x.py:44:39:44:42 | arg1 | externally controlled string |
| views_2x_3x.py:65:20:65:26 | request | django.request.HttpRequest |
| views_2x_3x.py:78:17:78:23 | request | django.request.HttpRequest |
| views_2x_3x.py:78:26:78:36 | page_number | externally controlled string |
| views_2x_3x.py:81:17:81:23 | request | django.request.HttpRequest |

View File

@@ -67,7 +67,7 @@ def re_path_kwargs(request):
urlpatterns = [
re_path(view=re_path_kwargs, regex=r'^specifying-as-kwargs-is-not-a-problem$')
re_path(view=re_path_kwargs, route=r'^specifying-as-kwargs-is-not-a-problem$')
]
################################################################################