Merge pull request #3503 from RasmusWL/python-fix-django-taint-sinks

Python: Fix django taint sinks
This commit is contained in:
Taus
2020-07-02 13:32:35 +02:00
committed by GitHub
10 changed files with 253 additions and 65 deletions

View File

@@ -28,7 +28,12 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration {
source instanceof HttpRequestTaintSource
}
override predicate isSink(TaintTracking::Sink sink) { sink instanceof HttpResponseTaintSink }
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof HttpResponseTaintSink and
not sink instanceof DjangoResponseContent
or
sink instanceof DjangoResponseContentXSSVulnerable
}
}
from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink

View File

@@ -11,10 +11,30 @@ private import semmle.python.web.django.Shared
private import semmle.python.web.Http
/**
* Represents an argument to the `django.redirect` function.
* The URL argument for a call to the `django.shortcuts.redirect` function.
*/
class DjangoRedirect extends HttpRedirectTaintSink {
override string toString() { result = "django.redirect" }
class DjangoShortcutsRedirectSink extends HttpRedirectTaintSink {
override string toString() { result = "DjangoShortcutsRedirectSink" }
DjangoRedirect() { this = redirect().getACall().getAnArg() }
DjangoShortcutsRedirectSink() {
this = Value::named("django.shortcuts.redirect").(FunctionValue).getArgumentForCall(_, 0)
}
}
/** DEPRECATED: Use `DjangoShortcutsRedirectSink` instead. */
deprecated class DjangoRedirect = DjangoShortcutsRedirectSink;
/**
* The URL argument when instantiating a Django Redirect Response.
*/
class DjangoRedirectResponseSink extends HttpRedirectTaintSink {
DjangoRedirectResponseSink() {
exists(CallNode call | call = any(DjangoRedirectResponseClass cls).getACall() |
this = call.getArg(0)
or
this = call.getArgByName("redirect_to")
)
}
override string toString() { result = "DjangoRedirectResponseSink" }
}

View File

@@ -5,37 +5,25 @@ private import semmle.python.web.django.Shared
private import semmle.python.web.Http
/**
* DEPRECATED: This class is internal to the django library modeling, and should
* never be used by anyone.
*
* A django.http.response.Response object
* This isn't really a "taint", but we use the value tracking machinery to
* track the flow of response objects.
*/
class DjangoResponse extends TaintKind {
DjangoResponse() { this = "django.response.HttpResponse" }
deprecated class DjangoResponse = DjangoResponseKind;
/** INTERNAL class used for tracking a django response object. */
private class DjangoResponseKind extends TaintKind {
DjangoResponseKind() { this = "django.response.HttpResponse" }
}
private ClassValue theDjangoHttpResponseClass() {
(
// version 1.x
result = Value::named("django.http.response.HttpResponse")
or
// version 2.x
// https://docs.djangoproject.com/en/2.2/ref/request-response/#httpresponse-objects
result = Value::named("django.http.HttpResponse")
) and
// TODO: does this do anything? when could they be the same???
not result = theDjangoHttpRedirectClass()
}
/** internal class used for tracking a django response. */
/** INTERNAL taint-source used for tracking a django response object. */
private class DjangoResponseSource extends TaintSource {
DjangoResponseSource() {
exists(ClassValue cls |
cls.getASuperType() = theDjangoHttpResponseClass() and
cls.getACall() = this
)
}
DjangoResponseSource() { exists(DjangoContentResponseClass cls | cls.getACall() = this) }
override predicate isSourceOf(TaintKind kind) { kind instanceof DjangoResponse }
override predicate isSourceOf(TaintKind kind) { kind instanceof DjangoResponseKind }
override string toString() { result = "django.http.response.HttpResponse" }
}
@@ -45,7 +33,7 @@ class DjangoResponseWrite extends HttpResponseTaintSink {
DjangoResponseWrite() {
exists(AttrNode meth, CallNode call |
call.getFunction() = meth and
any(DjangoResponse response).taints(meth.getObject("write")) and
any(DjangoResponseKind response).taints(meth.getObject("write")) and
this = call.getArg(0)
)
}
@@ -55,17 +43,16 @@ class DjangoResponseWrite extends HttpResponseTaintSink {
override string toString() { result = "django.Response.write(...)" }
}
/** An argument to initialization of a django response, which is vulnerable to external data (xss) */
/**
* An argument to initialization of a django response.
*/
class DjangoResponseContent extends HttpResponseTaintSink {
DjangoContentResponseClass cls;
CallNode call;
DjangoResponseContent() {
exists(CallNode call, ClassValue cls |
cls.getASuperType() = theDjangoHttpResponseClass() and
call.getFunction().pointsTo(cls)
|
call.getArg(0) = this
or
call.getArgByName("content") = this
)
call = cls.getACall() and
this = cls.getContentArg(call)
}
override predicate sinks(TaintKind kind) { kind instanceof StringKind }
@@ -73,9 +60,25 @@ class DjangoResponseContent extends HttpResponseTaintSink {
override string toString() { result = "django.Response(...)" }
}
/**
* An argument to initialization of a django response, which is vulnerable to external data (XSS).
*/
class DjangoResponseContentXSSVulnerable extends DjangoResponseContent {
override DjangoXSSVulnerableResponseClass cls;
DjangoResponseContentXSSVulnerable() {
not exists(cls.getContentTypeArg(call))
or
exists(StringValue s |
cls.getContentTypeArg(call).pointsTo(s) and
s.getText().matches("text/html%")
)
}
}
class DjangoCookieSet extends CookieSet, CallNode {
DjangoCookieSet() {
any(DjangoResponse r).taints(this.getFunction().(AttrNode).getObject("set_cookie"))
any(DjangoResponseKind r).taints(this.getFunction().(AttrNode).getObject("set_cookie"))
}
override string toString() { result = CallNode.super.toString() }

View File

@@ -1,12 +1,84 @@
import python
/** django.shortcuts.redirect */
FunctionValue redirect() { result = Value::named("django.shortcuts.redirect") }
/** DEPRECATED: Use `Value::named("django.shortcuts.redirect")` instead. */
deprecated FunctionValue redirect() { result = Value::named("django.shortcuts.redirect") }
ClassValue theDjangoHttpRedirectClass() {
/** DEPRECATED: Use `DjangoRedirectResponseClass` instead. */
deprecated ClassValue theDjangoHttpRedirectClass() {
// version 1.x
result = Value::named("django.http.response.HttpResponseRedirectBase")
or
// version 2.x
result = Value::named("django.http.HttpResponseRedirectBase")
}
/** A class that is a Django Redirect Response (subclass of `django.http.HttpResponseRedirectBase`). */
class DjangoRedirectResponseClass extends ClassValue {
DjangoRedirectResponseClass() {
exists(ClassValue redirect_base |
// version 1.x
redirect_base = Value::named("django.http.response.HttpResponseRedirectBase")
or
// version 2.x and 3.x
redirect_base = Value::named("django.http.HttpResponseRedirectBase")
|
this.getASuperType() = redirect_base
)
}
}
/**
* A class that is a Django Response, which can contain content.
* A subclass of `django.http.HttpResponse` that is not a `DjangoRedirectResponseClass`.
*/
class DjangoContentResponseClass extends ClassValue {
ClassValue base;
DjangoContentResponseClass() {
(
// version 1.x
base = Value::named("django.http.response.HttpResponse")
or
// version 2.x and 3.x
// https://docs.djangoproject.com/en/2.2/ref/request-response/#httpresponse-objects
base = Value::named("django.http.HttpResponse")
) and
this.getASuperType() = base
}
// The reason these two methods are defined in this class (and not in the Sink
// definition that uses this class), is that if we were to add support for
// `django.http.response.HttpResponseNotAllowed` it would make much more sense to add
// the custom logic in this class (or subclass), than to handle all of it in the sink
// definition.
/** Gets the `content` argument of a `call` to the constructor */
ControlFlowNode getContentArg(CallNode call) { none() }
/** Gets the `content_type` argument of a `call` to the constructor */
ControlFlowNode getContentTypeArg(CallNode call) { none() }
}
/** A class that is a Django Response, and is vulnerable to XSS. */
class DjangoXSSVulnerableResponseClass extends DjangoContentResponseClass {
DjangoXSSVulnerableResponseClass() {
// We want to avoid FPs on subclasses that are not exposed to XSS, for example `JsonResponse`.
// The easiest way is to disregard any subclass that has a special `__init__` method.
// It's not guaranteed to remove all FPs, or not to generate FNs, but compared to our
// previous implementation that would treat 0-th argument to _any_ subclass as a sink,
// this gets us much closer to reality.
this.lookup("__init__") = base.lookup("__init__") and
not this instanceof DjangoRedirectResponseClass
}
override ControlFlowNode getContentArg(CallNode call) {
result = call.getArg(0)
or
result = call.getArgByName("content")
}
override ControlFlowNode getContentTypeArg(CallNode call) {
result = call.getArg(1)
or
result = call.getArgByName("content_type")
}
}

View File

@@ -1,2 +1,4 @@
| test_1x.py:13:21:13:24 | django.redirect | externally controlled string |
| test_2x_3x.py:13:21:13:24 | django.redirect | externally controlled string |
| test_1x.py:13:21:13:24 | DjangoShortcutsRedirectSink | externally controlled string |
| test_2x_3x.py:13:21:13:24 | DjangoShortcutsRedirectSink | externally controlled string |
| views_1x.py:99:33:99:55 | DjangoRedirectResponseSink | externally controlled string |
| views_2x_3x.py:120:33:120:55 | DjangoRedirectResponseSink | externally controlled string |

View File

@@ -8,6 +8,10 @@
| views_1x.py:45:25:45:70 | django.Response(...) | externally controlled string |
| views_1x.py:66:25:66:55 | django.Response(...) | externally controlled string |
| views_1x.py:75:25:75:33 | django.Response(...) | externally controlled string |
| views_1x.py:90:25:90:33 | django.Response(...) | externally controlled string |
| views_1x.py:94:25:94:58 | django.Response(...) | externally controlled string |
| views_1x.py:103:33:103:55 | django.Response(...) | externally controlled string |
| views_1x.py:107:25:107:47 | django.Response(...) | externally controlled string |
| views_2x_3x.py:8:25:8:63 | django.Response(...) | externally controlled string |
| views_2x_3x.py:12:25:12:52 | django.Response(...) | externally controlled string |
| views_2x_3x.py:16:25:16:53 | django.Response(...) | externally controlled string |
@@ -21,3 +25,7 @@
| views_2x_3x.py:82:25:82:69 | django.Response(...) | externally controlled string |
| views_2x_3x.py:85:25:85:64 | django.Response(...) | externally controlled string |
| views_2x_3x.py:88:25:88:32 | django.Response(...) | externally controlled string |
| views_2x_3x.py:111:25:111:33 | django.Response(...) | externally controlled string |
| views_2x_3x.py:115:25:115:58 | django.Response(...) | externally controlled string |
| views_2x_3x.py:124:33:124:55 | django.Response(...) | externally controlled string |
| views_2x_3x.py:128:25:128:47 | django.Response(...) | externally controlled string |

View File

@@ -1,6 +1,6 @@
"""test of views for Django 1.x"""
from django.conf.urls import patterns, url
from django.http.response import HttpResponse
from django.http.response import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound
from django.views.generic import View
@@ -77,3 +77,31 @@ def kw_args(request):
urlpatterns = [
url(view=kw_args, regex=r'^kw_args$')
]
# Not an XSS sink, since the Content-Type is not "text/html"
# FP reported in https://github.com/github/codeql-python-team/issues/38
def fp_json_response(request):
# implicitly sets Content-Type to "application/json"
return JsonResponse({"foo": request.GET.get("foo")})
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_json_response(request):
json_data = '{"json": "{}"}'.format(request.GET.get("foo"))
return HttpResponse(json_data, content_type="application/json")
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_content_type(reuqest):
return HttpResponse('<img src="0" onerror="alert(1)">', content_type="text/plain")
# XSS FP reported in https://github.com/github/codeql/issues/3466
# Note: This should be a open-redirect sink, but not a XSS sink.
def fp_redirect(request):
return HttpResponseRedirect(request.GET.get("next"))
# Ensure that simple subclasses are still vuln to XSS
def tp_not_found(request):
return HttpResponseNotFound(request.GET.get("name"))
# Ensure we still have a XSS sink when manually setting the content_type to HTML
def tp_manual_response_type(request):
return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8")

View File

@@ -1,6 +1,6 @@
"""testing views for Django 2.x and 3.x"""
from django.urls import path, re_path
from django.http import HttpResponse
from django.http import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound
from django.views import View
@@ -99,24 +99,30 @@ urlpatterns = [
]
################################################################################
# Not an XSS sink, since the Content-Type is not "text/html"
# FP reported in https://github.com/github/codeql-python-team/issues/38
def fp_json_response(request):
# implicitly sets Content-Type to "application/json"
return JsonResponse({"foo": request.GET.get("foo")})
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_json_response(request):
json_data = '{"json": "{}"}'.format(request.GET.get("foo"))
return HttpResponse(json_data, content_type="application/json")
# We should abort if a decorator is used. As demonstrated below, anything might happen
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_content_type(reuqest):
return HttpResponse('<img src="0" onerror="alert(1)">', content_type="text/plain")
# def reverse_kwargs(f):
# @wraps(f)
# def f_(*args, **kwargs):
# new_kwargs = dict()
# for key, value in kwargs.items():
# new_kwargs[key[::-1]] = value
# return f(*args, **new_kwargs)
# return f_
# XSS FP reported in https://github.com/github/codeql/issues/3466
# Note: This should be a open-redirect sink, but not a XSS sink.
def fp_redirect(request):
return HttpResponseRedirect(request.GET.get("next"))
# @reverse_kwargs
# def decorators_can_do_anything(request, oof, foo=None):
# return HttpResponse('This is a mess'[::-1])
# Ensure that simple subclasses are still vuln to XSS
def tp_not_found(request):
return HttpResponseNotFound(request.GET.get("name"))
# urlpatterns = [
# path('rev/<foo>', decorators_can_do_anything),
# ]
# Ensure we still have a XSS sink when manually setting the content_type to HTML
def tp_manual_response_type(request):
return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8")

View File

@@ -1,2 +1,2 @@
from .response import HttpResponse
from .response import *
from .request import HttpRequest

View File

@@ -1,2 +1,46 @@
class HttpResponse(object):
pass
class HttpResponseBase(object):
status_code = 200
def __init__(self, content_type=None, status=None, reason=None, charset=None):
pass
class HttpResponse(HttpResponseBase):
def __init__(self, content=b"", *args, **kwargs):
super().__init__(*args, **kwargs)
# Content is a bytestring. See the `content` property methods.
self.content = content
class HttpResponseRedirectBase(HttpResponse):
def __init__(self, redirect_to, *args, **kwargs):
super().__init__(*args, **kwargs)
...
class HttpResponsePermanentRedirect(HttpResponseRedirectBase):
status_code = 301
class HttpResponseRedirect(HttpResponseRedirectBase):
status_code = 302
class HttpResponseNotFound(HttpResponse):
status_code = 404
class JsonResponse(HttpResponse):
def __init__(
self,
data,
encoder=...,
safe=True,
json_dumps_params=None,
**kwargs
):
# fake code to represent what is going on :)
kwargs.setdefault("content_type", "application/json")
data = str(data)
super().__init__(content=data, **kwargs)