diff --git a/python/ql/src/Security/CWE-079/ReflectedXss.ql b/python/ql/src/Security/CWE-079/ReflectedXss.ql index dbc02671603..cea41442c5b 100644 --- a/python/ql/src/Security/CWE-079/ReflectedXss.ql +++ b/python/ql/src/Security/CWE-079/ReflectedXss.ql @@ -28,7 +28,12 @@ class ReflectedXssConfiguration extends TaintTracking::Configuration { source instanceof HttpRequestTaintSource } - override predicate isSink(TaintTracking::Sink sink) { sink instanceof HttpResponseTaintSink } + override predicate isSink(TaintTracking::Sink sink) { + sink instanceof HttpResponseTaintSink and + not sink instanceof DjangoResponseContent + or + sink instanceof DjangoResponseContentXSSVulnerable + } } from ReflectedXssConfiguration config, TaintedPathSource src, TaintedPathSink sink diff --git a/python/ql/src/semmle/python/web/django/Redirect.qll b/python/ql/src/semmle/python/web/django/Redirect.qll index 67342517a99..d6afbcce7e7 100644 --- a/python/ql/src/semmle/python/web/django/Redirect.qll +++ b/python/ql/src/semmle/python/web/django/Redirect.qll @@ -11,10 +11,30 @@ private import semmle.python.web.django.Shared private import semmle.python.web.Http /** - * Represents an argument to the `django.redirect` function. + * The URL argument for a call to the `django.shortcuts.redirect` function. */ -class DjangoRedirect extends HttpRedirectTaintSink { - override string toString() { result = "django.redirect" } +class DjangoShortcutsRedirectSink extends HttpRedirectTaintSink { + override string toString() { result = "DjangoShortcutsRedirectSink" } - DjangoRedirect() { this = redirect().getACall().getAnArg() } + DjangoShortcutsRedirectSink() { + this = Value::named("django.shortcuts.redirect").(FunctionValue).getArgumentForCall(_, 0) + } +} + +/** DEPRECATED: Use `DjangoShortcutsRedirectSink` instead. */ +deprecated class DjangoRedirect = DjangoShortcutsRedirectSink; + +/** + * The URL argument when instantiating a Django Redirect Response. + */ +class DjangoRedirectResponseSink extends HttpRedirectTaintSink { + DjangoRedirectResponseSink() { + exists(CallNode call | call = any(DjangoRedirectResponseClass cls).getACall() | + this = call.getArg(0) + or + this = call.getArgByName("redirect_to") + ) + } + + override string toString() { result = "DjangoRedirectResponseSink" } } diff --git a/python/ql/src/semmle/python/web/django/Response.qll b/python/ql/src/semmle/python/web/django/Response.qll index 35d8cd63553..649a503cc4b 100644 --- a/python/ql/src/semmle/python/web/django/Response.qll +++ b/python/ql/src/semmle/python/web/django/Response.qll @@ -5,37 +5,25 @@ private import semmle.python.web.django.Shared private import semmle.python.web.Http /** + * DEPRECATED: This class is internal to the django library modeling, and should + * never be used by anyone. + * * A django.http.response.Response object * This isn't really a "taint", but we use the value tracking machinery to * track the flow of response objects. */ -class DjangoResponse extends TaintKind { - DjangoResponse() { this = "django.response.HttpResponse" } +deprecated class DjangoResponse = DjangoResponseKind; + +/** INTERNAL class used for tracking a django response object. */ +private class DjangoResponseKind extends TaintKind { + DjangoResponseKind() { this = "django.response.HttpResponse" } } -private ClassValue theDjangoHttpResponseClass() { - ( - // version 1.x - result = Value::named("django.http.response.HttpResponse") - or - // version 2.x - // https://docs.djangoproject.com/en/2.2/ref/request-response/#httpresponse-objects - result = Value::named("django.http.HttpResponse") - ) and - // TODO: does this do anything? when could they be the same??? - not result = theDjangoHttpRedirectClass() -} - -/** internal class used for tracking a django response. */ +/** INTERNAL taint-source used for tracking a django response object. */ private class DjangoResponseSource extends TaintSource { - DjangoResponseSource() { - exists(ClassValue cls | - cls.getASuperType() = theDjangoHttpResponseClass() and - cls.getACall() = this - ) - } + DjangoResponseSource() { exists(DjangoContentResponseClass cls | cls.getACall() = this) } - override predicate isSourceOf(TaintKind kind) { kind instanceof DjangoResponse } + override predicate isSourceOf(TaintKind kind) { kind instanceof DjangoResponseKind } override string toString() { result = "django.http.response.HttpResponse" } } @@ -45,7 +33,7 @@ class DjangoResponseWrite extends HttpResponseTaintSink { DjangoResponseWrite() { exists(AttrNode meth, CallNode call | call.getFunction() = meth and - any(DjangoResponse response).taints(meth.getObject("write")) and + any(DjangoResponseKind response).taints(meth.getObject("write")) and this = call.getArg(0) ) } @@ -55,17 +43,16 @@ class DjangoResponseWrite extends HttpResponseTaintSink { override string toString() { result = "django.Response.write(...)" } } -/** An argument to initialization of a django response, which is vulnerable to external data (xss) */ +/** + * An argument to initialization of a django response. + */ class DjangoResponseContent extends HttpResponseTaintSink { + DjangoContentResponseClass cls; + CallNode call; + DjangoResponseContent() { - exists(CallNode call, ClassValue cls | - cls.getASuperType() = theDjangoHttpResponseClass() and - call.getFunction().pointsTo(cls) - | - call.getArg(0) = this - or - call.getArgByName("content") = this - ) + call = cls.getACall() and + this = cls.getContentArg(call) } override predicate sinks(TaintKind kind) { kind instanceof StringKind } @@ -73,9 +60,25 @@ class DjangoResponseContent extends HttpResponseTaintSink { override string toString() { result = "django.Response(...)" } } +/** + * An argument to initialization of a django response, which is vulnerable to external data (XSS). + */ +class DjangoResponseContentXSSVulnerable extends DjangoResponseContent { + override DjangoXSSVulnerableResponseClass cls; + + DjangoResponseContentXSSVulnerable() { + not exists(cls.getContentTypeArg(call)) + or + exists(StringValue s | + cls.getContentTypeArg(call).pointsTo(s) and + s.getText().matches("text/html%") + ) + } +} + class DjangoCookieSet extends CookieSet, CallNode { DjangoCookieSet() { - any(DjangoResponse r).taints(this.getFunction().(AttrNode).getObject("set_cookie")) + any(DjangoResponseKind r).taints(this.getFunction().(AttrNode).getObject("set_cookie")) } override string toString() { result = CallNode.super.toString() } diff --git a/python/ql/src/semmle/python/web/django/Shared.qll b/python/ql/src/semmle/python/web/django/Shared.qll index 669dc5712df..ea856ddf65a 100644 --- a/python/ql/src/semmle/python/web/django/Shared.qll +++ b/python/ql/src/semmle/python/web/django/Shared.qll @@ -1,12 +1,84 @@ import python -/** django.shortcuts.redirect */ -FunctionValue redirect() { result = Value::named("django.shortcuts.redirect") } +/** DEPRECATED: Use `Value::named("django.shortcuts.redirect")` instead. */ +deprecated FunctionValue redirect() { result = Value::named("django.shortcuts.redirect") } -ClassValue theDjangoHttpRedirectClass() { +/** DEPRECATED: Use `DjangoRedirectResponseClass` instead. */ +deprecated ClassValue theDjangoHttpRedirectClass() { // version 1.x result = Value::named("django.http.response.HttpResponseRedirectBase") or // version 2.x result = Value::named("django.http.HttpResponseRedirectBase") } + +/** A class that is a Django Redirect Response (subclass of `django.http.HttpResponseRedirectBase`). */ +class DjangoRedirectResponseClass extends ClassValue { + DjangoRedirectResponseClass() { + exists(ClassValue redirect_base | + // version 1.x + redirect_base = Value::named("django.http.response.HttpResponseRedirectBase") + or + // version 2.x and 3.x + redirect_base = Value::named("django.http.HttpResponseRedirectBase") + | + this.getASuperType() = redirect_base + ) + } +} + +/** + * A class that is a Django Response, which can contain content. + * A subclass of `django.http.HttpResponse` that is not a `DjangoRedirectResponseClass`. + */ +class DjangoContentResponseClass extends ClassValue { + ClassValue base; + + DjangoContentResponseClass() { + ( + // version 1.x + base = Value::named("django.http.response.HttpResponse") + or + // version 2.x and 3.x + // https://docs.djangoproject.com/en/2.2/ref/request-response/#httpresponse-objects + base = Value::named("django.http.HttpResponse") + ) and + this.getASuperType() = base + } + + // The reason these two methods are defined in this class (and not in the Sink + // definition that uses this class), is that if we were to add support for + // `django.http.response.HttpResponseNotAllowed` it would make much more sense to add + // the custom logic in this class (or subclass), than to handle all of it in the sink + // definition. + /** Gets the `content` argument of a `call` to the constructor */ + ControlFlowNode getContentArg(CallNode call) { none() } + + /** Gets the `content_type` argument of a `call` to the constructor */ + ControlFlowNode getContentTypeArg(CallNode call) { none() } +} + +/** A class that is a Django Response, and is vulnerable to XSS. */ +class DjangoXSSVulnerableResponseClass extends DjangoContentResponseClass { + DjangoXSSVulnerableResponseClass() { + // We want to avoid FPs on subclasses that are not exposed to XSS, for example `JsonResponse`. + // The easiest way is to disregard any subclass that has a special `__init__` method. + // It's not guaranteed to remove all FPs, or not to generate FNs, but compared to our + // previous implementation that would treat 0-th argument to _any_ subclass as a sink, + // this gets us much closer to reality. + this.lookup("__init__") = base.lookup("__init__") and + not this instanceof DjangoRedirectResponseClass + } + + override ControlFlowNode getContentArg(CallNode call) { + result = call.getArg(0) + or + result = call.getArgByName("content") + } + + override ControlFlowNode getContentTypeArg(CallNode call) { + result = call.getArg(1) + or + result = call.getArgByName("content_type") + } +} diff --git a/python/ql/test/library-tests/web/django/HttpRedirectSinks.expected b/python/ql/test/library-tests/web/django/HttpRedirectSinks.expected index c47548dd3a5..1c9bcb0cfd5 100644 --- a/python/ql/test/library-tests/web/django/HttpRedirectSinks.expected +++ b/python/ql/test/library-tests/web/django/HttpRedirectSinks.expected @@ -1,2 +1,4 @@ -| test_1x.py:13:21:13:24 | django.redirect | externally controlled string | -| test_2x_3x.py:13:21:13:24 | django.redirect | externally controlled string | +| test_1x.py:13:21:13:24 | DjangoShortcutsRedirectSink | externally controlled string | +| test_2x_3x.py:13:21:13:24 | DjangoShortcutsRedirectSink | externally controlled string | +| views_1x.py:99:33:99:55 | DjangoRedirectResponseSink | externally controlled string | +| views_2x_3x.py:120:33:120:55 | DjangoRedirectResponseSink | externally controlled string | diff --git a/python/ql/test/library-tests/web/django/HttpResponseSinks.expected b/python/ql/test/library-tests/web/django/HttpResponseSinks.expected index 0180a8726e4..e4e52c97514 100644 --- a/python/ql/test/library-tests/web/django/HttpResponseSinks.expected +++ b/python/ql/test/library-tests/web/django/HttpResponseSinks.expected @@ -8,6 +8,10 @@ | views_1x.py:45:25:45:70 | django.Response(...) | externally controlled string | | views_1x.py:66:25:66:55 | django.Response(...) | externally controlled string | | views_1x.py:75:25:75:33 | django.Response(...) | externally controlled string | +| views_1x.py:90:25:90:33 | django.Response(...) | externally controlled string | +| views_1x.py:94:25:94:58 | django.Response(...) | externally controlled string | +| views_1x.py:103:33:103:55 | django.Response(...) | externally controlled string | +| views_1x.py:107:25:107:47 | django.Response(...) | externally controlled string | | views_2x_3x.py:8:25:8:63 | django.Response(...) | externally controlled string | | views_2x_3x.py:12:25:12:52 | django.Response(...) | externally controlled string | | views_2x_3x.py:16:25:16:53 | django.Response(...) | externally controlled string | @@ -21,3 +25,7 @@ | views_2x_3x.py:82:25:82:69 | django.Response(...) | externally controlled string | | views_2x_3x.py:85:25:85:64 | django.Response(...) | externally controlled string | | views_2x_3x.py:88:25:88:32 | django.Response(...) | externally controlled string | +| views_2x_3x.py:111:25:111:33 | django.Response(...) | externally controlled string | +| views_2x_3x.py:115:25:115:58 | django.Response(...) | externally controlled string | +| views_2x_3x.py:124:33:124:55 | django.Response(...) | externally controlled string | +| views_2x_3x.py:128:25:128:47 | django.Response(...) | externally controlled string | diff --git a/python/ql/test/library-tests/web/django/views_1x.py b/python/ql/test/library-tests/web/django/views_1x.py index 068f8246210..f5476a13cef 100644 --- a/python/ql/test/library-tests/web/django/views_1x.py +++ b/python/ql/test/library-tests/web/django/views_1x.py @@ -1,6 +1,6 @@ """test of views for Django 1.x""" from django.conf.urls import patterns, url -from django.http.response import HttpResponse +from django.http.response import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound from django.views.generic import View @@ -77,3 +77,31 @@ def kw_args(request): urlpatterns = [ url(view=kw_args, regex=r'^kw_args$') ] + +# Not an XSS sink, since the Content-Type is not "text/html" +# FP reported in https://github.com/github/codeql-python-team/issues/38 +def fp_json_response(request): + # implicitly sets Content-Type to "application/json" + return JsonResponse({"foo": request.GET.get("foo")}) + +# Not an XSS sink, since the Content-Type is not "text/html" +def fp_manual_json_response(request): + json_data = '{"json": "{}"}'.format(request.GET.get("foo")) + return HttpResponse(json_data, content_type="application/json") + +# Not an XSS sink, since the Content-Type is not "text/html" +def fp_manual_content_type(reuqest): + return HttpResponse('', content_type="text/plain") + +# XSS FP reported in https://github.com/github/codeql/issues/3466 +# Note: This should be a open-redirect sink, but not a XSS sink. +def fp_redirect(request): + return HttpResponseRedirect(request.GET.get("next")) + +# Ensure that simple subclasses are still vuln to XSS +def tp_not_found(request): + return HttpResponseNotFound(request.GET.get("name")) + +# Ensure we still have a XSS sink when manually setting the content_type to HTML +def tp_manual_response_type(request): + return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8") diff --git a/python/ql/test/library-tests/web/django/views_2x_3x.py b/python/ql/test/library-tests/web/django/views_2x_3x.py index 50fe6d638fe..d26b7915ef8 100644 --- a/python/ql/test/library-tests/web/django/views_2x_3x.py +++ b/python/ql/test/library-tests/web/django/views_2x_3x.py @@ -1,6 +1,6 @@ """testing views for Django 2.x and 3.x""" from django.urls import path, re_path -from django.http import HttpResponse +from django.http import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound from django.views import View @@ -99,24 +99,30 @@ urlpatterns = [ ] -################################################################################ +# Not an XSS sink, since the Content-Type is not "text/html" +# FP reported in https://github.com/github/codeql-python-team/issues/38 +def fp_json_response(request): + # implicitly sets Content-Type to "application/json" + return JsonResponse({"foo": request.GET.get("foo")}) +# Not an XSS sink, since the Content-Type is not "text/html" +def fp_manual_json_response(request): + json_data = '{"json": "{}"}'.format(request.GET.get("foo")) + return HttpResponse(json_data, content_type="application/json") -# We should abort if a decorator is used. As demonstrated below, anything might happen +# Not an XSS sink, since the Content-Type is not "text/html" +def fp_manual_content_type(reuqest): + return HttpResponse('', content_type="text/plain") -# def reverse_kwargs(f): -# @wraps(f) -# def f_(*args, **kwargs): -# new_kwargs = dict() -# for key, value in kwargs.items(): -# new_kwargs[key[::-1]] = value -# return f(*args, **new_kwargs) -# return f_ +# XSS FP reported in https://github.com/github/codeql/issues/3466 +# Note: This should be a open-redirect sink, but not a XSS sink. +def fp_redirect(request): + return HttpResponseRedirect(request.GET.get("next")) -# @reverse_kwargs -# def decorators_can_do_anything(request, oof, foo=None): -# return HttpResponse('This is a mess'[::-1]) +# Ensure that simple subclasses are still vuln to XSS +def tp_not_found(request): + return HttpResponseNotFound(request.GET.get("name")) -# urlpatterns = [ -# path('rev/', decorators_can_do_anything), -# ] +# Ensure we still have a XSS sink when manually setting the content_type to HTML +def tp_manual_response_type(request): + return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8") diff --git a/python/ql/test/query-tests/Security/lib/django/http/__init__.py b/python/ql/test/query-tests/Security/lib/django/http/__init__.py index 962077dbad6..f2ac6d2c55b 100644 --- a/python/ql/test/query-tests/Security/lib/django/http/__init__.py +++ b/python/ql/test/query-tests/Security/lib/django/http/__init__.py @@ -1,2 +1,2 @@ -from .response import HttpResponse +from .response import * from .request import HttpRequest diff --git a/python/ql/test/query-tests/Security/lib/django/http/response.py b/python/ql/test/query-tests/Security/lib/django/http/response.py index ae101562df4..1110a3cde19 100644 --- a/python/ql/test/query-tests/Security/lib/django/http/response.py +++ b/python/ql/test/query-tests/Security/lib/django/http/response.py @@ -1,2 +1,46 @@ -class HttpResponse(object): - pass +class HttpResponseBase(object): + status_code = 200 + + def __init__(self, content_type=None, status=None, reason=None, charset=None): + pass + + +class HttpResponse(HttpResponseBase): + def __init__(self, content=b"", *args, **kwargs): + super().__init__(*args, **kwargs) + # Content is a bytestring. See the `content` property methods. + self.content = content + + +class HttpResponseRedirectBase(HttpResponse): + def __init__(self, redirect_to, *args, **kwargs): + super().__init__(*args, **kwargs) + ... + + +class HttpResponsePermanentRedirect(HttpResponseRedirectBase): + status_code = 301 + + +class HttpResponseRedirect(HttpResponseRedirectBase): + status_code = 302 + + +class HttpResponseNotFound(HttpResponse): + status_code = 404 + + +class JsonResponse(HttpResponse): + + def __init__( + self, + data, + encoder=..., + safe=True, + json_dumps_params=None, + **kwargs + ): + # fake code to represent what is going on :) + kwargs.setdefault("content_type", "application/json") + data = str(data) + super().__init__(content=data, **kwargs)