diff --git a/python/ql/lib/change-notes/2023-11-21-request-handler-args-kwargs.md b/python/ql/lib/change-notes/2023-11-21-request-handler-args-kwargs.md new file mode 100644 index 00000000000..7215f1a2ab3 --- /dev/null +++ b/python/ql/lib/change-notes/2023-11-21-request-handler-args-kwargs.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added modeling of `*args` and `**kwargs` as routed-parameters in request handlers for django/flask/FastAPI/tornado. diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 81c7bd2990e..f7acd89bd62 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -928,7 +928,10 @@ module Http { override Parameter getARoutedParameter() { result = rs.getARoutedParameter() and - result in [this.getArg(_), this.getArgByName(_)] + result in [ + this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter), + this.getKwarg().(Parameter) + ] } override string getFramework() { result = rs.getFramework() } diff --git a/python/ql/lib/semmle/python/frameworks/Django.qll b/python/ql/lib/semmle/python/frameworks/Django.qll index 28aaa0a227d..38e488a3ea5 100644 --- a/python/ql/lib/semmle/python/frameworks/Django.qll +++ b/python/ql/lib/semmle/python/frameworks/Django.qll @@ -2416,7 +2416,10 @@ module PrivateDjango { // Since we don't know the URL pattern, we simply mark all parameters as a routed // parameter. This should give us more RemoteFlowSources but could also lead to // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. - result in [this.getArg(_), this.getArgByName(_)] and + result in [ + this.getArg(_), this.getArgByName(_), // + this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content! + ] and not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i)) } @@ -2452,13 +2455,20 @@ module PrivateDjango { // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() | not exists(this.getUrlPattern()) and - result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and + result in [ + routeHandler.getArg(_), routeHandler.getArgByName(_), // + routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content! + ] and not result = any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i)) ) or exists(string name | - result = this.getARequestHandler().getArgByName(name) and + ( + result = this.getARequestHandler().getKwarg() // TODO: These sources should be modeled as storing content! + or + result = this.getARequestHandler().getArgByName(name) + ) and exists(string match | match = this.getUrlPattern().regexpFind(pathRoutedParameterRegex(), _, _) and name = match.regexpCapture(pathRoutedParameterRegex(), 2) @@ -2475,7 +2485,10 @@ module PrivateDjango { // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. exists(DjangoRouteHandler routeHandler | routeHandler = this.getARequestHandler() | not exists(this.getUrlPattern()) and - result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and + result in [ + routeHandler.getArg(_), routeHandler.getArgByName(_), // + routeHandler.getVararg().(Parameter), routeHandler.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content! + ] and not result = any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i)) ) @@ -2488,13 +2501,22 @@ module PrivateDjango { // either using named capture groups (passed as keyword arguments) or using // unnamed capture groups (passed as positional arguments) not exists(regex.getGroupName(_, _)) and - // first group will have group number 1 - result = - routeHandler - .getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 + - regex.getGroupNumber(_, _)) + ( + // first group will have group number 1 + result = + routeHandler + .getArg(routeHandler.getFirstPossibleRoutedParamIndex() - 1 + + regex.getGroupNumber(_, _)) + or + result = routeHandler.getVararg() + ) or - result = routeHandler.getArgByName(regex.getGroupName(_, _)) + exists(regex.getGroupName(_, _)) and + ( + result = routeHandler.getArgByName(regex.getGroupName(_, _)) + or + result = routeHandler.getKwarg() + ) ) } } diff --git a/python/ql/lib/semmle/python/frameworks/FastApi.qll b/python/ql/lib/semmle/python/frameworks/FastApi.qll index 07dbd982653..0c4599d89ad 100644 --- a/python/ql/lib/semmle/python/frameworks/FastApi.qll +++ b/python/ql/lib/semmle/python/frameworks/FastApi.qll @@ -66,6 +66,9 @@ private module FastApi { result = this.getARequestHandler().getArgByName(_) and // type-annotated with `Response` not any(Response::RequestHandlerParam src).asExpr() = result + or + // **kwargs + result = this.getARequestHandler().getKwarg() } override DataFlow::Node getUrlPatternArg() { diff --git a/python/ql/lib/semmle/python/frameworks/Flask.qll b/python/ql/lib/semmle/python/frameworks/Flask.qll index 9c2e6cf08ed..3feee0cf668 100644 --- a/python/ql/lib/semmle/python/frameworks/Flask.qll +++ b/python/ql/lib/semmle/python/frameworks/Flask.qll @@ -279,6 +279,9 @@ module Flask { name = match.regexpCapture(werkzeug_rule_re(), 4) ) ) + or + // **kwargs + result = this.getARequestHandler().getKwarg() } override string getFramework() { result = "Flask" } @@ -347,6 +350,12 @@ module Flask { // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. result in [this.getArg(_), this.getArgByName(_)] and not result = this.getArg(0) + or + // *args + result = this.getVararg() + or + // **kwargs + result = this.getKwarg() } override string getFramework() { result = "Flask" } diff --git a/python/ql/lib/semmle/python/frameworks/RestFramework.qll b/python/ql/lib/semmle/python/frameworks/RestFramework.qll index 49eb3cbe7e6..02c39a573f2 100644 --- a/python/ql/lib/semmle/python/frameworks/RestFramework.qll +++ b/python/ql/lib/semmle/python/frameworks/RestFramework.qll @@ -172,7 +172,10 @@ private module RestFramework { // Since we don't know the URL pattern, we simply mark all parameters as a routed // parameter. This should give us more RemoteFlowSources but could also lead to // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. - result in [this.getArg(_), this.getArgByName(_)] and + result in [ + this.getArg(_), this.getArgByName(_), // + this.getVararg().(Parameter), this.getKwarg().(Parameter), // TODO: These sources should be modeled as storing content! + ] and not result = any(int i | i < this.getFirstPossibleRoutedParamIndex() | this.getArg(i)) } diff --git a/python/ql/lib/semmle/python/frameworks/Tornado.qll b/python/ql/lib/semmle/python/frameworks/Tornado.qll index 73c016ed927..a8afbea57d9 100644 --- a/python/ql/lib/semmle/python/frameworks/Tornado.qll +++ b/python/ql/lib/semmle/python/frameworks/Tornado.qll @@ -416,7 +416,10 @@ module Tornado { // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. exists(Function requestHandler | requestHandler = this.getARequestHandler() | not exists(this.getUrlPattern()) and - result in [requestHandler.getArg(_), requestHandler.getArgByName(_)] and + result in [ + requestHandler.getArg(_), requestHandler.getArgByName(_), + requestHandler.getVararg().(Parameter), requestHandler.getKwarg().(Parameter) + ] and not result = requestHandler.getArg(0) ) or @@ -429,6 +432,12 @@ module Tornado { result = requestHandler.getArg(regex.getGroupNumber(_, _)) or result = requestHandler.getArgByName(regex.getGroupName(_, _)) + or + exists(regex.getGroupNumber(_, _)) and + result = requestHandler.getVararg() + or + exists(regex.getGroupName(_, _)) and + result = requestHandler.getKwarg() ) } } @@ -446,7 +455,10 @@ module Tornado { // Since we don't know the URL pattern, we simply mark all parameters as a routed // parameter. This should give us more RemoteFlowSources but could also lead to // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind. - result in [this.getArg(_), this.getArgByName(_)] and + result in [ + this.getArg(_), this.getArgByName(_), this.getVararg().(Parameter), + this.getKwarg().(Parameter) + ] and not result = this.getArg(0) } diff --git a/python/ql/test/library-tests/frameworks/django-v2-v3/routing_test.py b/python/ql/test/library-tests/frameworks/django-v2-v3/routing_test.py index acdbe2cfa42..8f36f55ee26 100644 --- a/python/ql/test/library-tests/frameworks/django-v2-v3/routing_test.py +++ b/python/ql/test/library-tests/frameworks/django-v2-v3/routing_test.py @@ -150,3 +150,40 @@ class UnknownViewSubclass(UnknownViewSuperclass): urlpatterns = [ path("UnknownViewSubclass/", UnknownViewSubclass.as_view()), # $ routeSetup="UnknownViewSubclass/" ] + +################################################################################ +# Routing to *args and **kwargs +################################################################################ + +def kwargs_param(request, **kwargs): # $ requestHandler routedParameter=kwargs + ensure_tainted( + kwargs, # $ tainted + kwargs["foo"], # $ tainted + kwargs["bar"] # $ tainted + ) + + ensure_tainted(request) # $ tainted + + +def star_args_param(request, *args): # $ requestHandler routedParameter=args + ensure_tainted( + args, # $ tainted + args[0], # $ tainted + args[1], # $ tainted + ) + ensure_tainted(request) # $ tainted + + +def star_args_param_check(request, foo, bar): # $ requestHandler routedParameter=foo routedParameter=bar + ensure_tainted( + foo, # $ tainted + bar, # $ tainted + ) + ensure_tainted(request) # $ tainted + + +urlpatterns = [ + path("test-kwargs_param//", kwargs_param), # $ routeSetup="test-kwargs_param//" + re_path("test-star_args_param/([^/]+)/(.+)", star_args_param), # $ routeSetup="test-star_args_param/([^/]+)/(.+)" + re_path("test-star_args_param_check/([^/]+)/(.+)", star_args_param_check), # $ routeSetup="test-star_args_param_check/([^/]+)/(.+)" +] diff --git a/python/ql/test/library-tests/frameworks/rest_framework/taint_test.py b/python/ql/test/library-tests/frameworks/rest_framework/taint_test.py index ff30b00ca99..c4c12b387ee 100644 --- a/python/ql/test/library-tests/frameworks/rest_framework/taint_test.py +++ b/python/ql/test/library-tests/frameworks/rest_framework/taint_test.py @@ -1,5 +1,6 @@ from rest_framework.decorators import api_view, parser_classes from rest_framework.views import APIView +from rest_framework.viewsets import ModelViewSet from rest_framework.request import Request from rest_framework.response import Response from rest_framework.parsers import JSONParser @@ -89,7 +90,7 @@ def test_taint(request: Request, routed_param): # $ requestHandler routedParamet class MyClass(APIView): - def initial(self, request, *args, **kwargs): # $ requestHandler + def initial(self, request, *args, **kwargs): # $ requestHandler routedParameter=kwargs # this method will be called before processing any request ensure_tainted(request) # $ tainted @@ -107,12 +108,31 @@ class MyClass(APIView): return Response("ok") # $ HttpResponse +# Viewsets +# see https://www.django-rest-framework.org/api-guide/viewsets/ + +class MyModelViewSet(ModelViewSet): + def retrieve(self, request, routed_param): # $ requestHandler routedParameter=routed_param + ensure_tainted( + request, # $ tainted + request.GET, # $ tainted + request.GET.get("pk"), # $ tainted + request.data # $ tainted + ) + + ensure_tainted(routed_param) # $ tainted + + # same as for standard Django view + ensure_tainted(self.args, self.kwargs) # $ tainted + + return Response("retrieve") # $ HttpResponse # fake setup, you can't actually run this urlpatterns = [ path("test-taint/", test_taint), # $ routeSetup="test-taint/" path("ClassView/", MyClass.as_view()), # $ routeSetup="ClassView/" + path("MyModelViewSet/", MyModelViewSet.as_view()) # $ routeSetup="MyModelViewSet/" ] # tests with no route-setup, but we can still tell that these are using Django REST