Python: Tornado: Handle basic route setup with tuples

The reason this becomes valuable right now is that we can mark routed params
as taint-sources. Further down the line, we can (hopefully) detect that a routed
param will only accept digits, and mark it safe for some of our taint-tracking
queries.
This commit is contained in:
Rasmus Wriedt Larsen
2020-12-22 16:16:29 +01:00
parent 39d85896a1
commit 1849b9e771
6 changed files with 182 additions and 32 deletions

View File

@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.TaintTracking
private import semmle.python.Concepts
private import semmle.python.regex
/**
* Provides models for the `tornado` PyPI package.
@@ -82,7 +83,7 @@ private module Tornado {
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node web_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["RequestHandler"] and
attr_name in ["RequestHandler", "Application"] and
(
t.start() and
result = DataFlow::importNode("tornado.web" + "." + attr_name)
@@ -138,8 +139,19 @@ private module Tornado {
DataFlow::Node subclassRef() { result = subclassRef(DataFlow::TypeTracker::end()) }
/** A RequestHandler class (most likely in project code). */
private class RequestHandlerClass extends Class {
class RequestHandlerClass extends Class {
RequestHandlerClass() { this.getParent() = subclassRef().asExpr() }
/** Gets a reference to this class. */
private DataFlow::Node getARef(DataFlow::TypeTracker t) {
t.start() and
result.asExpr().(ClassExpr) = this.getParent()
or
exists(DataFlow::TypeTracker t2 | result = this.getARef(t2).track(t2, t))
}
/** Gets a reference to this class. */
DataFlow::Node getARef() { result = this.getARef(DataFlow::TypeTracker::end()) }
}
/**
@@ -229,6 +241,64 @@ private module Tornado {
}
}
}
/**
* Provides models for the `tornado.web.Application` class.
*
* Exposes `classRef()` for references to the class itself, `instance()` for
* references to instances of it, and `add_handlers()` for references to the
* `add_handlers` method of such instances.
*
* See https://www.tornadoweb.org/en/stable/web.html#tornado.web.Application.
*/
module Application {
/** Gets a reference to the `tornado.web.Application` class, tracked with type tracker `t`. */
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
// base case: the `Application` attribute as given by `web_attr`
t.start() and
result = web_attr("Application")
or
// recursive step: follow an already tracked reference one step further
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
}
/** Gets a reference to the `tornado.web.Application` class. */
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
/**
* A source of instances of `tornado.web.Application`, extend this class to model new instances.
*
* This can include instantiations of the class, return values from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use the predicate `Application::instance()` to get references to instances of `tornado.web.Application`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/**
* A direct instantiation of `tornado.web.Application`, that is, a call whose
* function is a reference to the class (see `classRef()`).
*/
class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
override CallNode node;
ClassInstantiation() { node.getFunction() = classRef().asCfgNode() }
}
/** Gets a reference to an instance of `tornado.web.Application`, tracked with type tracker `t`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
// base case: any modeled source of instances
t.start() and
result instanceof InstanceSource
or
// recursive step: follow an already tracked reference one step further
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `tornado.web.Application`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `add_handlers` method, tracked with type tracker `t`. */
private DataFlow::Node add_handlers(DataFlow::TypeTracker t) {
// base case: start at a reference to an instance, tracking its `add_handlers` attribute
t.startInAttr("add_handlers") and
result = instance()
or
// recursive step: follow an already tracked reference one step further
exists(DataFlow::TypeTracker t2 | result = add_handlers(t2).track(t2, t))
}
/** Gets a reference to the `add_handlers` method. */
DataFlow::Node add_handlers() { result = add_handlers(DataFlow::TypeTracker::end()) }
}
}
// -------------------------------------------------------------------------
@@ -366,4 +436,84 @@ private module Tornado {
}
}
}
// ---------------------------------------------------------------------------
// routing
// ---------------------------------------------------------------------------
/**
* Gets a sequence that defines a number of route rules.
*
* Such a sequence is one of:
* - the first positional argument, or the `handlers` keyword argument, of a
*   direct instantiation of `tornado.web.Application`;
* - the second positional argument, or the `host_handlers` keyword argument, of
*   a call to the `add_handlers` method of an application instance;
* - a sequence that is the second element of a tuple inside another rule list
*   (nested rule lists).
*
* Only arguments that are syntactically a sequence at the call are covered, since
* the arguments are matched directly as `SequenceNode`s.
*/
SequenceNode routeSetupRuleList() {
// rules given when the application is constructed
exists(CallNode call | call = any(tornado::web::Application::ClassInstantiation c).asCfgNode() |
result in [call.getArg(0), call.getArgByName("handlers")]
)
or
// rules given in a call to `add_handlers`
exists(CallNode call |
call.getFunction() = tornado::web::Application::add_handlers().asCfgNode()
|
result in [call.getArg(1), call.getArgByName("host_handlers")]
)
or
// nested rules: a tuple in a rule list whose second element is again a sequence
result = routeSetupRuleList().getElement(_).(TupleNode).getElement(1)
}
/**
* A tornado route setup.
*
* This class is abstract and declares no members of its own; concrete kinds of
* route setup (such as `TornadoTupleRouteSetup`) extend it.
*/
abstract class TornadoRouteSetup extends HTTP::Server::RouteSetup::Range { }
/**
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
*/
private class TornadoRouteRegex extends RegexString {
// the route setup whose URL pattern argument this string flows to
TornadoRouteSetup setup;
// Holds for a string constant that has local data flow to the URL pattern
// argument of `setup`.
TornadoRouteRegex() {
this instanceof StrConst and
DataFlow::localFlow(DataFlow::exprNode(this), setup.getUrlPatternArg())
}
/** Gets a route setup that uses this regex as its URL pattern. */
TornadoRouteSetup getRouteSetup() { result = setup }
}
/**
* A route setup using a tuple.
*
* This is a two-element tuple in a route rule list (see `routeSetupRuleList`)
* whose first element is the URL pattern and whose second element refers to
* the request handler class.
*/
private class TornadoTupleRouteSetup extends TornadoRouteSetup, DataFlow::CfgNode {
override TupleNode node;
TornadoTupleRouteSetup() {
node = routeSetupRuleList().getElement(_) and
count(node.getElement(_)) = 2 and
// a tuple whose second element is a sequence is a nested rule list
// (handled by `routeSetupRuleList`), not a route setup in itself
not node.getElement(1) instanceof SequenceNode
}
/** Gets the first element of the tuple, which holds the URL pattern. */
override DataFlow::Node getUrlPatternArg() { result.asCfgNode() = node.getElement(0) }
/**
* Gets a method of the `RequestHandlerClass` referenced by the second element
* of the tuple, whose name is one of `HTTP::httpVerbLower()`.
*
* Only methods obtained through `cls.getAMethod()` are considered (see the TODO
* about MRO below).
*/
override Function getARequestHandler() {
exists(tornado::web::RequestHandler::RequestHandlerClass cls |
cls.getARef().asCfgNode() = node.getElement(1) and
// TODO: Proper MRO
result = cls.getAMethod() and
result.getName() = HTTP::httpVerbLower()
)
}
/**
* Gets a parameter of a request handler that receives a value from the URL.
*
* When the URL pattern is unknown, this is every parameter except the first
* positional one. Otherwise it is the positional parameter whose index equals
* a group number of the route regex, or the parameter whose name equals a
* group name of the route regex.
*/
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(Function requestHandler | requestHandler = this.getARequestHandler() |
not exists(this.getUrlPattern()) and
result in [requestHandler.getArg(_), requestHandler.getArgByName(_)] and
// skip the first positional parameter (presumably `self` -- confirm)
not result = requestHandler.getArg(0)
)
or
exists(Function requestHandler, TornadoRouteRegex regex |
requestHandler = this.getARequestHandler() and
regex.getRouteSetup() = this
|
// first group will have group number 1
// so group N lines up with positional parameter index N; index 0 is
// presumably `self` -- confirm
result = requestHandler.getArg(regex.getGroupNumber(_, _))
or
// named groups are matched to parameters by name
result = requestHandler.getArgByName(regex.getGroupName(_, _))
)
}
}
}

View File

@@ -1,5 +1,5 @@
| taint_test.py:6 | fail | get | name |
| taint_test.py:6 | fail | get | number |
| taint_test.py:6 | ok | get | name |
| taint_test.py:6 | ok | get | number |
| taint_test.py:7 | ok | get | foo |
| taint_test.py:11 | ok | get | self.get_argument(..) |
| taint_test.py:12 | ok | get | self.get_arguments(..) |

View File

@@ -2,26 +2,26 @@ import tornado.web
class BasicHandler(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("BasicHandler " + self.get_argument("xss"))
def post(self): # $ MISSING: requestHandler
def post(self): # $ requestHandler
self.write("BasicHandler (POST)")
class DeepInheritance(BasicHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("DeepInheritance" + self.get_argument("also_xss"))
class FormHandler(tornado.web.RequestHandler):
def post(self): # $ MISSING: requestHandler
def post(self): # $ requestHandler
name = self.get_body_argument("name")
self.write(name)
class RedirectHandler(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
req = self.request
h = req.headers
url = h["url"]
@@ -40,11 +40,11 @@ class ReverseInheritance(BaseReverseInheritance):
def make_app():
return tornado.web.Application(
[
(r"/basic", BasicHandler), # $ MISSING: routeSetup="/basic"
(r"/deep", DeepInheritance), # $ MISSING: routeSetup="/deep"
(r"/form", FormHandler), # $ MISSING: routeSetup="/form"
(r"/redirect", RedirectHandler), # $ MISSING: routeSetup="/redirect"
(r"/reverse-inheritance", ReverseInheritance), # $ MISSING: routeSetup="/reverse-inheritance"
(r"/basic", BasicHandler), # $ routeSetup="/basic"
(r"/deep", DeepInheritance), # $ routeSetup="/deep"
(r"/form", FormHandler), # $ routeSetup="/form"
(r"/redirect", RedirectHandler), # $ routeSetup="/redirect"
(r"/reverse-inheritance", ReverseInheritance), # $ routeSetup="/reverse-inheritance"
],
debug=True,
)

View File

@@ -2,7 +2,7 @@ import tornado.web
class ResponseWriting(tornado.web.RequestHandler):
def get(self, type_): # $ MISSING: requestHandler routedParameter=type_
def get(self, type_): # $ requestHandler routedParameter=type_
if type_ == "str":
self.write("foo")
elif type_ == "bytes":
@@ -17,7 +17,7 @@ class ResponseWriting(tornado.web.RequestHandler):
def make_app():
return tornado.web.Application(
[
(r"/ResponseWriting/(str|bytes|dict)", ResponseWriting), # $ MISSING: routeSetup="/ResponseWriting/(str|bytes|dict)"
(r"/ResponseWriting/(str|bytes|dict)", ResponseWriting), # $ routeSetup="/ResponseWriting/(str|bytes|dict)"
],
debug=True,
)

View File

@@ -3,7 +3,7 @@ import tornado.routing
class FooHandler(tornado.web.RequestHandler):
def get(self, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
def get(self, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
self.write("FooHandler {} {}".format(x, y))
@@ -18,32 +18,32 @@ class BazHandler(tornado.web.RequestHandler):
class KwArgs(tornado.web.RequestHandler):
def get(self, *, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
def get(self, *, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
self.write("KwArgs {} {}".format(x, y))
class OnlyLocalhost(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("OnlyLocalhost")
class One(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("One")
class Two(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("Two")
class Three(tornado.web.RequestHandler):
def get(self): # $ MISSING: requestHandler
def get(self): # $ requestHandler
self.write("Three")
class AddedLater(tornado.web.RequestHandler):
def get(self, x, y=None, not_used=None): # $ MISSING: requestHandler routedParameter=x routedParameter=y
def get(self, x, y=None, not_used=None): # $ requestHandler routedParameter=x routedParameter=y
self.write("AddedLater {} {}".format(x, y))
@@ -59,26 +59,26 @@ def make_app():
# see https://www.tornadoweb.org/en/stable/routing.html for even more examples
app = tornado.web.Application(
[
(r"/foo/([0-9]+)/([0-9]+)?", FooHandler), # $ MISSING: routeSetup="/foo/([0-9]+)/([0-9]+)?"
(r"/foo/([0-9]+)/([0-9]+)?", FooHandler), # $ routeSetup="/foo/([0-9]+)/([0-9]+)?"
tornado.web.URLSpec(r"/bar/([0-9]+)/([0-9]+)?", BarHandler), # $ MISSING: routeSetup="/bar/([0-9]+)/([0-9]+)?"
# Very verbose way to write same as FooHandler
tornado.routing.Rule(tornado.routing.PathMatches(r"/baz/([0-9]+)/([0-9]+)?"), BazHandler), # $ MISSING: routeSetup="/baz/([0-9]+)/([0-9]+)?"
(r"/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?", KwArgs), # $ MISSING: routeSetup="/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?"
(r"/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?", KwArgs), # $ routeSetup="/kw-args/(?P<x>[0-9]+)/(?P<y>[0-9]+)?"
# You can do nesting
(r"/(one|two|three)", [
(r"/one", One), # $ MISSING: routeSetup="/one"
(r"/two", Two), # $ MISSING: routeSetup="/two"
(r"/three", Three) # $ MISSING: routeSetup="/three"
(r"/one", One), # $ routeSetup="/one"
(r"/two", Two), # $ routeSetup="/two"
(r"/three", Three) # $ routeSetup="/three"
]),
# which is _one_ recommended way to ensure known host is used
(tornado.routing.HostMatches(r"(localhost|127\.0\.0\.1)"), [
("/only-localhost", OnlyLocalhost) # $ MISSING: routeSetup="/only-localhost"
("/only-localhost", OnlyLocalhost) # $ routeSetup="/only-localhost"
]),
],
debug=True,
)
app.add_handlers(r".*", [(r"/added-later/([0-9]+)/([0-9]+)?", AddedLater)]) # $ MISSING: routeSetup="/added-later/([0-9]+)/([0-9]+)?"
app.add_handlers(r".*", [(r"/added-later/([0-9]+)/([0-9]+)?", AddedLater)]) # $ routeSetup="/added-later/([0-9]+)/([0-9]+)?"
return app

View File

@@ -2,7 +2,7 @@ import tornado.web
class TaintTest(tornado.web.RequestHandler):
def get(self, name = "World!", number="0", foo="foo"): # $ MISSING: requestHandler routedParameter=name routedParameter=number
def get(self, name = "World!", number="0", foo="foo"): # $ requestHandler routedParameter=name routedParameter=number
ensure_tainted(name, number)
ensure_not_tainted(foo)
@@ -72,7 +72,7 @@ class TaintTest(tornado.web.RequestHandler):
def make_app():
return tornado.web.Application(
[
(r"/test_taint/([^/]+)/([0-9]+)", TaintTest), # $ MISSING: routeSetup="/test_taint/([^/]+)/([0-9]+)"
(r"/test_taint/([^/]+)/([0-9]+)", TaintTest), # $ routeSetup="/test_taint/([^/]+)/([0-9]+)"
],
debug=True,
)