From 6f63c03558ab0322b654d4abf50b046644515770 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 21 Jul 2021 11:41:19 +0200 Subject: [PATCH] Python: Model `http.cookies.Morsel` and usage in Tornado --- .../src/semmle/python/frameworks/Stdlib.qll | 53 +++++++++++++++++++ .../src/semmle/python/frameworks/Tornado.qll | 19 +++++++ .../frameworks/tornado/taint_test.py | 5 +- 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Stdlib.qll b/python/ql/src/semmle/python/frameworks/Stdlib.qll index 20e9117f866..895efed8508 100644 --- a/python/ql/src/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/src/semmle/python/frameworks/Stdlib.qll @@ -115,6 +115,59 @@ module Stdlib { } } } + + /** + * Provides models for the `http.cookies.Morsel` class + * + * See https://docs.python.org/3.9/library/http.cookies.html#http.cookies.Morsel. + */ + module Morsel { + /** + * A source of instances of `http.cookies.Morsel`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `Morsel::instance()` to get references to instances of `http.cookies.Morsel`. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** Gets a reference to an instance of `http.cookies.Morsel`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `http.cookies.Morsel`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Taint propagation for `http.cookies.Morsel`. + */ + private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // Methods + // + // TODO: When we have tools that make it easy, model these properly to handle + // `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach + // (since it allows us to at least capture the most common cases). + nodeFrom = instance() and + exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom | + // normal (non-async) methods + attr.getAttributeName() in ["output", "js_output"] and + nodeTo.(DataFlow::CallCfgNode).getFunction() = attr + ) + or + // Attributes + nodeFrom = instance() and + nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and + nodeTo.(DataFlow::AttrRead).getAttributeName() in ["key", "value", "coded_value"] + } + } + } } /** diff --git a/python/ql/src/semmle/python/frameworks/Tornado.qll b/python/ql/src/semmle/python/frameworks/Tornado.qll index 0911fc8a7cb..6def64c5264 100644 --- a/python/ql/src/semmle/python/frameworks/Tornado.qll +++ b/python/ql/src/semmle/python/frameworks/Tornado.qll @@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Concepts private import semmle.python.ApiGraphs private import semmle.python.regex +private import semmle.python.frameworks.Stdlib /** * Provides models for the `tornado` PyPI package. @@ -367,6 +368,24 @@ private module Tornado { this.(DataFlow::AttrRead).accesses(instance(), "headers") } } + + /** An `Morsel` instance that originates from a Tornado request. */ + private class TornadoRequestMorselInstances extends Stdlib::Morsel::InstanceSource { + TornadoRequestMorselInstances() { + // TODO: this currently only works in local-scope, since writing type-trackers for + // this is a little too much effort. Once API-graphs are available for more + // things, we can rewrite this. + // + // TODO: This approach for identifying member-access is very adhoc, and we should + // be able to do something more structured for providing modeling of the members + // of a container-object. + exists(DataFlow::AttrRead files | files.accesses(instance(), "cookies") | + this.asCfgNode().(SubscriptNode).getObject() = files.asCfgNode() + or + this.(DataFlow::MethodCallNode).calls(files, "get") + ) + } + } } } } diff --git a/python/ql/test/library-tests/frameworks/tornado/taint_test.py b/python/ql/test/library-tests/frameworks/tornado/taint_test.py index 6a161df152a..7e80186a56f 100644 --- a/python/ql/test/library-tests/frameworks/tornado/taint_test.py +++ b/python/ql/test/library-tests/frameworks/tornado/taint_test.py @@ -68,8 +68,9 @@ class TaintTest(tornado.web.RequestHandler): # Dict[str, http.cookies.Morsel] request.cookies, # $ tainted request.cookies["cookie-name"], # $ tainted - request.cookies["cookie-name"].key, # $ MISSING: tainted - request.cookies["cookie-name"].value, # $ MISSING: tainted + request.cookies["cookie-name"].key, # $ tainted + request.cookies["cookie-name"].value, # $ tainted + request.cookies["cookie-name"].coded_value, # $ tainted )