Merge branch 'main' of github.com:github/codeql into python-port-path-injection

This commit is contained in:
Rasmus Lerchedahl Petersen
2020-10-28 10:24:23 +01:00
116 changed files with 8324 additions and 965 deletions

View File

@@ -34,7 +34,7 @@ private module Django {
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node django_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["db", "urls", "http"] and
attr_name in ["db", "urls", "http", "conf"] and
(
t.start() and
result = DataFlow::importNode("django" + "." + attr_name)
@@ -437,6 +437,55 @@ private module Django {
DataFlow::Node re_path() { result = urls_attr("re_path") }
}
// -------------------------------------------------------------------------
// django.conf
// -------------------------------------------------------------------------
/**
* Gets a reference to the `django.conf` module.
*
* The result is whatever `django_attr("conf")` yields.
* NOTE(review): the one-argument `django_attr` is not visible here; presumably it is the
* end-of-tracking wrapper around the type-tracking `django_attr`, which only holds for a
* fixed list of attribute names (one of which is "conf") -- confirm.
*/
DataFlow::Node conf() { result = django_attr("conf") }
/** Models of the `django.conf` module and the submodules reachable from it. */
module conf {
  // -------------------------------------------------------------------------
  // django.conf.urls
  // -------------------------------------------------------------------------
  /**
   * Gets a reference to the `django.conf.urls` module, tracked with type tracker `t`.
   *
   * A reference is either carried over from an earlier tracking state, read from the
   * `urls` attribute of a `django.conf` reference, or the import node of
   * `django.conf.urls` itself.
   */
  private DataFlow::Node urls(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = urls(prev).track(prev, t))
    or
    (
      t.startInAttr("urls") and
      result = conf()
    )
    or
    (
      t.start() and
      result = DataFlow::importNode("django.conf.urls")
    )
  }

  // NOTE: had to rename due to shadowing rules in QL
  /** Gets a fully tracked reference to the `django.conf.urls` module. */
  DataFlow::Node conf_urls() { urls(DataFlow::TypeTracker::end()) = result }

  // NOTE: had to rename due to shadowing rules in QL
  /** Models of the members of the `django.conf.urls` module. */
  module conf_urls {
    /**
     * Gets a reference to the `django.conf.urls.url` function, tracked with type
     * tracker `t`.
     *
     * A reference is either carried over from an earlier tracking state, read from the
     * `url` attribute of a `django.conf.urls` reference, or the import node of
     * `django.conf.urls.url` itself.
     */
    private DataFlow::Node url(DataFlow::TypeTracker t) {
      exists(DataFlow::TypeTracker prev | result = url(prev).track(prev, t))
      or
      (
        t.startInAttr("url") and
        result = conf_urls()
      )
      or
      (
        t.start() and
        result = DataFlow::importNode("django.conf.urls.url")
      )
    }

    /**
     * Gets a fully tracked reference to the `django.conf.urls.url` function.
     *
     * See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
     */
    DataFlow::Node url() { url(DataFlow::TypeTracker::end()) = result }
  }
}
// -------------------------------------------------------------------------
// django.http
// -------------------------------------------------------------------------
@@ -558,7 +607,7 @@ private module Django {
* A source of an instance of `django.http.request.HttpRequest`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are call by external
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `django::http::request::HttpRequest::instance()` predicate to get
@@ -684,20 +733,48 @@ private module Django {
}
}
/**
* A Django route setup that uses a Regex to specify route (and routed parameters).
*
* This class is abstract: it only contributes `getARoutedParameter`, which is computed from
* the URL pattern and the route handlers of the route setup.
*/
abstract private class DjangoRegexRouteSetup extends DjangoRouteSetup {
/**
* Gets a parameter of one of this route's handlers that is populated from the URL.
*
* Two cases are covered:
* - the URL pattern is unknown: every handler parameter positioned after the request
*   parameter, plus every parameter looked up by name, is reported;
* - a regex for this route setup is known: the parameters matching its capture groups are
*   reported (by position for unnamed groups, by name for named groups).
*/
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
// exclude the request parameter itself and any positional parameter before it
not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
routeHandler = this.getARouteHandler() and
regex.getRouteSetup() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
//
// `and` binds tighter than `or`, so the formula below reads as
// `(no named group and result = positional parameter) or (result = keyword parameter)`:
// positional matching is only used when the regex has no named group at all.
not exists(regex.getGroupName(_, _)) and
// first group will have group number 1
result =
routeHandler.getArg(routeHandler.getRequestParamIndex() + regex.getGroupNumber(_, _))
or
result = routeHandler.getArgByName(regex.getGroupName(_, _))
)
}
}
/**
* A regex that is used in a call to `django.urls.re_path`.
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
*/
private class DjangoUrlsRePathRegex extends RegexString {
DjangoUrlsRePathCall rePathCall;
private class DjangoRouteRegex extends RegexString {
DjangoRegexRouteSetup rePathCall;
DjangoUrlsRePathRegex() {
DjangoRouteRegex() {
this instanceof StrConst and
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
}
DjangoUrlsRePathCall getRePathCall() { result = rePathCall }
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
}
/**
@@ -705,7 +782,7 @@ private module Django {
*
* See https://docs.djangoproject.com/en/3.0/ref/urls/#re_path
*/
private class DjangoUrlsRePathCall extends DjangoRouteSetup {
private class DjangoUrlsRePathCall extends DjangoRegexRouteSetup {
override CallNode node;
DjangoUrlsRePathCall() { node.getFunction() = django::urls::re_path().asCfgNode() }
@@ -720,29 +797,26 @@ private module Django {
djangoRouteHandlerFunctionTracker(result) = viewArg
)
}
}
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoUrlsRePathRegex regex |
routeHandler = this.getARouteHandler() and
regex.getRePathCall() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
not exists(regex.getGroupName(_, _)) and
// first group will have group number 1
result =
routeHandler.getArg(routeHandler.getRequestParamIndex() + regex.getGroupNumber(_, _))
or
result = routeHandler.getArgByName(regex.getGroupName(_, _))
/**
 * A route set up by calling `django.conf.urls.url` with a regex and a view.
 *
 * See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
 */
private class DjangoConfUrlsUrlCall extends DjangoRegexRouteSetup {
  override CallNode node;

  DjangoConfUrlsUrlCall() { django::conf::conf_urls::url().asCfgNode() = node.getFunction() }

  /** Gets the URL pattern argument: positional argument 0 or keyword argument `regex`. */
  override DataFlow::Node getUrlPatternArg() {
    result.asCfgNode() in [node.getArg(0), node.getArgByName("regex")]
  }

  /**
   * Gets a route handler whose tracked reference is passed as the view: positional
   * argument 1 or keyword argument `view`.
   */
  override DjangoRouteHandler getARouteHandler() {
    djangoRouteHandlerFunctionTracker(result).asCfgNode() in [
        node.getArg(1), node.getArgByName("view")
      ]
  }
}

View File

@@ -459,7 +459,7 @@ private module FabricV2 {
* A source of an instance of a subclass of `fabric.group.Group`
*
* This can include instantiation of a class, return value from function
* calls, or a special parameter that will be set when functions are call by external
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `Group::subclassInstance()` predicate to get references to an instance of a subclass of `fabric.group.Group`.

View File

@@ -10,13 +10,11 @@ private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
/**
* Provides models for the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private module Flask {
private module FlaskModel {
/** Gets a reference to the `flask` module. */
private DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
@@ -44,69 +42,101 @@ private module Flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = request(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `flask.Flask` class. */
private DataFlow::Node classFlask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classFlask(t2).track(t2, t))
/**
* Provides models for the `flask.Flask` class
*
* Covers references to the class itself, sources and tracked references of its instances,
* and references to the `route` and `add_url_rule` attributes of those instances.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.
*/
module Flask {
/**
* Gets a reference to the `flask.Flask` class, tracked with type tracker `t`.
*
* A reference starts at the import node of `flask.Flask` or at the `Flask` attribute of a
* reference to the `flask` module, and is then propagated by type-tracking steps.
*/
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
/**
* A source of an instance of `flask.Flask`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `Flask::instance()` predicate to get references to instances of `flask.Flask`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/** A direct instantiation of `flask.Flask`, i.e. a call whose function is a reference to the class. */
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
override CallNode node;
ClassInstantiation() { node.getFunction() = classRef().asCfgNode() }
}
/**
* Gets a reference to an instance of `flask.Flask` (a flask application), tracked with
* type tracker `t`. Tracking starts at any `InstanceSource`.
*/
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of an instance of `flask.Flask` (a flask application).
* WARNING: Only holds for a few predefined attributes.
*
* The first disjunct starts tracking in attribute `attr_name` of a `flask.Flask` instance;
* the second disjunct extends an existing tracking state by one step.
*/
private DataFlow::Node instance_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::Flask::instance()
or
// Due to bad performance when using normal setup with `instance_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
instance_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `res` is reached by a step with summary `summary` from a node that
* `instance_attr(t2, attr_name)` refers to.
*
* Helper for the recursive case of `instance_attr`; it is kept as a separate
* `pragma[nomagic]` predicate as part of the performance workaround described there.
*/
pragma[nomagic]
private predicate instance_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(instance_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of an instance of `flask.Flask` (a flask application).
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node instance_attr(string attr_name) {
result = instance_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Gets a reference to the `route` method on an instance of `flask.Flask`. */
DataFlow::Node route() { result = instance_attr("route") }
/** Gets a reference to the `add_url_rule` method on an instance of `flask.Flask`. */
DataFlow::Node add_url_rule() { result = instance_attr("add_url_rule") }
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classFlask() { result = classFlask(DataFlow::TypeTracker::end()) }
/** Gets a reference to an instance of `flask.Flask` (a Flask application). */
private DataFlow::Node app(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = flask::classFlask().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = app(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
DataFlow::Node app() { result = app(DataFlow::TypeTracker::end()) }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*
* `t` is the type tracker describing how far the reference has been tracked. The first
* disjunct starts tracking in attribute `attr_name` of a flask application; the second
* disjunct extends an existing tracking state by one step.
*/
private DataFlow::Node app_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::app()
or
// Due to bad performance when using normal setup with `app_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
app_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Holds if `res` is reached by a step with summary `summary` from a node that
* `app_attr(t2, attr_name)` refers to.
*
* Helper for the recursive case of `app_attr`; it is kept as a separate
* `pragma[nomagic]` predicate as part of the performance workaround described there.
*/
pragma[nomagic]
private predicate app_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(app_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*
* This is the fully tracked form: it selects the results of the type-tracking `app_attr`
* whose tracker is `DataFlow::TypeTracker::end()`.
*/
private DataFlow::Node app_attr(string attr_name) {
result = app_attr(DataFlow::TypeTracker::end(), attr_name)
}
private string werkzeug_rule_re() {
// since flask uses werkzeug internally, we are using its routing rules from
// https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/routing.py#L138-L151
@@ -134,14 +164,14 @@ private module Flask {
}
/**
* A call to `flask.Flask.route`.
* A call to the `route` method on an instance of `flask.Flask`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.route
*/
private class FlaskAppRouteCall extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppRouteCall() { node.getFunction() = app_attr("route").asCfgNode() }
FlaskAppRouteCall() { node.getFunction() = flask::Flask::route().asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
@@ -151,14 +181,14 @@ private module Flask {
}
/**
* A call to `flask.Flask.add_url_rule`.
* A call to the `add_url_rule` method on an instance of `flask.Flask`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.add_url_rule
*/
private class FlaskAppAddUrlRule extends FlaskRouteSetup, DataFlow::CfgNode {
private class FlaskAppAddUrlRuleCall extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppAddUrlRule() { node.getFunction() = app_attr("add_url_rule").asCfgNode() }
FlaskAppAddUrlRuleCall() { node.getFunction() = flask::Flask::add_url_rule().asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
@@ -287,7 +317,7 @@ private module Flask {
}
private class RequestInputMultiDict extends RequestInputAccess,
Werkzeug::Datastructures::MultiDict {
Werkzeug::werkzeug::datastructures::MultiDict::InstanceSource {
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
}
@@ -295,7 +325,7 @@ private module Flask {
RequestInputFiles() { attr_name = "files" }
}
// TODO: Somehow specify that elements of `RequestInputFiles` are
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
// Werkzeug::werkzeug::datastructures::FileStorage and should have those additional taint steps
// AND that the argument at index 0 of its `save` method is a sink for path-injection.
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
}

View File

@@ -6,70 +6,120 @@ private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// for old impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/libraries/Werkzeug.qll
module Werkzeug {
module Datastructures {
// ---------------------------------------------------------------------- //
// MultiDict //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.MultiDict
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
*/
abstract class MultiDict extends DataFlow::Node { }
/** Provides models for the `werkzeug` module. */
module werkzeug {
/** Provides models for the `werkzeug.datastructures` module. */
module datastructures {
/**
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
module MultiDict {
/**
* A source of an instance of `werkzeug.datastructures.MultiDict`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `MultiDict::instance()` predicate to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract class InstanceSource extends DataFlow::Node { }
private module MultiDictTracking {
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result instanceof MultiDict
or
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result = instance()
or
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
}
/**
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
/**
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
module FileStorage {
/**
* A source of an instance of `werkzeug.datastructures.FileStorage`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `FileStorage::instance()` predicate to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
abstract class InstanceSource extends DataFlow::Node { }
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
nodeTo.asCfgNode().(AttrNode).getObject("getlist") = nodeFrom.asCfgNode() and
nodeTo = MultiDictTracking::getlist()
or
// getlist -> getlist()
nodeFrom = MultiDictTracking::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
// ---------------------------------------------------------------------- //
// FileStorage //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.FileStorage
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
*/
abstract class FileStorage extends DataFlow::Node { }
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// TODO: should be `nodeFrom = tracked(any(FileStorage fs))`
nodeFrom instanceof FileStorage and
exists(string name |
name in ["filename",
// str
"name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"] and
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
}
}
}
/**
 * Additional taint steps for `getlist` on instances of `werkzeug.datastructures.MultiDict`:
 * from the object to a tracked `getlist` attribute read, and from a tracked `getlist`
 * reference to a call of it.
 */
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // getlist -> getlist()
    nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode() and
    nodeFrom = werkzeug::datastructures::MultiDict::getlist()
    or
    // obj -> obj.getlist
    nodeTo = werkzeug::datastructures::MultiDict::getlist() and
    nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom
  }
}
/**
 * Additional taint steps from an instance of `werkzeug.datastructures.FileStorage` to a
 * read of one of a fixed set of its attributes.
 */
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // obj -> obj.<attribute>, restricted to the attribute names listed below
    nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
    nodeTo.(DataFlow::AttrRead).getAttributeName() in ["filename",
          // str
          "name", "content_type", "mimetype",
          // file-like
          "stream",
          // TODO: werkzeug.datastructures.Headers
          "headers",
          // dict[str, str]
          "mimetype_params"] and
    // only instances of FileStorage are a starting point
    nodeFrom = werkzeug::datastructures::FileStorage::instance()
  }
}
}