Merge branch 'main' of github.com:github/codeql into python-port-path-injection

This commit is contained in:
Rasmus Lerchedahl Petersen
2020-10-28 10:24:23 +01:00
116 changed files with 8324 additions and 965 deletions

View File

@@ -34,7 +34,7 @@ private module Django {
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node django_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["db", "urls", "http"] and
attr_name in ["db", "urls", "http", "conf"] and
(
t.start() and
result = DataFlow::importNode("django" + "." + attr_name)
@@ -437,6 +437,55 @@ private module Django {
DataFlow::Node re_path() { result = urls_attr("re_path") }
}
// -------------------------------------------------------------------------
// django.conf
// -------------------------------------------------------------------------
/** Gets a reference to the `django.conf` module. */
DataFlow::Node conf() { result = django_attr("conf") }
/** Provides models for the `django.conf` module */
module conf {
// -------------------------------------------------------------------------
// django.conf.urls
// -------------------------------------------------------------------------
/** Gets a reference to the `django.conf.urls` module. */
private DataFlow::Node urls(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("django.conf.urls")
or
t.startInAttr("urls") and
result = conf()
or
exists(DataFlow::TypeTracker t2 | result = urls(t2).track(t2, t))
}
// NOTE: had to rename due to shadowing rules in QL
/** Gets a reference to the `django.conf.urls` module. */
DataFlow::Node conf_urls() { result = urls(DataFlow::TypeTracker::end()) }
// NOTE: had to rename due to shadowing rules in QL
/** Provides models for the `django.conf.urls` module */
module conf_urls {
/** Gets a reference to the `django.conf.urls.url` function. */
private DataFlow::Node url(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("django.conf.urls.url")
or
t.startInAttr("url") and
result = conf_urls()
or
exists(DataFlow::TypeTracker t2 | result = url(t2).track(t2, t))
}
/**
* Gets a reference to the `django.conf.urls.url` function.
*
* See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
*/
DataFlow::Node url() { result = url(DataFlow::TypeTracker::end()) }
}
}
// -------------------------------------------------------------------------
// django.http
// -------------------------------------------------------------------------
@@ -558,7 +607,7 @@ private module Django {
* A source of an instance of `django.http.request.HttpRequest`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are call by external
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `django::http::request::HttpRequest::instance()` predicate to get
@@ -684,20 +733,48 @@ private module Django {
}
}
/** A Django route setup that uses a Regex to specify route (and routed parameters). */
abstract private class DjangoRegexRouteSetup extends DjangoRouteSetup {
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regex |
routeHandler = this.getARouteHandler() and
regex.getRouteSetup() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
not exists(regex.getGroupName(_, _)) and
// first group will have group number 1
result =
routeHandler.getArg(routeHandler.getRequestParamIndex() + regex.getGroupNumber(_, _))
or
result = routeHandler.getArgByName(regex.getGroupName(_, _))
)
}
}
/**
* A regex that is used in a call to `django.urls.re_path`.
* A regex that is used to set up a route.
*
* Needs this subclass to be considered a RegexString.
*/
private class DjangoUrlsRePathRegex extends RegexString {
DjangoUrlsRePathCall rePathCall;
private class DjangoRouteRegex extends RegexString {
DjangoRegexRouteSetup rePathCall;
DjangoUrlsRePathRegex() {
DjangoRouteRegex() {
this instanceof StrConst and
DataFlow::localFlow(DataFlow::exprNode(this), rePathCall.getUrlPatternArg())
}
DjangoUrlsRePathCall getRePathCall() { result = rePathCall }
DjangoRegexRouteSetup getRouteSetup() { result = rePathCall }
}
/**
@@ -705,7 +782,7 @@ private module Django {
*
* See https://docs.djangoproject.com/en/3.0/ref/urls/#re_path
*/
private class DjangoUrlsRePathCall extends DjangoRouteSetup {
private class DjangoUrlsRePathCall extends DjangoRegexRouteSetup {
override CallNode node;
DjangoUrlsRePathCall() { node.getFunction() = django::urls::re_path().asCfgNode() }
@@ -720,29 +797,26 @@ private module Django {
djangoRouteHandlerFunctionTracker(result) = viewArg
)
}
}
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
exists(DjangoRouteHandler routeHandler | routeHandler = this.getARouteHandler() |
not exists(this.getUrlPattern()) and
result in [routeHandler.getArg(_), routeHandler.getArgByName(_)] and
not result = any(int i | i <= routeHandler.getRequestParamIndex() | routeHandler.getArg(i))
)
or
exists(DjangoRouteHandler routeHandler, DjangoUrlsRePathRegex regex |
routeHandler = this.getARouteHandler() and
regex.getRePathCall() = this
|
// either using named capture groups (passed as keyword arguments) or using
// unnamed capture groups (passed as positional arguments)
not exists(regex.getGroupName(_, _)) and
// first group will have group number 1
result =
routeHandler.getArg(routeHandler.getRequestParamIndex() + regex.getGroupNumber(_, _))
or
result = routeHandler.getArgByName(regex.getGroupName(_, _))
/**
* A call to `django.conf.urls.url`.
*
* See https://docs.djangoproject.com/en/1.11/ref/urls/#django.conf.urls.url
*/
private class DjangoConfUrlsUrlCall extends DjangoRegexRouteSetup {
override CallNode node;
DjangoConfUrlsUrlCall() { node.getFunction() = django::conf::conf_urls::url().asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() = [node.getArg(0), node.getArgByName("regex")]
}
override DjangoRouteHandler getARouteHandler() {
exists(DataFlow::Node viewArg |
viewArg.asCfgNode() in [node.getArg(1), node.getArgByName("view")] and
djangoRouteHandlerFunctionTracker(result) = viewArg
)
}
}

View File

@@ -459,7 +459,7 @@ private module FabricV2 {
* A source of an instance of a subclass of `fabric.group.Group`
*
* This can include instantiation of a class, return value from function
* calls, or a special parameter that will be set when functions are call by external
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `Group::subclassInstance()` predicate to get references to an instance of a subclass of `fabric.group.Group`.

View File

@@ -10,13 +10,11 @@ private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
/**
* Provides models for the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private module Flask {
private module FlaskModel {
/** Gets a reference to the `flask` module. */
private DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
@@ -44,69 +42,101 @@ private module Flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = request(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `flask.Flask` class. */
private DataFlow::Node classFlask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classFlask(t2).track(t2, t))
/**
* Provides models for the `flask.Flask` class
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.
*/
module Flask {
/** Gets a reference to the `flask.Flask` class. */
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
/**
* A source of an instance of `flask.Flask`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `Flask::instance()` predicate to get references to instances of `flask.Flask`.
*/
abstract class InstanceSource extends DataFlow::Node { }
/** A direct instantiation of `flask.Flask`. */
private class ClassInstantiation extends InstanceSource, DataFlow::CfgNode {
override CallNode node;
ClassInstantiation() { node.getFunction() = classRef().asCfgNode() }
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of an instance of `flask.Flask` (a flask application).
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node instance_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::Flask::instance()
or
// Due to bad performance when using normal setup with `instance_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
instance_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate instance_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res,
DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(instance_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of an instance of `flask.Flask` (a flask application).
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node instance_attr(string attr_name) {
result = instance_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Gets a reference to the `route` method on an instance of `flask.Flask`. */
DataFlow::Node route() { result = instance_attr("route") }
/** Gets a reference to the `add_url_rule` method on an instance of `flask.Flask`. */
DataFlow::Node add_url_rule() { result = instance_attr("add_url_rule") }
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classFlask() { result = classFlask(DataFlow::TypeTracker::end()) }
/** Gets a reference to an instance of `flask.Flask` (a Flask application). */
private DataFlow::Node app(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = flask::classFlask().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = app(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
DataFlow::Node app() { result = app(DataFlow::TypeTracker::end()) }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node app_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::app()
or
// Due to bad performance when using normal setup with `app_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
app_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate app_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(app_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node app_attr(string attr_name) {
result = app_attr(DataFlow::TypeTracker::end(), attr_name)
}
private string werkzeug_rule_re() {
// since flask uses werkzeug internally, we are using its routing rules from
// https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/routing.py#L138-L151
@@ -134,14 +164,14 @@ private module Flask {
}
/**
* A call to `flask.Flask.route`.
* A call to the `route` method on an instance of `flask.Flask`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.route
*/
private class FlaskAppRouteCall extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppRouteCall() { node.getFunction() = app_attr("route").asCfgNode() }
FlaskAppRouteCall() { node.getFunction() = flask::Flask::route().asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
@@ -151,14 +181,14 @@ private module Flask {
}
/**
* A call to `flask.Flask.add_url_rule`.
* A call to the `add_url_rule` method on an instance of `flask.Flask`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.add_url_rule
*/
private class FlaskAppAddUrlRule extends FlaskRouteSetup, DataFlow::CfgNode {
private class FlaskAppAddUrlRuleCall extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppAddUrlRule() { node.getFunction() = app_attr("add_url_rule").asCfgNode() }
FlaskAppAddUrlRuleCall() { node.getFunction() = flask::Flask::add_url_rule().asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
@@ -287,7 +317,7 @@ private module Flask {
}
private class RequestInputMultiDict extends RequestInputAccess,
Werkzeug::Datastructures::MultiDict {
Werkzeug::werkzeug::datastructures::MultiDict::InstanceSource {
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
}
@@ -295,7 +325,7 @@ private module Flask {
RequestInputFiles() { attr_name = "files" }
}
// TODO: Somehow specify that elements of `RequestInputFiles` are
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
// Werkzeug::werkzeug::datastructures::FileStorage and should have those additional taint steps
// AND that the 0-indexed argument to its save method is a sink for path-injection.
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
}

View File

@@ -6,70 +6,120 @@ private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// for old impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/libraries/Werkzeug.qll
module Werkzeug {
module Datastructures {
// ---------------------------------------------------------------------- //
// MultiDict //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.MultiDict
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
*/
abstract class MultiDict extends DataFlow::Node { }
/** Provides models for the `werkzeug` module. */
module werkzeug {
/** Provides models for the `werkzeug.datastructures` module. */
module datastructures {
/**
* Provides models for the `werkzeug.datastructures.MultiDict` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.
*/
module MultiDict {
/**
* A source of an instance of `werkzeug.datastructures.MultiDict`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `MultiDict::instance()` predicate to get references to instances of `werkzeug.datastructures.MultiDict`.
*/
abstract class InstanceSource extends DataFlow::Node { }
private module MultiDictTracking {
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result instanceof MultiDict
or
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `werkzeug.datastructures.MultiDict`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`,
* tracked with the type tracker `t`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result = instance()
or
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
}
/**
* Gets a reference to the `getlist` method on an instance of `werkzeug.datastructures.MultiDict`.
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict.getlist
*/
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
/**
* Provides models for the `werkzeug.datastructures.FileStorage` class
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.
*/
module FileStorage {
/**
* A source of an instance of `werkzeug.datastructures.FileStorage`.
*
* This can include instantiation of the class, return value from function
* calls, or a special parameter that will be set when functions are called by an external
* library.
*
* Use `FileStorage::instance()` predicate to get references to instances of `werkzeug.datastructures.FileStorage`.
*/
abstract class InstanceSource extends DataFlow::Node { }
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
nodeTo.asCfgNode().(AttrNode).getObject("getlist") = nodeFrom.asCfgNode() and
nodeTo = MultiDictTracking::getlist()
or
// getlist -> getlist()
nodeFrom = MultiDictTracking::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result instanceof InstanceSource
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
// ---------------------------------------------------------------------- //
// FileStorage //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.FileStorage
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
*/
abstract class FileStorage extends DataFlow::Node { }
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// TODO: should be `nodeFrom = tracked(any(FileStorage fs))`
nodeFrom instanceof FileStorage and
exists(string name |
name in ["filename",
// str
"name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"] and
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
/** Gets a reference to an instance of `werkzeug.datastructures.FileStorage`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
}
}
}
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
exists(DataFlow::AttrRead read |
read.getObject() = nodeFrom and
nodeTo = read and
nodeTo = werkzeug::datastructures::MultiDict::getlist()
)
or
// getlist -> getlist()
nodeFrom = werkzeug::datastructures::MultiDict::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
nodeFrom = werkzeug::datastructures::FileStorage::instance() and
exists(DataFlow::AttrRead read | nodeTo = read |
read.getAttributeName() in ["filename",
// str
"name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"] and
read.getObject() = nodeFrom
)
}
}
}

28
python/ql/src/printAst.ql Normal file
View File

@@ -0,0 +1,28 @@
/**
* @name Print AST
* @description Outputs a representation of a file's Abstract Syntax Tree. This
* query is used by the VS Code extension.
* @id py/print-ast
* @kind graph
* @tags ide-contextual-queries/print-ast
*/
import python
import semmle.python.PrintAst
import analysis.DefinitionTracking
/**
* The source file to generate an AST from.
*/
external string selectedSourceFile();
/**
* A configuration that restricts AST printing to the file named by `selectedSourceFile()`.
*/
class PrintAstConfigurationOverride extends PrintAstConfiguration {
/**
* Holds if `e` passes the default `shouldPrint` check and the file of location `l`
* is the file that `getEncodedFile` yields for `selectedSourceFile()`.
*/
override predicate shouldPrint(AstNode e, Location l) {
super.shouldPrint(e, l) and
l.getFile() = getEncodedFile(selectedSourceFile())
}
}

View File

@@ -0,0 +1,647 @@
/**
* Provides queries to pretty-print a Python AST as a graph.
*
* By default, this will print the AST for all elements in the database. To change this behavior,
* extend `PrintAstConfiguration` and override `shouldPrint` to hold for only the elements
* you wish to view the AST for.
*/
import python
private newtype TPrintAstConfiguration = MkPrintAstConfiguration()

/**
 * A configuration hook for AST printing. A query can extend this class and
 * override `shouldPrint` to control which elements are printed.
 */
class PrintAstConfiguration extends TPrintAstConfiguration {
  /** Gets the string representation of this configuration. */
  string toString() { result = "PrintAstConfiguration" }

  /**
   * Holds if the `AstNode` `e` should be considered for AST printing.
   * The default implementation holds when `l` is the location of `e`.
   */
  predicate shouldPrint(AstNode e, Location l) { e.getLocation() = l }
}
/** Holds if some `PrintAstConfiguration` accepts `e` together with location `l`. */
private predicate shouldPrint(AstNode e, Location l) {
  any(PrintAstConfiguration config).shouldPrint(e, l)
}
/** Holds if `el` should be left out of the rendered AST. */
private predicate isNotNeeded(AstNode el) {
  el instanceof Module
  or
  el.isArtificial()
  or
  // Children of an omitted node are omitted as well, unless the omitted node is a module.
  exists(AstNode omitted |
    not omitted instanceof Module and
    isNotNeeded(omitted) and
    el = omitted.getAChildNode()
  )
}
/**
 * The nodes of the printed tree: one per printed AST element, plus synthetic
 * nodes for function parameters, call arguments and statement lists.
 */
private newtype TPrintAstNode =
  TElementNode(AstNode el) { not isNotNeeded(el) and shouldPrint(el, _) } or
  TFunctionParamsNode(Function f) { not isNotNeeded(f) and shouldPrint(f, _) } or
  TCallArgumentsNode(Call c) { not isNotNeeded(c) and shouldPrint(c, _) } or
  TStmtListNode(StmtList list) {
    // the body of a module is never wrapped in a statement-list node
    not list = any(Module mod).getBody() and
    shouldPrint(list.getAnItem(), _) and
    // at least one item of the list is actually rendered
    exists(AstNode child | child = list.getAnItem() and not isNotNeeded(child))
  }
/**
 * A node in the output tree.
 */
class PrintAstNode extends TPrintAstNode {
  /**
   * Gets the text shown for this node in the PrintAst output tree.
   * The base implementation has no result; subclasses provide one.
   */
  string toString() { none() }

  /**
   * Gets the child of this node at position `childIndex`. Indices must be
   * unique per child, but gaps between them are allowed.
   */
  PrintAstNode getChild(int childIndex) { none() }

  /**
   * Gets any child of this node.
   */
  final PrintAstNode getAChild() { result = this.getChild(_) }

  /**
   * Gets the node that has this node as a child, if there is one.
   */
  final PrintAstNode getParent() { this = result.getAChild() }

  /**
   * Gets the source location of this node.
   * The base implementation has no result; subclasses provide one.
   */
  Location getLocation() { none() }

  /**
   * Gets the value of the property named `key` for this node.
   * By default the only property is `semmle.label`, whose value is `toString()`.
   */
  string getProperty(string key) {
    result = this.toString() and
    key = "semmle.label"
  }

  /**
   * Gets the label of the edge from this node to its child at `childIndex`.
   * The default label is the index itself; subclasses can override this.
   */
  string getChildEdgeLabel(int childIndex) {
    result = childIndex.toString() and
    exists(this.getChild(childIndex))
  }
}
/** A print node without a parent, i.e. a root of the output tree. */
class TopLevelPrintAstNode extends PrintAstNode {
  TopLevelPrintAstNode() { not exists(this.getParent()) }

  /**
   * Gets the rank of this node among all top-level nodes that have a location,
   * ordered by relative file path and then by start/end position.
   */
  private int getOrder() {
    this =
      rank[result](TopLevelPrintAstNode node, Location loc |
        loc = node.getLocation()
      |
        node
        order by
          loc.getFile().getRelativePath(), loc.getStartLine(), loc.getStartColumn(),
          loc.getEndLine(), loc.getEndColumn()
      )
  }

  /**
   * Gets the inherited properties, plus `semmle.order` holding the rank from `getOrder`.
   */
  override string getProperty(string key) {
    key = "semmle.order" and
    result = this.getOrder().toString()
    or
    result = super.getProperty(key)
  }
}
/**
* An `AstNode` printed in the print-viewer.
*
* This class can be overridden to define more specific behavior for some `AstNode`s.
* The `getChildNode` and `getStmtList` methods can be overridden to easily set up a child-parent relation between different `AstElementNode`s.
* Be very careful about overriding `getChild`: the default `getChild` is what turns the results of
* `getChildNode` and `getStmtList` into children, so an override bypasses both of them.
*/
class AstElementNode extends PrintAstNode, TElementNode {
AstNode element;
AstElementNode() { this = TElementNode(element) }
/** Gets a label of the form `[<QL class>] <pretty-printed element>`. */
override string toString() {
result = "[" + PrettyPrinting::getQlClass(element) + "] " + PrettyPrinting::prettyPrint(element)
}
override Location getLocation() { result = element.getLocation() }
/**
* Gets the `AstNode` that is printed by this print node.
*/
final AstNode getAstNode() { result = element }
/**
* Gets the child print node at `childIndex`.
*
* Element children come from `getChildNode` and keep its index, except for nodes that are items
* of a `StmtList` returned by `getStmtList`. Each such `StmtList` is instead shown as a
* `StmtListNode`, placed at its `getStmtList` index plus an offset that is one more than the
* largest `getChildNode` index (or than 0, if that is larger).
*/
override PrintAstNode getChild(int childIndex) {
exists(AstNode el | result.(AstElementNode).getAstNode() = el |
el = this.getChildNode(childIndex) and not el = getStmtList(_, _).getAnItem()
)
or
// displaying all `StmtList` after the other children.
exists(int offset | offset = 1 + max([0, any(int index | exists(this.getChildNode(index)))]) |
exists(int index | childIndex = index + offset |
result.(StmtListNode).getList() = getStmtList(index, _)
)
)
}
/**
* Gets a child node for the AstNode that this print node represents.
*
* The default behavior in `getChild` uses `getChildNode` to easily define a parent-child relation between different `AstElementNode`s.
*/
AstNode getChildNode(int childIndex) { result = getChild(element, childIndex) }
/**
* Gets the `index`th `StmtList` that is a child of the `AstNode` that this print node represents.
* `label` is used for pretty-printing a label in the parent-child relation in the ast-viewer.
*
* The `StmtListNode` class and the `getChild` predicate use `getStmtList` to define a parent-child relation with labels.
*
* `index` must be 0 or positive.
*/
StmtList getStmtList(int index, string label) { none() }
}
/**
 * A print node for `Try` statements, exposing the body, `orelse`, handlers
 * and final body as labelled statement lists.
 */
class TryNode extends AstElementNode {
  override Try element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 0 and label = "body"
    or
    result = element.getOrelse() and index = 1 and label = "orelse"
    or
    result = element.getHandlers() and index = 2 and label = "handlers"
    or
    result = element.getFinalbody() and index = 3 and label = "final body"
  }
}
/**
 * A print node for `If` statements: the test is the only element child, and
 * the two branches are labelled statement lists.
 */
class IfNode extends AstElementNode {
  override If element;

  override AstNode getChildNode(int childIndex) { result = element.getTest() and childIndex = 0 }

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
    or
    result = element.getOrelse() and index = 2 and label = "orelse"
  }
}
/**
 * A print node for classes, exposing the class body as a labelled statement list.
 */
class ClassNode extends AstElementNode {
  override Class element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
  }
}
/**
 * A print node for `ExceptStmt`, exposing the handler body as a labelled statement list.
 */
class ExceptNode extends AstElementNode {
  override ExceptStmt element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
  }
}
/**
 * A print node for `With` statements, exposing the body as a labelled statement list.
 */
class WithNode extends AstElementNode {
  override With element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
  }
}
/**
 * A print node for `For` statements, exposing the loop body and the `orelse`
 * branch as labelled statement lists.
 */
class ForPrintNode extends AstElementNode {
  override For element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
    or
    result = element.getOrelse() and index = 2 and label = "orelse"
  }
}
/**
 * A print node for `While` statements, exposing the loop body and the `orelse`
 * branch as labelled statement lists.
 */
class WhilePrintNode extends AstElementNode {
  override While element;

  override StmtList getStmtList(int index, string label) {
    result = element.getBody() and index = 1 and label = "body"
    or
    result = element.getOrelse() and index = 2 and label = "orelse"
  }
}
/**
 * A print node for a `StmtList`.
 *
 * Only statement lists returned by the `getStmtList` predicate of some
 * `AstElementNode` get such a node; that predicate also supplies the label
 * shown for the list.
 */
class StmtListNode extends PrintAstNode, TStmtListNode {
  StmtList list;

  StmtListNode() {
    list = any(AstElementNode owner).getStmtList(_, _) and
    this = TStmtListNode(list)
  }

  /**
   * Gets the `StmtList` represented by this print node.
   */
  StmtList getList() { result = list }

  /** Gets a label that some `AstElementNode` assigns to this list via `getStmtList`. */
  private string getLabel() {
    exists(AstElementNode owner | this.getList() = owner.getStmtList(_, result))
  }

  override string toString() { result = "(StmtList) " + this.getLabel() }

  /** Gets the print node for the item at position `childIndex` of the list. */
  override PrintAstNode getChild(int childIndex) {
    result.(AstElementNode).getAstNode() = list.getItem(childIndex)
  }
}
/**
 * A print node for a `Call`.
 *
 * It has two children: the called function at index 0 and, at index 1, a
 * `CallArgumentsNode` that groups the arguments.
 */
class CallPrintNode extends AstElementNode {
  override Call element;

  override PrintAstNode getChild(int childIndex) {
    childIndex = 0 and element.getFunc() = result.(AstElementNode).getAstNode()
    or
    childIndex = 1 and element = result.(CallArgumentsNode).getCall()
  }
}
/**
 * A synthetic print node that groups the arguments of `call`.
 */
class CallArgumentsNode extends PrintAstNode, TCallArgumentsNode {
  Call call;

  CallArgumentsNode() { this = TCallArgumentsNode(call) }

  /**
   * Gets the call whose arguments this print node represents.
   */
  Call getCall() { result = call }

  override string toString() { result = "(arguments)" }

  /** Gets the print node for the child of `call` at `childIndex`, skipping the called function. */
  override PrintAstNode getChild(int childIndex) {
    exists(AstNode arg | arg = result.(AstElementNode).getAstNode() |
      arg = getChild(call, childIndex) and
      not arg = call.getFunc()
    )
  }
}
/**
 * A print node for a `Function`.
 *
 * The parameters are grouped under a synthetic `FunctionParamsNode` at index 0; the
 * remaining children are the inherited ones minus everything that is shown elsewhere.
 */
class FunctionNode extends AstElementNode {
  override Function element;

  override PrintAstNode getChild(int childIndex) {
    exists(FunctionParamsNode params | params.getFunction() = element |
      result = params and childIndex = 0
      or
      result = AstElementNode.super.getChild(childIndex) and
      not exists(AstNode shownElsewhere |
        shownElsewhere = result.(AstElementNode).getAstNode()
      |
        // Parameters are children of the synthetic parameters node.
        shownElsewhere = params.getChild(_).(AstElementNode).getAstNode()
        or
        // The default of a parameter is handled by `ParameterNode`.
        shownElsewhere = any(Parameter p).getDefault()
        or
        // The annotation of a parameter is handled by `ParameterNode`.
        shownElsewhere = any(Parameter p).getAnnotation()
      )
    )
  }

  override StmtList getStmtList(int index, string label) {
    label = "body" and index = 1 and result = element.getBody()
  }
}
/**
 * A print node for a `FunctionDef`.
 *
 * Shows the first target of the definition followed by its value.
 */
class FunctionDefNode extends AstElementNode {
  override FunctionDef element;

  override AstNode getChildNode(int childIndex) {
    result = element.getTarget(0) and childIndex = 0
    or
    result = element.getValue() and childIndex = 1
  }
}
/**
 * A synthetic print node that groups the parameters of a function.
 */
class FunctionParamsNode extends PrintAstNode, TFunctionParamsNode {
  Function func;

  FunctionParamsNode() { this = TFunctionParamsNode(func) }

  /**
   * Gets the `Function` that this print node represents.
   */
  Function getFunction() { result = func }

  override string toString() { result = "(parameters)" }

  override PrintAstNode getChild(int childIndex) {
    // Any location-ordered child of the function that is not a statement counts as a parameter.
    exists(AstNode param |
      param = getChild(func, childIndex) and
      not param = func.getAStmt() and
      param = result.(AstElementNode).getAstNode()
    )
  }
}
/**
 * A print node for a `Parameter`.
 *
 * The annotation and the default value of the parameter are shown as children of this
 * node. Without this class they would appear as children of the enclosing `Function`.
 */
class ParameterNode extends AstElementNode {
  Parameter param;

  ParameterNode() {
    // A parameter is either a plain name or a tuple.
    exists(AstNode n | n = this.getAstNode() |
      n = param.asName()
      or
      n = param.asTuple()
    )
  }

  override AstNode getChildNode(int childIndex) {
    result = param.getAnnotation() and childIndex = 0
    or
    result = param.getDefault() and childIndex = 1
  }
}
/**
 * Gets the `i`th child from `node` ordered by location.
 *
 * Children are sorted by start line, then start column, then end line, then end column.
 * Only nodes for which `shouldPrint` holds have any result.
 * The index comes from a `rank` aggregate, so the first child has `i = 1`.
 */
private AstNode getChild(AstNode node, int i) {
  shouldPrint(node, _) and
  result =
    rank[i](AstNode child |
      child = node.getAChildNode()
    |
      child
      order by
        child.getLocation().getStartLine(), child.getLocation().getStartColumn(),
        child.getLocation().getEndLine(), child.getLocation().getEndColumn()
    )
}
/**
 * A module for pretty-printing some `AstNode`s.
 */
private module PrettyPrinting {
  /**
   * Gets the QL class for the `AstNode` `a`.
   * Most `AstNode`s print their QL class in the `toString()` method, however there are exceptions.
   * These exceptions are handled in the `getQlCustomClass` predicate.
   */
  string getQlClass(AstNode a) {
    shouldPrint(a, _) and
    (
      not exists(getQlCustomClass(a)) and result = a.toString()
      or
      // Joined into one string so that a node matching several custom classes
      // still gets a single result.
      result = strictconcat(getQlCustomClass(a), " | ")
    )
  }

  /**
   * Gets the QL class for `AstNode`s where the `toString` method does not print the QL class.
   */
  string getQlCustomClass(AstNode a) {
    shouldPrint(a, _) and
    (
      // `Parameter` and `NameConstant` nodes are reported under their own name below.
      a instanceof Name and
      result = "Name" and
      not a instanceof Parameter and
      not a instanceof NameConstant
      or
      a instanceof Parameter and result = "Parameter"
      or
      a instanceof PlaceHolder and result = "PlaceHolder"
      or
      a instanceof Function and result = "Function"
      or
      a instanceof Class and result = "Class"
      or
      a instanceof Call and result = "Call"
      or
      a instanceof NameConstant and result = "NameConstant"
    )
  }

  /**
   * Gets a human-readable representation of the `AstNode` `a`, or the empty string.
   *
   * Has exactly one result for every `AstNode` for which `shouldPrint` holds.
   */
  string prettyPrint(AstNode a) {
    shouldPrint(a, _) and
    (
      // this strictconcat should not be needed.
      // However, the printAst feature breaks if this predicate has more than one result for an `AstNode`, so the strictconcat stays.
      result = strictconcat(reprRec(a), " | ")
      or
      not exists(reprRec(a)) and
      result = ""
    )
  }

  /**
   * Gets a human-readable representation of the given `AstNode`.
   *
   * Only has a result for some `AstNode`s.
   *
   * The monotonicity of this recursive predicate is kept by defining the non-recursive cases inside the `reprBase` predicate,
   * and then using `reprBase` when there is a negative edge.
   */
  private string reprRec(AstNode a) {
    shouldPrint(a, _) and
    not isNotNeeded(a) and
    (
      // Nodes that can be represented without recursion (names, literals, ...)
      result = reprBase(a)
      or
      exists(Expr obj | obj = a.(Attribute).getObject() |
        // Attributes of the form `name.name2`
        result = reprBase(obj) + "." + a.(Attribute).getName()
        or
        // Attributes where the object is a more complicated expression
        not exists(reprBase(obj)) and
        result = "(...)." + a.(Attribute).getName()
      )
      or
      result = "import " + reprRec(a.(Import).getName(_).getAsname())
      or
      exists(Keyword keyword | keyword = a |
        result = keyword.getArg() + "=" + reprRec(keyword.getValue())
      )
      or
      result = reprRec(a.(Call).getFunc()) + "(" + printArgs(a) + ")"
      or
      // Fallback for calls whose argument list cannot be printed.
      not exists(printArgs(a)) and result = reprRec(a.(Call).getFunc()) + "(...)"
      or
      result = "try " + reprRec(a.(Try).getBody().getItem(0))
      or
      result = "if " + reprRec(a.(If).getTest()) + ":"
      or
      // Only the left operand and the first operator of a comparison are shown.
      result = reprRec(a.(Compare).getLeft()) + " " + a.(Compare).getOp(0).getSymbol() + " ..."
      or
      // NOTE(review): the object is rendered through its `toString()`, whereas the index
      // goes through `reprRec` - confirm that this asymmetry is intended.
      result = a.(Subscript).getObject() + "[" + reprRec(a.(Subscript).getIndex()) + "]"
      or
      exists(Assign asn | asn = a |
        // Only assignments with a single target are represented. Count the targets
        // themselves: `getTargets()` is one list, so counting it is always 1.
        strictcount(asn.getTarget(_)) = 1 and
        result = reprRec(asn.getTarget(0)) + " = " + reprRec(asn.getValue())
      )
      or
      result = "return " + reprRec(a.(Return).getValue())
      or
      result = reprRec(a.(ExprStmt).getValue())
      or
      exists(BoolExpr b, string op |
        a = b and
        (
          b.getOp() instanceof And and op = "and"
          or
          b.getOp() instanceof Or and op = "or"
        )
      |
        // Only the first two operands are shown.
        result = reprRec(b.getValue(0)) + " " + op + " " + reprRec(b.getValue(1))
      )
    )
  }

  /**
   * Gets a comma separated pretty printed list of the arguments in `call`, in argument order.
   *
   * Is the empty string when `call.getAnArg()` has no result. Arguments without a
   * `reprBase` representation are left out; if no argument has one, there is no result.
   */
  string printArgs(Call call) {
    not exists(call.getAnArg()) and result = ""
    or
    // Without an explicit `order by`, the pieces would be joined in lexicographic
    // order of their text instead of in argument order.
    result = strictconcat(int i | | reprBase(call.getArg(i)), ", " order by i)
  }

  /**
   * Gets a human-readable representation of the given `AstNode`.
   * Is only defined for `AstNode`s for which a human-readable representation can be created without using recursion.
   */
  private string reprBase(AstNode a) {
    shouldPrint(a, _) and
    not isNotNeeded(a) and
    (
      result = a.(Name).getId()
      or
      result = a.(PlaceHolder).toString()
      or
      result = "class " + a.(ClassExpr).getName()
      or
      result = "class " + a.(Class).getName()
      or
      result = a.(StrConst).getText()
      or
      // The operand of the next four cases is appended directly, not via `reprRec`,
      // which keeps this predicate non-recursive.
      result = "yield " + a.(Yield).getValue()
      or
      result = "yield from " + a.(YieldFrom).getValue()
      or
      result = "*" + a.(Starred).getValue()
      or
      result = "`" + a.(Repr).getValue() + "`"
      or
      a instanceof Ellipsis and result = "..."
      or
      result = a.(Num).getText()
      or
      result = a.(NegativeIntegerLiteral).getValue().toString()
      or
      result = a.(NameConstant).toString()
      or
      result = "await " + a.(Await).getValue()
      or
      result = "function " + a.(FunctionExpr).getName() + "(...)"
      or
      result = "function " + a.(Function).getName() + "(...)"
      or
      a instanceof List and result = "[...]"
      or
      a instanceof Set and result = "{...}"
      or
      a instanceof Continue and result = "continue"
      or
      a instanceof Break and result = "break"
      or
      a instanceof Pass and result = "pass"
    )
  }
}
/**
 * Holds if `node` belongs to the output tree, and its property `key` has the given `value`.
 */
query predicate nodes(PrintAstNode node, string key, string value) {
  node.getProperty(key) = value
}
/**
 * Holds if `target` is a child of `source` in the AST, and property `key` of the edge has the
 * given `value`.
 *
 * Each edge carries two properties: "semmle.label" (the child edge label) and
 * "semmle.order" (the child index rendered as a string).
 */
query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) {
  exists(int idx | source.getChild(idx) = target |
    key = "semmle.order" and value = idx.toString()
    or
    key = "semmle.label" and value = source.getChildEdgeLabel(idx)
  )
}
/**
 * Holds if property `key` of the graph has the given `value`.
 *
 * The only property reported is the graph kind, which is "tree".
 */
query predicate graphProperties(string key, string value) {
  value = "tree" and key = "semmle.graphKind"
}

View File

@@ -431,6 +431,31 @@ private module SsaComputeImpl {
defSourceUseRank(v, b2, 1, i2)
}
/**
 * Holds if `use1` is a use of the variable `v`, and `adjacentVarRefs` relates the position
 * of that use to the reference to `v` at index `i2` in basic block `b2`.
 *
 * A helper predicate for `adjacentUseUseSameVar`, to prevent the first join from being between
 * the two instances of `variableSourceUse` in
 * ```ql
 * exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
 *   adjacentVarRefs(v, b1, i1, b2, i2) and
 *   variableSourceUse(v, use1, b1, i1) and
 *   variableSourceUse(v, use2, b2, i2)
 * )
 * ```
 */
pragma[nomagic]
cached
private predicate adjacentRefUse(
  SsaSourceVariable v, BasicBlock b2, int i2, ControlFlowNode use1
) {
  // The position of `use1` itself is an implementation detail and is not exposed.
  exists(BasicBlock useBlock, int useIndex |
    variableSourceUse(v, use1, useBlock, useIndex) and
    adjacentVarRefs(v, useBlock, useIndex, b2, i2)
  )
}
/**
* Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
* variable, that is, the value read in `use1` can reach `use2` without passing
@@ -438,9 +463,8 @@ private module SsaComputeImpl {
*/
cached
predicate adjacentUseUseSameVar(ControlFlowNode use1, ControlFlowNode use2) {
exists(SsaSourceVariable v, BasicBlock b1, int i1, BasicBlock b2, int i2 |
adjacentVarRefs(v, b1, i1, b2, i2) and
variableSourceUse(v, use1, b1, i1) and
exists(SsaSourceVariable v, BasicBlock b2, int i2 |
adjacentRefUse(v, b2, i2, use1) and
variableSourceUse(v, use2, b2, i2)
)
}

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,29 @@
from django.http.response import HttpResponse, HttpResponseRedirect, JsonResponse, HttpResponseNotFound
# Not an XSS sink, since the Content-Type is not "text/html"
# FP reported in https://github.com/github/codeql-python-team/issues/38
def fp_json_response(request):
    # The `foo` query parameter is echoed back as a value inside the JSON payload.
    # implicitly sets Content-Type to "application/json"
    return JsonResponse({"foo": request.GET.get("foo")})
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_json_response(request):
    # Builds the JSON text by hand, embedding the `foo` query parameter.
    # NOTE(review): the outer braces of the template are not escaped, so `.format` would
    # raise a KeyError if this were executed - presumably harmless because the fixture is
    # only analysed, never run; confirm.
    json_data = '{"json": "{}"}'.format(request.GET.get("foo"))
    # The content type is passed explicitly instead of being implied by the response class.
    return HttpResponse(json_data, content_type="application/json")
# Not an XSS sink, since the Content-Type is not "text/html"
def fp_manual_content_type(request):
    # The body is a fixed HTML-looking string (no request data), served as "text/plain".
    return HttpResponse('<img src="0" onerror="alert(1)">', content_type="text/plain")
# XSS FP reported in https://github.com/github/codeql/issues/3466
# Note: This should be an open-redirect sink, but not an XSS sink.
def fp_redirect(request):
    # The redirect target is taken directly from the `next` query parameter.
    return HttpResponseRedirect(request.GET.get("next"))
# Ensure that simple subclasses are still vulnerable to XSS
def tp_not_found(request):
    # The `name` query parameter becomes the response body; no content type is given.
    return HttpResponseNotFound(request.GET.get("name"))
# Ensure we still have an XSS sink when manually setting the content_type to HTML
def tp_manual_response_type(request):
    # The `name` query parameter becomes the response body, and the content type value
    # carries a charset suffix in addition to "text/html".
    return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8")

View File

@@ -0,0 +1,79 @@
"""test of views for Django 1.x"""
from django.conf.urls import patterns, url
from django.http.response import HttpResponse
from django.views.generic import View
# Handler whose `foo` and `bar` arguments are formatted into the response body;
# `no_taint` is accepted but never used.
def url_match_xss(request, foo, bar, no_taint=None): # $routeHandler $routedParameter=foo $routedParameter=bar
    return HttpResponse('url_match_xss: {} {}'.format(foo, bar))
# Handler that returns the `untrusted` GET parameter as the response body.
def get_params_xss(request): # $routeHandler
    return HttpResponse(request.GET.get("untrusted"))
# Handler that returns the `untrusted` POST parameter as the response body.
def post_params_xss(request): # $routeHandler
    return HttpResponse(request.POST.get("untrusted"))
# Handler that creates an empty response first and then adds the `untrusted` GET
# parameter through `write()` instead of passing it to the constructor.
def http_resp_write(request): # $routeHandler
    rsp = HttpResponse()
    rsp.write(request.GET.get("untrusted"))
    return rsp
# Plain base class (derives from `object`, not from `View`) that defines a `post` handler.
class Foo(object):
    # Note: since Foo is used as the super type in a class view, it will be able to handle requests.
    def post(self, request, untrusted): # $f-:routeHandler $f-:routedParameter=untrusted
        # The `untrusted` argument is formatted into the response body.
        return HttpResponse('Foo post: {}'.format(untrusted))
# Class-based view that derives from both `View` and `Foo`, and adds a `get` handler.
class ClassView(View, Foo):
    def get(self, request, untrusted): # $f-:routeHandler $f-:routedParameter=untrusted
        # The `untrusted` argument is formatted into the response body.
        return HttpResponse('ClassView get: {}'.format(untrusted))
# Handler whose `page_number` argument defaults to 1.
def show_articles(request, page_number=1): # $routeHandler $routedParameter=page_number
    # The argument is converted with `int()` before it is formatted into the response.
    page_number = int(page_number)
    return HttpResponse('articles page: {}'.format(page_number))
# Handler whose `arg0` and `arg1` arguments are formatted into the response body;
# `no_taint` is accepted but never used.
def xxs_positional_arg(request, arg0, arg1, no_taint=None): # $routeHandler $routedParameter=arg0 $routedParameter=arg1
    return HttpResponse('xxs_positional_arg: {} {}'.format(arg0, arg1))
# Route table: each entry pairs a regex with one of the handlers defined above.
# The trailing comment on each entry repeats the regex as the expected route setup.
urlpatterns = [
    url(r"^url_match/(?P<foo>[^/]+)/(?P<bar>[^/]+)", url_match_xss), # $routeSetup="^url_match/(?P<foo>[^/]+)/(?P<bar>[^/]+)"
    url(r"^get_params", get_params_xss), # $routeSetup="^get_params"
    url(r"^post_params", post_params_xss), # $routeSetup="^post_params"
    url(r"^http_resp_write", http_resp_write), # $routeSetup="^http_resp_write"
    url(r"^class_view/(?P<untrusted>.+)", ClassView.as_view()), # $routeSetup="^class_view/(?P<untrusted>.+)"
    # one pattern to support `articles/page-<n>` and to ensure that `articles/` goes to page-1
    # NOTE(review): the `^` anchor comes after `articles/` in this regex - presumably it was
    # meant to be at the very start; confirm before relying on it.
    url(r"articles/^(?:page-(?P<page_number>\d+)/)?", show_articles), # $routeSetup="articles/^(?:page-(?P<page_number>\d+)/)?"
    # passing as positional argument is not the recommended way of doing things, but it is certainly
    # possible
    url(r"^([^/]+)/(?:foo|bar)/([^/]+)", xxs_positional_arg, name='xxs_positional_arg'), # $routeSetup="^([^/]+)/(?:foo|bar)/([^/]+)"
]
################################################################################
# Using patterns() for routing
# Handler whose `username` argument is formatted into the response body.
def show_user(request, username): # $routeHandler $routedParameter=username
    return HttpResponse('show_user {}'.format(username))
# The `url()` entry is wrapped in a `patterns()` call instead of being placed in a list.
urlpatterns = patterns(url(r"^users/(?P<username>[^/]+)", show_user)) # $routeSetup="^users/(?P<username>[^/]+)"
################################################################################
# Show we understand the keyword arguments to django.conf.urls.url
# Handler that takes no routed parameters and returns a fixed body.
def kw_args(request): # $routeHandler
    return HttpResponse('kw_args')
# Both arguments are passed by keyword, with `view` given before `regex`.
urlpatterns = [
    url(view=kw_args, regex=r"^kw_args") # $routeSetup="^kw_args"
]

View File

@@ -97,3 +97,13 @@ urlpatterns = [
# We should not report there is a request parameter called `not_valid!`
path("not_valid/<not_valid!>", not_valid_identifier), # $routeSetup="not_valid/<not_valid!>"
]
# This version 1.x way of defining urls is deprecated in Django 3.1, but still works
from django.conf.urls import url
# Handler registered below through `django.conf.urls.url`; returns a fixed body.
def deprecated(request): # $routeHandler
    return HttpResponse('deprecated')
# Route table with a single regex route, set up with `url()`.
urlpatterns = [
    url(r"^deprecated/", deprecated), # $routeSetup="^deprecated/"
]

View File

@@ -1,5 +1,8 @@
from django.urls import path, re_path
# This version 1.x way of defining urls is deprecated in Django 3.1, but still works
from django.conf.urls import url
from . import views
urlpatterns = [
@@ -8,4 +11,5 @@ urlpatterns = [
# inline expectation tests (which thinks the `$` would mark the beginning of a new
# line)
re_path(r"^ba[rz]/", views.bar_baz), # $routeSetup="^ba[rz]/"
url(r"^deprecated/", views.deprecated), # $routeSetup="^deprecated/"
]

View File

@@ -5,3 +5,6 @@ def foo(request: HttpRequest): # $routeHandler
# Handler that ignores its request argument and returns a fixed body.
def bar_baz(request: HttpRequest): # $routeHandler
    return HttpResponse("bar_baz")
# Handler that ignores its request argument and returns a fixed body.
def deprecated(request: HttpRequest): # $routeHandler
    return HttpResponse("deprecated")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
import semmle.python.PrintAst