Merge branch 'main' into python-model-python2-specific-command-execution

This commit is contained in:
Rasmus Wriedt Larsen
2020-10-15 09:58:20 +02:00
280 changed files with 4374 additions and 3541 deletions

View File

@@ -0,0 +1,35 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
class CodeInjectionConfiguration extends TaintTracking::Configuration {
CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(CodeExecution e).getCode() }
}
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -2,6 +2,7 @@ private import python
private import experimental.dataflow.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import experimental.semmle.python.Frameworks
private import experimental.semmle.python.Concepts
/**
* A data flow source of remote user input.

View File

@@ -71,7 +71,8 @@ module StepSummary {
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo)
jumpStep(nodeFrom, nodeTo) or
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
}
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */

View File

@@ -150,22 +150,6 @@ module EssaFlow {
// nodeTo is `y` on second line, cfg node
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
or
// Refinements
exists(EssaEdgeRefinement r |
nodeTo.(EssaNode).getVar() = r.getVariable() and
nodeFrom.(EssaNode).getVar() = r.getInput()
)
or
exists(EssaNodeRefinement r |
nodeTo.(EssaNode).getVar() = r.getVariable() and
nodeFrom.(EssaNode).getVar() = r.getInput()
)
or
exists(PhiFunction p |
nodeTo.(EssaNode).getVar() = p.getVariable() and
nodeFrom.(EssaNode).getVar() = p.getAnInput()
)
or
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
}

View File

@@ -152,6 +152,9 @@ class ParameterNode extends EssaNode {
}
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
/** Gets the `Parameter` this `ParameterNode` represents. */
Parameter getParameter() { result = var.(ParameterDefinition).getParameter() }
}
/**

View File

@@ -68,5 +68,5 @@ Node importNode(string name) {
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
// reference to `foo.bar`, as desired.
result.asCfgNode().getNode() = any(ImportExpr i | i.getAnImportedModuleName() = name)
result.asCfgNode().getNode() = any(ImportExpr i | i.getName() = name)
}

View File

@@ -7,6 +7,7 @@
import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Frameworks
private import experimental.dataflow.RemoteFlowSources
/**
* A data-flow node that executes an operating system command,
@@ -16,12 +17,12 @@ private import experimental.semmle.python.Frameworks
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node {
SystemCommandExecution::Range self;
SystemCommandExecution::Range range;
SystemCommandExecution() { this = self }
SystemCommandExecution() { this = range }
/** Gets the argument that specifies the command to be executed. */
DataFlow::Node getCommand() { result = self.getCommand() }
DataFlow::Node getCommand() { result = range.getCommand() }
}
/** Provides a class for modeling new system-command execution APIs. */
@@ -38,3 +39,91 @@ module SystemCommandExecution {
abstract DataFlow::Node getCommand();
}
}
/**
* A data-flow node that dynamically executes Python code.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CodeExecution::Range` instead.
*/
class CodeExecution extends DataFlow::Node {
CodeExecution::Range range;
CodeExecution() { this = range }
/** Gets the argument that specifies the code to be executed. */
DataFlow::Node getCode() { result = range.getCode() }
}
/** Provides a class for modeling new dynamic code execution APIs. */
module CodeExecution {
/**
* A data-flow node that dynamically executes Python code.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CodeExecution` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the code to be executed. */
abstract DataFlow::Node getCode();
}
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
/** Provides classes for modeling HTTP servers. */
module Server {
/**
* An data-flow node that sets up a route on a server.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RouteSetup::Range` instead.
*/
class RouteSetup extends DataFlow::Node {
RouteSetup::Range range;
RouteSetup() { this = range }
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() { result = range.getUrlPattern() }
/** Gets a function that will handle incoming requests for this route, if any. */
Function getARouteHandler() { result = range.getARouteHandler() }
/**
* Gets a parameter that will receive parts of the url when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = range.getARoutedParameter() }
}
/** Provides a class for modeling new HTTP routing APIs. */
module RouteSetup {
/**
* An data-flow node that sets up a route on a server.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RouteSetup` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the URL pattern for this route, if it can be statically determined. */
abstract string getUrlPattern();
/** Gets a function that will handle incoming requests for this route, if any. */
abstract Function getARouteHandler();
/**
* Gets a parameter that will receive parts of the url when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
abstract Parameter getARoutedParameter();
}
}
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
RoutedParameter() { this.getParameter() = any(RouteSetup setup).getARoutedParameter() }
override string getSourceType() { result = "RoutedParameter" }
}
}
}

View File

@@ -2,6 +2,7 @@
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Invoke
private import experimental.semmle.python.frameworks.Stdlib

View File

@@ -1,5 +1,6 @@
/**
* Provides classes modeling security-relevant aspects of the `flask` package.
* Provides classes modeling security-relevant aspects of the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private import python
@@ -11,6 +12,10 @@ private import experimental.semmle.python.frameworks.Werkzeug
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
/**
* Provides models for the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
@@ -23,6 +28,7 @@ private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }
/** Provides models for the `flask` module. */
module flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request(DataFlow::TypeTracker t) {
@@ -32,13 +38,154 @@ private module Flask {
t.startInAttr("request") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = request(t2).track(t2, t))
}
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
DataFlow::Node request() { result = request(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `flask.Flask` class. */
private DataFlow::Node classFlask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classFlask(t2).track(t2, t))
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classFlask() { result = classFlask(DataFlow::TypeTracker::end()) }
/** Gets a reference to an instance of `flask.Flask` (a Flask application). */
private DataFlow::Node app(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = flask::classFlask().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = app(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a flask application). */
DataFlow::Node app() { result = app(DataFlow::TypeTracker::end()) }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node app_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::app()
or
// Due to bad performance when using normal setup with `app_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
app_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate app_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(app_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node app_attr(string attr_name) {
result = app_attr(DataFlow::TypeTracker::end(), attr_name)
}
private string werkzeug_rule_re() {
// since flask uses werkzeug internally, we are using its routing rules from
// https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/routing.py#L138-L151
result =
"(?<static>[^<]*)<(?:(?<converter>[a-zA-Z_][a-zA-Z0-9_]*)(?:\\((?<args>.*?)\\))?\\:)?(?<variable>[a-zA-Z_][a-zA-Z0-9_]*)>"
}
/** A route setup made by flask (sharing handling of URL patterns). */
abstract private class FlaskRouteSetup extends HTTP::Server::RouteSetup::Range {
override Parameter getARoutedParameter() {
// If we don't know the URL pattern, we simply mark all parameters as a routed
// parameter. This should give us more RemoteFlowSources but could also lead to
// more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
not exists(this.getUrlPattern()) and
result = this.getARouteHandler().getArgByName(_)
or
exists(string name |
result = this.getARouteHandler().getArgByName(name) and
exists(string match |
match = this.getUrlPattern().regexpFind(werkzeug_rule_re(), _, _) and
name = match.regexpCapture(werkzeug_rule_re(), 4)
)
)
}
/** Gets the argument used to pass in the URL pattern. */
abstract DataFlow::Node getUrlPatternArg();
override string getUrlPattern() {
exists(StrConst str |
DataFlow::localFlow(DataFlow::exprNode(str), this.getUrlPatternArg()) and
result = str.getText()
)
}
}
/**
* A call to `flask.Flask.route`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.route
*/
private class FlaskAppRouteCall extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppRouteCall() { node.getFunction() = app_attr("route").asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
}
override Function getARouteHandler() { result.getADecorator().getAFlowNode() = node }
}
/**
* A call to `flask.Flask.add_url_rule`.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.add_url_rule
*/
private class FlaskAppAddUrlRule extends FlaskRouteSetup, DataFlow::CfgNode {
override CallNode node;
FlaskAppAddUrlRule() { node.getFunction() = app_attr("add_url_rule").asCfgNode() }
override DataFlow::Node getUrlPatternArg() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("rule")]
}
override Function getARouteHandler() {
exists(DataFlow::Node view_func_arg, DataFlow::Node func_src |
view_func_arg.asCfgNode() in [node.getArg(2), node.getArgByName("view_func")] and
DataFlow::localFlow(func_src, view_func_arg) and
func_src.asExpr().(CallableExpr) = result.getDefinition()
)
}
}
// ---------------------------------------------------------------------------
// flask.Request taint modeling
// ---------------------------------------------------------------------------
// TODO: Do we even need this class? :|
/**
* A source of remote flow from a flask request.

View File

@@ -0,0 +1,149 @@
/**
* Provides classes modeling security-relevant aspects of the `invoke` PyPI package.
* See https://www.pyinvoke.org/.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Concepts
/**
* Provides models for the `invoke` PyPI package.
* See https://www.pyinvoke.org/.
*/
private module Invoke {
// ---------------------------------------------------------------------------
// invoke
// ---------------------------------------------------------------------------
/** Gets a reference to the `invoke` module. */
private DataFlow::Node invoke(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("invoke")
or
exists(DataFlow::TypeTracker t2 | result = invoke(t2).track(t2, t))
}
/** Gets a reference to the `invoke` module. */
DataFlow::Node invoke() { result = invoke(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `invoke` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node invoke_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["run", "sudo", "context", "Context", "task"] and
(
t.start() and
result = DataFlow::importNode("invoke." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode("invoke")
)
or
// Due to bad performance when using normal setup with `invoke_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
invoke_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate invoke_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(invoke_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `invoke` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node invoke_attr(string attr_name) {
result = invoke_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Provides models for the `invoke` module. */
module invoke {
/** Gets a reference to the `invoke.context` module. */
DataFlow::Node context() { result = invoke_attr("context") }
/** Provides models for the `invoke.context` module */
module context {
/** Provides models for the `invoke.context.Context` class */
module Context {
/** Gets a reference to the `invoke.context.Context` class. */
private DataFlow::Node classRef(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("invoke.context.Context")
or
t.startInAttr("Context") and
result = invoke::context()
or
// handle invoke.Context alias
t.start() and
result = invoke_attr("Context")
or
exists(DataFlow::TypeTracker t2 | result = classRef(t2).track(t2, t))
}
/** Gets a reference to the `invoke.context.Context` class. */
DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }
/** Gets a reference to an instance of `invoke.context.Context`. */
private DataFlow::Node instance(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() =
invoke::context::Context::classRef().asCfgNode()
or
t.start() and
exists(Function func |
func.getADecorator() = invoke_attr("task").asExpr() and
result.(DataFlow::ParameterNode).getParameter() = func.getArg(0)
)
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
}
/** Gets a reference to an instance of `invoke.context.Context`. */
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `run` or `sudo` methods on a `invoke.context.Context` instance. */
private DataFlow::Node instanceRunMethods(DataFlow::TypeTracker t) {
t.startInAttr(["run", "sudo"]) and
result = invoke::context::Context::instance()
or
exists(DataFlow::TypeTracker t2 | result = instanceRunMethods(t2).track(t2, t))
}
/** Gets a reference to the `run` or `sudo` methods on a `invoke.context.Context` instance. */
DataFlow::Node instanceRunMethods() {
result = instanceRunMethods(DataFlow::TypeTracker::end())
}
}
}
}
/**
* A call to either
* - `invoke.run` or `invoke.sudo` functions (http://docs.pyinvoke.org/en/stable/api/__init__.html)
* - `run` or `sudo` methods on a `invoke.context.Context` instance (http://docs.pyinvoke.org/en/stable/api/context.html#invoke.context.Context.run)
*/
private class InvokeRunCommandCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
override CallNode node;
InvokeRunCommandCall() {
exists(DataFlow::Node callFunction | node.getFunction() = callFunction.asCfgNode() |
callFunction = invoke_attr(["run", "sudo"])
or
callFunction = invoke::context::Context::instanceRunMethods()
)
}
override DataFlow::Node getCommand() {
result.asCfgNode() in [node.getArg(0), node.getArgByName("command")]
}
}
}

View File

@@ -469,4 +469,115 @@ private module Stdlib {
result.asCfgNode() in [node.getArg(0), node.getArgByName("cmd")]
}
}
// ---------------------------------------------------------------------------
// builtins
// ---------------------------------------------------------------------------
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode(["builtins", "__builtin__"])
or
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
}
/** Gets a reference to the `builtins` module. */
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `builtins` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["exec", "eval", "compile"] and
(
t.start() and
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode(["builtins", "__builtin__"])
or
// special handling of builtins, that are in scope without any imports
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
t.start() and
exists(NameNode ref | result.asCfgNode() = ref |
ref.isGlobal() and
ref.getId() = attr_name and
ref.isLoad()
)
)
or
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
builtins_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate builtins_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `builtins` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node builtins_attr(string attr_name) {
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
}
/**
* A call to the builtin `exec` function.
* See https://docs.python.org/3/library/functions.html#exec
*/
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
override CallNode node;
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
}
/**
* A call to the builtin `eval` function.
* See https://docs.python.org/3/library/functions.html#eval
*/
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
override CallNode node;
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
}
/** An additional taint step for calls to the builtin function `compile` */
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(CallNode call |
nodeTo.asCfgNode() = call and
call.getFunction() = builtins_attr("compile").asCfgNode() and
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
)
}
}
}
/**
* An exec statement (only Python 2).
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
*/
private class ExecStatement extends CodeExecution::Range {
ExecStatement() {
// since there are no DataFlow::Nodes for a Statement, we can't do anything like
// `this = any(Exec exec)`
this.asExpr() = any(Exec exec).getBody()
}
override DataFlow::Node getCode() { result = this }
}