Merge branch 'main' of github.com:github/codeql into SharedDataflow_ArgumentPassing

This commit is contained in:
Rasmus Lerchedahl Petersen
2020-10-15 09:49:21 +02:00
470 changed files with 32336 additions and 7534 deletions

View File

@@ -0,0 +1,35 @@
/**
* @name Code injection
* @description Interpreting unsanitized user input as code allows a malicious user to perform arbitrary
* code execution.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/code-injection
* @tags security
* external/owasp/owasp-a1
* external/cwe/cwe-094
* external/cwe/cwe-095
* external/cwe/cwe-116
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for code injection: flow from any `RemoteFlowSource`
 * to the code argument of any `CodeExecution`.
 */
class CodeInjectionConfiguration extends TaintTracking::Configuration {
  CodeInjectionConfiguration() { this = "CodeInjectionConfiguration" }

  /** Holds if `source` is a remote flow source. */
  override predicate isSource(DataFlow::Node source) {
    exists(RemoteFlowSource remote | source = remote)
  }

  /** Holds if `sink` is the code that some `CodeExecution` executes. */
  override predicate isSink(DataFlow::Node sink) {
    exists(CodeExecution execution | sink = execution.getCode())
  }
}
// Report every flow path found by the configuration. The alert is located at the sink node,
// and the `$@` placeholder in the message links to the source node.
from CodeInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows to here and is interpreted as code.",
source.getNode(), "A user-provided value"

View File

@@ -2,6 +2,7 @@ private import python
private import experimental.dataflow.DataFlow
// Need to import since frameworks can extend `RemoteFlowSource::Range`
private import experimental.semmle.python.Frameworks
private import experimental.semmle.python.Concepts
/**
* A data flow source of remote user input.

View File

@@ -6,7 +6,7 @@ private import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
AttributeName() { this = any(AttrRef a).getAttributeName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
@@ -71,7 +71,8 @@ module StepSummary {
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) or
jumpStep(nodeFrom, nodeTo)
jumpStep(nodeFrom, nodeTo) or
nodeFrom = nodeTo.(PostUpdateNode).getPreUpdateNode()
}
/** Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call. */
@@ -115,11 +116,10 @@ predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
*/
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
simpleLocalFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
exists(AttrWrite a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getValue() and
simpleLocalFlowStep*(nodeTo, a.getObject())
)
}
@@ -127,7 +127,11 @@ predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
* Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
*/
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
exists(AttrRead a |
a.mayHaveAttributeName(attr) and
nodeFrom = a.getObject() and
nodeTo = a
)
}
/**

View File

@@ -0,0 +1,244 @@
/** This module provides an API for attribute reads and writes. */
import DataFlowUtil
import DataFlowPublic
private import DataFlowPrivate
/**
 * A data flow node that either reads or writes an attribute of an object.
 *
 * This abstract base class only exposes the base object on which the attribute is
 * accessed, and the attribute itself when it is statically inferrable.
 */
abstract class AttrRef extends Node {
  /**
   * Gets the data flow node corresponding to the object whose attribute is being read or written.
   */
  abstract Node getObject();

  /**
   * Gets the name of the attribute being read or written. For dynamic attribute accesses, this
   * predicate is not guaranteed to have a result. In such cases `mayHaveAttributeName` may yield
   * better results.
   */
  abstract string getAttributeName();

  /**
   * Gets the expression node that defines the attribute being accessed, if any. This is
   * usually an identifier or literal.
   */
  abstract ExprNode getAttributeNameExpr();

  /**
   * Holds if this attribute reference may access an attribute named `attrName`.
   *
   * Besides the statically known name, string constants that reach the attribute-name
   * expression through local data flow are considered, which may lead to imprecision.
   * If more precision is needed, consider using `getAttributeName` instead.
   */
  predicate mayHaveAttributeName(string attrName) {
    exists(Node nameSource, StrConst literal |
      literal = nameSource.asExpr() and
      attrName = literal.getText() and
      localFlow(nameSource, this.getAttributeNameExpr())
    )
    or
    attrName = this.getAttributeName()
  }
}
/**
* A data flow node that writes an attribute of an object. This includes
* - Simple attribute writes: `object.attr = value`
* - Dynamic attribute writes: `setattr(object, attr, value)`
* - Fields written during class initialization: `class MyClass: attr = value`
*
* In addition to the members of `AttrRef`, a write exposes the value being stored.
*/
abstract class AttrWrite extends AttrRef {
/** Gets the data flow node corresponding to the value that is written to the attribute. */
abstract Node getValue();
}
/**
* Represents a control flow node for a simple attribute assignment. That is,
* ```python
* object.attr = value
* ```
* Also gives access to the `value` being written, by extending `DefinitionNode`.
*/
private class AttributeAssignmentNode extends DefinitionNode, AttrNode {
// Explicitly delegates to the `DefinitionNode` implementation of `getValue`.
override ControlFlowNode getValue() { result = DefinitionNode.super.getValue() }
}
/** An attribute write of the simple form `object.attr = value`. */
private class AttributeAssignmentAsAttrWrite extends AttrWrite, CfgNode {
  override AttributeAssignmentNode node;

  override Node getObject() { node.getObject() = result.asCfgNode() }

  override Node getValue() { node.getValue() = result.asCfgNode() }

  override string getAttributeName() { node.getName() = result }

  override ExprNode getAttributeNameExpr() {
    // The attribute in a plain assignment can only ever be an identifier, so it is not
    // represented as a `Node` in the control flow graph but directly as a string.
    // Use `getAttributeName` to access the name of the attribute.
    none()
  }
}
import semmle.python.types.Builtins
/** A `CallNode` whose callee is a global name that is also the name of a built-in. */
private class BuiltInCallNode extends CallNode {
  string name;

  BuiltInCallNode() {
    // TODO disallow instances where the name of the built-in may refer to an in-scope variable of that name.
    exists(Builtin builtin | name = builtin.getName()) and
    exists(NameNode callee |
      callee = this.getFunction() and
      callee.isGlobal() and
      name = callee.getId()
    )
  }

  /** Gets the name of the built-in function that is called at this `CallNode`. */
  string getBuiltinName() { name = result }
}
/**
 * A call to one of the built-ins that handle dynamic inspection and modification of
 * attributes: `getattr`, `setattr`, `hasattr`, and `delattr`.
 */
private class BuiltinAttrCallNode extends BuiltInCallNode {
  BuiltinAttrCallNode() {
    name = "setattr" or
    name = "getattr" or
    name = "hasattr" or
    name = "delattr"
  }

  /** Gets the control flow node for the object on which the attribute is accessed. */
  ControlFlowNode getObject() {
    result = this.getArg(0)
    or
    result = this.getArgByName("object")
  }

  /** Gets the control flow node that defines the name of the attribute being accessed. */
  ControlFlowNode getName() {
    result = this.getArg(1)
    or
    result = this.getArgByName("name")
  }

  /**
   * Gets the control flow node for the value that is being written to the attribute.
   * Only has a result for `setattr` calls.
   */
  ControlFlowNode getValue() {
    name = "setattr" and
    (
      result = this.getArg(2)
      or
      result = this.getArgByName("value")
    )
  }
}
/** A call to the built-in `setattr`. */
private class SetAttrCallNode extends BuiltinAttrCallNode {
  SetAttrCallNode() { this.getBuiltinName() = "setattr" }
}
/** A call to the built-in `getattr`. */
private class GetAttrCallNode extends BuiltinAttrCallNode {
  GetAttrCallNode() { this.getBuiltinName() = "getattr" }
}
/** An attribute write performed through `setattr`, e.g. `setattr(object, attr, value)`. */
private class SetAttrCallAsAttrWrite extends AttrWrite, CfgNode {
  override SetAttrCallNode node;

  override Node getObject() { node.getObject() = result.asCfgNode() }

  override Node getValue() { node.getValue() = result.asCfgNode() }

  override ExprNode getAttributeNameExpr() { node.getName() = result.asCfgNode() }

  /** Gets the attribute name when the name argument is itself a string constant. */
  override string getAttributeName() {
    exists(StrConst literal |
      literal = this.getAttributeNameExpr().asExpr() and
      result = literal.getText()
    )
  }
}
/**
* Represents an attribute of a class that is assigned statically during class definition. For instance
* ```python
* class MyClass:
* attr = value
* ...
* ```
* Instances of this class correspond to the `NameNode` for `attr`, and also give access to `value` by
* virtue of being a `DefinitionNode`.
*
* Note that this class has no characteristic predicate of its own: it matches any `NameNode` that
* is also a `DefinitionNode`, and does not by itself restrict the definition to a class scope.
*/
private class ClassAttributeAssignmentNode extends DefinitionNode, NameNode { }
/**
 * An attribute write that takes the form of a class field, e.g.
 * ```python
 * class MyClass:
 *     attr = value
 * ```
 * This is treated as equivalent to `MyClass.attr = value`.
 */
private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
  ClassExpr cls;
  override ClassAttributeAssignmentNode node;

  // Only name definitions whose scope is the inner scope of the class expression qualify.
  ClassDefinitionAsAttrWrite() { cls.getInnerScope() = node.getScope() }

  override Node getObject() { cls.getAFlowNode() = result.asCfgNode() }

  override Node getValue() { node.getValue() = result.asCfgNode() }

  override string getAttributeName() { node.getId() = result }

  override ExprNode getAttributeNameExpr() { none() }
}
/**
 * A data flow node that reads an attribute of an object. This includes
 * - Simple attribute reads: `object.attr`
 * - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
 * - Qualified imports: `from module import attr as name`
 */
abstract class AttrRead extends AttrRef { }
/** An attribute read of the simple form `object.attr`. */
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
  override AttrNode node;

  override Node getObject() { node.getObject() = result.asCfgNode() }

  override string getAttributeName() { node.getName() = result }

  override ExprNode getAttributeNameExpr() {
    // The attribute in a plain read can only ever be an identifier, so it is not
    // represented as a `Node` in the control flow graph but directly as a string.
    // Use `getAttributeName` to access the name of the attribute.
    none()
  }
}
/** An attribute read performed through `getattr`, e.g. `getattr(object, attr)`. */
private class GetAttrCallAsAttrRead extends AttrRead, CfgNode {
  override GetAttrCallNode node;

  override Node getObject() { node.getObject() = result.asCfgNode() }

  override ExprNode getAttributeNameExpr() { node.getName() = result.asCfgNode() }

  /** Gets the attribute name when the name argument is itself a string constant. */
  override string getAttributeName() {
    exists(StrConst literal |
      literal = this.getAttributeNameExpr().asExpr() and
      result = literal.getText()
    )
  }
}
/**
 * A named import, viewed as an attribute read. That is,
 * ```python
 * from module import attr as attr_ref
 * ```
 * is treated as a read of the attribute `module.attr`, even if `module` is not imported directly.
 */
private class ModuleAttributeImportAsAttrRead extends AttrRead, CfgNode {
  override ImportMemberNode node;

  override Node getObject() {
    exists(string member | node.getModule(member) = result.asCfgNode())
  }

  override string getAttributeName() {
    exists(ControlFlowNode importedModule | importedModule = node.getModule(result))
  }

  override ExprNode getAttributeNameExpr() {
    // The name of an imported member can only ever be an identifier, so it is not
    // represented as a `Node` in the control flow graph but directly as a string.
    // Use `getAttributeName` to access the name of the attribute.
    none()
  }
}

View File

@@ -167,4 +167,9 @@ module Consistency {
not isImmutableOrUnobservable(n) and
msg = "ArgumentNode is missing PostUpdateNode."
}
query predicate postWithInFlow(PostUpdateNode n, string msg) {
simpleLocalFlowStep(_, n) and
msg = "PostUpdateNode should not be the target of local flow."
}
}

View File

@@ -11,11 +11,6 @@ private import semmle.python.essa.SsaCompute
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** A control flow node which is also a dataflow node */
class DataFlowCfgNode extends ControlFlowNode {
DataFlowCfgNode() { isExpressionNode(this) }
}
/** A data flow node for which we should synthesise an associated pre-update node. */
abstract class NeedsSyntheticPreUpdateNode extends Node {
/** A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node. */
@@ -158,29 +153,6 @@ module EssaFlow {
// nodeTo is `y` on second line, cfg node
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
or
// Refinements
exists(EssaEdgeRefinement r |
nodeTo.(EssaNode).getVar() = r.getVariable() and
nodeFrom.(EssaNode).getVar() = r.getInput()
)
or
exists(EssaNodeRefinement r |
nodeTo.(EssaNode).getVar() = r.getVariable() and
nodeFrom.(EssaNode).getVar() = r.getInput()
)
or
exists(PhiFunction p |
nodeTo.(EssaNode).getVar() = p.getVariable() and
nodeFrom.(EssaNode).getVar() = p.getAnInput()
)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
nodeTo = TKwOverflowNode(call, callable) and
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
)
or
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
}

View File

@@ -5,6 +5,7 @@
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
import Attributes
private import semmle.python.essa.SsaCompute
/**
@@ -22,8 +23,8 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(DataFlowCfgNode node) or
/** A synthetic node representing the value of an object before a state change. */
TCfgNode(ControlFlowNode node) { isExpressionNode(node) } or
/** A synthetic node representing the value of an object before a state change. */
TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
/** A synthetic node representing the value of an object after a state change. */
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
@@ -133,7 +134,7 @@ class EssaNode extends Node, TEssaNode {
}
class CfgNode extends Node, TCfgNode {
DataFlowCfgNode node;
ControlFlowNode node;
CfgNode() { this = TCfgNode(node) }
@@ -181,6 +182,9 @@ class ParameterNode extends EssaNode {
}
override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
/** Gets the `Parameter` this `ParameterNode` represents. */
Parameter getParameter() { result = var.(ParameterDefinition).getParameter() }
}
/**

View File

@@ -18,7 +18,7 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
* Gets an EssaNode that holds the module imported by `name`.
* Gets a `Node` that refers to the module referenced by `name`.
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
* reference to the module `pkg`.
*
@@ -27,16 +27,17 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
*
* Finally, in `from <module> import <member>` we consider the `ImportExpr` corresponding to
* `<module>` to be a reference to that module.
*
* Note:
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
* it's highly unlikely that this will be a problem in production level code.
* Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
* to refer to the module.
*
* Also see `DataFlow::importMember`
*/
EssaNode importModule(string name) {
Node importNode(string name) {
exists(Variable var, Import imp, Alias alias |
alias = imp.getAName() and
alias.getAsname() = var.getAStore() and
@@ -45,23 +46,27 @@ EssaNode importModule(string name) {
or
name = alias.getValue().(ImportExpr).getImportedModuleName()
) and
result.getVar().(AssignmentDefinition).getSourceVariable() = var
)
}
/**
* Gets a EssaNode that holds the value imported by using fully qualified name in
*`from <moduleName> import <memberName>`.
*
* Also see `DataFlow::importModule`.
*/
EssaNode importMember(string moduleName, string memberName) {
exists(Variable var, Import imp, Alias alias, ImportMember member |
alias = imp.getAName() and
member = alias.getValue() and
moduleName = member.getModule().(ImportExpr).getImportedModuleName() and
memberName = member.getName() and
alias.getAsname() = var.getAStore() and
result.getVar().(AssignmentDefinition).getSourceVariable() = var
result.(EssaNode).getVar().(AssignmentDefinition).getSourceVariable() = var
)
or
// Although it may seem superfluous to consider the `foo` part of `from foo import bar as baz` to
// be a reference to a module (since that reference only makes sense locally within the `import`
// statement), it's important for our use of type trackers to consider this local reference to
// also refer to the `foo` module. That way, if one wants to track references to the `bar`
// attribute using a type tracker, one can simply write
//
// ```ql
// DataFlow::Node bar_attr_tracker(TypeTracker t) {
// t.startInAttr("bar") and
// result = foo_module_tracker()
// or
// exists(TypeTracker t2 | result = bar_attr_tracker(t2).track(t2, t))
// }
// ```
//
// Where `foo_module_tracker` is a type tracker that tracks references to the `foo` module.
// Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
// is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
// reference to `foo.bar`, as desired.
result.asCfgNode().getNode() = any(ImportExpr i | i.getName() = name)
}

View File

@@ -7,6 +7,7 @@
import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Frameworks
private import experimental.dataflow.RemoteFlowSources
/**
* A data-flow node that executes an operating system command,
@@ -16,12 +17,12 @@ private import experimental.semmle.python.Frameworks
* extend `SystemCommandExecution::Range` instead.
*/
class SystemCommandExecution extends DataFlow::Node {
SystemCommandExecution::Range self;
SystemCommandExecution::Range range;
SystemCommandExecution() { this = self }
SystemCommandExecution() { this = range }
/** Gets the argument that specifies the command to be executed. */
DataFlow::Node getCommand() { result = self.getCommand() }
DataFlow::Node getCommand() { result = range.getCommand() }
}
/** Provides a class for modeling new system-command execution APIs. */
@@ -38,3 +39,91 @@ module SystemCommandExecution {
abstract DataFlow::Node getCommand();
}
}
/**
* A data-flow node that dynamically executes Python code.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `CodeExecution::Range` instead.
*/
class CodeExecution extends DataFlow::Node {
// The `Range` instance this node is identified with; `getCode` delegates to it.
CodeExecution::Range range;
CodeExecution() { this = range }
/** Gets the argument that specifies the code to be executed. */
DataFlow::Node getCode() { result = range.getCode() }
}
/** Provides a class for modeling new dynamic code execution APIs. */
module CodeExecution {
/**
* A data-flow node that dynamically executes Python code.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `CodeExecution` instead.
*
* Concrete subclasses must implement `getCode`.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the code to be executed. */
abstract DataFlow::Node getCode();
}
}
/** Provides classes for modeling HTTP-related APIs. */
module HTTP {
/** Provides classes for modeling HTTP servers. */
module Server {
/**
* A data-flow node that sets up a route on a server.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `RouteSetup::Range` instead.
*/
class RouteSetup extends DataFlow::Node {
// The `Range` instance this node is identified with; all member predicates delegate to it.
RouteSetup::Range range;
RouteSetup() { this = range }
/** Gets the URL pattern for this route, if it can be statically determined. */
string getUrlPattern() { result = range.getUrlPattern() }
/** Gets a function that will handle incoming requests for this route, if any. */
Function getARouteHandler() { result = range.getARouteHandler() }
/**
* Gets a parameter that will receive parts of the url when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
Parameter getARoutedParameter() { result = range.getARoutedParameter() }
}
/** Provides a class for modeling new HTTP routing APIs. */
module RouteSetup {
/**
* A data-flow node that sets up a route on a server.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RouteSetup` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the URL pattern for this route, if it can be statically determined. */
abstract string getUrlPattern();
/** Gets a function that will handle incoming requests for this route, if any. */
abstract Function getARouteHandler();
/**
* Gets a parameter that will receive parts of the url when handling incoming
* requests for this route, if any. These automatically become a `RemoteFlowSource`.
*/
abstract Parameter getARoutedParameter();
}
}
/**
* A parameter node whose parameter is a routed parameter of some `RouteSetup`,
* modeled as a remote flow source with source type "RoutedParameter".
*/
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
RoutedParameter() { this.getParameter() = any(RouteSetup setup).getARoutedParameter() }
override string getSourceType() { result = "RoutedParameter" }
}
}
}

View File

@@ -2,6 +2,7 @@
* Helper file that imports all framework modeling.
*/
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Invoke
private import experimental.semmle.python.frameworks.Stdlib

View File

@@ -1,5 +1,6 @@
/**
* Provides classes modeling security-relevant aspects of the `flask` package.
* Provides classes modeling security-relevant aspects of the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private import python
@@ -11,11 +12,15 @@ private import experimental.semmle.python.frameworks.Werkzeug
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
/**
* Provides models for the `flask` PyPI package.
* See https://flask.palletsprojects.com/en/1.1.x/.
*/
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("flask")
result = DataFlow::importNode("flask")
or
exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t))
}
@@ -23,22 +28,164 @@ private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }
/** Provides models for the `flask` module. */
module flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("flask", "request")
result = DataFlow::importNode("flask.request")
or
t.startInAttr("request") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = request(t2).track(t2, t))
}
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
DataFlow::Node request() { result = request(DataFlow::TypeTracker::end()) }
/** Gets a reference to the `flask.Flask` class. */
private DataFlow::Node classFlask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("flask.Flask")
or
t.startInAttr("Flask") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = classFlask(t2).track(t2, t))
}
/** Gets a reference to the `flask.Flask` class. */
DataFlow::Node classFlask() { result = classFlask(DataFlow::TypeTracker::end()) }
/** Gets a reference to an instance of `flask.Flask` (a Flask application). */
private DataFlow::Node app(DataFlow::TypeTracker t) {
t.start() and
result.asCfgNode().(CallNode).getFunction() = flask::classFlask().asCfgNode()
or
exists(DataFlow::TypeTracker t2 | result = app(t2).track(t2, t))
}
/** Gets a reference to an instance of `flask.Flask` (a Flask application). */
DataFlow::Node app() { result = app(DataFlow::TypeTracker::end()) }
}
// ---------------------------------------------------------------------------
// routing modeling
// ---------------------------------------------------------------------------
/**
* Gets a reference to the attribute `attr_name` of a flask application.
* WARNING: Only holds for a few predefined attributes.
*
* `t` is the type tracker describing how far the reference has been tracked.
*/
private DataFlow::Node app_attr(DataFlow::TypeTracker t, string attr_name) {
// Base case: start tracking the attribute `attr_name` on a reference to a flask application.
// Only `route` and `add_url_rule` are supported.
attr_name in ["route", "add_url_rule"] and
t.startInAttr(attr_name) and
result = flask::app()
or
// Due to bad performance when using normal setup with `app_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
app_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
/**
* Helper for the recursive case of `app_attr`: holds if `res` is reached from
* `app_attr(t2, attr_name)` by a single step described by `summary`.
*
* Kept as a separate `pragma[nomagic]` predicate to force this join, for performance reasons.
*/
pragma[nomagic]
private predicate app_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(app_attr(t2, attr_name), res, summary)
}
/**
 * Gets a fully tracked reference to the attribute `attr_name` of a flask application.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node app_attr(string attr_name) {
  app_attr(DataFlow::TypeTracker::end(), attr_name) = result
}
/**
* Gets the regular expression used to find variable parts in a URL rule.
*
* The capture groups are, in order: `static` (1), `converter` (2), `args` (3) and
* `variable` (4). Callers that use numbered groups rely on this order.
*/
private string werkzeug_rule_re() {
// since flask uses werkzeug internally, we are using its routing rules from
// https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/routing.py#L138-L151
result =
"(?<static>[^<]*)<(?:(?<converter>[a-zA-Z_][a-zA-Z0-9_]*)(?:\\((?<args>.*?)\\))?\\:)?(?<variable>[a-zA-Z_][a-zA-Z0-9_]*)>"
}
/** A route setup made by flask (sharing handling of URL patterns). */
abstract private class FlaskRouteSetup extends HTTP::Server::RouteSetup::Range {
  /** Gets the argument used to pass in the URL pattern. */
  abstract DataFlow::Node getUrlPatternArg();

  /** Gets the text of a string constant that flows locally to the URL pattern argument. */
  override string getUrlPattern() {
    exists(StrConst pattern |
      result = pattern.getText() and
      DataFlow::localFlow(DataFlow::exprNode(pattern), this.getUrlPatternArg())
    )
  }

  override Parameter getARoutedParameter() {
    // If we don't know the URL pattern, we simply mark all parameters as a routed
    // parameter. This should give us more RemoteFlowSources but could also lead to
    // more FPs. If this turns out to be the wrong tradeoff, we can always change our mind.
    not exists(this.getUrlPattern()) and
    result = this.getARouteHandler().getArgByName(_)
    or
    // Otherwise, only handler parameters named after a variable part of the pattern
    // (capture group 4 of the rule regex) are routed.
    exists(string variable, string rule |
      rule = this.getUrlPattern().regexpFind(werkzeug_rule_re(), _, _) and
      variable = rule.regexpCapture(werkzeug_rule_re(), 4) and
      result = this.getARouteHandler().getArgByName(variable)
    )
  }
}
/**
 * A call to `flask.Flask.route`.
 *
 * See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.route
 */
private class FlaskAppRouteCall extends FlaskRouteSetup, DataFlow::CfgNode {
  override CallNode node;

  FlaskAppRouteCall() { app_attr("route").asCfgNode() = node.getFunction() }

  /** Gets the first positional argument, or the `rule` keyword argument. */
  override DataFlow::Node getUrlPatternArg() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("rule")
  }

  /** Gets a function that has this call as a decorator. */
  override Function getARouteHandler() { node = result.getADecorator().getAFlowNode() }
}
/**
 * A call to `flask.Flask.add_url_rule`.
 *
 * See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Flask.add_url_rule
 */
private class FlaskAppAddUrlRule extends FlaskRouteSetup, DataFlow::CfgNode {
  override CallNode node;

  FlaskAppAddUrlRule() { app_attr("add_url_rule").asCfgNode() = node.getFunction() }

  /** Gets the first positional argument, or the `rule` keyword argument. */
  override DataFlow::Node getUrlPatternArg() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("rule")
  }

  /**
   * Gets a function whose definition flows locally to the third positional argument
   * or to the `view_func` keyword argument.
   */
  override Function getARouteHandler() {
    exists(DataFlow::Node handlerArg, DataFlow::Node handlerSource |
      (
        handlerArg.asCfgNode() = node.getArg(2)
        or
        handlerArg.asCfgNode() = node.getArgByName("view_func")
      ) and
      DataFlow::localFlow(handlerSource, handlerArg) and
      result.getDefinition() = handlerSource.asExpr().(CallableExpr)
    )
  }
}
// ---------------------------------------------------------------------------
// flask.Request taint modeling
// ---------------------------------------------------------------------------
// TODO: Do we even need this class? :|
/**
* A source of remote flow from a flask request.

View File

@@ -0,0 +1,149 @@
/**
* Provides classes modeling security-relevant aspects of the `invoke` PyPI package.
* See https://www.pyinvoke.org/.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.semmle.python.Concepts
/**
* Provides models for the `invoke` PyPI package.
* See https://www.pyinvoke.org/.
*/
private module Invoke {
// ---------------------------------------------------------------------------
// invoke
// ---------------------------------------------------------------------------
/** Gets a reference to the `invoke` module. */
private DataFlow::Node invoke(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("invoke")
or
exists(DataFlow::TypeTracker t2 | result = invoke(t2).track(t2, t))
}
/** Gets a reference to the `invoke` module. */
DataFlow::Node invoke() { result = invoke(DataFlow::TypeTracker::end()) }
/**
* Gets a reference to the attribute `attr_name` of the `invoke` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node invoke_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in ["run", "sudo", "context", "Context", "task"] and
(
t.start() and
result = DataFlow::importNode("invoke." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importNode("invoke")
)
or
// Due to bad performance when using normal setup with `invoke_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
invoke_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate invoke_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(invoke_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `invoke` module.
* WARNING: Only holds for a few predefined attributes.
*/
private DataFlow::Node invoke_attr(string attr_name) {
result = invoke_attr(DataFlow::TypeTracker::end(), attr_name)
}
/** Provides models for the `invoke` module. */
module invoke {
  /** Gets a reference to the `invoke.context` module. */
  DataFlow::Node context() { result = invoke_attr("context") }

  /** Provides models for the `invoke.context` module. */
  module context {
    /** Provides models for the `invoke.context.Context` class. */
    module Context {
      /** Gets a reference to the `invoke.context.Context` class, tracked with `t`. */
      private DataFlow::Node classRef(DataFlow::TypeTracker t) {
        // recursive case: follow one more type-tracking step
        exists(DataFlow::TypeTracker prev | result = classRef(prev).track(prev, t))
        or
        // the `Context` attribute of a reference to the `invoke.context` module
        t.startInAttr("Context") and
        result = invoke::context()
        or
        t.start() and
        (
          result = DataFlow::importNode("invoke.context.Context")
          or
          // handle the `invoke.Context` alias
          result = invoke_attr("Context")
        )
      }

      /** Gets a reference to the `invoke.context.Context` class. */
      DataFlow::Node classRef() { result = classRef(DataFlow::TypeTracker::end()) }

      /** Gets a reference to an instance of `invoke.context.Context`, tracked with `t`. */
      private DataFlow::Node instance(DataFlow::TypeTracker t) {
        t.start() and
        (
          // a call whose function is a reference to the class
          exists(CallNode construction | construction = result.asCfgNode() |
            construction.getFunction() = classRef().asCfgNode()
          )
          or
          // the first parameter of a function decorated with a reference to `invoke.task`
          exists(Function taskFunc, DataFlow::ParameterNode ctxParam |
            taskFunc.getADecorator() = invoke_attr("task").asExpr() and
            ctxParam.getParameter() = taskFunc.getArg(0)
          |
            result = ctxParam
          )
        )
        or
        // recursive case: follow one more type-tracking step
        exists(DataFlow::TypeTracker prev | result = instance(prev).track(prev, t))
      }

      /** Gets a reference to an instance of `invoke.context.Context`. */
      DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }

      /**
       * Gets a reference to the `run` or `sudo` methods on an `invoke.context.Context`
       * instance, tracked with `t`.
       */
      private DataFlow::Node instanceRunMethods(DataFlow::TypeTracker t) {
        result = instance() and
        (
          t.startInAttr("run")
          or
          t.startInAttr("sudo")
        )
        or
        // recursive case: follow one more type-tracking step
        exists(DataFlow::TypeTracker prev | result = instanceRunMethods(prev).track(prev, t))
      }

      /** Gets a reference to the `run` or `sudo` methods on an `invoke.context.Context` instance. */
      DataFlow::Node instanceRunMethods() {
        result = instanceRunMethods(DataFlow::TypeTracker::end())
      }
    }
  }
}
/**
 * A call that executes a system command through `invoke`. The called function is either
 * - one of the `invoke.run` or `invoke.sudo` functions (http://docs.pyinvoke.org/en/stable/api/__init__.html), or
 * - one of the `run` or `sudo` methods on an `invoke.context.Context` instance (http://docs.pyinvoke.org/en/stable/api/context.html#invoke.context.Context.run)
 */
private class InvokeRunCommandCall extends SystemCommandExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  InvokeRunCommandCall() {
    // module-level `invoke.run(...)` / `invoke.sudo(...)`
    node.getFunction() = invoke_attr(["run", "sudo"]).asCfgNode()
    or
    // `ctx.run(...)` / `ctx.sudo(...)` on a `Context` instance
    node.getFunction() = invoke::context::Context::instanceRunMethods().asCfgNode()
  }

  /** Gets the command: the first positional argument or the `command` keyword argument. */
  override DataFlow::Node getCommand() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("command")
  }
}
}

View File

@@ -17,7 +17,7 @@ private module Stdlib {
/** Gets a reference to the `os` module. */
private DataFlow::Node os(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("os")
result = DataFlow::importNode("os")
or
exists(DataFlow::TypeTracker t2 | result = os(t2).track(t2, t))
}
@@ -42,10 +42,10 @@ private module Stdlib {
"path"] and
(
t.start() and
result = DataFlow::importMember("os", attr_name)
result = DataFlow::importNode("os." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importModule("os")
result = DataFlow::importNode("os")
)
or
// Due to bad performance when using normal setup with `os_attr(t2, attr_name).track(t2, t)`
@@ -85,7 +85,7 @@ private module Stdlib {
/** Gets a reference to the `os.path.join` function. */
private DataFlow::Node join(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("os.path", "join")
result = DataFlow::importNode("os.path.join")
or
t.startInAttr("join") and
result = os::path()
@@ -190,7 +190,7 @@ private module Stdlib {
/** Gets a reference to the `subprocess` module. */
private DataFlow::Node subprocess(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("subprocess")
result = DataFlow::importNode("subprocess")
or
exists(DataFlow::TypeTracker t2 | result = subprocess(t2).track(t2, t))
}
@@ -208,10 +208,10 @@ private module Stdlib {
attr_name in ["Popen", "call", "check_call", "check_output", "run"] and
(
t.start() and
result = DataFlow::importMember("subprocess", attr_name)
result = DataFlow::importNode("subprocess." + attr_name)
or
t.startInAttr(attr_name) and
result = DataFlow::importModule("subprocess")
result = subprocess()
)
or
// Due to bad performance when using normal setup with `subprocess_attr(t2, attr_name).track(t2, t)`
@@ -327,4 +327,115 @@ private module Stdlib {
)
}
}
// ---------------------------------------------------------------------------
// builtins
// ---------------------------------------------------------------------------
/**
 * Gets a reference to the `builtins` module (called `__builtin__` in Python 2),
 * tracked with `t`.
 */
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
  // recursive case: follow one more type-tracking step
  exists(DataFlow::TypeTracker prev | result = builtins(prev).track(prev, t))
  or
  // base case: an import of the module under either of its two names
  t.start() and
  exists(string moduleName | moduleName in ["builtins", "__builtin__"] |
    result = DataFlow::importNode(moduleName)
  )
}
/** Gets a reference to the `builtins` module (`__builtin__` in Python 2). */
DataFlow::Node builtins() {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() |
    result = builtins(done)
  )
}
/**
 * Gets a reference to the attribute `attr_name` of the `builtins` module, tracked with `t`.
 * WARNING: Only holds for a few predefined attributes (`exec`, `eval` and `compile`).
 */
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
// Only these three attribute names are modeled.
attr_name in ["exec", "eval", "compile"] and
(
// Base case 1: the import node for `<module>.<attr_name>`, where `<module>` is
// either `builtins` or `__builtin__`.
t.start() and
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
or
// Base case 2: the import node for the module itself, with the type tracker
// started "in" the attribute `attr_name`.
t.startInAttr(attr_name) and
result = DataFlow::importNode(["builtins", "__builtin__"])
or
// special handling of builtins, that are in scope without any imports
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
t.start() and
// Base case 3: a load of a global name that is spelled exactly `attr_name`.
exists(NameNode ref | result.asCfgNode() = ref |
ref.isGlobal() and
ref.getId() = attr_name and
ref.isLoad()
)
)
or
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
// `result` is one step away from a node already tracked with `t2` ...
builtins_attr_first_join(t2, attr_name, result, summary) and
// ... and `t` is `t2` extended with the summary of that step.
t = t2.append(summary)
)
)
}
/**
 * Helper for `builtins_attr(DataFlow::TypeTracker, string)`.
 *
 * Holds if `DataFlow::StepSummary::step` relates a node given by `builtins_attr(t2, attr_name)`
 * to `res` with step summary `summary`.
 *
 * This is kept as a separate `pragma[nomagic]` predicate so that this join is forced;
 * see the performance comment inside `builtins_attr`.
 */
pragma[nomagic]
private predicate builtins_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
}
/**
 * Gets a reference to the `attr_name` attribute of the `builtins` module, that is,
 * a node for which the type-tracking predicate of the same name holds with the
 * end type tracker.
 * WARNING: Only holds for a few predefined attributes.
 */
private DataFlow::Node builtins_attr(string attr_name) {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() |
    result = builtins_attr(done, attr_name)
  )
}
/**
 * A call to the builtin `exec` function. The first positional argument is
 * reported as the code being executed.
 * See https://docs.python.org/3/library/functions.html#exec
 */
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  BuiltinsExecCall() {
    exists(DataFlow::Node execRef | execRef = builtins_attr("exec") |
      node.getFunction() = execRef.asCfgNode()
    )
  }

  override DataFlow::Node getCode() {
    exists(ControlFlowNode codeArg | codeArg = node.getArg(0) | result.asCfgNode() = codeArg)
  }
}
/**
 * A call to the builtin `eval` function. The first positional argument is
 * reported as the code being executed.
 * See https://docs.python.org/3/library/functions.html#eval
 */
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
  override CallNode node;

  BuiltinsEvalCall() {
    exists(DataFlow::Node evalRef | evalRef = builtins_attr("eval") |
      node.getFunction() = evalRef.asCfgNode()
    )
  }

  override DataFlow::Node getCode() {
    exists(ControlFlowNode codeArg | codeArg = node.getArg(0) | result.asCfgNode() = codeArg)
  }
}
/**
 * An additional taint step for calls to the builtin function `compile`:
 * taint flows from the first positional argument, or from the `source` keyword
 * argument, to the call itself.
 */
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(CallNode compileCall, ControlFlowNode sourceArg |
      compileCall.getFunction() = builtins_attr("compile").asCfgNode() and
      (
        sourceArg = compileCall.getArg(0)
        or
        sourceArg = compileCall.getArgByName("source")
      )
    |
      nodeFrom.asCfgNode() = sourceArg and
      nodeTo.asCfgNode() = compileCall
    )
  }
}
}
/**
 * An exec statement (only Python 2).
 * See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
 */
private class ExecStatement extends CodeExecution::Range {
  ExecStatement() {
    // A statement has no `DataFlow::Node` of its own, so something like
    // `this = any(Exec exec)` is not possible; the node for the expression returned
    // by `getBody()` stands in for the statement instead.
    exists(Exec execStmt | this.asExpr() = execStmt.getBody())
  }

  /** Gets the code being executed, which is this node itself. */
  override DataFlow::Node getCode() { result = this }
}