mirror of
https://github.com/github/codeql.git
synced 2025-12-20 18:56:32 +01:00
Merge pull request #4323 from RasmusWL/python-new-command-injection-query
Approved by tausbn
This commit is contained in:
37
python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql
Executable file
37
python/ql/src/experimental/Security-new-dataflow/CWE-078/CommandInjection.ql
Executable file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* @name Uncontrolled command line
|
||||
* @description Using externally controlled strings in a command line may allow a malicious
|
||||
* user to change the meaning of the command.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @id py/command-line-injection
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/owasp/owasp-a1
|
||||
* external/cwe/cwe-078
|
||||
* external/cwe/cwe-088
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
import experimental.dataflow.TaintTracking
|
||||
import experimental.semmle.python.Concepts
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class CommandInjectionConfiguration extends TaintTracking::Configuration {
|
||||
CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink = any(SystemCommandExecution e).getCommand()
|
||||
}
|
||||
}
|
||||
|
||||
from CommandInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "This command depends on $@.", source.getNode(),
|
||||
"a user-provided value"
|
||||
@@ -154,7 +154,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
|
||||
* t.start() and
|
||||
* result = < source of myType >
|
||||
* or
|
||||
* exists (TypeTracker t2 |
|
||||
* exists (DataFlow::TypeTracker t2 |
|
||||
* result = myType(t2).track(t2, t)
|
||||
* )
|
||||
* }
|
||||
|
||||
@@ -411,7 +411,11 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
|
||||
/**
|
||||
* Gets the type of `node`.
|
||||
*/
|
||||
DataFlowType getNodeType(Node node) { result = TAnyFlow() }
|
||||
DataFlowType getNodeType(Node node) {
|
||||
result = TAnyFlow() and
|
||||
// Suppress unused variable warning
|
||||
node = node
|
||||
}
|
||||
|
||||
/** Gets a string representation of a type returned by `getErasedRepr`. */
|
||||
string ppReprType(DataFlowType t) { none() }
|
||||
@@ -458,7 +462,9 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
|
||||
// nodeFrom is `42`, cfg node
|
||||
// nodeTo is the list, `[..., 42, ...]`, cfg node
|
||||
// c denotes element of list
|
||||
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode()
|
||||
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
|
||||
// Suppress unused variable warning
|
||||
c = c
|
||||
}
|
||||
|
||||
/** Data flows from an element of a set to the set. */
|
||||
@@ -468,7 +474,9 @@ predicate setStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
|
||||
// nodeFrom is `42`, cfg node
|
||||
// nodeTo is the set, `{..., 42, ...}`, cfg node
|
||||
// c denotes element of list
|
||||
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode()
|
||||
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and
|
||||
// Suppress unused variable warning
|
||||
c = c
|
||||
}
|
||||
|
||||
/** Data flows from an element of a tuple to the tuple at a specific index. */
|
||||
|
||||
@@ -16,3 +16,52 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
|
||||
* (intra-procedural) steps.
|
||||
*/
|
||||
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
|
||||
|
||||
/**
|
||||
* Gets an EssaNode that holds the module imported by `name`.
|
||||
* Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
|
||||
* reference to the module `pkg`.
|
||||
*
|
||||
* This predicate handles (with optional `... as <new-name>`):
|
||||
* 1. `import <name>`
|
||||
* 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
|
||||
* 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
|
||||
*
|
||||
* Note:
|
||||
* While it is technically possible that `import mypkg.foo` and `from mypkg import foo` can give different values,
|
||||
* it's highly unlikely that this will be a problem in production level code.
|
||||
* Example: If `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` will not import the module
|
||||
* `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
|
||||
* to refer to the module.
|
||||
*
|
||||
* Also see `DataFlow::importMember`
|
||||
*/
|
||||
EssaNode importModule(string name) {
|
||||
exists(Variable var, Import imp, Alias alias |
|
||||
alias = imp.getAName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
(
|
||||
name = alias.getValue().(ImportMember).getImportedModuleName()
|
||||
or
|
||||
name = alias.getValue().(ImportExpr).getImportedModuleName()
|
||||
) and
|
||||
result.getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a EssaNode that holds the value imported by using fully qualified name in
|
||||
*`from <moduleName> import <memberName>`.
|
||||
*
|
||||
* Also see `DataFlow::importModule`.
|
||||
*/
|
||||
EssaNode importMember(string moduleName, string memberName) {
|
||||
exists(Variable var, Import imp, Alias alias, ImportMember member |
|
||||
alias = imp.getAName() and
|
||||
member = alias.getValue() and
|
||||
moduleName = member.getModule().(ImportExpr).getImportedModuleName() and
|
||||
memberName = member.getName() and
|
||||
alias.getAsname() = var.getAStore() and
|
||||
result.getVar().(AssignmentDefinition).getSourceVariable() = var
|
||||
)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,141 @@
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.dataflow.TaintTracking
|
||||
private import experimental.semmle.python.Concepts
|
||||
private import experimental.semmle.python.frameworks.Werkzeug
|
||||
|
||||
private module Flask { }
|
||||
// for old improved impl see
|
||||
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
|
||||
private module Flask {
|
||||
/** Gets a reference to the `flask` module. */
|
||||
DataFlow::Node flask(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importModule("flask")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `flask` module. */
|
||||
DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }
|
||||
|
||||
module flask {
|
||||
/** Gets a reference to the `flask.request` object. */
|
||||
DataFlow::Node request(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importMember("flask", "request")
|
||||
or
|
||||
t.startInAttr("request") and
|
||||
result = flask()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `flask.request` object. */
|
||||
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
|
||||
}
|
||||
|
||||
// TODO: Do we even need this class? :|
|
||||
/**
|
||||
* A source of remote flow from a flask request.
|
||||
*
|
||||
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
|
||||
*/
|
||||
private class RequestSource extends RemoteFlowSource::Range {
|
||||
RequestSource() { this = flask::request() }
|
||||
|
||||
override string getSourceType() { result = "flask.request" }
|
||||
}
|
||||
|
||||
private module FlaskRequestTracking {
|
||||
private DataFlow::Node tainted_methods(string attr_name, DataFlow::TypeTracker t) {
|
||||
attr_name in ["get_data", "get_json"] and
|
||||
t.startInAttr(attr_name) and
|
||||
result = flask::request()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = tainted_methods(attr_name, t2).track(t2, t))
|
||||
}
|
||||
|
||||
DataFlow::Node tainted_methods(string attr_name) {
|
||||
result = tainted_methods(attr_name, DataFlow::TypeTracker::end())
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A source of remote flow from attributes from a flask request.
|
||||
*
|
||||
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
|
||||
*/
|
||||
private class RequestInputAccess extends RemoteFlowSource::Range {
|
||||
string attr_name;
|
||||
|
||||
RequestInputAccess() {
|
||||
// attributes
|
||||
exists(AttrNode attr |
|
||||
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
|
||||
|
|
||||
attr_name in ["path",
|
||||
// str
|
||||
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
|
||||
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
|
||||
"referrer", "remote_addr", "remote_user", "user_agent",
|
||||
// dict
|
||||
"environ", "cookies", "mimetype_params", "view_args",
|
||||
// json
|
||||
"json",
|
||||
// List[str]
|
||||
"access_route",
|
||||
// file-like
|
||||
"stream", "input_stream",
|
||||
// MultiDict[str, str]
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
|
||||
"args", "values", "form",
|
||||
// MultiDict[str, FileStorage]
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
|
||||
// TODO: FileStorage needs extra taint steps
|
||||
"files",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
|
||||
"access_control_request_headers", "pragma",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
|
||||
// TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
|
||||
"accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
|
||||
// TODO: dict subclass with extra attributes like `username` and `password`
|
||||
"authorization",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
|
||||
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
|
||||
"cache_control",
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
|
||||
// TODO: dict-like with wsgiref.headers.Header compatibility methods
|
||||
"headers"]
|
||||
)
|
||||
or
|
||||
// methods (needs special handling to track bound-methods -- see `FlaskRequestMethodCallsAdditionalTaintStep` below)
|
||||
this = FlaskRequestTracking::tainted_methods(attr_name)
|
||||
}
|
||||
|
||||
override string getSourceType() { result = "flask.request input" }
|
||||
}
|
||||
|
||||
private class FlaskRequestMethodCallsAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// NOTE: `request -> request.tainted_method` part is handled as part of RequestInputAccess
|
||||
// tainted_method -> tainted_method()
|
||||
nodeFrom = FlaskRequestTracking::tainted_methods(_) and
|
||||
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
|
||||
}
|
||||
}
|
||||
|
||||
private class RequestInputMultiDict extends RequestInputAccess,
|
||||
Werkzeug::Datastructures::MultiDict {
|
||||
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
|
||||
}
|
||||
|
||||
private class RequestInputFiles extends RequestInputMultiDict {
|
||||
RequestInputFiles() { attr_name = "files" }
|
||||
}
|
||||
// TODO: Somehow specify that elements of `RequestInputFiles` are
|
||||
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
|
||||
// AND that the 0-indexed argument to its' save method is a sink for path-injection.
|
||||
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
|
||||
}
|
||||
|
||||
@@ -7,3 +7,70 @@ private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
private module Stdlib {
|
||||
/** Gets a reference to the `os` module. */
|
||||
DataFlow::Node os(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importModule("os")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = os(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `os` module. */
|
||||
DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }
|
||||
|
||||
module os {
|
||||
/** Gets a reference to the `os.system` function. */
|
||||
DataFlow::Node system(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importMember("os", "system")
|
||||
or
|
||||
t.startInAttr("system") and
|
||||
result = os()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = os::system(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `os.system` function. */
|
||||
DataFlow::Node system() { result = os::system(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/** Gets a reference to the `os.popen` function. */
|
||||
DataFlow::Node popen(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importMember("os", "popen")
|
||||
or
|
||||
t.startInAttr("popen") and
|
||||
result = os()
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = os::popen(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `os.popen` function. */
|
||||
DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `os.system`.
|
||||
* See https://docs.python.org/3/library/os.html#os.system
|
||||
*/
|
||||
private class OsSystemCall extends SystemCommandExecution::Range {
|
||||
OsSystemCall() { this.asCfgNode().(CallNode).getFunction() = os::system().asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCommand() {
|
||||
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `os.popen`
|
||||
* See https://docs.python.org/3/library/os.html#os.popen
|
||||
*/
|
||||
private class OsPopenCall extends SystemCommandExecution::Range {
|
||||
OsPopenCall() { this.asCfgNode().(CallNode).getFunction() = os::popen().asCfgNode() }
|
||||
|
||||
override DataFlow::Node getCommand() {
|
||||
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `flask` package.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.TaintTracking
|
||||
|
||||
// for old impl see
|
||||
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/libraries/Werkzeug.qll
|
||||
module Werkzeug {
|
||||
module Datastructures {
|
||||
// ---------------------------------------------------------------------- //
|
||||
// MultiDict //
|
||||
// ---------------------------------------------------------------------- //
|
||||
/**
|
||||
* A Node representing an instance of a werkzeug.datastructures.MultiDict
|
||||
*
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
|
||||
*/
|
||||
abstract class MultiDict extends DataFlow::Node { }
|
||||
|
||||
private module MultiDictTracking {
|
||||
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
|
||||
t.startInAttr("getlist") and
|
||||
result instanceof MultiDict
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
|
||||
}
|
||||
|
||||
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
|
||||
}
|
||||
|
||||
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// obj -> obj.getlist
|
||||
nodeTo.asCfgNode().(AttrNode).getObject("getlist") = nodeFrom.asCfgNode() and
|
||||
nodeTo = MultiDictTracking::getlist()
|
||||
or
|
||||
// getlist -> getlist()
|
||||
nodeFrom = MultiDictTracking::getlist() and
|
||||
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------- //
|
||||
// FileStorage //
|
||||
// ---------------------------------------------------------------------- //
|
||||
/**
|
||||
* A Node representing an instance of a werkzeug.datastructures.FileStorage
|
||||
*
|
||||
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
|
||||
*/
|
||||
abstract class FileStorage extends DataFlow::Node { }
|
||||
|
||||
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
|
||||
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// TODO: should be `nodeFrom = tracked(any(FileStorage fs))`
|
||||
nodeFrom instanceof FileStorage and
|
||||
exists(string name |
|
||||
name in ["filename",
|
||||
// str
|
||||
"name", "content_type", "mimetype",
|
||||
// file-like
|
||||
"stream",
|
||||
// TODO: werkzeug.datastructures.Headers
|
||||
"headers",
|
||||
// dict[str, str]
|
||||
"mimetype_params"] and
|
||||
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user