Merge pull request #4323 from RasmusWL/python-new-command-injection-query

Approved by tausbn
This commit is contained in:
CodeQL CI
2020-09-25 02:39:46 -07:00
committed by GitHub
34 changed files with 1201 additions and 5 deletions

View File

@@ -0,0 +1,37 @@
/**
* @name Uncontrolled command line
* @description Using externally controlled strings in a command line may allow a malicious
* user to change the meaning of the command.
* @kind path-problem
* @problem.severity error
* @sub-severity high
* @precision high
* @id py/command-line-injection
* @tags correctness
* security
* external/owasp/owasp-a1
* external/cwe/cwe-078
* external/cwe/cwe-088
*/
import python
import experimental.dataflow.DataFlow
import experimental.dataflow.TaintTracking
import experimental.semmle.python.Concepts
import experimental.dataflow.RemoteFlowSources
import DataFlow::PathGraph
/**
 * A taint-tracking configuration for the command injection query: taint flows
 * from remote flow sources to the command of a system command execution.
 */
class CommandInjectionConfiguration extends TaintTracking::Configuration {
  CommandInjectionConfiguration() { this = "CommandInjectionConfiguration" }

  /** Holds if `source` is a `RemoteFlowSource`. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is the command of some `SystemCommandExecution`. */
  override predicate isSink(DataFlow::Node sink) {
    exists(SystemCommandExecution execution | sink = execution.getCommand())
  }
}
// Report every taint path that the configuration finds, anchored at the sink.
from CommandInjectionConfiguration cfg, DataFlow::PathNode src, DataFlow::PathNode dst
where cfg.hasFlowPath(src, dst)
select dst.getNode(), src, dst, "This command depends on $@.", src.getNode(),
  "a user-provided value"

View File

@@ -154,7 +154,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeN
* t.start() and
* result = < source of myType >
* or
* exists (TypeTracker t2 |
* exists (DataFlow::TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }

View File

@@ -411,7 +411,11 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
/**
* Gets the type of `node`.
*/
DataFlowType getNodeType(Node node) { result = TAnyFlow() }
DataFlowType getNodeType(Node node) {
result = TAnyFlow() and
// Suppress unused variable warning
node = node
}
/** Gets a string representation of a type returned by `getErasedRepr`. */
string ppReprType(DataFlowType t) { none() }
@@ -458,7 +462,9 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
// nodeFrom is `42`, cfg node
// nodeTo is the list, `[..., 42, ...]`, cfg node
// c denotes element of list
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode()
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
// Suppress unused variable warning
c = c
}
/** Data flows from an element of a set to the set. */
@@ -468,7 +474,9 @@ predicate setStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
// nodeFrom is `42`, cfg node
// nodeTo is the set, `{..., 42, ...}`, cfg node
// c denotes element of list
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode()
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and
// Suppress unused variable warning
c = c
}
/** Data flows from an element of a tuple to the tuple at a specific index. */

View File

@@ -16,3 +16,52 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
* (intra-procedural) steps.
*/
predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
/**
 * Gets an `EssaNode` for the variable that holds the module imported as `name`.
 *
 * For the statement `import pkg.mod`, the variable that is introduced is `pkg`,
 * which refers to the module `pkg`.
 *
 * The following forms are handled (each with an optional `... as <new-name>`):
 * 1. `import <name>`
 * 2. `from <package> import <module>` when `<name> = <package> + "." + <module>`
 * 3. `from <module> import <member>` when `<name> = <module> + "." + <member>`
 *
 * Note:
 * `import mypkg.foo` and `from mypkg import foo` can technically yield different values,
 * but this is highly unlikely to be a problem in production level code.
 * Example: if `mypkg/__init__.py` contains `foo = 42`, then `from mypkg import foo` does not
 * import the module `mypkg/foo.py` but the variable `foo` containing `42` -- whereas
 * `import mypkg.foo` always causes `mypkg.foo` to refer to the module.
 *
 * Also see `DataFlow::importMember`
 */
EssaNode importModule(string name) {
  exists(Import importStmt, Alias importAlias, Variable boundVar |
    importAlias = importStmt.getAName() and
    boundVar.getAStore() = importAlias.getAsname() and
    result.getVar().(AssignmentDefinition).getSourceVariable() = boundVar and
    (
      // `import <name>`
      importAlias.getValue().(ImportExpr).getImportedModuleName() = name
      or
      // `from <prefix> import <last-part>`
      importAlias.getValue().(ImportMember).getImportedModuleName() = name
    )
  )
}
/**
 * Gets an `EssaNode` for the variable that holds the value imported by
 * `from <moduleName> import <memberName>` (with an optional `... as <new-name>`).
 *
 * Also see `DataFlow::importModule`.
 */
EssaNode importMember(string moduleName, string memberName) {
  exists(Import importStmt, Alias importAlias, ImportMember imported, Variable boundVar |
    importAlias = importStmt.getAName() and
    imported = importAlias.getValue() and
    boundVar.getAStore() = importAlias.getAsname() and
    imported.getName() = memberName and
    imported.getModule().(ImportExpr).getImportedModuleName() = moduleName and
    result.getVar().(AssignmentDefinition).getSourceVariable() = boundVar
  )
}

View File

@@ -5,6 +5,141 @@
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.dataflow.TaintTracking
private import experimental.semmle.python.Concepts
private import experimental.semmle.python.frameworks.Werkzeug
private module Flask { }
// for old improved impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("flask")
or
exists(DataFlow::TypeTracker t2 | result = flask(t2).track(t2, t))
}
/** Gets a reference to the `flask` module. */
DataFlow::Node flask() { result = flask(DataFlow::TypeTracker::end()) }
/** Provides predicates for referencing members of the `flask` module. */
module flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request(DataFlow::TypeTracker t) {
// `from flask import request`
t.start() and
result = DataFlow::importMember("flask", "request")
or
// the `request` attribute of a reference to the `flask` module
t.startInAttr("request") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
}
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
}
// TODO: Do we even need this class? :|
/**
* A source of remote flow from a flask request.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
*/
private class RequestSource extends RemoteFlowSource::Range {
RequestSource() { this = flask::request() }
override string getSourceType() { result = "flask.request" }
}
/**
* Provides type-tracking predicates for the `get_data` and `get_json`
* attributes of `flask.request`.
*/
private module FlaskRequestTracking {
/**
* Gets a reference, tracked with `t`, to the attribute `attr_name` of `flask.request`,
* where `attr_name` is `get_data` or `get_json`.
*/
private DataFlow::Node tainted_methods(string attr_name, DataFlow::TypeTracker t) {
attr_name in ["get_data", "get_json"] and
t.startInAttr(attr_name) and
result = flask::request()
or
exists(DataFlow::TypeTracker t2 | result = tainted_methods(attr_name, t2).track(t2, t))
}
/**
* Gets a reference to the attribute `attr_name` of `flask.request`,
* where `attr_name` is `get_data` or `get_json`.
*/
DataFlow::Node tainted_methods(string attr_name) {
result = tainted_methods(attr_name, DataFlow::TypeTracker::end())
}
}
/**
* A source of remote flow from attributes from a flask request.
*
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
*/
private class RequestInputAccess extends RemoteFlowSource::Range {
// Name of the accessed `flask.request` attribute; bound by the characteristic predicate.
string attr_name;
RequestInputAccess() {
// attributes
exists(AttrNode attr |
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
|
attr_name in ["path",
// str
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
"referrer", "remote_addr", "remote_user", "user_agent",
// dict
"environ", "cookies", "mimetype_params", "view_args",
// json
"json",
// List[str]
"access_route",
// file-like
"stream", "input_stream",
// MultiDict[str, str]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
"args", "values", "form",
// MultiDict[str, FileStorage]
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
// TODO: FileStorage needs extra taint steps
"files",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
"access_control_request_headers", "pragma",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
// TODO: Kinda badly modeled for now -- has type List[Tuple[value, quality]], and some extra methods
"accept_charsets", "accept_encodings", "accept_languages", "accept_mimetypes",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Authorization
// TODO: dict subclass with extra attributes like `username` and `password`
"authorization",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
"cache_control",
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
// TODO: dict-like with wsgiref.headers.Header compatibility methods
"headers"]
)
or
// methods (needs special handling to track bound-methods -- see `FlaskRequestMethodCallsAdditionalTaintStep` below)
this = FlaskRequestTracking::tainted_methods(attr_name)
}
override string getSourceType() { result = "flask.request input" }
}
/**
* An additional taint step from a tracked reference to a tainted `flask.request`
* method (see `FlaskRequestTracking::tainted_methods`) to a call of that reference.
*/
private class FlaskRequestMethodCallsAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// NOTE: `request -> request.tainted_method` part is handled as part of RequestInputAccess
// tainted_method -> tainted_method()
nodeFrom = FlaskRequestTracking::tainted_methods(_) and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
/**
* A `RequestInputAccess` whose attribute name is `args`, `values`, `form` or `files`,
* modeled as a `Werkzeug::Datastructures::MultiDict`.
*/
private class RequestInputMultiDict extends RequestInputAccess,
Werkzeug::Datastructures::MultiDict {
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
}
/** A `RequestInputMultiDict` whose attribute name is `files`. */
private class RequestInputFiles extends RequestInputMultiDict {
RequestInputFiles() { attr_name = "files" }
}
// TODO: Somehow specify that elements of `RequestInputFiles` are
// Werkzeug::Datastructures::FileStorage and should have those additional taint steps
// AND that the 0-indexed argument to its `save` method is a sink for path-injection.
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage.save
}

View File

@@ -7,3 +7,70 @@ private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Stdlib {
  /** Gets a reference to the `os` module, as seen by type tracker `t`. */
  DataFlow::Node os(DataFlow::TypeTracker t) {
    exists(DataFlow::TypeTracker prev | result = os(prev).track(prev, t))
    or
    (
      t.start() and
      result = DataFlow::importModule("os")
    )
  }

  /** Gets a reference to the `os` module. */
  DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }

  /** Provides predicates for referencing members of the `os` module. */
  module os {
    /** Gets a reference to the `os.system` function, as seen by type tracker `t`. */
    DataFlow::Node system(DataFlow::TypeTracker t) {
      exists(DataFlow::TypeTracker prev | result = os::system(prev).track(prev, t))
      or
      (
        // the `system` attribute of a reference to the `os` module
        t.startInAttr("system") and
        result = os()
      )
      or
      (
        // `from os import system`
        t.start() and
        result = DataFlow::importMember("os", "system")
      )
    }

    /** Gets a reference to the `os.system` function. */
    DataFlow::Node system() { result = os::system(DataFlow::TypeTracker::end()) }

    /** Gets a reference to the `os.popen` function, as seen by type tracker `t`. */
    DataFlow::Node popen(DataFlow::TypeTracker t) {
      exists(DataFlow::TypeTracker prev | result = os::popen(prev).track(prev, t))
      or
      (
        // the `popen` attribute of a reference to the `os` module
        t.startInAttr("popen") and
        result = os()
      )
      or
      (
        // `from os import popen`
        t.start() and
        result = DataFlow::importMember("os", "popen")
      )
    }

    /** Gets a reference to the `os.popen` function. */
    DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
  }

  /**
   * A call to `os.system`; the command is the first positional argument.
   * See https://docs.python.org/3/library/os.html#os.system
   */
  private class OsSystemCall extends SystemCommandExecution::Range {
    OsSystemCall() {
      exists(CallNode call |
        call = this.asCfgNode() and
        call.getFunction() = os::system().asCfgNode()
      )
    }

    override DataFlow::Node getCommand() {
      exists(CallNode call |
        call = this.asCfgNode() and
        result.asCfgNode() = call.getArg(0)
      )
    }
  }

  /**
   * A call to `os.popen`; the command is the first positional argument.
   * See https://docs.python.org/3/library/os.html#os.popen
   */
  private class OsPopenCall extends SystemCommandExecution::Range {
    OsPopenCall() {
      exists(CallNode call |
        call = this.asCfgNode() and
        call.getFunction() = os::popen().asCfgNode()
      )
    }

    override DataFlow::Node getCommand() {
      exists(CallNode call |
        call = this.asCfgNode() and
        result.asCfgNode() = call.getArg(0)
      )
    }
  }
}

View File

@@ -0,0 +1,75 @@
/**
* Provides classes modeling security-relevant aspects of the `werkzeug` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// for old impl see
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/libraries/Werkzeug.qll
module Werkzeug {
/** Provides models for the data structures in `werkzeug.datastructures`. */
module Datastructures {
// ---------------------------------------------------------------------- //
// MultiDict //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.MultiDict
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
*/
abstract class MultiDict extends DataFlow::Node { }
/** Provides type-tracking predicates for the `getlist` attribute of a `MultiDict`. */
private module MultiDictTracking {
/** Gets a reference, tracked with `t`, to the `getlist` attribute of a `MultiDict`. */
private DataFlow::Node getlist(DataFlow::TypeTracker t) {
t.startInAttr("getlist") and
result instanceof MultiDict
or
exists(DataFlow::TypeTracker t2 | result = getlist(t2).track(t2, t))
}
/** Gets a reference to the `getlist` attribute of a `MultiDict`. */
DataFlow::Node getlist() { result = getlist(DataFlow::TypeTracker::end()) }
}
/**
* Additional taint steps for `MultiDict`: from an object to its tracked `getlist`
* attribute, and from a tracked `getlist` reference to a call of it.
*/
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// obj -> obj.getlist
nodeTo.asCfgNode().(AttrNode).getObject("getlist") = nodeFrom.asCfgNode() and
nodeTo = MultiDictTracking::getlist()
or
// getlist -> getlist()
nodeFrom = MultiDictTracking::getlist() and
nodeTo.asCfgNode().(CallNode).getFunction() = nodeFrom.asCfgNode()
}
}
// ---------------------------------------------------------------------- //
// FileStorage //
// ---------------------------------------------------------------------- //
/**
* A Node representing an instance of a werkzeug.datastructures.FileStorage
*
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
*/
abstract class FileStorage extends DataFlow::Node { }
/**
* Additional taint steps for `FileStorage`: from a `FileStorage` node to a read
* of one of the attributes listed below on that node.
*/
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// TODO: should be `nodeFrom = tracked(any(FileStorage fs))`
nodeFrom instanceof FileStorage and
exists(string name |
name in ["filename",
// str
"name", "content_type", "mimetype",
// file-like
"stream",
// TODO: werkzeug.datastructures.Headers
"headers",
// dict[str, str]
"mimetype_params"] and
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
}
}
}
}

View File

@@ -0,0 +1,20 @@
importModule
| test1.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
| test2.py:1:24:1:26 | GSSA Variable bar | mypkg.bar |
| test3.py:2:8:2:16 | GSSA Variable mypkg | mypkg |
| test4.py:1:21:1:24 | GSSA Variable _foo | mypkg.foo |
| test4.py:2:21:2:24 | GSSA Variable _bar | mypkg.bar |
| test5.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test5.py:9:26:9:29 | GSSA Variable _bar | mypkg.bar |
| test6.py:1:8:1:12 | GSSA Variable mypkg | mypkg |
| test6.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
| test7.py:1:19:1:21 | GSSA Variable foo | mypkg.foo |
| test7.py:5:8:5:16 | GSSA Variable mypkg | mypkg |
| test7.py:9:19:9:21 | GSSA Variable foo | mypkg.foo |
importMember
| test2.py:1:19:1:21 | GSSA Variable foo | mypkg | foo |
| test2.py:1:24:1:26 | GSSA Variable bar | mypkg | bar |
| test5.py:9:26:9:29 | GSSA Variable _bar | mypkg | bar |
| test7.py:1:19:1:21 | GSSA Variable foo | mypkg | foo |
| test7.py:9:19:9:21 | GSSA Variable foo | mypkg | foo |

View File

@@ -0,0 +1,8 @@
import python
import experimental.dataflow.DataFlow
/** Holds if `res` is a node given by `DataFlow::importModule` for the module name `name`. */
query predicate importModule(DataFlow::Node res, string name) {
  DataFlow::importModule(name) = res
}
/** Holds if `res` is a node given by `DataFlow::importMember` for `moduleName` and `memberName`. */
query predicate importMember(DataFlow::Node res, string moduleName, string memberName) {
  DataFlow::importMember(moduleName, memberName) = res
}

View File

@@ -0,0 +1 @@
Small tests that explore difference between `import mypkg.foo` and `from mypkg import foo`.

View File

@@ -0,0 +1 @@
foo = 42

View File

@@ -0,0 +1 @@
pass

View File

@@ -0,0 +1 @@
pass

View File

@@ -0,0 +1,6 @@
import mypkg
print(mypkg.foo) # 42
try:
print(mypkg.bar)
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'

View File

@@ -0,0 +1,3 @@
from mypkg import foo, bar
print(foo)
print(bar)

View File

@@ -0,0 +1,4 @@
import mypkg.foo
import mypkg.bar
print(mypkg.foo) # <module 'mypkg.foo' ...
print(mypkg.bar) # <module 'mypkg.bar' ...

View File

@@ -0,0 +1,4 @@
import mypkg.foo as _foo
import mypkg.bar as _bar
print(_foo) # <module 'mypkg.foo' ...
print(_bar) # <module 'mypkg.bar' ...

View File

@@ -0,0 +1,10 @@
import mypkg
print(mypkg.foo) # 42
try:
print(mypkg.bar)
except AttributeError as e:
print(e) # module 'mypkg' has no attribute 'bar'
from mypkg import bar as _bar
print(mypkg.bar) # <module 'mypkg.bar' ...

View File

@@ -0,0 +1,6 @@
import mypkg
print(mypkg.foo) # 42
import mypkg.foo
print(mypkg.foo) # <module 'mypkg.foo' ...

View File

@@ -0,0 +1,10 @@
from mypkg import foo
print(foo) # 42
import mypkg.foo
print(foo) # 42
print(mypkg.foo) # <module 'mypkg.foo' ...
from mypkg import foo
print(foo) # <module 'mypkg.foo' ...

View File

@@ -0,0 +1,98 @@
| test.py:6 | fail | test_taint | name |
| test.py:6 | fail | test_taint | number |
| test.py:7 | ok | test_taint | foo |
| test.py:14 | ok | test_taint | request.environ |
| test.py:15 | ok | test_taint | request.environ.get(..) |
| test.py:17 | ok | test_taint | request.path |
| test.py:18 | ok | test_taint | request.full_path |
| test.py:19 | ok | test_taint | request.base_url |
| test.py:20 | ok | test_taint | request.url |
| test.py:23 | fail | test_taint | request.accept_charsets.best |
| test.py:24 | fail | test_taint | request.accept_charsets.best_match(..) |
| test.py:25 | ok | test_taint | request.accept_charsets[0] |
| test.py:26 | ok | test_taint | request.accept_encodings |
| test.py:27 | ok | test_taint | request.accept_languages |
| test.py:28 | ok | test_taint | request.accept_mimetypes |
| test.py:31 | ok | test_taint | request.access_control_request_headers |
| test.py:33 | ok | test_taint | request.access_control_request_method |
| test.py:35 | ok | test_taint | request.access_route |
| test.py:36 | ok | test_taint | request.access_route[0] |
| test.py:39 | ok | test_taint | request.args |
| test.py:40 | ok | test_taint | request.args['key'] |
| test.py:41 | ok | test_taint | request.args.getlist(..) |
| test.py:44 | ok | test_taint | request.authorization |
| test.py:45 | ok | test_taint | request.authorization['username'] |
| test.py:46 | fail | test_taint | request.authorization.username |
| test.py:49 | ok | test_taint | request.cache_control |
| test.py:51 | fail | test_taint | request.cache_control.max_age |
| test.py:52 | fail | test_taint | request.cache_control.max_stale |
| test.py:53 | fail | test_taint | request.cache_control.min_fresh |
| test.py:55 | ok | test_taint | request.content_encoding |
| test.py:57 | ok | test_taint | request.content_md5 |
| test.py:59 | ok | test_taint | request.content_type |
| test.py:62 | ok | test_taint | request.cookies |
| test.py:63 | ok | test_taint | request.cookies['key'] |
| test.py:65 | ok | test_taint | request.data |
| test.py:68 | ok | test_taint | request.files |
| test.py:69 | ok | test_taint | request.files['key'] |
| test.py:70 | fail | test_taint | request.files['key'].filename |
| test.py:71 | fail | test_taint | request.files['key'].stream |
| test.py:72 | ok | test_taint | request.files.getlist(..) |
| test.py:73 | fail | test_taint | request.files.getlist(..)[0].filename |
| test.py:74 | fail | test_taint | request.files.getlist(..)[0].stream |
| test.py:77 | ok | test_taint | request.form |
| test.py:78 | ok | test_taint | request.form['key'] |
| test.py:79 | ok | test_taint | request.form.getlist(..) |
| test.py:81 | ok | test_taint | request.get_data() |
| test.py:83 | ok | test_taint | request.get_json() |
| test.py:84 | ok | test_taint | request.get_json()['foo'] |
| test.py:85 | ok | test_taint | request.get_json()['foo']['bar'] |
| test.py:89 | ok | test_taint | request.headers |
| test.py:90 | ok | test_taint | request.headers['key'] |
| test.py:91 | fail | test_taint | request.headers.get_all(..) |
| test.py:92 | fail | test_taint | request.headers.getlist(..) |
| test.py:93 | ok | test_taint | list(..) |
| test.py:94 | fail | test_taint | request.headers.to_wsgi_list() |
| test.py:96 | ok | test_taint | request.json |
| test.py:97 | ok | test_taint | request.json['foo'] |
| test.py:98 | ok | test_taint | request.json['foo']['bar'] |
| test.py:100 | ok | test_taint | request.method |
| test.py:102 | ok | test_taint | request.mimetype |
| test.py:104 | ok | test_taint | request.mimetype_params |
| test.py:106 | ok | test_taint | request.origin |
| test.py:109 | ok | test_taint | request.pragma |
| test.py:111 | ok | test_taint | request.query_string |
| test.py:113 | ok | test_taint | request.referrer |
| test.py:115 | ok | test_taint | request.remote_addr |
| test.py:117 | ok | test_taint | request.remote_user |
| test.py:120 | ok | test_taint | request.stream |
| test.py:121 | ok | test_taint | request.input_stream |
| test.py:123 | ok | test_taint | request.url |
| test.py:125 | ok | test_taint | request.user_agent |
| test.py:128 | ok | test_taint | request.values |
| test.py:129 | ok | test_taint | request.values['key'] |
| test.py:130 | ok | test_taint | request.values.getlist(..) |
| test.py:133 | ok | test_taint | request.view_args |
| test.py:134 | ok | test_taint | request.view_args['key'] |
| test.py:138 | ok | test_taint | request.script_root |
| test.py:139 | ok | test_taint | request.url_root |
| test.py:143 | ok | test_taint | request.charset |
| test.py:144 | ok | test_taint | request.url_charset |
| test.py:148 | ok | test_taint | request.date |
| test.py:151 | ok | test_taint | request.endpoint |
| test.py:156 | ok | test_taint | request.host |
| test.py:157 | ok | test_taint | request.host_url |
| test.py:159 | ok | test_taint | request.scheme |
| test.py:161 | ok | test_taint | request.script_root |
| test.py:169 | ok | test_taint | request.args |
| test.py:170 | ok | test_taint | a |
| test.py:171 | ok | test_taint | b |
| test.py:173 | ok | test_taint | request.args['key'] |
| test.py:174 | ok | test_taint | a['key'] |
| test.py:175 | ok | test_taint | b['key'] |
| test.py:177 | ok | test_taint | request.args.getlist(..) |
| test.py:178 | ok | test_taint | a.getlist(..) |
| test.py:179 | ok | test_taint | b.getlist(..) |
| test.py:180 | ok | test_taint | gl(..) |
| test.py:187 | ok | test_taint | req.path |
| test.py:188 | ok | test_taint | gd() |

View File

@@ -0,0 +1,6 @@
import experimental.dataflow.tainttracking.TestTaintLib
import experimental.dataflow.RemoteFlowSources
/** A test taint-tracking configuration whose sources are the remote flow sources. */
class RemoteFlowTestTaintConfiguration extends TestTaintTrackingConfiguration {
  override predicate isSource(DataFlow::Node source) {
    exists(RemoteFlowSource remote | source = remote)
  }
}

View File

@@ -0,0 +1,273 @@
from flask import Flask, request
app = Flask(__name__)
@app.route('/test_taint/<name>/<int:number>')
def test_taint(name = "World!", number="0", foo="foo"):
ensure_tainted(name, number)
ensure_not_tainted(foo)
# Manually inspected all fields of the Request object
# https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
ensure_tainted(
request.environ,
request.environ.get('HTTP_AUTHORIZATION'),
request.path,
request.full_path,
request.base_url,
request.url,
# These request.accept_* properties are instances of subclasses of werkzeug.datastructures.Accept
request.accept_charsets.best,
request.accept_charsets.best_match(["utf-8", "utf-16"]),
request.accept_charsets[0],
request.accept_encodings,
request.accept_languages,
request.accept_mimetypes,
# werkzeug.datastructures.HeaderSet (subclass of collections_abc.MutableSet)
request.access_control_request_headers,
request.access_control_request_method,
request.access_route,
request.access_route[0],
# By default werkzeug.datastructures.ImmutableMultiDict -- although can be changed :\
request.args,
request.args['key'],
request.args.getlist('key'),
# werkzeug.datastructures.Authorization (a dict, with some properties)
request.authorization,
request.authorization['username'],
request.authorization.username,
# werkzeug.datastructures.RequestCacheControl
request.cache_control,
# These should be `int`s, but can be strings... see the `cache_control` handler below
request.cache_control.max_age,
request.cache_control.max_stale,
request.cache_control.min_fresh,
request.content_encoding,
request.content_md5,
request.content_type,
# werkzeug.datastructures.ImmutableTypeConversionDict (which is basically just a dict)
request.cookies,
request.cookies['key'],
request.data,
# a werkzeug.datastructures.MultiDict, mapping [str, werkzeug.datastructures.FileStorage]
request.files,
request.files['key'],
request.files['key'].filename,
request.files['key'].stream,
request.files.getlist('key'),
request.files.getlist('key')[0].filename,
request.files.getlist('key')[0].stream,
# By default werkzeug.datastructures.ImmutableMultiDict -- although can be changed :\
request.form,
request.form['key'],
request.form.getlist('key'),
request.get_data(),
request.get_json(),
request.get_json()['foo'],
request.get_json()['foo']['bar'],
# werkzeug.datastructures.EnvironHeaders,
# which has same interface as werkzeug.datastructures.Headers
request.headers,
request.headers['key'],
request.headers.get_all('key'),
request.headers.getlist('key'),
list(request.headers), # (k, v) list
request.headers.to_wsgi_list(), # (k, v) list
request.json,
request.json['foo'],
request.json['foo']['bar'],
request.method,
request.mimetype,
request.mimetype_params,
request.origin,
# werkzeug.datastructures.HeaderSet (subclass of collections_abc.MutableSet)
request.pragma,
request.query_string,
request.referrer,
request.remote_addr,
request.remote_user,
# file-like object
request.stream,
request.input_stream,
request.url,
request.user_agent,
# werkzeug.datastructures.CombinedMultiDict, which is basically just a werkzeug.datastructures.MultiDict
request.values,
request.values['key'],
request.values.getlist('key'),
# dict
request.view_args,
request.view_args['key'],
)
ensure_not_tainted(
request.script_root,
request.url_root,
# The expected charset for parsing request data / urls. Can not be changed by client.
# https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/base_request.py#L71-L72
request.charset,
request.url_charset,
# request.date is a parsed `datetime`
# https://github.com/pallets/werkzeug/blob/4dc8d6ab840d4b78cbd5789cef91b01e3bde01d5/src/werkzeug/wrappers/common_descriptors.py#L76-L83
request.date,
# Assuming that endpoints are not created by user-input seems fair
request.endpoint,
# In some rare circumstances a client could spoof the host, but by default they
# should not be able to. See
# https://werkzeug.palletsprojects.com/en/1.0.x/wrappers/#werkzeug.wrappers.BaseRequest.trusted_hosts
request.host,
request.host_url,
request.scheme,
request.script_root,
)
# Testing some more tricky data-flow still works
a = request.args
b = a
gl = b.getlist
ensure_tainted(
request.args,
a,
b,
request.args['key'],
a['key'],
b['key'],
request.args.getlist('key'),
a.getlist('key'),
b.getlist('key'),
gl('key'),
)
# aliasing tests
req = request
gd = request.get_data
ensure_tainted(
req.path,
gd(),
)
@app.route('/debug/<foo>/<bar>', methods=['GET'])
def debug(foo, bar):
print("request.view_args", request.view_args)
print("request.headers {!r}".format(request.headers))
print("request.headers['accept'] {!r}".format(request.headers['accept']))
print("request.pragma {!r}".format(request.pragma))
return 'ok'
@app.route('/stream', methods=['POST'])
def stream():
print(request.path)
s = request.stream
print(s)
# just works :)
print(s.read())
return 'ok'
@app.route('/input_stream', methods=['POST'])
def input_stream():
print(request.path)
s = request.input_stream
print(s)
# hangs until client stops connection, since max number of bytes to read must
# be handled manually
print(s.read())
return 'ok'
@app.route('/form', methods=['POST'])
def form():
print(request.path)
print("request.form", request.form)
return 'ok'
@app.route('/cache_control', methods=['POST'])
def cache_control():
print(request.path)
print("request.cache_control.max_age", request.cache_control.max_age, type(request.cache_control.max_age))
print("request.cache_control.max_stale", request.cache_control.max_stale, type(request.cache_control.max_stale))
print("request.cache_control.min_fresh", request.cache_control.min_fresh, type(request.cache_control.min_fresh))
return 'ok'
@app.route('/file_upload', methods=['POST'])
def file_upload():
print(request.path)
for k,v in request.files.items():
print(k, v, v.name, v.filename, v.stream)
return 'ok'
@app.route('/args', methods=['GET'])
def args():
print(request.path)
print("request.args", request.args)
return 'ok'
# curl --header "My-Header: some-value" http://localhost:5000/debug/fooval/barval
# curl --header "Pragma: foo, bar" --header "Pragma: stuff, foo" http://localhost:5000/debug/fooval/barval
# curl -X POST --data 'wat' http://localhost:5000/stream
# curl -X POST --data 'wat' http://localhost:5000/input_stream
# curl --form foo=foo --form foo=123 http://localhost:5000/form
# curl --header "Cache-Control: max-age=foo, max-stale=bar, min-fresh=baz" http://localhost:5000/cache_control
# curl --header "Cache-Control: max-age=1, max-stale=2, min-fresh=3" http://localhost:5000/cache_control
# curl -F myfile=@<some-file> localhost:5000/file_upload
# curl 'http://localhost:5000/args?foo=42&bar=bar'
if __name__ == "__main__":
app.run(debug=True)

View File

@@ -0,0 +1,35 @@
edges
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | test.py:41:10:41:12 | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:40:11:40:25 | ControlFlowNode for Attribute() |
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | test.py:54:10:54:12 | ControlFlowNode for val |
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:53:11:53:25 | ControlFlowNode for Attribute() |
| test.py:78:11:78:14 | ControlFlowNode for bm() | test.py:79:10:79:12 | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:78:11:78:14 | ControlFlowNode for bm() |
| test.py:90:11:90:14 | ControlFlowNode for bm() | test.py:91:10:91:12 | ControlFlowNode for val |
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:90:11:90:14 | ControlFlowNode for bm() |
nodes
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:40:11:40:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:53:11:53:25 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:78:11:78:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:90:11:90:14 | ControlFlowNode for bm() | semmle.label | ControlFlowNode for bm() |
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
#select
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (naive): test_simple |
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (naive): test_alias |
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (naive): test_accross_functions |
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested |
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (naive): test_pass_bound_method |
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (naive): test_deeply_nested_bound_method |

View File

@@ -0,0 +1,24 @@
/**
* @kind path-problem
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
import DataFlow::PathGraph
import SharedCode
/**
 * Naive modeling of `MyClass.get_value`: one taint step that goes from a
 * `MyClass` node straight to any call whose callee is a node that
 * `myClassGetValue` yields for that `MyClass` node.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // obj -> obj.get_value()
    // `nodeTo` is a call, and the thing being called is a node tracked from `nodeFrom`.
    nodeTo.asCfgNode().(CallNode).getFunction() = myClassGetValue(nodeFrom).asCfgNode()
  }
}
// Report every source-to-sink path found by `SharedConfig`. The message is the
// fixed prefix "test flow (naive): " followed by the name of the scope that
// contains the source node.
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
  "test flow (naive): " + source.getNode().asCfgNode().getScope().getName()

View File

@@ -0,0 +1,67 @@
edges
| test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val |
| test.py:39:15:39:17 | SSA variable arg | test.py:41:10:41:12 | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | test.py:46:15:46:17 | ControlFlowNode for src |
| test.py:46:15:46:17 | ControlFlowNode for src | test.py:39:15:39:17 | SSA variable arg |
| test.py:52:24:52:26 | SSA variable arg | test.py:54:10:54:12 | ControlFlowNode for val |
| test.py:57:33:57:35 | SSA variable arg | test.py:58:24:58:26 | ControlFlowNode for arg |
| test.py:58:24:58:26 | ControlFlowNode for arg | test.py:52:24:52:26 | SSA variable arg |
| test.py:61:33:61:35 | SSA variable arg | test.py:62:33:62:35 | ControlFlowNode for arg |
| test.py:62:33:62:35 | ControlFlowNode for arg | test.py:57:33:57:35 | SSA variable arg |
| test.py:65:33:65:35 | SSA variable arg | test.py:66:33:66:35 | ControlFlowNode for arg |
| test.py:66:33:66:35 | ControlFlowNode for arg | test.py:61:33:61:35 | SSA variable arg |
| test.py:70:11:70:18 | ControlFlowNode for source() | test.py:71:33:71:35 | ControlFlowNode for src |
| test.py:71:33:71:35 | ControlFlowNode for src | test.py:65:33:65:35 | SSA variable arg |
| test.py:77:23:77:24 | SSA variable bm | test.py:79:10:79:12 | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | test.py:84:23:84:35 | ControlFlowNode for Attribute |
| test.py:84:23:84:35 | ControlFlowNode for Attribute | test.py:77:23:77:24 | SSA variable bm |
| test.py:89:37:89:38 | SSA variable bm | test.py:91:10:91:12 | ControlFlowNode for val |
| test.py:94:46:94:47 | SSA variable bm | test.py:95:37:95:38 | ControlFlowNode for bm |
| test.py:95:37:95:38 | ControlFlowNode for bm | test.py:89:37:89:38 | SSA variable bm |
| test.py:98:46:98:47 | SSA variable bm | test.py:99:46:99:47 | ControlFlowNode for bm |
| test.py:99:46:99:47 | ControlFlowNode for bm | test.py:94:46:94:47 | SSA variable bm |
| test.py:102:46:102:47 | SSA variable bm | test.py:103:46:103:47 | ControlFlowNode for bm |
| test.py:103:46:103:47 | ControlFlowNode for bm | test.py:98:46:98:47 | SSA variable bm |
| test.py:107:11:107:18 | ControlFlowNode for source() | test.py:108:46:108:58 | ControlFlowNode for Attribute |
| test.py:108:46:108:58 | ControlFlowNode for Attribute | test.py:102:46:102:47 | SSA variable bm |
nodes
| test.py:21:11:21:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:29:11:29:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:33:10:33:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:39:15:39:17 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:41:10:41:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:45:11:45:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:46:15:46:17 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
| test.py:52:24:52:26 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:54:10:54:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:57:33:57:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:58:24:58:26 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:61:33:61:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:62:33:62:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:65:33:65:35 | SSA variable arg | semmle.label | SSA variable arg |
| test.py:66:33:66:35 | ControlFlowNode for arg | semmle.label | ControlFlowNode for arg |
| test.py:70:11:70:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:71:33:71:35 | ControlFlowNode for src | semmle.label | ControlFlowNode for src |
| test.py:77:23:77:24 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:79:10:79:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:83:11:83:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:84:23:84:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| test.py:89:37:89:38 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:91:10:91:12 | ControlFlowNode for val | semmle.label | ControlFlowNode for val |
| test.py:94:46:94:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:95:37:95:38 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:98:46:98:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:99:46:99:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:102:46:102:47 | SSA variable bm | semmle.label | SSA variable bm |
| test.py:103:46:103:47 | ControlFlowNode for bm | semmle.label | ControlFlowNode for bm |
| test.py:107:11:107:18 | ControlFlowNode for source() | semmle.label | ControlFlowNode for source() |
| test.py:108:46:108:58 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
#select
| test.py:22:10:22:24 | ControlFlowNode for Attribute() | test.py:21:11:21:18 | ControlFlowNode for source() | test.py:22:10:22:24 | ControlFlowNode for Attribute() | test flow (proper): test_simple |
| test.py:33:10:33:12 | ControlFlowNode for val | test.py:29:11:29:18 | ControlFlowNode for source() | test.py:33:10:33:12 | ControlFlowNode for val | test flow (proper): test_alias |
| test.py:41:10:41:12 | ControlFlowNode for val | test.py:45:11:45:18 | ControlFlowNode for source() | test.py:41:10:41:12 | ControlFlowNode for val | test flow (proper): test_accross_functions |
| test.py:54:10:54:12 | ControlFlowNode for val | test.py:70:11:70:18 | ControlFlowNode for source() | test.py:54:10:54:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested |
| test.py:79:10:79:12 | ControlFlowNode for val | test.py:83:11:83:18 | ControlFlowNode for source() | test.py:79:10:79:12 | ControlFlowNode for val | test flow (proper): test_pass_bound_method |
| test.py:91:10:91:12 | ControlFlowNode for val | test.py:107:11:107:18 | ControlFlowNode for source() | test.py:91:10:91:12 | ControlFlowNode for val | test flow (proper): test_deeply_nested_bound_method |

View File

@@ -0,0 +1,26 @@
/**
* @kind path-problem
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
import DataFlow::PathGraph
import SharedCode
/**
 * Modeling of `MyClass.get_value` split into two local taint steps: one at the
 * attribute access and one at the call.
 */
class MyClassGetValueAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // obj -> obj.get_value
    // Only at the attribute access itself, and only if that access is a tracked node.
    exists(AttrNode attr |
      attr = nodeTo.asCfgNode() and
      attr.getObject("get_value") = nodeFrom.asCfgNode() and
      nodeTo = myClassGetValue(_)
    )
    or
    // get_value -> get_value()
    // Only at the call itself, and only if the callee is a tracked node.
    exists(CallNode call |
      call = nodeTo.asCfgNode() and
      call.getFunction() = nodeFrom.asCfgNode() and
      nodeFrom = myClassGetValue(_)
    )
  }
}
// Report every source-to-sink path found by `SharedConfig`. The message is the
// fixed prefix "test flow (proper): " followed by the name of the scope that
// contains the source node.
from SharedConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
  "test flow (proper): " + source.getNode().asCfgNode().getScope().getName()

View File

@@ -0,0 +1,29 @@
This test illustrates that you need to be very careful when adding additional taint-steps or dataflow steps using `TypeTracker`.
The basic setup is that we're modeling the behavior of a (fictitious) external library class `MyClass`, and a (fictitious) source of such an instance (the `source` function).
```py3
class MyClass:
def __init__(self, value):
self.value = value
def get_value(self):
return self.value
```
We want to extend our analysis so that `obj.get_value()` is also tainted if `obj` is a tainted instance of `MyClass`.
The actual type-tracking is done in `SharedCode.qll`, but it's the _way_ we use it that matters.
In `NaiveModel.ql` we add an additional taint step from an instance of `MyClass` to calls of the bound method `get_value` (that we have tracked). It provides us with the correct results, but the path explanations are not very useful, since we are now able to cross functions in _one step_.
In `ProperModel.ql` we split the additional taint step in two:
1. from a tracked `obj` that is an instance of `MyClass`, to `obj.get_value` **but only** exactly where the attribute is accessed (by an `AttrNode`). This is important, since if we allowed `<any tracked qualifier>.get_value` we would again be able to cross functions in one step.
2. from a tracked `get_value` bound method to calls of it, **but only** exactly where the call is (by a `CallNode`). This is for the same reason as above.
**Try running the queries in VS Code to see the difference**
### Possible improvements
Using `AttrNode` directly in the code here means there is no easy way to add `getattr` support to all such predicates. Not really sure how to handle this in a generalized way though :|

View File

@@ -0,0 +1,36 @@
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
// Helpers modeling MyClass
/**
 * A data-flow Node representing an instance of MyClass.
 *
 * The class is abstract and has no members of its own; concrete subclasses
 * (such as `SourceCall` in this file) decide which nodes count as instances.
 */
abstract class MyClass extends DataFlow::Node { }
/**
 * Gets a node reached by type tracking from `qualifier`, where the tracker `t`
 * starts out in the attribute `get_value` (`startInAttr`).
 *
 * Base case: `qualifier` itself, with a tracker that starts in `get_value`.
 * Recursive case: any node that a previously found node `track`s to, moving
 * the tracker from state `t2` to state `t`.
 */
private DataFlow::Node myClassGetValue(MyClass qualifier, DataFlow::TypeTracker t) {
  t.startInAttr("get_value") and
  result = qualifier
  or
  exists(DataFlow::TypeTracker t2 | result = myClassGetValue(qualifier, t2).track(t2, t))
}
/**
 * Gets a node that type tracking reaches from `qualifier` with the tracker in
 * its `end()` state. The accompanying README refers to these nodes as the
 * tracked `get_value` bound method.
 */
DataFlow::Node myClassGetValue(MyClass qualifier) {
  result = myClassGetValue(qualifier, DataFlow::TypeTracker::end())
}
// Config
/** A call whose callee is the plain name `source`; such a call is treated as a `MyClass` instance. */
class SourceCall extends DataFlow::Node, MyClass {
  SourceCall() {
    exists(CallNode call | call = this.asCfgNode() |
      call.getFunction().(NameNode).getId() = "source"
    )
  }
}
/**
 * Taint-tracking configuration shared by the test queries: sources are
 * `SourceCall` nodes, and sinks are the first argument of any call whose
 * callee is the plain name `sink`.
 */
class SharedConfig extends TaintTracking::Configuration {
  SharedConfig() { this = "SharedConfig" }

  override predicate isSource(DataFlow::Node source) { source instanceof SourceCall }

  override predicate isSink(DataFlow::Node sink) {
    sink.asCfgNode() =
      any(CallNode call | call.getFunction().(NameNode).getId() = "sink").getArg(0)
  }
}

View File

@@ -0,0 +1,108 @@
class MyClass:  # fixture class: stores a value and hands it back through get_value()
    def __init__(self, value):
        self.value = value  # kept as-is; read back by get_value()
    def get_value(self):
        return self.value
def source():  # each call returns a new MyClass wrapping the literal "tainted"
    return MyClass("tainted")
def sink(obj):  # prints its argument after the label "sink"
    print("sink", obj)
################################################################################
def test_simple():  # case: get_value() is called directly on the result of source()
    src = source()
    sink(src.get_value())
################################################################################
def test_alias():  # case: the object and the bound method are each stored in a local first
    src = source()
    foo = src
    bound_method = foo.get_value
    val = bound_method()
    sink(val)
################################################################################
def sink_func(arg):  # calls get_value() on its parameter and passes the result to sink()
    val = arg.get_value()
    sink(val)
def test_accross_functions():  # case: the object is handed to sink_func(), which makes the get_value() call
    src = source()
    sink_func(src)
################################################################################
def deeply_nested_sink(arg):  # end of the forwarding chain: calls get_value() and sinks the result
    val = arg.get_value()
    sink(val)
def deeply_nested_passthrough_1(arg):  # forwards arg unchanged to deeply_nested_sink
    deeply_nested_sink(arg)
def deeply_nested_passthrough_2(arg):  # forwards arg unchanged to deeply_nested_passthrough_1
    deeply_nested_passthrough_1(arg)
def deeply_nested_passthrough_3(arg):  # forwards arg unchanged to deeply_nested_passthrough_2
    deeply_nested_passthrough_2(arg)
def test_deeply_nested():  # case: the object passes through three forwarding functions before get_value() is called
    src = source()
    deeply_nested_passthrough_3(src)
################################################################################
def recv_bound_method(bm):  # calls its parameter and passes the result to sink()
    val = bm()
    sink(val)
def test_pass_bound_method():  # case: the bound method src.get_value, not the object, is passed to another function
    src = source()
    recv_bound_method(src.get_value)
################################################################################
def deeply_nested_bound_method_sink(bm):  # end of the forwarding chain: calls bm and sinks the result
    val = bm()
    sink(val)
def deeply_nested_bound_method_passthrough_1(bm):  # forwards bm unchanged to deeply_nested_bound_method_sink
    deeply_nested_bound_method_sink(bm)
def deeply_nested_bound_method_passthrough_2(bm):  # forwards bm unchanged to deeply_nested_bound_method_passthrough_1
    deeply_nested_bound_method_passthrough_1(bm)
def deeply_nested_bound_method_passthrough_3(bm):  # forwards bm unchanged to deeply_nested_bound_method_passthrough_2
    deeply_nested_bound_method_passthrough_2(bm)
def test_deeply_nested_bound_method():  # case: the bound method passes through three forwarding functions before it is called
    src = source()
    deeply_nested_bound_method_passthrough_3(src.get_value)

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1

View File

@@ -0,0 +1,11 @@
edges
| command_injection.py:10:13:10:24 | ControlFlowNode for Attribute | command_injection.py:12:15:12:27 | ControlFlowNode for BinaryExpr |
| command_injection.py:30:13:30:24 | ControlFlowNode for Attribute | command_injection.py:32:14:32:26 | ControlFlowNode for BinaryExpr |
nodes
| command_injection.py:10:13:10:24 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| command_injection.py:12:15:12:27 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
| command_injection.py:30:13:30:24 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| command_injection.py:32:14:32:26 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr |
#select
| command_injection.py:12:15:12:27 | ControlFlowNode for BinaryExpr | command_injection.py:10:13:10:24 | ControlFlowNode for Attribute | command_injection.py:12:15:12:27 | ControlFlowNode for BinaryExpr | This command depends on $@. | command_injection.py:10:13:10:24 | ControlFlowNode for Attribute | a user-provided value |
| command_injection.py:32:14:32:26 | ControlFlowNode for BinaryExpr | command_injection.py:30:13:30:24 | ControlFlowNode for Attribute | command_injection.py:32:14:32:26 | ControlFlowNode for BinaryExpr | This command depends on $@. | command_injection.py:30:13:30:24 | ControlFlowNode for Attribute | a user-provided value |

View File

@@ -0,0 +1 @@
experimental/Security-new-dataflow/CWE-078/CommandInjection.ql

View File

@@ -0,0 +1,35 @@
import os
import subprocess
from flask import Flask, request
app = Flask(__name__)
@app.route("/command1")
def command_injection1():  # the 'files' request argument is concatenated into the string passed to os.system
    files = request.args.get('files', '')
    # Don't let files be `; rm -rf /`
    os.system("ls " + files)
@app.route("/command2")
def command_injection2():  # NOTE(review): the accompanying .expected output lists no flow ending in this function - confirm that is intended
    files = request.args.get('files', '')
    # Don't let files be `; rm -rf /`
    subprocess.Popen(["ls", files], shell = True)
@app.route("/command3")
def first_arg_injection():  # NOTE(review): the accompanying .expected output lists no flow ending in this function - confirm that is intended
    cmd = request.args.get('cmd', '')
    subprocess.Popen([cmd, "param1"])
@app.route("/other_cases")
def others():
files = request.args.get('files', '')
# Don't let files be `; rm -rf /`
os.popen("ls " + files)
# TODO: popen2 module for Python 2 only https://devdocs.io/python~2.7/library/popen2
# (deprecated since Python 2.6, but still functional in Python 2.7.17)