Python: Address many review comments

still need to move concept tests
This commit is contained in:
Rasmus Lerchedahl Petersen
2020-10-13 12:03:23 +02:00
parent 433a36225b
commit 4685f2d5f2
16 changed files with 285 additions and 238 deletions

View File

@@ -1,10 +0,0 @@
from django.conf.urls import url
import json
def safe(pickled):
return json.loads(pickled)
urlpatterns = [
url(r'^(?P<object>.*)$', safe)
]

View File

@@ -1,10 +0,0 @@
from django.conf.urls import url
import pickle
def unsafe(pickled):
return pickle.loads(pickled)
urlpatterns = [
url(r'^(?P<object>.*)$', unsafe)
]

View File

@@ -1,61 +0,0 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Deserializing untrusted data using any deserialization framework that
allows the construction of arbitrary serializable objects is easily exploitable
and in many cases allows an attacker to execute arbitrary code. Even before a
deserialized object is returned to the caller of a deserialization method a lot
of code may have been executed, including static initializers, constructors,
and finalizers. Automatic deserialization of fields means that an attacker may
craft a nested combination of objects on which the executed initialization code
may have unforeseen effects, such as the execution of arbitrary code.
</p>
<p>
There are many different serialization frameworks. This query currently
supports Pickle, Marshal and Yaml.
</p>
</overview>
<recommendation>
<p>
Avoid deserialization of untrusted data if at all possible. If the
architecture permits it then use other formats instead of serialized objects,
for example JSON.
</p>
</recommendation>
<example>
<p>
The following example calls <code>pickle.loads</code> directly on a
value provided by an incoming HTTP request. Pickle then creates a new value from untrusted data, and is
therefore inherently unsafe.
</p>
<sample src="UnpicklingBad.py" />
<p>
Changing the code to use <code>json.loads</code> instead of <code>pickle.loads</code> removes the vulnerability.
</p>
<sample src="JsonGood.py" />
</example>
<references>
<li>
OWASP vulnerability description:
<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">Deserialization of untrusted data</a>.
</li>
<li>
OWASP guidance on deserializing objects:
<a href="https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html">Deserialization Cheat Sheet</a>.
</li>
<li>
Talks by Chris Frohoff &amp; Gabriel Lawrence:
<a href="http://frohoff.github.io/appseccali-marshalling-pickles/">
AppSecCali 2015: Marshalling Pickles - how deserializing objects will ruin your day</a>
</li>
</references>
</qhelp>

View File

@@ -23,7 +23,12 @@ class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) { sink = any(DeserializationSink d).getData() }
override predicate isSink(DataFlow::Node sink) {
exists(UnmarshalingFunction d |
d.unsafe() and
sink = d.getAnInput()
)
}
}
from UnsafeDeserializationConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink

View File

@@ -40,31 +40,52 @@ module SystemCommandExecution {
}
/**
* A data-flow node that performs deserialization in a potentially unsafe way.
* A function that decodes data from a binary or textual format.
* Doing so should normally preserve taint, but it can also be a problem
* in itself, e.g. if it performs deserialization in a potentially unsafe way.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `DeserializationSink::Range` instead.
* extend `UnmarshalingFunction::Range` instead.
*/
class DeserializationSink extends DataFlow::Node {
DeserializationSink::Range self;
class UnmarshalingFunction extends DataFlow::Node {
UnmarshalingFunction::Range self;
DeserializationSink() { this = self }
UnmarshalingFunction() { this = self }
/** Gets the argument that specifies the command to be executed. */
DataFlow::Node getData() { result = self.getData() }
/** Holds if this call is unsafe, e.g. if it may execute arbitrary code. */
predicate unsafe() { self.unsafe() }
/** Gets an input that is decoded by this function. */
DataFlow::Node getAnInput() { result = self.getAnInput() }
/** Gets the output that contains the decoded data produced by this function. */
DataFlow::Node getOutput() { result = self.getOutput() }
/** Gets an identifier for the format this function decodes from, such as "JSON". */
string getFormat() { result = self.getFormat() }
}
/** Provides a class for modeling new system-command execution APIs. */
module DeserializationSink {
/** Provides a class for modeling new unmarshaling/decoding/deserialization functions. */
module UnmarshalingFunction {
/**
* A data-flow node that executes an operating system command,
* for instance by spawning a new process.
* A function that decodes data from a binary or textual format.
* Doing so should normally preserve taint, but it can oalso be a problem
* in itself, e.g. if it performs deserialization in a potentially unsafe way.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `DeserializationSink` instead.
* extend `UnmarshalingFunction` instead.
*/
abstract class Range extends DataFlow::Node {
/** Gets the argument that specifies the command to be executed. */
abstract DataFlow::Node getData();
/** Holds if this call is unsafe, e.g. if it may execute arbitrary code. */
abstract predicate unsafe();
/** Gets an input that is decoded by this function. */
abstract DataFlow::Node getAnInput();
/** Gets the output that contains the decoded data produced by this function. */
abstract DataFlow::Node getOutput();
/** Gets an identifier for the format this function decodes from, such as "JSON". */
abstract string getFormat();
}
}

View File

@@ -3,8 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Dill
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Marshal
private import experimental.semmle.python.frameworks.Pickle
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.Yaml

View File

@@ -0,0 +1,59 @@
/**
* Provides classes modeling security-relevant aspects of the 'dill' package.
* See https://pypi.org/project/dill/.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Dill {
/** Gets a reference to the `dill` module. */
private DataFlow::Node dill(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("dill")
or
exists(DataFlow::TypeTracker t2 | result = dill(t2).track(t2, t))
}
/** Gets a reference to the `dill` module. */
DataFlow::Node dill() { result = dill(DataFlow::TypeTracker::end()) }
module dill {
/** Gets a reference to the `dill.loads` function. */
private DataFlow::Node loads(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("dill", "loads")
or
t.startInAttr("loads") and
result = dill()
or
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
}
/** Gets a reference to the `dill.loads` function. */
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
}
}
/**
* A call to `dill.loads`
* See https://pypi.org/project/dill/ (which currently refers you
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
*/
private class DillDeserialization extends UnmarshalingFunction::Range {
DillDeserialization() {
this.asCfgNode().(CallNode).getFunction() = Dill::dill::loads().asCfgNode()
}
override predicate unsafe() { any() }
override DataFlow::Node getAnInput() {
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
}
override DataFlow::Node getOutput() { result = this }
override string getFormat() { none() }
}

View File

@@ -13,7 +13,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
// https://github.com/github/codeql/blob/9f95212e103c68d0c1dfa4b6f30fb5d53954ccef/python/ql/src/semmle/python/web/flask/Request.qll
private module Flask {
/** Gets a reference to the `flask` module. */
DataFlow::Node flask(DataFlow::TypeTracker t) {
private DataFlow::Node flask(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("flask")
or
@@ -25,18 +25,18 @@ private module Flask {
module flask {
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request(DataFlow::TypeTracker t) {
private DataFlow::Node request(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("flask", "request")
or
t.startInAttr("request") and
result = flask()
or
exists(DataFlow::TypeTracker t2 | result = flask::request(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = request(t2).track(t2, t))
}
/** Gets a reference to the `flask.request` object. */
DataFlow::Node request() { result = flask::request(DataFlow::TypeTracker::end()) }
DataFlow::Node request() { result = request(DataFlow::TypeTracker::end()) }
}
// TODO: Do we even need this class? :|

View File

@@ -1,49 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of the `marshal` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Marshal {
/** Gets a reference to the `marshal` module. */
private DataFlow::Node marshal(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("marshal")
or
exists(DataFlow::TypeTracker t2 | result = marshal(t2).track(t2, t))
}
/** Gets a reference to the `marshal` module. */
DataFlow::Node marshal() { result = marshal(DataFlow::TypeTracker::end()) }
module marshal {
/** Gets a reference to the `marshal.loads` function. */
DataFlow::Node loads(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("marshal", "loads")
or
t.startInAttr("loads") and
result = marshal()
or
exists(DataFlow::TypeTracker t2 | result = marshal::loads(t2).track(t2, t))
}
/** Gets a reference to the `marshal.loads` function. */
DataFlow::Node loads() { result = marshal::loads(DataFlow::TypeTracker::end()) }
}
}
/**
* A call to `marshal.loads`
* See https://docs.python.org/3/library/marshal.html#marshal.loads
*/
private class MarshalDeserialization extends DeserializationSink::Range {
MarshalDeserialization() {
this.asCfgNode().(CallNode).getFunction() = Marshal::marshal::loads().asCfgNode()
}
override DataFlow::Node getData() { result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0) }
}

View File

@@ -1,51 +0,0 @@
/**
* Provides classes modeling security-relevant aspects of the `pickle` package.
*/
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
private module Pickle {
private string pickleModuleName() { result in ["pickle", "cPickle", "dill"] }
/** Gets a reference to the `pickle` module. */
DataFlow::Node pickle(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule(pickleModuleName())
or
exists(DataFlow::TypeTracker t2 | result = pickle(t2).track(t2, t))
}
/** Gets a reference to the `pickle` module. */
DataFlow::Node pickle() { result = pickle(DataFlow::TypeTracker::end()) }
module pickle {
/** Gets a reference to the `pickle.loads` function. */
DataFlow::Node loads(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember(pickleModuleName(), "loads")
or
t.startInAttr("loads") and
result = pickle()
or
exists(DataFlow::TypeTracker t2 | result = pickle::loads(t2).track(t2, t))
}
/** Gets a reference to the `pickle.loads` function. */
DataFlow::Node loads() { result = pickle::loads(DataFlow::TypeTracker::end()) }
}
}
/**
* A call to `pickle.loads`
* See https://docs.python.org/3/library/pickle.html#pickle.loads
*/
private class PickleDeserialization extends DeserializationSink::Range {
PickleDeserialization() {
this.asCfgNode().(CallNode).getFunction() = Pickle::pickle::loads().asCfgNode()
}
override DataFlow::Node getData() { result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0) }
}

View File

@@ -327,4 +327,108 @@ private module Stdlib {
)
}
}
// ---------------------------------------------------------------------------
// marshal
// ---------------------------------------------------------------------------
/** Gets a reference to the `marshal` module. */
private DataFlow::Node marshal(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("marshal")
or
exists(DataFlow::TypeTracker t2 | result = marshal(t2).track(t2, t))
}
/** Gets a reference to the `marshal` module. */
DataFlow::Node marshal() { result = marshal(DataFlow::TypeTracker::end()) }
module marshal {
/** Gets a reference to the `marshal.loads` function. */
private DataFlow::Node loads(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("marshal", "loads")
or
t.startInAttr("loads") and
result = marshal()
or
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
}
/** Gets a reference to the `marshal.loads` function. */
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
}
/**
* A call to `marshal.loads`
* See https://docs.python.org/3/library/marshal.html#marshal.loads
*/
private class MarshalDeserialization extends UnmarshalingFunction::Range {
MarshalDeserialization() {
this.asCfgNode().(CallNode).getFunction() = marshal::loads().asCfgNode()
}
override predicate unsafe() { any() }
override DataFlow::Node getAnInput() {
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
}
override DataFlow::Node getOutput() { result = this }
override string getFormat() { none() }
}
// ---------------------------------------------------------------------------
// pickle
// ---------------------------------------------------------------------------
private string pickleModuleName() { result in ["pickle", "cPickle"] }
/** Gets a reference to the `pickle` module. */
private DataFlow::Node pickle(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule(pickleModuleName())
or
exists(DataFlow::TypeTracker t2 | result = pickle(t2).track(t2, t))
}
/** Gets a reference to the `pickle` module. */
DataFlow::Node pickle() { result = pickle(DataFlow::TypeTracker::end()) }
module pickle {
/** Gets a reference to the `pickle.loads` function. */
private DataFlow::Node loads(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember(pickleModuleName(), "loads")
or
t.startInAttr("loads") and
result = pickle()
or
exists(DataFlow::TypeTracker t2 | result = loads(t2).track(t2, t))
}
/** Gets a reference to the `pickle.loads` function. */
DataFlow::Node loads() { result = loads(DataFlow::TypeTracker::end()) }
}
/**
* A call to `pickle.loads`
* See https://docs.python.org/3/library/pickle.html#pickle.loads
*/
private class PickleDeserialization extends UnmarshalingFunction::Range {
PickleDeserialization() {
this.asCfgNode().(CallNode).getFunction() = pickle::loads().asCfgNode()
}
override predicate unsafe() { any() }
override DataFlow::Node getAnInput() {
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
}
override DataFlow::Node getOutput() { result = this }
override string getFormat() {
result = this.asCfgNode().(CallNode).getArgByName("encoding").(NameNode).getId()
}
}
}

View File

@@ -1,5 +1,6 @@
/**
* Provides classes modeling security-relevant aspects of the `yaml` package.
* Provides classes modeling security-relevant aspects of the PyYAML package
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
*/
private import python
@@ -9,7 +10,7 @@ private import experimental.semmle.python.Concepts
private module Yaml {
/** Gets a reference to the `yaml` module. */
DataFlow::Node yaml(DataFlow::TypeTracker t) {
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importModule("yaml")
or
@@ -21,29 +22,42 @@ private module Yaml {
module yaml {
/** Gets a reference to the `yaml.load` function. */
DataFlow::Node load(DataFlow::TypeTracker t) {
private DataFlow::Node load(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importMember("yaml", "load")
or
t.startInAttr("load") and
result = yaml()
or
exists(DataFlow::TypeTracker t2 | result = yaml::load(t2).track(t2, t))
exists(DataFlow::TypeTracker t2 | result = load(t2).track(t2, t))
}
/** Gets a reference to the `yaml.load` function. */
DataFlow::Node load() { result = yaml::load(DataFlow::TypeTracker::end()) }
DataFlow::Node load() { result = load(DataFlow::TypeTracker::end()) }
}
}
/**
* A call to `yaml.load`
* See https://pyyaml.org/wiki/PyYAMLDocumentation
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class YamlDeserialization extends DeserializationSink::Range {
private class YamlDeserialization extends UnmarshalingFunction::Range {
YamlDeserialization() {
this.asCfgNode().(CallNode).getFunction() = Yaml::yaml::load().asCfgNode()
}
override DataFlow::Node getData() { result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0) }
override predicate unsafe() {
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default `Loader` will be used, which is not safe.
not this.asCfgNode().(CallNode).getArgByName("Loader").(NameNode).getId() in ["SafeLoader",
"BaseLoader"]
}
override DataFlow::Node getAnInput() {
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
}
override DataFlow::Node getOutput() { result = this }
override string getFormat() { none() }
}