mirror of
https://github.com/github/codeql.git
synced 2025-12-20 10:46:30 +01:00
Merge pull request #4453 from yoff/python-port-unsafe-deserialization
Python: port unsafe deserialization
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
/**
|
||||
* @name Deserializing untrusted input
|
||||
* @description Deserializing user-controlled data may allow attackers to execute arbitrary code.
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-deserialization
|
||||
* @problem.severity error
|
||||
* @sub-severity high
|
||||
* @precision high
|
||||
* @tags external/cwe/cwe-502
|
||||
* security
|
||||
* serialization
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.dataflow.DataFlow
|
||||
import experimental.dataflow.TaintTracking
|
||||
import experimental.semmle.python.Concepts
|
||||
import experimental.dataflow.RemoteFlowSources
|
||||
import DataFlow::PathGraph
|
||||
|
||||
/**
 * A taint-tracking configuration whose sources are remote flow sources and
 * whose sinks are the inputs of decoders that may execute what they decode.
 */
class UnsafeDeserializationConfiguration extends TaintTracking::Configuration {
  UnsafeDeserializationConfiguration() { this = "UnsafeDeserializationConfiguration" }

  /** Holds if `source` is a `RemoteFlowSource`. */
  override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }

  /** Holds if `sink` is an input to a `Decoding` for which `mayExecuteInput()` holds. */
  override predicate isSink(DataFlow::Node sink) {
    sink = any(Decoding decoder | decoder.mayExecuteInput()).getAnInput()
  }
}
|
||||
|
||||
// Report every path from a remote flow source to the input of an unsafe decoder.
from UnsafeDeserializationConfiguration cfg, DataFlow::PathNode src, DataFlow::PathNode dst
where cfg.hasFlowPath(src, dst)
select dst.getNode(), src, dst, "Deserializing of $@.", src.getNode(), "untrusted input"
|
||||
@@ -40,6 +40,63 @@ module SystemCommandExecution {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A data-flow node at which data is decoded from a binary or textual format.
 * The concept is deliberately broad: it is meant to cover deserialization,
 * unmarshalling, decoding, unpickling, decompressing, decrypting, parsing etc.
 *
 * A decoding step should normally preserve taint. It can also be a problem in
 * its own right, e.g. when it allows code execution or could result in
 * denial-of-service.
 *
 * Extend this class to refine existing API models. If you want to model new APIs,
 * extend `Decoding::Range` instead.
 */
class Decoding extends DataFlow::Node {
  // The underlying `Range` model; every member predicate below delegates to it.
  Decoding::Range range;

  Decoding() { range = this }

  /** Gets an identifier for the format this function decodes from, such as "JSON". */
  string getFormat() { range.getFormat() = result }

  /** Gets an input that is decoded by this function. */
  DataFlow::Node getAnInput() { range.getAnInput() = result }

  /** Gets the output that contains the decoded data produced by this function. */
  DataFlow::Node getOutput() { range.getOutput() = result }

  /** Holds if this call may execute code embedded in its input. */
  predicate mayExecuteInput() { range.mayExecuteInput() }
}
|
||||
|
||||
/** Provides the extension point used for modeling new decoding mechanisms. */
module Decoding {
  /**
   * A data-flow node at which data is decoded from a binary or textual format.
   * The concept is deliberately broad: it is meant to cover deserialization,
   * unmarshalling, decoding, unpickling, decompressing, decrypting, parsing etc.
   *
   * A decoding step should normally preserve taint. It can also be a problem in
   * its own right, e.g. when it allows code execution or could result in
   * denial-of-service.
   *
   * Extend this class to model new APIs. If you want to refine existing API models,
   * extend `Decoding` instead.
   */
  abstract class Range extends DataFlow::Node {
    /** Gets an identifier for the format this function decodes from, such as "JSON". */
    abstract string getFormat();

    /** Gets an input that is decoded by this function. */
    abstract DataFlow::Node getAnInput();

    /** Gets the output that contains the decoded data produced by this function. */
    abstract DataFlow::Node getOutput();

    /** Holds if this call may execute code embedded in its input. */
    abstract predicate mayExecuteInput();
  }
}
|
||||
|
||||
/**
|
||||
* A data-flow node that dynamically executes Python code.
|
||||
*
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
* Helper file that imports all framework modeling.
|
||||
*/
|
||||
|
||||
private import experimental.semmle.python.frameworks.Dill
|
||||
private import experimental.semmle.python.frameworks.Django
|
||||
private import experimental.semmle.python.frameworks.Flask
|
||||
private import experimental.semmle.python.frameworks.Invoke
|
||||
private import experimental.semmle.python.frameworks.Stdlib
|
||||
private import experimental.semmle.python.frameworks.Yaml
|
||||
|
||||
58
python/ql/src/experimental/semmle/python/frameworks/Dill.qll
Normal file
58
python/ql/src/experimental/semmle/python/frameworks/Dill.qll
Normal file
@@ -0,0 +1,58 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the 'dill' package.
|
||||
* See https://pypi.org/project/dill/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
private module Dill {
  /** Gets a reference to the `dill` module, as tracked by type tracker `t`. */
  private DataFlow::Node dill(DataFlow::TypeTracker t) {
    result = DataFlow::importNode("dill") and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = dill(prev).track(prev, t))
  }

  /** Gets a reference to the `dill` module. */
  DataFlow::Node dill() { dill(DataFlow::TypeTracker::end()) = result }

  /** Provides models for the `dill` module. */
  module dill {
    /** Gets a reference to the `dill.loads` function, as tracked by type tracker `t`. */
    private DataFlow::Node loads(DataFlow::TypeTracker t) {
      // Direct import of the function: `from dill import loads`.
      result = DataFlow::importNode("dill.loads") and
      t.start()
      or
      // Attribute `loads` read from a reference to the `dill` module.
      result = dill() and
      t.startInAttr("loads")
      or
      exists(DataFlow::TypeTracker prev | result = loads(prev).track(prev, t))
    }

    /** Gets a reference to the `dill.loads` function. */
    DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
  }
}
|
||||
|
||||
/**
 * A call to `dill.loads`.
 *
 * See https://pypi.org/project/dill/ (which currently refers you
 * to https://docs.python.org/3/library/pickle.html#pickle.loads)
 */
private class DillLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  DillLoadsCall() { node.getFunction() = Dill::dill::loads().asCfgNode() }

  /** Holds unconditionally: every `dill.loads` call is treated as able to execute its input. */
  override predicate mayExecuteInput() { any() }

  /**
   * Gets the data being deserialized: either the first positional argument or
   * the keyword argument `str` (the name of the first parameter of `dill.loads`).
   */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("str")
  }

  /** Gets the call itself, which evaluates to the deserialized object. */
  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "dill" }
}
|
||||
@@ -18,7 +18,7 @@ private import experimental.semmle.python.frameworks.Werkzeug
|
||||
*/
|
||||
private module Flask {
|
||||
/** Gets a reference to the `flask` module. */
|
||||
DataFlow::Node flask(DataFlow::TypeTracker t) {
|
||||
private DataFlow::Node flask(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("flask")
|
||||
or
|
||||
@@ -31,7 +31,7 @@ private module Flask {
|
||||
/** Provides models for the `flask` module. */
|
||||
module flask {
|
||||
/** Gets a reference to the `flask.request` object. */
|
||||
DataFlow::Node request(DataFlow::TypeTracker t) {
|
||||
private DataFlow::Node request(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("flask.request")
|
||||
or
|
||||
|
||||
@@ -329,6 +329,106 @@ private module Stdlib {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// marshal
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `marshal` module, as tracked by type tracker `t`. */
private DataFlow::Node marshal(DataFlow::TypeTracker t) {
  result = DataFlow::importNode("marshal") and
  t.start()
  or
  exists(DataFlow::TypeTracker prev | result = marshal(prev).track(prev, t))
}
|
||||
|
||||
/** Gets a reference to the `marshal` module (type tracking has reached its end state). */
DataFlow::Node marshal() { marshal(DataFlow::TypeTracker::end()) = result }
|
||||
|
||||
/** Provides models for the `marshal` module. */
module marshal {
  /** Gets a reference to the `marshal.loads` function, as tracked by type tracker `t`. */
  private DataFlow::Node loads(DataFlow::TypeTracker t) {
    // Direct import of the function: `from marshal import loads`.
    result = DataFlow::importNode("marshal.loads") and
    t.start()
    or
    // Attribute `loads` read from a reference to the `marshal` module.
    result = marshal() and
    t.startInAttr("loads")
    or
    exists(DataFlow::TypeTracker prev | result = loads(prev).track(prev, t))
  }

  /** Gets a reference to the `marshal.loads` function. */
  DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
}
|
||||
|
||||
/**
 * A call to `marshal.loads`, modeled as a decoding step.
 *
 * See https://docs.python.org/3/library/marshal.html#marshal.loads
 */
private class MarshalLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  MarshalLoadsCall() { marshal::loads().asCfgNode() = node.getFunction() }

  override string getFormat() { result = "marshal" }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call itself, which is where the decoded value becomes available. */
  override DataFlow::Node getOutput() { this = result }

  /** Holds unconditionally for every `marshal.loads` call. */
  override predicate mayExecuteInput() { any() }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// pickle
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets one of the module names that are modeled as the `pickle` module. */
private string pickleModuleName() { result = ["pickle", "cPickle", "_pickle"] }
|
||||
|
||||
/** Gets a reference to the `pickle` module, as tracked by type tracker `t`. */
private DataFlow::Node pickle(DataFlow::TypeTracker t) {
  // Any of the names from `pickleModuleName()` counts as the `pickle` module.
  result = DataFlow::importNode(pickleModuleName()) and
  t.start()
  or
  exists(DataFlow::TypeTracker prev | result = pickle(prev).track(prev, t))
}
|
||||
|
||||
/** Gets a reference to the `pickle` module (type tracking has reached its end state). */
DataFlow::Node pickle() { pickle(DataFlow::TypeTracker::end()) = result }
|
||||
|
||||
/** Provides models for the `pickle` module. */
module pickle {
  /** Gets a reference to the `pickle.loads` function, as tracked by type tracker `t`. */
  private DataFlow::Node loads(DataFlow::TypeTracker t) {
    // Direct import of the function from any of the modeled pickle modules.
    result = DataFlow::importNode(pickleModuleName() + ".loads") and
    t.start()
    or
    // Attribute `loads` read from a reference to the `pickle` module.
    result = pickle() and
    t.startInAttr("loads")
    or
    exists(DataFlow::TypeTracker prev | result = loads(prev).track(prev, t))
  }

  /** Gets a reference to the `pickle.loads` function. */
  DataFlow::Node loads() { loads(DataFlow::TypeTracker::end()) = result }
}
|
||||
|
||||
/**
 * A call to `pickle.loads`, modeled as a decoding step.
 *
 * See https://docs.python.org/3/library/pickle.html#pickle.loads
 */
private class PickleLoadsCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  PickleLoadsCall() { pickle::loads().asCfgNode() = node.getFunction() }

  override string getFormat() { result = "pickle" }

  /** Gets the first positional argument of the call. */
  override DataFlow::Node getAnInput() { node.getArg(0) = result.asCfgNode() }

  /** Gets the call itself, which is where the decoded value becomes available. */
  override DataFlow::Node getOutput() { this = result }

  /** Holds unconditionally for every `pickle.loads` call. */
  override predicate mayExecuteInput() { any() }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// popen2
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
98
python/ql/src/experimental/semmle/python/frameworks/Yaml.qll
Normal file
98
python/ql/src/experimental/semmle/python/frameworks/Yaml.qll
Normal file
@@ -0,0 +1,98 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package
|
||||
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import experimental.dataflow.DataFlow
|
||||
private import experimental.dataflow.RemoteFlowSources
|
||||
private import experimental.semmle.python.Concepts
|
||||
|
||||
private module Yaml {
  /** Gets a reference to the `yaml` module, as tracked by type tracker `t`. */
  private DataFlow::Node yaml(DataFlow::TypeTracker t) {
    result = DataFlow::importNode("yaml") and
    t.start()
    or
    exists(DataFlow::TypeTracker prev | result = yaml(prev).track(prev, t))
  }

  /** Gets a reference to the `yaml` module. */
  DataFlow::Node yaml() { yaml(DataFlow::TypeTracker::end()) = result }

  /** Provides models for the `yaml` module. */
  module yaml {
    /**
     * Gets a reference to the attribute `attr_name` of the `yaml` module, as
     * tracked by type tracker `t`.
     * WARNING: Only holds for the attributes `load`, `SafeLoader` and `BaseLoader`.
     *
     * For example, using `attr_name = "load"` will get all uses of `yaml.load`.
     */
    private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
      attr_name in ["load", "SafeLoader", "BaseLoader"] and
      (
        // Direct import of the attribute, e.g. `from yaml import load`.
        t.start() and
        result = DataFlow::importNode("yaml." + attr_name)
        or
        // Attribute read from a reference to the `yaml` module.
        t.startInAttr(attr_name) and
        result = yaml()
      )
      or
      // The usual `yaml_attr(prev, attr_name).track(prev, t)` formulation performed badly,
      // so the tracking step is written out by hand and the join is forced through
      // the `nomagic` helper predicate `yaml_attr_first_join`.
      exists(DataFlow::TypeTracker prev, DataFlow::StepSummary summary |
        yaml_attr_first_join(prev, attr_name, result, summary) and
        t = prev.append(summary)
      )
    }

    /**
     * Holds if `res` is reached by a single step, described by `summary`, from a
     * reference to attribute `attr_name` of the `yaml` module tracked by `t2`.
     *
     * Kept as a separate `nomagic` predicate to force the join used by `yaml_attr`.
     */
    pragma[nomagic]
    private predicate yaml_attr_first_join(
      DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
    ) {
      DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
    }

    /**
     * Gets a reference to the attribute `attr_name` of the `yaml` module.
     * WARNING: Only holds for the attributes `load`, `SafeLoader` and `BaseLoader`.
     *
     * For example, using `attr_name = "load"` will get all uses of `yaml.load`.
     */
    DataFlow::Node yaml_attr(string attr_name) {
      yaml_attr(DataFlow::TypeTracker::end(), attr_name) = result
    }
  }
}
|
||||
|
||||
/**
 * A call to `yaml.load`
 * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
 */
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
  override CallNode node;

  YamlLoadCall() { node.getFunction() = Yaml::yaml::yaml_attr("load").asCfgNode() }

  /**
   * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
   * In 2020 new exploits were found, meaning it's not safe. The current plan is to change the default to `SafeLoader` in release 6.0
   * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
   * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
   * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
   */
  override predicate mayExecuteInput() {
    // The loader can be given either as the second positional argument or as the
    // keyword argument `Loader`. Unless one of them is a reference to `SafeLoader`
    // or `BaseLoader`, the call is considered unsafe; this includes the case where
    // no loader is given at all and the default loader is used.
    not exists(ControlFlowNode loader |
      loader = node.getArg(1) or loader = node.getArgByName("Loader")
    |
      loader = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]).asCfgNode()
    )
  }

  /**
   * Gets the data being parsed: either the first positional argument or the
   * keyword argument `stream` (the name of the first parameter of `yaml.load`).
   */
  override DataFlow::Node getAnInput() {
    result.asCfgNode() = node.getArg(0)
    or
    result.asCfgNode() = node.getArgByName("stream")
  }

  /** Gets the call itself, which evaluates to the loaded object. */
  override DataFlow::Node getOutput() { result = this }

  override string getFormat() { result = "YAML" }
}
|
||||
Reference in New Issue
Block a user