Merge pull request #6967 from RasmusWL/ruamel.yaml

Python: Model `ruamel.yaml` PyPI package
This commit is contained in:
yoff
2021-10-28 10:19:08 +02:00
committed by GitHub
11 changed files with 232 additions and 8 deletions

View File

@@ -25,6 +25,7 @@ private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib

View File

@@ -0,0 +1,57 @@
/**
* Provides classes modeling security-relevant aspects of the `ruamel.yaml` PyPI package.
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `ruamel.yaml` PyPI package.
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/
private module RuamelYaml {
  // `ruamel.yaml` started out as a fork of the `PyYAML` PyPI package, which explains
  // why the two interfaces look so much alike.
  /**
   * A call to one of the `ruamel.yaml` loading functions modeled here: `load`,
   * `load_all`, `safe_load`, or `safe_load_all`.
   *
   * The interface mirrors PyYAML, see https://pyyaml.org/wiki/PyYAMLDocumentation
   * (you will have to scroll down).
   */
  private class RuamelYamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    /** The name of the `ruamel.yaml` member that this call targets. */
    string loadFunction;

    RuamelYamlLoadCall() {
      loadFunction = ["load", "load_all", "safe_load", "safe_load_all"] and
      this = API::moduleImport("ruamel").getMember("yaml").getMember(loadFunction).getACall()
    }

    /**
     * Holds if this call may execute its input: it is a `load`/`load_all` call for
     * which no known-safe loader is passed as the `Loader` argument.
     */
    override predicate mayExecuteInput() {
      // `safe_load` and `safe_load_all` are never flagged.
      loadFunction = ["load", "load_all"] and
      // When no `Loader` argument is given, the default loader is used, and that one
      // is not safe. Only `SafeLoader` and `BaseLoader` (plus their C-implemented
      // variants) are treated as safe.
      not exists(DataFlow::Node loader |
        (loader = this.getArg(1) or loader = this.getArgByName("Loader")) and
        loader =
          API::moduleImport("ruamel")
              .getMember("yaml")
              .getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
              .getAUse()
      )
    }

    /** Gets the data being decoded: the first positional argument or the `stream` keyword argument. */
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("stream")
    }

    /** Gets the decoded output, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "YAML" }
  }
}

View File

@@ -9,7 +9,6 @@
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
@@ -41,11 +40,17 @@ private module Yaml {
}
/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
* This function was thought safe from the 5.1 release in 2017, when the default
* loader was changed to `FullLoader` (see
* https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation).
*
* In 2020 new exploits were found, meaning it's not safe. With the 6.0 release (see
* https://github.com/yaml/pyyaml/commit/8cdff2c80573b8be8e8ad28929264a913a63aa33),
* when using `load` and `load_all` you are now required to specify a Loader. But
* from what I (@RasmusWL) can gather, `FullLoader` is still not to be considered
* safe, although (at least that is my impression) the known exploits have been
* mitigated. Also see
* https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389 for more
* details.
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
@@ -63,7 +68,7 @@ private module Yaml {
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }
override DataFlow::Node getOutput() { result = this }