Merge pull request #5437 from RasmusWL/small-pyyaml-improvements

Python: Small PyYAML improvements
This commit is contained in:
yoff
2021-03-19 11:15:49 +01:00
committed by GitHub
3 changed files with 87 additions and 111 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Improved modeling of the `PyYAML` PyPI package, so we now correctly treat `CSafeLoader` and `CBaseLoader` as being safe loaders that can not lead to code execution.

View File

@@ -1,119 +1,72 @@
/**
* Provides classes modeling security-relevant aspects of the PyYAML package
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private module Yaml {
/** Gets a reference to the `yaml` module. */
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("yaml")
or
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
}
/** Gets a reference to the `yaml` module. */
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
/** Provides models for the `yaml` module. */
module yaml {
/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
*/
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in [
// functions
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all",
// Classes
"SafeLoader", "BaseLoader"
] and
(
t.start() and
result = DataFlow::importNode("yaml." + attr_name)
or
t.startInAttr(attr_name) and
result = yaml()
)
or
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
yaml_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}
pragma[nomagic]
private predicate yaml_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
}
/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
*/
DataFlow::Node yaml_attr(string attr_name) {
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
}
}
}
private import semmle.python.ApiGraphs
/**
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
override CallNode node;
string func_name;
YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all"
] and
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
}
private module Yaml {
/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
|
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
)
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
override CallNode node;
string func_name;
YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all"
] and
this = API::moduleImport("yaml").getMember(func_name).getACall()
}
/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
loader_arg =
API::moduleImport("yaml")
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
.getAUse()
)
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "YAML" }
}
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "YAML" }
}

View File

@@ -1,16 +1,37 @@
import yaml
from yaml import SafeLoader
# Unsafe:
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
# Safe:
yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
################################################################################
# load_all variants
################################################################################
# Unsafe:
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
# Safe:
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
################################################################################
# C-based loaders with `libyaml`
################################################################################
# Unsafe:
yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
# Safe:
yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML