mirror of
https://github.com/github/codeql.git
synced 2026-05-01 19:55:15 +02:00
Merge pull request #5437 from RasmusWL/small-pyyaml-improvements
Python: Small PyYAML improvements
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Improved modeling of the `PyYAML` PyPI package, so we now correctly treat `CSafeLoader` and `CBaseLoader` as being safe loaders that can not lead to code execution.
|
||||
@@ -1,119 +1,72 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package
|
||||
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
|
||||
* via `import yaml`)
|
||||
*
|
||||
* See
|
||||
* - https://pyyaml.org/wiki/PyYAMLDocumentation
|
||||
* - https://pyyaml.docsforge.com/master/documentation/
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
|
||||
private module Yaml {
|
||||
/** Gets a reference to the `yaml` module. */
|
||||
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
|
||||
t.start() and
|
||||
result = DataFlow::importNode("yaml")
|
||||
or
|
||||
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
|
||||
}
|
||||
|
||||
/** Gets a reference to the `yaml` module. */
|
||||
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
|
||||
|
||||
/** Provides models for the `yaml` module. */
|
||||
module yaml {
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `yaml` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*
|
||||
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
|
||||
*/
|
||||
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
|
||||
attr_name in [
|
||||
// functions
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all",
|
||||
// Classes
|
||||
"SafeLoader", "BaseLoader"
|
||||
] and
|
||||
(
|
||||
t.start() and
|
||||
result = DataFlow::importNode("yaml." + attr_name)
|
||||
or
|
||||
t.startInAttr(attr_name) and
|
||||
result = yaml()
|
||||
)
|
||||
or
|
||||
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
|
||||
// we have inlined that code and forced a join
|
||||
exists(DataFlow::TypeTracker t2 |
|
||||
exists(DataFlow::StepSummary summary |
|
||||
yaml_attr_first_join(t2, attr_name, result, summary) and
|
||||
t = t2.append(summary)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
private predicate yaml_attr_first_join(
|
||||
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
|
||||
) {
|
||||
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a reference to the attribute `attr_name` of the `yaml` module.
|
||||
* WARNING: Only holds for a few predefined attributes.
|
||||
*
|
||||
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
|
||||
*/
|
||||
DataFlow::Node yaml_attr(string attr_name) {
|
||||
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
|
||||
}
|
||||
}
|
||||
}
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
|
||||
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
|
||||
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
|
||||
* via `import yaml`)
|
||||
*
|
||||
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
|
||||
* See
|
||||
* - https://pyyaml.org/wiki/PyYAMLDocumentation
|
||||
* - https://pyyaml.docsforge.com/master/documentation/
|
||||
*/
|
||||
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
|
||||
override CallNode node;
|
||||
string func_name;
|
||||
|
||||
YamlLoadCall() {
|
||||
func_name in [
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all"
|
||||
] and
|
||||
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
|
||||
}
|
||||
|
||||
private module Yaml {
|
||||
/**
|
||||
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
|
||||
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
|
||||
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
|
||||
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
|
||||
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
|
||||
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
|
||||
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
|
||||
*
|
||||
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
|
||||
*/
|
||||
override predicate mayExecuteInput() {
|
||||
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
|
||||
or
|
||||
func_name in ["load", "load_all"] and
|
||||
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
|
||||
// then the default loader will be used, which is not safe.
|
||||
not exists(DataFlow::Node loader_arg |
|
||||
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
|
||||
|
|
||||
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
|
||||
)
|
||||
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
override CallNode node;
|
||||
string func_name;
|
||||
|
||||
YamlLoadCall() {
|
||||
func_name in [
|
||||
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
|
||||
"safe_load", "safe_load_all"
|
||||
] and
|
||||
this = API::moduleImport("yaml").getMember(func_name).getACall()
|
||||
}
|
||||
|
||||
/**
|
||||
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
|
||||
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
|
||||
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
|
||||
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
|
||||
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
|
||||
*/
|
||||
override predicate mayExecuteInput() {
|
||||
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
|
||||
or
|
||||
func_name in ["load", "load_all"] and
|
||||
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
|
||||
// then the default loader will be used, which is not safe.
|
||||
not exists(DataFlow::Node loader_arg |
|
||||
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
||||
|
|
||||
loader_arg =
|
||||
API::moduleImport("yaml")
|
||||
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
|
||||
.getAUse()
|
||||
)
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "YAML" }
|
||||
}
|
||||
|
||||
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "YAML" }
|
||||
}
|
||||
|
||||
@@ -1,16 +1,37 @@
|
||||
import yaml
|
||||
from yaml import SafeLoader
|
||||
|
||||
# Unsafe:
|
||||
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
|
||||
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
|
||||
# Safe:
|
||||
yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
|
||||
################################################################################
|
||||
# load_all variants
|
||||
################################################################################
|
||||
|
||||
# Unsafe:
|
||||
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
|
||||
# Safe:
|
||||
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
|
||||
################################################################################
|
||||
# C-based loaders with `libyaml`
|
||||
################################################################################
|
||||
|
||||
# Unsafe:
|
||||
yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
|
||||
|
||||
# Safe:
|
||||
yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
|
||||
|
||||
Reference in New Issue
Block a user