Python: Add modeling of ruamel.yaml PyPI package

This commit is contained in:
Rasmus Wriedt Larsen
2021-10-26 17:43:55 +02:00
parent 4a58349fcd
commit 1ce09afa08
8 changed files with 161 additions and 1 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of the `ruamel.yaml` PyPI package, resulting in additional sinks for the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query (since `ruamel.yaml.load` can lead to code execution).

View File

@@ -25,6 +25,7 @@ private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Psycopg2
private import semmle.python.frameworks.PyMySQL
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Stdlib

View File

@@ -0,0 +1,57 @@
/**
* Provides classes modeling security-relevant aspects of the `ruamel.yaml` PyPI package.
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `ruamel.yaml` PyPI package.
*
* See
* - https://pypi.org/project/ruamel.yaml/
*/
private module RuamelYaml {
// Note: `ruamel.yaml` is a fork of the `PyYAML` PyPI package, so that's why the
// interface is so similar.
/**
* A call to any of the loading functions in `ruamel.yaml` (`load`, `load_all`,
* `safe_load`, `safe_load_all`).
*
* These functions mirror the ones in `PyYAML` (which `ruamel.yaml` is a fork of), see
* https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class RuamelYamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
// Name of the `ruamel.yaml` member being called; bound in the characteristic
// predicate and reused by `mayExecuteInput` to tell `load*` from `safe_load*`.
string func_name;
RuamelYamlLoadCall() {
func_name in ["load", "load_all", "safe_load", "safe_load_all"] and
this = API::moduleImport("ruamel").getMember("yaml").getMember(func_name).getACall()
}
// Holds only for `load`/`load_all` (never for the `safe_*` functions), and only when
// neither the second positional argument nor the `Loader` keyword argument is a use
// of one of the safe loader classes listed below. A call that passes any other
// loader therefore still counts as possibly executing its input.
override predicate mayExecuteInput() {
func_name in ["load", "load_all"] and
// If the `Loader` argument is not set, the default loader will be used, which is
// not safe. The only safe loaders are `SafeLoader` or `BaseLoader` (and their
// variants with C implementation).
not exists(DataFlow::Node loader_arg |
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
loader_arg =
API::moduleImport("ruamel")
.getMember("yaml")
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
.getAUse()
)
}
// The data being decoded: the first positional argument or the `stream` keyword argument.
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("stream")] }
// The decoded value is the result of the call itself.
override DataFlow::Node getOutput() { result = this }
override string getFormat() { result = "YAML" }
}
}

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,33 @@
# Test input for the `ruamel.yaml` loading functions.
# NOTE(review): the trailing `# $ ...` comments look like inline test expectations
# (decode input/output/format, and whether the input may be executed) -- keep each one
# on the same line as the call it describes; confirm against the test query.
# NOTE(review): `payload` is not defined in this file; presumably the file is only
# analyzed and never executed -- confirm.
import ruamel.yaml
# Unsafe:
ruamel.yaml.load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
ruamel.yaml.load(stream=payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
ruamel.yaml.load(payload, ruamel.yaml.Loader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
# Safe:
ruamel.yaml.load(payload, ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, Loader=ruamel.yaml.SafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, ruamel.yaml.BaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load(..) decodeFormat=YAML
################################################################################
# load_all variants
################################################################################
# Unsafe:
ruamel.yaml.load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.load_all(..) decodeFormat=YAML decodeMayExecuteInput
# Safe:
ruamel.yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=ruamel.yaml.safe_load_all(..) decodeFormat=YAML
################################################################################
# C-based loaders with `libyaml`
################################################################################
# Unsafe:
ruamel.yaml.load(payload, ruamel.yaml.CLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML decodeMayExecuteInput
# Safe:
ruamel.yaml.load(payload, ruamel.yaml.CSafeLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML
ruamel.yaml.load(payload, ruamel.yaml.CBaseLoader) # $ decodeInput=payload decodeOutput=ruamel.yaml.load(..) decodeFormat=YAML

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env python3
# This file deliberately has no .py extension: the extractor then skips it, so it
# does not need test annotations.
#
# Proof of Concept showing which `ruamel.yaml` loading calls end up executing code
# embedded in the YAML document, and which ones reject such a document.

import os

import ruamel.yaml


class Exploit:
    """Object whose `__reduce__` names `os.system('ls')` as its reconstruction call."""

    def __reduce__(self):
        return (os.system, ('ls',))


def _must_reject(banner, attempt):
    """Print `banner`, run `attempt`, and require that it raises a ConstructorError.

    Any other outcome (no exception, or a different exception) propagates to the
    caller and aborts the script.
    """
    print(banner)
    try:
        attempt()
    except ruamel.yaml.constructor.ConstructorError:
        return
    raise Exception("should not happen")


data = Exploit()
serialized_data = ruamel.yaml.dump(data)

# Every entry below executes `ls` when its loader call runs. The calls are wrapped
# in lambdas so each one happens right after its banner is printed.
executing_calls = (
    (
        "!!! ruamel.yaml.load",
        lambda: ruamel.yaml.load(serialized_data),
    ),
    (
        "!!! ruamel.yaml.load kwarg",
        lambda: ruamel.yaml.load(stream=serialized_data),
    ),
    (
        "!!! ruamel.yaml.load with Loader=ruamel.yaml.Loader",
        lambda: ruamel.yaml.load(serialized_data, ruamel.yaml.Loader),
    ),
    (
        "!!! ruamel.yaml.load with Loader=ruamel.yaml.UnsafeLoader",
        lambda: ruamel.yaml.load(serialized_data, ruamel.yaml.UnsafeLoader),
    ),
    (
        "!!! ruamel.yaml.load with Loader=ruamel.yaml.CLoader",
        lambda: ruamel.yaml.load(serialized_data, ruamel.yaml.CLoader),
    ),
)
for banner, trigger in executing_calls:
    print(banner)
    trigger()

# `load_all` is lazy: the payload only fires once the result is iterated.
print("!!! ruamel.yaml.load_all")
for _ in ruamel.yaml.load_all(serialized_data):
    pass

# Verify that the safe variants really refuse the payload.
rule = "-" * 80
print("\n" + rule)
print("safe versions")
print(rule)

_must_reject(
    "!!! ruamel.yaml.safe_load",
    lambda: ruamel.yaml.safe_load(serialized_data),
)
_must_reject(
    "!!! ruamel.yaml.load with Loader=ruamel.yaml.SafeLoader",
    lambda: ruamel.yaml.load(serialized_data, ruamel.yaml.SafeLoader),
)
_must_reject(
    "!!! ruamel.yaml.load with Loader=ruamel.yaml.CSafeLoader",
    lambda: ruamel.yaml.load(serialized_data, ruamel.yaml.CSafeLoader),
)