revert YAML.qll and yaml sinks to previous PR, make a separate experimental query only for yaml

This commit is contained in:
amammad
2023-10-04 18:21:12 +02:00
committed by Harry Maclean
parent c582ea626d
commit 32f5667bb6
15 changed files with 564 additions and 154 deletions

View File

@@ -0,0 +1,62 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Deserializing untrusted data using any method that allows the construction of
arbitrary objects is easily exploitable and, in many cases, allows an attacker
to execute arbitrary code.
</p>
</overview>
<recommendation>
<p>
If deserializing an untrusted YAML document using the <code>psych</code> gem,
prefer the <code>safe_load</code> and <code>safe_load_file</code> methods over
<code>load</code> and <code>load_file</code>, as the former will safely
handle untrusted data. Avoid passing untrusted data to the <code>load_stream</code>
method. In <code>psych</code> version 4.0.0 and above, the <code>load</code> method can
safely be used.
</p>
</recommendation>
<example>
<p>
The following example calls the <code>Marshal.load</code>,
<code>JSON.load</code>, <code>YAML.load</code>, and <code>Oj.load</code> methods
on data from an HTTP request. Since these methods are capable of deserializing
to arbitrary objects, this is inherently unsafe.
</p>
<sample src="examples/UnsafeDeserializationBad.rb"/>
<p>
Using <code>JSON.parse</code> and <code>YAML.safe_load</code> instead, as in the
following example, removes the vulnerability. Similarly, calling
<code>Oj.load</code> with any mode other than <code>:object</code> is safe, as
is calling <code>Oj.safe_load</code>. Note that there is no safe way to deserialize
untrusted data using <code>Marshal</code>.
</p>
<sample src="examples/UnsafeDeserializationGood.rb"/>
</example>
<references>
<li>
OWASP vulnerability description:
<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">deserialization of untrusted data</a>.
</li>
<li>
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.0.0/doc/security_rdoc.html">guidance on deserializing objects safely</a>.
</li>
<li>
Ruby documentation: <a href="https://ruby-doc.org/stdlib-3.0.2/libdoc/yaml/rdoc/YAML.html#module-YAML-label-Security">security guidance on the YAML library</a>.
</li>
<li>
Blog post explaining how unsafe YAML load methods can lead to code execution:
<a href="https://devcraft.io/2021/01/07/universal-deserialisation-gadget-for-ruby-2-x-3-x.html">Universal Deserialisation Gadget for Ruby 2.x-3.x</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Deserialization of user-controlled yaml data
* @description Deserializing user-controlled yaml data may allow attackers to
* execute arbitrary code.
* @kind path-problem
* @problem.severity warning
* @security-severity 9.8
* @precision high
* @id rb/unsafe-unsafeyamldeserialization
* @tags security
* external/cwe/cwe-502
*/
import ruby
import codeql.ruby.security.UnsafeDeserializationQuery
import UnsafeCodeConstructionFlow::PathGraph
// Reports one alert per taint path found by `UnsafeCodeConstructionFlow`, located at the
// sink and naming the source in the message.
// NOTE(review): the description is obtained through a cast to `UnsafeDeserialization::Source`;
// confirm that the sources of this flow are members of that class, since a source that is
// not yields no result row.
from UnsafeCodeConstructionFlow::PathNode src, UnsafeCodeConstructionFlow::PathNode dst
where UnsafeCodeConstructionFlow::flowPath(src, dst)
select dst.getNode(), src, dst, "Unsafe deserialization depends on a $@.", src.getNode(),
  src.getNode().(UnsafeDeserialization::Source).describe()

View File

@@ -0,0 +1,83 @@
/**
* Provides a taint-tracking configuration for reasoning about unsafe YAML deserialization.
*
* Note, for performance reasons: only import this file if
* `UnsafeCodeConstructionFlow` is needed, otherwise
* `UnsafeYamlDeserializationCustomizations` should be imported instead.
*/
private import codeql.ruby.AST
private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking
private import codeql.ruby.ApiGraphs
import UnsafeYamlDeserializationCustomizations::UnsafeYamlDeserialization
import Yaml
/**
 * A stateful data-flow configuration that tracks `Source` nodes to `Sink` nodes,
 * stopping at `Sanitizer` nodes.
 */
private module UnsafeYamlDeserializationConfig implements DataFlow::StateConfigSig {
  class FlowState = FlowState::State;

  /** Holds if `source` is a `Source`; it is a source in both the `Parse` and the `Load` state. */
  predicate isSource(DataFlow::Node source, FlowState state) {
    (state instanceof FlowState::Load or state instanceof FlowState::Parse) and
    source instanceof Source
  }

  /** Holds if `sink` is a `Sink`; it is a sink in both the `Parse` and the `Load` state. */
  predicate isSink(DataFlow::Node sink, FlowState state) {
    (state instanceof FlowState::Load or state instanceof FlowState::Parse) and
    sink instanceof Sink
  }

  /** Holds if `node` is a `Sanitizer`, through which no flow is tracked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /**
   * Holds if there is an extra taint step from `pred` to `succ` that stays in the
   * `Parse` state. `pred` is always the first argument of the YAML library's
   * `parse`, `parse_stream` or `parse_file` method. Two kinds of `succ` are covered:
   *
   * 1. The value returned by the parse method itself:
   * ```rb
   * x = source
   * parsed = YAML.parse(x)
   * ```
   * 2. The value returned by `to_ruby` called on any API node that `yamlParseNode`
   * derives from the parse method:
   * ```rb
   * sink parsed.to_ruby # Unsafe call
   * ```
   */
  predicate isAdditionalFlowStep(
    DataFlow::Node pred, FlowState stateFrom, DataFlow::Node succ, FlowState stateTo
  ) {
    stateFrom instanceof FlowState::Parse and
    stateTo instanceof FlowState::Parse and
    exists(API::Node parseMethod |
      parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"])
    |
      pred = parseMethod.getArgument(0).asSink() and
      (
        // argument -> result of `to_ruby` somewhere downstream of the parse method
        succ = yamlParseNode(parseMethod).getMethod("to_ruby").getReturn().asSource()
        or
        // argument -> result of the parse method itself
        succ = parseMethod.getReturn().asSource()
      )
    )
  }
}
/**
 * Holds if `pred` is the first argument of the YAML library's `parse`, `parse_stream`
 * or `parse_file` method and `succ` is the value returned by that same method.
 *
 * NOTE(review): this repeats one disjunct of the configuration's `isAdditionalFlowStep`
 * without the flow-state constraints and looks like a debugging aid; confirm it is
 * still needed.
 */
predicate isAdditionalFlowStepTest(DataFlow::Node pred, DataFlow::Node succ) {
  exists(API::Node parser |
    parser = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]) and
    pred = parser.getArgument(0).asSink() and
    succ = parser.getReturn().asSource()
  )
}
/**
* Taint-tracking for reasoning about unsafe deserialization.
*
* This is `TaintTracking::GlobalWithState` instantiated with
* `UnsafeYamlDeserializationConfig`.
* NOTE(review): the name says "code construction" although the configuration is
* about YAML deserialization; presumably a copy-paste leftover, confirm before renaming
* because importing queries refer to this name.
*/
module UnsafeCodeConstructionFlow = TaintTracking::GlobalWithState<UnsafeYamlDeserializationConfig>;

View File

@@ -0,0 +1,136 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about unsafe
* deserialization, as well as extension points for adding your own.
*/
private import codeql.ruby.AST
private import codeql.ruby.ApiGraphs
private import codeql.ruby.CFG
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.frameworks.ActiveJob
private import codeql.ruby.frameworks.core.Module
private import codeql.ruby.frameworks.core.Kernel
private import Yaml
module UnsafeYamlDeserialization {
/** Flow states that distinguish flow through a YAML `parse*` node from flow through a YAML `load*` node. */
module FlowState {
  private newtype TState =
    TParse() or
    TLoad()

  /** A flow state: either the state for `YAML.load*` or the state for `YAML.parse*`. */
  class State extends TState {
    /**
     * Gets the canonical name of this state, which is `"parse"` or `"load"`.
     */
    string getStringRepresentation() {
      result = "load" and this = TLoad()
      or
      result = "parse" and this = TParse()
    }

    /**
     * Gets a textual representation of this state; identical to `getStringRepresentation()`.
     */
    string toString() { result = this.getStringRepresentation() }
  }

  /**
   * The flow state used for `YAML.load*` methods.
   */
  class Load extends State, TLoad { }

  /**
   * The flow state used for `YAML.parse*` methods.
   */
  class Parse extends State, TParse { }
}
/**
* A data flow source for unsafe deserialization vulnerabilities.
*
* This class is abstract: extend it to contribute additional sources.
*/
abstract class Source extends DataFlow::Node {
/**
* Gets a string that describes the source.
* The default is `"user-provided value"`; subclasses may override it.
*/
string describe() { result = "user-provided value" }
}
/**
* A data flow sink for unsafe deserialization vulnerabilities.
*
* This class is abstract: extend it to contribute additional sinks.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for unsafe deserialization vulnerabilities.
*
* This class is abstract: extend it to contribute additional sanitizers.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of remote user input, considered as a flow source for unsafe deserialization.
*
* Every `RemoteFlowSource` is a member of this class; `describe()` is not overridden,
* so the description inherited from `Source` is used.
*/
class RemoteFlowSourceAsSource extends Source instanceof RemoteFlowSource { }
/**
 * A read of data via `STDIN`, `ARGF`, `$stdin` or a `Kernel` reading method, or a read
 * of `ARGV`, considered as a flow source for unsafe deserialization.
 */
class StdInSource extends UnsafeYamlDeserialization::Source {
  /** `true` when the source is described as stdin input, `false` when it is a read of `ARGV`. */
  boolean stdin;

  StdInSource() {
    stdin = true and
    (
      // `gets`/`read` called on the `STDIN` or `ARGF` constant.
      this = API::getTopLevelMember(["STDIN", "ARGF"]).getAMethodCall(["gets", "read"])
      or
      // > $stdin == STDIN
      // => true
      // but $stdin is special in that it is a global variable and not a constant. `API::getTopLevelMember` only gets constants.
      exists(DataFlow::Node stdinGlobal |
        stdinGlobal.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() =
          "$stdin" and
        this = stdinGlobal.getALocalSource().getAMethodCall(["gets", "read"])
      )
      or
      // `gets`, `readline` or `readlines` called as a `Kernel` method.
      this.(Kernel::KernelMethodCall).getMethodName() = ["gets", "readline", "readlines"]
    )
    or
    // ARGV.
    stdin = false and
    this.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() = "ARGV"
  }

  /** Gets `"value from stdin"` or `"value from ARGV"`, depending on the kind of read. */
  override string describe() {
    stdin = true and result = "value from stdin"
    or
    stdin = false and result = "value from ARGV"
  }
}
/**
 * An argument in a call to `YAML.load`, `YAML.unsafe_load`, `YAML.unsafe_load_file`
 * or `YAML.load_stream`, considered a sink for unsafe deserialization. The `YAML`
 * module is an alias of `Psych` in recent versions of Ruby.
 */
class YamlLoadArgument extends Sink {
  YamlLoadArgument() {
    // The first positional argument of any of the loading methods.
    // Note: `load` is safe in psych/yaml >= 4.0.0.
    this =
      yamlLibrary()
          .getAMethodCall(["load", "unsafe_load_file", "unsafe_load", "load_stream"])
          .getArgument(0)
    or
    // The file name passed by keyword to `unsafe_load_file`.
    this = yamlLibrary().getAMethodCall("unsafe_load_file").getKeywordArgument("filename")
    or
    // The document passed by keyword to `unsafe_load` or `load_stream`.
    this = yamlLibrary().getAMethodCall(["unsafe_load", "load_stream"]).getKeywordArgument("yaml")
  }
}
/**
 * The value returned by a call to `to_ruby` or `transform` on an API node that
 * `yamlParseNode` derives from `YAML.parse`, `YAML.parse_stream` or `YAML.parse_file`,
 * considered a sink for unsafe deserialization.
 *
 * NOTE(review): despite the class name, the sink is the result of the
 * `to_ruby`/`transform` call rather than an argument of `YAML.parse*`.
 */
class YamlParseArgument extends Sink {
  YamlParseArgument() {
    exists(API::Node parseMethod, API::Node derived |
      parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]) and
      derived = yamlParseNode(parseMethod)
    |
      this = derived.getMethod(["to_ruby", "transform"]).getReturn().asSource()
    )
  }
}
}

View File

@@ -0,0 +1,34 @@
/**
* Provides modeling for the `YAML` and `Psych` libraries.
*/
private import codeql.ruby.dataflow.FlowSteps
private import codeql.ruby.DataFlow
private import codeql.ruby.ApiGraphs
/**
 * Gets `yamlParseInstance` itself, or any API node reachable from it through zero or
 * more `getReturn`, `getBlock`, `getAnElement`, `getParameter`, `getMethod`,
 * `getMember` or `getArgument` steps.
 *
 * Callers in this change pass the API node of the YAML `parse`, `parse_stream` or
 * `parse_file` method as `yamlParseInstance`.
 */
API::Node yamlParseNode(API::Node yamlParseInstance) {
  // base case: the starting node
  result = yamlParseInstance
  or
  // recursive case: one more API-graph step from an already reachable node
  exists(API::Node reached | reached = yamlParseNode(yamlParseInstance) |
    result = reached.getReturn() or
    result = reached.getBlock() or
    result = reached.getAnElement() or
    result = reached.getParameter(_) or
    result = reached.getMethod(_) or
    result = reached.getMember(_) or
    result = reached.getArgument(_)
  )
}
/**
 * Gets an API node for the top-level `YAML` or `Psych` constant.
 */
API::Node yamlLibrary() {
  result = API::getTopLevelMember("YAML")
  or
  result = API::getTopLevelMember("Psych")
}

View File

@@ -0,0 +1,16 @@
require 'yaml'
# Example controller: every call below deserializes the `yaml` request parameter
# in a way the accompanying query reports as unsafe.
class UserController < ActionController::Base
  def yaml_example
    object = YAML.unsafe_load(params[:yaml])
    object = YAML.load_stream(params[:yaml])
    stream = Psych.parse_stream(params[:yaml])
    # to_ruby is unsafe
    stream.children.each { |document| object = document.to_ruby }
    object = Psych.parse(params[:yaml]).to_ruby
    # ...
  end
end

View File

@@ -0,0 +1,10 @@
require 'yaml'
# Example controller showing YAML loading calls presented as safe for request data.
class UserController < ActionController::Base
def safe_yaml_example
# NOTE(review): `load` can safely be used only with psych >= 4.0.0; on older
# versions prefer `safe_load` for untrusted data.
object = YAML.load params[:yaml]
# NOTE(review): presumably relies on psych >= 4.0.0 as well; confirm.
# `safe_load_file` is the recommended method for untrusted data.
object = Psych.load_file params[:yaml]
# `safe_load` is the recommended way to handle untrusted YAML.
object = YAML.safe_load params[:yaml]
# ...
end
end