mirror of
https://github.com/github/codeql.git
synced 2026-04-24 16:25:15 +02:00
revert YAML.qll and yaml sinks to previous PR, make a separate experimental query only for yaml
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Deserializing untrusted data using any method that allows the construction of
|
||||
arbitrary objects is easily exploitable and, in many cases, allows an attacker
|
||||
to execute arbitrary code.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
|
||||
<p>
|
||||
If deserializing an untrusted YAML document using the <code>psych</code> gem,
|
||||
prefer the <code>safe_load</code> and <code>safe_load_file</code> methods over
|
||||
<code>load</code> and <code>load_file</code>, as the former will safely
|
||||
handle untrusted data. Avoid passing untrusted data to the <code>load_stream</code>
|
||||
method. In <code>psych</code> version 4.0.0 and above, the <code>load</code> method can
|
||||
safely be used.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following example calls the <code>Marshal.load</code>,
|
||||
<code>JSON.load</code>, <code>YAML.load</code>, and <code>Oj.load</code> methods
|
||||
on data from an HTTP request. Since these methods are capable of deserializing
|
||||
to arbitrary objects, this is inherently unsafe.
|
||||
</p>
|
||||
<sample src="examples/UnsafeDeserializationBad.rb"/>
|
||||
|
||||
<p>
|
||||
Using <code>JSON.parse</code> and <code>YAML.safe_load</code> instead, as in the
|
||||
following example, removes the vulnerability. Similarly, calling
|
||||
<code>Oj.load</code> with any mode other than <code>:object</code> is safe, as
|
||||
is calling <code>Oj.safe_load</code>. Note that there is no safe way to deserialize
|
||||
untrusted data using <code>Marshal</code>.
|
||||
</p>
|
||||
<sample src="examples/UnsafeDeserializationGood.rb"/>
|
||||
</example>
|
||||
|
||||
<references>
|
||||
|
||||
<li>
|
||||
OWASP vulnerability description:
|
||||
<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">deserialization of untrusted data</a>.
|
||||
</li>
|
||||
<li>
|
||||
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.0.0/doc/security_rdoc.html">guidance on deserializing objects safely</a>.
|
||||
</li>
|
||||
<li>
|
||||
Ruby documentation: <a href="https://ruby-doc.org/stdlib-3.0.2/libdoc/yaml/rdoc/YAML.html#module-YAML-label-Security">security guidance on the YAML library</a>.
|
||||
</li>
|
||||
<li>
|
||||
An explanation of how unsafe YAML load methods can lead to code execution:
|
||||
<a href="https://devcraft.io/2021/01/07/universal-deserialisation-gadget-for-ruby-2-x-3-x.html">Universal Deserialisation Gadget for Ruby 2.x-3.x</a>.
|
||||
</li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* @name Deserialization of user-controlled yaml data
|
||||
* @description Deserializing user-controlled yaml data may allow attackers to
|
||||
* execute arbitrary code.
|
||||
* @kind path-problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 9.8
|
||||
* @precision high
|
||||
* @id rb/unsafe-unsafeyamldeserialization
|
||||
* @tags security
|
||||
* external/cwe/cwe-502
|
||||
*/
|
||||
|
||||
import ruby
|
||||
import codeql.ruby.security.UnsafeDeserializationQuery
|
||||
import UnsafeCodeConstructionFlow::PathGraph
|
||||
|
||||
// Reports every path from a source to a sink found by `UnsafeCodeConstructionFlow`,
// using the source's own description as the link text of the alert message.
//
// NOTE(review): a module named `UnsafeCodeConstructionFlow` is defined next to
// `UnsafeYamlDeserializationConfig`, but the only library imported by this query is
// `codeql.ruby.security.UnsafeDeserializationQuery` - confirm that the flow module is
// really in scope here. Likewise the cast below uses `UnsafeDeserialization::Source`
// rather than `UnsafeYamlDeserialization::Source`; confirm that this is intended.
from UnsafeCodeConstructionFlow::PathNode source, UnsafeCodeConstructionFlow::PathNode sink
where UnsafeCodeConstructionFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "Unsafe deserialization depends on a $@.", source.getNode(),
  source.getNode().(UnsafeDeserialization::Source).describe()
|
||||
@@ -0,0 +1,83 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for reasoning about unsafe deserialization.
|
||||
*
|
||||
* Note, for performance reasons: only import this file if
|
||||
* `UnsafeCodeConstructionFlow` is needed, otherwise
|
||||
* `UnsafeYamlDeserializationCustomizations` should be imported instead.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.AST
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.ApiGraphs
|
||||
import UnsafeYamlDeserializationCustomizations::UnsafeYamlDeserialization
|
||||
import Yaml
|
||||
|
||||
/**
 * A stateful data-flow configuration for unsafe YAML deserialization.
 *
 * Sources, sinks and sanitizers are the `Source`, `Sink` and `Sanitizer`
 * classes brought in by the imports of this file; the flow state is one of
 * `FlowState::Parse` or `FlowState::Load`.
 */
private module UnsafeYamlDeserializationConfig implements DataFlow::StateConfigSig {
  class FlowState = FlowState::State;

  /** Holds if `source` is a `Source`; sources are accepted in both flow states. */
  predicate isSource(DataFlow::Node source, FlowState state) {
    source instanceof Source and
    (state instanceof FlowState::Parse or state instanceof FlowState::Load)
  }

  /** Holds if `sink` is a `Sink`; sinks are accepted in both flow states. */
  predicate isSink(DataFlow::Node sink, FlowState state) {
    sink instanceof Sink and
    (state instanceof FlowState::Parse or state instanceof FlowState::Load)
  }

  /** Holds if `node` is a `Sanitizer`, in which case flow through it is blocked. */
  predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }

  /**
   * Holds if taint should additionally propagate from `pred` to `succ` for
   * `YAML.parse`-style calls. Two kinds of step are covered, and both are
   * restricted to the `Parse` flow state:
   *
   * 1. From the first argument of `parse`, `parse_stream` or `parse_file` to
   *    the value returned by that call:
   *    ```rb
   *    x = source
   *    doc = YAML.parse(x)
   *    ```
   * 2. From that same argument to the result of a `to_ruby` (or `transform`)
   *    call made on any API node derived from the parse call, as computed by
   *    `yamlParseNode`:
   *    ```rb
   *    sink = doc.to_ruby # Unsafe call
   *    ```
   */
  predicate isAdditionalFlowStep(
    DataFlow::Node pred, FlowState stateFrom, DataFlow::Node succ, FlowState stateTo
  ) {
    (
      exists(API::Node parseSuccessors, API::Node parseMethod |
        parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]) and
        parseSuccessors = yamlParseNode(parseMethod)
      |
        // `transform` is accepted alongside `to_ruby` so that this step agrees with
        // `YamlParseArgument`, which treats the results of both methods as sinks.
        succ = parseSuccessors.getMethod(["to_ruby", "transform"]).getReturn().asSource() and
        pred = parseMethod.getArgument(0).asSink()
      )
      or
      exists(API::Node parseMethod |
        parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"])
      |
        succ = parseMethod.getReturn().asSource() and
        pred = parseMethod.getArgument(0).asSink()
      )
    ) and
    stateFrom instanceof FlowState::Parse and
    stateTo instanceof FlowState::Parse
  }
}
|
||||
|
||||
/**
 * Holds if `pred` is the first argument of a `parse`, `parse_stream` or
 * `parse_file` call on the library returned by `yamlLibrary()`, and `succ`
 * is the value returned by that call.
 *
 * This is the same relation as the second disjunct of
 * `UnsafeYamlDeserializationConfig::isAdditionalFlowStep`, without flow states.
 *
 * NOTE(review): the name suggests a testing/debugging helper - confirm that it
 * is still needed before relying on it.
 */
predicate isAdditionalFlowStepTest(DataFlow::Node pred, DataFlow::Node succ) {
  exists(API::Node parseMethod |
    parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"])
  |
    succ = parseMethod.getReturn().asSource() and
    pred = parseMethod.getArgument(0).asSink()
  )
}
|
||||
|
||||
/**
 * Taint-tracking for reasoning about unsafe YAML deserialization, instantiated
 * from `UnsafeYamlDeserializationConfig`.
 *
 * NOTE(review): the module name mentions "code construction" although it wraps
 * the YAML deserialization configuration; the name is kept unchanged because
 * the query refers to `UnsafeCodeConstructionFlow` by name.
 */
module UnsafeCodeConstructionFlow = TaintTracking::GlobalWithState<UnsafeYamlDeserializationConfig>;
|
||||
@@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for reasoning about unsafe
|
||||
* deserialization, as well as extension points for adding your own.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.AST
|
||||
private import codeql.ruby.ApiGraphs
|
||||
private import codeql.ruby.CFG
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.dataflow.RemoteFlowSources
|
||||
private import codeql.ruby.frameworks.ActiveJob
|
||||
private import codeql.ruby.frameworks.core.Module
|
||||
private import codeql.ruby.frameworks.core.Kernel
|
||||
private import Yaml
|
||||
|
||||
/**
 * Provides default sources, sinks and sanitizers for reasoning about unsafe
 * YAML deserialization, together with the flow states used by the
 * corresponding data-flow configuration.
 */
module UnsafeYamlDeserialization {
  /** Flow states used to distinguish whether we are using a YAML parse node or a YAML load node. */
  module FlowState {
    private newtype TState =
      TParse() or
      TLoad()

    /** A flow state that records whether an intermediate node uses `YAML.load*` or `YAML.parse*`. */
    class State extends TState {
      /**
       * Gets a string representation of this state.
       */
      string toString() { result = this.getStringRepresentation() }

      /**
       * Gets a canonical string representation of this state:
       * `"parse"` for the parse state and `"load"` for the load state.
       */
      string getStringRepresentation() {
        this = TParse() and result = "parse"
        or
        this = TLoad() and result = "load"
      }
    }

    /**
     * A flow state used for `YAML.parse*` methods.
     */
    class Parse extends State, TParse { }

    /**
     * A flow state used for `YAML.load*` methods.
     */
    class Load extends State, TLoad { }
  }

  /**
   * A data flow source for unsafe deserialization vulnerabilities.
   */
  abstract class Source extends DataFlow::Node {
    /** Gets a string that describes the source; `"user-provided value"` unless overridden. */
    string describe() { result = "user-provided value" }
  }

  /**
   * A data flow sink for unsafe deserialization vulnerabilities.
   */
  abstract class Sink extends DataFlow::Node { }

  /**
   * A sanitizer for unsafe deserialization vulnerabilities.
   */
  abstract class Sanitizer extends DataFlow::Node { }

  /** A source of remote user input, considered as a flow source for unsafe deserialization. */
  class RemoteFlowSourceAsSource extends Source instanceof RemoteFlowSource { }

  /**
   * A read of data from standard input or the command line, considered as a
   * flow source for unsafe deserialization. This covers:
   *
   * - `gets`/`read` calls on `STDIN` or `ARGF`,
   * - `gets`/`read` calls on `$stdin`,
   * - accesses to `ARGV`, and
   * - `Kernel` calls to `gets`, `readline` or `readlines`.
   */
  class StdInSource extends UnsafeYamlDeserialization::Source {
    // `true` for every stdin-like read, `false` only for the `ARGV` case.
    boolean stdin;

    StdInSource() {
      this = API::getTopLevelMember(["STDIN", "ARGF"]).getAMethodCall(["gets", "read"]) and
      stdin = true
      or
      // > $stdin == STDIN
      // => true
      // but $stdin is special in that it is a global variable and not a constant. `API::getTopLevelMember` only gets constants.
      exists(DataFlow::Node dollarStdin |
        dollarStdin.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() = "$stdin" and
        this = dollarStdin.getALocalSource().getAMethodCall(["gets", "read"])
      ) and
      stdin = true
      or
      // ARGV.
      // NOTE(review): this matches a global variable read named "ARGV" (no `$` prefix);
      // confirm that accesses to `ARGV` are actually modelled that way.
      this.asExpr().getExpr().(GlobalVariableReadAccess).getVariable().getName() = "ARGV" and
      stdin = false
      or
      this.(Kernel::KernelMethodCall).getMethodName() = ["gets", "readline", "readlines"] and
      stdin = true
    }

    override string describe() {
      if stdin = true then result = "value from stdin" else result = "value from ARGV"
    }
  }

  /**
   * An argument in a call to `YAML.load`, `YAML.unsafe_load`,
   * `YAML.unsafe_load_file` or `YAML.load_stream`, considered a sink for unsafe
   * deserialization. Both the first positional argument and the `yaml` /
   * `filename` keyword arguments are covered. The same calls on `Psych` are
   * matched as well, since `yamlLibrary()` covers both `YAML` and `Psych`.
   */
  class YamlLoadArgument extends Sink {
    YamlLoadArgument() {
      // Note: this is safe in psych/yaml >= 4.0.0.
      this = yamlLibrary().getAMethodCall("load").getArgument(0)
      or
      this =
        yamlLibrary()
            .getAMethodCall(["unsafe_load_file", "unsafe_load", "load_stream"])
            .getArgument(0)
      or
      this = yamlLibrary().getAMethodCall(["unsafe_load", "load_stream"]).getKeywordArgument("yaml")
      or
      this = yamlLibrary().getAMethodCall("unsafe_load_file").getKeywordArgument("filename")
    }
  }

  /**
   * The result of a `to_ruby` or `transform` call made on an API node derived
   * from a `YAML.parse`, `YAML.parse_stream` or `YAML.parse_file` call (see
   * `yamlParseNode`), considered a sink for unsafe deserialization.
   *
   * Despite the class name, the sink is the value returned by
   * `to_ruby`/`transform`, not the argument passed to `parse*`.
   */
  class YamlParseArgument extends Sink {
    YamlParseArgument() {
      this =
        yamlParseNode(yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]))
            .getMethod(["to_ruby", "transform"])
            .getReturn()
            .asSource()
    }
  }
}
|
||||
34
ruby/ql/src/experimental/cwe-502/Yaml.qll
Normal file
34
ruby/ql/src/experimental/cwe-502/Yaml.qll
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Provides modeling for the `YAML` and `Psych` libraries.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.dataflow.FlowSteps
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.ApiGraphs
|
||||
|
||||
|
||||
/**
 * Gets `yamlParseInstance` itself, or any API node reachable from it through
 * an arbitrary chain of return, block, element, parameter, method, member or
 * argument edges.
 *
 * Callers in this change pass the API node for a `parse`, `parse_stream` or
 * `parse_file` method of the YAML library, so the result ranges over values
 * derived from the parsed document.
 */
API::Node yamlParseNode(API::Node yamlParseInstance) {
  result = yamlParseInstance
  or
  result = yamlParseNode(yamlParseInstance).getReturn()
  or
  result = yamlParseNode(yamlParseInstance).getBlock()
  or
  result = yamlParseNode(yamlParseInstance).getAnElement()
  or
  result = yamlParseNode(yamlParseInstance).getParameter(_)
  or
  result = yamlParseNode(yamlParseInstance).getMethod(_)
  or
  result = yamlParseNode(yamlParseInstance).getMember(_)
  or
  result = yamlParseNode(yamlParseInstance).getArgument(_)
}
|
||||
|
||||
/**
 * Gets an API node for the top-level `YAML` or `Psych` constant.
 */
API::Node yamlLibrary() { result = API::getTopLevelMember(["YAML", "Psych"]) }
|
||||
@@ -0,0 +1,16 @@
|
||||
require 'yaml'
|
||||
|
||||
# Example controller that deliberately deserializes request data unsafely.
# Every call below passes `params[:yaml]` to an API that can build arbitrary
# Ruby objects; do not copy this pattern into real code.
class UserController < ActionController::Base
  def yaml_example
    # UNSAFE: `unsafe_load` and `load_stream` deserialize untrusted input directly.
    object = YAML.unsafe_load params[:yaml]
    object = YAML.load_stream params[:yaml]

    # Parsing alone only builds a document tree ...
    parsed_yaml = Psych.parse_stream(params[:yaml])

    # to_ruby is unsafe
    # ... but converting any node of that tree with `to_ruby` is unsafe.
    parsed_yaml.children.each do |child|
      object = child.to_ruby
    end

    # UNSAFE: the same conversion, chained directly onto `Psych.parse`.
    object = Psych.parse(params[:yaml]).to_ruby
    # ...
  end
end
|
||||
@@ -0,0 +1,10 @@
|
||||
require 'yaml'
|
||||
|
||||
# Example controller showing the recommended way to deserialize YAML taken
# from request data.
class UserController < ActionController::Base
  def safe_yaml_example
    # NOTE: `load` can only be used safely with psych 4.0.0 and above; on older
    # versions it can construct arbitrary objects.
    object = YAML.load params[:yaml]

    # NOTE(review): presumably the same version caveat applies to `load_file` -
    # confirm, or prefer `safe_load_file`.
    object = Psych.load_file params[:yaml]

    # `safe_load` is the preferred call for untrusted data.
    object = YAML.safe_load params[:yaml]
    # ...
  end
end
|
||||
Reference in New Issue
Block a user