Merge pull request #9896 from github/nickrolfe/hardcoded_code

Ruby: port js/hardcoded-data-interpreted-as-code
This commit is contained in:
Nick Rolfe
2022-08-26 13:49:25 +01:00
committed by GitHub
9 changed files with 287 additions and 0 deletions

View File

@@ -0,0 +1,111 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
* data being interpreted as code, as well as extension points for adding your
* own.
*/
private import codeql.ruby.DataFlow
private import codeql.ruby.security.CodeInjectionCustomizations
private import codeql.ruby.AST as AST
private import codeql.ruby.controlflow.CfgNodes
/**
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
* data being interpreted as code, as well as extension points for adding your
* own.
*/
module HardcodedDataInterpretedAsCode {
/**
* Flow states used to distinguish value-preserving flow from taint flow.
*/
module FlowState {
/** Flow state used to track value-preserving flow. */
DataFlow::FlowState data() { result = "data" }
/** Flow state used to track tainted data (non-value-preserving flow). */
DataFlow::FlowState taint() { result = "taint" }
}
/**
* A data flow source for hard-coded data.
*/
abstract class Source extends DataFlow::Node {
/**
* Gets a flow state for which this is a source. By default this is
* `FlowState::data()`.
*/
DataFlow::FlowState getLabel() { result = FlowState::data() }
}
/**
* A data flow sink where data is interpreted as code or used as an import
* path.
*/
abstract class Sink extends DataFlow::Node {
/** Gets a description of what kind of sink this is. */
abstract string getKind();
/**
* Gets a flow state for which this is a sink. By default this is
* `FlowState::taint()`.
*/
DataFlow::FlowState getLabel() {
// We want to ignore value-flow and only consider taint-flow, since the
// source is just a hex string, and evaluating that directly will just
// cause a syntax error.
result = FlowState::taint()
}
}
/**
* A sanitizer for hard-coded data. Extend this class to mark nodes through
* which hard-coded data should not be tracked.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
 * A string literal whose constant value is a run of at least eight hexadecimal
 * characters, one or more of which is a decimal digit. Such a value is treated
 * as hard-coded data that should never end up being interpreted as code.
 */
private class HexStringSource extends Source {
  HexStringSource() {
    exists(string hex |
      this.asExpr().(ExprNodes::StringLiteralCfgNode).getConstantValue().isString(hex) and
      // At least one digit is required, so letter-only values such as
      // `deadbeef` do not qualify.
      hex.regexpMatch(".*[0-9].*") and
      // The whole value must consist of eight or more hexadecimal characters.
      hex.regexpMatch("[0-9a-fA-F]{8,}")
    )
  }
}
/**
* A string literal whose raw text is made up entirely of `\x` escape
* sequences, viewed as a source of hard-coded data that should not be
* interpreted as code.
*/
private class HexEscapedStringSource extends Source {
HexEscapedStringSource() {
// `forex` holds only if the literal has at least one component and every
// component is an escape sequence whose raw text starts with `\x`, so empty
// literals and literals that mix in plain text are not sources.
forex(StringComponentCfgNode c |
c = this.asExpr().(ExprNodes::StringlikeLiteralCfgNode).getAComponent()
|
c.getNode().(AST::StringEscapeSequenceComponent).getRawText().matches("\\x%")
)
}
}
/**
* A code injection sink; hard-coded data should not flow here.
*
* Every `CodeInjection::Sink` is reused as a sink here and is described as
* `"code"`.
*/
private class DefaultCodeInjectionSink extends Sink {
DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
override string getKind() { result = "code" }
}
/**
 * The first argument of a call to `require` whose receiver is `self`, that is,
 * the path being imported; hard-coded data should not flow here.
 */
private class RequireArgumentSink extends Sink {
  RequireArgumentSink() {
    this =
      any(DataFlow::CallNode call |
        call.getMethodName() = "require" and
        call.getReceiver().getExprNode().getExpr() instanceof AST::SelfVariableAccess
      ).getArgument(0)
  }

  override string getKind() { result = "an import path" }
}
}

View File

@@ -0,0 +1,48 @@
/**
* Provides a taint-tracking configuration for reasoning about hard-coded data
* being interpreted as code.
*
* Note, for performance reasons: only import this file if
* `HardcodedDataInterpretedAsCode::Configuration` is needed, otherwise
* `HardcodedDataInterpretedAsCodeCustomizations` should be imported instead.
*/
private import codeql.ruby.DataFlow
private import codeql.ruby.TaintTracking
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate
import HardcodedDataInterpretedAsCodeCustomizations::HardcodedDataInterpretedAsCode
/**
 * A data-flow configuration that finds hard-coded data reaching a place where
 * it is interpreted as code.
 *
 * This is built on `DataFlow::Configuration` instead of
 * `TaintTracking::Configuration` so that the taint steps can be added
 * explicitly and made to switch the flow state to `"taint"`.
 */
class Configuration extends DataFlow::Configuration {
  Configuration() { this = "HardcodedDataInterpretedAsCode" }

  override predicate isSource(DataFlow::Node source, DataFlow::FlowState label) {
    label = source.(Source).getLabel()
  }

  override predicate isSink(DataFlow::Node sink, DataFlow::FlowState label) {
    label = sink.(Sink).getLabel()
  }

  override predicate isBarrier(DataFlow::Node node) {
    node instanceof Sanitizer
    or
    super.isBarrier(node)
  }

  override predicate isAdditionalFlowStep(
    DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
    DataFlow::FlowState stateTo
  ) {
    // A default taint step always ends in the `taint` state, whichever of the
    // two states the flow was in before the step.
    (stateFrom = FlowState::data() or stateFrom = FlowState::taint()) and
    stateTo = FlowState::taint() and
    defaultAdditionalTaintStep(nodeFrom, nodeTo)
  }
}

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `rb/hardcoded-data-interpreted-as-code`, to detect cases where hard-coded data is executed as code, a technique associated with backdoors.

View File

@@ -0,0 +1,47 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Interpreting hard-coded data (such as string literals containing hexadecimal numbers)
as code or as an import path is typical of malicious backdoor code that has been
implanted into an otherwise trusted code base and is trying to hide its true purpose
from casual readers or automated scanning tools.
</p>
</overview>
<recommendation>
<p>
Examine the code in question carefully to ascertain its provenance and its true purpose.
If the code is benign, it should always be possible to rewrite it without relying
on dynamically interpreting data as code, improving both clarity and safety.
</p>
</recommendation>
<example>
<p>
As an example of malicious code using this obfuscation technique, consider the
following simplified Ruby version of a snippet of backdoor code that was
discovered in a dependency of the popular JavaScript <code>event-stream</code>
npm package:
</p>
<sample src="examples/HardcodedDataInterpretedAsCode.rb"/>
<p>
While this shows only the first few lines of code, it already looks very suspicious
since it takes a hard-coded string literal, hex-decodes it and then uses it as an
import path. The only reason to do so is to hide the name of the file being imported.
</p>
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.
</li>
<li>
The npm Blog:
<a href="https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident">Details about the event-stream incident</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,23 @@
/**
* @name Hard-coded data interpreted as code
* @description Transforming hard-coded data (such as hexadecimal constants) into code
* to be executed is a technique often associated with backdoors and should
* be avoided.
* @kind path-problem
* @problem.severity error
* @security-severity 9.1
* @precision medium
* @id rb/hardcoded-data-interpreted-as-code
* @tags security
* external/cwe/cwe-506
*/
import codeql.ruby.security.HardcodedDataInterpretedAsCodeQuery
import codeql.ruby.DataFlow
import DataFlow::PathGraph
// Report every flow path found by the configuration. The alert is placed on
// the sink, the `$@` placeholder links to the hard-coded source, and the
// sink's kind completes the message.
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
source.getNode(), "here"

View File

@@ -0,0 +1,6 @@
# Hex-decodes +r+: each pair of hexadecimal digits becomes one byte of the
# returned string.
def e(r)
  [r].pack('H*')
end
# BAD: the hexadecimal constant is decoded (to "./test/data") and interpreted as an import path
require e("2e2f746573742f64617461")

View File

@@ -0,0 +1,29 @@
edges
| tst.rb:1:7:1:7 | r : | tst.rb:2:4:2:4 | r : |
| tst.rb:2:4:2:4 | r : | tst.rb:2:3:2:15 | call to pack : |
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:8:7:30 | totally_harmless_string : |
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : |
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:7:6:7:31 | call to e |
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : |
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e |
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:32 | another_questionable_string : |
| tst.rb:17:6:17:32 | another_questionable_string : | tst.rb:17:6:17:38 | call to strip |
nodes
| tst.rb:1:7:1:7 | r : | semmle.label | r : |
| tst.rb:2:3:2:15 | call to pack : | semmle.label | call to pack : |
| tst.rb:2:4:2:4 | r : | semmle.label | r : |
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | semmle.label | "707574732822636f646520696e6a6..." : |
| tst.rb:7:6:7:31 | call to e | semmle.label | call to e |
| tst.rb:7:8:7:30 | totally_harmless_string : | semmle.label | totally_harmless_string : |
| tst.rb:10:9:10:25 | call to e | semmle.label | call to e |
| tst.rb:10:11:10:24 | "666f6f626172" : | semmle.label | "666f6f626172" : |
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | semmle.label | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : |
| tst.rb:17:6:17:32 | another_questionable_string : | semmle.label | another_questionable_string : |
| tst.rb:17:6:17:38 | call to strip | semmle.label | call to strip |
subpaths
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:7:6:7:31 | call to e |
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:10:9:10:25 | call to e |
#select
| tst.rb:7:6:7:31 | call to e | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:6:7:31 | call to e | Hard-coded data from $@ is interpreted as code. | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." | here |
| tst.rb:10:9:10:25 | call to e | tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e | Hard-coded data from $@ is interpreted as an import path. | tst.rb:10:11:10:24 | "666f6f626172" | here |
| tst.rb:17:6:17:38 | call to strip | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:38 | call to strip | Hard-coded data from $@ is interpreted as code. | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." | here |

View File

@@ -0,0 +1 @@
queries/security/cwe-506/HardcodedDataInterpretedAsCode.ql

View File

@@ -0,0 +1,18 @@
def e(r)
[r].pack 'H*'
end
totally_harmless_string = '707574732822636f646520696e6a656374696f6e2229'
eval(e(totally_harmless_string)) # NOT OK: eval('puts("code injection")')
eval(totally_harmless_string) # OK: throws parse error
require e('666f6f626172') # NOT OK: require 'foobar'
require '666f6f626172' # OK: no taint step between source and sink
x = 'deadbeef'
require e(x) # OK: 'deadbeef' contains no digit, so it doesn't meet our criteria for being a source
another_questionable_string = "\x70\x75\x74\x73\x28\x27\x68\x65\x6C\x6C\x6F\x27\x29"
eval(another_questionable_string.strip) # NOT OK: eval("puts('hello')")
eval(another_questionable_string) # OK: no taint step between source and sink