mirror of
https://github.com/github/codeql.git
synced 2026-04-28 02:05:14 +02:00
Merge pull request #9896 from github/nickrolfe/hardcoded_code
Ruby: port js/hardcoded-data-interpreted-as-code
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
|
||||
* data being interpreted as code, as well as extension points for adding your
|
||||
* own.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.security.CodeInjectionCustomizations
|
||||
private import codeql.ruby.AST as AST
|
||||
private import codeql.ruby.controlflow.CfgNodes
|
||||
|
||||
/**
|
||||
* Provides default sources, sinks and sanitizers for reasoning about hard-coded
|
||||
* data being interpreted as code, as well as extension points for adding your
|
||||
* own.
|
||||
*/
|
||||
module HardcodedDataInterpretedAsCode {
|
||||
/**
|
||||
* Flow states used to distinguish value-preserving flow from taint flow.
|
||||
*/
|
||||
module FlowState {
|
||||
/** Flow state used to track value-preserving flow. */
|
||||
DataFlow::FlowState data() { result = "data" }
|
||||
|
||||
/** Flow state used to track tainted data (non-value-preserving flow). */
|
||||
DataFlow::FlowState taint() { result = "taint" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow source for hard-coded data.
|
||||
*/
|
||||
abstract class Source extends DataFlow::Node {
|
||||
/** Gets a flow label for which this is a source. */
|
||||
DataFlow::FlowState getLabel() { result = FlowState::data() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A data flow sink for code injection.
|
||||
*/
|
||||
abstract class Sink extends DataFlow::Node {
|
||||
/** Gets a description of what kind of sink this is. */
|
||||
abstract string getKind();
|
||||
|
||||
/** Gets a flow label for which this is a sink. */
|
||||
DataFlow::FlowState getLabel() {
|
||||
// We want to ignore value-flow and only consider taint-flow, since the
|
||||
// source is just a hex string, and evaluating that directly will just
|
||||
// cause a syntax error.
|
||||
result = FlowState::taint()
|
||||
}
|
||||
}
|
||||
|
||||
/** A sanitizer for hard-coded data. */
|
||||
abstract class Sanitizer extends DataFlow::Node { }
|
||||
|
||||
/**
|
||||
* A constant string consisting of eight or more hexadecimal characters (including at
|
||||
* least one digit), viewed as a source of hard-coded data that should not be
|
||||
* interpreted as code.
|
||||
*/
|
||||
private class HexStringSource extends Source {
|
||||
HexStringSource() {
|
||||
exists(string val |
|
||||
this.asExpr().(ExprNodes::StringLiteralCfgNode).getConstantValue().isString(val)
|
||||
|
|
||||
val.regexpMatch("[0-9a-fA-F]{8,}") and
|
||||
val.regexpMatch(".*[0-9].*")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A string literal whose raw text is made up entirely of `\x` escape
|
||||
* sequences, viewed as a source of hard-coded data that should not be
|
||||
* interpreted as code.
|
||||
*/
|
||||
private class HexEscapedStringSource extends Source {
|
||||
HexEscapedStringSource() {
|
||||
forex(StringComponentCfgNode c |
|
||||
c = this.asExpr().(ExprNodes::StringlikeLiteralCfgNode).getAComponent()
|
||||
|
|
||||
c.getNode().(AST::StringEscapeSequenceComponent).getRawText().matches("\\x%")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A code injection sink; hard-coded data should not flow here.
|
||||
*/
|
||||
private class DefaultCodeInjectionSink extends Sink {
|
||||
DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
|
||||
|
||||
override string getKind() { result = "code" }
|
||||
}
|
||||
|
||||
/**
|
||||
* An argument to `require`, which is interpreted as an import path; hard-coded data should not flow here.
|
||||
*/
|
||||
private class RequireArgumentSink extends Sink {
|
||||
RequireArgumentSink() {
|
||||
exists(DataFlow::CallNode require |
|
||||
require.getReceiver().getExprNode().getExpr() instanceof AST::SelfVariableAccess and
|
||||
require.getMethodName() = "require"
|
||||
|
|
||||
this = require.getArgument(0)
|
||||
)
|
||||
}
|
||||
|
||||
override string getKind() { result = "an import path" }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Provides a taint-tracking configuration for reasoning about hard-coded data
|
||||
* being interpreted as code.
|
||||
*
|
||||
* Note, for performance reasons: only import this file if
|
||||
* `HardcodedDataInterpretedAsCode::Configuration` is needed, otherwise
|
||||
* `HardcodedDataInterpretedAsCodeCustomizations` should be imported instead.
|
||||
*/
|
||||
|
||||
private import codeql.ruby.DataFlow
|
||||
private import codeql.ruby.TaintTracking
|
||||
private import codeql.ruby.dataflow.internal.TaintTrackingPrivate
|
||||
import HardcodedDataInterpretedAsCodeCustomizations::HardcodedDataInterpretedAsCode
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration for reasoning about hard-coded data
|
||||
* being interpreted as code.
|
||||
*
|
||||
* We extend `DataFlow::Configuration` rather than
|
||||
* `TaintTracking::Configuration`, so that we can set the flow state to
|
||||
* `"taint"` on a taint step.
|
||||
*/
|
||||
class Configuration extends DataFlow::Configuration {
|
||||
Configuration() { this = "HardcodedDataInterpretedAsCode" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source, DataFlow::FlowState label) {
|
||||
source.(Source).getLabel() = label
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState label) {
|
||||
sink.(Sink).getLabel() = label
|
||||
}
|
||||
|
||||
override predicate isBarrier(DataFlow::Node node) {
|
||||
super.isBarrier(node) or
|
||||
node instanceof Sanitizer
|
||||
}
|
||||
|
||||
override predicate isAdditionalFlowStep(
|
||||
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
|
||||
DataFlow::FlowState stateTo
|
||||
) {
|
||||
defaultAdditionalTaintStep(nodeFrom, nodeTo) and
|
||||
// This is a taint step, so the flow state becomes `taint`.
|
||||
stateFrom = [FlowState::data(), FlowState::taint()] and
|
||||
stateTo = FlowState::taint()
|
||||
}
|
||||
}
|
||||
4
ruby/ql/src/change-notes/2022-07-26-hardcoded-data.md
Normal file
4
ruby/ql/src/change-notes/2022-07-26-hardcoded-data.md
Normal file
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: newQuery
|
||||
---
|
||||
* Added a new query, `rb/hardcoded-data-interpreted-as-code`, to detect cases where hardcoded data is executed as code, a technique associated with backdoors.
|
||||
@@ -0,0 +1,47 @@
|
||||
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
Interpreting hard-coded data (such as string literals containing hexadecimal numbers)
|
||||
as code or as an import path is typical of malicious backdoor code that has been
|
||||
implanted into an otherwise trusted code base and is trying to hide its true purpose
|
||||
from casual readers or automated scanning tools.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Examine the code in question carefully to ascertain its provenance and its true purpose.
|
||||
If the code is benign, it should always be possible to rewrite it without relying
|
||||
on dynamically interpreting data as code, improving both clarity and safety.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
As an example of malicious code using this obfuscation technique, consider the
|
||||
following simplified Ruby version of a snippet of backdoor code that was
|
||||
discovered in a dependency of the popular JavaScript <code>event-stream</code>
|
||||
npm package:
|
||||
</p>
|
||||
<sample src="examples/HardcodedDataInterpretedAsCode.rb"/>
|
||||
<p>
|
||||
While this shows only the first few lines of code, it already looks very suspicious
|
||||
since it takes a hard-coded string literal, hex-decodes it and then uses it as an
|
||||
import path. The only reason to do so is to hide the name of the file being imported.
|
||||
</p>
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>
|
||||
OWASP:
|
||||
<a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.
|
||||
</li>
|
||||
<li>
|
||||
The npm Blog:
|
||||
<a href="https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident">Details about the event-stream incident</a>.
|
||||
</li>
|
||||
</references>
|
||||
|
||||
</qhelp>
|
||||
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* @name Hard-coded data interpreted as code
|
||||
* @description Transforming hard-coded data (such as hexadecimal constants) into code
|
||||
* to be executed is a technique often associated with backdoors and should
|
||||
* be avoided.
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 9.1
|
||||
* @precision medium
|
||||
* @id rb/hardcoded-data-interpreted-as-code
|
||||
* @tags security
|
||||
* external/cwe/cwe-506
|
||||
*/
|
||||
|
||||
import codeql.ruby.security.HardcodedDataInterpretedAsCodeQuery
|
||||
import codeql.ruby.DataFlow
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where cfg.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink,
|
||||
"Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
|
||||
source.getNode(), "here"
|
||||
@@ -0,0 +1,6 @@
|
||||
def e(r)
|
||||
[r].pack 'H*'
|
||||
end
|
||||
|
||||
# BAD: hexadecimal constant decoded and interpreted as import path
|
||||
require e("2e2f746573742f64617461")
|
||||
@@ -0,0 +1,29 @@
|
||||
edges
|
||||
| tst.rb:1:7:1:7 | r : | tst.rb:2:4:2:4 | r : |
|
||||
| tst.rb:2:4:2:4 | r : | tst.rb:2:3:2:15 | call to pack : |
|
||||
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:8:7:30 | totally_harmless_string : |
|
||||
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : |
|
||||
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:7:6:7:31 | call to e |
|
||||
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : |
|
||||
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e |
|
||||
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:32 | another_questionable_string : |
|
||||
| tst.rb:17:6:17:32 | another_questionable_string : | tst.rb:17:6:17:38 | call to strip |
|
||||
nodes
|
||||
| tst.rb:1:7:1:7 | r : | semmle.label | r : |
|
||||
| tst.rb:2:3:2:15 | call to pack : | semmle.label | call to pack : |
|
||||
| tst.rb:2:4:2:4 | r : | semmle.label | r : |
|
||||
| tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | semmle.label | "707574732822636f646520696e6a6..." : |
|
||||
| tst.rb:7:6:7:31 | call to e | semmle.label | call to e |
|
||||
| tst.rb:7:8:7:30 | totally_harmless_string : | semmle.label | totally_harmless_string : |
|
||||
| tst.rb:10:9:10:25 | call to e | semmle.label | call to e |
|
||||
| tst.rb:10:11:10:24 | "666f6f626172" : | semmle.label | "666f6f626172" : |
|
||||
| tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | semmle.label | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : |
|
||||
| tst.rb:17:6:17:32 | another_questionable_string : | semmle.label | another_questionable_string : |
|
||||
| tst.rb:17:6:17:38 | call to strip | semmle.label | call to strip |
|
||||
subpaths
|
||||
| tst.rb:7:8:7:30 | totally_harmless_string : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:7:6:7:31 | call to e |
|
||||
| tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:1:7:1:7 | r : | tst.rb:2:3:2:15 | call to pack : | tst.rb:10:9:10:25 | call to e |
|
||||
#select
|
||||
| tst.rb:7:6:7:31 | call to e | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." : | tst.rb:7:6:7:31 | call to e | Hard-coded data from $@ is interpreted as code. | tst.rb:5:27:5:72 | "707574732822636f646520696e6a6..." | here |
|
||||
| tst.rb:10:9:10:25 | call to e | tst.rb:10:11:10:24 | "666f6f626172" : | tst.rb:10:9:10:25 | call to e | Hard-coded data from $@ is interpreted as an import path. | tst.rb:10:11:10:24 | "666f6f626172" | here |
|
||||
| tst.rb:17:6:17:38 | call to strip | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." : | tst.rb:17:6:17:38 | call to strip | Hard-coded data from $@ is interpreted as code. | tst.rb:16:31:16:84 | "\\x70\\x75\\x74\\x73\\x28\\x27\\x68\\..." | here |
|
||||
@@ -0,0 +1 @@
|
||||
queries/security/cwe-506/HardcodedDataInterpretedAsCode.ql
|
||||
18
ruby/ql/test/query-tests/security/cwe-506/tst.rb
Normal file
18
ruby/ql/test/query-tests/security/cwe-506/tst.rb
Normal file
@@ -0,0 +1,18 @@
|
||||
def e(r)
|
||||
[r].pack 'H*'
|
||||
end
|
||||
|
||||
totally_harmless_string = '707574732822636f646520696e6a656374696f6e2229'
|
||||
|
||||
eval(e(totally_harmless_string)) # NOT OK: eval('puts("code injection")')
|
||||
eval(totally_harmless_string) # OK: throws parse error
|
||||
|
||||
require e('666f6f626172') # NOT OK: require 'foobar'
|
||||
require '666f6f626172' # OK: no taint step between source and sink
|
||||
|
||||
x = 'deadbeef'
|
||||
require e(x) # OK: doesn't meet our criteria for being a source
|
||||
|
||||
another_questionable_string = "\x70\x75\x74\x73\x28\x27\x68\x65\x6C\x6C\x6F\x27\x29"
|
||||
eval(another_questionable_string.strip) # NOT OK: eval("puts('hello')")
|
||||
eval(another_questionable_string) # OK: no taint step between source and sink
|
||||
Reference in New Issue
Block a user