Merge remote-tracking branch 'upstream/master' into mergeback-20181217

This commit is contained in:
Jonas Jensen
2018-12-17 13:42:45 +01:00
12 changed files with 262 additions and 41 deletions

View File

@@ -15,6 +15,7 @@
| **Query** | **Tags** | **Purpose** |
|-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Double escaping or unescaping (`js/double-escaping`) | correctness, security, external/cwe/cwe-116 | Highlights potential double escaping or unescaping of special characters, indicating a possible violation of [CWE-116](https://cwe.mitre.org/data/definitions/116.html). Results are shown on LGTM by default. |
| Incomplete regular expression for hostnames (`js/incomplete-hostname-regexp`) | correctness, security, external/cwe/cwe-020 | Highlights hostname sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default.|
| Incomplete URL substring sanitization | correctness, security, external/cwe/cwe-020 | Highlights URL sanitizers that are likely to be incomplete, indicating a violation of [CWE-020](https://cwe.mitre.org/data/definitions/20.html). Results shown on LGTM by default. |
| Incorrect suffix check (`js/incorrect-suffix-check`) | correctness, security, external/cwe/cwe-020 | Highlights error-prone suffix checks based on `indexOf`, indicating a potential violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. |
| Useless comparison test (`js/useless-comparison-test`) | correctness | Highlights code that is unreachable due to a numeric comparison that is always true or always false. Results are shown on LGTM by default. |

View File

@@ -159,17 +159,6 @@ predicate shellCommandPreface(string cmd, string flag) {
)
}
/**
* An array element. This supports multiple kinds of array syntax.
*/
private predicate arrayElement(Expr arrayLit, int idx, Expr element) {
exists (ArrayLiteral lit | lit = arrayLit |
lit.getElement(idx) = element)
or exists (MessageExpr arrayWithObjects | arrayWithObjects = arrayLit |
arrayWithObjects.getStaticTarget().getQualifiedName().matches("NSArray%::+arrayWithObjects:") and
arrayWithObjects.getArgument(idx) = element)
}
/**
* A command that is used as a command, or component of a command,
* that will be executed by a general-purpose command interpreter

View File

@@ -1,5 +1,6 @@
+ semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200
+ semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022

View File

@@ -0,0 +1,72 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Sanitizing untrusted URLs is an important technique for
preventing attacks such as request forgeries and malicious
redirections. Often, this is done by checking that the host of a URL
is in a set of allowed hosts.
</p>
<p>
If a regular expression implements such a check, it is
easy to accidentally make the check too permissive by not escaping the
<code>.</code> meta-characters appropriately.
Even if the check is not used in a security-critical
context, the incomplete check may still cause undesirable behaviors
when it accidentally succeeds.
</p>
</overview>
<recommendation>
<p>
Escape all meta-characters appropriately when constructing
regular expressions for security checks, pay special attention to the
<code>.</code> meta-character.
</p>
</recommendation>
<example>
<p>
The following example code checks that a URL redirection
will reach the <code>example.com</code> domain, or one of its
subdomains.
</p>
<sample src="examples/IncompleteHostnameRegExp.js"/>
<p>
The check is however easy to bypass because the unescaped
<code>.</code> allows for any character before
<code>example.com</code>, effectively allowing the redirect to go to
an attacker-controlled domain such as <code>wwwXexample.com</code>.
</p>
<p>
Address this vulnerability by escaping <code>.</code>
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
</p>
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,64 @@
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
/**
* A taint tracking configuration for incomplete hostname regular expressions sources.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "IncompleteHostnameRegExpTracking" }
override
predicate isSource(DataFlow::Node source) {
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
}
override
predicate isSink(DataFlow::Node sink) {
isInterpretedAsRegExp(sink)
}
}
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
*/
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
hostPart = pattern.regexpCapture(
"(?i).*" +
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" +
".*", 1)
}
from Expr e, string pattern, string hostPart
where
(
e.(RegExpLiteral).getValue() = pattern or
exists (Configuration cfg |
cfg.hasFlow(e.flow(), _) and
e.mayHaveStringValue(pattern)
)
) and
isIncompleteHostNameRegExpPattern(pattern, hostPart)
and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
select e, "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected."

View File

@@ -21,7 +21,7 @@ where
substring.mayHaveStringValue(target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(com|org|edu|gov|uk|net)(:[0-9]+)?/?") or
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() + ")(:[0-9]+)?/?") or
// target is a HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
) and

View File

@@ -0,0 +1,9 @@
app.get('/some/path', function(req, res) {
let url = req.param('url'),
host = urlLib.parse(url).host;
// BAD: the host of `url` may be controlled by an attacker
let regex = /(www|beta|).example.com/;
if (host.match(regex)) {
res.redirect(url);
}
});

View File

@@ -1,11 +1,12 @@
/**
* Provides classes for working with regular expression literals.
* Provides classes for working with regular expressions.
*
* Regular expressions are represented as an abstract syntax tree of regular expression
* Regular expression literals are represented as an abstract syntax tree of regular expression
* terms.
*/
import javascript
private import semmle.javascript.dataflow.InferredTypes
/**
* An element containing a regular expression term, that is, either
@@ -484,3 +485,41 @@ class RegExpParseError extends Error, @regexp_parse_error {
result = getMessage()
}
}
/**
* Holds if `source` may be interpreted as a regular expression.
*/
predicate isInterpretedAsRegExp(DataFlow::Node source) {
// The first argument to an invocation of `RegExp` (with or without `new`).
source = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0)
or
// The argument of a call that coerces the argument to a regular expression.
exists(MethodCallExpr mce, string methodName |
mce.getReceiver().analyze().getAType() = TTString() and
mce.getMethodName() = methodName
|
(methodName = "match" and source.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1)
or
(
methodName = "search" and
source.asExpr() = mce.getArgument(0) and
mce.getNumArgument() = 1 and
// "search" is a common method name, and so we exclude chained accesses
// because `String.prototype.search` returns a number
not exists(PropAccess p | p.getBase() = mce)
)
)
}
/**
* Provides regular expression patterns.
*/
module RegExpPatterns {
/**
* Gets a pattern that matches common top-level domain names.
*/
string commonTLD() {
// according to ranking by http://google.com/search?q=site:.<<TLD>>
result = "com|org|edu|gov|uk|net|io"
}
}

View File

@@ -4,7 +4,6 @@
*/
import javascript
private import semmle.javascript.dataflow.InferredTypes
module RegExpInjection {
/**
@@ -51,36 +50,14 @@ module RegExpInjection {
}
/**
* The first argument to an invocation of `RegExp` (with or without `new`).
* The source string of a regular expression.
*/
class RegExpObjectCreationSink extends Sink, DataFlow::ValueNode {
RegExpObjectCreationSink() {
this = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0)
class RegularExpressionSourceAsSink extends Sink {
RegularExpressionSourceAsSink() {
isInterpretedAsRegExp(this)
}
}
/**
* The argument of a call that coerces the argument to a regular expression.
*/
class RegExpObjectCoercionSink extends Sink {
RegExpObjectCoercionSink() {
exists (MethodCallExpr mce, string methodName |
mce.getReceiver().analyze().getAType() = TTString() and
mce.getMethodName() = methodName |
(methodName = "match" and this.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1) or
(
methodName = "search" and
this.asExpr() = mce.getArgument(0) and
mce.getNumArgument() = 1 and
// `String.prototype.search` returns a number, so exclude chained accesses
not exists(PropAccess p | p.getBase() = mce)
)
)
}
}
/**
* A call to a function whose name suggests that it escapes regular
* expression meta-characters.

View File

@@ -0,0 +1,21 @@
| tst-IncompleteHostnameRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. |
| tst-IncompleteHostnameRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.' before 'dev\|example.com', so it might match more hosts than expected. |

View File

@@ -0,0 +1 @@
Security/CWE-020/IncompleteHostnameRegExp.ql

View File

@@ -0,0 +1,47 @@
(function() {
/http:\/\/example.com/; // OK
/http:\/\/test.example.com/; // NOT OK
/http:\/\/test\\.example.com/; // OK
/http:\/\/test.example.net/; // NOT OK
/http:\/\/test.(example-a|example-b).com/; // NOT OK
/http:\/\/(.+)\\.example.com/; // NOT OK, but not yet supported with enough precision
/http:\/\/(\\.+)\\.example.com/; // OK
/http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK, but not yet supported with enough precision
/http:\/\/test.example.com\/(?:.*)/; // OK
new RegExp("http://test.example.com"); // NOT OK
s.match("http://test.example.com"); // NOT OK
function id(e) { return e; }
new RegExp(id(id(id("http://test.example.com")))); // NOT OK
new RegExp(`test.example.com$`); // NOT OK
let hostname = 'test.example.com'; // NOT OK
new RegExp(`${hostname}$`);
let domain = { hostname: 'test.example.com' };
new RegExp(domain.hostname);
function convert(domain) {
return new RegExp(domain.hostname);
}
convert({ hostname: 'test.example.com' }); // NOT OK
let domains = [ { hostname: 'test.example.com' } ]; // NOT OK, but not yet supported
function convert(domain) {
return new RegExp(domain.hostname);
}
domains.map(d => convert(d));
/(.+\.(?:example-a|example-b)\.com)/; // NOT OK, but not yet supported with enough precision
/^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK
/^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK
/\(http:\/\/sub.example.com\/\)/g; // NOT OK
/https?:\/\/api.example.com/; // NOT OK
new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK
/^https:\/\/[a-z]*.example.com$/; // NOT OK
RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
/example.dev|example.com/; // OK, but still flagged
});