JS: add query js/incomplete-url-regexp

This commit is contained in:
Esben Sparre Andreasen
2018-12-05 14:22:34 +01:00
parent a4b3b1e8c8
commit 52ca696ff4
7 changed files with 221 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Sanitizing untrusted URLs is an important technique for
preventing attacks such as request forgeries and malicious
redirections. Usually, this is done by checking that the host of a URL
is in a set of allowed hosts.
</p>
<p>
If a regular expression implements such a check, it is
easy to accidentally make the check too permissive by not escaping the
<code>.</code> meta-characters appropriately.
Even if the check is not used in a security-critical
context, the incomplete check may still cause undesirable behaviors
when the check succeeds accidentally.
</p>
</overview>
<recommendation>
<p>
Escape all meta-characters appropriately when constructing
regular expressions for security checks, and pay special attention to the
<code>.</code> meta-character.
</p>
</recommendation>
<example>
<p>
The following example code checks that a URL redirection
will reach the <code>example.com</code> domain, or one of its
subdomains.
</p>
<sample src="examples/IncompleteUrlRegExp.js"/>
<p>
However, the check is easy to bypass because the unescaped
<code>.</code> matches any character before
<code>example.com</code>, effectively allowing the redirect to go to
an attacker-controlled domain such as <code>wwwXexample.com</code>.
Address this vulnerability by escaping <code>.</code>
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
</p>
</example>
<references>
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">Unvalidated Redirects and Forwards Cheat Sheet</a></li>
</references>
</qhelp>

View File

@@ -0,0 +1,70 @@
/**
* @name Incomplete URL regular expression
* @description Security checks on URLs using regular expressions are sometimes vulnerable to bypassing.
* @kind problem
* @problem.severity error
* @precision high
* @id js/incomplete-url-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
import semmle.javascript.security.dataflow.RegExpInjection
module IncompleteUrlRegExpTracking {
  /**
   * A taint-tracking configuration for strings that contain an incomplete
   * host name regular expression pattern.
   *
   * Sources are constant strings whose value is accepted by
   * `isIncompleteHostNameRegExpPattern`; sinks are the nodes modelled by
   * `RegExpInjection::Sink`.
   */
  class Configuration extends TaintTracking::Configuration {
    Configuration() { this = "IncompleteUrlRegExpTracking" }

    override predicate isSource(DataFlow::Node source) {
      exists(ConstantString constant |
        constant = source.asExpr() and
        isIncompleteHostNameRegExpPattern(constant.getStringValue(), _)
      )
    }

    override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInjection::Sink }
  }
}
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
*
* The mistake looked for is an unescaped `.` that appears before something that
* looks like a host name ending in one of a small set of well-known top-level domains.
* `hostPart` is bound to that host-name-like portion of `pattern`
* (capture group 1 of the detection regex assembled below).
*/
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
hostPart = pattern.regexpCapture(
// case-insensitive; allow any prefix before the suspicious `.`
"(?i).*" +
// Either:
// - an unescaped and repeated `.`, followed by anything
// - an unescaped single `.`
// (the `(?<!\\)` lookbehind rejects a `.` that is preceded by a backslash)
"(?:(?<!\\\\)[.][+*].*?|(?<!\\\\)[.])" +
// a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
// (this is capture group 1; the `.` in front of the TLD may or may not be escaped)
"([():|?a-z0-9-]+(\\\\)?[.](com|org|edu|gov|uk|net))" +
// allow any suffix after the TLD
".*", 1)
}
from Expr e, string pattern, string intendedHost
where
  // the pattern has an unescaped `.` in front of something that looks like a host name
  isIncompleteHostNameRegExpPattern(pattern, intendedHost) and
  // `pattern` is either the text of a regular expression literal, or a possible
  // string value of an expression that flows to a sink of the tracking configuration
  (
    pattern = e.(RegExpLiteral).getValue()
    or
    exists(IncompleteUrlRegExpTracking::Configuration cfg |
      e.mayHaveStringValue(pattern) and
      cfg.hasFlow(e.flow(), _)
    )
  ) and
  // ignore patterns with a non-capturing group `(?:...)` after the TLD
  not pattern.regexpMatch("(?i).*[.](com|org|edu|gov|uk|net).*[(][?]:.*[)].*")
select e,
  "This regular expression has an unescaped '.', which means that '" + intendedHost +
    "' might not match the intended host of a matched URL."

View File

@@ -0,0 +1,9 @@
// Example route handler: redirects to the URL given in the `url` request
// parameter when the URL's host passes a regular expression check.
app.get('/some/path', function(req, res) {
let url = req.param('url'),
host = urlLib.parse(url).host;
// BAD: the host of `url` may be controlled by an attacker
// The `.` characters in the pattern below are unescaped, so each of them
// matches any character: a host such as `wwwXexample.com` passes the check.
let regex = /(www|beta|).example.com/;
if (host.match(regex)) {
res.redirect(url);
}
});