mirror of
https://github.com/github/codeql.git
synced 2026-04-29 02:35:15 +02:00
Merge remote-tracking branch 'upstream/master' into mergeback-20181217
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Sanitizing untrusted URLs is an important technique for
|
||||
preventing attacks such as request forgeries and malicious
|
||||
redirections. Often, this is done by checking that the host of a URL
|
||||
is in a set of allowed hosts.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
If a regular expression implements such a check, it is
|
||||
easy to accidentally make the check too permissive by not escaping the
|
||||
<code>.</code> meta-characters appropriately.
|
||||
|
||||
Even if the check is not used in a security-critical
|
||||
context, the incomplete check may still cause undesirable behaviors
|
||||
when it accidentally succeeds.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
|
||||
Escape all meta-characters appropriately when constructing
|
||||
regular expressions for security checks, pay special attention to the
|
||||
<code>.</code> meta-character.
|
||||
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
The following example code checks that a URL redirection
|
||||
will reach the <code>example.com</code> domain, or one of its
|
||||
subdomains.
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="examples/IncompleteHostnameRegExp.js"/>
|
||||
|
||||
<p>
|
||||
|
||||
The check is however easy to bypass because the unescaped
|
||||
<code>.</code> allows for any character before
|
||||
<code>example.com</code>, effectively allowing the redirect to go to
|
||||
an attacker-controlled domain such as <code>wwwXexample.com</code>.
|
||||
|
||||
</p>
|
||||
<p>
|
||||
|
||||
Address this vulnerability by escaping <code>.</code>
|
||||
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* @name Incomplete regular expression for hostnames
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @precision high
|
||||
* @id js/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-20
|
||||
*/
|
||||
|
||||
import javascript
|
||||
|
||||
/**
|
||||
* A taint tracking configuration for incomplete hostname regular expressions sources.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "IncompleteHostnameRegExpTracking" }
|
||||
|
||||
override
|
||||
predicate isSource(DataFlow::Node source) {
|
||||
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
|
||||
}
|
||||
|
||||
override
|
||||
predicate isSink(DataFlow::Node sink) {
|
||||
isInterpretedAsRegExp(sink)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
|
||||
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
|
||||
*/
|
||||
bindingset[pattern]
|
||||
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
|
||||
hostPart = pattern.regexpCapture(
|
||||
"(?i).*" +
|
||||
// an unescaped single `.`
|
||||
"(?<!\\\\)[.]" +
|
||||
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
|
||||
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" +
|
||||
".*", 1)
|
||||
}
|
||||
|
||||
from Expr e, string pattern, string hostPart
|
||||
where
|
||||
(
|
||||
e.(RegExpLiteral).getValue() = pattern or
|
||||
exists (Configuration cfg |
|
||||
cfg.hasFlow(e.flow(), _) and
|
||||
e.mayHaveStringValue(pattern)
|
||||
)
|
||||
) and
|
||||
isIncompleteHostNameRegExpPattern(pattern, hostPart)
|
||||
and
|
||||
// ignore patterns with capture groups after the TLD
|
||||
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
|
||||
|
||||
|
||||
select e, "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected."
|
||||
@@ -21,7 +21,7 @@ where
|
||||
substring.mayHaveStringValue(target) and
|
||||
(
|
||||
// target contains a domain on a common TLD, and perhaps some other URL components
|
||||
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(com|org|edu|gov|uk|net)(:[0-9]+)?/?") or
|
||||
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() + ")(:[0-9]+)?/?") or
|
||||
// target is a HTTP URL to a domain on any TLD
|
||||
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
|
||||
) and
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
app.get('/some/path', function(req, res) {
|
||||
let url = req.param('url'),
|
||||
host = urlLib.parse(url).host;
|
||||
// BAD: the host of `url` may be controlled by an attacker
|
||||
let regex = /(www|beta|).example.com/;
|
||||
if (host.match(regex)) {
|
||||
res.redirect(url);
|
||||
}
|
||||
});
|
||||
@@ -1,11 +1,12 @@
|
||||
/**
|
||||
* Provides classes for working with regular expression literals.
|
||||
* Provides classes for working with regular expressions.
|
||||
*
|
||||
* Regular expressions are represented as an abstract syntax tree of regular expression
|
||||
* Regular expression literals are represented as an abstract syntax tree of regular expression
|
||||
* terms.
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.dataflow.InferredTypes
|
||||
|
||||
/**
|
||||
* An element containing a regular expression term, that is, either
|
||||
@@ -484,3 +485,41 @@ class RegExpParseError extends Error, @regexp_parse_error {
|
||||
result = getMessage()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `source` may be interpreted as a regular expression.
|
||||
*/
|
||||
predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
// The first argument to an invocation of `RegExp` (with or without `new`).
|
||||
source = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0)
|
||||
or
|
||||
// The argument of a call that coerces the argument to a regular expression.
|
||||
exists(MethodCallExpr mce, string methodName |
|
||||
mce.getReceiver().analyze().getAType() = TTString() and
|
||||
mce.getMethodName() = methodName
|
||||
|
|
||||
(methodName = "match" and source.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1)
|
||||
or
|
||||
(
|
||||
methodName = "search" and
|
||||
source.asExpr() = mce.getArgument(0) and
|
||||
mce.getNumArgument() = 1 and
|
||||
// "search" is a common method name, and so we exclude chained accesses
|
||||
// because `String.prototype.search` returns a number
|
||||
not exists(PropAccess p | p.getBase() = mce)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides regular expression patterns.
|
||||
*/
|
||||
module RegExpPatterns {
|
||||
/**
|
||||
* Gets a pattern that matches common top-level domain names.
|
||||
*/
|
||||
string commonTLD() {
|
||||
// according to ranking by http://google.com/search?q=site:.<<TLD>>
|
||||
result = "com|org|edu|gov|uk|net|io"
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,6 @@
|
||||
*/
|
||||
|
||||
import javascript
|
||||
private import semmle.javascript.dataflow.InferredTypes
|
||||
|
||||
module RegExpInjection {
|
||||
/**
|
||||
@@ -51,36 +50,14 @@ module RegExpInjection {
|
||||
}
|
||||
|
||||
/**
|
||||
* The first argument to an invocation of `RegExp` (with or without `new`).
|
||||
* The source string of a regular expression.
|
||||
*/
|
||||
class RegExpObjectCreationSink extends Sink, DataFlow::ValueNode {
|
||||
RegExpObjectCreationSink() {
|
||||
this = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0)
|
||||
class RegularExpressionSourceAsSink extends Sink {
|
||||
RegularExpressionSourceAsSink() {
|
||||
isInterpretedAsRegExp(this)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The argument of a call that coerces the argument to a regular expression.
|
||||
*/
|
||||
class RegExpObjectCoercionSink extends Sink {
|
||||
|
||||
RegExpObjectCoercionSink() {
|
||||
exists (MethodCallExpr mce, string methodName |
|
||||
mce.getReceiver().analyze().getAType() = TTString() and
|
||||
mce.getMethodName() = methodName |
|
||||
(methodName = "match" and this.asExpr() = mce.getArgument(0) and mce.getNumArgument() = 1) or
|
||||
(
|
||||
methodName = "search" and
|
||||
this.asExpr() = mce.getArgument(0) and
|
||||
mce.getNumArgument() = 1 and
|
||||
// `String.prototype.search` returns a number, so exclude chained accesses
|
||||
not exists(PropAccess p | p.getBase() = mce)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to a function whose name suggests that it escapes regular
|
||||
* expression meta-characters.
|
||||
|
||||
Reference in New Issue
Block a user