JS: address review comments

This commit is contained in:
Esben Sparre Andreasen
2018-12-11 12:58:21 +01:00
parent 09e7124bb1
commit 1bc73ab592
4 changed files with 34 additions and 23 deletions

View File

@@ -8,7 +8,7 @@
Sanitizing untrusted URLs is an important technique for
preventing attacks such as request forgeries and malicious
redirections. Usually, this is done by checking that the host of a URL
redirections. Often, this is done by checking that the host of a URL
is in a set of allowed hosts.
</p>
@@ -56,7 +56,7 @@
an attacker-controlled domain such as <code>wwwXexample.com</code>.
Address this vulnerability by escaping <code>.</code>
appropriately: <code>let regex =/(www|beta|)\.example\.com/</code>.
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
</p>

View File

@@ -12,28 +12,25 @@
import javascript
module IncompleteHostnameRegExpTracking {
/**
* A taint tracking configuration for incomplete hostname regular expression sources.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "IncompleteHostnameRegExpTracking" }
/**
* A taint tracking configuration for incomplete hostname regular expression sources.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "IncompleteHostnameRegExpTracking" }
override
predicate isSource(DataFlow::Node source) {
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
}
override
predicate isSink(DataFlow::Node sink) {
isInterpretedAsRegExp(sink)
}
override
predicate isSource(DataFlow::Node source) {
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
}
override
predicate isSink(DataFlow::Node sink) {
isInterpretedAsRegExp(sink)
}
}
/**
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
@@ -45,7 +42,7 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](com|org|edu|gov|uk|net))" +
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" +
".*", 1)
}
@@ -53,7 +50,7 @@ from Expr e, string pattern, string hostPart
where
(
e.(RegExpLiteral).getValue() = pattern or
exists (IncompleteHostnameRegExpTracking::Configuration cfg |
exists (Configuration cfg |
cfg.hasFlow(e.flow(), _) and
e.mayHaveStringValue(pattern)
)
@@ -61,7 +58,7 @@ where
isIncompleteHostNameRegExpPattern(pattern, hostPart)
and
// ignore patterns with a non-capturing group `(?:...)` after the TLD
not pattern.regexpMatch("(?i).*[.](com|org|edu|gov|uk|net).*[(][?]:.*[)].*")
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
select e, "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected."

View File

@@ -21,7 +21,7 @@ where
substring.mayHaveStringValue(target) and
(
// target contains a domain on a common TLD, and perhaps some other URL components
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(com|org|edu|gov|uk|net)(:[0-9]+)?/?") or
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() + ")(:[0-9]+)?/?") or
// target is an HTTP URL to a domain on any TLD
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
) and