JS: reformulate js/incomplete-hostname-regexp with type tracking

This commit is contained in:
Esben Sparre Andreasen
2019-04-03 13:35:43 +02:00
parent 5a7101481c
commit 9c65277b53
3 changed files with 30 additions and 22 deletions

View File

@@ -13,16 +13,35 @@
import javascript
/**
* A taint tracking configuration for incomplete hostname regular expressions sources.
* Gets a node whose value may flow (inter-procedurally) to a position where it is interpreted
* as a regular expression.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "IncompleteHostnameRegExpTracking" }
DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
// Base case: the tracker is in its start state and `result` is `re` itself,
// a node that is interpreted as a regular expression.
t.start() and
re = result and
isInterpretedAsRegExp(result)
or
// Recursive case: `succ` is already known to be a source for `re` under tracker `t2`;
// relate `result` to `succ` by one more step.
exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
// Either a single type-tracking `smallstep` between `result` and `succ`, which relates `t` and `t2`,
t2 = t.smallstep(result, succ)
or
// or a step contributed by any `TaintTracking::AdditionalTaintStep`, in which case
// the tracker is carried over unchanged.
any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and
t = t2
)
}
override predicate isSource(DataFlow::Node source) {
isIncompleteHostNameRegExpPattern(source.getStringValue(), _)
}
/**
 * Gets a node that the tracker-based `regExpSource` relates to `re` when the tracker is
 * `DataFlow::TypeBackTracker::end()`.
 *
 * NOTE(review): presumably `end()` denotes a completed back-tracking path; confirm against
 * the `TypeBackTracker` library documentation.
 */
DataFlow::Node regExpSource(DataFlow::Node re) {
result = regExpSource(re, DataFlow::TypeBackTracker::end())
}
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
/**
 * Holds if `re` is a regular expression with value `pattern`.
 *
 * `kind` is a phrase describing what `re` is, and `aux` is an auxiliary node associated
 * with `re`:
 * - for a regular expression literal, `pattern` is the literal's value, `kind` is
 *   "regular expression", and `aux` is `re` itself;
 * - otherwise, `re` is a node with string value `pattern` such that `re = regExpSource(aux)`,
 *   and `kind` is "string, which is used as a regular expression $@,".
 */
predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
// Case 1: `re` is a regular expression literal.
re.asExpr().(RegExpLiteral).getValue() = pattern and
kind = "regular expression" and
aux = re
or
// Case 2: `re` is a string-valued node that `regExpSource` yields for `aux`.
re = regExpSource(aux) and
pattern = re.getStringValue() and
kind = "string, which is used as a regular expression $@,"
}
/**
@@ -36,22 +55,11 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
// an unescaped single `.`
"(?<!\\\\)[.]" +
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" + ".*", 1)
"([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
}
from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux
where
(
re.asExpr().(RegExpLiteral).getValue() = pattern and
kind = "regular expression" and
aux = re
or
exists(Configuration cfg |
cfg.hasFlow(re, aux) and
re.mayHaveStringValue(pattern) and
kind = "string, which is used as a regular expression $@,"
)
) and
where regexp(re, pattern, kind, aux) and
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
// ignore patterns with capture groups after the TLD
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")

View File

@@ -3,12 +3,12 @@
| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | here |
| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | here |
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:15:13:15:49 | id(id(i ... com"))) | here |
| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
| tst-IncompleteHostnameRegExp.js:28:23:28:40 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here |
| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here |
| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here |
| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | here |

View File

@@ -12,7 +12,7 @@
s.match("^http://test.example.com"); // NOT OK
function id(e) { return e; } // identity helper: returns its argument unchanged
new RegExp(id(id(id("http://test.example.com")))); // NOT OK
new RegExp(id(id(id("http://test.example.com")))); // NOT OK, but not supported by type tracking
new RegExp(`test.example.com$`); // NOT OK