mirror of
https://github.com/github/codeql.git
synced 2026-04-30 19:26:02 +02:00
JS: reformulate js/incomplete-hostname-regexp with type tracking
This commit is contained in:
@@ -13,16 +13,35 @@
|
||||
import javascript
|
||||
|
||||
/**
|
||||
* A taint tracking configuration for incomplete hostname regular expressions sources.
|
||||
* Gets a node whose value may flow (inter-procedurally) to `re`, a position where it is interpreted
|
||||
* as a regular expression.
|
||||
*/
|
||||
class Configuration extends TaintTracking::Configuration {
|
||||
Configuration() { this = "IncompleteHostnameRegExpTracking" }
|
||||
// `re` is the node that is interpreted as a regular expression; `t` is the
// type back-tracker state associated with `result`.
DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
|
||||
// Base case: tracking starts at a node that is itself interpreted as a regular expression.
t.start() and
|
||||
re = result and
|
||||
isInterpretedAsRegExp(result)
|
||||
or
|
||||
// Recursive case: `succ` is already known to reach `re` (in state `t2`); step back from it to `result`.
exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
|
||||
// An ordinary type-tracking small step from `result` to `succ`.
t2 = t.smallstep(result, succ)
|
||||
or
|
||||
// An additional taint step from `result` to `succ`; the tracker state is carried over unchanged.
any(TaintTracking::AdditionalTaintStep dts).step(result, succ) and
|
||||
t = t2
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
isIncompleteHostNameRegExpPattern(source.getStringValue(), _)
|
||||
}
|
||||
/**
 * Gets a node that the two-argument `regExpSource` relates to `re` when the
 * type back-tracker is in its end state.
 */
DataFlow::Node regExpSource(DataFlow::Node re) {
|
||||
result = regExpSource(re, DataFlow::TypeBackTracker::end())
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
|
||||
/**
 * Holds if `re` is a regular expression with value `pattern`.
 *
 * Either `re` is a regular expression literal, in which case `aux` is `re` itself,
 * or `re` is a node with string value `pattern` that `regExpSource` relates to `aux`.
 * `kind` is the textual description of the case that applies: "regular expression"
 * for a literal, or "string, which is used as a regular expression $@," for a string.
 */
|
||||
predicate regexp(DataFlow::Node re, string pattern, string kind, DataFlow::Node aux) {
|
||||
// Case 1: a regular expression literal; it is its own auxiliary location.
re.asExpr().(RegExpLiteral).getValue() = pattern and
|
||||
kind = "regular expression" and
|
||||
aux = re
|
||||
or
|
||||
// Case 2: a string-valued node that is a regular-expression source for `aux`.
re = regExpSource(aux) and
|
||||
pattern = re.getStringValue() and
|
||||
kind = "string, which is used as a regular expression $@,"
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -36,22 +55,11 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
|
||||
// an unescaped single `.`
|
||||
"(?<!\\\\)[.]" +
|
||||
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
|
||||
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" + ".*", 1)
|
||||
"([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
|
||||
}
|
||||
|
||||
from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux
|
||||
where
|
||||
(
|
||||
re.asExpr().(RegExpLiteral).getValue() = pattern and
|
||||
kind = "regular expression" and
|
||||
aux = re
|
||||
or
|
||||
exists(Configuration cfg |
|
||||
cfg.hasFlow(re, aux) and
|
||||
re.mayHaveStringValue(pattern) and
|
||||
kind = "string, which is used as a regular expression $@,"
|
||||
)
|
||||
) and
|
||||
where regexp(re, pattern, kind, aux) and
|
||||
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
|
||||
// ignore patterns with capture groups after the TLD
|
||||
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||
| tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:37 | "http:/ ... le.com" | here |
|
||||
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
|
||||
| tst-IncompleteHostnameRegExp.js:15:22:15:46 | "http:/ ... le.com" | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:15:13:15:49 | id(id(i ... com"))) | here |
|
||||
| tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
|
||||
| tst-IncompleteHostnameRegExp.js:17:14:17:30 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:17:13:17:31 | `test.example.com$` | here |
|
||||
| tst-IncompleteHostnameRegExp.js:19:17:19:34 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
|
||||
| tst-IncompleteHostnameRegExp.js:22:27:22:44 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
|
||||
| tst-IncompleteHostnameRegExp.js:28:23:28:40 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here |
|
||||
| tst-IncompleteHostnameRegExp.js:30:30:30:47 | 'test.example.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here |
|
||||
| tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.' before ')?example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
|
||||
| tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | here |
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
s.match("^http://test.example.com"); // NOT OK
|
||||
|
||||
function id(e) { return e; }
|
||||
new RegExp(id(id(id("http://test.example.com")))); // NOT OK
|
||||
new RegExp(id(id(id("http://test.example.com")))); // NOT OK, but not supported by type tracking
|
||||
|
||||
new RegExp(`test.example.com$`); // NOT OK
|
||||
|
||||
|
||||
Reference in New Issue
Block a user