mirror of
https://github.com/github/codeql.git
synced 2026-05-03 20:58:03 +02:00
JS: make RegExpPatterns::commonTLD more robust
This commit is contained in:
@@ -36,7 +36,7 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
|
||||
// an unescaped single `.`
|
||||
"(?<!\\\\)[.]" +
|
||||
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
|
||||
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" + ".*", 1)
|
||||
"([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" + ".*", 1)
|
||||
}
|
||||
|
||||
from Expr e, string pattern, string hostPart
|
||||
@@ -51,7 +51,7 @@ where
|
||||
) and
|
||||
isIncompleteHostNameRegExpPattern(pattern, hostPart) and
|
||||
// ignore patterns with capture groups after the TLD
|
||||
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
|
||||
not pattern.regexpMatch("(?i).*[.]" + RegExpPatterns::commonTLD() + ".*[(][?]:.*[)].*")
|
||||
select e,
|
||||
"This regular expression has an unescaped '.' before '" + hostPart +
|
||||
"', so it might match more hosts than expected."
|
||||
|
||||
@@ -38,8 +38,8 @@ where
|
||||
(
|
||||
// target contains a domain on a common TLD, and perhaps some other URL components
|
||||
target
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() +
|
||||
")(:[0-9]+)?/?")
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
|
||||
"(:[0-9]+)?/?")
|
||||
or
|
||||
// target is an HTTP URL to a domain on any TLD
|
||||
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
|
||||
|
||||
@@ -397,10 +397,10 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
|
||||
*/
|
||||
module RegExpPatterns {
  /**
   * Gets a pattern that matches common top-level domain names in lower case.
   *
   * The alternatives are wrapped in a non-capturing group, so the pattern can be
   * concatenated directly into a larger regular expression without additional
   * parentheses and without shifting the caller's capture-group numbering.
   * The trailing negative lookahead rejects a match that is immediately followed
   * by another lower-case letter or digit, so a TLD is not matched as a mere
   * prefix of a longer label (for example `com` in `community`).
   *
   * Callers that need case-insensitive matching should add `(?i)` to the
   * enclosing regular expression themselves.
   */
  string commonTLD() {
    // according to ranking by http://google.com/search?q=site:.<<TLD>>
    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
  }
}
|
||||
|
||||
Reference in New Issue
Block a user