mirror of
https://github.com/github/codeql.git
synced 2026-05-05 13:45:19 +02:00
JS: Add missing post-anchor case to MissingRegExpAnchor
This commit is contained in:
@@ -67,3 +67,38 @@ predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
|
||||
predicate hasTopLevelDomainEnding(RegExpSequence seq) {
|
||||
hasTopLevelDomainEnding(seq, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` will always match a hostname, that is, all disjunctions contain
|
||||
* a hostname pattern that isn't inside a quantifier.
|
||||
*/
|
||||
predicate alwaysMatchesHostname(RegExpTerm term) {
|
||||
hasTopLevelDomainEnding(term, _)
|
||||
or
|
||||
// `localhost` is considered a hostname pattern, but has no TLD
|
||||
term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
|
||||
or
|
||||
not term instanceof RegExpAlt and
|
||||
not term instanceof RegExpQuantifier and
|
||||
alwaysMatchesHostname(term.getAChild())
|
||||
or
|
||||
alwaysMatchesHostnameAlt(term)
|
||||
}
|
||||
|
||||
/** Holds if every child of `alt` contains a hostname pattern. */
|
||||
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
|
||||
alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the first `i` children of `alt` contains a hostname pattern.
|
||||
*
|
||||
* This is used instead of `forall` to avoid materializing the set of alternatives
|
||||
* that don't contains hostnames, which is much larger.
|
||||
*/
|
||||
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
|
||||
alwaysMatchesHostname(alt.getChild(0)) and i = 0
|
||||
or
|
||||
alwaysMatchesHostnameAlt(alt, i - 1) and
|
||||
alwaysMatchesHostname(alt.getChild(i))
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ predicate hasExplicitAnchorPrecedence(RegExpAlt alt) {
|
||||
* The canonical example of such a mistake is: `^a|b|c`, which is
|
||||
* parsed as `(^a)|(b)|(c)`.
|
||||
*/
|
||||
predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
|
||||
predicate hasMisleadingAnchorPrecedence(RegExpPatternSource src, string msg) {
|
||||
exists(RegExpAlt root, RegExpSequence anchoredTerm, string direction |
|
||||
root = src.getRegExpTerm() and
|
||||
not containsInteriorAnchor(root) and
|
||||
@@ -151,12 +151,47 @@ predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` is a final term, that is, no term will match anything after this one.
|
||||
*/
|
||||
predicate isFinalRegExpTerm(RegExpTerm term) {
|
||||
term.isRootTerm()
|
||||
or
|
||||
exists(RegExpSequence seq |
|
||||
isFinalRegExpTerm(seq) and
|
||||
term = seq.getLastChild()
|
||||
)
|
||||
or
|
||||
exists(RegExpTerm parent |
|
||||
isFinalRegExpTerm(parent) and
|
||||
term = parent.getAChild() and
|
||||
not parent instanceof RegExpSequence and
|
||||
not parent instanceof RegExpQuantifier
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `src` contains a hostname pattern that is missing a `$` anchor.
|
||||
*/
|
||||
predicate isSemiAnchoredHostnameRegExp(RegExpPatternSource src, string msg) {
|
||||
not hasMisleadingAnchorPrecedence(src, _) and // avoid double reporting
|
||||
exists(RegExpTerm term, RegExpSequence tld, int i | term = src.getRegExpTerm() |
|
||||
not isConstantInvalidInsideOrigin(term.getAChild*()) and
|
||||
tld = term.getAChild*() and
|
||||
hasTopLevelDomainEnding(tld, i) and
|
||||
isFinalRegExpTerm(tld.getChild(i)) and // nothing is matched after the TLD
|
||||
tld.getChild(0) instanceof RegExpCaret and
|
||||
msg = "This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end."
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `src` is an unanchored pattern for a URL, indicating a
|
||||
* mistake explained by `msg`.
|
||||
*/
|
||||
predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
|
||||
predicate isUnanchoredHostnameRegExp(RegExpPatternSource src, string msg) {
|
||||
exists(RegExpTerm term, RegExpSequence tld | term = src.getRegExpTerm() |
|
||||
alwaysMatchesHostname(term) and
|
||||
tld = term.getAChild*() and
|
||||
hasTopLevelDomainEnding(tld) and
|
||||
not isConstantInvalidInsideOrigin(term.getAChild*()) and
|
||||
@@ -185,7 +220,9 @@ predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string ms
|
||||
|
||||
from DataFlow::Node nd, string msg
|
||||
where
|
||||
isInterestingUnanchoredRegExpString(nd, msg)
|
||||
isUnanchoredHostnameRegExp(nd, msg)
|
||||
or
|
||||
isInterestingSemiAnchoredRegExpString(nd, msg)
|
||||
isSemiAnchoredHostnameRegExp(nd, msg)
|
||||
or
|
||||
hasMisleadingAnchorPrecedence(nd, msg)
|
||||
select nd, msg
|
||||
|
||||
@@ -15,10 +15,12 @@
|
||||
| tst-IncompleteHostnameRegExp.js:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
|
||||
| tst-IncompleteHostnameRegExp.js:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
|
||||
| tst-IncompleteHostnameRegExp.js:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:41:42:41:68 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:41:42:41:70 | ^https?://.+\\.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:32:44:45 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:47:44:62 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:44:64:44:79 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:67 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:67 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:48:42:48:68 | ^https?://.+.example\\.com/ | This string, which is used as a regular expression $@, has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
|
||||
| tst-IncompleteHostnameRegExp.js:58:3:58:40 | ^http:\\/\\/.\\.example\\.com\\/index\\.html | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:58:2:58:41 | /^http: ... \\.html/ | here |
|
||||
| tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
| tst-IncompleteHostnameRegExp.js:2:2:2:24 | /^http: ... le.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:15:22:15:47 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-IncompleteHostnameRegExp.js:55:13:55:39 | '^http: ... le.com' | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
|
||||
| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
|
||||
| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
|
||||
@@ -43,6 +53,10 @@
|
||||
| tst-SemiAnchoredRegExp.js:98:2:98:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not |
|
||||
| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:5:43:5:62 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-UnanchoredUrlRegExp.js:6:43:6:64 | /^https ... od.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-UnanchoredUrlRegExp.js:7:43:7:87 | "(^http ... 2.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-UnanchoredUrlRegExp.js:8:43:8:86 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
@@ -52,5 +66,6 @@
|
||||
| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:26:3:26:22 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
|
||||
| tst-UnanchoredUrlRegExp.js:35:2:35:32 | /https? ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
| tst-UnanchoredUrlRegExp.js:77:11:77:32 | /vimeo\\ ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
|
||||
|
||||
@@ -55,4 +55,6 @@
|
||||
new RegExp('^http://test\.example.com'); // NOT OK, but flagged by js/useless-regexp-character-escape
|
||||
|
||||
/^http:\/\/(..|...)\.example\.com\/index\.html/; // OK, wildcards are intentional
|
||||
/^http:\/\/.\.example\.com\/index\.html/; // OK, the wildcard is intentional
|
||||
/^(foo.example\.com|whatever)$/; // kinda OK - one disjunction doesn't even look like a hostname
|
||||
});
|
||||
|
||||
@@ -104,4 +104,7 @@
|
||||
path.replace(/engine.io/, "$&-client");
|
||||
|
||||
/\.com|\.org/; // OK, has no domain name
|
||||
/example\.com|whatever/; // OK, the other disjunction doesn't match a hostname
|
||||
|
||||
/^https?:\/\/www\.example\.com\/.*\.html|^https?:\/\/www\.(?:example1|example2).com\/foo\/\d+\/\d+.html/i; // OK
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user