mirror of
https://github.com/github/codeql.git
synced 2026-04-26 09:15:12 +02:00
JS: Fix FPs from TLDs without a domain name
This commit is contained in:
@@ -30,14 +30,34 @@ predicate isDotLike(RegExpTerm term) {
|
||||
isDotConstant(term)
|
||||
}
|
||||
|
||||
/**
 * Holds if `term` will only ever be matched against the beginning of the input.
 *
 * This is the case for the root term, and for any term reachable from a term that
 * satisfies this predicate by stepping into an alternative of a `RegExpAlt`, into a
 * child of a `RegExpGroup`, into the first child of a `RegExpSequence`, or into the
 * second child of a `RegExpSequence` whose first child is a `RegExpCaret`.
 */
predicate matchesBeginningOfString(RegExpTerm term) {
  term.isRootTerm()
  or
  exists(RegExpTerm enclosing | matchesBeginningOfString(enclosing) |
    // Every alternative, and the content of a group, starts where the enclosing term starts.
    term = enclosing.(RegExpAlt).getAChild()
    or
    term = enclosing.(RegExpGroup).getAChild()
    or
    // Within a sequence: the first element, or the second one when the first is a `RegExpCaret`.
    exists(RegExpSequence seq | seq = enclosing |
      term = seq.getChild(0)
      or
      seq.getChild(0) instanceof RegExpCaret and
      term = seq.getChild(1)
    )
  )
}
|
||||
|
||||
/**
 * Holds if the given sequence contains a top-level domain preceded by a dot, such as `.com`,
 * excluding cases where this is at the very beginning of the regexp.
 *
 * `i` is bound to the index of the last child in the top-level domain part.
 */
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
  // Child `i` is a constant consisting of a common TLD, optionally followed by a
  // `:port` and by a path, query or fragment part.
  seq.getChild(i)
      .(RegExpConstant)
      .getValue()
      .regexpMatch("(?i)" + RegExpPatterns::commonTLD() + "(:\\d+)?([/?#].*)?") and
  // The child immediately before it must be dot-like.
  isDotLike(seq.getChild(i - 1)) and
  // When the dot is the first child (`i = 1`) of a sequence that is matched at the
  // beginning of the input, no term precedes the dot, so there is no domain name in
  // front of the TLD; such sequences are excluded.
  not (i = 1 and matchesBeginningOfString(seq))
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -156,8 +156,9 @@ predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string
|
||||
* mistake explained by `msg`.
|
||||
*/
|
||||
predicate isInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
|
||||
exists(RegExpTerm term | term = src.getRegExpTerm() |
|
||||
hasTopLevelDomainEnding(term.getAChild*()) and
|
||||
exists(RegExpTerm term, RegExpSequence tld | term = src.getRegExpTerm() |
|
||||
tld = term.getAChild*() and
|
||||
hasTopLevelDomainEnding(tld) and
|
||||
not isConstantInvalidInsideOrigin(term.getAChild*()) and
|
||||
not term.getAChild*() instanceof RegExpAnchor and
|
||||
// that is not used for capture or replace
|
||||
|
||||
Reference in New Issue
Block a user