mirror of
https://github.com/github/codeql.git
synced 2026-05-03 12:45:27 +02:00
share {js,rb}/regex/missing-regexp-anchor
This commit is contained in:
@@ -366,6 +366,9 @@ class RegExpAnchor extends RegExpTerm, @regexp_anchor {
|
||||
override predicate isNullable() { any() }
|
||||
|
||||
override string getAPrimaryQlClass() { result = "RegExpAnchor" }
|
||||
|
||||
/** Gets the char for this term. */
|
||||
abstract string getChar();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -379,6 +382,8 @@ class RegExpAnchor extends RegExpTerm, @regexp_anchor {
|
||||
*/
|
||||
class RegExpCaret extends RegExpAnchor, @regexp_caret {
  /** Gets the character of this anchor, which is `^` for a caret. */
  override string getChar() { result = "^" }

  /** Gets the primary QL class name reported for this term: `RegExpCaret`. */
  override string getAPrimaryQlClass() { result = "RegExpCaret" }
}
|
||||
|
||||
/**
|
||||
@@ -392,6 +397,8 @@ class RegExpCaret extends RegExpAnchor, @regexp_caret {
|
||||
*/
|
||||
class RegExpDollar extends RegExpAnchor, @regexp_dollar {
  /** Gets the character of this anchor, which is `$` for a dollar. */
  override string getChar() { result = "$" }

  /** Gets the primary QL class name reported for this term: `RegExpDollar`. */
  override string getAPrimaryQlClass() { result = "RegExpDollar" }
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -8,7 +8,8 @@ private import semmle.javascript.security.regexp.RegExpTreeView::RegExpTreeView
|
||||
private import semmle.javascript.Regexp as RegExp
|
||||
private import codeql.regex.HostnameRegexp as Shared
|
||||
|
||||
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
|
||||
/** An implementation of the signature that allows the Hostname analysis to run. */
|
||||
module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
|
||||
class DataFlowNode = JS::DataFlow::Node;
|
||||
|
||||
class RegExpPatternSource = RegExp::RegExpPatternSource;
|
||||
|
||||
@@ -12,194 +12,38 @@
|
||||
*/
|
||||
|
||||
private import javascript
|
||||
private import semmle.javascript.security.regexp.HostnameRegexp
|
||||
private import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp
|
||||
private import codeql.regex.MissingRegExpAnchor as MissingRegExpAnchor
|
||||
private import semmle.javascript.security.regexp.RegExpTreeView::RegExpTreeView as TreeImpl
|
||||
|
||||
// TODO: Share the below code.
|
||||
/**
 * Holds if the anchor `term` is neither the first nor the last node
 * in its tree, as determined by `isLeftArmTerm` and `isRightArmTerm`.
 */
predicate isInteriorAnchor(RegExpAnchor term) {
  // De Morgan form of "not left arm and not right arm".
  not (isLeftArmTerm(term) or isRightArmTerm(term))
}
|
||||
|
||||
/**
 * Holds if `term` itself, or one of its transitive children, is an
 * interior anchor (see `isInteriorAnchor`), as in `(foo|bar$|baz)`.
 */
predicate containsInteriorAnchor(RegExpTerm term) {
  exists(RegExpAnchor inner | inner = term.getAChild*() | isInteriorAnchor(inner))
}
|
||||
|
||||
/**
 * Holds if the first child of the sequence `term` is a word boundary,
 * a non-word boundary, or a lookbehind assertion, which suggests that
 * the sequence is not meant to be anchored on its left side.
 */
predicate containsLeadingPseudoAnchor(RegExpSequence term) {
  term.getChild(0) instanceof RegExpWordBoundary
  or
  term.getChild(0) instanceof RegExpNonWordBoundary
  or
  term.getChild(0) instanceof RegExpLookbehind
}
|
||||
|
||||
/**
 * Holds if the last child of the sequence `term` is a word boundary,
 * a non-word boundary, or a lookahead assertion, which suggests that
 * the sequence is not meant to be anchored on its right side.
 */
predicate containsTrailingPseudoAnchor(RegExpSequence term) {
  term.getLastChild() instanceof RegExpWordBoundary
  or
  term.getLastChild() instanceof RegExpNonWordBoundary
  or
  term.getLastChild() instanceof RegExpLookahead
}
|
||||
|
||||
/**
 * Holds if the sequence `term` has no children at all. Such empty
 * sequences typically come from a trailing alternative, as in `foo|`.
 */
predicate isEmpty(RegExpSequence term) {
  0 = term.getNumChild()
}
|
||||
|
||||
/**
 * Holds if `term` itself, or one of its transitive children, is a
 * constant whose value contains at least one ASCII letter.
 *
 * This serves as a heuristic for discarding uninteresting results.
 */
predicate containsLetters(RegExpTerm term) {
  exists(RegExpConstant const | const = term.getAChild*() |
    const.getValue().regexpMatch(".*[a-zA-Z].*")
  )
}
|
||||
|
||||
/**
 * Holds if the sequence `term` has exactly two children, one of which is
 * an anchor and one of which is a group, such as the left-hand side of
 * `^(foo|bar)|baz`.
 *
 * In that shape the anchor's precedence is most likely deliberate, since
 * the group would otherwise serve no purpose.
 */
predicate isAnchoredGroup(RegExpSequence term) {
  exists(RegExpAnchor anchor, RegExpGroup grp |
    anchor = term.getAChild() and
    grp = term.getAChild()
  ) and
  term.getNumChild() = 2
}
|
||||
|
||||
/**
 * Holds if some alternative of `alt` is an anchored group (see
 * `isAnchoredGroup`), as in `^(foo|bar)|baz`, and no alternative of `alt`
 * is itself a bare group, which rules out patterns like `^(foo)|(bar)`.
 */
predicate hasExplicitAnchorPrecedence(RegExpAlt alt) {
  exists(RegExpSequence anchored | anchored = alt.getAChild() | isAnchoredGroup(anchored)) and
  // None of the direct alternatives may be a group on its own.
  not exists(RegExpGroup grp | grp = alt.getAChild())
}
|
||||
|
||||
/**
 * Holds if the root term of `src` is an alternation in which only the
 * first alternative starts with `^`, or only the last alternative ends
 * with `$`, which points to an operator precedence mistake described
 * by `msg`.
 *
 * The textbook case is `^a|b|c`, which parses as `(^a)|(b)|(c)`.
 */
predicate hasMisleadingAnchorPrecedence(RegExpPatternSource src, string msg) {
  // Skip patterns whose regexp object flows into the first argument of a `replace` call.
  not exists(DataFlow::MethodCallNode call |
    call.getMethodName() = "replace" and
    src.getARegExpObject().flowsTo(call.getArgument(0))
  ) and
  exists(RegExpAlt alt, RegExpSequence anchored, string side |
    alt = src.getRegExpTerm() and
    containsLetters(anchored) and
    // Heuristic filters: no interior anchors, no empty alternatives,
    // and no explicitly anchored group.
    not hasExplicitAnchorPrecedence(alt) and
    not isEmpty(alt.getAChild()) and
    not containsInteriorAnchor(alt) and
    (
      // Only the first alternative starts with `^`.
      side = "beginning" and
      anchored = alt.getChild(0) and
      anchored.getChild(0) instanceof RegExpCaret and
      exists(int i | i in [1 .. alt.getNumChild() - 1] | containsLetters(alt.getChild(i))) and
      not exists(int i | i in [1 .. alt.getNumChild() - 1] |
        containsLeadingPseudoAnchor(alt.getChild(i))
      )
      or
      // Only the last alternative ends with `$`.
      side = "end" and
      anchored = alt.getLastChild() and
      anchored.getLastChild() instanceof RegExpDollar and
      exists(int i | i in [0 .. alt.getNumChild() - 2] | containsLetters(alt.getChild(i))) and
      not exists(int i | i in [0 .. alt.getNumChild() - 2] |
        containsTrailingPseudoAnchor(alt.getChild(i))
      )
    ) and
    msg =
      "Misleading operator precedence. The subexpression '" + anchored.getRawValue() +
        "' is anchored at the " + side +
        ", but the other parts of this regular expression are not"
  )
}
|
||||
|
||||
/**
 * Holds if `term` is a final term, meaning that no other term matches
 * anything after it.
 *
 * A term is final when it is the root term, when it is the last child
 * of a final sequence, or when it is any child of a final term that is
 * neither a sequence nor a quantifier.
 */
predicate isFinalRegExpTerm(RegExpTerm term) {
  term.isRootTerm()
  or
  exists(RegExpTerm parent | isFinalRegExpTerm(parent) |
    // Within a sequence only the last child can be final.
    term = parent.(RegExpSequence).getLastChild()
    or
    // For other parents, except quantifiers, every child is final.
    not parent instanceof RegExpSequence and
    not parent instanceof RegExpQuantifier and
    term = parent.getAChild()
  )
}
|
||||
|
||||
/**
 * Holds if `src` contains a hostname pattern that begins with `^` but
 * has nothing matched after its top-level domain, i.e. it lacks a
 * closing `$` anchor. `msg` describes the problem.
 */
predicate isSemiAnchoredHostnameRegExp(RegExpPatternSource src, string msg) {
  exists(RegExpTerm root, RegExpSequence seq, int tldIndex |
    root = src.getRegExpTerm() and
    seq = root.getAChild*() and
    seq.getChild(0) instanceof RegExpCaret and
    hasTopLevelDomainEnding(seq, tldIndex) and
    // Nothing is matched after the top-level domain.
    isFinalRegExpTerm(seq.getChild(tldIndex)) and
    not isConstantInvalidInsideOrigin(root.getAChild*())
  ) and
  // Avoid reporting the same pattern twice.
  not hasMisleadingAnchorPrecedence(src, _) and
  msg =
    "This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end."
}
|
||||
|
||||
/**
|
||||
* Holds if `src` is an unanchored pattern for a URL, indicating a
|
||||
* mistake explained by `msg`.
|
||||
*/
|
||||
predicate isUnanchoredHostnameRegExp(RegExpPatternSource src, string msg) {
|
||||
exists(RegExpTerm term, RegExpSequence tld | term = src.getRegExpTerm() |
|
||||
alwaysMatchesHostname(term) and
|
||||
tld = term.getAChild*() and
|
||||
hasTopLevelDomainEnding(tld) and
|
||||
not isConstantInvalidInsideOrigin(term.getAChild*()) and
|
||||
not term.getAChild*() instanceof RegExpAnchor and
|
||||
// that is not used for capture or replace
|
||||
not exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() |
|
||||
private module Impl implements
|
||||
MissingRegExpAnchor::MissingRegExpAnchorSig<TreeImpl, HostnameRegexp::Impl> {
|
||||
predicate isUsedAsReplace(RegExpPatternSource pattern) {
|
||||
// is used for capture or replace
|
||||
exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() |
|
||||
name = "exec" and
|
||||
mcn = src.getARegExpObject().getAMethodCall() and
|
||||
mcn = pattern.getARegExpObject().getAMethodCall() and
|
||||
exists(mcn.getAPropertyRead())
|
||||
or
|
||||
exists(DataFlow::Node arg |
|
||||
arg = mcn.getArgument(0) and
|
||||
(
|
||||
src.getARegExpObject().flowsTo(arg) or
|
||||
src.getAParse() = arg
|
||||
pattern.getARegExpObject().flowsTo(arg) or
|
||||
pattern.getAParse() = arg
|
||||
)
|
||||
|
|
||||
name = "replace"
|
||||
or
|
||||
name = "match" and exists(mcn.getAPropertyRead())
|
||||
)
|
||||
) and
|
||||
msg =
|
||||
"When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it."
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
string getEndAnchorText() { result = "$" }
|
||||
}
|
||||
|
||||
import MissingRegExpAnchor::Make<TreeImpl, HostnameRegexp::Impl, Impl>
|
||||
|
||||
from DataFlow::Node nd, string msg
|
||||
where
|
||||
isUnanchoredHostnameRegExp(nd, msg)
|
||||
@@ -207,4 +51,5 @@ where
|
||||
isSemiAnchoredHostnameRegExp(nd, msg)
|
||||
or
|
||||
hasMisleadingAnchorPrecedence(nd, msg)
|
||||
// isLineAnchoredHostnameRegExp is not used here, as it is not relevant to JS.
|
||||
select nd, msg
|
||||
|
||||
Reference in New Issue
Block a user