mirror of
https://github.com/github/codeql.git
synced 2026-04-25 08:45:14 +02:00
JS: Update semi-anchored regex query
This commit is contained in:
@@ -12,6 +12,84 @@
|
||||
|
||||
import javascript
|
||||
|
||||
/**
 * Holds if `term` is on the left arm of its regular expression: either
 * `term.isRootTerm()` holds, or `term` is child number 0 of a term that is
 * itself on the left arm.
 */
predicate isLeftArmTerm(RegExpTerm term) {
  // Base case: the root term is on the left arm.
  term.isRootTerm()
  or
  // Recursive case: step from a left-arm parent to its child at index 0.
  exists(RegExpTerm leftParent | isLeftArmTerm(leftParent) | leftParent.getChild(0) = term)
}
|
||||
|
||||
/**
 * Holds if `term` is on the right arm of its regular expression: either
 * `term.isRootTerm()` holds, or `term` is the last child of a term that is
 * itself on the right arm.
 */
predicate isRightArmTerm(RegExpTerm term) {
  // Base case: the root term is on the right arm.
  term.isRootTerm()
  or
  // Recursive case: step from a right-arm parent to its last child.
  exists(RegExpTerm rightParent | isRightArmTerm(rightParent) | rightParent.getLastChild() = term)
}
|
||||
|
||||
/**
 * Holds if the anchor `term` is neither on the left arm nor on the right arm
 * of its tree, meaning it is not the first or last node of the regular
 * expression.
 */
predicate isInteriorAnchor(RegExpAnchor term) {
  not (isLeftArmTerm(term) or isRightArmTerm(term))
}
|
||||
|
||||
/**
 * Holds if `term` itself, or one of its transitive children, is an interior
 * anchor (see `isInteriorAnchor`), as in `(foo|bar$|baz)`.
 */
predicate containsInteriorAnchor(RegExpTerm term) {
  exists(RegExpAnchor anchor | anchor = term.getAChild*() | isInteriorAnchor(anchor))
}
|
||||
|
||||
/**
 * Holds if the first child of the sequence `term` is a word boundary, a
 * non-word boundary, or a lookbehind assertion.
 *
 * Such a leading term indicates that the sequence is not intended to be
 * anchored on that side.
 */
predicate containsLeadingPseudoAnchor(RegExpSequence term) {
  term.getChild(0) instanceof RegExpWordBoundary
  or
  term.getChild(0) instanceof RegExpNonWordBoundary
  or
  term.getChild(0) instanceof RegExpLookbehind
}
|
||||
|
||||
/**
 * Holds if the last child of the sequence `term` is a word boundary, a
 * non-word boundary, or a lookahead assertion.
 *
 * Such a trailing term indicates that the sequence is not intended to be
 * anchored on that side.
 */
predicate containsTrailingPseudoAnchor(RegExpSequence term) {
  term.getLastChild() instanceof RegExpWordBoundary
  or
  term.getLastChild() instanceof RegExpNonWordBoundary
  or
  term.getLastChild() instanceof RegExpLookahead
}
|
||||
|
||||
/**
 * Holds if the sequence `term` has no children.
 *
 * Empty sequences usually arise from literals with a trailing alternative,
 * such as `foo|`.
 */
predicate isEmpty(RegExpSequence term) {
  0 = term.getNumChild()
}
|
||||
|
||||
/**
 * Holds if `term` itself, or one of its transitive children, is a constant
 * whose value contains at least one ASCII letter.
 *
 * This serves as a heuristic for filtering out uninteresting results.
 */
predicate containsLetters(RegExpTerm term) {
  exists(RegExpConstant constant |
    constant = term.getAChild*() and
    constant.getValue().regexpMatch(".*[a-zA-Z].*")
  )
}
|
||||
|
||||
/**
|
||||
* Holds if `src` is a pattern for a collection of alternatives where
|
||||
* only the first or last alternative is anchored, indicating a
|
||||
@@ -21,23 +99,21 @@ import javascript
|
||||
* parsed as `(^a)|(b)|(c)`.
|
||||
*/
|
||||
predicate isInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
|
||||
exists(string str, string maybeGroupedStr, string regex, string anchorPart, string escapedDot |
|
||||
// a dot that might be escaped in a regular expression, for example `/\./` or `new RegExp('\\.')`
|
||||
escapedDot = "\\\\[.]" and
|
||||
// a string that is mostly free from special regular expression symbols
|
||||
str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and
|
||||
// the string may be wrapped in parentheses
|
||||
maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and
|
||||
exists(RegExpAlt root, RegExpSequence anchoredTerm |
|
||||
root = src.getRegExpTerm() and
|
||||
not containsInteriorAnchor(root) and
|
||||
not isEmpty(root.getAChild()) and
|
||||
containsLetters(anchoredTerm) and
|
||||
(
|
||||
// a problematic pattern: `^a|b|...|x`
|
||||
regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+"
|
||||
anchoredTerm = root.getChild(0) and
|
||||
anchoredTerm.getChild(0) instanceof RegExpCaret and
|
||||
not containsLeadingPseudoAnchor(root.getChild([ 1 .. root.getNumChild() - 1 ]))
|
||||
or
|
||||
// a problematic pattern: `a|b|...|x$`
|
||||
regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)"
|
||||
anchoredTerm = root.getLastChild() and
|
||||
anchoredTerm.getLastChild() instanceof RegExpDollar and
|
||||
not containsTrailingPseudoAnchor(root.getChild([ 0 .. root.getNumChild() - 2 ]))
|
||||
) and
|
||||
anchorPart = src.getPattern().regexpCapture(regex, 1) and
|
||||
anchorPart.regexpMatch("(?i).*[a-z].*") and
|
||||
msg = "Misleading operator precedence. The subexpression '" + anchorPart +
|
||||
msg = "Misleading operator precedence. The subexpression '" + anchoredTerm.getRawValue() +
|
||||
"' is anchored, but the other parts of this regular expression are not"
|
||||
)
|
||||
}
|
||||
|
||||
@@ -46,6 +46,9 @@ class RegExpTerm extends Locatable, @regexpterm {
|
||||
/** Gets how many child terms this term has, counted over `getAChild()`. */
int getNumChild() {
  result = count(this.getAChild())
}
|
||||
|
||||
/** Gets the child of this term at the highest index, `getNumChild() - 1`. */
RegExpTerm getLastChild() {
  result = this.getChild(this.getNumChild() - 1)
}
|
||||
|
||||
/**
|
||||
* Gets the parent term of this regular expression term, or the
|
||||
* regular expression literal if this is the root term.
|
||||
@@ -266,6 +269,20 @@ class RegExpSequence extends RegExpTerm, @regexp_seq {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A caret assertion `^` or a dollar assertion `$`, matching the beginning
 * or the end of a line, respectively.
 *
 * Examples:
 *
 * ```
 * ^
 * $
 * ```
 */
class RegExpAnchor extends RegExpTerm, @regexp_anchor {
  // Holds unconditionally: every anchor is reported as nullable.
  override predicate isNullable() {
    any()
  }
}
|
||||
|
||||
/**
|
||||
* A caret assertion `^` matching the beginning of a line.
|
||||
*
|
||||
@@ -275,8 +292,7 @@ class RegExpSequence extends RegExpTerm, @regexp_seq {
|
||||
* ^
|
||||
* ```
|
||||
*/
|
||||
class RegExpCaret extends RegExpTerm, @regexp_caret {
|
||||
override predicate isNullable() { any() }
|
||||
class RegExpCaret extends RegExpAnchor, @regexp_caret {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -288,8 +304,7 @@ class RegExpCaret extends RegExpTerm, @regexp_caret {
|
||||
* $
|
||||
* ```
|
||||
*/
|
||||
class RegExpDollar extends RegExpTerm, @regexp_dollar {
|
||||
override predicate isNullable() { any() }
|
||||
class RegExpDollar extends RegExpAnchor, @regexp_dollar {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -814,26 +829,26 @@ abstract class RegExpPatternSource extends DataFlow::Node {
|
||||
* of this node.
|
||||
*/
|
||||
abstract DataFlow::SourceNode getARegExpObject();
|
||||
|
||||
/**
 * Gets the regular expression term associated with this pattern source.
 *
 * Each concrete pattern source supplies its own definition.
 */
abstract RegExpTerm getRegExpTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* A regular expression literal, viewed as the pattern source for itself.
|
||||
*/
|
||||
private class RegExpLiteralPatternSource extends RegExpPatternSource {
|
||||
string pattern;
|
||||
|
||||
RegExpLiteralPatternSource() {
|
||||
exists(string raw | raw = asExpr().(RegExpLiteral).getRoot().getRawValue() |
|
||||
// hide the fact that `/` is escaped in the literal
|
||||
pattern = raw.regexpReplaceAll("\\\\/", "/")
|
||||
)
|
||||
}
|
||||
private class RegExpLiteralPatternSource extends RegExpPatternSource, DataFlow::ValueNode {
|
||||
override RegExpLiteral astNode;
|
||||
|
||||
override DataFlow::Node getAParse() { result = this }
|
||||
|
||||
override string getPattern() { result = pattern }
|
||||
override string getPattern() {
|
||||
// hide the fact that `/` is escaped in the literal
|
||||
result = astNode.getRoot().getRawValue().regexpReplaceAll("\\\\/", "/")
|
||||
}
|
||||
|
||||
override DataFlow::SourceNode getARegExpObject() { result = this }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result = astNode.getRoot() }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -856,4 +871,6 @@ private class StringRegExpPatternSource extends RegExpPatternSource {
|
||||
}
|
||||
|
||||
override string getPattern() { result = getStringValue() }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result = asExpr().(StringLiteral).asRegExp() }
|
||||
}
|
||||
|
||||
@@ -858,6 +858,7 @@ regexpParseErrors (unique int id: @regexp_parse_error,
|
||||
@regexp_lookahead = @regexp_positive_lookahead | @regexp_negative_lookahead;
|
||||
@regexp_lookbehind = @regexp_positive_lookbehind | @regexp_negative_lookbehind;
|
||||
@regexp_subpattern = @regexp_lookahead | @regexp_lookbehind;
|
||||
@regexp_anchor = @regexp_dollar | @regexp_caret;
|
||||
|
||||
isGreedy (int id: @regexp_quantifier ref);
|
||||
rangeQuantifierLowerBound (unique int id: @regexp_range ref, int lo: int ref);
|
||||
|
||||
Reference in New Issue
Block a user