// Mirror of https://github.com/github/codeql.git (synced 2025-12-16 08:43:11 +01:00); 1447 lines, 39 KiB, plaintext.
/**
 * Provides classes for working with regular expressions.
 *
 * Regular expression literals are represented as an abstract syntax tree of regular expression
 * terms.
 */
overlay[local]
module;

import javascript
private import semmle.javascript.dataflow.InferredTypes
private import semmle.javascript.internal.CachedStages

/**
 * An element containing a regular expression term, that is, either
 * a regular expression literal, a string literal (parsed as a regular expression),
 * or another regular expression term.
 *
 * Examples:
 *
 * ```
 * // the regular expression literal and all terms it contains are regexp parents
 * /((ECMA|Java)[sS]cript)*$/
 * ```
 */
class RegExpParent extends Locatable, @regexpparent { }

/**
 * A regular expression term, that is, a syntactic part of a regular expression.
 *
 * Regular expression terms may occur as part of a regular expression literal,
 * such as `/[a-z]+/`, or as part of a string literal, such as `"[a-z]+"`.
 *
 * Note that some terms will occur as part of a string literal that isn't actually
 * interpreted as a regular expression at runtime. Use `isPartOfRegExpLiteral`
 * or `isUsedAsRegExp` to check if a term is really used as a regular expression.
 *
 * Examples:
 *
 * ```
 * ((ECMA|Java)[sS]cript)*$
 * ((ECMA|Java)[sS]cript)*
 * (ECMA|Java)
 * $
 * ```
 */
class RegExpTerm extends Locatable, @regexpterm {
  /** Gets the `i`th child term of this term. */
  RegExpTerm getChild(int i) { regexpterm(result, _, this, i, _) }

  /** Gets a child term of this term. */
  RegExpTerm getAChild() { result = this.getChild(_) }

  /** Gets the number of child terms of this term. */
  int getNumChild() { result = count(this.getAChild()) }

  /** Gets the last child term of this term. */
  RegExpTerm getLastChild() { result = this.getChild(this.getNumChild() - 1) }

  /**
   * Gets the parent term of this regular expression term, or the
   * regular expression literal if this is the root term.
   */
  RegExpParent getParent() { regexpterm(this, _, result, _, _) }

  /** Gets the regular expression literal this term belongs to, if any. */
  RegExpLiteral getLiteral() { result = this.getRootTerm().getParent() }

  override string toString() { regexpterm(this, _, _, _, result) }

  /** Gets the raw source text of this term. */
  string getRawValue() { regexpterm(this, _, _, _, result) }

  /** Holds if this regular expression term can match the empty string. */
  predicate isNullable() { none() } // Overridden in subclasses.

  /** Gets the regular expression term that is matched (textually) before this one, if any. */
  RegExpTerm getPredecessor() {
    exists(RegExpTerm parent | parent = this.getParent() |
      result = parent.(RegExpSequence).previousElement(this)
      or
      // No previous sibling in an enclosing sequence: continue with the parent's
      // predecessor, but never step out of a lookahead/lookbehind assertion.
      not exists(parent.(RegExpSequence).previousElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getPredecessor()
    )
  }

  /** Gets the regular expression term that is matched (textually) after this one, if any. */
  RegExpTerm getSuccessor() {
    exists(RegExpTerm parent | parent = this.getParent() |
      result = parent.(RegExpSequence).nextElement(this)
      or
      // No next sibling in an enclosing sequence: continue with the parent's
      // successor, but never step out of a lookahead/lookbehind assertion.
      not exists(parent.(RegExpSequence).nextElement(this)) and
      not parent instanceof RegExpSubPattern and
      result = parent.getSuccessor()
    )
  }

  /**
   * Holds if this regular expression term is in a forward-matching context, that is,
   * it has no enclosing lookbehind assertions.
   */
  predicate isInForwardMatchingContext() { not this.isInBackwardMatchingContext() }

  /**
   * Holds if this regular expression term is in a backward-matching context, that is,
   * it has an enclosing lookbehind assertion.
   */
  predicate isInBackwardMatchingContext() { this = any(RegExpLookbehind lbh).getAChild+() }

  /**
   * Holds if this is the root term of a regular expression.
   */
  predicate isRootTerm() { not this.getParent() instanceof RegExpTerm }

  /**
   * Gets the outermost term of this regular expression.
   */
  RegExpTerm getRootTerm() {
    this.isRootTerm() and
    result = this
    or
    result = this.getParent().(RegExpTerm).getRootTerm()
  }

  /**
   * Holds if this term occurs as part of a regular expression literal.
   */
  predicate isPartOfRegExpLiteral() { exists(this.getLiteral()) }

  /**
   * Holds if this term occurs as part of a string literal.
   *
   * This predicate holds regardless of whether the string literal is actually
   * used as a regular expression. See `isUsedAsRegExp`.
   */
  predicate isPartOfStringLiteral() { this.getRootTerm().getParent() instanceof StringLiteral }

  /**
   * Holds if this term is part of a regular expression literal, or a string literal
   * that is interpreted as a regular expression.
   *
   * Unlike `isPartOfRegExpLiteral` and `isPartOfStringLiteral`, this predicate takes
   * data flow into account, to exclude string literals that aren't used as regular expressions.
   *
   * For example:
   * ```js
   * location.href.match("^https://example\\.com/") // YES - String is used as regexp
   *
   * console.log("Hello world"); // NO - string is not used as regexp
   *
   * /[a-z]+/g; // YES - Regexp literals are always used as regexp
   * ```
   */
  overlay[global]
  predicate isUsedAsRegExp() {
    exists(RegExpParent parent | parent = this.getRootTerm().getParent() |
      parent instanceof RegExpLiteral
      or
      parent.(Expr).flow() instanceof RegExpPatternSource
    )
  }

  /**
   * Gets the single string this regular-expression term matches.
   *
   * This predicate is only defined for (sequences/groups of) constant regular expressions.
   * In particular, terms involving zero-width assertions like `^` or `\b` are not considered
   * to have a constant value.
   *
   * Note that this predicate does not take flags of the enclosing regular-expression literal
   * into account.
   */
  string getConstantValue() { none() }

  /**
   * Gets a string that is matched by this regular-expression term.
   */
  string getAMatchedString() { result = this.getConstantValue() }

  /** Holds if this term has the specified location. */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }
}

/**
 * A quantified regular expression term.
 *
 * Example:
 *
 * ```
 * ((ECMA|Java)[sS]cript)*
 * ```
 */
class RegExpQuantifier extends RegExpTerm, @regexp_quantifier {
  /** Holds if the quantifier of this term is a greedy quantifier. */
  predicate isGreedy() { is_greedy(this) }
}

/**
 * A regular expression term that permits unlimited repetitions, that is,
 * a `+` or `*` quantifier, or a range quantifier without an upper bound.
 */
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
  InfiniteRepetitionQuantifier() {
    this instanceof RegExpPlus
    or
    this instanceof RegExpStar
    or
    this instanceof RegExpRange and not exists(this.(RegExpRange).getUpperBound())
  }
}

/**
 * An escaped regular expression term, that is, a regular expression
 * term starting with a backslash.
 *
 * Example:
 *
 * ```
 * \.
 * \w
 * ```
 */
class RegExpEscape extends RegExpTerm, @regexp_escape {
  override string getAPrimaryQlClass() { result = "RegExpEscape" }
}

/**
 * A constant regular expression term, that is, a regular expression
 * term matching a single string.
 *
 * Example:
 *
 * ```
 * abc
 * ```
 */
class RegExpConstant extends RegExpTerm, @regexp_constant {
  /** Gets the string matched by this constant term. */
  string getValue() { regexp_const_value(this, result) }

  /**
   * Holds if this constant represents a valid Unicode character (as opposed
   * to a surrogate code point that does not correspond to a character by itself.)
   */
  predicate isCharacter() { any() }

  override predicate isNullable() { none() }

  override string getConstantValue() { result = this.getValue() }

  override string getAPrimaryQlClass() { result = "RegExpConstant" }
}

/**
 * A character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \.
 * ```
 */
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
  override predicate isCharacter() {
    not (
      // unencodable characters are represented as '?' or \uFFFD in the database
      this.getValue() = ["?", 65533.toUnicode()] and
      exists(string s | s = this.toString().toLowerCase() |
        // only Unicode escapes give rise to unencodable characters
        s.matches("\\\\u%") and
        // but '\u003f' actually is the '?' character itself
        s != "\\u003f"
      )
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpCharEscape" }
}

/**
 * An alternative term, that is, a term of the form `a|b`.
 *
 * Example:
 *
 * ```
 * ECMA|Java
 * ```
 */
class RegExpAlt extends RegExpTerm, @regexp_alt {
  /** Gets an alternative of this term. */
  RegExpTerm getAlternative() { result = this.getAChild() }

  /** Gets the number of alternatives of this term. */
  int getNumAlternative() { result = this.getNumChild() }

  /** Holds if at least one alternative can match the empty string. */
  override predicate isNullable() { this.getAlternative().isNullable() }

  override string getAMatchedString() { result = this.getAlternative().getAMatchedString() }

  override string getAPrimaryQlClass() { result = "RegExpAlt" }
}

/**
 * An intersection term, that is, a term of the form `[[a]&&[ab]]`.
 *
 * Example:
 *
 * ```
 * /[[abc]&&[bcd]]/v - which matches 'b' and 'c' only.
 * ```
 */
class RegExpIntersection extends RegExpTerm, @regexp_intersection {
  /** Gets an intersected term of this term. */
  RegExpTerm getAnElement() { result = this.getAChild() }

  /** Gets the number of intersected terms of this term. */
  int getNumIntersectedTerm() { result = this.getNumChild() }

  override predicate isNullable() { this.getAnElement().isNullable() }

  override string getAPrimaryQlClass() { result = "RegExpIntersection" }
}

/**
 * A subtraction term, that is, a term of the form `[[a]--[ab]]`.
 *
 * Example:
 *
 * ```
 * /[[abc]--[bc]]/v - which matches 'a' only.
 * ```
 */
class RegExpSubtraction extends RegExpTerm, @regexp_subtraction {
  /** Gets the minuend (left operand) of this subtraction. */
  RegExpTerm getFirstTerm() { result = this.getChild(0) }

  /** Gets the number of subtracted terms (subtrahends) of this term. */
  int getNumSubtractedTerm() { result = this.getNumChild() - 1 }

  /** Gets a subtrahend (right operand) of this subtraction. */
  RegExpTerm getASubtractedTerm() { exists(int i | i > 0 and result = this.getChild(i)) }

  override predicate isNullable() { none() }

  override string getAPrimaryQlClass() { result = "RegExpSubtraction" }
}

/**
 * A sequence term.
 *
 * Example:
 *
 * ```
 * (ECMA|Java)Script
 * ```
 *
 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
 */
class RegExpSequence extends RegExpTerm, @regexp_seq {
  /** Gets an element of this sequence. */
  RegExpTerm getElement() { result = this.getAChild() }

  /** Gets the number of elements in this sequence. */
  int getNumElement() { result = this.getNumChild() }

  /** Holds if every element of this sequence can match the empty string. */
  override predicate isNullable() {
    forall(RegExpTerm child | child = this.getAChild() | child.isNullable())
  }

  override string getConstantValue() { result = this.getConstantValue(0) }

  /**
   * Gets the single string matched by the `i`th child and all following children of
   * this sequence, if any.
   */
  private string getConstantValue(int i) {
    // Base case: one past the last child contributes the empty string.
    i = this.getNumChild() and
    result = ""
    or
    result = this.getChild(i).getConstantValue() + this.getConstantValue(i + 1)
  }

  /** Gets the element preceding `element` in this sequence. */
  RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }

  /** Gets the element following `element` in this sequence. */
  RegExpTerm nextElement(RegExpTerm element) {
    exists(int i |
      element = this.getChild(i) and
      result = this.getChild(i + 1)
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpSequence" }
}

/**
 * A caret `^` or dollar `$` assertion, matching the beginning or end of a line, respectively.
 *
 * Example:
 *
 * ```
 * ^
 * $
 * ```
 */
class RegExpAnchor extends RegExpTerm, @regexp_anchor {
  override predicate isNullable() { any() }

  override string getAPrimaryQlClass() { result = "RegExpAnchor" }

  /** Gets the char for this term. */
  abstract string getChar();
}

/**
 * A caret assertion `^` matching the beginning of a line.
 *
 * Example:
 *
 * ```
 * ^
 * ```
 */
class RegExpCaret extends RegExpAnchor, @regexp_caret {
  override string getAPrimaryQlClass() { result = "RegExpCaret" }

  override string getChar() { result = "^" }
}

/**
 * A dollar assertion `$` matching the end of a line.
 *
 * Example:
 *
 * ```
 * $
 * ```
 */
class RegExpDollar extends RegExpAnchor, @regexp_dollar {
  override string getAPrimaryQlClass() { result = "RegExpDollar" }

  override string getChar() { result = "$" }
}

/**
 * A word boundary assertion.
 *
 * Example:
 *
 * ```
 * \b
 * ```
 */
class RegExpWordBoundary extends RegExpTerm, @regexp_wordboundary {
  override predicate isNullable() { any() }

  override string getAPrimaryQlClass() { result = "RegExpWordBoundary" }
}

/**
 * A non-word boundary assertion.
 *
 * Example:
 *
 * ```
 * \B
 * ```
 */
class RegExpNonWordBoundary extends RegExpTerm, @regexp_nonwordboundary {
  override predicate isNullable() { any() }

  override string getAPrimaryQlClass() { result = "RegExpNonWordBoundary" }
}

/**
 * A zero-width lookahead or lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpSubPattern extends RegExpTerm, @regexp_subpattern {
  /** Gets the operand of this assertion, that is, the term it looks ahead or behind for. */
  RegExpTerm getOperand() { result = this.getAChild() }

  override predicate isNullable() { any() }
}

/**
 * A zero-width lookahead assertion.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * (?!\n)
 * ```
 */
class RegExpLookahead extends RegExpSubPattern, @regexp_lookahead {
  override string getAPrimaryQlClass() { result = "RegExpLookahead" }
}

/**
 * A zero-width lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?<=\.)
 * (?<!\\)
 * ```
 */
class RegExpLookbehind extends RegExpSubPattern, @regexp_lookbehind {
  override string getAPrimaryQlClass() { result = "RegExpLookbehind" }
}

/**
 * A positive-lookahead assertion.
 *
 * Examples:
 *
 * ```
 * (?=\w)
 * ```
 */
class RegExpPositiveLookahead extends RegExpLookahead, @regexp_positive_lookahead {
  override string getAPrimaryQlClass() { result = "RegExpPositiveLookahead" }
}

/**
 * A negative-lookahead assertion.
 *
 * Examples:
 *
 * ```
 * (?!\n)
 * ```
 */
class RegExpNegativeLookahead extends RegExpLookahead, @regexp_negative_lookahead {
  override string getAPrimaryQlClass() { result = "RegExpNegativeLookahead" }
}

/**
 * A positive-lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?<=\.)
 * ```
 */
class RegExpPositiveLookbehind extends RegExpLookbehind, @regexp_positive_lookbehind {
  override string getAPrimaryQlClass() { result = "RegExpPositiveLookbehind" }
}

/**
 * A negative-lookbehind assertion.
 *
 * Examples:
 *
 * ```
 * (?<!\\)
 * ```
 */
class RegExpNegativeLookbehind extends RegExpLookbehind, @regexp_negative_lookbehind {
  override string getAPrimaryQlClass() { result = "RegExpNegativeLookbehind" }
}

/**
 * A star-quantified term.
 *
 * Example:
 *
 * ```
 * \w*
 * ```
 */
class RegExpStar extends RegExpQuantifier, @regexp_star {
  override predicate isNullable() { any() }

  override string getAPrimaryQlClass() { result = "RegExpStar" }
}

/**
 * A plus-quantified term.
 *
 * Example:
 *
 * ```
 * \w+
 * ```
 */
class RegExpPlus extends RegExpQuantifier, @regexp_plus {
  /** Holds if the quantified child term can itself match the empty string. */
  override predicate isNullable() { this.getAChild().isNullable() }

  override string getAPrimaryQlClass() { result = "RegExpPlus" }
}

/**
 * An optional term.
 *
 * Example:
 *
 * ```
 * ;?
 * ```
 */
class RegExpOpt extends RegExpQuantifier, @regexp_opt {
  override predicate isNullable() { any() }

  override string getAPrimaryQlClass() { result = "RegExpOpt" }
}

/**
 * A range-quantified term.
 *
 * Examples:
 *
 * ```
 * \w{2,4}
 * \w{2,}
 * \w{2}
 * ```
 */
class RegExpRange extends RegExpQuantifier, @regexp_range {
  /** Gets the lower bound of the range. */
  int getLowerBound() { range_quantifier_lower_bound(this, result) }

  /**
   * Gets the upper bound of the range, if any.
   *
   * If there is no upper bound, any number of repetitions is allowed.
   * For a term of the form `r{lo}`, both the lower and the upper bound
   * are `lo`.
   */
  int getUpperBound() { range_quantifier_upper_bound(this, result) }

  /** Holds if the quantified child is nullable, or zero repetitions are allowed. */
  override predicate isNullable() {
    this.getAChild().isNullable() or
    this.getLowerBound() = 0
  }

  override string getAPrimaryQlClass() { result = "RegExpRange" }
}

/**
 * A dot regular expression.
 *
 * Example:
 *
 * ```
 * .
 * ```
 */
class RegExpDot extends RegExpTerm, @regexp_dot {
  override predicate isNullable() { none() }

  override string getAPrimaryQlClass() { result = "RegExpDot" }
}

/**
 * A grouped regular expression.
 *
 * Examples:
 *
 * ```
 * (ECMA|Java)
 * (?:ECMA|Java)
 * (?<quote>['"])
 * ```
 */
class RegExpGroup extends RegExpTerm, @regexp_group {
  /** Holds if this is a capture group. */
  predicate isCapture() { is_capture(this, _) }

  /**
   * Gets the index of this capture group within the enclosing regular
   * expression literal.
   *
   * For example, in the regular expression `/((a?).)(?:b)/`, the
   * group `((a?).)` has index 1, the group `(a?)` nested inside it
   * has index 2, and the group `(?:b)` has no index, since it is
   * not a capture group.
   */
  int getNumber() { is_capture(this, result) }

  /** Holds if this is a named capture group. */
  predicate isNamed() { is_named_capture(this, _) }

  /** Gets the name of this capture group, if any. */
  string getName() { is_named_capture(this, result) }

  override predicate isNullable() { this.getAChild().isNullable() }

  override string getConstantValue() { result = this.getAChild().getConstantValue() }

  override string getAMatchedString() { result = this.getAChild().getAMatchedString() }

  override string getAPrimaryQlClass() { result = "RegExpGroup" }
}

/**
 * A sequence of normal characters without special meaning in a regular expression.
 *
 * Example:
 *
 * ```
 * abc
 * ;
 * ```
 */
class RegExpNormalConstant extends RegExpConstant, @regexp_normal_constant {
  override string getAPrimaryQlClass() { result = "RegExpNormalConstant" }
}

/**
 * A hexadecimal character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \x0a
 * ```
 */
class RegExpHexEscape extends RegExpCharEscape, @regexp_hex_escape {
  override string getAPrimaryQlClass() { result = "RegExpHexEscape" }
}

/**
 * A Unicode character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \u000a
 * ```
 */
class RegExpUnicodeEscape extends RegExpCharEscape, @regexp_unicode_escape {
  override string getAPrimaryQlClass() { result = "RegExpUnicodeEscape" }
}

/**
 * A decimal character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \0
 * ```
 */
class RegExpDecimalEscape extends RegExpCharEscape, @regexp_dec_escape {
  override string getAPrimaryQlClass() { result = "RegExpDecimalEscape" }
}

/**
 * An octal character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \0177
 * ```
 */
class RegExpOctalEscape extends RegExpCharEscape, @regexp_oct_escape {
  override string getAPrimaryQlClass() { result = "RegExpOctalEscape" }
}

/**
 * A control character escape in a regular expression.
 *
 * Example:
 *
 * ```
 * \ca
 * ```
 */
class RegExpControlEscape extends RegExpCharEscape, @regexp_ctrl_escape {
  override string getAPrimaryQlClass() { result = "RegExpControlEscape" }
}

/**
 * A character class escape in a regular expression.
 *
 * Examples:
 *
 * ```
 * \w
 * \S
 * ```
 */
class RegExpCharacterClassEscape extends RegExpEscape, @regexp_char_class_escape {
  /** Gets the name of the character class; for example, `w` for `\w`. */
  string getValue() { char_class_escape(this, result) }

  override predicate isNullable() { none() }

  override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
}

/**
 * A Unicode property escape in a regular expression.
 *
 * Examples:
 *
 * ```
 * \p{Number}
 * \p{Script=Greek}
 * ```
 */
class RegExpUnicodePropertyEscape extends RegExpEscape, @regexp_unicode_property_escape {
  /**
   * Gets the name of this Unicode property; for example, `Number` for `\p{Number}` and
   * `Script` for `\p{Script=Greek}`.
   */
  string getName() { unicode_property_escapename(this, result) }

  /**
   * Gets the value of this Unicode property, if any.
   *
   * For example, the value of Unicode property `\p{Script=Greek}` is `Greek`, while
   * `\p{Number}` does not have a value.
   */
  string getValue() { unicode_property_escapevalue(this, result) }

  override predicate isNullable() { none() }

  override string getAPrimaryQlClass() { result = "RegExpUnicodePropertyEscape" }
}

/**
 * An identity escape, that is, an escaped character in a regular expression that just
 * represents itself.
 *
 * Examples:
 *
 * ```
 * \\
 * \/
 * ```
 */
class RegExpIdentityEscape extends RegExpCharEscape, @regexp_id_escape {
  override string getAPrimaryQlClass() { result = "RegExpIdentityEscape" }
}

/**
 * A back reference, that is, a term of the form `\i` or `\k<name>`
 * in a regular expression.
 *
 * Examples:
 *
 * ```
 * \1
 * \k<quote>
 * ```
 */
class RegExpBackRef extends RegExpTerm, @regexp_backref {
  /**
   * Gets the number of the capture group this back reference refers to, if any.
   */
  int getNumber() { backref(this, result) }

  /**
   * Gets the name of the capture group this back reference refers to, if any.
   */
  string getName() { named_backref(this, result) }

  /**
   * Gets the capture group this back reference refers to.
   *
   * The group is looked up among the terms that share this term's root term and
   * is matched either by capture-group number or by capture-group name.
   */
  RegExpGroup getGroup() {
    // Compare root terms rather than enclosing literals: `getLiteral()` has no result
    // for terms whose root is parsed from a string literal, which would leave back
    // references in such patterns unresolved. For terms inside a regular expression
    // literal, sharing a root term is equivalent to sharing the literal.
    result.getRootTerm() = this.getRootTerm() and
    (
      result.getNumber() = this.getNumber() or
      result.getName() = this.getName()
    )
  }

  /** Holds if the referenced capture group can match the empty string. */
  override predicate isNullable() { this.getGroup().isNullable() }

  override string getAPrimaryQlClass() { result = "RegExpBackRef" }
}

/**
 * A character class in a regular expression.
 *
 * Examples:
 *
 * ```
 * [a-z_]
 * [^<>&]
 * ```
 */
class RegExpCharacterClass extends RegExpTerm, @regexp_char_class {
  /** Holds if this is an inverted character class, that is, a term of the form `[^...]`. */
  predicate isInverted() { is_inverted(this) }

  override predicate isNullable() { none() }

  /** Gets a string matched by a child of this class; has no result for inverted classes. */
  override string getAMatchedString() {
    not this.isInverted() and result = this.getAChild().getAMatchedString()
  }

  /**
   * Holds if this character class matches any character.
   */
  predicate isUniversalClass() {
    // [^]
    this.isInverted() and not exists(this.getAChild())
    or
    // [\w\W] and similar
    not this.isInverted() and
    exists(string cce1, string cce2 |
      cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
      cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
    |
      // two escapes whose names differ only in case, such as `w` and `W`
      cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
    )
  }

  override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
}

/**
 * A character range in a character class in a regular expression.
 *
 * Example:
 *
 * ```
 * a-z
 * ```
 */
class RegExpCharacterRange extends RegExpTerm, @regexp_char_range {
  override predicate isNullable() { none() }

  /** Holds if `lo` is the lower bound of this character range and `hi` the upper bound. */
  predicate isRange(string lo, string hi) {
    lo = this.getChild(0).(RegExpConstant).getValue() and
    hi = this.getChild(1).(RegExpConstant).getValue()
  }

  override string getAPrimaryQlClass() { result = "RegExpCharacterRange" }
}

/** A parse error encountered while processing a regular expression literal. */
class RegExpParseError extends Error, @regexp_parse_error {
  /** Gets the regular expression term that triggered the parse error. */
  RegExpTerm getTerm() { regexp_parse_errors(this, result, _) }

  /** Gets the regular expression literal in which the parse error occurred. */
  RegExpLiteral getLiteral() { result = this.getTerm().getLiteral() }

  override string getMessage() { regexp_parse_errors(this, _, result) }

  override string toString() { result = this.getMessage() }

  /** Never holds: regular expression parse errors are not treated as fatal. */
  override predicate isFatal() { none() }
}

/**
 * Holds if `func` is a method defined on `String.prototype` with name `name`.
 */
overlay[global]
private predicate isNativeStringMethod(Function func, string name) {
  exists(ExternalInstanceMemberDecl decl |
    decl.hasQualifiedName("String", name) and
    func = decl.getInit()
  )
}

/**
 * Holds if `name` is the name of a property on a Match object returned by `String.prototype.match`,
 * not including array indices.
 */
overlay[global]
private predicate isMatchObjectProperty(string name) {
  // any instance member declared on `Array` in the externs
  any(ExternalInstanceMemberDecl decl).hasQualifiedName("Array", name)
  or
  name in ["length", "index", "input", "groups"]
}

/**
 * Holds if `call` is a one-argument call to `match` or `matchAll` whose result is used in a
 * way that is incompatible with Match objects.
 */
overlay[global]
private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
  call.getMethodName() = ["match", "matchAll"] and
  call.getNumArgument() = 1 and
  (
    // Accessing a property that is absent on Match objects
    exists(string propName |
      exists(call.getAPropertyRead(propName)) and
      not isMatchObjectProperty(propName) and
      not exists(propName.toInt())
    )
    or
    // Awaiting the result
    call.flowsToExpr(any(AwaitExpr await).getOperand())
    or
    // Result is obviously unused
    call.asExpr() = any(ExprStmt stmt).getExpr()
    or
    // The call targets the `match` member of the `sinon` module
    call = API::moduleImport("sinon").getMember("match").getACall()
  )
}

/**
 * Holds if `value` is used in a way that suggests it returns a number.
 */
overlay[global]
pragma[inline]
private predicate isUsedAsNumber(DataFlow::LocalSourceNode value) {
  // compared against an expression that may be a number
  any(Comparison compare)
      .hasOperands(value.getALocalUse().asExpr(), any(Expr e | canBeNumber(e.analyze())))
  or
  // operand of an arithmetic expression
  value.flowsToExpr(any(ArithmeticExpr e).getAnOperand())
  or
  // operand of unary minus
  value.flowsToExpr(any(UnaryExpr e | e.getOperator() = "-").getOperand())
  or
  // used as the index in an index expression
  value.flowsToExpr(any(IndexExpr expr).getPropertyNameExpr())
  or
  // passed to a call whose callee name is one of the listed string/array methods
  exists(DataFlow::CallNode call |
    call.getCalleeName() =
      ["substring", "substr", "slice", "splice", "charAt", "charCodeAt", "codePointAt", "toSpliced"] and
    value.flowsTo(call.getAnArgument())
  )
}

/** Holds if the inferred types of `node` include the string type. */
bindingset[node]
overlay[global]
pragma[inline_late]
private predicate canBeString(DataFlow::AnalyzedNode node) { node.getAType() = TTString() }

/** Holds if the inferred types of `node` include the number type. */
bindingset[node]
overlay[global]
pragma[inline_late]
private predicate canBeNumber(DataFlow::AnalyzedNode node) { node.getAType() = TTNumber() }

/**
 * Holds if `source` may be interpreted as a regular expression.
 */
overlay[global]
cached
predicate isInterpretedAsRegExp(DataFlow::Node source) {
  Stages::Taint::ref() and
  canBeString(source) and
  (
    // The first argument to an invocation of `RegExp` (with or without `new`).
    source = DataFlow::globalVarRef("RegExp").getAnInvocation().getArgument(0)
    or
    // The argument of a call that coerces the argument to a regular expression.
    exists(DataFlow::MethodCallNode mce, string methodName |
      canBeString(mce.getReceiver()) and
      mce.getMethodName() = methodName and
      // every known callee, if any, must be the native string method of that name
      not exists(Function func | func = mce.getACallee() |
        not isNativeStringMethod(func, methodName)
      )
    |
      methodName = ["match", "matchAll"] and
      source = mce.getArgument(0) and
      mce.getNumArgument() = 1 and
      not isUsedAsNonMatchObject(mce)
      or
      methodName = "search" and
      source = mce.getArgument(0) and
      mce.getNumArgument() = 1 and
      // "search" is a common method name, and the built-in "search" method is rarely used,
      // so to reduce FPs we also require that the return value appears to be used as a number.
      isUsedAsNumber(mce)
    )
    or
    // The `pattern` property, or a property name under `patternProperties`, of a JSON schema.
    exists(DataFlow::SourceNode schema | schema = JsonSchema::getAPartOfJsonSchema() |
      source = schema.getAPropertyWrite("pattern").getRhs()
      or
      source =
        schema
            .getAPropertySource("patternProperties")
            .getAPropertyWrite()
            .getPropertyNameExpr()
            .flow()
    )
  )
}

/**
 * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
 * as a part of a regular expression.
 *
 * The flow is tracked backwards from `re`, with `t` describing the steps taken so far.
 */
overlay[global]
private DataFlow::Node regExpSource(DataFlow::Node re, DataFlow::TypeBackTracker t) {
  // Start at a node that is itself interpreted as a regular expression.
  t.start() and
  re = result and
  isInterpretedAsRegExp(result)
  or
  exists(DataFlow::TypeBackTracker t2, DataFlow::Node succ | succ = regExpSource(re, t2) |
    // an ordinary back-tracking step
    t2 = t.smallstep(result, succ)
    or
    // a shared taint step, which leaves the tracker state unchanged
    TaintTracking::sharedTaintStep(result, succ) and
    t = t2
  )
}

/**
 * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
 * as a part of a regular expression.
 *
 * This is the result of the back-tracking variant of `regExpSource` at its end state.
 */
overlay[global]
private DataFlow::Node regExpSource(DataFlow::Node re) {
  result = regExpSource(re, DataFlow::TypeBackTracker::end())
}

/**
 * A data-flow node whose value may reach a position where it is interpreted
 * as a part of a regular expression.
 */
overlay[global]
abstract class RegExpPatternSource extends DataFlow::Node {
  /**
   * Gets a node at which the pattern of this node is parsed as a part of
   * a regular expression.
   */
  abstract DataFlow::Node getAParse();

  /**
   * Gets the pattern text of this node that is interpreted as a part of a
   * regular expression.
   */
  abstract string getPattern();

  /**
   * Gets a regular expression object that is built from the pattern
   * of this node.
   */
  abstract DataFlow::SourceNode getARegExpObject();

  /**
   * Gets the root term of the regular expression that is parsed from this pattern.
   */
  abstract RegExpTerm getRegExpTerm();
}
|
|
|
|
/**
 * A regular expression literal, acting as the pattern source for itself.
 */
overlay[global]
private class RegExpLiteralPatternSource extends RegExpPatternSource, DataFlow::ValueNode {
  override RegExpLiteral astNode;

  /** Gets this node: a literal is parsed where it is written. */
  override DataFlow::Node getAParse() { this = result }

  /** Gets the raw text of the root term, with each escaped `\/` shown as a plain `/`. */
  override string getPattern() {
    exists(string raw | raw = astNode.getRoot().getRawValue() |
      // hide the fact that `/` is escaped in the literal
      result = raw.regexpReplaceAll("\\\\/", "/")
    )
  }

  /** Gets this node: the literal is its own regular expression object. */
  override DataFlow::SourceNode getARegExpObject() { this = result }

  /** Gets the root term of the underlying literal. */
  override RegExpTerm getRegExpTerm() { astNode.getRoot() = result }
}
|
|
|
|
/**
 * A node whose string value may flow to a position where it is interpreted
 * as a part of a regular expression.
 *
 * The parsed term (`getRegExpTerm`) is only available when the node is a string literal.
 */
overlay[global]
private class StringRegExpPatternSource extends RegExpPatternSource {
  /** The node at which the value of this node is interpreted as a regular expression. */
  DataFlow::Node parse;

  StringRegExpPatternSource() { regExpSource(parse) = this }

  override DataFlow::Node getAParse() { parse = result }

  /** Gets an invocation of the global `RegExp` whose first argument is the parse position. */
  override DataFlow::SourceNode getARegExpObject() {
    exists(DataFlow::InvokeNode regExpCall |
      regExpCall = DataFlow::globalVarRef("RegExp").getAnInvocation() and
      regExpCall.getArgument(0) = parse
    |
      result = regExpCall
    )
  }

  override string getPattern() { this.getStringValue() = result }

  override RegExpTerm getRegExpTerm() {
    exists(StringLiteral literal | literal = this.asExpr() | result = literal.asRegExp())
  }
}
|
|
|
|
/**
 * A node whose string value may flow to a position where it is interpreted
 * as a part of a regular expression.
 *
 * The parsed term (`getRegExpTerm`) is only available when the node is an `AddExpr`,
 * that is, a string concatenation.
 */
overlay[global]
private class StringConcatRegExpPatternSource extends RegExpPatternSource {
  /** The node at which the value of this node is interpreted as a regular expression. */
  DataFlow::Node parse;

  StringConcatRegExpPatternSource() { regExpSource(parse) = this }

  override DataFlow::Node getAParse() { parse = result }

  /** Gets an invocation of the global `RegExp` whose first argument is the parse position. */
  override DataFlow::SourceNode getARegExpObject() {
    exists(DataFlow::InvokeNode regExpCall |
      regExpCall = DataFlow::globalVarRef("RegExp").getAnInvocation() and
      regExpCall.getArgument(0) = parse
    |
      result = regExpCall
    )
  }

  override string getPattern() { this.getStringValue() = result }

  override RegExpTerm getRegExpTerm() {
    exists(AddExpr concat | concat = this.asExpr() | result = concat.asRegExp())
  }
}
|
|
|
|
/**
 * A quoted string escape in a regular expression, written with the `\q` syntax.
 * Alternation with `|` is the only operation supported inside a quoted string.
 *
 * Example:
 *
 * ```
 * \q{foo}
 * \q{a|b|c}
 * ```
 */
class RegExpQuotedString extends RegExpTerm, @regexp_quoted_string {
  /** Gets the term that represents the contents of this quoted string. */
  RegExpTerm getTerm() { this.getAChild() = result }

  /** Never holds: a quoted string is not treated as nullable. */
  override predicate isNullable() { none() }

  /** Gets a string matched by the contents of this quoted string. */
  override string getAMatchedString() { this.getTerm().getAMatchedString() = result }

  override string getAPrimaryQlClass() { "RegExpQuotedString" = result }
}
|
|
|
|
module RegExp {
|
|
/** Gets `"?"`, the placeholder flag string for a regular expression whose flags are unknown. */
string unknownFlag() { "?" = result }
|
|
|
|
/** Holds if the flag string `flags` contains the multiline flag `m`. */
bindingset[flags]
predicate isMultiline(string flags) {
  flags.matches("%m%")
}
|
|
|
|
/** Holds if the flag string `flags` contains the global flag `g`. */
bindingset[flags]
predicate isGlobal(string flags) {
  flags.matches("%g%")
}
|
|
|
|
/** Holds if the flag string `flags` contains the case-insensitivity flag `i`. */
bindingset[flags]
predicate isIgnoreCase(string flags) {
  flags.matches("%i%")
}
|
|
|
|
/** Holds if the flag string `flags` contains the dot-all flag `s`. */
bindingset[flags]
predicate isDotAll(string flags) {
  flags.matches("%s%")
}
|
|
|
|
/** Holds if the flag string `flags` contains the unicode-sets flag `v`. */
bindingset[flags]
predicate isUnicodeSets(string flags) {
  flags.matches("%v%")
}
|
|
|
|
/** Holds if `flags` contains the `m` flag, or if `flags` is the unknown flag `?`. */
bindingset[flags]
predicate maybeMultiline(string flags) { isMultiline(flags) or flags = unknownFlag() }
|
|
|
|
/** Holds if `flags` contains the `g` flag, or if `flags` is the unknown flag `?`. */
bindingset[flags]
predicate maybeGlobal(string flags) { isGlobal(flags) or flags = unknownFlag() }
|
|
|
|
/** Holds if `flags` contains the `i` flag, or if `flags` is the unknown flag `?`. */
bindingset[flags]
predicate maybeIgnoreCase(string flags) { isIgnoreCase(flags) or flags = unknownFlag() }
|
|
|
|
/** Holds if `flags` contains the `s` flag, or if `flags` is the unknown flag `?`. */
bindingset[flags]
predicate maybeDotAll(string flags) { isDotAll(flags) or flags = unknownFlag() }
|
|
|
|
/**
 * Holds if `term` and all of its disjuncts are anchored on both ends.
 *
 * This covers a sequence that starts with `^` and ends with `$`, a group with a
 * fully anchored child, and an alternation whose disjuncts are all fully anchored.
 */
predicate isFullyAnchoredTerm(RegExpTerm term) {
  // a sequence of the form `^...$`
  term.(RegExpSequence).getChild(0) instanceof RegExpCaret and
  term.(RegExpSequence).getLastChild() instanceof RegExpDollar
  or
  // a group around a fully anchored term
  exists(RegExpGroup group | group = term | isFullyAnchoredTerm(group.getAChild()))
  or
  // an alternation: every one of its disjuncts must be fully anchored
  isFullyAnchoredAlt(term, term.getNumChild())
}
|
|
|
|
/**
 * Holds if each of the first `i` disjuncts of `term` is fully anchored.
 *
 * Defined inductively on `i`, starting from `i = 1`.
 */
private predicate isFullyAnchoredAlt(RegExpAlt term, int i) {
  // the first disjunct on its own
  i = 1 and
  isFullyAnchoredTerm(term.getChild(0))
  or
  // the `i`th disjunct (at index `i - 1`), given that the preceding ones are anchored
  isFullyAnchoredTerm(term.getChild(i - 1)) and
  isFullyAnchoredAlt(term, i - 1)
}
|
|
|
|
/**
 * Holds if `term` matches any character, apart from explicitly listed exceptions.
 *
 * For example, this holds for `.`, `[^<>]`, or `\W`, but not for `[a-z]`, `\w`, or `[^\W\S]`.
 */
predicate isWildcardLike(RegExpTerm term) {
  term instanceof RegExpDot
  or
  term.(RegExpCharacterClassEscape).getValue().isUppercase()
  or
  exists(RegExpCharacterClass cls | cls = term |
    if cls.isInverted()
    then
      // `[^a-z]`: an inverted class with no upper-case escape among its children
      not cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase()
    else
      // `[\W]`: a non-inverted class with an upper-case escape among its children
      cls.getAChild().(RegExpCharacterClassEscape).getValue().isUppercase()
  )
  or
  // an unlimited number of wildcards is also a wildcard
  isWildcardLike(term.(InfiniteRepetitionQuantifier).getAChild())
}
|
|
|
|
/**
 * Holds if the root term `term` is a generic sanitizer, either for strings that match it
 * (`outcome` is true) or for strings that do not match it (`outcome` is false).
 *
 * Whitelisting regexps such as `^(foo|bar)$` sanitize matches in the true case.
 * Inverted character classes such as `[^a-z]` or `\W` sanitize matches in the false case.
 */
predicate isGenericRegExpSanitizer(RegExpTerm term, boolean outcome) {
  term.isRootTerm() and
  (
    // A fully anchored regexp without any wildcard-like sub-term sanitizes in the true case
    isFullyAnchoredTerm(term) and
    not isWildcardLike(term.getAChild*()) and
    outcome = true
    or
    // Character set restrictions like `/[^a-z]/.test(x)` sanitize in the false case.
    // The wildcard may be the term itself or a child of a group, optionally as a disjunct.
    exists(RegExpTerm candidate | candidate = [term, term.(RegExpGroup).getAChild()] |
      isWildcardLike([candidate, candidate.(RegExpAlt).getAChild()])
    ) and
    outcome = false
  )
}
|
|
|
|
/**
 * Gets the root term of a regular expression object that can flow to `node`.
 */
overlay[global]
RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
  exists(DataFlow::RegExpCreationNode creation |
    result = creation.getRoot() and
    creation.getAReference().flowsTo(node)
  )
}
|
|
|
|
/**
 * Gets the root term of a regular expression that can flow to `node`.
 *
 * This includes `RegExp` objects, and also the case where `node` is itself a string
 * literal that is parsed as a regular expression.
 */
overlay[global]
RegExpTerm getRegExpFromNode(DataFlow::Node node) {
  exists(StringLiteral literal | literal = node.asExpr() | result = literal.asRegExp())
  or
  result = getRegExpObjectFromNode(node)
}
|
|
|
|
/**
 * A character that will be analyzed by `RegExp::alwaysMatchesMetaCharacter`.
 *
 * Within this module, `<`, `'`, and `"` (`HtmlMetaCharacter`) as well as `{`, `[`, and `+`
 * (`RegexpMetaChars`) are meta-characters; further meta-characters can be added by
 * subclassing this class.
 */
abstract class MetaCharacter extends string {
  bindingset[this]
  MetaCharacter() { any() }

  /**
   * Holds if the given atomic term matches this meta-character.
   *
   * Does not hold for derived terms like alternatives and groups.
   *
   * By default, a term matches if it is a constant equal to this character, a `.`,
   * one of the escapes `\W`, `\S`, or `\D`, or a character range whose bounds enclose
   * this character. The predicate can be overridden for meta-characters where this
   * is not the case.
   */
  predicate matchedByAtom(RegExpTerm term) {
    term instanceof RegExpDot
    or
    this = term.(RegExpConstant).getConstantValue()
    or
    // NOTE(review): this assumes `getValue()` includes the leading backslash - confirm
    term.(RegExpCharacterClassEscape).getValue() = ["\\W", "\\S", "\\D"]
    or
    exists(string low, string high | term.(RegExpCharacterRange).isRange(low, high) |
      this >= low and
      this <= high
    )
  }
}
|
|
|
|
/**
 * A meta-character used by HTML: `<`, `'`, or `"`.
 */
private class HtmlMetaCharacter extends MetaCharacter {
  HtmlMetaCharacter() {
    this = "<"
    or
    this = "'"
    or
    this = "\""
  }
}
|
|
|
|
/**
 * A meta-character used by regular expressions: `{`, `[`, or `+`.
 */
private class RegexpMetaChars extends RegExp::MetaCharacter {
  RegexpMetaChars() {
    this = "{"
    or
    this = "["
    or
    this = "+"
  }
}
|
|
|
|
/**
 * Holds if `term` can match any occurrence of `char` within a string (not taking into account
 * the context in which `term` appears).
 *
 * This predicate is under-approximate and never considers sequences to guarantee a match.
 */
predicate alwaysMatchesMetaCharacter(RegExpTerm term, MetaCharacter char) {
  // an atom that matches `char`; atoms directly inside a sequence are skipped
  char.matchedByAtom(term) and
  not term.getParent() instanceof RegExpSequence // restrict size of predicate
  or
  // a group with a matching child, or an alternation with a matching alternative
  alwaysMatchesMetaCharacter([
      term.(RegExpGroup).getAChild(), term.(RegExpAlt).getAlternative()
    ], char)
  or
  exists(RegExpCharacterClass class_ | class_ = term |
    if class_.isInverted()
    then
      // an inverted class must not list anything that matches `char`
      not char.matchedByAtom(class_.getAChild())
    else
      // a regular class must list something that matches `char`
      char.matchedByAtom(class_.getAChild())
  )
}
|
|
}
|