mirror of
https://github.com/github/codeql.git
synced 2025-12-23 04:06:37 +01:00
refactor to share predicates between regular expression queries
This commit is contained in:
@@ -13,7 +13,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import javascript
|
import javascript
|
||||||
import semmle.javascript.security.performance.SuperlinearBackTracking
|
import semmle.javascript.security.performance.ReDoSUtil
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This query implements the analysis described in the following two papers:
|
* This query implements the analysis described in the following two papers:
|
||||||
@@ -80,35 +80,11 @@ import semmle.javascript.security.performance.SuperlinearBackTracking
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A branch in a disjunction that is the root node in a literal, or a literal
|
* Holds if state `s` might be inside a backtracking repetition.
|
||||||
* whose root node is not a disjunction.
|
|
||||||
*/
|
*/
|
||||||
class RegExpRoot extends RegExpTerm {
|
pragma[noinline]
|
||||||
RegExpParent parent;
|
predicate stateInsideBacktracking(State s) {
|
||||||
|
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
|
||||||
RegExpRoot() {
|
|
||||||
exists(RegExpAlt alt |
|
|
||||||
alt.isRootTerm() and
|
|
||||||
this = alt.getAChild() and
|
|
||||||
parent = alt.getParent()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
this.isRootTerm() and
|
|
||||||
not this instanceof RegExpAlt and
|
|
||||||
parent = this.getParent()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if this root term is relevant to the ReDoS analysis.
|
|
||||||
*/
|
|
||||||
predicate isRelevant() {
|
|
||||||
// there is at least one repetition
|
|
||||||
exists(MaybeBacktrackingRepetition rep | getRoot(rep) = this) and
|
|
||||||
// there are no lookbehinds
|
|
||||||
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
|
|
||||||
// is actually used as a RegExp
|
|
||||||
isUsedAsRegExp()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -125,503 +101,6 @@ class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* A constant in a regular expression that represents valid Unicode character(s).
|
|
||||||
*/
|
|
||||||
class RegexpCharacterConstant extends RegExpConstant {
|
|
||||||
RegexpCharacterConstant() { this.isCharacter() }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the root containing the given term, that is, the root of the literal,
|
|
||||||
* or a branch of the root disjunction.
|
|
||||||
*/
|
|
||||||
RegExpRoot getRoot(RegExpTerm term) {
|
|
||||||
result = term or
|
|
||||||
result = getRoot(term.getParent())
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An abstract input symbol, representing a set of concrete characters.
|
|
||||||
*/
|
|
||||||
newtype TInputSymbol =
|
|
||||||
/** An input symbol corresponding to character `c`. */
|
|
||||||
Char(string c) {
|
|
||||||
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
|
|
||||||
} or
|
|
||||||
/**
|
|
||||||
* An input symbol representing all characters matched by
|
|
||||||
* (non-universal) character class `recc`.
|
|
||||||
*/
|
|
||||||
CharClass(RegExpTerm recc) {
|
|
||||||
getRoot(recc).isRelevant() and
|
|
||||||
(
|
|
||||||
recc instanceof RegExpCharacterClass and
|
|
||||||
not recc.(RegExpCharacterClass).isUniversalClass()
|
|
||||||
or
|
|
||||||
recc instanceof RegExpCharacterClassEscape
|
|
||||||
)
|
|
||||||
} or
|
|
||||||
/** An input symbol representing all characters matched by `.`. */
|
|
||||||
Dot() or
|
|
||||||
/** An input symbol representing all characters. */
|
|
||||||
Any() or
|
|
||||||
/** An epsilon transition in the automaton. */
|
|
||||||
Epsilon()
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `a` and `b` are input symbols from the same regexp.
|
|
||||||
* (And not a `Dot()`, `Any()` or `Epsilon()`)
|
|
||||||
*/
|
|
||||||
private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
|
|
||||||
exists(RegExpRoot root |
|
|
||||||
belongsTo(a, root) and
|
|
||||||
belongsTo(b, root)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if the `a` is an input symbol from a regexp that has root `root`.
|
|
||||||
*/
|
|
||||||
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
|
|
||||||
exists(RegExpTerm term | getRoot(term) = root |
|
|
||||||
a = Char(term.(RegexpCharacterConstant).getValue().charAt(_))
|
|
||||||
or
|
|
||||||
a = CharClass(term)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An abstract input symbol, representing a set of concrete characters.
|
|
||||||
*/
|
|
||||||
class InputSymbol extends TInputSymbol {
|
|
||||||
InputSymbol() { not this instanceof Epsilon }
|
|
||||||
|
|
||||||
string toString() {
|
|
||||||
this = Char(result)
|
|
||||||
or
|
|
||||||
result = any(RegExpTerm recc | this = CharClass(recc)).toString()
|
|
||||||
or
|
|
||||||
this = Dot() and result = "."
|
|
||||||
or
|
|
||||||
this = Any() and result = "[^]"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An abstract input symbol that represents a character class.
|
|
||||||
*/
|
|
||||||
abstract class CharacterClass extends InputSymbol {
|
|
||||||
/**
|
|
||||||
* Gets a character that is relevant for intersection-tests involving this
|
|
||||||
* character class.
|
|
||||||
*
|
|
||||||
* Specifically, this is any of the characters mentioned explicitly in the
|
|
||||||
* character class, offset by one if it is inverted. For character class escapes,
|
|
||||||
* the result is as if the class had been written out as a series of intervals.
|
|
||||||
*
|
|
||||||
* This set is large enough to ensure that for any two intersecting character
|
|
||||||
* classes, one contains a relevant character from the other.
|
|
||||||
*/
|
|
||||||
abstract string getARelevantChar();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if this character class matches `char`.
|
|
||||||
*/
|
|
||||||
bindingset[char]
|
|
||||||
abstract predicate matches(string char);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a character matched by this character class.
|
|
||||||
*/
|
|
||||||
string choose() { result = getARelevantChar() and matches(result) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides implementations for `CharacterClass`.
|
|
||||||
*/
|
|
||||||
private module CharacterClasses {
|
|
||||||
/**
|
|
||||||
* Holds if the character class `cc` has a child (constant or range) that matches `char`.
|
|
||||||
*/
|
|
||||||
pragma[noinline]
|
|
||||||
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
|
||||||
exists(CharClass(cc)) and
|
|
||||||
exists(RegExpTerm child | child = cc.getAChild() |
|
|
||||||
char = child.(RegexpCharacterConstant).getValue()
|
|
||||||
or
|
|
||||||
rangeMatchesOnLetterOrDigits(child, char)
|
|
||||||
or
|
|
||||||
not rangeMatchesOnLetterOrDigits(child, _) and
|
|
||||||
char = getARelevantChar() and
|
|
||||||
exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
|
|
||||||
lo <= char and
|
|
||||||
char <= hi
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpCharacterClassEscape escape | escape = child |
|
|
||||||
escape.getValue() = escape.getValue().toLowerCase() and
|
|
||||||
classEscapeMatches(escape.getValue(), char)
|
|
||||||
or
|
|
||||||
char = getARelevantChar() and
|
|
||||||
escape.getValue() = escape.getValue().toUpperCase() and
|
|
||||||
not classEscapeMatches(escape.getValue().toLowerCase(), char)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `range` is a range on lower-case, upper-case, or digits, and matches `char`.
|
|
||||||
* This predicate is used to restrict the searchspace for ranges by only joining `getAnyPossiblyMatchedChar`
|
|
||||||
* on a few ranges.
|
|
||||||
*/
|
|
||||||
private predicate rangeMatchesOnLetterOrDigits(RegExpCharacterRange range, string char) {
|
|
||||||
exists(string lo, string hi |
|
|
||||||
range.isRange(lo, hi) and lo = lowercaseLetter() and hi = lowercaseLetter()
|
|
||||||
|
|
|
||||||
lo <= char and
|
|
||||||
char <= hi and
|
|
||||||
char = lowercaseLetter()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(string lo, string hi |
|
|
||||||
range.isRange(lo, hi) and lo = upperCaseLetter() and hi = upperCaseLetter()
|
|
||||||
|
|
|
||||||
lo <= char and
|
|
||||||
char <= hi and
|
|
||||||
char = upperCaseLetter()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(string lo, string hi | range.isRange(lo, hi) and lo = digit() and hi = digit() |
|
|
||||||
lo <= char and
|
|
||||||
char <= hi and
|
|
||||||
char = digit()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
private string lowercaseLetter() { result = "abdcefghijklmnopqrstuvwxyz".charAt(_) }
|
|
||||||
|
|
||||||
private string upperCaseLetter() { result = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".charAt(_) }
|
|
||||||
|
|
||||||
private string digit() { result = [0 .. 9].toString() }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a char that could be matched by a regular expression.
|
|
||||||
* Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matches by the regexp `/\s|\d|\w/`.
|
|
||||||
*/
|
|
||||||
string getARelevantChar() {
|
|
||||||
exists(ascii(result))
|
|
||||||
or
|
|
||||||
exists(RegexpCharacterConstant c | result = c.getValue().charAt(_))
|
|
||||||
or
|
|
||||||
classEscapeMatches(_, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a char that is mentioned in the character class `c`.
|
|
||||||
*/
|
|
||||||
private string getAMentionedChar(RegExpCharacterClass c) {
|
|
||||||
exists(RegExpTerm child | child = c.getAChild() |
|
|
||||||
result = child.(RegexpCharacterConstant).getValue()
|
|
||||||
or
|
|
||||||
child.(RegExpCharacterRange).isRange(result, _)
|
|
||||||
or
|
|
||||||
child.(RegExpCharacterRange).isRange(_, result)
|
|
||||||
or
|
|
||||||
exists(RegExpCharacterClassEscape escape | child = escape |
|
|
||||||
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
|
|
||||||
or
|
|
||||||
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An implementation of `CharacterClass` for positive (non inverted) character classes.
|
|
||||||
*/
|
|
||||||
private class PositiveCharacterClass extends CharacterClass {
|
|
||||||
RegExpCharacterClass cc;
|
|
||||||
|
|
||||||
PositiveCharacterClass() { this = CharClass(cc) and not cc.isInverted() }
|
|
||||||
|
|
||||||
override string getARelevantChar() { result = getAMentionedChar(cc) }
|
|
||||||
|
|
||||||
override predicate matches(string char) { hasChildThatMatches(cc, char) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An implementation of `CharacterClass` for inverted character classes.
|
|
||||||
*/
|
|
||||||
private class InvertedCharacterClass extends CharacterClass {
|
|
||||||
RegExpCharacterClass cc;
|
|
||||||
|
|
||||||
InvertedCharacterClass() { this = CharClass(cc) and cc.isInverted() }
|
|
||||||
|
|
||||||
override string getARelevantChar() {
|
|
||||||
result = nextChar(getAMentionedChar(cc)) or
|
|
||||||
nextChar(result) = getAMentionedChar(cc)
|
|
||||||
}
|
|
||||||
|
|
||||||
bindingset[char]
|
|
||||||
override predicate matches(string char) { not hasChildThatMatches(cc, char) }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`.
|
|
||||||
*/
|
|
||||||
pragma[noinline]
|
|
||||||
private predicate classEscapeMatches(string clazz, string char) {
|
|
||||||
clazz = "d" and
|
|
||||||
char = "0123456789".charAt(_)
|
|
||||||
or
|
|
||||||
clazz = "s" and
|
|
||||||
(
|
|
||||||
char = [" ", "\t", "\r", "\n"]
|
|
||||||
or
|
|
||||||
char = getARelevantChar() and
|
|
||||||
char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
clazz = "w" and
|
|
||||||
char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An implementation of `CharacterClass` for \d, \s, and \w.
|
|
||||||
*/
|
|
||||||
private class PositiveCharacterClassEscape extends CharacterClass {
|
|
||||||
RegExpCharacterClassEscape cc;
|
|
||||||
|
|
||||||
PositiveCharacterClassEscape() { this = CharClass(cc) and cc.getValue() = ["d", "s", "w"] }
|
|
||||||
|
|
||||||
override string getARelevantChar() {
|
|
||||||
cc.getValue() = "d" and
|
|
||||||
result = ["0", "9"]
|
|
||||||
or
|
|
||||||
cc.getValue() = "s" and
|
|
||||||
result = [" "]
|
|
||||||
or
|
|
||||||
cc.getValue() = "w" and
|
|
||||||
result = ["a", "Z", "_", "0", "9"]
|
|
||||||
}
|
|
||||||
|
|
||||||
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
|
|
||||||
|
|
||||||
override string choose() {
|
|
||||||
cc.getValue() = "d" and
|
|
||||||
result = "9"
|
|
||||||
or
|
|
||||||
cc.getValue() = "s" and
|
|
||||||
result = [" "]
|
|
||||||
or
|
|
||||||
cc.getValue() = "w" and
|
|
||||||
result = "a"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An implementation of `CharacterClass` for \D, \S, and \W.
|
|
||||||
*/
|
|
||||||
private class NegativeCharacterClassEscape extends CharacterClass {
|
|
||||||
RegExpCharacterClassEscape cc;
|
|
||||||
|
|
||||||
NegativeCharacterClassEscape() { this = CharClass(cc) and cc.getValue() = ["D", "S", "W"] }
|
|
||||||
|
|
||||||
override string getARelevantChar() {
|
|
||||||
cc.getValue() = "D" and
|
|
||||||
result = ["a", "Z", "!"]
|
|
||||||
or
|
|
||||||
cc.getValue() = "S" and
|
|
||||||
result = ["a", "9", "!"]
|
|
||||||
or
|
|
||||||
cc.getValue() = "W" and
|
|
||||||
result = [" ", "!"]
|
|
||||||
}
|
|
||||||
|
|
||||||
bindingset[char]
|
|
||||||
override predicate matches(string char) {
|
|
||||||
not classEscapeMatches(cc.getValue().toLowerCase(), char)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
newtype TState =
|
|
||||||
Match(RegExpTerm t, int i) {
|
|
||||||
getRoot(t).isRelevant() and
|
|
||||||
(
|
|
||||||
i = 0
|
|
||||||
or
|
|
||||||
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
|
||||||
)
|
|
||||||
} or
|
|
||||||
Accept(RegExpRoot l) { l.isRelevant() } or
|
|
||||||
AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A state in the NFA corresponding to a regular expression.
|
|
||||||
*
|
|
||||||
* Each regular expression literal `l` has one accepting state
|
|
||||||
* `Accept(l)`, one state that accepts all suffixes `AcceptAnySuffix(l)`,
|
|
||||||
* and a state `Match(t, i)` for every subterm `t`,
|
|
||||||
* which represents the state of the NFA before starting to
|
|
||||||
* match `t`, or the `i`th character in `t` if `t` is a constant.
|
|
||||||
*/
|
|
||||||
class State extends TState {
|
|
||||||
RegExpTerm repr;
|
|
||||||
|
|
||||||
State() {
|
|
||||||
this = Match(repr, _) or
|
|
||||||
this = Accept(repr) or
|
|
||||||
this = AcceptAnySuffix(repr)
|
|
||||||
}
|
|
||||||
|
|
||||||
string toString() {
|
|
||||||
exists(int i | this = Match(repr, i) | result = "Match(" + repr + "," + i + ")")
|
|
||||||
or
|
|
||||||
this instanceof Accept and
|
|
||||||
result = "Accept(" + repr + ")"
|
|
||||||
or
|
|
||||||
this instanceof AcceptAnySuffix and
|
|
||||||
result = "AcceptAny(" + repr + ")"
|
|
||||||
}
|
|
||||||
|
|
||||||
Location getLocation() { result = repr.getLocation() }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the term represented by this state.
|
|
||||||
*/
|
|
||||||
RegExpTerm getRepr() { result = repr }
|
|
||||||
}
|
|
||||||
|
|
||||||
class EdgeLabel extends TInputSymbol {
|
|
||||||
string toString() {
|
|
||||||
this = Epsilon() and result = ""
|
|
||||||
or
|
|
||||||
exists(InputSymbol s | this = s and result = s.toString())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the state before matching `t`.
|
|
||||||
*/
|
|
||||||
pragma[inline]
|
|
||||||
State before(RegExpTerm t) { result = Match(t, 0) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a state the NFA may be in after matching `t`.
|
|
||||||
*/
|
|
||||||
State after(RegExpTerm t) {
|
|
||||||
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
|
|
||||||
or
|
|
||||||
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
|
|
||||||
result = before(seq.getChild(i + 1))
|
|
||||||
or
|
|
||||||
i + 1 = seq.getNumChild() and result = after(seq)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
|
|
||||||
or
|
|
||||||
exists(RegExpStar star | t = star.getAChild() | result = before(star))
|
|
||||||
or
|
|
||||||
exists(RegExpPlus plus | t = plus.getAChild() |
|
|
||||||
result = before(plus) or
|
|
||||||
result = after(plus)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
|
|
||||||
or
|
|
||||||
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
|
|
||||||
*/
|
|
||||||
predicate delta(State q1, EdgeLabel lbl, State q2) {
|
|
||||||
exists(RegexpCharacterConstant s, int i |
|
|
||||||
q1 = Match(s, i) and
|
|
||||||
lbl = Char(s.getValue().charAt(i)) and
|
|
||||||
(
|
|
||||||
q2 = Match(s, i + 1)
|
|
||||||
or
|
|
||||||
s.getValue().length() = i + 1 and
|
|
||||||
q2 = after(s)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpDot dot | q1 = before(dot) and q2 = after(dot) |
|
|
||||||
if dot.getLiteral().isDotAll() then lbl = Any() else lbl = Dot()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpCharacterClass cc |
|
|
||||||
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
|
|
||||||
or
|
|
||||||
q1 = before(cc) and
|
|
||||||
lbl = CharClass(cc) and
|
|
||||||
q2 = after(cc)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpCharacterClassEscape cc |
|
|
||||||
q1 = before(cc) and
|
|
||||||
lbl = CharClass(cc) and
|
|
||||||
q2 = after(cc)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
|
|
||||||
or
|
|
||||||
exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))
|
|
||||||
or
|
|
||||||
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
|
|
||||||
or
|
|
||||||
exists(RegExpStar star | lbl = Epsilon() |
|
|
||||||
q1 = before(star) and q2 = before(star.getChild(0))
|
|
||||||
or
|
|
||||||
q1 = before(star) and q2 = after(star)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
|
|
||||||
or
|
|
||||||
exists(RegExpOpt opt | lbl = Epsilon() |
|
|
||||||
q1 = before(opt) and q2 = before(opt.getChild(0))
|
|
||||||
or
|
|
||||||
q1 = before(opt) and q2 = after(opt)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpRoot root | q1 = AcceptAnySuffix(root) |
|
|
||||||
lbl = Any() and q2 = q1
|
|
||||||
or
|
|
||||||
lbl = Epsilon() and q2 = Accept(root)
|
|
||||||
)
|
|
||||||
or
|
|
||||||
exists(RegExpDollar dollar | q1 = before(dollar) |
|
|
||||||
lbl = Epsilon() and q2 = Accept(getRoot(dollar))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a state that `q` has an epsilon transition to.
|
|
||||||
*/
|
|
||||||
State epsilonSucc(State q) { delta(q, Epsilon(), result) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a state that has an epsilon transition to `q`.
|
|
||||||
*/
|
|
||||||
State epsilonPred(State q) { q = epsilonSucc(result) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there is a state `q` that can be reached from `q1`
|
|
||||||
* along epsilon edges, such that there is a transition from
|
|
||||||
* `q` to `q2` that consumes symbol `s`.
|
|
||||||
*/
|
|
||||||
predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1), s, q2) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if state `s` might be inside a backtracking repetition.
|
|
||||||
*/
|
|
||||||
pragma[noinline]
|
|
||||||
predicate stateInsideBacktracking(State s) {
|
|
||||||
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A state in the product automaton.
|
* A state in the product automaton.
|
||||||
*
|
*
|
||||||
@@ -759,96 +238,6 @@ class Trace extends TTrace {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the minimum char that is matched by both the character classes `c` and `d`.
|
|
||||||
*/
|
|
||||||
private string getMinOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
|
|
||||||
result = min(getAOverlapBetweenCharacterClasses(c, d))
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a char that is matched by both the character classes `c` and `d`.
|
|
||||||
* And `c` and `d` is not the same character class.
|
|
||||||
*/
|
|
||||||
private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
|
|
||||||
sharesRoot(c, d) and
|
|
||||||
result = [c.getARelevantChar(), d.getARelevantChar()] and
|
|
||||||
c.matches(result) and
|
|
||||||
d.matches(result) and
|
|
||||||
not c = d
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a character that is represented by both `c` and `d`.
|
|
||||||
*/
|
|
||||||
string intersect(InputSymbol c, InputSymbol d) {
|
|
||||||
c = Char(result) and
|
|
||||||
d = getAnInputSymbolMatching(result) and
|
|
||||||
(
|
|
||||||
sharesRoot(c, d)
|
|
||||||
or
|
|
||||||
d = Dot()
|
|
||||||
or
|
|
||||||
d = Any()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
result = getMinOverlapBetweenCharacterClasses(c, d)
|
|
||||||
or
|
|
||||||
result = c.(CharacterClass).choose() and
|
|
||||||
(
|
|
||||||
d = c
|
|
||||||
or
|
|
||||||
d = Dot() and
|
|
||||||
not (result = "\n" or result = "\r")
|
|
||||||
or
|
|
||||||
d = Any()
|
|
||||||
)
|
|
||||||
or
|
|
||||||
c = Dot() and
|
|
||||||
(
|
|
||||||
d = Dot() and result = "a"
|
|
||||||
or
|
|
||||||
d = Any() and result = "a"
|
|
||||||
)
|
|
||||||
or
|
|
||||||
c = Any() and d = Any() and result = "a"
|
|
||||||
or
|
|
||||||
result = intersect(d, c)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a symbol that matches `char`.
|
|
||||||
*/
|
|
||||||
bindingset[char]
|
|
||||||
InputSymbol getAnInputSymbolMatching(string char) {
|
|
||||||
result = Char(char)
|
|
||||||
or
|
|
||||||
result.(CharacterClass).matches(char)
|
|
||||||
or
|
|
||||||
result = Dot() and
|
|
||||||
not (char = "\n" or char = "\r")
|
|
||||||
or
|
|
||||||
result = Any()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the char after `c` (from a simplified ASCII table).
|
|
||||||
*/
|
|
||||||
string nextChar(string c) { exists(int code | code = ascii(c) | code + 1 = ascii(result)) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets an approximation for the ASCII code for `char`.
|
|
||||||
* Only the easily printable chars are included (so no newline, tab, null, etc).
|
|
||||||
*/
|
|
||||||
int ascii(string char) {
|
|
||||||
char =
|
|
||||||
rank[result](string c |
|
|
||||||
c =
|
|
||||||
"! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
|
|
||||||
.charAt(_)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a string corresponding to the trace `t`.
|
* Gets a string corresponding to the trace `t`.
|
||||||
*/
|
*/
|
||||||
@@ -903,273 +292,16 @@ predicate isPumpable(State fork, string w) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Predicates for constructing a prefix string that leads to a given state.
|
* An instantiation of `ReDoSConfiguration` for exponential backtracking.
|
||||||
*/
|
*/
|
||||||
module PrefixConstruction {
|
class ExponentialReDoSConfiguration extends ReDoSConfiguration {
|
||||||
/**
|
ExponentialReDoSConfiguration() { this = "ExponentialReDoSConfiguration" }
|
||||||
* Holds if `state` starts the string matched by the regular expression.
|
|
||||||
*/
|
|
||||||
private predicate isStartState(State state) {
|
|
||||||
state instanceof StateInPumpableRegexp and
|
|
||||||
(
|
|
||||||
state = Match(any(RegExpRoot r), _)
|
|
||||||
or
|
|
||||||
exists(RegExpCaret car | state = after(car))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
override predicate isReDoSCandidate(State state, string pump) { isPumpable(state, pump) }
|
||||||
* Holds if `state` is the textually last start state for the regular expression.
|
|
||||||
*/
|
|
||||||
private predicate lastStartState(State state) {
|
|
||||||
exists(RegExpRoot root |
|
|
||||||
state =
|
|
||||||
max(State s, Location l |
|
|
||||||
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
|
||||||
|
|
|
||||||
s order by l.getStartLine(), l.getStartColumn()
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
|
||||||
*/
|
|
||||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
|
||||||
*/
|
|
||||||
int prefixLength(State start, State state) =
|
|
||||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
|
||||||
*/
|
|
||||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a string for which the regular expression will reach `state`.
|
|
||||||
*
|
|
||||||
* Has at most one result for any given `state`.
|
|
||||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
|
||||||
* the regular expression.
|
|
||||||
*/
|
|
||||||
string prefix(State state) {
|
|
||||||
lastStartState(state) and
|
|
||||||
result = ""
|
|
||||||
or
|
|
||||||
// the search stops past the last redos candidate state.
|
|
||||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
|
||||||
exists(State prev |
|
|
||||||
// select a unique predecessor (by an arbitrary measure)
|
|
||||||
prev =
|
|
||||||
min(State s, Location loc |
|
|
||||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
|
||||||
loc = s.getRepr().getLocation() and
|
|
||||||
delta(s, _, state)
|
|
||||||
|
|
|
||||||
s order by loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn()
|
|
||||||
)
|
|
||||||
|
|
|
||||||
// greedy search for the shortest prefix
|
|
||||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
|
||||||
or
|
|
||||||
not delta(prev, Epsilon(), state) and
|
|
||||||
result =
|
|
||||||
prefix(prev) +
|
|
||||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), state))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A state within a regular expression that has a pumpable state.
|
|
||||||
*/
|
|
||||||
class StateInPumpableRegexp extends State {
|
|
||||||
pragma[noinline]
|
|
||||||
StateInPumpableRegexp() {
|
|
||||||
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Predicates for testing the presence of a rejecting suffix.
|
|
||||||
*
|
|
||||||
* These predicates are used to ensure that the all states reached from the fork
|
|
||||||
* by repeating `w` have a rejecting suffix.
|
|
||||||
*
|
|
||||||
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
|
|
||||||
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
|
|
||||||
*
|
|
||||||
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
|
|
||||||
* as the suffix "X" will cause both the regular expressions to be rejected.
|
|
||||||
*
|
|
||||||
* The string `w` is repeated any number of times because it needs to be
|
|
||||||
* infinitely repeatedable for the attack to work.
|
|
||||||
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
|
|
||||||
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
|
||||||
*/
|
|
||||||
module SuffixConstruction {
|
|
||||||
import PrefixConstruction
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if all states reachable from `fork` by repeating `w`
|
|
||||||
* are likely rejectable by appending some suffix.
|
|
||||||
*/
|
|
||||||
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
|
|
||||||
isReDoSCandidate(fork, w) and
|
|
||||||
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
|
|
||||||
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
|
|
||||||
*/
|
|
||||||
pragma[nomagic]
|
|
||||||
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
|
|
||||||
// exists a reject edge with some char.
|
|
||||||
hasRejectEdge(s)
|
|
||||||
or
|
|
||||||
hasEdgeToLikelyRejectable(s)
|
|
||||||
or
|
|
||||||
// stopping here is rejection
|
|
||||||
isRejectState(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
|
||||||
*/
|
|
||||||
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
|
|
||||||
*/
|
|
||||||
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
|
|
||||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
|
||||||
// the `next` states might not share a common suffix, which can cause FPs.
|
|
||||||
exists(string char | char = relevant() |
|
|
||||||
forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there is a state `next` that can be reached from `prev`
|
|
||||||
* along epsilon edges, such that there is a transition from
|
|
||||||
* `prev` to `next` that the character symbol `char`.
|
|
||||||
*/
|
|
||||||
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
|
|
||||||
char = relevant() and
|
|
||||||
deltaClosed(prev, getAnInputSymbolMatching(char), next)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a char used for finding possible suffixes.
|
|
||||||
*/
|
|
||||||
private string relevant() { result = CharacterClasses::getARelevantChar() }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if there is no edge from `s` labeled `char` in our NFA.
|
|
||||||
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
|
|
||||||
*/
|
|
||||||
private predicate hasRejectEdge(State s) {
|
|
||||||
exists(string char | char = relevant() | not deltaClosedChar(s, char, _))
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a state that can be reached from pumpable `fork` consuming all
|
|
||||||
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
|
|
||||||
*/
|
|
||||||
private State process(State fork, string w, int i) {
|
|
||||||
isReDoSCandidate(fork, w) and
|
|
||||||
exists(State prev |
|
|
||||||
i = 0 and prev = fork
|
|
||||||
or
|
|
||||||
prev = process(fork, w, i - 1)
|
|
||||||
or
|
|
||||||
// repeat until fixpoint
|
|
||||||
i = 0 and
|
|
||||||
prev = process(fork, w, w.length() - 1)
|
|
||||||
|
|
|
||||||
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if `term` may cause exponential backtracking on strings containing many repetitions of `pump`.
|
|
||||||
* Gets the minimum possible string that causes exponential backtracking.
|
|
||||||
*/
|
|
||||||
predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
|
|
||||||
exists(int i, string c | s = Match(term, i) |
|
|
||||||
c =
|
|
||||||
min(string w |
|
|
||||||
isReDoSCandidate(s, w) and
|
|
||||||
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
|
||||||
|
|
|
||||||
w order by w.length(), w
|
|
||||||
) and
|
|
||||||
pump = escape(rotate(c, i))
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds if repeating `pump' starting at `state` is a candidate for causing exponential backtracking.
|
|
||||||
* No check whether a rejected suffix exists has been made.
|
|
||||||
*/
|
|
||||||
predicate isReDoSCandidate(State state, string pump) {
|
|
||||||
isPumpable(state, pump) and
|
|
||||||
(
|
|
||||||
not isPumpable(epsilonSucc+(state), _)
|
|
||||||
or
|
|
||||||
epsilonSucc+(state) = state and
|
|
||||||
state =
|
|
||||||
max(State s, Location l |
|
|
||||||
s = epsilonSucc+(state) and
|
|
||||||
l = s.getRepr().getLocation() and
|
|
||||||
isPumpable(s, _) and
|
|
||||||
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
|
||||||
|
|
|
||||||
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the result of backslash-escaping newlines, carriage-returns and
|
|
||||||
* backslashes in `s`.
|
|
||||||
*/
|
|
||||||
bindingset[s]
|
|
||||||
string escape(string s) {
|
|
||||||
result =
|
|
||||||
s.replaceAll("\\", "\\\\")
|
|
||||||
.replaceAll("\n", "\\n")
|
|
||||||
.replaceAll("\r", "\\r")
|
|
||||||
.replaceAll("\t", "\\t")
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets `str` with the last `i` characters moved to the front.
|
|
||||||
*
|
|
||||||
* We use this to adjust the pump string to match with the beginning of
|
|
||||||
* a RegExpTerm, so it doesn't start in the middle of a constant.
|
|
||||||
*/
|
|
||||||
bindingset[str, i]
|
|
||||||
string rotate(string str, int i) {
|
|
||||||
result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||||
where
|
where hasReDoSResult(t, pump, s, prefixMsg)
|
||||||
isReDoSAttackable(t, pump, s) and
|
|
||||||
(
|
|
||||||
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
|
||||||
not PrefixConstruction::prefix(s) = ""
|
|
||||||
or
|
|
||||||
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
|
||||||
or
|
|
||||||
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
|
||||||
)
|
|
||||||
select t,
|
select t,
|
||||||
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
|
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
|
||||||
"containing many repetitions of '" + pump + "'."
|
"containing many repetitions of '" + pump + "'."
|
||||||
|
|||||||
@@ -0,0 +1,930 @@
|
|||||||
|
/**
|
||||||
|
* Provides classes for working with regular expressions that can
|
||||||
|
* perform backtracking in superlinear/exponential time.
|
||||||
|
*
|
||||||
|
* This module contains a number of utility predicates for compiling a regular expression into a NFA and reasoning about this NFA.
|
||||||
|
*
|
||||||
|
* The `ReDoSConfiguration` contains a `isReDoSCandidate` predicate that is used to
|
||||||
|
* to determine which states the prefix/suffix search should happen on.
|
||||||
|
* There is only meant to exist one `ReDoSConfiguration` at a time.
|
||||||
|
*
|
||||||
|
* The predicate `hasReDoSResult` outputs a de-duplicated set of
|
||||||
|
* states that will cause backtracking (a rejecting suffix exists).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import javascript
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A configuration for which parts of a regular expression should be considered relevant for
|
||||||
|
* the different predicates in `ReDoS.qll`.
|
||||||
|
* Used to adjust the computations for either superliniear or exponential backtracking.
|
||||||
|
*/
|
||||||
|
abstract class ReDoSConfiguration extends string {
|
||||||
|
bindingset[this]
|
||||||
|
ReDoSConfiguration() { any() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `state` with the pump string `pump` is a candidate for a
|
||||||
|
* ReDoS vulnerable state.
|
||||||
|
* This is used to determine which states are considered for the prefix/suffix construction.
|
||||||
|
*/
|
||||||
|
abstract predicate isReDoSCandidate(State state, string pump);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if repeating `pump' starting at `state` is a candidate for causing backtracking.
|
||||||
|
* No check whether a rejected suffix exists has been made.
|
||||||
|
*/
|
||||||
|
private predicate isReDoSCandidate(State state, string pump) {
|
||||||
|
any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
|
||||||
|
(
|
||||||
|
not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
|
||||||
|
or
|
||||||
|
epsilonSucc+(state) = state and
|
||||||
|
state =
|
||||||
|
max(State s, Location l |
|
||||||
|
s = epsilonSucc+(state) and
|
||||||
|
l = s.getRepr().getLocation() and
|
||||||
|
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
|
||||||
|
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
||||||
|
|
|
||||||
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A regular expression term that permits unlimited repetitions.
|
||||||
|
*/
|
||||||
|
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
|
||||||
|
InfiniteRepetitionQuantifier() {
|
||||||
|
this instanceof RegExpPlus
|
||||||
|
or
|
||||||
|
this instanceof RegExpStar
|
||||||
|
or
|
||||||
|
this instanceof RegExpRange and not exists(this.(RegExpRange).getUpperBound())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the char after `c` (from a simplified ASCII table).
|
||||||
|
*/
|
||||||
|
private string nextChar(string c) { exists(int code | code = ascii(c) | code + 1 = ascii(result)) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an approximation for the ASCII code for `char`.
|
||||||
|
* Only the easily printable chars are included (so no newline, tab, null, etc).
|
||||||
|
*/
|
||||||
|
private int ascii(string char) {
|
||||||
|
char =
|
||||||
|
rank[result](string c |
|
||||||
|
c =
|
||||||
|
"! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
|
||||||
|
.charAt(_)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A branch in a disjunction that is the root node in a literal, or a literal
|
||||||
|
* whose root node is not a disjunction.
|
||||||
|
*/
|
||||||
|
class RegExpRoot extends RegExpTerm {
|
||||||
|
RegExpParent parent;
|
||||||
|
|
||||||
|
RegExpRoot() {
|
||||||
|
exists(RegExpAlt alt |
|
||||||
|
alt.isRootTerm() and
|
||||||
|
this = alt.getAChild() and
|
||||||
|
parent = alt.getParent()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
this.isRootTerm() and
|
||||||
|
not this instanceof RegExpAlt and
|
||||||
|
parent = this.getParent()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if this root term is relevant to the ReDoS analysis.
|
||||||
|
*/
|
||||||
|
predicate isRelevant() {
|
||||||
|
// there is at least one repetition
|
||||||
|
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
|
||||||
|
// there are no lookbehinds
|
||||||
|
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
|
||||||
|
// is actually used as a RegExp
|
||||||
|
isUsedAsRegExp()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A constant in a regular expression that represents valid Unicode character(s).
|
||||||
|
*/
|
||||||
|
private class RegexpCharacterConstant extends RegExpConstant {
|
||||||
|
RegexpCharacterConstant() { this.isCharacter() }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An abstract input symbol, representing a set of concrete characters.
|
||||||
|
*/
|
||||||
|
private newtype TInputSymbol =
|
||||||
|
/** An input symbol corresponding to character `c`. */
|
||||||
|
Char(string c) {
|
||||||
|
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
|
||||||
|
} or
|
||||||
|
/**
|
||||||
|
* An input symbol representing all characters matched by
|
||||||
|
* (non-universal) character class `recc`.
|
||||||
|
*/
|
||||||
|
CharClass(RegExpTerm recc) {
|
||||||
|
getRoot(recc).isRelevant() and
|
||||||
|
(
|
||||||
|
recc instanceof RegExpCharacterClass and
|
||||||
|
not recc.(RegExpCharacterClass).isUniversalClass()
|
||||||
|
or
|
||||||
|
recc instanceof RegExpCharacterClassEscape
|
||||||
|
)
|
||||||
|
} or
|
||||||
|
/** An input symbol representing all characters matched by `.`. */
|
||||||
|
Dot() or
|
||||||
|
/** An input symbol representing all characters. */
|
||||||
|
Any() or
|
||||||
|
/** An epsilon transition in the automaton. */
|
||||||
|
Epsilon()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `a` and `b` are input symbols from the same regexp.
|
||||||
|
* (And not a `Dot()`, `Any()` or `Epsilon()`)
|
||||||
|
*/
|
||||||
|
private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
|
||||||
|
exists(RegExpRoot root |
|
||||||
|
belongsTo(a, root) and
|
||||||
|
belongsTo(b, root)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the `a` is an input symbol from a regexp that has root `root`.
|
||||||
|
*/
|
||||||
|
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
|
||||||
|
exists(RegExpTerm term | getRoot(term) = root |
|
||||||
|
a = Char(term.(RegexpCharacterConstant).getValue().charAt(_))
|
||||||
|
or
|
||||||
|
a = CharClass(term)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An abstract input symbol, representing a set of concrete characters.
|
||||||
|
*/
|
||||||
|
class InputSymbol extends TInputSymbol {
|
||||||
|
InputSymbol() { not this instanceof Epsilon }
|
||||||
|
|
||||||
|
string toString() {
|
||||||
|
this = Char(result)
|
||||||
|
or
|
||||||
|
result = any(RegExpTerm recc | this = CharClass(recc)).toString()
|
||||||
|
or
|
||||||
|
this = Dot() and result = "."
|
||||||
|
or
|
||||||
|
this = Any() and result = "[^]"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An abstract input symbol that represents a character class.
|
||||||
|
*/
|
||||||
|
abstract private class CharacterClass extends InputSymbol {
|
||||||
|
/**
|
||||||
|
* Gets a character that is relevant for intersection-tests involving this
|
||||||
|
* character class.
|
||||||
|
*
|
||||||
|
* Specifically, this is any of the characters mentioned explicitly in the
|
||||||
|
* character class, offset by one if it is inverted. For character class escapes,
|
||||||
|
* the result is as if the class had been written out as a series of intervals.
|
||||||
|
*
|
||||||
|
* This set is large enough to ensure that for any two intersecting character
|
||||||
|
* classes, one contains a relevant character from the other.
|
||||||
|
*/
|
||||||
|
abstract string getARelevantChar();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if this character class matches `char`.
|
||||||
|
*/
|
||||||
|
bindingset[char]
|
||||||
|
abstract predicate matches(string char);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a character matched by this character class.
|
||||||
|
*/
|
||||||
|
string choose() { result = getARelevantChar() and matches(result) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides implementations for `CharacterClass`.
|
||||||
|
*/
|
||||||
|
private module CharacterClasses {
|
||||||
|
/**
|
||||||
|
* Holds if the character class `cc` has a child (constant or range) that matches `char`.
|
||||||
|
*/
|
||||||
|
pragma[noinline]
|
||||||
|
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
||||||
|
exists(CharClass(cc)) and
|
||||||
|
exists(RegExpTerm child | child = cc.getAChild() |
|
||||||
|
char = child.(RegexpCharacterConstant).getValue()
|
||||||
|
or
|
||||||
|
rangeMatchesOnLetterOrDigits(child, char)
|
||||||
|
or
|
||||||
|
not rangeMatchesOnLetterOrDigits(child, _) and
|
||||||
|
char = getARelevantChar() and
|
||||||
|
exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
|
||||||
|
lo <= char and
|
||||||
|
char <= hi
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpCharacterClassEscape escape | escape = child |
|
||||||
|
escape.getValue() = escape.getValue().toLowerCase() and
|
||||||
|
classEscapeMatches(escape.getValue(), char)
|
||||||
|
or
|
||||||
|
char = getARelevantChar() and
|
||||||
|
escape.getValue() = escape.getValue().toUpperCase() and
|
||||||
|
not classEscapeMatches(escape.getValue().toLowerCase(), char)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `range` is a range on lower-case, upper-case, or digits, and matches `char`.
|
||||||
|
* This predicate is used to restrict the searchspace for ranges by only joining `getAnyPossiblyMatchedChar`
|
||||||
|
* on a few ranges.
|
||||||
|
*/
|
||||||
|
private predicate rangeMatchesOnLetterOrDigits(RegExpCharacterRange range, string char) {
|
||||||
|
exists(string lo, string hi |
|
||||||
|
range.isRange(lo, hi) and lo = lowercaseLetter() and hi = lowercaseLetter()
|
||||||
|
|
|
||||||
|
lo <= char and
|
||||||
|
char <= hi and
|
||||||
|
char = lowercaseLetter()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(string lo, string hi |
|
||||||
|
range.isRange(lo, hi) and lo = upperCaseLetter() and hi = upperCaseLetter()
|
||||||
|
|
|
||||||
|
lo <= char and
|
||||||
|
char <= hi and
|
||||||
|
char = upperCaseLetter()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(string lo, string hi | range.isRange(lo, hi) and lo = digit() and hi = digit() |
|
||||||
|
lo <= char and
|
||||||
|
char <= hi and
|
||||||
|
char = digit()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
private string lowercaseLetter() { result = "abdcefghijklmnopqrstuvwxyz".charAt(_) }
|
||||||
|
|
||||||
|
private string upperCaseLetter() { result = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".charAt(_) }
|
||||||
|
|
||||||
|
private string digit() { result = [0 .. 9].toString() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a char that could be matched by a regular expression.
|
||||||
|
* Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matches by the regexp `/\s|\d|\w/`.
|
||||||
|
*/
|
||||||
|
string getARelevantChar() {
|
||||||
|
exists(ascii(result))
|
||||||
|
or
|
||||||
|
exists(RegexpCharacterConstant c | result = c.getValue().charAt(_))
|
||||||
|
or
|
||||||
|
classEscapeMatches(_, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a char that is mentioned in the character class `c`.
|
||||||
|
*/
|
||||||
|
private string getAMentionedChar(RegExpCharacterClass c) {
|
||||||
|
exists(RegExpTerm child | child = c.getAChild() |
|
||||||
|
result = child.(RegexpCharacterConstant).getValue()
|
||||||
|
or
|
||||||
|
child.(RegExpCharacterRange).isRange(result, _)
|
||||||
|
or
|
||||||
|
child.(RegExpCharacterRange).isRange(_, result)
|
||||||
|
or
|
||||||
|
exists(RegExpCharacterClassEscape escape | child = escape |
|
||||||
|
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
|
||||||
|
or
|
||||||
|
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of `CharacterClass` for positive (non inverted) character classes.
|
||||||
|
*/
|
||||||
|
private class PositiveCharacterClass extends CharacterClass {
|
||||||
|
RegExpCharacterClass cc;
|
||||||
|
|
||||||
|
PositiveCharacterClass() { this = CharClass(cc) and not cc.isInverted() }
|
||||||
|
|
||||||
|
override string getARelevantChar() { result = getAMentionedChar(cc) }
|
||||||
|
|
||||||
|
override predicate matches(string char) { hasChildThatMatches(cc, char) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of `CharacterClass` for inverted character classes.
|
||||||
|
*/
|
||||||
|
private class InvertedCharacterClass extends CharacterClass {
|
||||||
|
RegExpCharacterClass cc;
|
||||||
|
|
||||||
|
InvertedCharacterClass() { this = CharClass(cc) and cc.isInverted() }
|
||||||
|
|
||||||
|
override string getARelevantChar() {
|
||||||
|
result = nextChar(getAMentionedChar(cc)) or
|
||||||
|
nextChar(result) = getAMentionedChar(cc)
|
||||||
|
}
|
||||||
|
|
||||||
|
bindingset[char]
|
||||||
|
override predicate matches(string char) { not hasChildThatMatches(cc, char) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the character class escape `clazz` (\d, \s, or \w) matches `char`.
|
||||||
|
*/
|
||||||
|
pragma[noinline]
|
||||||
|
private predicate classEscapeMatches(string clazz, string char) {
|
||||||
|
clazz = "d" and
|
||||||
|
char = "0123456789".charAt(_)
|
||||||
|
or
|
||||||
|
clazz = "s" and
|
||||||
|
(
|
||||||
|
char = [" ", "\t", "\r", "\n"]
|
||||||
|
or
|
||||||
|
char = getARelevantChar() and
|
||||||
|
char.regexpMatch("\\u000b|\\u000c") // \v|\f (vertical tab | form feed)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
clazz = "w" and
|
||||||
|
char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of `CharacterClass` for \d, \s, and \w.
|
||||||
|
*/
|
||||||
|
private class PositiveCharacterClassEscape extends CharacterClass {
|
||||||
|
RegExpCharacterClassEscape cc;
|
||||||
|
|
||||||
|
PositiveCharacterClassEscape() { this = CharClass(cc) and cc.getValue() = ["d", "s", "w"] }
|
||||||
|
|
||||||
|
override string getARelevantChar() {
|
||||||
|
cc.getValue() = "d" and
|
||||||
|
result = ["0", "9"]
|
||||||
|
or
|
||||||
|
cc.getValue() = "s" and
|
||||||
|
result = [" "]
|
||||||
|
or
|
||||||
|
cc.getValue() = "w" and
|
||||||
|
result = ["a", "Z", "_", "0", "9"]
|
||||||
|
}
|
||||||
|
|
||||||
|
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
|
||||||
|
|
||||||
|
override string choose() {
|
||||||
|
cc.getValue() = "d" and
|
||||||
|
result = "9"
|
||||||
|
or
|
||||||
|
cc.getValue() = "s" and
|
||||||
|
result = [" "]
|
||||||
|
or
|
||||||
|
cc.getValue() = "w" and
|
||||||
|
result = "a"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of `CharacterClass` for \D, \S, and \W.
|
||||||
|
*/
|
||||||
|
private class NegativeCharacterClassEscape extends CharacterClass {
|
||||||
|
RegExpCharacterClassEscape cc;
|
||||||
|
|
||||||
|
NegativeCharacterClassEscape() { this = CharClass(cc) and cc.getValue() = ["D", "S", "W"] }
|
||||||
|
|
||||||
|
override string getARelevantChar() {
|
||||||
|
cc.getValue() = "D" and
|
||||||
|
result = ["a", "Z", "!"]
|
||||||
|
or
|
||||||
|
cc.getValue() = "S" and
|
||||||
|
result = ["a", "9", "!"]
|
||||||
|
or
|
||||||
|
cc.getValue() = "W" and
|
||||||
|
result = [" ", "!"]
|
||||||
|
}
|
||||||
|
|
||||||
|
bindingset[char]
|
||||||
|
override predicate matches(string char) {
|
||||||
|
not classEscapeMatches(cc.getValue().toLowerCase(), char)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class EdgeLabel extends TInputSymbol {
|
||||||
|
string toString() {
|
||||||
|
this = Epsilon() and result = ""
|
||||||
|
or
|
||||||
|
exists(InputSymbol s | this = s and result = s.toString())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the state before matching `t`.
|
||||||
|
*/
|
||||||
|
pragma[inline]
|
||||||
|
private State before(RegExpTerm t) { result = Match(t, 0) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a state the NFA may be in after matching `t`.
|
||||||
|
*/
|
||||||
|
private State after(RegExpTerm t) {
|
||||||
|
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
|
||||||
|
or
|
||||||
|
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
|
||||||
|
result = before(seq.getChild(i + 1))
|
||||||
|
or
|
||||||
|
i + 1 = seq.getNumChild() and result = after(seq)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
|
||||||
|
or
|
||||||
|
exists(RegExpStar star | t = star.getAChild() | result = before(star))
|
||||||
|
or
|
||||||
|
exists(RegExpPlus plus | t = plus.getAChild() |
|
||||||
|
result = before(plus) or
|
||||||
|
result = after(plus)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
|
||||||
|
or
|
||||||
|
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
|
||||||
|
*/
|
||||||
|
predicate delta(State q1, EdgeLabel lbl, State q2) {
|
||||||
|
exists(RegexpCharacterConstant s, int i |
|
||||||
|
q1 = Match(s, i) and
|
||||||
|
lbl = Char(s.getValue().charAt(i)) and
|
||||||
|
(
|
||||||
|
q2 = Match(s, i + 1)
|
||||||
|
or
|
||||||
|
s.getValue().length() = i + 1 and
|
||||||
|
q2 = after(s)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpDot dot | q1 = before(dot) and q2 = after(dot) |
|
||||||
|
if dot.getLiteral().isDotAll() then lbl = Any() else lbl = Dot()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpCharacterClass cc |
|
||||||
|
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
|
||||||
|
or
|
||||||
|
q1 = before(cc) and
|
||||||
|
lbl = CharClass(cc) and
|
||||||
|
q2 = after(cc)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpCharacterClassEscape cc |
|
||||||
|
q1 = before(cc) and
|
||||||
|
lbl = CharClass(cc) and
|
||||||
|
q2 = after(cc)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
|
||||||
|
or
|
||||||
|
exists(RegExpSequence seq | lbl = Epsilon() | q1 = before(seq) and q2 = before(seq.getChild(0)))
|
||||||
|
or
|
||||||
|
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
|
||||||
|
or
|
||||||
|
exists(RegExpStar star | lbl = Epsilon() |
|
||||||
|
q1 = before(star) and q2 = before(star.getChild(0))
|
||||||
|
or
|
||||||
|
q1 = before(star) and q2 = after(star)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
|
||||||
|
or
|
||||||
|
exists(RegExpOpt opt | lbl = Epsilon() |
|
||||||
|
q1 = before(opt) and q2 = before(opt.getChild(0))
|
||||||
|
or
|
||||||
|
q1 = before(opt) and q2 = after(opt)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpRoot root | q1 = AcceptAnySuffix(root) |
|
||||||
|
lbl = Any() and q2 = q1
|
||||||
|
or
|
||||||
|
lbl = Epsilon() and q2 = Accept(root)
|
||||||
|
)
|
||||||
|
or
|
||||||
|
exists(RegExpDollar dollar | q1 = before(dollar) |
|
||||||
|
lbl = Epsilon() and q2 = Accept(getRoot(dollar))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a state that `q` has an epsilon transition to.
|
||||||
|
*/
|
||||||
|
State epsilonSucc(State q) { delta(q, Epsilon(), result) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a state that has an epsilon transition to `q`.
|
||||||
|
*/
|
||||||
|
State epsilonPred(State q) { q = epsilonSucc(result) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there is a state `q` that can be reached from `q1`
|
||||||
|
* along epsilon edges, such that there is a transition from
|
||||||
|
* `q` to `q2` that consumes symbol `s`.
|
||||||
|
*/
|
||||||
|
predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1), s, q2) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the root containing the given term, that is, the root of the literal,
|
||||||
|
* or a branch of the root disjunction.
|
||||||
|
*/
|
||||||
|
RegExpRoot getRoot(RegExpTerm term) {
|
||||||
|
result = term or
|
||||||
|
result = getRoot(term.getParent())
|
||||||
|
}
|
||||||
|
|
||||||
|
newtype TState =
|
||||||
|
Match(RegExpTerm t, int i) {
|
||||||
|
getRoot(t).isRelevant() and
|
||||||
|
(
|
||||||
|
i = 0
|
||||||
|
or
|
||||||
|
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
||||||
|
)
|
||||||
|
} or
|
||||||
|
Accept(RegExpRoot l) { l.isRelevant() } or
|
||||||
|
AcceptAnySuffix(RegExpRoot l) { l.isRelevant() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A state in the NFA corresponding to a regular expression.
|
||||||
|
*
|
||||||
|
* Each regular expression literal `l` has one accepting state
|
||||||
|
* `Accept(l)`, one state that accepts all suffixes `AcceptAnySuffix(l)`,
|
||||||
|
* and a state `Match(t, i)` for every subterm `t`,
|
||||||
|
* which represents the state of the NFA before starting to
|
||||||
|
* match `t`, or the `i`th character in `t` if `t` is a constant.
|
||||||
|
*/
|
||||||
|
class State extends TState {
|
||||||
|
RegExpTerm repr;
|
||||||
|
|
||||||
|
State() {
|
||||||
|
this = Match(repr, _) or
|
||||||
|
this = Accept(repr) or
|
||||||
|
this = AcceptAnySuffix(repr)
|
||||||
|
}
|
||||||
|
|
||||||
|
string toString() {
|
||||||
|
exists(int i | this = Match(repr, i) | result = "Match(" + repr + "," + i + ")")
|
||||||
|
or
|
||||||
|
this instanceof Accept and
|
||||||
|
result = "Accept(" + repr + ")"
|
||||||
|
or
|
||||||
|
this instanceof AcceptAnySuffix and
|
||||||
|
result = "AcceptAny(" + repr + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
Location getLocation() { result = repr.getLocation() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the term represented by this state.
|
||||||
|
*/
|
||||||
|
RegExpTerm getRepr() { result = repr }
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the minimum char that is matched by both the character classes `c` and `d`.
|
||||||
|
*/
|
||||||
|
private string getMinOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
|
||||||
|
result = min(getAOverlapBetweenCharacterClasses(c, d))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a char that is matched by both the character classes `c` and `d`.
|
||||||
|
* And `c` and `d` is not the same character class.
|
||||||
|
*/
|
||||||
|
private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
|
||||||
|
sharesRoot(c, d) and
|
||||||
|
result = [c.getARelevantChar(), d.getARelevantChar()] and
|
||||||
|
c.matches(result) and
|
||||||
|
d.matches(result) and
|
||||||
|
not c = d
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a character that is represented by both `c` and `d`.
|
||||||
|
*/
|
||||||
|
string intersect(InputSymbol c, InputSymbol d) {
|
||||||
|
c = Char(result) and
|
||||||
|
d = getAnInputSymbolMatching(result) and
|
||||||
|
(
|
||||||
|
sharesRoot(c, d)
|
||||||
|
or
|
||||||
|
d = Dot()
|
||||||
|
or
|
||||||
|
d = Any()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
result = getMinOverlapBetweenCharacterClasses(c, d)
|
||||||
|
or
|
||||||
|
result = c.(CharacterClass).choose() and
|
||||||
|
(
|
||||||
|
d = c
|
||||||
|
or
|
||||||
|
d = Dot() and
|
||||||
|
not (result = "\n" or result = "\r")
|
||||||
|
or
|
||||||
|
d = Any()
|
||||||
|
)
|
||||||
|
or
|
||||||
|
c = Dot() and
|
||||||
|
(
|
||||||
|
d = Dot() and result = "a"
|
||||||
|
or
|
||||||
|
d = Any() and result = "a"
|
||||||
|
)
|
||||||
|
or
|
||||||
|
c = Any() and d = Any() and result = "a"
|
||||||
|
or
|
||||||
|
result = intersect(d, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a symbol that matches `char`.
|
||||||
|
*/
|
||||||
|
bindingset[char]
|
||||||
|
InputSymbol getAnInputSymbolMatching(string char) {
|
||||||
|
result = Char(char)
|
||||||
|
or
|
||||||
|
result.(CharacterClass).matches(char)
|
||||||
|
or
|
||||||
|
result = Dot() and
|
||||||
|
not (char = "\n" or char = "\r")
|
||||||
|
or
|
||||||
|
result = Any()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Predicates for constructing a prefix string that leads to a given state.
|
||||||
|
*/
|
||||||
|
private module PrefixConstruction {
|
||||||
|
/**
|
||||||
|
* Holds if `state` starts the string matched by the regular expression.
|
||||||
|
*/
|
||||||
|
private predicate isStartState(State state) {
|
||||||
|
state instanceof StateInPumpableRegexp and
|
||||||
|
(
|
||||||
|
state = Match(any(RegExpRoot r), _)
|
||||||
|
or
|
||||||
|
exists(RegExpCaret car | state = after(car))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `state` is the textually last start state for the regular expression.
|
||||||
|
*/
|
||||||
|
private predicate lastStartState(State state) {
|
||||||
|
exists(RegExpRoot root |
|
||||||
|
state =
|
||||||
|
max(State s, Location l |
|
||||||
|
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
||||||
|
|
|
||||||
|
s order by l.getStartLine(), l.getStartColumn()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||||
|
*/
|
||||||
|
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||||
|
*/
|
||||||
|
int prefixLength(State start, State state) =
|
||||||
|
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||||
|
*/
|
||||||
|
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a string for which the regular expression will reach `state`.
|
||||||
|
*
|
||||||
|
* Has at most one result for any given `state`.
|
||||||
|
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||||
|
* the regular expression.
|
||||||
|
*/
|
||||||
|
string prefix(State state) {
|
||||||
|
lastStartState(state) and
|
||||||
|
result = ""
|
||||||
|
or
|
||||||
|
// the search stops past the last redos candidate state.
|
||||||
|
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
||||||
|
exists(State prev |
|
||||||
|
// select a unique predecessor (by an arbitrary measure)
|
||||||
|
prev =
|
||||||
|
min(State s, Location loc |
|
||||||
|
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||||
|
loc = s.getRepr().getLocation() and
|
||||||
|
delta(s, _, state)
|
||||||
|
|
|
||||||
|
s order by loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn()
|
||||||
|
)
|
||||||
|
|
|
||||||
|
// greedy search for the shortest prefix
|
||||||
|
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||||
|
or
|
||||||
|
not delta(prev, Epsilon(), state) and
|
||||||
|
result =
|
||||||
|
prefix(prev) +
|
||||||
|
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), state))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A state within a regular expression that has a pumpable state.
|
||||||
|
*/
|
||||||
|
class StateInPumpableRegexp extends State {
|
||||||
|
pragma[noinline]
|
||||||
|
StateInPumpableRegexp() {
|
||||||
|
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Predicates for testing the presence of a rejecting suffix.
|
||||||
|
*
|
||||||
|
* These predicates are used to ensure that the all states reached from the fork
|
||||||
|
* by repeating `w` have a rejecting suffix.
|
||||||
|
*
|
||||||
|
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
|
||||||
|
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
|
||||||
|
*
|
||||||
|
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
|
||||||
|
* as the suffix "X" will cause both the regular expressions to be rejected.
|
||||||
|
*
|
||||||
|
* The string `w` is repeated any number of times because it needs to be
|
||||||
|
* infinitely repeatedable for the attack to work.
|
||||||
|
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
|
||||||
|
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
||||||
|
*/
|
||||||
|
private module SuffixConstruction {
|
||||||
|
import PrefixConstruction
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if all states reachable from `fork` by repeating `w`
|
||||||
|
* are likely rejectable by appending some suffix.
|
||||||
|
*/
|
||||||
|
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
|
||||||
|
isReDoSCandidate(fork, w) and
|
||||||
|
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
|
||||||
|
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
|
||||||
|
*/
|
||||||
|
pragma[nomagic]
|
||||||
|
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
|
||||||
|
// exists a reject edge with some char.
|
||||||
|
hasRejectEdge(s)
|
||||||
|
or
|
||||||
|
hasEdgeToLikelyRejectable(s)
|
||||||
|
or
|
||||||
|
// stopping here is rejection
|
||||||
|
isRejectState(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
||||||
|
*/
|
||||||
|
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
|
||||||
|
*/
|
||||||
|
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
|
||||||
|
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||||
|
// the `next` states might not share a common suffix, which can cause FPs.
|
||||||
|
exists(string char | char = relevant() |
|
||||||
|
forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there is a state `next` that can be reached from `prev`
|
||||||
|
* along epsilon edges, such that there is a transition from
|
||||||
|
* `prev` to `next` that the character symbol `char`.
|
||||||
|
*/
|
||||||
|
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
|
||||||
|
char = relevant() and
|
||||||
|
deltaClosed(prev, getAnInputSymbolMatching(char), next)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a char used for finding possible suffixes.
|
||||||
|
*/
|
||||||
|
private string relevant() { result = CharacterClasses::getARelevantChar() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if there is no edge from `s` labeled `char` in our NFA.
|
||||||
|
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
|
||||||
|
*/
|
||||||
|
private predicate hasRejectEdge(State s) {
|
||||||
|
exists(string char | char = relevant() | not deltaClosedChar(s, char, _))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a state that can be reached from pumpable `fork` consuming all
|
||||||
|
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
|
||||||
|
*/
|
||||||
|
private State process(State fork, string w, int i) {
|
||||||
|
isReDoSCandidate(fork, w) and
|
||||||
|
exists(State prev |
|
||||||
|
i = 0 and prev = fork
|
||||||
|
or
|
||||||
|
prev = process(fork, w, i - 1)
|
||||||
|
or
|
||||||
|
// repeat until fixpoint
|
||||||
|
i = 0 and
|
||||||
|
prev = process(fork, w, w.length() - 1)
|
||||||
|
|
|
||||||
|
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the result of backslash-escaping newlines, carriage-returns and
|
||||||
|
* backslashes in `s`.
|
||||||
|
*/
|
||||||
|
bindingset[s]
|
||||||
|
private string escape(string s) {
|
||||||
|
result =
|
||||||
|
s.replaceAll("\\", "\\\\")
|
||||||
|
.replaceAll("\n", "\\n")
|
||||||
|
.replaceAll("\r", "\\r")
|
||||||
|
.replaceAll("\t", "\\t")
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets `str` with the last `i` characters moved to the front.
|
||||||
|
*
|
||||||
|
* We use this to adjust the pump string to match with the beginning of
|
||||||
|
* a RegExpTerm, so it doesn't start in the middle of a constant.
|
||||||
|
*/
|
||||||
|
bindingset[str, i]
|
||||||
|
private string rotate(string str, int i) {
|
||||||
|
result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if `term` may cause superliniear backtracking on strings containing many repetitions of `pump`.
|
||||||
|
* Gets the minimum possible string that causes superliniear backtracking.
|
||||||
|
*/
|
||||||
|
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
|
||||||
|
exists(int i, string c | s = Match(term, i) |
|
||||||
|
c =
|
||||||
|
min(string w |
|
||||||
|
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
|
||||||
|
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
||||||
|
|
|
||||||
|
w order by w.length(), w
|
||||||
|
) and
|
||||||
|
pump = escape(rotate(c, i))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
|
||||||
|
*
|
||||||
|
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
|
||||||
|
*/
|
||||||
|
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
|
||||||
|
isReDoSAttackable(t, pump, s) and
|
||||||
|
(
|
||||||
|
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
||||||
|
not PrefixConstruction::prefix(s) = ""
|
||||||
|
or
|
||||||
|
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
||||||
|
or
|
||||||
|
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
||||||
|
)
|
||||||
|
}
|
||||||
@@ -4,19 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import javascript
|
import javascript
|
||||||
|
import ReDoSUtil
|
||||||
/**
|
|
||||||
* A regular expression term that permits unlimited repetitions.
|
|
||||||
*/
|
|
||||||
class InfiniteRepetitionQuantifier extends RegExpQuantifier {
|
|
||||||
InfiniteRepetitionQuantifier() {
|
|
||||||
this instanceof RegExpPlus
|
|
||||||
or
|
|
||||||
this instanceof RegExpStar
|
|
||||||
or
|
|
||||||
this instanceof RegExpRange and not exists(this.(RegExpRange).getUpperBound())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds if `t` matches at least an epsilon symbol.
|
* Holds if `t` matches at least an epsilon symbol.
|
||||||
|
|||||||
Reference in New Issue
Block a user