mirror of
https://github.com/github/codeql.git
synced 2026-04-30 19:26:02 +02:00
@@ -45,9 +45,7 @@ import javascript
|
||||
*
|
||||
* This is what the query does. It makes no attempt to construct a prefix
|
||||
* leading into `q`, and only a weak one to construct a suffix that ensures
|
||||
* rejection; this causes some false positives. Also, the query does not fully
|
||||
* handle character classes and does not handle various other features at all;
|
||||
* this causes false negatives.
|
||||
* rejection; this causes some false positives.
|
||||
*
|
||||
* Finally, sometimes it depends on the translation whether the NFA generated
|
||||
* for a regular expression has a pumpable fork or not. We implement one
|
||||
@@ -63,20 +61,23 @@ import javascript
|
||||
* * Transitions between states may be labelled with epsilon, or an abstract
|
||||
* input symbol.
|
||||
* * Each abstract input symbol represents a set of concrete input characters:
|
||||
* either a single character, a set of characters represented by a (positive)
|
||||
* either a single character, a set of characters represented by a
|
||||
* character class, or the set of all characters.
|
||||
* * The product automaton is constructed lazily, starting with pair states
|
||||
* `(q, q)` where `q` is a fork, and proceeding along an over-approximate
|
||||
* step relation.
|
||||
* * The over-approximate step relation allows transitions along pairs of
|
||||
* abstract input symbols as long as the symbols are not trivially incompatible.
|
||||
* abstract input symbols where the symbols have overlap in the characters they accept.
|
||||
* * Once a trace of pairs of abstract input symbols that leads from a fork
|
||||
* back to itself has been identified, we attempt to construct a concrete
|
||||
* string corresponding to it, which may fail.
|
||||
* * Instead of trying to construct a suffix that makes the automaton fail,
|
||||
* we ensure that it isn't possible to reach the accepting state from the
|
||||
* fork along epsilon transitions. In this case, it is very likely (though
|
||||
* not guaranteed) that a rejecting suffix exists.
|
||||
* we ensure that repeating `n` copies of `w` does not reach a state that is
|
||||
* an epsilon transition from the accepting state.
|
||||
* This assumes that the accepting state accepts any suffix.
|
||||
* Regular expressions - where the end anchor `$` is used - have an accepting state
|
||||
* that does not accept all suffixes. Such regular expressions are not accurately
|
||||
* modelled by this assumption, which can cause false negatives.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -103,7 +104,15 @@ class RegExpRoot extends RegExpTerm {
|
||||
*/
|
||||
predicate isRelevant() {
|
||||
// there is at least one repetition
|
||||
exists(RegExpRepetition rep | getRoot(rep) = this) and
|
||||
exists(RegExpRepetition rep | getRoot(rep) = this |
|
||||
// that could possibly match the same thing in multiple ways.
|
||||
exists(RegExpTerm child |
|
||||
child instanceof RegExpAlt or
|
||||
child instanceof RegExpQuantifier
|
||||
|
|
||||
child.getParent+() = rep
|
||||
)
|
||||
) and
|
||||
// there are no lookbehinds
|
||||
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
|
||||
// is actually used as a RegExp
|
||||
@@ -122,6 +131,13 @@ class RegExpRepetition extends RegExpParent {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * A regular-expression constant for which `isCharacter()` holds, i.e. a
 * constant that stands for valid Unicode character(s).
 */
class RegexpCharacterConstant extends RegExpConstant {
  RegexpCharacterConstant() {
    // keep only the constants that the library flags as characters
    isCharacter()
  }
}
|
||||
|
||||
/**
|
||||
* Gets the root containing the given term, that is, the root of the literal,
|
||||
* or a branch of the root disjunction.
|
||||
@@ -136,15 +152,21 @@ RegExpRoot getRoot(RegExpTerm term) {
|
||||
*/
|
||||
newtype TInputSymbol =
|
||||
/** An input symbol corresponding to character `c`. */
|
||||
Char(string c) { c = any(RegExpConstant cc).getValue().charAt(_) } or
|
||||
Char(string c) {
|
||||
c = any(RegexpCharacterConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_)
|
||||
} or
|
||||
/**
|
||||
* An input symbol representing all characters matched by
|
||||
* (positive, non-universal) character class `recc`.
|
||||
* (non-universal) character class `recc`.
|
||||
*/
|
||||
CharClass(RegExpCharacterClass recc) {
|
||||
CharClass(RegExpTerm recc) {
|
||||
getRoot(recc).isRelevant() and
|
||||
not recc.isInverted() and
|
||||
not recc.isUniversalClass()
|
||||
(
|
||||
recc instanceof RegExpCharacterClass and
|
||||
not recc.(RegExpCharacterClass).isUniversalClass()
|
||||
)
|
||||
or
|
||||
recc instanceof RegExpCharacterClassEscape
|
||||
} or
|
||||
/** An input symbol representing all characters matched by `.`. */
|
||||
Dot() or
|
||||
@@ -153,6 +175,28 @@ newtype TInputSymbol =
|
||||
/** An epsilon transition in the automaton. */
|
||||
Epsilon()
|
||||
|
||||
/**
 * Holds if the input symbols `a` and `b` both belong to one and the same
 * regular expression root.
 *
 * Membership is decided by `belongsTo`, so a symbol for which `belongsTo`
 * holds for no root never satisfies this predicate.
 */
private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
  exists(RegExpRoot common | belongsTo(b, common) | belongsTo(a, common))
}
|
||||
|
||||
/**
 * Holds if `a` is an input symbol that originates from a term whose root is `root`.
 *
 * Two kinds of symbols are covered: `Char` symbols for each character of a
 * character constant, and `CharClass` symbols built from a term.
 */
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
  // a single character taken from a character constant below `root`
  exists(RegexpCharacterConstant constant |
    root = getRoot(constant) and
    a = Char(constant.getValue().charAt(_))
  )
  or
  // a character-class symbol for a term below `root`
  exists(RegExpTerm classTerm |
    root = getRoot(classTerm) and
    a = CharClass(classTerm)
  )
}
|
||||
|
||||
/**
|
||||
* An abstract input symbol, representing a set of concrete characters.
|
||||
*/
|
||||
@@ -162,7 +206,7 @@ class InputSymbol extends TInputSymbol {
|
||||
string toString() {
|
||||
this = Char(result)
|
||||
or
|
||||
result = any(RegExpCharacterClass recc | this = CharClass(recc)).toString()
|
||||
result = any(RegExpTerm recc | this = CharClass(recc)).toString()
|
||||
or
|
||||
this = Dot() and result = "."
|
||||
or
|
||||
@@ -171,79 +215,240 @@ class InputSymbol extends TInputSymbol {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a lower bound on the characters matched by the given character class term.
|
||||
* An abstract input symbol that represents a character class.
|
||||
*/
|
||||
string getCCLowerBound(RegExpTerm t) {
|
||||
t.getParent() instanceof RegExpCharacterClass and
|
||||
(
|
||||
result = t.(RegExpConstant).getValue()
|
||||
or
|
||||
t.(RegExpCharacterRange).isRange(result, _)
|
||||
or
|
||||
exists(string name | name = t.(RegExpCharacterClassEscape).getValue() |
|
||||
name = "w" and result = "0"
|
||||
abstract class CharacterClass extends InputSymbol {
  /**
   * Holds if `char` is matched by this character class.
   */
  bindingset[char]
  abstract predicate matches(string char);

  /**
   * Gets a character that matters when testing whether this character class
   * intersects another one.
   *
   * These are the characters explicitly mentioned in the class (shifted by
   * one for inverted classes); character class escapes contribute as if they
   * had been spelled out as intervals.
   *
   * The set is intended to be large enough that, whenever two character
   * classes intersect, one of them contains a relevant character of the other.
   */
  abstract string getARelevantChar();

  /**
   * Gets some character matched by this character class: a relevant
   * character that the class itself matches.
   */
  string choose() { this.matches(result) and result = this.getARelevantChar() }
}
|
||||
|
||||
/**
|
||||
* Provides implementations for `CharacterClass`.
|
||||
*/
|
||||
private module CharacterClasses {
|
||||
/**
|
||||
* Holds if the character class `cc` has a child (constant or range) that matches `char`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate hasChildThatMatches(RegExpCharacterClass cc, string char) {
|
||||
exists(CharClass(cc)) and
|
||||
exists(RegExpTerm child | child = cc.getAChild() |
|
||||
char = child.(RegexpCharacterConstant).getValue()
|
||||
or
|
||||
name = "W" and result = ""
|
||||
rangeMatchesOnLetterOrDigits(child, char)
|
||||
or
|
||||
name = "s" and result = ""
|
||||
not rangeMatchesOnLetterOrDigits(child, _) and
|
||||
char = getARelevantChar() and
|
||||
exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
|
||||
lo <= char and
|
||||
char <= hi
|
||||
)
|
||||
or
|
||||
name = "S" and result = ""
|
||||
exists(RegExpCharacterClassEscape escape | escape = child |
|
||||
escape.getValue() = escape.getValue().toLowerCase() and
|
||||
classEscapeMatches(escape.getValue(), char)
|
||||
or
|
||||
char = getARelevantChar() and
|
||||
escape.getValue() = escape.getValue().toUpperCase() and
|
||||
not classEscapeMatches(escape.getValue().toLowerCase(), char)
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The highest character used in a regular expression. Used to represent intervals without an upper bound.
|
||||
*/
|
||||
string highestCharacter() { result = max(RegExpConstant c | | c.getValue()) }
|
||||
|
||||
/**
|
||||
* Gets an upper bound on the characters matched by the given character class term.
|
||||
*/
|
||||
string getCCUpperBound(RegExpTerm t) {
|
||||
t.getParent() instanceof RegExpCharacterClass and
|
||||
(
|
||||
result = t.(RegExpConstant).getValue()
|
||||
or
|
||||
t.(RegExpCharacterRange).isRange(_, result)
|
||||
or
|
||||
exists(string name | name = t.(RegExpCharacterClassEscape).getValue() |
|
||||
name = "w" and result = "z"
|
||||
or
|
||||
name = "W" and result = highestCharacter()
|
||||
or
|
||||
name = "s" and result = highestCharacter()
|
||||
or
|
||||
name = "S" and result = highestCharacter()
|
||||
/**
|
||||
* Holds if `range` is a range on lower-case, upper-case, or digits, and matches `char`.
|
||||
* This predicate is used to restrict the search space for ranges by only joining `getAnyPossiblyMatchedChar`
|
||||
* on a few ranges.
|
||||
*/
|
||||
private predicate rangeMatchesOnLetterOrDigits(RegExpCharacterRange range, string char) {
|
||||
exists(string lo, string hi |
|
||||
range.isRange(lo, hi) and lo = lowercaseLetter() and hi = lowercaseLetter()
|
||||
|
|
||||
lo <= char and
|
||||
char <= hi and
|
||||
char = lowercaseLetter()
|
||||
)
|
||||
)
|
||||
}
|
||||
or
|
||||
exists(string lo, string hi |
|
||||
range.isRange(lo, hi) and lo = upperCaseLetter() and hi = upperCaseLetter()
|
||||
|
|
||||
lo <= char and
|
||||
char <= hi and
|
||||
char = upperCaseLetter()
|
||||
)
|
||||
or
|
||||
exists(string lo, string hi | range.isRange(lo, hi) and lo = digit() and hi = digit() |
|
||||
lo <= char and
|
||||
char <= hi and
|
||||
char = digit()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `s` belongs to `l` and is a character class whose set of matched characters is contained
|
||||
* in the interval `lo-hi`.
|
||||
*/
|
||||
predicate hasBounds(RegExpRoot l, InputSymbol s, string lo, string hi) {
|
||||
exists(RegExpCharacterClass cc | s = CharClass(cc) |
|
||||
l = getRoot(cc) and
|
||||
lo = min(getCCLowerBound(cc.getAChild())) and
|
||||
hi = max(getCCUpperBound(cc.getAChild()))
|
||||
)
|
||||
}
|
||||
/** Gets any single character of the lower-case letter table below. */
private string lowercaseLetter() {
  exists(string letters | letters = "abdcefghijklmnopqrstuvwxyz" | result = letters.charAt(_))
}
|
||||
|
||||
/**
|
||||
* Holds if `s1` and `s2` possibly have a non-empty intersection.
|
||||
*
|
||||
* This predicate is over-approximate; it is only used for pruning the search space.
|
||||
*/
|
||||
predicate compatible(InputSymbol s1, InputSymbol s2) {
|
||||
exists(RegExpRoot l, string lo1, string lo2, string hi1, string hi2 |
|
||||
hasBounds(l, s1, lo1, hi1) and
|
||||
hasBounds(l, s2, lo2, hi2) and
|
||||
max(string s | s = lo1 or s = lo2) <= min(string s | s = hi1 or s = hi2)
|
||||
)
|
||||
or
|
||||
exists(intersect(s1, s2))
|
||||
/** Gets any single character of the upper-case letter table below. */
private string upperCaseLetter() {
  exists(string letters | letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | result = letters.charAt(_))
}
|
||||
|
||||
/** Gets the string form of any integer from 0 through 9. */
private string digit() { exists(int d | d = [0 .. 9] | result = d.toString()) }
|
||||
|
||||
/**
 * Gets a character that a regular expression could match.
 *
 * The result is drawn from three sources: the characters known to `ascii`,
 * every character occurring in some character constant, and every character
 * that `classEscapeMatches` associates with a character class escape.
 */
string getARelevantChar() {
  // characters matched by one of the modelled class escapes
  classEscapeMatches(_, result)
  or
  // characters that occur in a character constant of any regexp
  result = any(RegexpCharacterConstant constant).getValue().charAt(_)
  or
  // characters present in the `ascii` table
  exists(int code | code = ascii(result))
}
|
||||
|
||||
/**
 * Gets a character that the character class `c` mentions.
 *
 * That is: the value of a character-constant child, either end point of a
 * range child, or the smallest/largest character that `classEscapeMatches`
 * lists for the lower-cased name of a class-escape child.
 */
private string getAMentionedChar(RegExpCharacterClass c) {
  // a literal character inside the class
  result = c.getAChild().(RegexpCharacterConstant).getValue()
  or
  // the lower or upper end of a range inside the class
  exists(RegExpCharacterRange range, string lo, string hi |
    range = c.getAChild() and range.isRange(lo, hi)
  |
    result = [lo, hi]
  )
  or
  // the extreme characters of a class escape; the name is lower-cased so that
  // negated escapes use the bounds of their positive counterpart
  exists(string escapeName |
    escapeName = c.getAChild().(RegExpCharacterClassEscape).getValue().toLowerCase()
  |
    result = min(string s | classEscapeMatches(escapeName, s))
    or
    result = max(string s | classEscapeMatches(escapeName, s))
  )
}
|
||||
|
||||
/**
 * The `CharacterClass` implementation for character classes that are not inverted.
 */
private class PositiveCharacterClass extends CharacterClass {
  RegExpCharacterClass positiveClass;

  PositiveCharacterClass() {
    not positiveClass.isInverted() and
    this = CharClass(positiveClass)
  }

  /** Gets a character mentioned in the underlying character class. */
  override string getARelevantChar() { result = getAMentionedChar(positiveClass) }

  /** Holds if some child of the underlying character class matches `char`. */
  override predicate matches(string char) { hasChildThatMatches(positiveClass, char) }
}
|
||||
|
||||
/**
 * The `CharacterClass` implementation for inverted character classes.
 */
private class InvertedCharacterClass extends CharacterClass {
  RegExpCharacterClass invertedClass;

  InvertedCharacterClass() {
    invertedClass.isInverted() and
    this = CharClass(invertedClass)
  }

  /**
   * Gets a direct neighbour (per `nextChar`) of a character mentioned in the
   * underlying class: either its successor or its predecessor.
   */
  override string getARelevantChar() {
    exists(string mentioned | mentioned = getAMentionedChar(invertedClass) |
      result = nextChar(mentioned)
      or
      mentioned = nextChar(result)
    )
  }

  /** Holds if no child of the underlying character class matches `char`. */
  bindingset[char]
  override predicate matches(string char) { not hasChildThatMatches(invertedClass, char) }
}
|
||||
|
||||
/**
 * Holds if `char` is matched by the character class escape named `clazz`,
 * where `clazz` is one of `"d"`, `"s"` or `"w"` (i.e. `\d`, `\s` or `\w`).
 */
pragma[noinline]
private predicate classEscapeMatches(string clazz, string char) {
  clazz = "w" and
  char = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_".charAt(_)
  or
  clazz = "d" and
  char = "0123456789".charAt(_)
  or
  // the common whitespace characters
  clazz = "s" and
  char = [" ", "\t", "\r", "\n"]
  or
  // \v|\f (vertical tab | form feed), restricted to the relevant characters
  clazz = "s" and
  char = getARelevantChar() and
  char.regexpMatch("\\u000b|\\u000c")
}
|
||||
|
||||
/**
 * The `CharacterClass` implementation for the escapes `\d`, `\s` and `\w`.
 */
private class PositiveCharacterClassEscape extends CharacterClass {
  RegExpCharacterClassEscape escapeTerm;

  PositiveCharacterClassEscape() {
    escapeTerm.getValue() = ["d", "s", "w"] and
    this = CharClass(escapeTerm)
  }

  /** Gets one of a small, fixed set of representative characters for this escape. */
  override string getARelevantChar() {
    exists(string name | name = escapeTerm.getValue() |
      name = "d" and result = ["0", "9"]
      or
      name = "s" and result = " "
      or
      name = "w" and result = ["a", "Z", "_", "0", "9"]
    )
  }

  /** Holds if `classEscapeMatches` lists `char` for this escape. */
  override predicate matches(string char) { classEscapeMatches(escapeTerm.getValue(), char) }

  /** Gets the single fixed witness character used for this escape. */
  override string choose() {
    exists(string name | name = escapeTerm.getValue() |
      name = "d" and result = "9"
      or
      name = "s" and result = " "
      or
      name = "w" and result = "a"
    )
  }
}
|
||||
|
||||
/**
 * The `CharacterClass` implementation for the escapes `\D`, `\S` and `\W`.
 */
private class NegativeCharacterClassEscape extends CharacterClass {
  RegExpCharacterClassEscape escapeTerm;

  NegativeCharacterClassEscape() {
    escapeTerm.getValue() = ["D", "S", "W"] and
    this = CharClass(escapeTerm)
  }

  /** Gets one of a small, fixed set of representative characters for this escape. */
  override string getARelevantChar() {
    exists(string name | name = escapeTerm.getValue() |
      name = "D" and result = ["a", "Z", "!"]
      or
      name = "S" and result = ["a", "9", "!"]
      or
      name = "W" and result = [" ", "!"]
    )
  }

  /**
   * Holds if `char` is not listed by `classEscapeMatches` for the lower-case
   * (positive) counterpart of this escape.
   */
  bindingset[char]
  override predicate matches(string char) {
    not exists(string positiveName | positiveName = escapeTerm.getValue().toLowerCase() |
      classEscapeMatches(positiveName, char)
    )
  }
}
|
||||
}
|
||||
|
||||
newtype TState =
|
||||
@@ -252,7 +457,7 @@ newtype TState =
|
||||
(
|
||||
i = 0
|
||||
or
|
||||
exists(t.(RegExpConstant).getValue().charAt(i))
|
||||
exists(t.(RegexpCharacterConstant).getValue().charAt(i))
|
||||
)
|
||||
} or
|
||||
Accept(RegExpRoot l) { l.isRelevant() }
|
||||
@@ -324,7 +529,7 @@ State after(RegExpTerm t) {
|
||||
* Holds if the NFA has a transition from `q1` to `q2` labelled with `lbl`.
|
||||
*/
|
||||
predicate delta(State q1, EdgeLabel lbl, State q2) {
|
||||
exists(RegExpConstant s, int i |
|
||||
exists(RegexpCharacterConstant s, int i |
|
||||
q1 = Match(s, i) and
|
||||
lbl = Char(s.getValue().charAt(i)) and
|
||||
(
|
||||
@@ -342,7 +547,15 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
|
||||
exists(RegExpCharacterClass cc |
|
||||
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
|
||||
or
|
||||
q1 = before(cc) and lbl = CharClass(cc) and q2 = after(cc)
|
||||
q1 = before(cc) and
|
||||
lbl = CharClass(cc) and
|
||||
q2 = after(cc)
|
||||
)
|
||||
or
|
||||
exists(RegExpCharacterClassEscape cc |
|
||||
q1 = before(cc) and
|
||||
lbl = CharClass(cc) and
|
||||
q2 = after(cc)
|
||||
)
|
||||
or
|
||||
exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
|
||||
@@ -407,6 +620,10 @@ class StatePair extends TStatePair {
|
||||
StatePair() { this = MkStatePair(q1, q2) }
|
||||
|
||||
string toString() { result = "(" + q1 + ", " + q2 + ")" }
|
||||
|
||||
State getLeft() { result = q1 }
|
||||
|
||||
State getRight() { result = q2 }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -444,10 +661,10 @@ predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
delta(q1, s1, r1) and
|
||||
q2 = epsilonSucc*(q) and
|
||||
delta(q2, s2, r2) and
|
||||
// Use pragma[noopt] to prevent compatible(s1,s2) from being the starting point of the join.
|
||||
// Use pragma[noopt] to prevent intersect(s1,s2) from being the starting point of the join.
|
||||
// From (s1,s2) it would find a huge number of intermediate state pairs (q1,q2) originating from different literals,
|
||||
// and discover at the end that no `q` can reach both `q1` and `q2` by epsilon transitions.
|
||||
compatible(s1, s2)
|
||||
exists(intersect(s1, s2))
|
||||
|
|
||||
s1 != s2
|
||||
or
|
||||
@@ -469,11 +686,13 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
|
||||
* Holds if there are transitions from the components of `q` to `r1` and `r2`
|
||||
* labelled with `s1` and `s2`, respectively.
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
|
||||
exists(State q1, State q2 | q = MkStatePair(q1, q2) |
|
||||
exists(State q1, State q2 | q.getLeft() = q1 and q.getRight() = q2 |
|
||||
deltaClosed(q1, s1, r1) and
|
||||
deltaClosed(q2, s2, r2) and
|
||||
compatible(s1, s2)
|
||||
// use noopt to force the join on `intersect` to happen last.
|
||||
exists(intersect(s1, s2))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -492,32 +711,44 @@ newtype Trace =
|
||||
t = Nil() and isFork(_, s1, s2, _, _)
|
||||
}
|
||||
|
||||
/**
 * Gets the smallest character reported by `getAOverlapBetweenCharacterClasses`
 * for the character classes `c` and `d`.
 */
private string getMinOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
  result = min(string shared | shared = getAOverlapBetweenCharacterClasses(c, d))
}
|
||||
|
||||
/**
 * Gets a character matched by both of the character classes `c` and `d`,
 * where `c` and `d` are distinct and share a regular expression root.
 *
 * Only the relevant characters of `c` and of `d` are considered as candidates.
 */
private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterClass d) {
  c != d and
  sharesRoot(c, d) and
  // candidate: a relevant character of either class ...
  (
    result = c.getARelevantChar()
    or
    result = d.getARelevantChar()
  ) and
  // ... that both classes actually match
  c.matches(result) and
  d.matches(result)
}
|
||||
|
||||
/**
|
||||
* Gets a character that is represented by both `c` and `d`.
|
||||
*/
|
||||
string intersect(InputSymbol c, InputSymbol d) {
|
||||
c = Char(result) and
|
||||
d = getAnInputSymbolMatching(result) and
|
||||
(
|
||||
d = Char(result)
|
||||
sharesRoot(c, d)
|
||||
or
|
||||
exists(RegExpCharacterClass cc | d = CharClass(cc) |
|
||||
exists(RegExpTerm child | child = cc.getAChild() |
|
||||
result = child.(RegExpConstant).getValue()
|
||||
or
|
||||
exists(string lo, string hi | child.(RegExpCharacterRange).isRange(lo, hi) |
|
||||
lo <= result and result <= hi
|
||||
)
|
||||
)
|
||||
)
|
||||
or
|
||||
d = Dot() and
|
||||
not (result = "\n" or result = "\r")
|
||||
d = Dot()
|
||||
or
|
||||
d = Any()
|
||||
)
|
||||
or
|
||||
exists(RegExpCharacterClass cc | c = CharClass(cc) and result = choose(cc) |
|
||||
d = CharClass(cc)
|
||||
result = getMinOverlapBetweenCharacterClasses(c, d)
|
||||
or
|
||||
result = c.(CharacterClass).choose() and
|
||||
(
|
||||
d = c
|
||||
or
|
||||
d = Dot() and
|
||||
not (result = "\n" or result = "\r")
|
||||
@@ -538,15 +769,35 @@ string intersect(InputSymbol c, InputSymbol d) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a character matched by character class `cc`.
|
||||
* Gets a symbol that matches `char`.
|
||||
*/
|
||||
string choose(RegExpCharacterClass cc) {
|
||||
result =
|
||||
min(string c |
|
||||
exists(RegExpTerm child | child = cc.getAChild() |
|
||||
c = child.(RegExpConstant).getValue() or
|
||||
child.(RegExpCharacterRange).isRange(c, _)
|
||||
)
|
||||
/**
 * Gets an input symbol that matches the character `char`: the `Char` symbol
 * for it, a character class that matches it, `Dot()` unless `char` is a line
 * feed or carriage return, or `Any()`.
 */
bindingset[char]
InputSymbol getAnInputSymbolMatching(string char) {
  result = Any()
  or
  // `.` matches everything except "\n" and "\r"
  result = Dot() and
  char != "\n" and
  char != "\r"
  or
  exists(CharacterClass cls | cls.matches(char) | result = cls)
  or
  result = Char(char)
}
|
||||
|
||||
/**
 * Gets the character whose `ascii` value is one greater than that of `c`,
 * i.e. the character following `c` in the simplified table used by `ascii`.
 */
string nextChar(string c) { ascii(result) = ascii(c) + 1 }
|
||||
|
||||
/**
 * Gets an approximation of the ASCII code of `char`.
 *
 * The result is the rank (starting at 1) of `char` among the characters of
 * the table below when sorted as strings; it is not the real code point.
 * Only the easily printable characters are included (no newline, tab, null, etc).
 */
int ascii(string char) {
  exists(string printable |
    printable =
      "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
  |
    char = rank[result](string c | c = printable.charAt(_) | c)
  )
}
|
||||
|
||||
@@ -566,6 +817,7 @@ string concretise(Trace t) {
|
||||
* a path from `r` back to `(fork, fork)` with `rem` steps.
|
||||
*/
|
||||
predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
|
||||
// base case
|
||||
exists(InputSymbol s1, InputSymbol s2, State q1, State q2 |
|
||||
isFork(fork, s1, s2, q1, q2) and
|
||||
r = MkStatePair(q1, q2) and
|
||||
@@ -573,11 +825,12 @@ predicate isReachableFromFork(State fork, StatePair r, Trace w, int rem) {
|
||||
rem = statePairDist(r, MkStatePair(fork, fork))
|
||||
)
|
||||
or
|
||||
// recursive case
|
||||
exists(StatePair p, Trace v, InputSymbol s1, InputSymbol s2 |
|
||||
isReachableFromFork(fork, p, v, rem + 1) and
|
||||
step(p, s1, s2, r) and
|
||||
w = Step(s1, s2, v) and
|
||||
rem > 0
|
||||
rem >= statePairDist(r, MkStatePair(fork, fork))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -608,14 +861,21 @@ predicate isPumpable(State fork, string w) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state that can be reached from pumpable `fork` consuming
|
||||
* the first `i+1` characters of `w`.
|
||||
* Gets a state that can be reached from pumpable `fork` consuming all
|
||||
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
|
||||
*
|
||||
* Character classes are overapproximated as intervals; for example,
|
||||
* `[a-ln-z]` is treated the same as `[a-z]`, and hence considered
|
||||
* to match `m`, even though in fact it does not. This is fine for
|
||||
* our purposes, since we only use this predicate to avoid false
|
||||
* positives.
|
||||
* This predicate is used to ensure that the accepting state is not reached from the fork by repeating `w`.
|
||||
* This works under the assumption that any accepting state accepts all suffixes.
|
||||
* For example, a regexp like `/^(a+)+/` will accept any string as long as the prefix is some number of `"a"`s,
|
||||
* and it is therefore not possible to construct a rejected suffix.
|
||||
* This assumption breaks on regular expressions that use the anchor `$`, e.g: `/^(a+)+$/`, and such regular
|
||||
* expressions are not accurately modeled by this query.
|
||||
*
|
||||
* The string `w` is repeated any number of times because it needs to be
|
||||
* infinitely repeatable for the attack to work.
|
||||
* For a regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
|
||||
* using epsilon transitions. But any attempt at repeating `w` will end in the accepting state.
|
||||
* This also relies on the assumption that any accepting state will accept all suffixes.
|
||||
*/
|
||||
State process(State fork, string w, int i) {
|
||||
isPumpable(fork, w) and
|
||||
@@ -623,11 +883,12 @@ State process(State fork, string w, int i) {
|
||||
i = 0 and prev = fork
|
||||
or
|
||||
prev = process(fork, w, i - 1)
|
||||
or
|
||||
// repeat until fixpoint
|
||||
i = 0 and
|
||||
prev = process(fork, w, w.length() - 1)
|
||||
|
|
||||
exists(InputSymbol s |
|
||||
deltaClosed(prev, s, result) and
|
||||
exists(intersect(Char(w.charAt(i)), s))
|
||||
)
|
||||
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -653,9 +914,14 @@ string rotate(string str, int i) {
|
||||
|
||||
from RegExpTerm t, string c, int i
|
||||
where
|
||||
c = min(string w | isPumpable(Match(t, i), w)) and
|
||||
not isPumpable(epsilonSucc+(Match(t, i)), _) and
|
||||
not epsilonSucc*(process(Match(t, i), c, c.length() - 1)) = Accept(_)
|
||||
c =
|
||||
min(string w |
|
||||
isPumpable(Match(t, i), w) and
|
||||
not isPumpable(epsilonSucc+(Match(t, i)), _) and
|
||||
not epsilonSucc*(process(Match(t, i), w, _)) = Accept(_)
|
||||
|
|
||||
w order by w.length(), w
|
||||
)
|
||||
select t,
|
||||
"This part of the regular expression may cause exponential backtracking on strings " +
|
||||
"containing many repetitions of '" + escape(rotate(c, i)) + "'."
|
||||
|
||||
@@ -181,8 +181,8 @@
|
||||
| regexplib/uri.js:59:2:59:13 | [a-zA-Z]{3,} | it can start matching anywhere |
|
||||
| regexplib/uri.js:64:31:64:36 | [\\w-]+ | it can start matching anywhere after the start of the preceeding '[\\w-\\s]*' |
|
||||
| regexplib/uri.js:73:2:73:4 | .*? | it can start matching anywhere |
|
||||
| tst.js:14:13:14:18 | (.*,)+ | it can start matching anywhere |
|
||||
| tst.js:14:14:14:15 | .* | it can start matching anywhere |
|
||||
| tst.js:14:14:14:19 | (.*,)+ | it can start matching anywhere |
|
||||
| tst.js:14:15:14:16 | .* | it can start matching anywhere |
|
||||
| tst.js:47:15:47:37 | (?:[^"']\|".*?"\|'.*?')*? | it can start matching anywhere |
|
||||
| tst.js:47:25:47:27 | .*? | it can start matching anywhere after the start of the preceeding '"' |
|
||||
| tst.js:47:31:47:33 | .*? | it can start matching anywhere after the start of the preceeding ''' |
|
||||
@@ -197,3 +197,71 @@
|
||||
| tst.js:83:14:83:20 | (.\|\\n)* | it can start matching anywhere |
|
||||
| tst.js:89:25:89:32 | (a\|aa?)* | it can start matching anywhere |
|
||||
| tst.js:92:14:92:21 | (a\|aa?)* | it can start matching anywhere |
|
||||
| tst.js:95:15:95:25 | ([^]\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:101:15:101:23 | (.\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:104:16:104:24 | (a\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:107:15:107:23 | (b\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:110:15:110:23 | (G\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:113:15:113:27 | ([0-9]\|[^a])* | it can start matching anywhere |
|
||||
| tst.js:125:15:125:28 | ([a-z]\|[d-h])* | it can start matching anywhere |
|
||||
| tst.js:128:15:128:30 | ([^a-z]\|[^0-9])* | it can start matching anywhere |
|
||||
| tst.js:131:15:131:25 | (\\d\|[0-9])* | it can start matching anywhere |
|
||||
| tst.js:134:15:134:22 | (\\s\|\\s)* | it can start matching anywhere |
|
||||
| tst.js:137:15:137:21 | (\\w\|G)* | it can start matching anywhere |
|
||||
| tst.js:140:16:140:23 | (\\s\|\\d)* | it can start matching anywhere |
|
||||
| tst.js:143:15:143:22 | (\\d\|\\w)* | it can start matching anywhere |
|
||||
| tst.js:146:15:146:21 | (\\d\|5)* | it can start matching anywhere |
|
||||
| tst.js:149:15:149:24 | (\\s\|[\\f])* | it can start matching anywhere |
|
||||
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | it can start matching anywhere |
|
||||
| tst.js:155:15:155:24 | (\\f\|[\\f])* | it can start matching anywhere |
|
||||
| tst.js:158:15:158:22 | (\\W\|\\D)* | it can start matching anywhere |
|
||||
| tst.js:161:15:161:22 | (\\S\|\\w)* | it can start matching anywhere |
|
||||
| tst.js:164:15:164:24 | (\\S\|[\\w])* | it can start matching anywhere |
|
||||
| tst.js:167:15:167:27 | (1s\|[\\da-z])* | it can start matching anywhere |
|
||||
| tst.js:170:15:170:23 | (0\|[\\d])* | it can start matching anywhere |
|
||||
| tst.js:173:15:173:22 | ([\\d]+)* | it can start matching anywhere |
|
||||
| tst.js:173:16:173:20 | [\\d]+ | it can start matching anywhere |
|
||||
| tst.js:188:14:188:21 | (\\n\\s*)+ | it can start matching anywhere |
|
||||
| tst.js:197:14:197:24 | (a+\|b+\|c+)* | it can start matching anywhere |
|
||||
| tst.js:197:15:197:16 | a+ | it can start matching anywhere |
|
||||
| tst.js:197:18:197:19 | b+ | it can start matching anywhere |
|
||||
| tst.js:197:21:197:22 | c+ | it can start matching anywhere |
|
||||
| tst.js:200:15:200:24 | ((a+a?)*)+ | it can start matching anywhere |
|
||||
| tst.js:200:16:200:22 | (a+a?)* | it can start matching anywhere |
|
||||
| tst.js:200:17:200:18 | a+ | it can start matching anywhere |
|
||||
| tst.js:203:14:203:18 | (a+)+ | it can start matching anywhere |
|
||||
| tst.js:203:15:203:16 | a+ | it can start matching anywhere |
|
||||
| tst.js:206:15:206:19 | (a+)+ | it can start matching anywhere |
|
||||
| tst.js:206:16:206:17 | a+ | it can start matching anywhere |
|
||||
| tst.js:209:14:209:18 | (a+)+ | it can start matching anywhere |
|
||||
| tst.js:209:15:209:16 | a+ | it can start matching anywhere |
|
||||
| tst.js:212:15:212:20 | (\\n+)+ | it can start matching anywhere |
|
||||
| tst.js:212:16:212:18 | \\n+ | it can start matching anywhere |
|
||||
| tst.js:215:14:215:19 | (\\n+)+ | it can start matching anywhere |
|
||||
| tst.js:215:15:215:17 | \\n+ | it can start matching anywhere |
|
||||
| tst.js:218:14:218:21 | ([^X]+)* | it can start matching anywhere |
|
||||
| tst.js:218:15:218:19 | [^X]+ | it can start matching anywhere |
|
||||
| tst.js:221:14:221:24 | (([^X]b)+)* | it can start matching anywhere |
|
||||
| tst.js:221:15:221:22 | ([^X]b)+ | it can start matching anywhere |
|
||||
| tst.js:224:15:224:25 | (([^X]b)+)* | it can start matching anywhere |
|
||||
| tst.js:224:16:224:23 | ([^X]b)+ | it can start matching anywhere |
|
||||
| tst.js:227:14:227:24 | (([^X]b)+)* | it can start matching anywhere |
|
||||
| tst.js:227:15:227:22 | ([^X]b)+ | it can start matching anywhere |
|
||||
| tst.js:230:15:230:22 | ((ab)+)* | it can start matching anywhere |
|
||||
| tst.js:230:16:230:20 | (ab)+ | it can start matching anywhere |
|
||||
| tst.js:233:15:233:22 | ((ab)+)* | it can start matching anywhere |
|
||||
| tst.js:233:16:233:20 | (ab)+ | it can start matching anywhere |
|
||||
| tst.js:239:14:239:21 | ((ab)+)* | it can start matching anywhere |
|
||||
| tst.js:239:15:239:19 | (ab)+ | it can start matching anywhere |
|
||||
| tst.js:242:15:242:22 | ((ab)+)* | it can start matching anywhere |
|
||||
| tst.js:242:16:242:20 | (ab)+ | it can start matching anywhere |
|
||||
| tst.js:245:14:245:23 | ([\\n\\s]+)* | it can start matching anywhere |
|
||||
| tst.js:245:15:245:21 | [\\n\\s]+ | it can start matching anywhere |
|
||||
| tst.js:248:16:248:17 | A* | it can start matching anywhere |
|
||||
| tst.js:248:18:248:19 | A* | it can start matching anywhere |
|
||||
| tst.js:248:18:248:19 | A* | it can start matching anywhere after the start of the preceeding 'A*' |
|
||||
| tst.js:254:14:254:91 | (\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\s*foobarbaz\\d*foobarbaz\\w*)+ | it can start matching anywhere |
|
||||
| tst.js:254:15:254:17 | \\w* | it can start matching anywhere |
|
||||
| tst.js:257:14:257:116 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | it can start matching anywhere |
|
||||
| tst.js:260:14:260:77 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | it can start matching anywhere |
|
||||
| tst.js:263:15:263:117 | (thisisagoddamnlongstringforstresstestingthequery\|imanotherbutunrelatedstringcomparedtotheotherstring)* | it can start matching anywhere |
|
||||
|
||||
@@ -1,42 +1,60 @@
|
||||
| polynomial-redos.js:17:5:17:6 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
||||
| polynomial-redos.js:41:52:41:63 | [\\x21-\\x7E]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '?'. |
|
||||
| polynomial-redos.js:46:33:46:45 | [a-zA-Z_0-9]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. |
|
||||
| regexplib/address.js:51:220:51:222 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/address.js:51:616:51:618 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/address.js:51:803:51:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. |
|
||||
| regexplib/address.js:75:220:75:222 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/address.js:75:616:75:618 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/address.js:75:803:75:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. |
|
||||
| regexplib/dates.js:66:133:66:139 | JANUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JANUARY'. |
|
||||
| regexplib/dates.js:66:141:66:148 | FEBRUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'FEBRUARY'. |
|
||||
| regexplib/dates.js:66:150:66:154 | MARCH | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MARCH'. |
|
||||
| regexplib/dates.js:66:156:66:160 | APRIL | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'APRIL'. |
|
||||
| regexplib/dates.js:66:162:66:164 | MAY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MAY'. |
|
||||
| regexplib/dates.js:66:166:66:169 | JUNE | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JUNE'. |
|
||||
| regexplib/dates.js:66:171:66:174 | JULY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JULY'. |
|
||||
| regexplib/dates.js:66:176:66:181 | AUGUST | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'AUGUST'. |
|
||||
| regexplib/dates.js:66:183:66:191 | SEPTEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'SEPTEMBER'. |
|
||||
| regexplib/dates.js:66:193:66:199 | OCTOBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'OCTOBER'. |
|
||||
| regexplib/dates.js:66:201:66:208 | NOVEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'NOVEMBER'. |
|
||||
| regexplib/dates.js:66:210:66:217 | DECEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'DECEMBER'. |
|
||||
| regexplib/dates.js:66:234:66:240 | PRESENT | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'PRESENT'. |
|
||||
| regexplib/email.js:1:16:1:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:67:25:78 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:106:25:117 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:25:251:25:262 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:32:10:32:25 | (?:\\w[\\.\\-\\+]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:33:16:33:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:33:38:33:51 | ([0-9a-zA-Z])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '00.'. |
|
||||
| regexplib/email.js:33:53:33:58 | [-\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:34:24:34:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/email.js:34:63:34:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| regexplib/markup.js:3:451:3:453 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a '. |
|
||||
| regexplib/markup.js:13:6:13:12 | [^"']+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!'. |
|
||||
| regexplib/markup.js:13:14:13:16 | .+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a"'. |
|
||||
| regexplib/markup.js:37:29:37:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
|
||||
| regexplib/markup.js:40:23:40:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/markup.js:40:132:40:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @a<""'. |
|
||||
| regexplib/markup.js:53:29:53:56 | [a-zA-Z0-9\|:\|\\/\|=\|-\|.\|\\?\|&]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
|
||||
| regexplib/markup.js:56:23:56:25 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/markup.js:56:132:56:134 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' @a<""'. |
|
||||
| regexplib/misc.js:15:56:15:118 | (([^\\\\/:\\*\\?"\\\|<>\\. ])\|([^\\\\/:\\*\\?"\\\|<>]*[^\\\\/:\\*\\?"\\\|<>\\. ]))? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!\\\\!'. |
|
||||
| regexplib/misc.js:24:56:24:118 | (([^\\\\/:\\*\\?"\\\|<>\\. ])\|([^\\\\/:\\*\\?"\\\|<>]*[^\\\\/:\\*\\?"\\\|<>\\. ]))? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '!\\\\!'. |
|
||||
| regexplib/misc.js:79:3:79:25 | (\\/w\|\\/W\|[^<>+?$%{}&])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/W'. |
|
||||
| regexplib/misc.js:123:17:123:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
|
||||
| regexplib/misc.js:142:3:142:25 | (\\/w\|\\/W\|[^<>+?$%{}&])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/W'. |
|
||||
| regexplib/misc.js:148:20:148:22 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| regexplib/misc.js:148:23:148:29 | [^"'=]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '! '. |
|
||||
| regexplib/strings.js:19:31:19:57 | [a-zæøå0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
|
||||
| regexplib/strings.js:57:17:57:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
|
||||
| regexplib/strings.js:81:17:81:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '9'. |
|
||||
| regexplib/uri.js:3:128:3:129 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
|
||||
| regexplib/uri.js:3:200:3:215 | (?:\\&?\\w+\\=\\w+)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0=0'. |
|
||||
| regexplib/uri.js:5:42:5:43 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\a'. |
|
||||
| regexplib/uri.js:17:42:17:43 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\a'. |
|
||||
| regexplib/uri.js:38:35:38:40 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/uri.js:38:52:38:60 | [a-z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0a'. |
|
||||
| regexplib/uri.js:55:35:55:40 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| regexplib/uri.js:55:52:55:60 | [a-z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0a'. |
|
||||
| regexplib/uri.js:63:393:63:429 | [a-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$#\\=~_\\-@]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/#'. |
|
||||
| tst.js:4:18:4:32 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
|
||||
| tst.js:4:42:4:58 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
|
||||
| tst.js:14:14:14:15 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
|
||||
| tst.js:19:24:19:43 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
| tst.js:19:47:19:66 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
| tst.js:19:71:19:90 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
|
||||
| tst.js:31:54:31:55 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|a\|\\n'. |
|
||||
| tst.js:31:54:31:55 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\|\|\\n'. |
|
||||
| tst.js:36:23:36:32 | (\\\\\\/\|.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\/'. |
|
||||
| tst.js:41:27:41:28 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
|
||||
| tst.js:47:25:47:27 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '""'. |
|
||||
@@ -45,10 +63,53 @@
|
||||
| tst.js:52:70:52:72 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
|
||||
| tst.js:58:15:58:20 | [a-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:60:43:60:54 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:66:16:66:31 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
|
||||
| tst.js:66:16:66:31 | [\\w#:.~>+()\\s-]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\t'. |
|
||||
| tst.js:66:38:66:40 | .*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ']['. |
|
||||
| tst.js:71:19:71:26 | (\\\\?.)*? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\a'. |
|
||||
| tst.js:74:14:74:21 | (b\|a?b)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:77:14:77:21 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:83:14:83:20 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:89:25:89:32 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:95:15:95:25 | ([^]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
|
||||
| tst.js:101:15:101:23 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '`'. |
|
||||
| tst.js:107:15:107:23 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:110:15:110:23 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
|
||||
| tst.js:113:15:113:27 | ([0-9]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:116:60:116:104 | (?:\\\\[\\x00-\\x7f]\|[^\\x00-\\x08\\x0a-\\x1f\\x7f"])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\!'. |
|
||||
| tst.js:119:16:119:60 | (?:\\\\[\\x00-\\x7f]\|[^\\x00-\\x08\\x0a-\\x1f\\x7f"])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\!'. |
|
||||
| tst.js:125:15:125:28 | ([a-z]\|[d-h])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'd'. |
|
||||
| tst.js:128:15:128:30 | ([^a-z]\|[^0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
|
||||
| tst.js:131:15:131:25 | (\\d\|[0-9])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:134:15:134:22 | (\\s\|\\s)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| tst.js:137:15:137:21 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
|
||||
| tst.js:143:15:143:22 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:146:15:146:21 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
|
||||
| tst.js:149:15:149:24 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
|
||||
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
|
||||
| tst.js:155:15:155:24 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
|
||||
| tst.js:158:15:158:22 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| tst.js:161:15:161:22 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:164:15:164:24 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:167:15:167:27 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
|
||||
| tst.js:170:15:170:23 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:173:16:173:20 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
|
||||
| tst.js:182:17:182:21 | [^>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
|
||||
| tst.js:185:16:185:21 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '='. |
|
||||
| tst.js:188:17:188:19 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:191:18:191:20 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
|
||||
| tst.js:194:68:194:79 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' A:'. |
|
||||
| tst.js:194:81:194:82 | ,? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ',A: '. |
|
||||
| tst.js:197:15:197:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:197:18:197:19 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:200:17:200:18 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:203:15:203:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:209:15:209:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
| tst.js:215:15:215:17 | \\n+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:218:15:218:19 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
|
||||
| tst.js:221:20:221:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bW'. |
|
||||
| tst.js:227:20:227:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bW'. |
|
||||
| tst.js:239:16:239:17 | ab | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |
|
||||
| tst.js:245:15:245:21 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
| tst.js:254:87:254:89 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
|
||||
| tst.js:257:14:257:116 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| tst.js:260:14:260:77 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
|
||||
@@ -43,7 +43,7 @@ app.use(function(req, res) {
|
||||
tainted.match(/^([a-z0-9-]+)[ \t]+([a-zA-Z0-9+\/]+[=]*)([\n \t]+([^\n]+))?$/); // NOT OK - but not flagged due to lack of support for inverted character classes
|
||||
tainted.match(/^([a-z0-9-]+)[ \t]+([a-zA-Z0-9+\/]+[=]*)([ \t]+([^ \t][^\n]*[\n]*)?)?$/); // OK
|
||||
|
||||
tainted.match(/^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)+$/); // NOT OK - but not flagged
|
||||
tainted.match(/^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)+$/); // NOT OK - but only flagged by js/redos
|
||||
tainted.match(/^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/); // OK
|
||||
tainted.replaceAll(/\s*\n\s*/g, ' '); // NOT OK
|
||||
|
||||
|
||||
@@ -8,15 +8,15 @@ var bad1 = /^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)/;
|
||||
// under the MIT license; see file marked-LICENSE.
|
||||
var good1 = /^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)/;
|
||||
|
||||
// NOT GOOD
|
||||
// GOOD - there is no witness in the end that could cause the regexp to not match
|
||||
// Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
|
||||
// which is licensed under the MIT license; see file brace-expansion-LICENSE.
|
||||
var bad2 = /(.*,)+.+/;
|
||||
var good2 = /(.*,)+.+/;
|
||||
|
||||
// NOT GOOD; attack: " '" + "\\\\".repeat(100)
|
||||
// Adapted from CodeMirror (https://github.com/codemirror/codemirror),
|
||||
// which is licensed under the MIT license; see file CodeMirror-LICENSE.
|
||||
var bad3 = /^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?/;
|
||||
var bad2 = /^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?/;
|
||||
|
||||
// GOOD
|
||||
// Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
|
||||
@@ -90,3 +90,180 @@ var bad17 = new RegExp('(a|aa?)*b');
|
||||
|
||||
// GOOD - not used as regexp
|
||||
var good9 = '(a|aa?)*b';
|
||||
|
||||
// NOT GOOD
|
||||
var bad18 = /(([^]|[^a])*)"/;
|
||||
|
||||
// GOOD - there is no witness in the end that could cause the regexp to not match
|
||||
var good10 = /([^"']+)*/g;
|
||||
|
||||
// NOT GOOD
|
||||
var bad20 = /((.|[^a])*)"/;
|
||||
|
||||
// GOOD
|
||||
var good10 = /((a|[^a])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad21 = /((b|[^a])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad22 = /((G|[^a])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad23 = /(([0-9]|[^a])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad24 = /(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad25 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"/;
|
||||
|
||||
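// GOOD - the second alternative excludes the backslash, so it cannot overlap with the escape alternative (the `bad26` name below is a misnomer)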
|
||||
var bad26 = /"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad27 = /(([a-z]|[d-h])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad27 = /(([^a-z]|[^0-9])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad28 = /((\d|[0-9])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad29 = /((\s|\s)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad30 = /((\w|G)*)"/;
|
||||
|
||||
// GOOD
|
||||
var good11 = /((\s|\d)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad31 = /((\d|\w)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad32 = /((\d|5)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad33 = /((\s|[\f])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad34 = /((\s|[\v]|\\v)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad35 = /((\f|[\f])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad36 = /((\W|\D)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad37 = /((\S|\w)*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad38 = /((\S|[\w])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad39 = /((1s|[\da-z])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad40 = /((0|[\d])*)"/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad41 = /(([\d]+)*)"/;
|
||||
|
||||
// GOOD - there is no witness in the end that could cause the regexp to not match
|
||||
var good12 = /(\d+(X\d+)?)+/;
|
||||
|
||||
// GOOD - there is no witness in the end that could cause the regexp to not match
|
||||
var good13 = /([0-9]+(X[0-9]*)?)*/;
|
||||
|
||||
// GOOD - but still flagged (always matches something)
|
||||
var good15 = /^([^>]+)*(>|$)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad43 = /^([^>a]+)*(>|$)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad44 = /(\n\s*)+$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad45 = /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad46 = /\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad47 = /(a+|b+|c+)*c/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad48 = /(((a+a?)*)+b+)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad49 = /(a+)+bbbb/;
|
||||
|
||||
// GOOD
|
||||
var good16 = /(a+)+aaaaa*a+/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad50 = /(a+)+aaaaa$/;
|
||||
|
||||
// GOOD
|
||||
var good17 = /(\n+)+\n\n/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad51 = /(\n+)+\n\n$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad52 = /([^X]+)*$/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad53 = /(([^X]b)+)*$/;
|
||||
|
||||
// GOOD
|
||||
var good18 = /(([^X]b)+)*($|[^X]b)/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad54 = /(([^X]b)+)*($|[^X]c)/;
|
||||
|
||||
// GOOD
|
||||
var good20 = /((ab)+)*ababab/;
|
||||
|
||||
// GOOD
|
||||
var good21 = /((ab)+)*abab(ab)*(ab)+/;
|
||||
|
||||
// GOOD
|
||||
var good22 = /((ab)+)*/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad55 = /((ab)+)*$/;
|
||||
|
||||
// GOOD
|
||||
var good23 = /((ab)+)*[a1][b1][a2][b2][a3][b3]/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad56 = /([\n\s]+)*(.)/;
|
||||
|
||||
// GOOD - any witness passes through the accept state.
|
||||
var good24 = /(A*A*X)*/;
|
||||
|
||||
// GOOD
|
||||
var good26 = /([^\\\]]+)*/
|
||||
|
||||
// NOT GOOD
|
||||
var bad59 = /(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-/;
|
||||
|
||||
// NOT GOOD
|
||||
var bad60 = /(.thisisagoddamnlongstringforstresstestingthequery|\sthisisagoddamnlongstringforstresstestingthequery)*-/
|
||||
|
||||
// NOT GOOD
|
||||
var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
|
||||
|
||||
// GOOD
|
||||
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
|
||||
|
||||
// GOOD
|
||||
var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
|
||||
|
||||
// GOOD
|
||||
var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
|
||||
Reference in New Issue
Block a user