move predicates that depend on isReDoSCandidate into a ReDoSPruning module

This commit is contained in:
Erik Krogh Kristensen
2022-02-14 13:11:49 +01:00
parent 3248f7b423
commit dc06e9df02
12 changed files with 1272 additions and 1264 deletions

View File

@@ -14,45 +14,6 @@
import ReDoSUtilSpecific import ReDoSUtilSpecific
/**
* A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/
abstract class ReDoSConfiguration extends string {
bindingset[this]
ReDoSConfiguration() { any() }
/**
* Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/
abstract predicate isReDoSCandidate(State state, string pump);
}
/**
* Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* No check whether a rejected suffix exists has been made.
*/
private predicate isReDoSCandidate(State state, string pump) {
any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
(
not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or
epsilonSucc+(state) = state and
state =
max(State s, Location l |
s = epsilonSucc+(state) and
l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
)
}
/** /**
* Gets the char after `c` (from a simplified ASCII table). * Gets the char after `c` (from a simplified ASCII table).
*/ */
@@ -877,266 +838,341 @@ predicate isStartState(State state) {
} }
/** /**
* Predicates for constructing a prefix string that leads to a given state. * A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/ */
private module PrefixConstruction { abstract class ReDoSConfiguration extends string {
/** bindingset[this]
* Holds if `state` is the textually last start state for the regular expression. ReDoSConfiguration() { any() }
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/** /**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`. * Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/ */
private predicate existsTransition(State a, State b) { delta(a, _, b) } abstract predicate isReDoSCandidate(State state, string pump);
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** module ReDoSPruning {
* Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/
private module SuffixConstruction {
import PrefixConstruction
/** /**
* Holds if all states reachable from `fork` by repeating `w` * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* are likely rejectable by appending some suffix. * No check whether a rejected suffix exists has been made.
*/ */
predicate reachesOnlyRejectableSuffixes(State fork, string w) { private predicate isReDoSCandidate(State state, string pump) {
isReDoSCandidate(fork, w) and any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
( (
i = 0 and result = fork not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or or
result = process(fork, w, i - 1) epsilonSucc+(state) = state and
or state =
// repeat until fixpoint max(State s, Location l |
i = 0 and s = epsilonSucc+(state) and
result = process(fork, w, w.length() - 1) l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
) )
} }
/** /**
* Gets an InputSymbol that matches `char`. * Predicates for constructing a prefix string that leads to a given state.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/ */
private InputSymbol getAProcessInputSymbol(string char) { private module PrefixConstruction {
char = getAProcessChar() and /**
result = getAnInputSymbolMatching(char) * Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
*/
private predicate existsTransition(State a, State b) { delta(a, _, b) }
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** /**
* Gets a `char` that occurs in a `pump` string. * Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/ */
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) } private module SuffixConstruction {
import PrefixConstruction
/**
* Holds if all states reachable from `fork` by repeating `w`
* are likely rejectable by appending some suffix.
*/
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
isReDoSCandidate(fork, w) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
(
i = 0 and result = fork
or
result = process(fork, w, i - 1)
or
// repeat until fixpoint
i = 0 and
result = process(fork, w, w.length() - 1)
)
}
/**
* Gets an InputSymbol that matches `char`.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/
private InputSymbol getAProcessInputSymbol(string char) {
char = getAProcessChar() and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
}
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}
} }
/** /**
@@ -1162,37 +1198,3 @@ bindingset[str, i]
private string rotate(string str, int i) { private string rotate(string str, int i) {
result = str.suffix(str.length() - i) + str.prefix(str.length() - i) result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
} }
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}

View File

@@ -391,7 +391,7 @@ predicate isPumpable(State pivot, State succ, string pump) {
*/ */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) { predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot | exists(State s, State pivot |
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and isPumpable(pivot, s, _) and
prev = pivot.getRepr() prev = pivot.getRepr()
) )

View File

@@ -11,7 +11,7 @@ class HasExpRedos extends InlineExpectationsTest {
override predicate hasActualResult(Location location, string element, string tag, string value) { override predicate hasActualResult(Location location, string element, string tag, string value) {
tag = "hasExpRedos" and tag = "hasExpRedos" and
exists(RegExpTerm t, string pump, State s, string prefixMsg | exists(RegExpTerm t, string pump, State s, string prefixMsg |
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
not t.getRegex().getAMode() = "VERBOSE" and not t.getRegex().getAMode() = "VERBOSE" and
value = "" and value = "" and
location = t.getLocation() and location = t.getLocation() and

View File

@@ -14,45 +14,6 @@
import ReDoSUtilSpecific import ReDoSUtilSpecific
/**
* A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/
abstract class ReDoSConfiguration extends string {
bindingset[this]
ReDoSConfiguration() { any() }
/**
* Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/
abstract predicate isReDoSCandidate(State state, string pump);
}
/**
* Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* No check whether a rejected suffix exists has been made.
*/
private predicate isReDoSCandidate(State state, string pump) {
any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
(
not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or
epsilonSucc+(state) = state and
state =
max(State s, Location l |
s = epsilonSucc+(state) and
l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
)
}
/** /**
* Gets the char after `c` (from a simplified ASCII table). * Gets the char after `c` (from a simplified ASCII table).
*/ */
@@ -877,266 +838,341 @@ predicate isStartState(State state) {
} }
/** /**
* Predicates for constructing a prefix string that leads to a given state. * A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/ */
private module PrefixConstruction { abstract class ReDoSConfiguration extends string {
/** bindingset[this]
* Holds if `state` is the textually last start state for the regular expression. ReDoSConfiguration() { any() }
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/** /**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`. * Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/ */
private predicate existsTransition(State a, State b) { delta(a, _, b) } abstract predicate isReDoSCandidate(State state, string pump);
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** module ReDoSPruning {
* Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/
private module SuffixConstruction {
import PrefixConstruction
/** /**
* Holds if all states reachable from `fork` by repeating `w` * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* are likely rejectable by appending some suffix. * No check whether a rejected suffix exists has been made.
*/ */
predicate reachesOnlyRejectableSuffixes(State fork, string w) { private predicate isReDoSCandidate(State state, string pump) {
isReDoSCandidate(fork, w) and any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
( (
i = 0 and result = fork not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or or
result = process(fork, w, i - 1) epsilonSucc+(state) = state and
or state =
// repeat until fixpoint max(State s, Location l |
i = 0 and s = epsilonSucc+(state) and
result = process(fork, w, w.length() - 1) l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
) )
} }
/** /**
* Gets an InputSymbol that matches `char`. * Predicates for constructing a prefix string that leads to a given state.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/ */
private InputSymbol getAProcessInputSymbol(string char) { private module PrefixConstruction {
char = getAProcessChar() and /**
result = getAnInputSymbolMatching(char) * Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
*/
private predicate existsTransition(State a, State b) { delta(a, _, b) }
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** /**
* Gets a `char` that occurs in a `pump` string. * Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/ */
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) } private module SuffixConstruction {
import PrefixConstruction
/**
* Holds if all states reachable from `fork` by repeating `w`
* are likely rejectable by appending some suffix.
*/
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
isReDoSCandidate(fork, w) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
(
i = 0 and result = fork
or
result = process(fork, w, i - 1)
or
// repeat until fixpoint
i = 0 and
result = process(fork, w, w.length() - 1)
)
}
/**
* Gets an InputSymbol that matches `char`.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/
private InputSymbol getAProcessInputSymbol(string char) {
char = getAProcessChar() and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
}
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}
} }
/** /**
@@ -1162,37 +1198,3 @@ bindingset[str, i]
private string rotate(string str, int i) { private string rotate(string str, int i) {
result = str.suffix(str.length() - i) + str.prefix(str.length() - i) result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
} }
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}

View File

@@ -391,7 +391,7 @@ predicate isPumpable(State pivot, State succ, string pump) {
*/ */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) { predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot | exists(State s, State pivot |
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and isPumpable(pivot, s, _) and
prev = pivot.getRepr() prev = pivot.getRepr()
) )

View File

@@ -19,7 +19,7 @@ import semmle.javascript.security.performance.ReDoSUtil
import semmle.javascript.security.performance.ExponentialBackTracking import semmle.javascript.security.performance.ExponentialBackTracking
from RegExpTerm t, string pump, State s, string prefixMsg from RegExpTerm t, string pump, State s, string prefixMsg
where hasReDoSResult(t, pump, s, prefixMsg) where ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg)
select t, select t,
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg + "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
"containing many repetitions of '" + pump + "'." "containing many repetitions of '" + pump + "'."

View File

@@ -14,45 +14,6 @@
import ReDoSUtilSpecific import ReDoSUtilSpecific
/**
* A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/
abstract class ReDoSConfiguration extends string {
bindingset[this]
ReDoSConfiguration() { any() }
/**
* Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/
abstract predicate isReDoSCandidate(State state, string pump);
}
/**
* Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* No check whether a rejected suffix exists has been made.
*/
private predicate isReDoSCandidate(State state, string pump) {
any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
(
not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or
epsilonSucc+(state) = state and
state =
max(State s, Location l |
s = epsilonSucc+(state) and
l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
)
}
/** /**
* Gets the char after `c` (from a simplified ASCII table). * Gets the char after `c` (from a simplified ASCII table).
*/ */
@@ -877,266 +838,341 @@ predicate isStartState(State state) {
} }
/** /**
* Predicates for constructing a prefix string that leads to a given state. * A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/ */
private module PrefixConstruction { abstract class ReDoSConfiguration extends string {
/** bindingset[this]
* Holds if `state` is the textually last start state for the regular expression. ReDoSConfiguration() { any() }
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/** /**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`. * Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/ */
private predicate existsTransition(State a, State b) { delta(a, _, b) } abstract predicate isReDoSCandidate(State state, string pump);
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** module ReDoSPruning {
* Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/
private module SuffixConstruction {
import PrefixConstruction
/** /**
* Holds if all states reachable from `fork` by repeating `w` * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* are likely rejectable by appending some suffix. * No check whether a rejected suffix exists has been made.
*/ */
predicate reachesOnlyRejectableSuffixes(State fork, string w) { private predicate isReDoSCandidate(State state, string pump) {
isReDoSCandidate(fork, w) and any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
( (
i = 0 and result = fork not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or or
result = process(fork, w, i - 1) epsilonSucc+(state) = state and
or state =
// repeat until fixpoint max(State s, Location l |
i = 0 and s = epsilonSucc+(state) and
result = process(fork, w, w.length() - 1) l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
) )
} }
/** /**
* Gets an InputSymbol that matches `char`. * Predicates for constructing a prefix string that leads to a given state.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/ */
private InputSymbol getAProcessInputSymbol(string char) { private module PrefixConstruction {
char = getAProcessChar() and /**
result = getAnInputSymbolMatching(char) * Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
*/
private predicate existsTransition(State a, State b) { delta(a, _, b) }
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** /**
* Gets a `char` that occurs in a `pump` string. * Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/ */
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) } private module SuffixConstruction {
import PrefixConstruction
/**
* Holds if all states reachable from `fork` by repeating `w`
* are likely rejectable by appending some suffix.
*/
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
isReDoSCandidate(fork, w) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
(
i = 0 and result = fork
or
result = process(fork, w, i - 1)
or
// repeat until fixpoint
i = 0 and
result = process(fork, w, w.length() - 1)
)
}
/**
* Gets an InputSymbol that matches `char`.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/
private InputSymbol getAProcessInputSymbol(string char) {
char = getAProcessChar() and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
}
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}
} }
/** /**
@@ -1162,37 +1198,3 @@ bindingset[str, i]
private string rotate(string str, int i) { private string rotate(string str, int i) {
result = str.suffix(str.length() - i) + str.prefix(str.length() - i) result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
} }
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}

View File

@@ -391,7 +391,7 @@ predicate isPumpable(State pivot, State succ, string pump) {
*/ */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) { predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot | exists(State s, State pivot |
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and isPumpable(pivot, s, _) and
prev = pivot.getRepr() prev = pivot.getRepr()
) )

View File

@@ -18,7 +18,7 @@ import semmle.python.security.performance.ExponentialBackTracking
from RegExpTerm t, string pump, State s, string prefixMsg from RegExpTerm t, string pump, State s, string prefixMsg
where where
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
// exclude verbose mode regexes for now // exclude verbose mode regexes for now
not t.getRegex().getAMode() = "VERBOSE" not t.getRegex().getAMode() = "VERBOSE"
select t, select t,

View File

@@ -14,45 +14,6 @@
import ReDoSUtilSpecific import ReDoSUtilSpecific
/**
* A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/
abstract class ReDoSConfiguration extends string {
bindingset[this]
ReDoSConfiguration() { any() }
/**
* Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/
abstract predicate isReDoSCandidate(State state, string pump);
}
/**
* Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* No check whether a rejected suffix exists has been made.
*/
private predicate isReDoSCandidate(State state, string pump) {
any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
(
not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or
epsilonSucc+(state) = state and
state =
max(State s, Location l |
s = epsilonSucc+(state) and
l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
)
}
/** /**
* Gets the char after `c` (from a simplified ASCII table). * Gets the char after `c` (from a simplified ASCII table).
*/ */
@@ -877,266 +838,341 @@ predicate isStartState(State state) {
} }
/** /**
* Predicates for constructing a prefix string that leads to a given state. * A configuration for which parts of a regular expression should be considered relevant for
* the different predicates in `ReDoS.qll`.
* Used to adjust the computations for either superlinear or exponential backtracking.
*/ */
private module PrefixConstruction { abstract class ReDoSConfiguration extends string {
/** bindingset[this]
* Holds if `state` is the textually last start state for the regular expression. ReDoSConfiguration() { any() }
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/** /**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`. * Holds if `state` with the pump string `pump` is a candidate for a
* ReDoS vulnerable state.
* This is used to determine which states are considered for the prefix/suffix construction.
*/ */
private predicate existsTransition(State a, State b) { delta(a, _, b) } abstract predicate isReDoSCandidate(State state, string pump);
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** module ReDoSPruning {
* Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/
private module SuffixConstruction {
import PrefixConstruction
/** /**
* Holds if all states reachable from `fork` by repeating `w` * Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
* are likely rejectable by appending some suffix. * No check whether a rejected suffix exists has been made.
*/ */
predicate reachesOnlyRejectableSuffixes(State fork, string w) { private predicate isReDoSCandidate(State state, string pump) {
isReDoSCandidate(fork, w) and any(ReDoSConfiguration conf).isReDoSCandidate(state, pump) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
( (
i = 0 and result = fork not any(ReDoSConfiguration conf).isReDoSCandidate(epsilonSucc+(state), _)
or or
result = process(fork, w, i - 1) epsilonSucc+(state) = state and
or state =
// repeat until fixpoint max(State s, Location l |
i = 0 and s = epsilonSucc+(state) and
result = process(fork, w, w.length() - 1) l = s.getRepr().getLocation() and
any(ReDoSConfiguration conf).isReDoSCandidate(s, _) and
s.getRepr() instanceof InfiniteRepetitionQuantifier
|
s order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine()
)
) )
} }
/** /**
* Gets an InputSymbol that matches `char`. * Predicates for constructing a prefix string that leads to a given state.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/ */
private InputSymbol getAProcessInputSymbol(string char) { private module PrefixConstruction {
char = getAProcessChar() and /**
result = getAnInputSymbolMatching(char) * Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(StateInPumpableRegexp s, Location l |
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
*/
private predicate existsTransition(State a, State b) { delta(a, _, b) }
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
class StateInPumpableRegexp extends State {
pragma[noinline]
StateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(this.getRepr()))
}
}
} }
/** /**
* Gets a `char` that occurs in a `pump` string. * Predicates for testing the presence of a rejecting suffix.
*
* These predicates are used to ensure that the all states reached from the fork
* by repeating `w` have a rejecting suffix.
*
* For example, a regexp like `/^(a+)+/` will accept any string as long the prefix is
* some number of `"a"`s, and it is therefore not possible to construct a rejecting suffix.
*
* A regexp like `/(a+)+$/` or `/(a+)+b/` trivially has a rejecting suffix,
* as the suffix "X" will cause both the regular expressions to be rejected.
*
* The string `w` is repeated any number of times because it needs to be
* infinitely repeatedable for the attack to work.
* For the regular expression `/((ab)+)*abab/` the accepting state is not reachable from the fork
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/ */
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) } private module SuffixConstruction {
import PrefixConstruction
/**
* Holds if all states reachable from `fork` by repeating `w`
* are likely rejectable by appending some suffix.
*/
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
isReDoSCandidate(fork, w) and
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
}
/**
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
*/
pragma[noinline]
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
// exists a reject edge with some char.
hasRejectEdge(s)
or
hasEdgeToLikelyRejectable(s)
or
// stopping here is rejection
isRejectState(s)
}
/**
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(StateInPumpableRegexp s) { not epsilonSucc*(s) = Accept(_) }
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
// noopt to force `hasEdgeToLikelyRejectableHelper` to be first in the join-order.
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
* `prev` to `next` that the character symbol `char`.
*/
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
pragma[noinline]
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
char = relevant(_) and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
}
/**
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is no edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily. The three chars must be kept in sync with `relevant`.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
// noopt to prevent joining `prev` with all possible `chars` that could transition away from `prev`.
// Instead only join with the set of `chars` where a relevant `InputSymbol` has already been found.
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
(
i = 0 and result = fork
or
result = process(fork, w, i - 1)
or
// repeat until fixpoint
i = 0 and
result = process(fork, w, w.length() - 1)
)
}
/**
* Gets an InputSymbol that matches `char`.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/
private InputSymbol getAProcessInputSymbol(string char) {
char = getAProcessChar() and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
}
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}
} }
/** /**
@@ -1162,37 +1198,3 @@ bindingset[str, i]
private string rotate(string str, int i) { private string rotate(string str, int i) {
result = str.suffix(str.length() - i) + str.prefix(str.length() - i) result = str.suffix(str.length() - i) + str.prefix(str.length() - i)
} }
/**
* Holds if `term` may cause superlinear backtracking on strings containing many repetitions of `pump`.
* Gets the shortest string that causes superlinear backtracking.
*/
private predicate isReDoSAttackable(RegExpTerm term, string pump, State s) {
exists(int i, string c | s = Match(term, i) |
c =
min(string w |
any(ReDoSConfiguration conf).isReDoSCandidate(s, w) and
SuffixConstruction::reachesOnlyRejectableSuffixes(s, w)
|
w order by w.length(), w
) and
pump = escape(rotate(c, i))
)
}
/**
* Holds if the state `s` (represented by the term `t`) can have backtracking with repetitions of `pump`.
*
* `prefixMsg` contains a friendly message for a prefix that reaches `s` (or `prefixMsg` is the empty string if the prefix is empty or if no prefix could be found).
*/
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
or
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
or
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
)
}

View File

@@ -391,7 +391,7 @@ predicate isPumpable(State pivot, State succ, string pump) {
*/ */
predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) { predicate polynimalReDoS(RegExpTerm t, string pump, string prefixMsg, RegExpTerm prev) {
exists(State s, State pivot | exists(State s, State pivot |
hasReDoSResult(t, pump, s, prefixMsg) and ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg) and
isPumpable(pivot, s, _) and isPumpable(pivot, s, _) and
prev = pivot.getRepr() prev = pivot.getRepr()
) )

View File

@@ -19,7 +19,7 @@ import codeql.ruby.security.performance.ReDoSUtil
import codeql.ruby.Regexp import codeql.ruby.Regexp
from RegExpTerm t, string pump, State s, string prefixMsg from RegExpTerm t, string pump, State s, string prefixMsg
where hasReDoSResult(t, pump, s, prefixMsg) where ReDoSPruning::hasReDoSResult(t, pump, s, prefixMsg)
select t, select t,
"This part of the regular expression may cause exponential backtracking on strings " + prefixMsg + "This part of the regular expression may cause exponential backtracking on strings " + prefixMsg +
"containing many repetitions of '" + pump + "'." "containing many repetitions of '" + pump + "'."