Merge pull request #12628 from erik-krogh/betterReDoS

ReDoS: better super-linear algorithm
This commit is contained in:
Erik Krogh Kristensen
2023-03-27 15:26:49 +02:00
committed by GitHub
10 changed files with 316 additions and 203 deletions

View File

@@ -147,6 +147,8 @@ module Make<RegexTreeViewSig TreeImpl> {
/**
* Gets a string for the full location of `t`.
*/
bindingset[t]
pragma[inline_late]
string getTermLocationString(RegExpTerm t) {
exists(string file, int startLine, int startColumn, int endLine, int endColumn |
t.hasLocationInfo(file, startLine, startColumn, endLine, endColumn) and
@@ -851,6 +853,10 @@ module Make<RegexTreeViewSig TreeImpl> {
* Gets the term represented by this state.
*/
RegExpTerm getRepr() { result = repr }
predicate hasLocationInfo(string file, int line, int column, int endline, int endcolumn) {
repr.hasLocationInfo(file, line, column, endline, endcolumn)
}
}
/**

View File

@@ -9,16 +9,16 @@
* Theorem 3 from the paper describes the basic idea.
*
* The following explains the idea using variables and predicate names that are used in the implementation:
* We consider a pair of repetitions, which we will call `pivot` and `succ`.
* We consider a pair of repetitions, which we will call `pivot` and `pumpEnd`.
*
* We create a product automaton of 3-tuples of states (see `StateTuple`).
* There exists a transition `(a,b,c) -> (d,e,f)` in the product automaton
* iff there exists three transitions in the NFA `a->d, b->e, c->f` where those three
* transitions all match a shared character `char`. (see `getAThreewayIntersect`)
*
* We start a search in the product automaton at `(pivot, pivot, succ)`,
* We start a search in the product automaton at `(pivot, pivot, pumpEnd)`,
* and search for a series of transitions (a `Trace`), such that we end
* at `(pivot, succ, succ)` (see `isReachableFromStartTuple`).
* at `(pivot, pumpEnd, pumpEnd)` (see `isReachableFromStartTuple`).
*
* For example, consider the regular expression `/^\d*5\w*$/`.
* The search will start at the tuple `(\d*, \d*, \w*)` and search
@@ -51,20 +51,30 @@ module Make<RegexTreeViewSig TreeImpl> {
private State getRootState() { result = mkMatch(any(RegExpRoot r)) }
private newtype TStateTuple =
MkStateTuple(State q1, State q2, State q3) {
// starts at (pivot, pivot, succ)
isStartLoops(q1, q3) and q1 = q2
/**
* A tuple of states `(q1, q2, q3)` in the product automaton that is reachable from `(pivot, pivot, pumpEnd)`.
*/
MkStateTuple(State pivot, State pumpEnd, State q1, State q2, State q3) {
// starts at (pivot, pivot, pumpEnd)
isStartLoops(q1, q3) and
q1 = q2 and
pivot = q1 and
pumpEnd = q3
or
step(_, _, _, _, q1, q2, q3) and FeasibleTuple::isFeasibleTuple(q1, q2, q3)
// recurse: any transition out where all 3 edges share a char (and the resulting tuple isn't obviously infeasible)
exists(StateTuple prev |
prev = MkStateTuple(pivot, pumpEnd, _, _, _) and
hasCommonStep(prev, _, _, _, q1, q2, q3) and
FeasibleTuple::isFeasibleTuple(pivot, pumpEnd, q1, q2, q3)
)
}
/**
* A state in the product automaton.
* The product automaton contains 3-tuples of states.
* A state `(q1, q2, q3)` in the product automaton, that is reachable from `(pivot, pivot, pumpEnd)`.
*
* We lazily only construct those states that we are actually
* going to need.
* Either a start state `(pivot, pivot, succ)`, or a state
* Either a start state `(pivot, pivot, pumpEnd)`, or a state
* where there exists a transition from an already existing state.
*
* The exponential variant of this query (`js/redos`) uses an optimization
@@ -72,11 +82,13 @@ module Make<RegexTreeViewSig TreeImpl> {
* of the elements matter.
*/
class StateTuple extends TStateTuple {
State pivot;
State pumpEnd;
State q1;
State q2;
State q3;
StateTuple() { this = MkStateTuple(q1, q2, q3) }
StateTuple() { this = MkStateTuple(pivot, pumpEnd, q1, q2, q3) }
/**
* Gest a string representation of this tuple.
@@ -88,6 +100,39 @@ module Make<RegexTreeViewSig TreeImpl> {
*/
pragma[noinline]
predicate isTuple(State r1, State r2, State r3) { r1 = q1 and r2 = q2 and r3 = q3 }
/**
* Gets the first state of the tuple.
*/
State getFirst() { result = q1 }
/**
* Gets the second state of the tuple.
*/
State getSecond() { result = q2 }
/**
* Gets the third state of the tuple.
*/
State getThird() { result = q3 }
/**
* Gets the pivot state.
*/
State getPivot() { result = pivot }
/**
* Gets the pumpEnd state.
*/
State getPumpEnd() { result = pumpEnd }
/**
* Holds if the pivot state has the specified location.
* This location has been chosen arbitrarily, and is only useful for debugging.
*/
predicate hasLocationInfo(string file, int line, int column, int endLine, int endColumn) {
pivot.hasLocationInfo(file, line, column, endLine, endColumn)
}
}
/**
@@ -97,21 +142,36 @@ module Make<RegexTreeViewSig TreeImpl> {
*/
private module FeasibleTuple {
/**
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state to an end-state in the product automaton.
* Holds if the tuple `(r1, r2, r3)` might be on path from a start-state `(pivot, pivot, pumpEnd)` to an end-state `(pivot, pumpEnd, pumpEnd)` in the product automaton.
*/
pragma[inline]
predicate isFeasibleTuple(State r1, State r2, State r3) {
bindingset[pivot, pumpEnd, r1, r2, r3]
pragma[inline_late]
predicate isFeasibleTuple(State pivot, State pumpEnd, State r1, State r2, State r3) {
isStartLoops(pivot, pumpEnd) and
// r1 can reach the pivot state
reachesBeginning(r1, pivot) and
// r2 and r3 can reach the pumpEnd state
reachesEnd(r2, pumpEnd) and
reachesEnd(r3, pumpEnd) and
// The first element is either inside a repetition (or the start state itself)
isRepetitionOrStart(r1) and
// The last element is inside a repetition
stateInsideRepetition(r3) and
// The states are reachable in the NFA in the order r1 -> r2 -> r3
delta+(r1) = r2 and
delta+(r2) = r3 and
// The first element can reach a beginning (the "pivot" state in a `(pivot, succ)` pair).
canReachABeginning(r1) and
// The last element can reach a target (the "succ" state in a `(pivot, succ)` pair).
canReachATarget(r3)
delta+(r2) = r3
}
pragma[noinline]
private predicate reachesBeginning(State s, State pivot) {
isStartLoops(pivot, _) and
delta+(s) = pivot
}
pragma[noinline]
private predicate reachesEnd(State s, State pumpEnd) {
isStartLoops(_, pumpEnd) and
delta+(s) = pumpEnd
}
/**
@@ -129,36 +189,18 @@ module Make<RegexTreeViewSig TreeImpl> {
private predicate stateInsideRepetition(State s) {
s.getRepr().getParent*() instanceof InfiniteRepetitionQuantifier
}
/**
* Holds if there exists a path in the NFA from `s` to a "pivot" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachABeginning(State s) {
delta+(s) = any(State pivot | isStartLoops(pivot, _))
}
/**
* Holds if there exists a path in the NFA from `s` to a "succ" state
* (from a `(pivot, succ)` pair that starts the search).
*/
pragma[noinline]
private predicate canReachATarget(State s) {
delta+(s) = any(State succ | isStartLoops(_, succ))
}
}
/**
* Holds if `pivot` and `succ` are a pair of loops that could be the beginning of a quadratic blowup.
* Holds if `pivot` and `pumpEnd` are a pair of loops that could be the beginning of a quadratic blowup.
*
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != succ`.
* The case where `pivot = succ` causes exponential backtracking and is handled by the `js/redos` query.
* There is a slight implementation difference compared to the paper: this predicate requires that `pivot != pumpEnd`.
* The case where `pivot = pumpEnd` causes exponential backtracking and is handled by the `js/redos` query.
*/
predicate isStartLoops(State pivot, State succ) {
pivot != succ and
succ.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = succ and
predicate isStartLoops(State pivot, State pumpEnd) {
pivot != pumpEnd and
pumpEnd.getRepr() instanceof InfiniteRepetitionQuantifier and
delta+(pivot) = pumpEnd and
(
pivot.getRepr() instanceof InfiniteRepetitionQuantifier
or
@@ -174,62 +216,25 @@ module Make<RegexTreeViewSig TreeImpl> {
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
* Where the edges `s1`, `s2`, and `s3` all share at least one character.
*/
pragma[nomagic]
private predicate stepHelper(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r
) {
private predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
exists(State r1, State r2, State r3 |
step(q, s1, s2, s3, r1, r2, r3) and r = MkStateTuple(r1, r2, r3)
hasCommonStep(q, s1, s2, s3, r1, r2, r3) and
r =
MkStateTuple(pragma[only_bind_out](q.getPivot()), pragma[only_bind_out](q.getPumpEnd()),
pragma[only_bind_out](r1), pragma[only_bind_out](r2), pragma[only_bind_out](r3))
)
}
/**
* Holds if there are transitions from the components of `q` to the corresponding
* components of `r` labelled with `s1`, `s2`, and `s3`, respectively.
*
* Additionally, a heuristic is used to avoid blowups in the case of complex regexps.
* For regular expressions with more than 100 states, we only look at all the characters
* for the transitions out of `q` and only consider transitions that use the lexicographically
* smallest character.
*/
pragma[noinline]
predicate step(StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r) {
stepHelper(q, s1, s2, s3, r) and
(
countStates(any(State s | q.isTuple(s, _, _)).getRepr().getRootTerm()) < 100
or
// arbitrarily pick an edge out of `q` for complex regexps. This is a heuristic to avoid potential blowups.
exists(string char |
char =
min(string str, InputSymbol x1, InputSymbol x2, InputSymbol x3 |
stepHelper(q, x1, x2, x3, _) and str = getAStepChar(x1, x2, x3)
|
str
) and
char = getAStepChar(s1, s2, s3)
)
)
}
// specialized version of `getAThreewayIntersect` to be used in `step` above.
pragma[noinline]
private string getAStepChar(InputSymbol s1, InputSymbol s2, InputSymbol s3) {
stepHelper(_, s1, s2, s3, _) and result = getAThreewayIntersect(s1, s2, s3)
}
/** Gets the number of states in the NFA for `root`. This is used to determine a complexity metric used in the `step` predicate above. */
private int countStates(RegExpTerm root) {
root.isRootTerm() and
result = count(State s | s.getRepr().getRootTerm() = root)
}
/**
* Holds if there are transitions from the components of `q` to `r1`, `r2`, and `r3
* labelled with `s1`, `s2`, and `s3`, respectively.
* Where `s1`, `s2`, and `s3` all share at least one character.
*/
pragma[noopt]
predicate step(
predicate hasCommonStep(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, State r1, State r2, State r3
) {
exists(State q1, State q2, State q3 | q.isTuple(q1, q2, q3) |
@@ -264,41 +269,59 @@ module Make<RegexTreeViewSig TreeImpl> {
result = [min(intersect(a, b)), max(intersect(a, b))]
}
private newtype TTrace =
Nil() or
Step(InputSymbol s1, InputSymbol s2, InputSymbol s3, TTrace t) {
isReachableFromStartTuple(_, _, t, s1, s2, s3, _, _)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from some start state.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() {
this = Nil() and result = "Nil()"
or
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace t | this = Step(s1, s2, s3, t) |
result = "Step(" + s1 + ", " + s2 + ", " + s3 + ", " + t + ")"
)
}
/** Gets a tuple reachable in a forwards exploratory search from the start state `(pivot, pivot, pivot)`. */
private StateTuple getReachableFromStartStateForwards(State pivot, State pumpEnd) {
// base case.
isStartLoops(pivot, pumpEnd) and
result = MkStateTuple(pivot, pumpEnd, pivot, pivot, pumpEnd)
or
// recursive case
exists(StateTuple p |
p = getReachableFromStartStateForwards(pivot, pumpEnd) and
step(p, _, _, _, result)
)
}
/**
* Holds if there exists a transition from `r` to `q` in the product automaton.
* Gets a state tuple that can reach the end state `(pivot, pumpEnd, pumpEnd)`, found via a backwards exploratory search.
* Where the end state was reachable from a forwards search from the start state `(pivot, pivot, pumpEnd)`.
* The resulting tuples are exactly those that are on a path from the start state to the end state.
*/
private StateTuple getARelevantStateTuple(State pivot, State pumpEnd) {
// base case.
isStartLoops(pivot, pumpEnd) and
result = MkStateTuple(pivot, pumpEnd, pivot, pumpEnd, pumpEnd) and
result = getReachableFromStartStateForwards(pivot, pumpEnd)
or
// recursive case
exists(StateTuple p |
p = getARelevantStateTuple(pivot, pumpEnd) and
step(result, _, _, _, p) and
pragma[only_bind_out](result) = getReachableFromStartStateForwards(pivot, pumpEnd) // was reachable in the forwards pass.
)
}
/**
* Holds if there exists a transition from `src` to `dst` in the product automaton.
* Where `src` and `dst` are both on a path from a start state to an end state.
* Notice that the arguments are flipped, and thus the direction is backwards.
*/
pragma[noinline]
predicate tupleDeltaBackwards(StateTuple q, StateTuple r) { step(r, _, _, _, q) }
predicate tupleDeltaBackwards(StateTuple dst, StateTuple src) {
step(src, _, _, _, dst) and
// `step` ensures that `src` and `dst` have the same pivot and pumpEnd.
src = getARelevantStateTuple(_, _) and
dst = getARelevantStateTuple(_, _)
}
/**
* Holds if `tuple` is an end state in our search.
* That means there exists a pair of loops `(pivot, succ)` such that `tuple = (pivot, succ, succ)`.
* Holds if `tuple` is an end state in our search, and `tuple` is on a path from a start state to an end state.
* That means there exists a pair of loops `(pivot, pumpEnd)` such that `tuple = (pivot, pumpEnd, pumpEnd)`.
*/
predicate isEndTuple(StateTuple tuple) { tuple = getAnEndTuple(_, _) }
predicate isEndTuple(StateTuple tuple) {
tuple = getEndTuple(_, _) and
tuple = getARelevantStateTuple(_, _)
}
/**
* Gets the minimum length of a path from `r` to some an end state `end`.
@@ -311,72 +334,138 @@ module Make<RegexTreeViewSig TreeImpl> {
shortestDistances(isEndTuple/1, tupleDeltaBackwards/2)(end, r, result)
/**
* Holds if there exists a pair of repetitions `(pivot, succ)` in the regular expression such that:
* `tuple` is reachable from `(pivot, pivot, succ)` in the product automaton,
* and there is a distance of `dist` from `tuple` to the nearest end-tuple `(pivot, succ, succ)`,
* and a path from a start-state to `tuple` follows the transitions in `trace`.
* Holds if there is a step from `q` to `r` in the product automaton labeled with `s1`, `s2`, and `s3`.
* Where the step is on a path from a start state to an end state.
*/
private predicate isReachableFromStartTuple(
State pivot, State succ, StateTuple tuple, Trace trace, int dist
private predicate isStepOnPath(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r
) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3, Trace v |
isReachableFromStartTuple(pivot, succ, v, s1, s2, s3, tuple, dist) and
trace = Step(s1, s2, s3, v)
)
}
private predicate isReachableFromStartTuple(
State pivot, State succ, Trace trace, InputSymbol s1, InputSymbol s2, InputSymbol s3,
StateTuple tuple, int dist
) {
// base case.
isStartLoops(pivot, succ) and
step(MkStateTuple(pivot, pivot, succ), s1, s2, s3, tuple) and
tuple = MkStateTuple(_, _, _) and
trace = Nil() and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ))
or
// recursive case
exists(StateTuple p |
isReachableFromStartTuple(pivot, succ, p, trace, dist + 1) and
dist = distBackFromEnd(tuple, MkStateTuple(pivot, succ, succ)) and
step(p, s1, s2, s3, tuple)
step(q, s1, s2, s3, r) and
exists(State pivot, State pumpEnd, StateTuple end |
end = MkStateTuple(pivot, pumpEnd, pivot, pumpEnd, pumpEnd) and
pragma[only_bind_out](distBackFromEnd(q, end)) =
pragma[only_bind_out](distBackFromEnd(r, end)) + 1
)
}
/**
* Gets the tuple `(pivot, succ, succ)` from the product automaton.
* Gets a unique number for a `state`.
* Is used to create an ordering of states and tuples of states.
*/
StateTuple getAnEndTuple(State pivot, State succ) {
isStartLoops(pivot, succ) and
result = MkStateTuple(pivot, succ, succ)
private int rankState(State state) {
state =
rank[result](State s, int startLine, int endLine, int startColumn, int endColumn |
exists(StateTuple tuple | tuple = getARelevantStateTuple(_, _) |
s = [tuple.getFirst(), tuple.getSecond(), tuple.getThird()] and
s.getRepr().hasLocationInfo(_, startLine, startColumn, endLine, endColumn)
)
|
s order by startLine, startColumn, endLine, endColumn
)
}
/**
* Holds if there is a step from `q` to `r` in the product automaton labeled with `s1`, `s2`, and `s3`.
* Where the step is on a path from a start state to an end state.
* And the step is a uniquely chosen step from out of `q`.
*/
pragma[nomagic]
private predicate isUniqueMinStepOnPath(
StateTuple q, InputSymbol s1, InputSymbol s2, InputSymbol s3, StateTuple r
) {
isStepOnPath(q, s1, s2, s3, r) and
r =
min(StateTuple cand |
isStepOnPath(q, _, _, _, cand)
|
cand
order by
rankState(cand.getFirst()), rankState(cand.getSecond()), rankState(cand.getThird())
)
}
private newtype TTrace =
Nil(State pivot, State pumpEnd) {
isStartLoops(pivot, pumpEnd) and
getStartTuple(pivot, pumpEnd) = getARelevantStateTuple(pivot, pumpEnd)
} or
Step(TTrace prev, StateTuple nextTuple) {
exists(StateTuple prevTuple |
exists(State pivot, State pumpEnd |
prev = Nil(pivot, pumpEnd) and
prevTuple = getStartTuple(pivot, pumpEnd)
)
or
prev = Step(_, prevTuple)
|
isUniqueMinStepOnPath(prevTuple, _, _, _, nextTuple)
)
}
/**
* A list of tuples of input symbols that describe a path in the product automaton
* starting from a start state `(pivot, pivot, pumpEnd)`.
*/
class Trace extends TTrace {
/**
* Gets a string representation of this Trace that can be used for debug purposes.
*/
string toString() { result = "a trace" }
/** Gets a trace where the head has been removed. */
Trace getPrev() { this = Step(result, _) }
/** Gets the tuple at the head of this trace. */
StateTuple getTuple() {
this = Step(_, result)
or
exists(State prev, State pumpEnd |
this = Nil(prev, pumpEnd) and
result = getStartTuple(prev, pumpEnd)
)
}
}
/**
* Gets the tuple `(pivot, pumpEnd, pumpEnd)` from the product automaton.
*/
StateTuple getEndTuple(State pivot, State pumpEnd) {
isStartLoops(pivot, pumpEnd) and
result = MkStateTuple(pivot, pumpEnd, pivot, pumpEnd, pumpEnd)
}
/**
* Gets the tuple `(pivot, pivot, pumpEnd)` from the product automaton.
*/
StateTuple getStartTuple(State pivot, State pumpEnd) {
isStartLoops(pivot, pumpEnd) and
result = MkStateTuple(pivot, pumpEnd, pivot, pivot, pumpEnd)
}
/** An implementation of a chain containing chars for use by `Concretizer`. */
private module CharTreeImpl implements CharTree {
class CharNode = Trace;
CharNode getPrev(CharNode t) { t = Step(_, _, _, result) }
CharNode getPrev(CharNode t) { result = t.getPrev() }
/** Holds if `n` is used in `isPumpable`. */
predicate isARelevantEnd(CharNode n) {
exists(State pivot, State succ |
isReachableFromStartTuple(pivot, succ, getAnEndTuple(pivot, succ), n, _)
)
}
predicate isARelevantEnd(CharNode n) { n.getTuple() = getEndTuple(_, _) }
string getChar(CharNode t) {
exists(InputSymbol s1, InputSymbol s2, InputSymbol s3 | t = Step(s1, s2, s3, _) |
result = getAThreewayIntersect(s1, s2, s3)
)
result =
min(string c, InputSymbol s1, InputSymbol s2, InputSymbol s3 |
isUniqueMinStepOnPath(t.getPrev().getTuple(), s1, s2, s3, t.getTuple()) and
c = getAThreewayIntersect(s1, s2, s3)
|
c
)
}
}
/**
* Holds if matching repetitions of `pump` can:
* 1) Transition from `pivot` back to `pivot`.
* 2) Transition from `pivot` to `succ`.
* 3) Transition from `succ` to `succ`.
* 2) Transition from `pivot` to `pumpEnd`.
* 3) Transition from `pumpEnd` to `pumpEnd`.
*
* From theorem 3 in the paper linked in the top of this file we can therefore conclude that
* the regular expression has polynomial backtracking - if a rejecting suffix exists.
@@ -384,10 +473,10 @@ module Make<RegexTreeViewSig TreeImpl> {
* This predicate is used by `SuperLinearReDoSConfiguration`, and the final results are
* available in the `hasReDoSResult` predicate.
*/
predicate isPumpable(State pivot, State succ, string pump) {
predicate isPumpable(State pivot, State pumpEnd, string pump) {
exists(StateTuple q, Trace t |
isReachableFromStartTuple(pivot, succ, q, t, _) and
q = getAnEndTuple(pivot, succ) and
q = getEndTuple(pivot, pumpEnd) and
q = t.getTuple() and
pump = Concretizer<CharTreeImpl>::concretize(t)
)
}