Merge pull request #10062 from erik-krogh/redosPrefix

JS: use the shared regular expression libraries in `js/case-sensitive-middleware-path`
This commit is contained in:
Erik Krogh Kristensen
2022-08-25 12:57:16 +02:00
committed by GitHub
7 changed files with 606 additions and 485 deletions

View File

@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
* Holds if this root term is relevant to the ReDoS analysis.
*/
predicate isRelevant() {
// there is at least one repetition
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
// is actually used as a RegExp
this.isUsedAsRegExp() and
// not excluded for library specific reasons
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
*/
signature predicate isCandidateSig(State state, string pump);
/**
* Holds if `state` is a candidate for ReDoS.
*
* This is the unary variant of the signature (no pump string); it is the signature
* expected for the `isCandidate` parameter of the `PrefixConstruction` module.
*/
signature predicate isCandidateSig(State state);
/**
 * Provides predicates that build a prefix string driving the NFA from the
 * (textually last) start state of a regular expression to a given state.
 *
 * The module is parameterized by `isCandidate`, which selects the states of interest.
 */
module PrefixConstruction<isCandidateSig/1 isCandidate> {
  /**
   * Holds if `state` is, for some regular expression root, the start state that
   * sorts last by the location of its representative term.
   */
  private predicate lastStartState(State state) {
    exists(RegExpRoot regexp |
      state =
        max(State start, Location pos |
          isStartState(start) and
          start = stateInRelevantRegexp() and
          pos = start.getRepr().getLocation() and
          regexp = getRoot(start.getRepr())
        |
          start
          order by
            pos.getStartLine(), pos.getStartColumn(), start.getRepr().toString(),
            pos.getEndColumn(), pos.getEndLine()
        )
    )
  }

  /**
   * Holds if the NFA has a transition of any kind (epsilon or not) from `src` to `dst`.
   */
  private predicate existsTransition(State src, State dst) { delta(src, _, dst) }

  /**
   * Gets the length of a shortest transition path from the start state `start` to `state`.
   */
  int prefixLength(State start, State state) =
    shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)

  /**
   * Gets the length of a shortest transition path from some start state to `state`.
   */
  private int lengthFromStart(State state) {
    exists(State start | result = prefixLength(start, state))
  }

  /**
   * Gets a string that makes the regular expression reach `state`.
   *
   * There is at most one result per `state`, and there may be none,
   * even when the regular expression has a ReDoS issue.
   */
  string prefix(State state) {
    // base case: a start state is reached without consuming any input.
    result = "" and
    lastStartState(state)
    or
    // never search deeper than the candidate state that is furthest from the start.
    lengthFromStart(state) <= max(lengthFromStart(any(State cand | isCandidate(cand)))) and
    exists(State predecessor |
      // pick one predecessor on a shortest path; the ordering is arbitrary but deterministic.
      predecessor =
        min(State p, Location pos |
          delta(p, _, state) and
          pos = p.getRepr().getLocation() and
          lengthFromStart(p) = lengthFromStart(state) - 1
        |
          p
          order by
            pos.getStartLine(), pos.getStartColumn(), pos.getEndLine(), pos.getEndColumn(),
            p.getRepr().toString()
        )
    |
      // an epsilon step adds nothing to the prefix; any other step adds one canonical char.
      if delta(predecessor, Epsilon(), state)
      then result = prefix(predecessor)
      else result = prefix(predecessor) + getCanonicalEdgeChar(predecessor, state)
    )
  }

  /**
   * Gets the smallest char accepted by some transition from `prev` to `next` in the NFA.
   */
  private string getCanonicalEdgeChar(State prev, State next) {
    result =
      min(string ch, InputSymbol sym |
        delta(prev, sym, next) and
        ch = intersect(Any(), sym)
      |
        ch
      )
  }

  /** Gets a state whose regular expression also contains a candidate state. */
  pragma[noinline]
  State stateInRelevantRegexp() {
    exists(State cand |
      isCandidate(cand) and
      getRoot(result.getRepr()) = getRoot(cand.getRepr())
    )
  }
}
/**
* A module for pruning candidate ReDoS states.
* The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
/** Gets a state from which the `accept-any` state is reachable via zero or more epsilon steps. */
private State acceptsAnySuffix() { AcceptAnySuffix(_) = epsilonSucc*(result) }
/**
* Predicates for constructing a prefix string that leads to a given state.
*
* NOTE(review): this listing appears to be a rendered diff with the +/- markers stripped
* (see the `@@` hunk headers). This non-parameterized module is presumably the version the
* commit removes in favour of the parameterized `PrefixConstruction<isCandidate>` module;
* confirm against the repository before editing it.
*/
private module PrefixConstruction {
/**
* Holds if `state` is the textually last start state for the regular expression.
*/
private predicate lastStartState(State state) {
exists(RegExpRoot root |
state =
max(State s, Location l |
s = stateInPumpableRegexp() and
isStartState(s) and
getRoot(s.getRepr()) = root and
l = s.getRepr().getLocation()
|
s
order by
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
l.getEndLine()
)
)
}
/**
* Holds if `isReDoSCandidate(s, _)` holds, i.e. `s` is a ReDoS candidate state
* irrespective of the second argument.
*
* NOTE(review): presumably an added line of the diff that belongs to the enclosing
* module rather than to this one; it is the unary predicate passed to
* `PrefixConstruction<isCandidateState/1>`.
*/
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
/**
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
*/
private predicate existsTransition(State a, State b) { delta(a, _, b) }
/**
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
*/
int prefixLength(State start, State state) =
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
/**
* Gets the minimum number of transitions it takes to reach `state` from the start state.
*/
private int lengthFromStart(State state) { result = prefixLength(_, state) }
/**
* Gets a string for which the regular expression will reach `state`.
*
* Has at most one result for any given `state`.
* This predicate will not always have a result even if there is a ReDoS issue in
* the regular expression.
*/
string prefix(State state) {
lastStartState(state) and
result = ""
or
// the search stops past the last redos candidate state.
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
exists(State prev |
// select a unique predecessor (by an arbitrary measure)
prev =
min(State s, Location loc |
lengthFromStart(s) = lengthFromStart(state) - 1 and
loc = s.getRepr().getLocation() and
delta(s, _, state)
|
s
order by
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
s.getRepr().toString()
)
|
// greedy search for the shortest prefix
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
)
}
/**
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getCanonicalEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/** Gets a state within a regular expression that has a pumpable state. */
pragma[noinline]
State stateInPumpableRegexp() {
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
}
}
import PrefixConstruction<isCandidateState/1> as Prefix
/**
* Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
*/
private module SuffixConstruction {
import PrefixConstruction
/**
* Holds if all states reachable from `fork` by repeating `w`
* are likely rejectable by appending some suffix.
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
*/
pragma[noinline]
private predicate isLikelyRejectable(State s) {
s = stateInPumpableRegexp() and
s = Prefix::stateInRelevantRegexp() and
(
// exists a reject edge with some char.
hasRejectEdge(s)
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
*/
predicate isRejectState(State s) {
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the two
// lines below are presumably the removed and the added version of the same body; they differ
// only in the predicate that restricts `s` (`stateInPumpableRegexp` vs.
// `Prefix::stateInRelevantRegexp`). Only one of them exists in either revision of the file.
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
}
/**
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
*/
pragma[noopt]
predicate hasEdgeToLikelyRejectable(State s) {
s = stateInPumpableRegexp() and
s = Prefix::stateInRelevantRegexp() and
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(State s) {
s = stateInPumpableRegexp() and
s = Prefix::stateInRelevantRegexp() and
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
* `prev` to `next` that matches the character symbol `char`.
*/
predicate deltaClosedChar(State prev, string char, State next) {
// NOTE(review): this listing appears to be a diff with the +/- markers stripped. The first
// pair of restrictions (`stateInPumpableRegexp`) is presumably the removed version and the
// second pair (`Prefix::stateInRelevantRegexp`) the added one; only one pair exists in
// either revision of the file.
prev = stateInPumpableRegexp() and
next = stateInPumpableRegexp() and
prev = Prefix::stateInRelevantRegexp() and
next = Prefix::stateInRelevantRegexp() and
// the actual step: a `deltaClosed` edge whose input symbol is one matching `char`.
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
}
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
result = getAnInputSymbolMatching(char)
}
/** Gets the root term of a regular expression that contains a candidate state. */
pragma[noinline]
RegExpRoot relevantRoot() {
  exists(State candidate, RegExpTerm repr |
    isCandidateState(candidate) and
    repr = candidate.getRepr() and
    result = repr.getRootTerm()
  )
}
/**
* Gets a char used for finding possible suffixes inside `root`.
*
* NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the body
* below contains two versions back to back: first the presumably removed three-way
* disjunction, then the presumably added version, which is the same disjunction wrapped in
* parentheses and additionally restricted by `root = relevantRoot()`.
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
// -- presumably the removed version --
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
// -- presumably the added version: same alternatives, limited to roots from `relevantRoot()` --
root = relevantRoot() and
(
exists(ascii(result)) and exists(root)
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
or
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
result = ["|", "\n", "Z"] and exists(root)
)
}
/**
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
isReDoSAttackable(t, pump, s) and
(
// NOTE(review): this listing appears to be a diff with the +/- markers stripped. Lines that
// use `PrefixConstruction::prefix` are presumably the removed versions, and the
// `Prefix::prefix` lines that follow them the added replacements; only one set exists in
// either revision of the file.
//
// `prefixMsg` is "starting with '<escaped prefix>' and " when a non-empty prefix exists for `s`.
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
not PrefixConstruction::prefix(s) = ""
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
not Prefix::prefix(s) = ""
or
// an empty prefix yields an empty message ...
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
Prefix::prefix(s) = "" and prefixMsg = ""
or
// ... and so does the absence of any prefix, so a result is still reported for `s`.
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
not exists(Prefix::prefix(s)) and prefixMsg = ""
)
}