mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Merge pull request #10062 from erik-krogh/redosPrefix
JS: use the shared regular expression libraries in `js/case-sensitive-middleware-path`
This commit is contained in:
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
|
||||
* Holds if this root term is relevant to the ReDoS analysis.
|
||||
*/
|
||||
predicate isRelevant() {
|
||||
// there is at least one repetition
|
||||
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
|
||||
// is actually used as a RegExp
|
||||
this.isUsedAsRegExp() and
|
||||
// not excluded for library specific reasons
|
||||
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
|
||||
*/
|
||||
signature predicate isCandidateSig(State state, string pump);
|
||||
|
||||
/**
|
||||
* Holds if `state` is a candidate for ReDoS.
|
||||
*/
|
||||
signature predicate isCandidateSig(State state);
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
module PrefixConstruction<isCandidateSig/1 isCandidate> {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInRelevantRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that contains a candidate state. */
|
||||
pragma[noinline]
|
||||
State stateInRelevantRegexp() {
|
||||
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for pruning candidate ReDoS states.
|
||||
* The candidates are specified by the `isCandidate` signature predicate.
|
||||
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
|
||||
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
private module PrefixConstruction {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInPumpableRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that has a pumpable state. */
|
||||
pragma[noinline]
|
||||
State stateInPumpableRegexp() {
|
||||
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
import PrefixConstruction<isCandidateState/1> as Prefix
|
||||
|
||||
/**
|
||||
* Predicates for testing the presence of a rejecting suffix.
|
||||
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
||||
*/
|
||||
private module SuffixConstruction {
|
||||
import PrefixConstruction
|
||||
|
||||
/**
|
||||
* Holds if all states reachable from `fork` by repeating `w`
|
||||
* are likely rejectable by appending some suffix.
|
||||
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
(
|
||||
// exists a reject edge with some char.
|
||||
hasRejectEdge(s)
|
||||
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
||||
*/
|
||||
predicate isRejectState(State s) {
|
||||
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate hasEdgeToLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string hasEdgeToLikelyRejectableHelper(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
not hasRejectEdge(s) and
|
||||
not isRejectState(s) and
|
||||
deltaClosedChar(s, result, _)
|
||||
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* `prev` to `next` that the character symbol `char`.
|
||||
*/
|
||||
predicate deltaClosedChar(State prev, string char, State next) {
|
||||
prev = stateInPumpableRegexp() and
|
||||
next = stateInPumpableRegexp() and
|
||||
prev = Prefix::stateInRelevantRegexp() and
|
||||
next = Prefix::stateInRelevantRegexp() and
|
||||
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
|
||||
}
|
||||
|
||||
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
RegExpRoot relevantRoot() {
|
||||
exists(RegExpTerm term, State s |
|
||||
s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char used for finding possible suffixes inside `root`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
root = relevantRoot() and
|
||||
(
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
|
||||
isReDoSAttackable(t, pump, s) and
|
||||
(
|
||||
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
||||
not PrefixConstruction::prefix(s) = ""
|
||||
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
|
||||
not Prefix::prefix(s) = ""
|
||||
or
|
||||
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
||||
Prefix::prefix(s) = "" and prefixMsg = ""
|
||||
or
|
||||
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
||||
not exists(Prefix::prefix(s)) and prefixMsg = ""
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
|
||||
* Holds if this root term is relevant to the ReDoS analysis.
|
||||
*/
|
||||
predicate isRelevant() {
|
||||
// there is at least one repetition
|
||||
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
|
||||
// is actually used as a RegExp
|
||||
this.isUsedAsRegExp() and
|
||||
// not excluded for library specific reasons
|
||||
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
|
||||
*/
|
||||
signature predicate isCandidateSig(State state, string pump);
|
||||
|
||||
/**
|
||||
* Holds if `state` is a candidate for ReDoS.
|
||||
*/
|
||||
signature predicate isCandidateSig(State state);
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
module PrefixConstruction<isCandidateSig/1 isCandidate> {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInRelevantRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that contains a candidate state. */
|
||||
pragma[noinline]
|
||||
State stateInRelevantRegexp() {
|
||||
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for pruning candidate ReDoS states.
|
||||
* The candidates are specified by the `isCandidate` signature predicate.
|
||||
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
|
||||
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
private module PrefixConstruction {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInPumpableRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that has a pumpable state. */
|
||||
pragma[noinline]
|
||||
State stateInPumpableRegexp() {
|
||||
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
import PrefixConstruction<isCandidateState/1> as Prefix
|
||||
|
||||
/**
|
||||
* Predicates for testing the presence of a rejecting suffix.
|
||||
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
||||
*/
|
||||
private module SuffixConstruction {
|
||||
import PrefixConstruction
|
||||
|
||||
/**
|
||||
* Holds if all states reachable from `fork` by repeating `w`
|
||||
* are likely rejectable by appending some suffix.
|
||||
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
(
|
||||
// exists a reject edge with some char.
|
||||
hasRejectEdge(s)
|
||||
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
||||
*/
|
||||
predicate isRejectState(State s) {
|
||||
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate hasEdgeToLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string hasEdgeToLikelyRejectableHelper(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
not hasRejectEdge(s) and
|
||||
not isRejectState(s) and
|
||||
deltaClosedChar(s, result, _)
|
||||
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* `prev` to `next` that the character symbol `char`.
|
||||
*/
|
||||
predicate deltaClosedChar(State prev, string char, State next) {
|
||||
prev = stateInPumpableRegexp() and
|
||||
next = stateInPumpableRegexp() and
|
||||
prev = Prefix::stateInRelevantRegexp() and
|
||||
next = Prefix::stateInRelevantRegexp() and
|
||||
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
|
||||
}
|
||||
|
||||
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
RegExpRoot relevantRoot() {
|
||||
exists(RegExpTerm term, State s |
|
||||
s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char used for finding possible suffixes inside `root`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
root = relevantRoot() and
|
||||
(
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
|
||||
isReDoSAttackable(t, pump, s) and
|
||||
(
|
||||
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
||||
not PrefixConstruction::prefix(s) = ""
|
||||
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
|
||||
not Prefix::prefix(s) = ""
|
||||
or
|
||||
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
||||
Prefix::prefix(s) = "" and prefixMsg = ""
|
||||
or
|
||||
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
||||
not exists(Prefix::prefix(s)) and prefixMsg = ""
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -20,55 +20,29 @@ string toOtherCase(string s) {
|
||||
if s.regexpMatch(".*[a-z].*") then result = s.toUpperCase() else result = s.toLowerCase()
|
||||
}
|
||||
|
||||
RegExpCharacterClass getEnclosingClass(RegExpTerm term) {
|
||||
term = result.getAChild()
|
||||
or
|
||||
term = result.getAChild().(RegExpRange).getAChild()
|
||||
}
|
||||
import semmle.javascript.security.regexp.NfaUtils as NfaUtils
|
||||
|
||||
/**
|
||||
* Holds if `term` seems to distinguish between upper and lower case letters, assuming the `i` flag is not present.
|
||||
*/
|
||||
pragma[inline]
|
||||
predicate isLikelyCaseSensitiveRegExp(RegExpTerm term) {
|
||||
exists(RegExpConstant const |
|
||||
const = term.getAChild*() and
|
||||
const.getValue().regexpMatch(".*[a-zA-Z].*") and
|
||||
not getEnclosingClass(const).getAChild().(RegExpConstant).getValue() =
|
||||
toOtherCase(const.getValue()) and
|
||||
not const.getParent*() instanceof RegExpNegativeLookahead and
|
||||
not const.getParent*() instanceof RegExpNegativeLookbehind
|
||||
/** Holds if `s` is a relevant regexp term were we want to compute a string that matches the term (for `getCaseSensitiveBypassExample`). */
|
||||
predicate isCand(NfaUtils::State s) {
|
||||
s.getRepr() instanceof NfaUtils::RegExpRoot and
|
||||
exists(DataFlow::RegExpCreationNode creation |
|
||||
isCaseSensitiveMiddleware(_, creation, _) and
|
||||
s.getRepr().getRootTerm() = creation.getRoot()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a string matched by `term`, or part of such a string.
|
||||
*/
|
||||
import NfaUtils::PrefixConstruction<isCand/1> as Prefix
|
||||
|
||||
/** Gets a string matched by `term`. */
|
||||
string getExampleString(RegExpTerm term) {
|
||||
result = term.getAMatchedString()
|
||||
or
|
||||
// getAMatchedString does not recurse into sequences. Perform one step manually.
|
||||
exists(RegExpSequence seq | seq = term |
|
||||
result =
|
||||
strictconcat(RegExpTerm child, int i, string text |
|
||||
child = seq.getChild(i) and
|
||||
(
|
||||
text = child.getAMatchedString()
|
||||
or
|
||||
not exists(child.getAMatchedString()) and
|
||||
text = ""
|
||||
)
|
||||
|
|
||||
text order by i
|
||||
)
|
||||
)
|
||||
result = Prefix::prefix(any(NfaUtils::State s | s.getRepr() = term))
|
||||
}
|
||||
|
||||
string getCaseSensitiveBypassExample(RegExpTerm term) {
|
||||
exists(string example |
|
||||
example = getExampleString(term) and
|
||||
result = toOtherCase(example) and
|
||||
result != example // getting an example string is approximate; ensure we got a proper case-change example
|
||||
exists(string byPassExample |
|
||||
byPassExample = getExampleString(term) and
|
||||
result = toOtherCase(byPassExample) and
|
||||
result != byPassExample // getting an byPassExample string is approximate; ensure we got a proper case-change byPassExample
|
||||
)
|
||||
}
|
||||
|
||||
@@ -89,7 +63,6 @@ predicate isCaseSensitiveMiddleware(
|
||||
) and
|
||||
arg = call.getArgument(0) and
|
||||
regexp.getAReference().flowsTo(arg) and
|
||||
isLikelyCaseSensitiveRegExp(regexp.getRoot()) and
|
||||
exists(string flags |
|
||||
flags = regexp.getFlags() and
|
||||
not RegExp::isIgnoreCase(flags)
|
||||
@@ -108,14 +81,64 @@ predicate isGuardedCaseInsensitiveEndpoint(
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an example path that will hit `endpoint`.
|
||||
* Query parameters (e.g. the ":param" in "/foo/:param") have been replaced with example values.
|
||||
*/
|
||||
string getAnEndpointExample(Routing::RouteSetup endpoint) {
|
||||
exists(string raw |
|
||||
raw = endpoint.getRelativePath() and
|
||||
result = raw.regexpReplaceAll(":\\w+\\b|\\*", ["a", "1"])
|
||||
)
|
||||
}
|
||||
|
||||
import semmle.javascript.security.regexp.RegexpMatching as RegexpMatching
|
||||
|
||||
NfaUtils::RegExpRoot getARoot(DataFlow::RegExpCreationNode creator) {
|
||||
result.getRootTerm() = creator.getRoot()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the regexp matcher should test whether `root` matches `str`.
|
||||
* The result is used to test whether a case-sensitive bypass exists.
|
||||
*/
|
||||
predicate isMatchingCandidate(
|
||||
RegexpMatching::RootTerm root, string str, boolean ignorePrefix, boolean testWithGroups
|
||||
) {
|
||||
exists(
|
||||
Routing::RouteSetup middleware, Routing::RouteSetup endPoint,
|
||||
DataFlow::RegExpCreationNode regexp
|
||||
|
|
||||
isCaseSensitiveMiddleware(middleware, regexp, _) and
|
||||
isGuardedCaseInsensitiveEndpoint(endPoint, middleware)
|
||||
|
|
||||
root = regexp.getRoot() and
|
||||
exists(getCaseSensitiveBypassExample(getARoot(regexp))) and
|
||||
ignorePrefix = true and
|
||||
testWithGroups = false and
|
||||
str =
|
||||
[
|
||||
getCaseSensitiveBypassExample(getARoot(regexp)), getAnEndpointExample(endPoint),
|
||||
toOtherCase(getAnEndpointExample(endPoint))
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
import RegexpMatching::RegexpMatching<isMatchingCandidate/4> as Matcher
|
||||
|
||||
from
|
||||
DataFlow::RegExpCreationNode regexp, Routing::RouteSetup middleware, Routing::RouteSetup endpoint,
|
||||
DataFlow::Node arg, string example
|
||||
DataFlow::Node arg, string byPassExample, string endpointExample, string byPassEndPoint
|
||||
where
|
||||
isCaseSensitiveMiddleware(middleware, regexp, arg) and
|
||||
example = getCaseSensitiveBypassExample(regexp.getRoot()) and
|
||||
byPassExample = getCaseSensitiveBypassExample(getARoot(regexp)) and
|
||||
isGuardedCaseInsensitiveEndpoint(endpoint, middleware) and
|
||||
exists(endpoint.getRelativePath().toLowerCase().indexOf(example.toLowerCase()))
|
||||
// only report one example.
|
||||
endpointExample =
|
||||
min(string ex | ex = getAnEndpointExample(endpoint) and Matcher::matches(regexp.getRoot(), ex)) and
|
||||
not Matcher::matches(regexp.getRoot(), byPassExample) and
|
||||
byPassEndPoint = toOtherCase(endpointExample) and
|
||||
not Matcher::matches(regexp.getRoot(), byPassEndPoint)
|
||||
select arg,
|
||||
"This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '"
|
||||
+ example + "' will bypass the middleware.", regexp, "pattern", endpoint, "here"
|
||||
+ byPassEndPoint + "' will bypass the middleware.", regexp, "pattern", endpoint, "here"
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:8:9:8:19 | /\\/foo\\/.*/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:8:9:8:19 | /\\/foo\\/.*/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:14:5:14:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:14:5:14:28 | new Reg ... (.*)?') | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/FOO/1' will bypass the middleware. | tst.js:41:9:41:25 | /\\/foo\\/([0-9]+)/ | pattern | tst.js:60:1:61:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:64:5:64:28 | new Reg ... (.*)?') | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAR/1' will bypass the middleware. | tst.js:64:5:64:28 | new Reg ... (.*)?') | pattern | tst.js:73:1:74:2 | app.get ... ware\\n}) | here |
|
||||
| tst.js:76:9:76:20 | /\\/baz\\/bla/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ/BLA' will bypass the middleware. | tst.js:76:9:76:20 | /\\/baz\\/bla/ | pattern | tst.js:77:1:79:2 | app.get ... });\\n}) | here |
|
||||
| tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/BAZ3/A' will bypass the middleware. | tst.js:86:9:86:30 | /\\/[Bb] ... 3\\/[a]/ | pattern | tst.js:87:1:89:2 | app.get ... });\\n}) | here |
|
||||
| tst.js:91:9:91:40 | /\\/summ ... ntGame/ | This route uses a case-sensitive path $@, but is guarding a case-insensitive path $@. A path such as '/CURRENTGAME' will bypass the middleware. | tst.js:91:9:91:40 | /\\/summ ... ntGame/ | pattern | tst.js:93:1:95:2 | app.get ... O");\\n}) | here |
|
||||
|
||||
@@ -59,3 +59,37 @@ app.use(/\/foo\/([0-9]+)/i, (req, res) => { // OK - not middleware (also case in
|
||||
|
||||
app.get('/foo/:param', (req, res) => { // OK - not a middleware
|
||||
});
|
||||
|
||||
app.get(
|
||||
new RegExp('^/bar(.*)?'), // NOT OK - case sensitive
|
||||
unknown(),
|
||||
function(req, res, next) {
|
||||
if (req.params.blah) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
app.get('/bar/*', (req, res) => { // OK - not a middleware
|
||||
});
|
||||
|
||||
app.use(/\/baz\/bla/, unknown()); // NOT OK - case sensitive
|
||||
app.get('/baz/bla', (req, resp) => {
|
||||
resp.send({ test: 123 });
|
||||
});
|
||||
|
||||
app.use(/\/[Bb][Aa][Zz]2\/[aA]/, unknown()); // OK - not case sensitive
|
||||
app.get('/baz2/a', (req, resp) => {
|
||||
resp.send({ test: 123 });
|
||||
});
|
||||
|
||||
app.use(/\/[Bb][Aa][Zz]3\/[a]/, unknown()); // NOT OK - case sensitive
|
||||
app.get('/baz3/a', (req, resp) => {
|
||||
resp.send({ test: 123 });
|
||||
});
|
||||
|
||||
app.use(/\/summonerByName|\/currentGame/,apiLimit1, apiLimit2);
|
||||
|
||||
app.get('/currentGame', function (req, res) {
|
||||
res.send("FOO");
|
||||
});
|
||||
|
||||
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
|
||||
* Holds if this root term is relevant to the ReDoS analysis.
|
||||
*/
|
||||
predicate isRelevant() {
|
||||
// there is at least one repetition
|
||||
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
|
||||
// is actually used as a RegExp
|
||||
this.isUsedAsRegExp() and
|
||||
// not excluded for library specific reasons
|
||||
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
|
||||
*/
|
||||
signature predicate isCandidateSig(State state, string pump);
|
||||
|
||||
/**
|
||||
* Holds if `state` is a candidate for ReDoS.
|
||||
*/
|
||||
signature predicate isCandidateSig(State state);
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
module PrefixConstruction<isCandidateSig/1 isCandidate> {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInRelevantRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that contains a candidate state. */
|
||||
pragma[noinline]
|
||||
State stateInRelevantRegexp() {
|
||||
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for pruning candidate ReDoS states.
|
||||
* The candidates are specified by the `isCandidate` signature predicate.
|
||||
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
|
||||
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
private module PrefixConstruction {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInPumpableRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that has a pumpable state. */
|
||||
pragma[noinline]
|
||||
State stateInPumpableRegexp() {
|
||||
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
import PrefixConstruction<isCandidateState/1> as Prefix
|
||||
|
||||
/**
|
||||
* Predicates for testing the presence of a rejecting suffix.
|
||||
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
||||
*/
|
||||
private module SuffixConstruction {
|
||||
import PrefixConstruction
|
||||
|
||||
/**
|
||||
* Holds if all states reachable from `fork` by repeating `w`
|
||||
* are likely rejectable by appending some suffix.
|
||||
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
(
|
||||
// exists a reject edge with some char.
|
||||
hasRejectEdge(s)
|
||||
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
||||
*/
|
||||
predicate isRejectState(State s) {
|
||||
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate hasEdgeToLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string hasEdgeToLikelyRejectableHelper(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
not hasRejectEdge(s) and
|
||||
not isRejectState(s) and
|
||||
deltaClosedChar(s, result, _)
|
||||
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* `prev` to `next` that the character symbol `char`.
|
||||
*/
|
||||
predicate deltaClosedChar(State prev, string char, State next) {
|
||||
prev = stateInPumpableRegexp() and
|
||||
next = stateInPumpableRegexp() and
|
||||
prev = Prefix::stateInRelevantRegexp() and
|
||||
next = Prefix::stateInRelevantRegexp() and
|
||||
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
|
||||
}
|
||||
|
||||
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
RegExpRoot relevantRoot() {
|
||||
exists(RegExpTerm term, State s |
|
||||
s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char used for finding possible suffixes inside `root`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
root = relevantRoot() and
|
||||
(
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
|
||||
isReDoSAttackable(t, pump, s) and
|
||||
(
|
||||
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
||||
not PrefixConstruction::prefix(s) = ""
|
||||
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
|
||||
not Prefix::prefix(s) = ""
|
||||
or
|
||||
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
||||
Prefix::prefix(s) = "" and prefixMsg = ""
|
||||
or
|
||||
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
||||
not exists(Prefix::prefix(s)) and prefixMsg = ""
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -93,8 +93,6 @@ class RegExpRoot extends RegExpTerm {
|
||||
* Holds if this root term is relevant to the ReDoS analysis.
|
||||
*/
|
||||
predicate isRelevant() {
|
||||
// there is at least one repetition
|
||||
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
|
||||
// is actually used as a RegExp
|
||||
this.isUsedAsRegExp() and
|
||||
// not excluded for library specific reasons
|
||||
@@ -877,6 +875,101 @@ predicate isStartState(State state) {
|
||||
*/
|
||||
signature predicate isCandidateSig(State state, string pump);
|
||||
|
||||
/**
|
||||
* Holds if `state` is a candidate for ReDoS.
|
||||
*/
|
||||
signature predicate isCandidateSig(State state);
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
module PrefixConstruction<isCandidateSig/1 isCandidate> {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInRelevantRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that contains a candidate state. */
|
||||
pragma[noinline]
|
||||
State stateInRelevantRegexp() {
|
||||
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A module for pruning candidate ReDoS states.
|
||||
* The candidates are specified by the `isCandidate` signature predicate.
|
||||
@@ -910,95 +1003,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
|
||||
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
|
||||
|
||||
/**
|
||||
* Predicates for constructing a prefix string that leads to a given state.
|
||||
*/
|
||||
private module PrefixConstruction {
|
||||
/**
|
||||
* Holds if `state` is the textually last start state for the regular expression.
|
||||
*/
|
||||
private predicate lastStartState(State state) {
|
||||
exists(RegExpRoot root |
|
||||
state =
|
||||
max(State s, Location l |
|
||||
s = stateInPumpableRegexp() and
|
||||
isStartState(s) and
|
||||
getRoot(s.getRepr()) = root and
|
||||
l = s.getRepr().getLocation()
|
||||
|
|
||||
s
|
||||
order by
|
||||
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
|
||||
l.getEndLine()
|
||||
)
|
||||
)
|
||||
}
|
||||
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
|
||||
|
||||
/**
|
||||
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
|
||||
*/
|
||||
private predicate existsTransition(State a, State b) { delta(a, _, b) }
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
|
||||
*/
|
||||
int prefixLength(State start, State state) =
|
||||
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
|
||||
|
||||
/**
|
||||
* Gets the minimum number of transitions it takes to reach `state` from the start state.
|
||||
*/
|
||||
private int lengthFromStart(State state) { result = prefixLength(_, state) }
|
||||
|
||||
/**
|
||||
* Gets a string for which the regular expression will reach `state`.
|
||||
*
|
||||
* Has at most one result for any given `state`.
|
||||
* This predicate will not always have a result even if there is a ReDoS issue in
|
||||
* the regular expression.
|
||||
*/
|
||||
string prefix(State state) {
|
||||
lastStartState(state) and
|
||||
result = ""
|
||||
or
|
||||
// the search stops past the last redos candidate state.
|
||||
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
|
||||
exists(State prev |
|
||||
// select a unique predecessor (by an arbitrary measure)
|
||||
prev =
|
||||
min(State s, Location loc |
|
||||
lengthFromStart(s) = lengthFromStart(state) - 1 and
|
||||
loc = s.getRepr().getLocation() and
|
||||
delta(s, _, state)
|
||||
|
|
||||
s
|
||||
order by
|
||||
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
|
||||
s.getRepr().toString()
|
||||
)
|
||||
|
|
||||
// greedy search for the shortest prefix
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getCanonicalEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/** Gets a state within a regular expression that has a pumpable state. */
|
||||
pragma[noinline]
|
||||
State stateInPumpableRegexp() {
|
||||
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
|
||||
}
|
||||
}
|
||||
import PrefixConstruction<isCandidateState/1> as Prefix
|
||||
|
||||
/**
|
||||
* Predicates for testing the presence of a rejecting suffix.
|
||||
@@ -1018,8 +1025,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
|
||||
*/
|
||||
private module SuffixConstruction {
|
||||
import PrefixConstruction
|
||||
|
||||
/**
|
||||
* Holds if all states reachable from `fork` by repeating `w`
|
||||
* are likely rejectable by appending some suffix.
|
||||
@@ -1036,7 +1041,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private predicate isLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
(
|
||||
// exists a reject edge with some char.
|
||||
hasRejectEdge(s)
|
||||
@@ -1052,7 +1057,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
|
||||
*/
|
||||
predicate isRejectState(State s) {
|
||||
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
s = Prefix::stateInRelevantRegexp() and not epsilonSucc*(s) = Accept(_)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1060,7 +1065,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noopt]
|
||||
predicate hasEdgeToLikelyRejectable(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
@@ -1076,7 +1081,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string hasEdgeToLikelyRejectableHelper(State s) {
|
||||
s = stateInPumpableRegexp() and
|
||||
s = Prefix::stateInRelevantRegexp() and
|
||||
not hasRejectEdge(s) and
|
||||
not isRejectState(s) and
|
||||
deltaClosedChar(s, result, _)
|
||||
@@ -1088,8 +1093,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
* `prev` to `next` that the character symbol `char`.
|
||||
*/
|
||||
predicate deltaClosedChar(State prev, string char, State next) {
|
||||
prev = stateInPumpableRegexp() and
|
||||
next = stateInPumpableRegexp() and
|
||||
prev = Prefix::stateInRelevantRegexp() and
|
||||
next = Prefix::stateInRelevantRegexp() and
|
||||
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
|
||||
}
|
||||
|
||||
@@ -1099,18 +1104,28 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
RegExpRoot relevantRoot() {
|
||||
exists(RegExpTerm term, State s |
|
||||
s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char used for finding possible suffixes inside `root`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
root = relevantRoot() and
|
||||
(
|
||||
exists(ascii(result)) and exists(root)
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
or
|
||||
// The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation).
|
||||
// The three chars must be kept in sync with `hasSimpleRejectEdge`.
|
||||
result = ["|", "\n", "Z"] and exists(root)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1208,12 +1223,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
|
||||
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
|
||||
isReDoSAttackable(t, pump, s) and
|
||||
(
|
||||
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
|
||||
not PrefixConstruction::prefix(s) = ""
|
||||
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
|
||||
not Prefix::prefix(s) = ""
|
||||
or
|
||||
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
|
||||
Prefix::prefix(s) = "" and prefixMsg = ""
|
||||
or
|
||||
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
|
||||
not exists(Prefix::prefix(s)) and prefixMsg = ""
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user