general performance improvements in the ReDoS utility library

This commit is contained in:
Erik Krogh Kristensen
2020-12-21 10:06:13 +01:00
parent 3a43421193
commit cbad705029
4 changed files with 79 additions and 14 deletions

View File

@@ -802,12 +802,18 @@ private module PrefixConstruction {
result = prefix(prev) and delta(prev, Epsilon(), state)
or
not delta(prev, Epsilon(), state) and
result =
prefix(prev) +
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), state))
result = prefix(prev) + getMinimumEdgeChar(prev, state)
)
}
/**
* Gets the minimum char for which there exists a transition from `prev` to `next` in the NFA.
*/
private string getMinimumEdgeChar(State prev, State next) {
result =
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
}
/**
* A state within a regular expression that has a pumpable state.
*/
@@ -871,15 +877,27 @@ private module SuffixConstruction {
/**
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
*/
pragma[noinline]
pragma[noopt]
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
// all edges (at least one) with some char leads to another state that is rejectable.
// the `next` states might not share a common suffix, which can cause FPs.
exists(string char | char = relevant(getRoot(s.getRepr())) |
forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
)
}
/**
* Gets a char for there exists a transition away from `s`,
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
*/
pragma[noinline]
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
not hasRejectEdge(s) and
not isRejectState(s) and
deltaClosedChar(s, result, _)
}
/**
* Holds if there is a state `next` that can be reached from `prev`
* along epsilon edges, such that there is a transition from
@@ -900,37 +918,76 @@ private module SuffixConstruction {
*/
pragma[noinline]
private string relevant(RegExpRoot root) {
result = ["a", "9", "|", "\n", " "]
result = ["a", "9", "|", "\n", " ", "|", "\n", "Z"] // must include all the strings from `hasSimpleRejectEdge`.
or
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
}
/**
* Holds if there is no edge from `s` labeled `char` in our NFA.
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
*/
private predicate hasRejectEdge(State s) {
hasSimpleRejectEdge(s)
or
not hasSimpleRejectEdge(s) and
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
}
/**
* Holds if there is not edge from `s` labeled with "|", "\n", or "Z" in our NFA.
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
*/
private predicate hasSimpleRejectEdge(State s) {
// The three chars were chosen arbitrarily.
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
*/
pragma[noopt]
private State process(State fork, string w, int i) {
exists(State prev | prev = getProcessPrevious(fork, i, w) |
exists(string char, InputSymbol sym |
char = w.charAt(i) and
deltaClosed(prev, sym, result) and
sym = getAProcessInputSymbol(char)
)
)
}
/**
* Gets a state that can be reached from pumpable `fork` consuming all
* chars in `w` any number of times followed by the first `i` characters of `w`.
*/
private State getProcessPrevious(State fork, int i, string w) {
isReDoSCandidate(fork, w) and
exists(State prev |
i = 0 and prev = fork
(
i = 0 and result = fork
or
prev = process(fork, w, i - 1)
result = process(fork, w, i - 1)
or
// repeat until fixpoint
i = 0 and
prev = process(fork, w, w.length() - 1)
|
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
result = process(fork, w, w.length() - 1)
)
}
/**
* Gets an InputSymbol that matches `char`.
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
*/
private InputSymbol getAProcessInputSymbol(string char) {
char = getAProcessChar() and
result = getAnInputSymbolMatching(char)
}
/**
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
}
/**

View File

@@ -76,6 +76,7 @@
| polynomial-redos.js:112:17:112:19 | \\s+ | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s+$ |
| polynomial-redos.js:114:22:114:24 | \\w* | Strings starting with '5' and with many repetitions of '5' can start matching anywhere after the start of the preceeding \\d* |
| polynomial-redos.js:116:21:116:28 | [\\d\\D]*? | Strings starting with '/*' and with many repetitions of 'a/*' can start matching anywhere after the start of the preceeding \\/\\*[\\d\\D]*?\\*\\/ |
| polynomial-redos.js:118:17:118:23 | (#\\d+)+ | Strings with many repetitions of '9' can start matching anywhere after the start of the preceeding \\d+ |
| regexplib/address.js:27:3:27:5 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*\\(?0\\d{4}\\)?(\\s*\|-)\\d{3}(\\s*\|-)\\d{3}\\s*) |
| regexplib/address.js:27:48:27:50 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*\\(?0\\d{3}\\)?(\\s*\|-)\\d{3}(\\s*\|-)\\d{4}\\s*) |
| regexplib/address.js:27:93:27:95 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*(7\|8)(\\d{7}\|\\d{3}(\\-\|\\s{1})\\d{4})\\s*) |

View File

@@ -132,6 +132,8 @@ nodes
| polynomial-redos.js:114:2:114:8 | tainted |
| polynomial-redos.js:116:2:116:8 | tainted |
| polynomial-redos.js:116:2:116:8 | tainted |
| polynomial-redos.js:118:2:118:8 | tainted |
| polynomial-redos.js:118:2:118:8 | tainted |
edges
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted |
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted |
@@ -257,6 +259,8 @@ edges
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:114:2:114:8 | tainted |
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:116:2:116:8 | tainted |
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:116:2:116:8 | tainted |
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:118:2:118:8 | tainted |
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:118:2:118:8 | tainted |
| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted |
| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted |
| polynomial-redos.js:68:18:68:24 | req.url | polynomial-redos.js:68:18:68:24 | req.url |
@@ -335,3 +339,4 @@ edges
| polynomial-redos.js:112:2:112:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:112:2:112:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:112:17:112:19 | \\s+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
| polynomial-redos.js:114:2:114:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:114:2:114:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:114:22:114:24 | \\w* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
| polynomial-redos.js:116:2:116:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:116:2:116:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:116:21:116:28 | [\\d\\D]*? | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
| polynomial-redos.js:118:2:118:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:118:2:118:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:118:17:118:23 | (#\\d+)+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |

View File

@@ -114,4 +114,6 @@ app.use(function(req, res) {
tainted.match(/^\d*5\w*$/); // NOT OK
tainted.match(/\/\*[\d\D]*?\*\//g); // NOT OK
tainted.match(/(#\d+)+/); // OK - but still flagged due to insufficient suffix-checking.
});