mirror of
https://github.com/github/codeql.git
synced 2026-05-05 05:35:13 +02:00
general performance improvements in the ReDoS utility library
This commit is contained in:
@@ -802,12 +802,18 @@ private module PrefixConstruction {
|
||||
result = prefix(prev) and delta(prev, Epsilon(), state)
|
||||
or
|
||||
not delta(prev, Epsilon(), state) and
|
||||
result =
|
||||
prefix(prev) +
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), state))
|
||||
result = prefix(prev) + getMinimumEdgeChar(prev, state)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the minimum char for which there exists a transition from `prev` to `next` in the NFA.
|
||||
*/
|
||||
private string getMinimumEdgeChar(State prev, State next) {
|
||||
result =
|
||||
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
|
||||
}
|
||||
|
||||
/**
|
||||
* A state within a regular expression that has a pumpable state.
|
||||
*/
|
||||
@@ -871,15 +877,27 @@ private module SuffixConstruction {
|
||||
/**
|
||||
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
pragma[noopt]
|
||||
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = relevant(getRoot(s.getRepr())) |
|
||||
forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
||||
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
|
||||
exists(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next)) and
|
||||
forall(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char for there exists a transition away from `s`,
|
||||
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string hasEdgeToLikelyRejectableHelper(StateInPumpableRegexp s) {
|
||||
not hasRejectEdge(s) and
|
||||
not isRejectState(s) and
|
||||
deltaClosedChar(s, result, _)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is a state `next` that can be reached from `prev`
|
||||
* along epsilon edges, such that there is a transition from
|
||||
@@ -900,37 +918,76 @@ private module SuffixConstruction {
|
||||
*/
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
result = ["a", "9", "|", "\n", " "]
|
||||
result = ["a", "9", "|", "\n", " ", "|", "\n", "Z"] // must include all the strings from `hasSimpleRejectEdge`.
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is no edge from `s` labeled `char` in our NFA.
|
||||
* Holds if there exists a `char` such that there is no edge from `s` labeled `char` in our NFA.
|
||||
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
|
||||
*/
|
||||
private predicate hasRejectEdge(State s) {
|
||||
hasSimpleRejectEdge(s)
|
||||
or
|
||||
not hasSimpleRejectEdge(s) and
|
||||
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is not edge from `s` labeled with "|", "\n", or "Z" in our NFA.
|
||||
* This predicate is used as a cheap pre-processing to speed up `hasRejectEdge`.
|
||||
*/
|
||||
private predicate hasSimpleRejectEdge(State s) {
|
||||
// The three chars were chosen arbitrarily.
|
||||
exists(string char | char = ["|", "\n", "Z"] | not deltaClosedChar(s, char, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state that can be reached from pumpable `fork` consuming all
|
||||
* chars in `w` any number of times followed by the first `i+1` characters of `w`.
|
||||
*/
|
||||
pragma[noopt]
|
||||
private State process(State fork, string w, int i) {
|
||||
exists(State prev | prev = getProcessPrevious(fork, i, w) |
|
||||
exists(string char, InputSymbol sym |
|
||||
char = w.charAt(i) and
|
||||
deltaClosed(prev, sym, result) and
|
||||
sym = getAProcessInputSymbol(char)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a state that can be reached from pumpable `fork` consuming all
|
||||
* chars in `w` any number of times followed by the first `i` characters of `w`.
|
||||
*/
|
||||
private State getProcessPrevious(State fork, int i, string w) {
|
||||
isReDoSCandidate(fork, w) and
|
||||
exists(State prev |
|
||||
i = 0 and prev = fork
|
||||
(
|
||||
i = 0 and result = fork
|
||||
or
|
||||
prev = process(fork, w, i - 1)
|
||||
result = process(fork, w, i - 1)
|
||||
or
|
||||
// repeat until fixpoint
|
||||
i = 0 and
|
||||
prev = process(fork, w, w.length() - 1)
|
||||
|
|
||||
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
|
||||
result = process(fork, w, w.length() - 1)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an InputSymbol that matches `char`.
|
||||
* The predicate is specialized to only have a result for the `char`s that are relevant for the `process` predicate.
|
||||
*/
|
||||
private InputSymbol getAProcessInputSymbol(string char) {
|
||||
char = getAProcessChar() and
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a `char` that occurs in a `pump` string.
|
||||
*/
|
||||
private string getAProcessChar() { result = any(string s | isReDoSCandidate(_, s)).charAt(_) }
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -76,6 +76,7 @@
|
||||
| polynomial-redos.js:112:17:112:19 | \\s+ | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding \\s+$ |
|
||||
| polynomial-redos.js:114:22:114:24 | \\w* | Strings starting with '5' and with many repetitions of '5' can start matching anywhere after the start of the preceeding \\d* |
|
||||
| polynomial-redos.js:116:21:116:28 | [\\d\\D]*? | Strings starting with '/*' and with many repetitions of 'a/*' can start matching anywhere after the start of the preceeding \\/\\*[\\d\\D]*?\\*\\/ |
|
||||
| polynomial-redos.js:118:17:118:23 | (#\\d+)+ | Strings with many repetitions of '9' can start matching anywhere after the start of the preceeding \\d+ |
|
||||
| regexplib/address.js:27:3:27:5 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*\\(?0\\d{4}\\)?(\\s*\|-)\\d{3}(\\s*\|-)\\d{3}\\s*) |
|
||||
| regexplib/address.js:27:48:27:50 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*\\(?0\\d{3}\\)?(\\s*\|-)\\d{3}(\\s*\|-)\\d{4}\\s*) |
|
||||
| regexplib/address.js:27:93:27:95 | \\s* | Strings with many repetitions of ' ' can start matching anywhere after the start of the preceeding (\\s*(7\|8)(\\d{7}\|\\d{3}(\\-\|\\s{1})\\d{4})\\s*) |
|
||||
|
||||
@@ -132,6 +132,8 @@ nodes
|
||||
| polynomial-redos.js:114:2:114:8 | tainted |
|
||||
| polynomial-redos.js:116:2:116:8 | tainted |
|
||||
| polynomial-redos.js:116:2:116:8 | tainted |
|
||||
| polynomial-redos.js:118:2:118:8 | tainted |
|
||||
| polynomial-redos.js:118:2:118:8 | tainted |
|
||||
edges
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted |
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:7:2:7:8 | tainted |
|
||||
@@ -257,6 +259,8 @@ edges
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:114:2:114:8 | tainted |
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:116:2:116:8 | tainted |
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:116:2:116:8 | tainted |
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:118:2:118:8 | tainted |
|
||||
| polynomial-redos.js:5:6:5:32 | tainted | polynomial-redos.js:118:2:118:8 | tainted |
|
||||
| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted |
|
||||
| polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:5:6:5:32 | tainted |
|
||||
| polynomial-redos.js:68:18:68:24 | req.url | polynomial-redos.js:68:18:68:24 | req.url |
|
||||
@@ -335,3 +339,4 @@ edges
|
||||
| polynomial-redos.js:112:2:112:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:112:2:112:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:112:17:112:19 | \\s+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
|
||||
| polynomial-redos.js:114:2:114:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:114:2:114:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:114:22:114:24 | \\w* | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
|
||||
| polynomial-redos.js:116:2:116:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:116:2:116:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:116:21:116:28 | [\\d\\D]*? | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
|
||||
| polynomial-redos.js:118:2:118:8 | tainted | polynomial-redos.js:5:16:5:32 | req.query.tainted | polynomial-redos.js:118:2:118:8 | tainted | This expensive $@ use depends on $@. | polynomial-redos.js:118:17:118:23 | (#\\d+)+ | regular expression | polynomial-redos.js:5:16:5:32 | req.query.tainted | a user-provided value |
|
||||
|
||||
@@ -114,4 +114,6 @@ app.use(function(req, res) {
|
||||
tainted.match(/^\d*5\w*$/); // NOT OK
|
||||
|
||||
tainted.match(/\/\*[\d\D]*?\*\//g); // NOT OK
|
||||
|
||||
tainted.match(/(#\d+)+/); // OK - but still flagged due to insufficient suffix-checking.
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user