mirror of
https://github.com/github/codeql.git
synced 2026-04-29 18:55:14 +02:00
improve performance of suffix checking
This commit is contained in:
@@ -838,7 +838,7 @@ private module SuffixConstruction {
|
||||
* Holds if there likely exists a suffix starting from `s` that leads to the regular expression being rejected.
|
||||
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
pragma[noinline]
|
||||
private predicate isLikelyRejectable(StateInPumpableRegexp s) {
|
||||
// exists a reject edge with some char.
|
||||
hasRejectEdge(s)
|
||||
@@ -857,10 +857,11 @@ private module SuffixConstruction {
|
||||
/**
|
||||
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
|
||||
*/
|
||||
pragma[noinline]
|
||||
predicate hasEdgeToLikelyRejectable(StateInPumpableRegexp s) {
|
||||
// all edges (at least one) with some char leads to another state that is rejectable.
|
||||
// the `next` states might not share a common suffix, which can cause FPs.
|
||||
exists(string char | char = relevant() |
|
||||
exists(string char | char = relevant(getRoot(s.getRepr())) |
|
||||
forex(State next | deltaClosedChar(s, char, next) | isLikelyRejectable(next))
|
||||
)
|
||||
}
|
||||
@@ -871,21 +872,31 @@ private module SuffixConstruction {
|
||||
* `prev` to `next` that the character symbol `char`.
|
||||
*/
|
||||
predicate deltaClosedChar(StateInPumpableRegexp prev, string char, StateInPumpableRegexp next) {
|
||||
char = relevant() and
|
||||
deltaClosed(prev, getAnInputSymbolMatching(char), next)
|
||||
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
|
||||
}
|
||||
|
||||
pragma[noinline]
|
||||
InputSymbol getAnInputSymbolMatchingRelevant(string char) {
|
||||
char = relevant(_) and
|
||||
result = getAnInputSymbolMatching(char)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a char used for finding possible suffixes.
|
||||
* Gets a char used for finding possible suffixes inside `root`.
|
||||
*/
|
||||
private string relevant() { result = CharacterClasses::getARelevantChar() }
|
||||
pragma[noinline]
|
||||
private string relevant(RegExpRoot root) {
|
||||
result = ["a", "9", "|", "\n", " "]
|
||||
or
|
||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is no edge from `s` labeled `char` in our NFA.
|
||||
* The NFA does not model reject states, so the above is the same as saying there is a reject edge.
|
||||
*/
|
||||
private predicate hasRejectEdge(State s) {
|
||||
exists(string char | char = relevant() | not deltaClosedChar(s, char, _))
|
||||
exists(string char | char = relevant(getRoot(s.getRepr())) | not deltaClosedChar(s, char, _))
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -348,7 +348,6 @@
|
||||
| tst.js:23:33:23:40 | [\\s\\S]*? | Strings starting with '(*(*' and with many repetitions of '(*' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
|
||||
| tst.js:23:47:23:54 | [\\s\\S]*? | Strings starting with '(*' and with many repetitions of '(*a' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
|
||||
| tst.js:28:24:28:25 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
|
||||
| tst.js:28:59:28:60 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
|
||||
| tst.js:31:23:31:24 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
|
||||
| tst.js:31:54:31:55 | .* | Strings starting with '!\|\\n-\|\\n' and with many repetitions of '\|\\n\|' can start matching anywhere after the start of the preceeding .* |
|
||||
| tst.js:31:58:31:59 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
|
||||
|
||||
Reference in New Issue
Block a user