Refine isFork to remove a false positive when a state has an epsilon transition to itself

This commit is contained in:
Erik Krogh Kristensen
2020-11-29 21:42:50 +01:00
parent d7b22e3b1b
commit 33b2701551
3 changed files with 20 additions and 3 deletions

View File

@@ -705,7 +705,25 @@ predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
or
r1 = r2 and
q1 = q2 and
epsilonSucc+(q) = q
epsilonSucc+(q) = q and
exists(RegExpTerm term | term = q.getRepr() | term instanceof InfiniteRepetitionQuantifier) and
(
// Either there are multiple possible "mid" states.
count(State mid |
mid = epsilonSucc+(q) and
q = epsilonSucc+(mid) and
not mid = q
) > 2
or
// Or one of the mid states is an infinite quantifier itself
exists(State mid, RegExpTerm term |
mid = epsilonSucc+(q) and
q = epsilonSucc+(mid) and
not mid = q and
term = mid.getRepr() and
term instanceof InfiniteRepetitionQuantifier
)
)
) and
stateInsideBacktracking(r1) and
stateInsideBacktracking(r2)

View File

@@ -133,7 +133,6 @@
| tst.js:317:18:317:23 | [\\w-]* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '-'. |
| tst.js:320:15:320:19 | (ab)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |
| tst.js:323:14:323:20 | (a?a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.js:326:15:326:19 | (a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.js:329:14:329:20 | (c?a?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.js:332:14:332:22 | (?:a\|a?)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| tst.js:335:14:335:20 | (a?b?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -322,7 +322,7 @@ var bad70 = /((ab)*)+c/;
// NOT GOOD
var bad71 = /(a?a?)*b/;
// GOOD - but still flagged. only quadratic blowup. (The NFA looks very similar to `/(a*)*b/`)
// GOOD
var good38 = /(a?)*b/;
// NOT GOOD - but wrong pump string.