improve precision of intersect

This commit is contained in:
Erik Krogh Kristensen
2021-01-04 11:55:51 +01:00
parent 44571ffeea
commit ce8cc2368b
2 changed files with 22 additions and 34 deletions

View File

@@ -177,7 +177,6 @@ CharClass getCanonicalCharClass(RegExpTerm term) {
/**
* Holds if `a` and `b` are input symbols from the same regexp.
* (And not a `Dot()`, `Any()` or `Epsilon()`)
*/
private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
exists(RegExpRoot root |
@@ -190,13 +189,10 @@ private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
* Holds if the `a` is an input symbol from a regexp that has root `root`.
*/
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
exists(RegExpTerm term | getRoot(term) = root |
a = Char(term.(RegexpCharacterConstant).getValue().charAt(_))
)
or
exists(string str, RegExpTerm term | a = CharClass(str) |
term.getRawValue() = str and
getRoot(term) = root
exists(State s | getRoot(s.getRepr()) = root |
delta(s, a, _)
or
delta(_, a, s)
)
}
@@ -675,37 +671,28 @@ private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterCla
* Gets a character that is represented by both `c` and `d`.
*/
string intersect(InputSymbol c, InputSymbol d) {
c = Char(result) and
d = getAnInputSymbolMatching(result) and
(sharesRoot(c, d) or [c, d] = Any()) and
(
sharesRoot(c, d)
c = Char(result) and
d = getAnInputSymbolMatching(result)
or
d = Dot()
result = getMinOverlapBetweenCharacterClasses(c, d)
or
d = Any()
result = c.(CharacterClass).choose() and
(
d = c
or
d = Dot() and
not (result = "\n" or result = "\r")
or
d = Any()
)
or
(c = Dot() or c = Any()) and
(d = Dot() or d = Any()) and
result = "a"
)
or
result = getMinOverlapBetweenCharacterClasses(c, d)
or
result = c.(CharacterClass).choose() and
(
d = c
or
d = Dot() and
not (result = "\n" or result = "\r")
or
d = Any()
)
or
c = Dot() and
(
d = Dot() and result = "a"
or
d = Any() and result = "a"
)
or
c = Any() and d = Any() and result = "a"
or
result = intersect(d, c)
}

View File

@@ -349,6 +349,7 @@
| tst.js:23:33:23:40 | [\\s\\S]*? | Strings starting with '(*(*' and with many repetitions of '(*' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
| tst.js:23:47:23:54 | [\\s\\S]*? | Strings starting with '(*' and with many repetitions of '(*a' can start matching anywhere after the start of the preceeding (?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))* |
| tst.js:28:24:28:25 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
| tst.js:28:59:28:60 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
| tst.js:31:23:31:24 | .* | Strings starting with '!\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |
| tst.js:31:54:31:55 | .* | Strings starting with '!\|\\n-\|\\n' and with many repetitions of '\|\\n\|' can start matching anywhere after the start of the preceeding .* |
| tst.js:31:58:31:59 | .* | Strings starting with '!\|\\n-\|\\n\|' and with many repetitions of '\|' can start matching anywhere after the start of the preceeding .* |