Adjust regexp libraries to how unpaired surrogates are now parsed

This commit is contained in:
Erik Krogh Kristensen
2021-09-10 18:03:21 +02:00
committed by Chris Smowton
parent f24d7c4212
commit 05cc6bcf8a
5 changed files with 5 additions and 8 deletions

View File

@@ -258,8 +258,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
override predicate isCharacter() {
not (
// unencodable characters are represented as '?' in the database
getValue() = "?" and
// unencodable characters are represented as '?' or \uFFFD in the database
getValue() = ["?", 65533.toUnicode()] and
exists(string s | s = toString().toLowerCase() |
// only Unicode escapes give rise to unencodable characters
s.matches("\\\\u%") and

View File

@@ -145,8 +145,6 @@
| tst.js:257:14:257:116 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
| tst.js:260:14:260:77 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
| tst.js:260:68:260:70 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
| tst.js:266:18:266:49 | ([\\uDC66\\uDC67]\|[\\uDC68\\uDC69])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| tst.js:269:18:269:51 | ((\\uDC66\|\\uDC67)\|(\\uDC68\|\\uDC69))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
| tst.js:272:21:272:22 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
| tst.js:275:38:275:40 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<a a=' and containing many repetitions of '"" a='. |
| tst.js:281:16:281:17 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -262,10 +262,10 @@ var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
// GOOD
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
// GOOD
var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
// GOOD
var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
// NOT GOOD (but cannot currently construct a prefix)

View File

@@ -1,5 +1,4 @@
| tst.js:1:4:1:4 | o | Character 'o' is repeated $@ in the same character class. | tst.js:1:5:1:5 | o | here |
| tst.js:3:3:3:8 | \\uDC3A | Character '\\uDC3A' is repeated $@ in the same character class. | tst.js:3:9:3:14 | \\uDC3C | here |
| tst.js:4:3:4:3 | ? | Character '?' is repeated $@ in the same character class. | tst.js:4:4:4:4 | ? | here |
| tst.js:5:3:5:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:5:9:5:14 | \\u003f | here |
| tst.js:6:3:6:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:6:9:6:9 | ? | here |

View File

@@ -1,6 +1,6 @@
/[foo]/;
/[a-zc]/;
/[\uDC3A\uDC3C]/; // False positive caused by the extractor converting both unpaired surrogates to \uFFFD
/[\uDC3A\uDC3C]/;
/[??]/;
/[\u003F\u003f]/;
/[\u003F?]/;