mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
adjust regexp libraries to how unpaired surrogates are parsed now
This commit is contained in:
committed by
Chris Smowton
parent
f24d7c4212
commit
05cc6bcf8a
@@ -258,8 +258,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
|
||||
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
|
||||
override predicate isCharacter() {
|
||||
not (
|
||||
// unencodable characters are represented as '?' in the database
|
||||
getValue() = "?" and
|
||||
// unencodable characters are represented as '?' or \uFFFD in the database
|
||||
getValue() = ["?", 65533.toUnicode()] and
|
||||
exists(string s | s = toString().toLowerCase() |
|
||||
// only Unicode escapes give rise to unencodable characters
|
||||
s.matches("\\\\u%") and
|
||||
|
||||
@@ -145,8 +145,6 @@
|
||||
| tst.js:257:14:257:116 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| tst.js:260:14:260:77 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
|
||||
| tst.js:260:68:260:70 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of 'aquerythis'. |
|
||||
| tst.js:266:18:266:49 | ([\\uDC66\\uDC67]\|[\\uDC68\\uDC69])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
|
||||
| tst.js:269:18:269:51 | ((\\uDC66\|\\uDC67)\|(\\uDC68\|\\uDC69))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
|
||||
| tst.js:272:21:272:22 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
|
||||
| tst.js:275:38:275:40 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<a a=' and containing many repetitions of '"" a='. |
|
||||
| tst.js:281:16:281:17 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
|
||||
|
||||
@@ -262,10 +262,10 @@ var bad61 = /(thisisagoddamnlongstringforstresstestingthequery|this\w+query)*-/
|
||||
// GOOD
|
||||
var good27 = /(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-/
|
||||
|
||||
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
|
||||
// GOOD
|
||||
var good28 = /foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo/
|
||||
|
||||
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
|
||||
// GOOD
|
||||
var good29 = /foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo/
|
||||
|
||||
// NOT GOOD (but cannot currently construct a prefix)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
| tst.js:1:4:1:4 | o | Character 'o' is repeated $@ in the same character class. | tst.js:1:5:1:5 | o | here |
|
||||
| tst.js:3:3:3:8 | \\uDC3A | Character '\\uDC3A' is repeated $@ in the same character class. | tst.js:3:9:3:14 | \\uDC3C | here |
|
||||
| tst.js:4:3:4:3 | ? | Character '?' is repeated $@ in the same character class. | tst.js:4:4:4:4 | ? | here |
|
||||
| tst.js:5:3:5:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:5:9:5:14 | \\u003f | here |
|
||||
| tst.js:6:3:6:8 | \\u003F | Character '\\u003F' is repeated $@ in the same character class. | tst.js:6:9:6:9 | ? | here |
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/[foo]/;
|
||||
/[a-zc]/;
|
||||
/[\uDC3A\uDC3C]/; // False positive caused by the extractor converting both unpaired surrogates to \uFFFD
|
||||
/[\uDC3A\uDC3C]/;
|
||||
/[??]/;
|
||||
/[\u003F\u003f]/;
|
||||
/[\u003F?]/;
|
||||
|
||||
Reference in New Issue
Block a user