adjust regexp libraries to how unpaired surrogates are parsed now

This commit is contained in:
Erik Krogh Kristensen
2021-09-10 18:03:21 +02:00
committed by Chris Smowton
parent f24d7c4212
commit 05cc6bcf8a
5 changed files with 5 additions and 8 deletions

View File

@@ -258,8 +258,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
class RegExpCharEscape extends RegExpEscape, RegExpConstant, @regexp_char_escape {
override predicate isCharacter() {
not (
// unencodable characters are represented as '?' in the database
getValue() = "?" and
// unencodable characters are represented as '?' or \uFFFD in the database
getValue() = ["?", 65533.toUnicode()] and
exists(string s | s = toString().toLowerCase() |
// only Unicode escapes give rise to unencodable characters
s.matches("\\\\u%") and