Add `getACodepoint` to the shared Strings library and use it in NfaUtils

This commit is contained in:
erik-krogh
2023-10-12 12:59:47 +02:00
parent 822ba2ae59
commit fa1e8ee426
2 changed files with 11 additions and 5 deletions

View File

@@ -164,17 +164,17 @@ module Make<RegexTreeViewSig TreeImpl> {
/** An input symbol corresponding to character `c`. */
Char(string c) {
c =
getCodepointAt(any(RegexpCharacterConstant cc |
getACodepoint(any(RegexpCharacterConstant cc |
cc instanceof RelevantRegExpTerm and
not isIgnoreCase(cc.getRootTerm())
).getValue(), _)
).getValue())
or
// normalize everything to lower case if the regexp is case insensitive
c =
any(RegexpCharacterConstant cc, string char |
cc instanceof RelevantRegExpTerm and
isIgnoreCase(cc.getRootTerm()) and
char = getCodepointAt(cc.getValue(), _)
char = getACodepoint(cc.getValue())
|
char.toLowerCase()
)
@@ -370,7 +370,7 @@ module Make<RegexTreeViewSig TreeImpl> {
string getARelevantChar() {
exists(asciiPrintable(result))
or
exists(RegexpCharacterConstant c | result = getCodepointAt(c.getValue(), _))
exists(RegexpCharacterConstant c | result = getACodepoint(c.getValue()))
or
classEscapeMatches(_, result)
}
@@ -1258,7 +1258,7 @@ module Make<RegexTreeViewSig TreeImpl> {
* Gets a `char` that occurs in a `pump` string.
*/
private string getAProcessChar() {
result = getCodepointAt(any(string s | isReDoSCandidate(_, s)), _)
result = getACodepoint(any(string s | isReDoSCandidate(_, s)))
}
}

View File

@@ -63,6 +63,12 @@ string getCodepointAt(string s, int i) {
)
}
/**
 * Gets a Unicode character that occurs at some position in `s`.
 *
 * Every index of `s` is considered; each code point found there is converted
 * back to a string holding that character. As with any predicate, duplicates
 * collapse, so a character that is repeated in `s` is reported once.
 */
bindingset[s]
string getACodepoint(string s) {
  exists(int codepoint |
    // the index is left unconstrained so that all positions of `s` contribute
    codepoint = s.codePointAt(_) and
    result = codepoint.toUnicode()
  )
}
/**
* Gets the number of unicode codepoints in `s` not counting unpaired surrogates.
*/