mirror of
https://github.com/github/codeql.git
synced 2026-02-16 06:53:41 +01:00
93 lines
2.5 KiB
Plaintext
93 lines
2.5 KiB
Plaintext
overlay[local?]
|
|
module;
|
|
|
|
private import Numbers
|
|
|
|
/**
 * Gets a copy of `s` in which backslashes, newlines, carriage-returns and tabs
 * have been backslash-escaped, and in which the remaining characters have been
 * passed through `escapeUnicodeString`.
 */
bindingset[s]
string escape(string s) {
  exists(string noBackslash, string noNewline, string noReturn, string noTab |
    // Backslashes are doubled first, so that the backslashes introduced by the
    // later replacements are not themselves escaped again.
    noBackslash = s.replaceAll("\\", "\\\\") and
    noNewline = noBackslash.replaceAll("\n", "\\n") and
    noReturn = noNewline.replaceAll("\r", "\\r") and
    noTab = noReturn.replaceAll("\t", "\\t") and
    result = escapeUnicodeString(noTab)
  )
}

/**
 * Gets the concatenation, in index order, of `escapeUnicodeChar` applied to
 * each code point of `s`.
 *
 * Indices for which `codePointAt(..).toUnicode()` has no result contribute
 * nothing to the output.
 */
bindingset[s]
private string escapeUnicodeString(string s) {
  result =
    concat(int index, string piece |
      exists(string codepoint |
        codepoint = s.codePointAt(index).toUnicode() and
        piece = escapeUnicodeChar(codepoint)
      )
    |
      piece order by index
    )
}

/**
 * Gets the escaped form of the single character `char`.
 *
 * - A printable character (see `isPrintable`) is returned unchanged.
 * - Otherwise, if `to4digitHex` has a result for the code point of `char`, the
 *   result is `\u` followed by that value.
 * - Otherwise the result is `\u{` + `toHex` of the code point + `}`.
 *
 * NOTE(review): `to4digitHex` and `toHex` are not defined in this file
 * (presumably they come from `Numbers`); their exact domains should be
 * confirmed there.
 */
bindingset[char]
private string escapeUnicodeChar(string char) {
  isPrintable(char) and
  result = char
  or
  not isPrintable(char) and
  (
    // Holds only when the 4-digit form exists for the code point of `char`.
    result = "\\u" + to4digitHex(any(int code | code.toUnicode() = char))
    or
    // Fallback: no 4-digit form exists, so use the braced form.
    not exists(to4digitHex(any(int code | code.toUnicode() = char))) and
    result = "\\u{" + toHex(any(int code | code.toUnicode() = char)) + "}"
  )
}

/**
 * Holds if `char` is a newline, carriage-return or tab, or is one of the
 * characters for which `asciiPrintable` has a result.
 */
private predicate isPrintable(string char) {
  char = "\n\r\t".charAt(_)
  or
  asciiPrintable(char) = _
}

/**
 * Gets the `i`th (0-based) code point of `s`, as a string.
 *
 * Indices at which `codePointAt(..).toUnicode()` has no result (unpaired
 * surrogates, per the original note) are skipped and do not consume a
 * position.
 */
bindingset[s]
string getCodepointAt(string s, int i) {
  // `codePointAt` yields an integer for every index, including both halves of
  // a surrogate pair; `toUnicode` drops the invalid ones. The surviving
  // entries are re-numbered with `rank`, which is 1-based, hence `i + 1`.
  result =
    rank[i + 1](int offset, string codepoint |
      exists(int code | code = s.codePointAt(offset) | codepoint = code.toUnicode())
    |
      codepoint order by offset
    )
}

/**
 * Gets a unicode character occurring somewhere in `s`, i.e. the `toUnicode`
 * value of `s.codePointAt(k)` for some index `k`.
 */
bindingset[s]
string getACodepoint(string s) {
  exists(int offset | result = s.codePointAt(offset).toUnicode())
}

/**
 * Gets the number of unicode code points in `str`, computed as
 * `codePointCount` over the whole string.
 *
 * NOTE(review): the previous documentation stated that unpaired surrogates are
 * not counted; that depends on the semantics of `codePointCount` and should be
 * confirmed.
 */
bindingset[str]
int getCodepointLength(string str) {
  exists(int end | end = str.length() | result = str.codePointCount(0, end))
}

/**
 * Gets the 1-based rank of `char` among the easily printable ASCII characters
 * (space through `~`), in the default string order.
 *
 * Only these characters have a result (so no newline, tab, null, etc).
 *
 * NOTE(review): because `rank` is 1-based, the result is a position in the
 * list rather than the raw ASCII code; assuming strings are ordered by code
 * point, it equals the ASCII code minus 31 — confirm before relying on the
 * numeric value.
 */
int asciiPrintable(string char) {
  exists(string printable |
    printable =
      "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
  |
    char = rank[result](string candidate | candidate = printable.charAt(_))
  )
}