mirror of
https://github.com/github/codeql.git
synced 2025-12-24 04:36:35 +01:00
Merge pull request #14339 from erik-krogh/range-printing
JS/PY/RB/Java: escape unicode chars in overly-large-range
This commit is contained in:
@@ -8,3 +8,4 @@
|
|||||||
| test.py:25:32:25:34 | 7-F | Suspicious character range that is equivalent to [7-9:;<=>?@A-F]. |
|
| test.py:25:32:25:34 | 7-F | Suspicious character range that is equivalent to [7-9:;<=>?@A-F]. |
|
||||||
| test.py:27:36:27:38 | 0-9 | Suspicious character range that overlaps with \\d in the same character class. |
|
| test.py:27:36:27:38 | 0-9 | Suspicious character range that overlaps with \\d in the same character class. |
|
||||||
| test.py:29:39:29:41 | .-? | Suspicious character range that overlaps with \\w in the same character class, and is equivalent to [.\\/0-9:;<=>?]. |
|
| test.py:29:39:29:41 | .-? | Suspicious character range that overlaps with \\w in the same character class, and is equivalent to [.\\/0-9:;<=>?]. |
|
||||||
|
| test.py:31:30:31:32 | \ufffd-\ufffd | Suspicious character range that overlaps with \\ufffd-\\ufffd in the same character class. |
|
||||||
|
|||||||
@@ -27,3 +27,5 @@ numberToLetter = re.compile(r'[7-F]') # NOT OK
|
|||||||
overlapsWithClass1 = re.compile(r'[0-9\d]') # NOT OK
|
overlapsWithClass1 = re.compile(r'[0-9\d]') # NOT OK
|
||||||
|
|
||||||
overlapsWithClass2 = re.compile(r'[\w,.-?:*+]') # NOT OK
|
overlapsWithClass2 = re.compile(r'[\w,.-?:*+]') # NOT OK
|
||||||
|
|
||||||
|
unicodeStuff = re.compile('[\U0001D173-\U0001D17A\U000E0020-\U000E007F\U000e0001]') # NOT OK
|
||||||
@@ -9,6 +9,7 @@ private import RegexTreeView
|
|||||||
*/
|
*/
|
||||||
module Make<RegexTreeViewSig TreeImpl> {
|
module Make<RegexTreeViewSig TreeImpl> {
|
||||||
private import TreeImpl
|
private import TreeImpl
|
||||||
|
private import codeql.util.Strings as Strings
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a rank for `range` that is unique for ranges in the same file.
|
* Gets a rank for `range` that is unique for ranges in the same file.
|
||||||
@@ -213,7 +214,7 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
bindingset[char]
|
bindingset[char]
|
||||||
private string escape(string char) {
|
private string escape(string char) {
|
||||||
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
|
exists(string reg | reg = "(\\[|\\]|\\\\|-|/)" |
|
||||||
if char.regexpMatch(reg) then result = "\\" + char else result = char
|
if char.regexpMatch(reg) then result = "\\" + char else result = Strings::escape(char)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -257,14 +258,15 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
or
|
or
|
||||||
priority = 1 and
|
priority = 1 and
|
||||||
exists(RegExpCharacterRange other |
|
exists(RegExpCharacterRange other |
|
||||||
reason = "overlaps with " + other + " in the same character class" and
|
reason = "overlaps with " + Strings::escape(other.toString()) + " in the same character class" and
|
||||||
rankRange(result) < rankRange(other) and
|
rankRange(result) < rankRange(other) and
|
||||||
overlap(result, other)
|
overlap(result, other)
|
||||||
)
|
)
|
||||||
or
|
or
|
||||||
priority = 2 and
|
priority = 2 and
|
||||||
exists(RegExpCharacterClassEscape escape |
|
exists(RegExpCharacterClassEscape escape |
|
||||||
reason = "overlaps with " + escape + " in the same character class" and
|
reason =
|
||||||
|
"overlaps with " + escapeRegExpCharacterClassEscape(escape) + " in the same character class" and
|
||||||
overlapsWithCharEscape(result, escape)
|
overlapsWithCharEscape(result, escape)
|
||||||
)
|
)
|
||||||
or
|
or
|
||||||
@@ -276,6 +278,13 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pragma[inline]
|
||||||
|
private string escapeRegExpCharacterClassEscape(RegExpCharacterClassEscape escape) {
|
||||||
|
if escape.toString().matches("%-%")
|
||||||
|
then result = Strings::escape(escape.toString()) // might contain unicode characters
|
||||||
|
else result = escape.toString() // just a plain `\d` or `\w` etc. Those are already escaped.
|
||||||
|
}
|
||||||
|
|
||||||
/** Holds if `range` matches suspiciously many characters. */
|
/** Holds if `range` matches suspiciously many characters. */
|
||||||
predicate problem(RegExpCharacterRange range, string reason) {
|
predicate problem(RegExpCharacterRange range, string reason) {
|
||||||
reason =
|
reason =
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
private import codeql.regex.RegexTreeView
|
private import codeql.regex.RegexTreeView
|
||||||
private import codeql.util.Numbers
|
private import codeql.util.Numbers
|
||||||
|
private import codeql.util.Strings
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Classes and predicates that create an NFA and various algorithms for working with it.
|
* Classes and predicates that create an NFA and various algorithms for working with it.
|
||||||
@@ -15,34 +16,7 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
* Gets the char after `c` (from a simplified ASCII table).
|
* Gets the char after `c` (from a simplified ASCII table).
|
||||||
*/
|
*/
|
||||||
private string nextChar(string c) {
|
private string nextChar(string c) {
|
||||||
exists(int code | code = ascii(c) | code + 1 = ascii(result))
|
exists(int code | code = asciiPrintable(c) | code + 1 = asciiPrintable(result))
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the `i`th codepoint in `s`.
|
|
||||||
*/
|
|
||||||
bindingset[s]
|
|
||||||
private string getCodepointAt(string s, int i) { result = s.regexpFind("(.|\\s)", i, _) }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the length of `s` in codepoints.
|
|
||||||
*/
|
|
||||||
bindingset[str]
|
|
||||||
private int getCodepointLength(string str) {
|
|
||||||
result = str.regexpReplaceAll("(.|\\s)", "x").length()
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets an approximation for the ASCII code for `char`.
|
|
||||||
* Only the easily printable chars are included (so no newline, tab, null, etc).
|
|
||||||
*/
|
|
||||||
private int ascii(string char) {
|
|
||||||
char =
|
|
||||||
rank[result](string c |
|
|
||||||
c =
|
|
||||||
"! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
|
|
||||||
.charAt(_)
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -394,7 +368,7 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
* Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matches by the regexp `/\s|\d|\w/`.
|
* Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matches by the regexp `/\s|\d|\w/`.
|
||||||
*/
|
*/
|
||||||
string getARelevantChar() {
|
string getARelevantChar() {
|
||||||
exists(ascii(result))
|
exists(asciiPrintable(result))
|
||||||
or
|
or
|
||||||
exists(RegexpCharacterConstant c | result = getCodepointAt(c.getValue(), _))
|
exists(RegexpCharacterConstant c | result = getCodepointAt(c.getValue(), _))
|
||||||
or
|
or
|
||||||
@@ -1191,7 +1165,7 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
private string relevant(RegExpRoot root) {
|
private string relevant(RegExpRoot root) {
|
||||||
root = relevantRoot() and
|
root = relevantRoot() and
|
||||||
(
|
(
|
||||||
exists(ascii(result)) and exists(root)
|
exists(asciiPrintable(result)) and exists(root)
|
||||||
or
|
or
|
||||||
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _))
|
||||||
or
|
or
|
||||||
@@ -1322,49 +1296,6 @@ module Make<RegexTreeViewSig TreeImpl> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the result of backslash-escaping newlines, carriage-returns and
|
|
||||||
* backslashes in `s`.
|
|
||||||
*/
|
|
||||||
bindingset[s]
|
|
||||||
private string escape(string s) {
|
|
||||||
result =
|
|
||||||
escapeUnicodeString(s.replaceAll("\\", "\\\\")
|
|
||||||
.replaceAll("\n", "\\n")
|
|
||||||
.replaceAll("\r", "\\r")
|
|
||||||
.replaceAll("\t", "\\t"))
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a string where the unicode characters in `s` have been escaped.
|
|
||||||
*/
|
|
||||||
bindingset[s]
|
|
||||||
private string escapeUnicodeString(string s) {
|
|
||||||
result =
|
|
||||||
concat(int i, string char | char = escapeUnicodeChar(getCodepointAt(s, i)) | char order by i)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets a unicode escaped string for `char`.
|
|
||||||
* If `char` is a printable char, then `char` is returned.
|
|
||||||
*/
|
|
||||||
bindingset[char]
|
|
||||||
private string escapeUnicodeChar(string char) {
|
|
||||||
if isPrintable(char)
|
|
||||||
then result = char
|
|
||||||
else
|
|
||||||
if exists(to4digitHex(any(int i | i.toUnicode() = char)))
|
|
||||||
then result = "\\u" + to4digitHex(any(int i | i.toUnicode() = char))
|
|
||||||
else result = "\\u{" + toHex(any(int i | i.toUnicode() = char)) + "}"
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Holds if `char` is easily printable char, or whitespace. */
|
|
||||||
private predicate isPrintable(string char) {
|
|
||||||
exists(ascii(char))
|
|
||||||
or
|
|
||||||
char = "\n\r\t".charAt(_)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets `str` with the last `i` characters moved to the front.
|
* Gets `str` with the last `i` characters moved to the front.
|
||||||
*
|
*
|
||||||
|
|||||||
68
shared/util/codeql/util/Strings.qll
Normal file
68
shared/util/codeql/util/Strings.qll
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
private import Numbers
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the result of backslash-escaping newlines, carriage-returns, backslashes, and unicode characters in `s`.
|
||||||
|
*/
|
||||||
|
bindingset[s]
|
||||||
|
string escape(string s) {
|
||||||
|
result =
|
||||||
|
escapeUnicodeString(s.replaceAll("\\", "\\\\")
|
||||||
|
.replaceAll("\n", "\\n")
|
||||||
|
.replaceAll("\r", "\\r")
|
||||||
|
.replaceAll("\t", "\\t"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a string where the unicode characters in `s` have been escaped.
|
||||||
|
*/
|
||||||
|
bindingset[s]
|
||||||
|
private string escapeUnicodeString(string s) {
|
||||||
|
result =
|
||||||
|
concat(int i, string char | char = escapeUnicodeChar(getCodepointAt(s, i)) | char order by i)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a unicode escaped string for `char`.
|
||||||
|
* If `char` is a printable char, then `char` is returned.
|
||||||
|
*/
|
||||||
|
bindingset[char]
|
||||||
|
private string escapeUnicodeChar(string char) {
|
||||||
|
if isPrintable(char)
|
||||||
|
then result = char
|
||||||
|
else
|
||||||
|
if exists(to4digitHex(any(int i | i.toUnicode() = char)))
|
||||||
|
then result = "\\u" + to4digitHex(any(int i | i.toUnicode() = char))
|
||||||
|
else result = "\\u{" + toHex(any(int i | i.toUnicode() = char)) + "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Holds if `char` is easily printable char, or whitespace. */
|
||||||
|
private predicate isPrintable(string char) {
|
||||||
|
exists(asciiPrintable(char))
|
||||||
|
or
|
||||||
|
char = "\n\r\t".charAt(_)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the `i`th codepoint in `s`.
|
||||||
|
*/
|
||||||
|
bindingset[s]
|
||||||
|
string getCodepointAt(string s, int i) { result = s.regexpFind("(.|\\s)", i, _) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the length of `s` in codepoints.
|
||||||
|
*/
|
||||||
|
bindingset[str]
|
||||||
|
int getCodepointLength(string str) { result = str.regexpReplaceAll("(.|\\s)", "x").length() }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the ASCII code for `char`.
|
||||||
|
* Only the easily printable chars are included (so no newline, tab, null, etc).
|
||||||
|
*/
|
||||||
|
int asciiPrintable(string char) {
|
||||||
|
char =
|
||||||
|
rank[result](string c |
|
||||||
|
c =
|
||||||
|
"! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
|
||||||
|
.charAt(_)
|
||||||
|
)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user