mirror of
https://github.com/github/codeql.git
synced 2026-05-02 12:15:17 +02:00
enable unicode support in the Python ReDoS query
This commit is contained in:
@@ -473,46 +473,44 @@ class RegExpEscape extends RegExpNormalChar {
|
||||
* E.g. for `\u0061` this returns "a".
|
||||
*/
|
||||
private string getUnicode() {
|
||||
// TODO: Enable this once a supporting CLI is released.
|
||||
// exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
|
||||
// result = codepoint.toUnicode()
|
||||
// )
|
||||
none()
|
||||
exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
|
||||
result = codepoint.toUnicode()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the positional value contributed by the `index`th (0-based) hex digit of this unicode escape,
* i.e. the digit's numeric value multiplied by 16 raised to the number of digits that follow it.
|
||||
* E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` sits in the 16s place: 6 * 16).
* `getUnicode` sums these values over all indexes to obtain the code point.
* Has no result when `isUnicode()` does not hold or `index` is outside the hex digits.
|
||||
*/
|
||||
private int getHexValueFromUnicode(int index) {
|
||||
isUnicode() and
|
||||
// Drop the first two characters of the escape text (presumably the `\u` prefix, as in the
// `\u0061` example), leaving only the hex digits.
exists(string hex, string char | hex = getText().suffix(2) |
|
||||
char = hex.charAt(index) and
|
||||
// Weight the digit by its position: the last digit gets 16^0, the one before it 16^1, etc.
result = 16.pow(hex.length() - index - 1) * toHex(char)
|
||||
)
|
||||
}
|
||||
// TODO: Enable this once a supporting CLI is released.
|
||||
// /**
|
||||
// * Gets int value for the `index`th char in the hex number of the unicode escape.
|
||||
// * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
|
||||
// */
|
||||
// private int getHexValueFromUnicode(int index) {
|
||||
// isUnicode() and
|
||||
// exists(string hex, string char | hex = getText().suffix(2) |
|
||||
// char = hex.charAt(index) and
|
||||
// result = 16.pow(hex.length() - index - 1) * toHex(char)
|
||||
// )
|
||||
// }
|
||||
}
|
||||
|
||||
// TODO: Enable this once a supporting CLI is released.
|
||||
// /**
|
||||
// * Gets the hex number for the `hex` char.
|
||||
// */
|
||||
// private int toHex(string hex) {
|
||||
// hex = [0 .. 9].toString() and
|
||||
// result = hex.toInt()
|
||||
// or
|
||||
// result = 10 and hex = ["a", "A"]
|
||||
// or
|
||||
// result = 11 and hex = ["b", "B"]
|
||||
// or
|
||||
// result = 12 and hex = ["c", "C"]
|
||||
// or
|
||||
// result = 13 and hex = ["d", "D"]
|
||||
// or
|
||||
// result = 14 and hex = ["e", "E"]
|
||||
// or
|
||||
// result = 15 and hex = ["f", "F"]
|
||||
// }
|
||||
/**
|
||||
* Gets the numeric value (0 to 15) of the single hexadecimal digit `hex`.
* The letters `a` to `f` are accepted in both lower and upper case.
* Has no result for any other string.
|
||||
*/
|
||||
private int toHex(string hex) {
|
||||
// Decimal digits "0" .. "9" map to their own integer value.
hex = [0 .. 9].toString() and
|
||||
result = hex.toInt()
|
||||
or
|
||||
// Hex letters, in either case, map to 10 .. 15.
result = 10 and hex = ["a", "A"]
|
||||
or
|
||||
result = 11 and hex = ["b", "B"]
|
||||
or
|
||||
result = 12 and hex = ["c", "C"]
|
||||
or
|
||||
result = 13 and hex = ["d", "D"]
|
||||
or
|
||||
result = 14 and hex = ["e", "E"]
|
||||
or
|
||||
result = 15 and hex = ["f", "F"]
|
||||
}
|
||||
|
||||
/**
|
||||
* A character class escape in a regular expression.
|
||||
* That is, an escaped character that denotes multiple characters.
|
||||
|
||||
@@ -92,5 +92,6 @@
|
||||
| redos.py:363:25:363:43 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
|
||||
| redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
|
||||
| redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
|
||||
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
|
||||
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
|
||||
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
|
||||
|
||||
@@ -366,3 +366,9 @@ bad86 = re.compile(r'''^((?:a{0,2|-)|\w\{\d,\d)+X$''')
|
||||
|
||||
# GOOD:
|
||||
good42 = re.compile(r'''^((?:a{0,2}|-)|\w\{\d,\d\})+X$''')
|
||||
|
||||
# NOT GOOD
|
||||
bad87 = re.compile(r'X(\u0061|a)*Y')
|
||||
|
||||
# GOOD
|
||||
good43 = re.compile(r'X(\u0061|b)+Y')
|
||||
Reference in New Issue
Block a user