Merge pull request #230 from github/redos-enable-tounicode

enable unicode parsing in the ReDoS query
This commit is contained in:
Arthur Baars
2021-08-02 10:42:09 +02:00
committed by GitHub
3 changed files with 43 additions and 37 deletions

View File

@@ -351,49 +351,48 @@ class RegExpEscape extends RegExpNormalChar {
* E.g. for `\u0061` this returns "a".
*/
private string getUnicode() {
  // Every hex digit of the escape contributes `digit * 16^position` (see
  // `getHexValueFromUnicode`); the sum of those contributions is the code
  // point, which `toUnicode()` turns into the corresponding character.
  exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
    result = codepoint.toUnicode()
  )
}
/**
 * Gets the contribution of the hex digit at position `index` to the code point
 * of this unicode escape: the digit's value multiplied by 16 raised to the
 * digit's distance from the last hex digit.
 * E.g. for `\u0061` and `index = 2` this returns 96 (the digit `6` in the
 * sixteens place), and for `index = 3` it returns 1.
 */
private int getHexValueFromUnicode(int index) {
  this.isUnicode() and
  // Skip the two-character prefix of the escape text (`\u` in the example above),
  // leaving only the hex digits.
  exists(string digits | digits = this.getText().suffix(2) |
    result = toHex(digits.charAt(index)) * 16.pow(digits.length() - index - 1)
  )
}
// NOTE: superseded by the enabled `getHexValueFromUnicode` above; the commented-out copy below is obsolete.
///**
// * Gets int value for the `index`th char in the hex number of the unicode escape.
// * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
// */
//private int getHexValueFromUnicode(int index) {
// isUnicode() and
// exists(string hex, string char | hex = getText().suffix(2) |
// char = hex.charAt(index) and
// result = 16.pow(hex.length() - index - 1) * toHex(char)
// )
//}
/**
 * Gets the text of this escape with its first character removed
 * (presumably the leading backslash; confirm against `getText()`).
 */
string getUnescaped() {
  exists(string text | text = this.getText() | result = text.suffix(1))
}
/** Gets the primary QL class name reported for this element, `RegExpEscape`. */
override string getAPrimaryQlClass() {
  result = "RegExpEscape"
}
}
///**
// * Gets the hex number for the `hex` char.
// */
//private int toHex(string hex) {
// hex = [0 .. 9].toString() and
// result = hex.toInt()
// or
// result = 10 and hex = ["a", "A"]
// or
// result = 11 and hex = ["b", "B"]
// or
// result = 12 and hex = ["c", "C"]
// or
// result = 13 and hex = ["d", "D"]
// or
// result = 14 and hex = ["e", "E"]
// or
// result = 15 and hex = ["f", "F"]
//}
/**
 * Gets the numeric value (0 to 15) of the single hexadecimal digit `hex`.
 * Letters are accepted in either case; any other string has no result.
 */
private int toHex(string hex) {
  // Decimal digits `0`-`9` map to themselves.
  result = [0 .. 9] and
  hex = result.toString()
  or
  // Letters `a`-`f` / `A`-`F` map to 10-15 according to their position in the alphabet.
  result = [10 .. 15] and
  hex = ["abcdef", "ABCDEF"].charAt(result - 10)
}
/**
* A character class escape in a regular expression.
* That is, an escaped character that denotes multiple characters.

View File

@@ -90,3 +90,4 @@
| tst.rb:361:11:361:29 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
| tst.rb:362:11:362:31 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
| tst.rb:363:11:363:34 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
| tst.rb:369:12:369:22 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -363,4 +363,10 @@ bad85 = /^((?:a{0,|-)|\w\{\d,)+X$/
bad86 = /^((?:a{0,2|-)|\w\{\d,\d)+X$/
# GOOD:
good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
good42 = /^((?:a{0,2}|-)|\w\{\d,\d\})+X$/
# NOT GOOD
bad87 = /^X(\u0061|a)*Y$/
# GOOD
good43 = /^X(\u0061|b)+Y$/