mirror of
https://github.com/github/codeql.git
synced 2026-05-03 04:39:29 +02:00
Merge branch 'python-port-ReDoS' of github.com:yoff/codeql into python-port-ReDoS
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
lgtm,codescanning
|
||||
* Ported _Inefficient regular expression_ (`py/redos`) query from javascript.
|
||||
* Ported _Polynomial regular expression used on uncontrolled data_ [`py/polynomial-redos`] query from javascript.
|
||||
* Added _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
|
||||
* Added _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
|
||||
|
||||
@@ -130,36 +130,57 @@ abstract class RegexString extends Expr {
|
||||
/** result is true for those start chars that actually mark a start of a char set. */
|
||||
boolean char_set_start(int pos) {
|
||||
exists(int index |
|
||||
char_set_delimiter(index, pos) = true and
|
||||
// is opening bracket
|
||||
this.char_set_delimiter(index, pos) = true and
|
||||
(
|
||||
index = 1 and result = true // if a '[' is first in the string (among brackets), it starts a char set
|
||||
// if this is the first bracket, `pos` starts a char set
|
||||
index = 1 and result = true
|
||||
or
|
||||
// if the previous char set delimiter was not a closing bracket, `pos` does
|
||||
// not start a char set. This is needed to handle cases such as `[[]` (a
|
||||
// char set that matches the `[` char)
|
||||
index > 1 and
|
||||
not char_set_delimiter(index - 1, _) = false and
|
||||
not this.char_set_delimiter(index - 1, _) = false and
|
||||
result = false
|
||||
or
|
||||
exists(int p1 |
|
||||
char_set_delimiter(index - 1, p1) = false and // if it is preceded by a closing bracket, it starts a char set
|
||||
// special handling of cases such as `[][]` (the character-set of the characters `]` and `[`).
|
||||
exists(int prev_closing_bracket_pos |
|
||||
// previous bracket is a closing bracket
|
||||
this.char_set_delimiter(index - 1, prev_closing_bracket_pos) = false and
|
||||
if
|
||||
exists(int p2 |
|
||||
p1 = p2 + 1
|
||||
or
|
||||
this.getChar(p2 + 1) = "^" and
|
||||
p1 = p2 + 2
|
||||
// check if the character that comes before the previous closing bracket
|
||||
// is an opening bracket (taking `^` into account)
|
||||
exists(int pos_before_prev_closing_bracket |
|
||||
if this.getChar(prev_closing_bracket_pos - 1) = "^"
|
||||
then pos_before_prev_closing_bracket = prev_closing_bracket_pos - 2
|
||||
else pos_before_prev_closing_bracket = prev_closing_bracket_pos - 1
|
||||
|
|
||||
char_set_delimiter(index - 2, p2) = true // but the closing bracket only closes...
|
||||
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
|
||||
)
|
||||
then
|
||||
exists(int p2 | char_set_delimiter(index - 2, p2) = true |
|
||||
result = char_set_start(p2).booleanNot() // ...if it is not the first in a char set
|
||||
// brackets without anything in between are not valid character ranges, so
|
||||
// the first closing bracket in `[]]` and `[^]]` does not count,
|
||||
//
|
||||
// and we should _not_ mark the second opening bracket in `[][]` and `[^][]`
|
||||
// as starting a new char set. ^ ^
|
||||
exists(int pos_before_prev_closing_bracket |
|
||||
this.char_set_delimiter(index - 2, pos_before_prev_closing_bracket) = true
|
||||
|
|
||||
result = this.char_set_start(pos_before_prev_closing_bracket).booleanNot()
|
||||
)
|
||||
else result = true
|
||||
else
|
||||
// if not, `pos` does in fact mark a real start of a character range
|
||||
result = true
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** result denotes if the index is a left bracket */
|
||||
/**
|
||||
* Helper predicate for chars that could be character-set delimiters.
|
||||
* Holds if the (non-escaped) char at `pos` in the string is the (one-based) `index`th occurrence of a bracket (`[` or `]`) in the string.
|
||||
* Result is `true` if the char is `[`, and `false` if the char is `]`.
|
||||
*/
|
||||
boolean char_set_delimiter(int index, int pos) {
|
||||
pos = rank[index](int p | this.nonEscapedCharAt(p) = "[" or this.nonEscapedCharAt(p) = "]") and
|
||||
(
|
||||
|
||||
@@ -1,13 +1,26 @@
|
||||
import re
|
||||
|
||||
re.compile(r'[]-[]') #$ charRange=1:2-3:4
|
||||
|
||||
re.compile(r'[A-Z]') #$ charRange=1:2-3:4
|
||||
|
||||
try:
|
||||
re.compile(r'[]-[]') #$ SPURIOUS: charRange=1:2-3:4
|
||||
raise Exception("this should not be reached")
|
||||
except re.error:
|
||||
pass
|
||||
|
||||
re.compile(r'[---]') #$ charRange=1:2-3:4
|
||||
re.compile(r'[\---]') #$ charRange=1:3-4:5
|
||||
re.compile(r'[--\-]') #$ charRange=1:2-3:5
|
||||
re.compile(r'[\--\-]') #$ charRange=1:3-4:6
|
||||
re.compile(r'[0-9-A-Z]') #$ charRange=1:2-3:4 charRange=5:6-7:8
|
||||
re.compile(r'[0\-9-A-Z]') #$ charRange=4:5-6:7
|
||||
re.compile(r'[0--9-A-Z]') #$ charRange=1:2-3:4 charRange=4:5-6:7
|
||||
|
||||
try:
|
||||
re.compile(r'[0--9-A-Z]') #$ SPURIOUS: charRange=1:2-3:4 charRange=4:5-6:7
|
||||
raise Exception("this should not be reached")
|
||||
except re.error:
|
||||
pass
|
||||
|
||||
re.compile(r'[^A-Z]') #$ charRange=2:3-4:5
|
||||
|
||||
|
||||
@@ -10,8 +10,18 @@ re.compile("[.][.]") #$ charSet=0:3 charSet=3:6
|
||||
re.compile("[[]]") #$ charSet=0:3
|
||||
re.compile("[^]]") #$ charSet=0:4
|
||||
re.compile("[^-]") #$ charSet=0:4
|
||||
re.compile("[]-[]") #$ charSet=0:5
|
||||
re.compile("[^]-[]") #$ charSet=0:6
|
||||
|
||||
try:
|
||||
re.compile("[]-[]") #$ SPURIOUS: charSet=0:5
|
||||
raise Exception("this should not be reached")
|
||||
except re.error:
|
||||
pass
|
||||
|
||||
try:
|
||||
re.compile("[^]-[]") #$ SPURIOUS: charSet=0:6
|
||||
raise Exception("this should not be reached")
|
||||
except re.error:
|
||||
pass
|
||||
|
||||
re.compile("]]][[[[]") #$ charSet=3:8
|
||||
|
||||
|
||||
Reference in New Issue
Block a user