mirror of
https://github.com/github/codeql.git
synced 2026-04-24 08:15:14 +02:00
Merge pull request #15390 from Marcono1234/marcono1234/python-ascii-regex-flag
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
---
|
||||
category: fix
|
||||
---
|
||||
* Fixed the `a` (ASCII) inline flag not being recognized by the regular expression library.
|
||||
@@ -116,13 +116,14 @@ class RegExp extends Expr instanceof StrConst {
|
||||
|
||||
/**
|
||||
* Gets a mode (if any) of this regular expression. Can be any of:
|
||||
* DEBUG
|
||||
* IGNORECASE
|
||||
* LOCALE
|
||||
* MULTILINE
|
||||
* DOTALL
|
||||
* UNICODE
|
||||
* VERBOSE
|
||||
* - DEBUG
|
||||
* - ASCII
|
||||
* - IGNORECASE
|
||||
* - LOCALE
|
||||
* - MULTILINE
|
||||
* - DOTALL
|
||||
* - UNICODE
|
||||
* - VERBOSE
|
||||
*/
|
||||
string getAMode() {
|
||||
result = FindRegexMode::getAMode(this)
|
||||
@@ -705,19 +706,19 @@ class RegExp extends Expr instanceof StrConst {
|
||||
private predicate flag_group_start_no_modes(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and
|
||||
this.getChar(start + 2) in ["a", "i", "L", "m", "s", "u", "x"] and
|
||||
end = start + 2
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `pos` contains a mo character from the
|
||||
* Holds if `pos` contains a mode character from the
|
||||
* flag group starting at `start`.
|
||||
*/
|
||||
private predicate mode_character(int start, int pos) {
|
||||
this.flag_group_start_no_modes(start, pos)
|
||||
or
|
||||
this.mode_character(start, pos - 1) and
|
||||
this.getChar(pos) in ["i", "L", "m", "s", "u", "x"]
|
||||
this.getChar(pos) in ["a", "i", "L", "m", "s", "u", "x"]
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -740,6 +741,8 @@ class RegExp extends Expr instanceof StrConst {
|
||||
*/
|
||||
string getModeFromPrefix() {
|
||||
exists(string c | this.flag(c) |
|
||||
c = "a" and result = "ASCII"
|
||||
or
|
||||
c = "i" and result = "IGNORECASE"
|
||||
or
|
||||
c = "L" and result = "LOCALE"
|
||||
|
||||
@@ -20,6 +20,13 @@
|
||||
| (?!not-this)^[A-Z_]+$ | 16 | 17 |
|
||||
| (?!not-this)^[A-Z_]+$ | 17 | 18 |
|
||||
| (?!not-this)^[A-Z_]+$ | 20 | 21 |
|
||||
| (?-imsx:a+) | 2 | 3 |
|
||||
| (?-imsx:a+) | 3 | 4 |
|
||||
| (?-imsx:a+) | 4 | 5 |
|
||||
| (?-imsx:a+) | 5 | 6 |
|
||||
| (?-imsx:a+) | 6 | 7 |
|
||||
| (?-imsx:a+) | 7 | 8 |
|
||||
| (?-imsx:a+) | 8 | 9 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | 6 | 7 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 8 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | 11 | 12 |
|
||||
@@ -35,9 +42,21 @@
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 19 | 21 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 22 | 23 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 24 | 25 |
|
||||
| (?Li)a+ | 5 | 6 |
|
||||
| (?P<name>[\\w]+)\| | 10 | 12 |
|
||||
| (?a-imsx:a+) | 3 | 4 |
|
||||
| (?a-imsx:a+) | 4 | 5 |
|
||||
| (?a-imsx:a+) | 5 | 6 |
|
||||
| (?a-imsx:a+) | 6 | 7 |
|
||||
| (?a-imsx:a+) | 7 | 8 |
|
||||
| (?a-imsx:a+) | 8 | 9 |
|
||||
| (?a-imsx:a+) | 9 | 10 |
|
||||
| (?aimsx)a+ | 8 | 9 |
|
||||
| (?aimsx:a+) | 7 | 8 |
|
||||
| (?aimsx:a+) | 8 | 9 |
|
||||
| (?m)^(?!$) | 4 | 5 |
|
||||
| (?m)^(?!$) | 8 | 9 |
|
||||
| (?ui)a+ | 5 | 6 |
|
||||
| (\\033\|~{) | 1 | 5 |
|
||||
| (\\033\|~{) | 6 | 7 |
|
||||
| (\\033\|~{) | 7 | 8 |
|
||||
|
||||
@@ -18,14 +18,32 @@
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 8 | 9 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 11 | 12 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | last | 21 | 26 |
|
||||
| (?Li)a+ | first | 5 | 6 |
|
||||
| (?Li)a+ | first | 5 | 7 |
|
||||
| (?Li)a+ | last | 5 | 6 |
|
||||
| (?Li)a+ | last | 5 | 7 |
|
||||
| (?P<name>[\\w]+)\| | first | 9 | 13 |
|
||||
| (?P<name>[\\w]+)\| | first | 9 | 14 |
|
||||
| (?P<name>[\\w]+)\| | last | 9 | 13 |
|
||||
| (?P<name>[\\w]+)\| | last | 9 | 14 |
|
||||
| (?a-imsx:a+) | first | 3 | 9 |
|
||||
| (?a-imsx:a+) | last | 9 | 10 |
|
||||
| (?a-imsx:a+) | last | 9 | 11 |
|
||||
| (?aimsx)a+ | first | 8 | 9 |
|
||||
| (?aimsx)a+ | first | 8 | 10 |
|
||||
| (?aimsx)a+ | last | 8 | 9 |
|
||||
| (?aimsx)a+ | last | 8 | 10 |
|
||||
| (?aimsx:a+) | first | 7 | 8 |
|
||||
| (?aimsx:a+) | last | 8 | 9 |
|
||||
| (?aimsx:a+) | last | 8 | 10 |
|
||||
| (?m)^(?!$) | first | 4 | 5 |
|
||||
| (?m)^(?!$) | first | 8 | 9 |
|
||||
| (?m)^(?!$) | last | 4 | 5 |
|
||||
| (?m)^(?!$) | last | 8 | 9 |
|
||||
| (?ui)a+ | first | 5 | 6 |
|
||||
| (?ui)a+ | first | 5 | 7 |
|
||||
| (?ui)a+ | last | 5 | 6 |
|
||||
| (?ui)a+ | last | 5 | 7 |
|
||||
| (\\033\|~{) | first | 1 | 5 |
|
||||
| (\\033\|~{) | first | 6 | 8 |
|
||||
| (\\033\|~{) | last | 1 | 5 |
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 10 | (?:[^%]\|^) | 3 | 9 | [^%]\|^ |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 14 | 19 | (\\w*) | 15 | 18 | \\w* |
|
||||
| (?P<name>[\\w]+)\| | 0 | 15 | (?P<name>[\\w]+) | 9 | 14 | [\\w]+ |
|
||||
| (?a-imsx:a+) | 0 | 12 | (?a-imsx:a+) | 3 | 11 | -imsx:a+ |
|
||||
| (?aimsx:a+) | 0 | 11 | (?aimsx:a+) | 7 | 10 | :a+ |
|
||||
| (?m)^(?!$) | 5 | 10 | (?!$) | 8 | 9 | $ |
|
||||
| (\\033\|~{) | 0 | 9 | (\\033\|~{) | 1 | 8 | \\033\|~{ |
|
||||
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 2 | 16 | (?P<txt>[^[]*) | 10 | 15 | [^[]* |
|
||||
|
||||
@@ -10,4 +10,19 @@
|
||||
| 54 | DOTALL |
|
||||
| 54 | VERBOSE |
|
||||
| 56 | VERBOSE |
|
||||
| 68 | MULTILINE |
|
||||
| 59 | ASCII |
|
||||
| 59 | DOTALL |
|
||||
| 59 | IGNORECASE |
|
||||
| 59 | MULTILINE |
|
||||
| 59 | VERBOSE |
|
||||
| 60 | IGNORECASE |
|
||||
| 60 | UNICODE |
|
||||
| 61 | IGNORECASE |
|
||||
| 61 | LOCALE |
|
||||
| 63 | ASCII |
|
||||
| 63 | DOTALL |
|
||||
| 63 | IGNORECASE |
|
||||
| 63 | MULTILINE |
|
||||
| 63 | VERBOSE |
|
||||
| 65 | ASCII |
|
||||
| 77 | MULTILINE |
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
| (?!not-this)^[A-Z_]+$ | 13 | 20 | false | true |
|
||||
| (?-imsx:a+) | 8 | 10 | false | true |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | 7 | 9 | true | false |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | 14 | 16 | true | true |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 11 | true | false |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 15 | 18 | true | true |
|
||||
| (?Li)a+ | 5 | 7 | false | true |
|
||||
| (?P<name>[\\w]+)\| | 9 | 14 | false | true |
|
||||
| (?a-imsx:a+) | 9 | 11 | false | true |
|
||||
| (?aimsx)a+ | 8 | 10 | false | true |
|
||||
| (?aimsx:a+) | 8 | 10 | false | true |
|
||||
| (?ui)a+ | 5 | 7 | false | true |
|
||||
| \\A[+-]?\\d+ | 2 | 7 | true | false |
|
||||
| \\A[+-]?\\d+ | 7 | 10 | false | true |
|
||||
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 10 | 15 | true | true |
|
||||
|
||||
@@ -26,6 +26,14 @@
|
||||
| (?!not-this)^[A-Z_]+$ | qualified | 13 | 20 |
|
||||
| (?!not-this)^[A-Z_]+$ | sequence | 0 | 21 |
|
||||
| (?!not-this)^[A-Z_]+$ | sequence | 3 | 11 |
|
||||
| (?-imsx:a+) | char | 2 | 3 |
|
||||
| (?-imsx:a+) | char | 3 | 4 |
|
||||
| (?-imsx:a+) | char | 4 | 5 |
|
||||
| (?-imsx:a+) | char | 5 | 6 |
|
||||
| (?-imsx:a+) | char | 6 | 7 |
|
||||
| (?-imsx:a+) | char | 7 | 8 |
|
||||
| (?-imsx:a+) | char | 8 | 9 |
|
||||
| (?-imsx:a+) | qualified | 8 | 10 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | ^ | 11 | 12 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | char | 6 | 7 |
|
||||
| (?:(?:\n\r?)\|^)( *)\\S | char | 7 | 8 |
|
||||
@@ -69,18 +77,47 @@
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 0 | 26 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 3 | 7 |
|
||||
| (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | sequence | 8 | 9 |
|
||||
| (?Li)a+ | char | 5 | 6 |
|
||||
| (?Li)a+ | empty group | 0 | 5 |
|
||||
| (?Li)a+ | qualified | 5 | 7 |
|
||||
| (?Li)a+ | sequence | 0 | 7 |
|
||||
| (?P<name>[\\w]+)\| | char | 10 | 12 |
|
||||
| (?P<name>[\\w]+)\| | char-set | 9 | 13 |
|
||||
| (?P<name>[\\w]+)\| | choice | 0 | 16 |
|
||||
| (?P<name>[\\w]+)\| | non-empty group | 0 | 15 |
|
||||
| (?P<name>[\\w]+)\| | qualified | 9 | 14 |
|
||||
| (?P<name>[\\w]+)\| | sequence | 0 | 15 |
|
||||
| (?a-imsx:a+) | char | 3 | 4 |
|
||||
| (?a-imsx:a+) | char | 4 | 5 |
|
||||
| (?a-imsx:a+) | char | 5 | 6 |
|
||||
| (?a-imsx:a+) | char | 6 | 7 |
|
||||
| (?a-imsx:a+) | char | 7 | 8 |
|
||||
| (?a-imsx:a+) | char | 8 | 9 |
|
||||
| (?a-imsx:a+) | char | 9 | 10 |
|
||||
| (?a-imsx:a+) | non-empty group | 0 | 12 |
|
||||
| (?a-imsx:a+) | qualified | 9 | 11 |
|
||||
| (?a-imsx:a+) | sequence | 0 | 12 |
|
||||
| (?a-imsx:a+) | sequence | 3 | 11 |
|
||||
| (?aimsx)a+ | char | 8 | 9 |
|
||||
| (?aimsx)a+ | empty group | 0 | 8 |
|
||||
| (?aimsx)a+ | qualified | 8 | 10 |
|
||||
| (?aimsx)a+ | sequence | 0 | 10 |
|
||||
| (?aimsx:a+) | char | 7 | 8 |
|
||||
| (?aimsx:a+) | char | 8 | 9 |
|
||||
| (?aimsx:a+) | non-empty group | 0 | 11 |
|
||||
| (?aimsx:a+) | qualified | 8 | 10 |
|
||||
| (?aimsx:a+) | sequence | 0 | 11 |
|
||||
| (?aimsx:a+) | sequence | 7 | 10 |
|
||||
| (?m)^(?!$) | $ | 8 | 9 |
|
||||
| (?m)^(?!$) | ^ | 4 | 5 |
|
||||
| (?m)^(?!$) | empty group | 0 | 4 |
|
||||
| (?m)^(?!$) | empty group | 5 | 10 |
|
||||
| (?m)^(?!$) | sequence | 0 | 10 |
|
||||
| (?m)^(?!$) | sequence | 8 | 9 |
|
||||
| (?ui)a+ | char | 5 | 6 |
|
||||
| (?ui)a+ | empty group | 0 | 5 |
|
||||
| (?ui)a+ | qualified | 5 | 7 |
|
||||
| (?ui)a+ | sequence | 0 | 7 |
|
||||
| (\\033\|~{) | char | 1 | 5 |
|
||||
| (\\033\|~{) | char | 6 | 7 |
|
||||
| (\\033\|~{) | char | 7 | 8 |
|
||||
|
||||
@@ -55,6 +55,15 @@ re.compile("", re.VERBOSE+re.DOTALL)
|
||||
# re.X is an alias for re.VERBOSE
|
||||
re.compile("", re.X)
|
||||
|
||||
#Inline flags; 'a', 'L' and 'u' are mutually exclusive
|
||||
re.compile("(?aimsx)a+")
|
||||
re.compile("(?ui)a+")
|
||||
re.compile(b"(?Li)a+")
|
||||
#Group with inline flags; TODO: these are not properly parsed and handled yet
|
||||
re.compile("(?aimsx:a+)")
|
||||
re.compile("(?-imsx:a+)")
|
||||
re.compile("(?a-imsx:a+)")
|
||||
|
||||
#empty choice
|
||||
re.compile(r'|x')
|
||||
re.compile(r'x|')
|
||||
|
||||
Reference in New Issue
Block a user