Python: make mode characters not be characters

They are simply considered part of the group start.
This commit is contained in:
Rasmus Lerchedahl Petersen
2023-08-15 21:23:50 +02:00
parent a834703195
commit 7ad1a21c2d
2 changed files with 29 additions and 8 deletions

View File

@@ -683,12 +683,34 @@ class RegExp extends Expr instanceof StrConst {
* Holds if a parse mode starts between `start` and `end`.
*/
private predicate flag_group_start(int start, int end) {
  // Only a `start` that opens a flag group qualifies; where the prefix ends
  // is irrelevant here, so that argument is a don't-care.
  this.flag_group_start_no_modes(start, _) and
  // `end` is one past the last position that `mode_character` reaches from `start`.
  end = max(int i | this.mode_character(start, i) | i + 1)
}
/**
* Holds if the initial part of a parse mode, not containing any
* mode characters, is between `start` and `end`.
*/
private predicate flag_group_start_no_modes(int start, int end) {
  // The prefix occupies two positions, so `end` is where the first flag letter sits.
  end = start + 2 and
  this.isGroupStart(start) and
  this.getChar(end - 1) = "?" and
  // Require at least one flag letter directly after the `?`.
  this.getChar(end) in ["i", "L", "m", "s", "u", "x"]
}
/**
* Holds if `pos` contains a mode character from the
* flag group starting at `start`.
*/
private predicate mode_character(int start, int pos) {
  // Base case: the position immediately after the flag group prefix.
  this.flag_group_start_no_modes(start, pos)
  or
  // Recursive case: a flag letter directly following an already accepted position.
  exists(int prev | prev = pos - 1 |
    this.getChar(pos) in ["i", "L", "m", "s", "u", "x"] and
    this.mode_character(start, prev)
  )
}
/**
* Holds if a parse mode group is between `start` and `end`, and includes the
* mode flag `c`. For example the following span, with mode flag `i`:
@@ -696,11 +718,10 @@ class RegExp extends Expr instanceof StrConst {
* (?i)
* ```
*/
private predicate flag_group(int start, int end, string c) {
exists(int inStart, int inEnd |
this.flag_group_start(start, inStart) and
this.groupContents(start, end, inStart, inEnd) and
this.getChar([inStart .. inEnd - 1]) = c
private predicate flag(string c) {
  // `c` is the character at any position accepted by `mode_character`,
  // regardless of which flag group that position belongs to.
  c = this.getChar(any(int pos | this.mode_character(_, pos)))
}
@@ -709,7 +730,7 @@ class RegExp extends Expr instanceof StrConst {
* it is defined by a prefix.
*/
string getModeFromPrefix() {
exists(string c | this.flag_group(_, _, c) |
exists(string c | this.flag(c) |
c = "i" and result = "IGNORECASE"
or
c = "L" and result = "LOCALE"

View File

@@ -105,5 +105,5 @@
| redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\\u00c6'. |
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 's' and containing many repetitions of '\\n'. |
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'is' and containing many repetitions of '\\n'. |
| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |