Python: treat \A, \Z, \b, \B as special chars, not escapes

This commit is contained in:
Nick Rolfe
2021-11-19 15:49:53 +00:00
parent f63c768d9f
commit df6ba43cca
8 changed files with 67 additions and 16 deletions

View File

@@ -58,6 +58,11 @@
| \\A[+-]?\\d+ | 3 | 4 |
| \\A[+-]?\\d+ | 4 | 5 |
| \\A[+-]?\\d+ | 7 | 9 |
| \\Afoo\\Z | 0 | 2 |
| \\Afoo\\Z | 2 | 3 |
| \\Afoo\\Z | 3 | 4 |
| \\Afoo\\Z | 4 | 5 |
| \\Afoo\\Z | 5 | 7 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 12 | 13 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | 16 | 18 |
@@ -71,6 +76,11 @@
| \\\|\\[\\][123]\|\\{\\} | 9 | 10 |
| \\\|\\[\\][123]\|\\{\\} | 12 | 14 |
| \\\|\\[\\][123]\|\\{\\} | 14 | 16 |
| \\bfoo\\B | 0 | 2 |
| \\bfoo\\B | 2 | 3 |
| \\bfoo\\B | 3 | 4 |
| \\bfoo\\B | 4 | 5 |
| \\bfoo\\B | 5 | 7 |
| \|x | 1 | 2 |
| ^(^y\|^z)(u$\|v$)$ | 0 | 1 |
| ^(^y\|^z)(u$\|v$)$ | 2 | 3 |

View File

@@ -45,8 +45,16 @@
| \\+0 | first | 0 | 2 |
| \\+0 | last | 2 | 3 |
| \\A[+-]?\\d+ | first | 0 | 2 |
| \\A[+-]?\\d+ | first | 2 | 6 |
| \\A[+-]?\\d+ | first | 2 | 7 |
| \\A[+-]?\\d+ | first | 7 | 9 |
| \\A[+-]?\\d+ | first | 7 | 10 |
| \\A[+-]?\\d+ | last | 7 | 9 |
| \\A[+-]?\\d+ | last | 7 | 10 |
| \\Afoo\\Z | first | 0 | 2 |
| \\Afoo\\Z | first | 2 | 3 |
| \\Afoo\\Z | last | 4 | 5 |
| \\Afoo\\Z | last | 5 | 7 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 33 |
@@ -54,6 +62,8 @@
| \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 |
| \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 |
| \\\|\\[\\][123]\|\\{\\} | last | 14 | 16 |
| \\bfoo\\B | first | 0 | 2 |
| \\bfoo\\B | last | 5 | 7 |
| \|x | first | 1 | 2 |
| \|x | last | 1 | 2 |
| ^(^y\|^z)(u$\|v$)$ | first | 0 | 1 |

View File

@@ -116,7 +116,7 @@
| \\+0 | char | 0 | 2 |
| \\+0 | char | 2 | 3 |
| \\+0 | sequence | 0 | 3 |
| \\A[+-]?\\d+ | char | 0 | 2 |
| \\A[+-]?\\d+ | \\A | 0 | 2 |
| \\A[+-]?\\d+ | char | 3 | 4 |
| \\A[+-]?\\d+ | char | 4 | 5 |
| \\A[+-]?\\d+ | char | 7 | 9 |
@@ -124,6 +124,12 @@
| \\A[+-]?\\d+ | qualified | 2 | 7 |
| \\A[+-]?\\d+ | qualified | 7 | 10 |
| \\A[+-]?\\d+ | sequence | 0 | 10 |
| \\Afoo\\Z | \\A | 0 | 2 |
| \\Afoo\\Z | \\Z | 5 | 7 |
| \\Afoo\\Z | char | 2 | 3 |
| \\Afoo\\Z | char | 3 | 4 |
| \\Afoo\\Z | char | 4 | 5 |
| \\Afoo\\Z | sequence | 0 | 7 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 0 | 2 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 12 | 13 |
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | char | 16 | 18 |
@@ -148,6 +154,12 @@
| \\\|\\[\\][123]\|\\{\\} | choice | 0 | 16 |
| \\\|\\[\\][123]\|\\{\\} | sequence | 0 | 11 |
| \\\|\\[\\][123]\|\\{\\} | sequence | 12 | 16 |
| \\bfoo\\B | \\B | 5 | 7 |
| \\bfoo\\B | \\b | 0 | 2 |
| \\bfoo\\B | char | 2 | 3 |
| \\bfoo\\B | char | 3 | 4 |
| \\bfoo\\B | char | 4 | 5 |
| \\bfoo\\B | sequence | 0 | 7 |
| \|x | char | 1 | 2 |
| \|x | choice | 0 | 2 |
| \|x | sequence | 1 | 2 |

View File

@@ -73,3 +73,7 @@ escaped = re.escape("https://www.humblebundle.com/home/library")
# Consistency check
baz = re.compile(r'\+0')
# Anchors: \A, \Z, \b and \B are zero-width assertions (special chars), not escaped chars
re.compile(r'\Afoo\Z')
re.compile(r'\bfoo\B')

View File

@@ -100,5 +100,8 @@
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
| redos.py:380:35:380:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
| redos.py:381:35:381:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
| redos.py:384:26:384:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:385:24:385:30 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| redos.py:386:26:386:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

View File

@@ -378,4 +378,9 @@ good44 = re.compile(r'("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)')
# BAD: [^"\s]+ nested inside (...)+ backtracks exponentially once the trailing X cannot match
bad88 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X')
bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')
bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')  # NOTE(review): exact repeat of the line above - confirm
# BAD: \A, \Z and \b consume no input, so the ambiguous (\d|0)* must still be flagged
bad90 = re.compile(r'\A(\d|0)*x')
bad91 = re.compile(r'(\d|0)*\Z')
bad92 = re.compile(r'\b(\d|0)*x')