Python: Fix parsing of octal escapes

This commit is contained in:
Rasmus Lerchedahl Petersen
2021-08-04 20:41:56 +02:00
parent 34b054ff53
commit c08f94ec04
9 changed files with 42 additions and 17 deletions

View File

@@ -8,8 +8,6 @@
| (?P<name>[\\w]+)\| | 0 | 16 | (?P<name>[\\w]+)\| | 16 | 16 | |
| (\\033\|~{) | 1 | 8 | \\033\|~{ | 1 | 5 | \\033 |
| (\\033\|~{) | 1 | 8 | \\033\|~{ | 6 | 8 | ~{ |
| \\+0 | 0 | 3 | \\+0 | 0 | 2 | \\+ |
| \\+0 | 0 | 3 | \\+0 | 0 | 3 | \\+0 |
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 0 | 11 | \\\|\\[\\][123] |
| \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 12 | 16 | \\{\\} |
| \|x | 0 | 2 | \|x | 0 | 0 | |

View File

@@ -53,7 +53,7 @@
| [^A-Z] | 4 | 5 |
| [^]] | 2 | 3 |
| \\+0 | 0 | 2 |
| \\+0 | 0 | 3 |
| \\+0 | 2 | 3 |
| \\A[+-]?\\d+ | 0 | 2 |
| \\A[+-]?\\d+ | 3 | 4 |
| \\A[+-]?\\d+ | 4 | 5 |

View File

@@ -43,9 +43,7 @@
| [^]] | first | 0 | 4 |
| [^]] | last | 0 | 4 |
| \\+0 | first | 0 | 2 |
| \\+0 | first | 0 | 3 |
| \\+0 | last | 0 | 2 |
| \\+0 | last | 0 | 3 |
| \\+0 | last | 2 | 3 |
| \\A[+-]?\\d+ | first | 0 | 2 |
| \\A[+-]?\\d+ | last | 7 | 9 |
| \\A[+-]?\\d+ | last | 7 | 10 |

View File

@@ -114,9 +114,7 @@
| [^]] | char-set | 0 | 4 |
| [^]] | sequence | 0 | 4 |
| \\+0 | char | 0 | 2 |
| \\+0 | char | 0 | 3 |
| \\+0 | choice | 0 | 3 |
| \\+0 | sequence | 0 | 2 |
| \\+0 | char | 2 | 3 |
| \\+0 | sequence | 0 | 3 |
| \\A[+-]?\\d+ | char | 0 | 2 |
| \\A[+-]?\\d+ | char | 3 | 4 |

View File

@@ -24,7 +24,8 @@ except re.error:
re.compile(r'[^A-Z]') #$ charRange=2:3-4:5
re.compile(r'[\0-\09]') #$ charRange=1:3-4:7
re.compile(r'[\0-\09]') #$ charRange=1:3-4:6
re.compile(r'[\0-\07]') #$ charRange=1:3-4:7
re.compile(r'[\0123-5]') #$ charRange=5:6-7:8

View File

@@ -10,8 +10,10 @@ re.compile(r'[\---]') #$ escapedCharacter=1:3
re.compile(r'[--\-]') #$ escapedCharacter=3:5
re.compile(r'[\--\-]') #$ escapedCharacter=1:3 escapedCharacter=4:6
re.compile(r'[0\-9-A-Z]') #$ escapedCharacter=2:4
re.compile(r'[\0-\09]') #$ escapedCharacter=1:3 escapedCharacter=4:7
re.compile(r'[\0-\09]') #$ escapedCharacter=1:3 escapedCharacter=4:6
re.compile(r'[\0-\07]') #$ escapedCharacter=1:3 escapedCharacter=4:7
re.compile(r'[\0123-5]') #$ escapedCharacter=1:5
re.compile(r'\1754\1854\17\18\07\08') #$ escapedCharacter=0:4 escapedCharacter=16:19 escapedCharacter=19:21
#ODASA-3985
#Half Surrogate pairs
@@ -21,3 +23,9 @@ re.compile(u'[\U00010000-\U0010ffff]') # not escapes
#Misparsed on LGTM
re.compile(r"\[(?P<txt>[^[]*)\]\((?P<uri>[^)]*)") #$ escapedCharacter=0:2 escapedCharacter=16:18 escapedCharacter=18:20
#Non-raw string
re_blank = re.compile('(\n|\r|\\s)*\n', re.M) #$ escapedCharacter=5:7
#Backreference confusion
re.compile(r'\+0') #$ escapedCharacter=0:2

View File

@@ -1 +0,0 @@
| \\+0 | 2 | test.py:2:18:2:23 | test.py:2 |