diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index 0636c485f06..da232c0fdf1 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -565,10 +565,8 @@ abstract class RegexString extends Expr { this.sequenceOrQualified(start, end) and not this.isOptionDivider(start-1) and item_start = start or - exists(int endp1 | end = endp1-1 | - start = end and not this.item_end(start) and this.isOptionDivider(endp1) and - item_start = start - ) + start = end and not this.item_end(start) and this.isOptionDivider(end) and + item_start = start or exists(int mid | this.subalternation(start, mid, _) and diff --git a/python/ql/test/library-tests/regex/Alternation.expected b/python/ql/test/library-tests/regex/Alternation.expected index cba6212a273..2fe6572074e 100644 --- a/python/ql/test/library-tests/regex/Alternation.expected +++ b/python/ql/test/library-tests/regex/Alternation.expected @@ -1,5 +1,7 @@ | (?:(?:\n\r?)\|^)( *)\\S | 3 | 12 | (?:\n\r?)\|^ | 3 | 10 | (?:\n\r?) | | (?:(?:\n\r?)\|^)( *)\\S | 3 | 12 | (?:\n\r?)\|^ | 11 | 12 | ^ | +| (?:(?P^(?:\|x))) | 14 | 16 | \|x | 14 | 14 | | +| (?:(?P^(?:\|x))) | 14 | 16 | \|x | 15 | 16 | x | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 3 | 9 | [^%]\|^ | 3 | 7 | [^%] | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 3 | 9 | [^%]\|^ | 8 | 9 | ^ | | (?P[\\w]+)\| | 0 | 16 | (?P[\\w]+)\| | 0 | 15 | (?P[\\w]+) | @@ -8,10 +10,13 @@ | (\\033\|~{) | 1 | 8 | \\033\|~{ | 6 | 8 | ~{ | | \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 0 | 11 | \\\|\\[\\][123] | | \\\|\\[\\][123]\|\\{\\} | 0 | 16 | \\\|\\[\\][123]\|\\{\\} | 12 | 16 | \\{\\} | +| \|x | 0 | 2 | \|x | 0 | 0 | | +| \|x | 0 | 2 | \|x | 1 | 2 | x | | ^(^y\|^z)(u$\|v$)$ | 2 | 7 | ^y\|^z | 2 | 4 | ^y | | ^(^y\|^z)(u$\|v$)$ | 2 | 7 | ^y\|^z | 5 | 7 | ^z | | ^(^y\|^z)(u$\|v$)$ | 9 | 14 | u$\|v$ | 9 | 11 | u$ | | ^(^y\|^z)(u$\|v$)$ | 9 | 14 | u$\|v$ | 12 | 14 | v$ | -| x\|(?^(?:\|x))) | 10 | 11 | +| (?:(?P^(?:\|x))) | 15 | 16 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 5 | 6 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 8 | 9 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 11 | 12 | @@ -62,6 +64,7 @@ | \\\|\\[\\][123]\|\\{\\} | 9 | 10 | | \\\|\\[\\][123]\|\\{\\} | 12 | 14 | | \\\|\\[\\][123]\|\\{\\} | 14 | 16 | +| \|x | 1 | 2 | | ^(^y\|^z)(u$\|v$)$ | 0 | 1 | | ^(^y\|^z)(u$\|v$)$ | 2 | 3 | | ^(^y\|^z)(u$\|v$)$ | 3 | 4 | @@ -110,6 +113,7 @@ | ax{,3} | 3 | 4 | | ax{,3} | 4 | 5 | | ax{,3} | 5 | 6 | +| x\| | 0 | 1 | | x\|(?^(?:\|x))) | first | 10 | 11 | +| (?:(?P^(?:\|x))) | first | 15 | 16 | +| (?:(?P^(?:\|x))) | last | 15 | 16 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 0 | 11 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 3 | 7 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | first | 8 | 9 | @@ -46,6 +49,8 @@ | \\\|\\[\\][123]\|\\{\\} | first | 12 | 14 | | \\\|\\[\\][123]\|\\{\\} | last | 6 | 11 | | \\\|\\[\\][123]\|\\{\\} | last | 14 | 16 | +| \|x | first | 1 | 2 | +| \|x | last | 1 | 2 | | ^(^y\|^z)(u$\|v$)$ | first | 0 | 1 | | ^(^y\|^z)(u$\|v$)$ | first | 2 | 3 | | ^(^y\|^z)(u$\|v$)$ | first | 3 | 4 | @@ -82,6 +87,8 @@ | ax{,3} | last | 1 | 2 | | ax{,3} | last | 1 | 6 | | ax{,3} | last | 5 | 6 | +| x\| | first | 0 | 1 | +| x\| | last | 0 | 1 | | x\|(?^(?:\|x))) | 0 | 19 | (?:(?P^(?:\|x))) | 3 | 18 | (?P^(?:\|x)) | +| (?:(?P^(?:\|x))) | 3 | 18 | (?P^(?:\|x)) | 10 | 17 | ^(?:\|x) | +| (?:(?P^(?:\|x))) | 11 | 17 | (?:\|x) | 14 | 16 | \|x | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 0 | 10 | (?:[^%]\|^) | 3 | 9 | [^%]\|^ | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | 14 | 19 | (\\w*) | 15 | 18 | \\w* | | (?P[\\w]+)\| | 0 | 15 | (?P[\\w]+) | 9 | 14 | [\\w]+ | diff --git a/python/ql/test/library-tests/regex/Regex.expected b/python/ql/test/library-tests/regex/Regex.expected index 788c7c28a3e..008607eabc9 100644 --- a/python/ql/test/library-tests/regex/Regex.expected +++ b/python/ql/test/library-tests/regex/Regex.expected @@ -41,6 +41,16 @@ | (?:(?:\n\r?)\|^)( *)\\S | sequence | 3 | 10 | | (?:(?:\n\r?)\|^)( *)\\S | sequence | 6 | 9 | | (?:(?:\n\r?)\|^)( *)\\S | sequence | 11 | 12 | +| (?:(?P^(?:\|x))) | ^ | 10 | 11 | +| (?:(?P^(?:\|x))) | char | 15 | 16 | +| (?:(?P^(?:\|x))) | choice | 14 | 16 | +| (?:(?P^(?:\|x))) | non-empty group | 0 | 19 | +| (?:(?P^(?:\|x))) | non-empty group | 3 | 18 | +| (?:(?P^(?:\|x))) | non-empty group | 11 | 17 | +| (?:(?P^(?:\|x))) | sequence | 0 | 19 | +| (?:(?P^(?:\|x))) | sequence | 3 | 18 | +| (?:(?P^(?:\|x))) | sequence | 10 | 17 | +| (?:(?P^(?:\|x))) | sequence | 15 | 16 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | ^ | 8 | 9 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | char | 5 | 6 | | (?:[^%]\|^)?%\\((\\w*)\\)[a-z] | char | 11 | 12 | @@ -123,6 +133,9 @@ | \\\|\\[\\][123]\|\\{\\} | choice | 0 | 16 | | \\\|\\[\\][123]\|\\{\\} | sequence | 0 | 11 | | \\\|\\[\\][123]\|\\{\\} | sequence | 12 | 16 | +| \|x | char | 1 | 2 | +| \|x | choice | 0 | 2 | +| \|x | sequence | 1 | 2 | | ^(^y\|^z)(u$\|v$)$ | $ | 10 | 11 | | ^(^y\|^z)(u$\|v$)$ | $ | 13 | 14 | | ^(^y\|^z)(u$\|v$)$ | $ | 15 | 16 | @@ -193,6 +206,9 @@ | ax{,3} | char | 5 | 6 | | ax{,3} | qualified | 1 | 6 | | ax{,3} | sequence | 0 | 6 | +| x\| | char | 0 | 1 | +| x\| | choice | 0 | 2 | +| x\| | sequence | 0 | 1 | | x\|(?^(?:|x)))') diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index 6a5742a8613..53576557394 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -133,3 +133,6 @@ VERBOSE_REGEX = r""" # Compiled regular expression marking it as verbose ODASA_6786 = re.compile(VERBOSE_REGEX, re.VERBOSE) + +#Named group with caret and empty choice. +re.compile(r'(?:(?P^(?:|x)))')