From bb16731b868eb7df46536209fdbd9567e338c6da Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Wed, 19 Jul 2023 18:53:00 +0100 Subject: [PATCH] Python: Fix for multiple parse mode flags. --- .../python/regexp/internal/ParseRegExp.qll | 19 +++++++++++++------ .../Security/CWE-730-ReDoS/ReDoS.expected | 3 ++- .../Security/CWE-730-ReDoS/unittests.py | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll b/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll index c6f8e7f76aa..b4246dc1385 100644 --- a/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll +++ b/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll @@ -617,7 +617,7 @@ class RegExp extends Expr instanceof StrConst { private predicate group_start(int start, int end) { this.non_capturing_group_start(start, end) or - this.flag_group_start(start, end, _) + this.flag_group_start(start, end) or this.named_group_start(start, end) or @@ -679,12 +679,19 @@ class RegExp extends Expr instanceof StrConst { end = min(int i | i > start + 4 and this.getChar(i) = "?") } - private predicate flag_group_start(int start, int end, string c) { + private predicate flag_group_start(int start, int end) { this.isGroupStart(start) and this.getChar(start + 1) = "?" and - end = start + 3 and - c = this.getChar(start + 2) and - c in ["i", "L", "m", "s", "u", "x"] + this.getChar(start + 2) in ["i", "L", "m", "s", "u", "x"] and + end = start + 2 + } + + private predicate flag_group(int start, int end, string c) { + exists(int inStart, int inEnd | + this.flag_group_start(start, inStart) and + this.groupContents(start, end, inStart, inEnd) and + this.getChar([inStart .. inEnd - 1]) = c + ) } /** @@ -692,7 +699,7 @@ class RegExp extends Expr instanceof StrConst { * it is defined by a prefix. */ string getModeFromPrefix() { - exists(string c | this.flag_group_start(_, _, c) | + exists(string c | this.flag_group(_, _, c) | c = "i" and result = "IGNORECASE" or c = "L" and result = "LOCALE" diff --git a/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected b/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected index 288ce4f782e..458c82afb7d 100644 --- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected +++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected @@ -105,4 +105,5 @@ | redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. | | unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. | | unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. | -| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. | +| unittests.py:11:20:11:28 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 's' and containing many repetitions of '\\n'. | +| unittests.py:12:21:12:29 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'is' and containing many repetitions of '\\n'. | diff --git a/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py b/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py index 41ff7a8cb27..0a49b8a52a9 100644 --- a/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py +++ b/python/ql/test/query-tests/Security/CWE-730-ReDoS/unittests.py @@ -9,4 +9,4 @@ re.compile(r'(?:.|\n)*b') # No ReDoS. re.compile(r'(?:.|\n)*b', re.DOTALL) # Has ReDoS. re.compile(r'(?i)(?:.|\n)*b') # No ReDoS. re.compile(r'(?s)(?:.|\n)*b') # Has ReDoS. -re.compile(r'(?is)(?:.|\n)*b') # Has ReDoS. [NOT DETECTED] +re.compile(r'(?is)(?:.|\n)*b') # Has ReDoS.