implement RegExpSubPattern.getOperand in the Python regexp implementation

2026-07-20 18:58:36 +02:00 · 2021-07-15 09:41:53 +02:00
parent de8f64c5be
commit 383b5f2ff2
3 changed files with 17 additions and 1 deletions
--- a/python/ql/src/semmle/python/RegexTreeView.qll
+++ b/python/ql/src/semmle/python/RegexTreeView.qll
@@ -836,6 +836,13 @@ class RegExpZeroWidthMatch extends RegExpGroup {
 */
 class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }
+
+  /** Gets the lookahead term: the term of the same regex spanning `start + 3` to `end - 1`. NOTE(review): the fixed offset of 3 fits the `(?=`/`(?!` prefixes; confirm how 4-character lookbehind prefixes (`(?<=`, `(?<!`) are meant to be handled. */
+  RegExpTerm getOperand() {
+    result.getRegex() = re and
+    result.getStart() = start + 3 and
+    result.getEnd() = end - 1
+  }
 }

 /**
--- a/python/ql/test/query-tests/Security/CWE-730/ReDoS.expected
+++ b/python/ql/test/query-tests/Security/CWE-730/ReDoS.expected
@@ -93,5 +93,7 @@
 | redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
 | redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
 | redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
+| redos.py:380:35:380:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
+| redos.py:381:35:381:41 | [^"\\s]+ | This part of the regular expression may cause exponential backtracking on strings starting with '/' and containing many repetitions of '!'. |
 | unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
 | unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
--- a/python/ql/test/query-tests/Security/CWE-730/redos.py
+++ b/python/ql/test/query-tests/Security/CWE-730/redos.py
@@ -371,4 +371,11 @@ good42 = re.compile(r'''^((?:a{0,2}|-)|\w\{\d,\d\})+X$''')
 bad87 = re.compile(r'X(\u0061|a)*Y')

 # GOOD
-good43 = re.compile(r'X(\u0061|b)+Y')
+good43 = re.compile(r'X(\u0061|b)+Y')
+
+# GOOD
+good44 = re.compile(r'("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)')
+
+# BAD
+bad88 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X')
+bad89 = re.compile(r'/("[^"]*?"|[^"\s]+)+(?=X)')