move Regex into a ParseRegExp file, and rename the class to RegExp

2026-07-21 03:08:25 +02:00 · 2023-03-17 18:20:12 +01:00
parent 556bb41999
commit 59cc90e547
10 changed files with 1102 additions and 1094 deletions
--- a/python/ql/lib/semmle/python/frameworks/Django.qll
+++ b/python/ql/lib/semmle/python/frameworks/Django.qll
@@ -2512,7 +2512,7 @@ module PrivateDjango {
          any(int i | i < routeHandler.getFirstPossibleRoutedParamIndex() | routeHandler.getArg(i))
      )
      or
-      exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, Regex regex |
+      exists(DjangoRouteHandler routeHandler, DjangoRouteRegex regexUse, RegExp regex |
        regex.getAUse() = regexUse and
        routeHandler = this.getARequestHandler() and
        regexUse.getRouteSetup() = this
--- a/python/ql/lib/semmle/python/frameworks/Tornado.qll
+++ b/python/ql/lib/semmle/python/frameworks/Tornado.qll
@@ -423,7 +423,7 @@ module Tornado {
        not result = requestHandler.getArg(0)
      )
      or
-      exists(Function requestHandler, TornadoRouteRegex regexUse, Regex regex |
+      exists(Function requestHandler, TornadoRouteRegex regexUse, RegExp regex |
        regex.getAUse() = regexUse and
        requestHandler = this.getARequestHandler() and
        regexUse.getRouteSetup() = this
--- a/python/ql/lib/semmle/python/regex.qll
+++ b/python/ql/lib/semmle/python/regex.qll
--- a/python/ql/lib/semmle/python/regexp/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/regexp/RegexTreeView.qll
@@ -22,16 +22,16 @@ RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRo
 */
 private newtype TRegExpParent =
  /** A string literal used as a regular expression */
-  TRegExpLiteral(Regex re) or
+  TRegExpLiteral(RegExp re) or
  /** A quantified term */
-  TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
+  TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
  /** A sequence term */
-  TRegExpSequence(Regex re, int start, int end) {
+  TRegExpSequence(RegExp re, int start, int end) {
    re.sequence(start, end) and
    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
  } or
  /** An alternation term */
-  TRegExpAlt(Regex re, int start, int end) {
+  TRegExpAlt(RegExp re, int start, int end) {
    re.alternation(start, end) and
    exists(int part_end |
      re.alternationOption(start, end, start, part_end) and
@@ -39,30 +39,30 @@ private newtype TRegExpParent =
    ) // if an alternation does not have more than one element, it should be treated as that element instead.
  } or
  /** A character class term */
-  TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
+  TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
  /** A character range term */
-  TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
+  TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
  /** A group term */
-  TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
+  TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
  /** A special character */
-  TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
+  TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
  /** A normal character */
-  TRegExpNormalChar(Regex re, int start, int end) {
+  TRegExpNormalChar(RegExp re, int start, int end) {
    re.normalCharacterSequence(start, end)
    or
    re.escapedCharacter(start, end) and
    not re.specialCharacter(start, end, _)
  } or
  /** A back reference */
-  TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
+  TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) }

 pragma[nomagic]
-private int seqChildEnd(Regex re, int start, int end, int i) {
+private int seqChildEnd(RegExp re, int start, int end, int i) {
  result = seqChild(re, start, end, i).getEnd()
 }

 // moved out so we can use it in the charpred
-private RegExpTerm seqChild(Regex re, int start, int end, int i) {
+private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
  re.sequence(start, end) and
  (
    i = 0 and
@@ -106,12 +106,12 @@ module Impl implements RegexTreeViewSig {
    RegExpTerm getLastChild() { result = this.getChild(this.getNumChild() - 1) }

    /** Gets the associated regex. */
-    abstract Regex getRegex();
+    abstract RegExp getRegex();
  }

  /** A string literal used as a regular expression */
  class RegExpLiteral extends TRegExpLiteral, RegExpParent {
-    Regex re;
+    RegExp re;

    RegExpLiteral() { this = TRegExpLiteral(re) }

@@ -126,7 +126,7 @@ module Impl implements RegexTreeViewSig {
    /** Get a string representing all modes for this regex. */
    string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }

-    override Regex getRegex() { result = re }
+    override RegExp getRegex() { result = re }

    /** Gets the primary QL class for this regex. */
    string getPrimaryQLClass() { result = "RegExpLiteral" }
@@ -136,7 +136,7 @@ module Impl implements RegexTreeViewSig {
   * A regular expression term, that is, a syntactic part of a regular expression.
   */
  class RegExpTerm extends RegExpParent {
-    Regex re;
+    RegExp re;
    int start;
    int end;

@@ -206,7 +206,7 @@ module Impl implements RegexTreeViewSig {
     */
    RegExpParent getParent() { result.getAChild() = this }

-    override Regex getRegex() { result = re }
+    override RegExp getRegex() { result = re }

    /** Gets the offset at which this term starts. */
    int getStart() { result = start }
--- a/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll
+++ b/python/ql/lib/semmle/python/regexp/internal/ParseRegExp.qll
--- a/python/ql/src/Expressions/Regex/BackspaceEscape.ql
+++ b/python/ql/src/Expressions/Regex/BackspaceEscape.ql
@@ -13,7 +13,7 @@
 import python
 import semmle.python.regex

-from Regex r, int offset
+from RegExp r, int offset
 where
  r.escapingChar(offset) and
  r.getChar(offset + 1) = "b" and
--- a/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql
+++ b/python/ql/src/Expressions/Regex/DuplicateCharacterInSet.ql
@@ -13,7 +13,7 @@
 import python
 import semmle.python.regex

-predicate duplicate_char_in_class(Regex r, string char) {
+predicate duplicate_char_in_class(RegExp r, string char) {
  exists(int i, int j, int x, int y, int start, int end |
    i != x and
    j != y and
@@ -36,7 +36,7 @@ predicate duplicate_char_in_class(Regex r, string char) {
  )
 }

-from Regex r, string char
+from RegExp r, string char
 where duplicate_char_in_class(r, char)
 select r,
  "This regular expression includes duplicate character '" + char + "' in a set of characters."
--- a/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.ql
+++ b/python/ql/src/Expressions/Regex/MissingPartSpecialGroup.ql
@@ -13,6 +13,6 @@
 import python
 import semmle.python.regex

-from Regex r, string missing, string part
+from RegExp r, string missing, string part
 where r.getText().regexpMatch(".*\\(P<\\w+>.*") and missing = "?" and part = "named group"
 select r, "Regular expression is missing '" + missing + "' in " + part + "."
--- a/python/ql/src/Expressions/Regex/UnmatchableCaret.ql
+++ b/python/ql/src/Expressions/Regex/UnmatchableCaret.ql
@@ -13,14 +13,14 @@
 import python
 import semmle.python.regex

-predicate unmatchable_caret(Regex r, int start) {
+predicate unmatchable_caret(RegExp r, int start) {
  not r.getAMode() = "MULTILINE" and
  not r.getAMode() = "VERBOSE" and
  r.specialCharacter(start, start + 1, "^") and
  not r.firstItem(start, start + 1)
 }

-from Regex r, int offset
+from RegExp r, int offset
 where unmatchable_caret(r, offset)
 select r,
  "This regular expression includes an unmatchable caret at offset " + offset.toString() + "."
--- a/python/ql/src/Expressions/Regex/UnmatchableDollar.ql
+++ b/python/ql/src/Expressions/Regex/UnmatchableDollar.ql
@@ -13,14 +13,14 @@
 import python
 import semmle.python.regex

-predicate unmatchable_dollar(Regex r, int start) {
+predicate unmatchable_dollar(RegExp r, int start) {
  not r.getAMode() = "MULTILINE" and
  not r.getAMode() = "VERBOSE" and
  r.specialCharacter(start, start + 1, "$") and
  not r.lastItem(start, start + 1)
 }

-from Regex r, int offset
+from RegExp r, int offset
 where unmatchable_dollar(r, offset)
 select r,
  "This regular expression includes an unmatchable dollar at offset " + offset.toString() + "."