Ruby: refactor regex libraries

2025-12-17 01:03:14 +01:00 · 2022-03-18 17:49:41 +01:00
parent 496aab78a7
commit 74aea81fe3
15 changed files with 1318 additions and 879 deletions
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -552,7 +552,7 @@ class RegExpWordBoundary extends RegExpSpecialChar {

 /**
 * A character class escape in a regular expression.
- * That is, an escaped charachter that denotes multiple characters.
+ * That is, an escaped character that denotes multiple characters.
 *
 * Examples:
 *
--- a/python/ql/lib/semmle/python/regex.qll
+++ b/python/ql/lib/semmle/python/regex.qll
@@ -186,7 +186,7 @@ abstract class RegexString extends Expr {
    )
  }

-  /** Hold is a character set starts between `start` and `end`. */
+  /** Holds if a character set starts between `start` and `end`. */
  predicate char_set_start(int start, int end) {
    this.char_set_start(start) = true and
    (
@@ -314,8 +314,10 @@ abstract class RegexString extends Expr {
    result = this.(Bytes).getS()
  }

+  /** Gets the `i`th character of this regex */
  string getChar(int i) { result = this.getText().charAt(i) }

+  /** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
  string nonEscapedCharAt(int i) {
    result = this.getText().charAt(i) and
    not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
@@ -327,6 +329,9 @@ abstract class RegexString extends Expr {

  private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }

+  /**
+   * Holds if the `i`th character could not be parsed.
+   */
  predicate failedToParse(int i) {
    exists(this.getChar(i)) and
    not exists(int start, int end |
@@ -415,6 +420,9 @@ abstract class RegexString extends Expr {
    )
  }

+  /**
+   * Holds if a simple or escaped character is found between `start` and `end`.
+   */
  predicate character(int start, int end) {
    (
      this.simpleCharacter(start, end) and
@@ -426,12 +434,18 @@ abstract class RegexString extends Expr {
    not exists(int x, int y | this.backreference(x, y) and x <= start and y >= end)
  }

+  /**
+   * Holds if a normal character is found between `start` and `end`.
+   */
  predicate normalCharacter(int start, int end) {
    end = start + 1 and
    this.character(start, end) and
    not this.specialCharacter(start, end, _)
  }

+  /**
+   * Holds if a special character is found between `start` and `end`.
+   */
  predicate specialCharacter(int start, int end, string char) {
    not this.inCharSet(start) and
    this.character(start, end) and
@@ -490,7 +504,7 @@ abstract class RegexString extends Expr {
    this.specialCharacter(start, end, _)
  }

-  /** Whether the text in the range start,end is a group */
+  /** Whether the text in the range `start,end` is a group */
  predicate group(int start, int end) {
    this.groupContents(start, end, _, _)
    or
@@ -609,6 +623,7 @@ abstract class RegexString extends Expr {
    this.simple_group_start(start, end)
  }

+  /** Matches the start of a non-capturing group, e.g. `(?:` */
  private predicate non_capturing_group_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -616,12 +631,18 @@ abstract class RegexString extends Expr {
    end = start + 3
  }

+  /** Matches the start of a simple group, e.g. `(a+)`. */
  private predicate simple_group_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) != "?" and
    end = start + 1
  }

+  /**
+   * Matches the start of a named group, that is, the
+   * `(?P<name>` prefix of a group such as
+   * `(?P<name>\w+)`.
+   */
  private predicate named_group_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -673,6 +694,7 @@ abstract class RegexString extends Expr {
    )
  }

+  /** Matches the start of a positive lookahead assertion, i.e. `(?=`. */
  private predicate lookahead_assertion_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -680,6 +702,7 @@ abstract class RegexString extends Expr {
    end = start + 3
  }

+  /** Matches the start of a negative lookahead assertion, i.e. `(?!`. */
  private predicate negative_lookahead_assertion_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -687,6 +710,7 @@ abstract class RegexString extends Expr {
    end = start + 3
  }

+  /** Matches the start of a positive lookbehind assertion, i.e. `(?<=`. */
  private predicate lookbehind_assertion_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -695,6 +719,7 @@ abstract class RegexString extends Expr {
    end = start + 4
  }

+  /** Matches the start of a negative lookbehind assertion, i.e. `(?<!`. */
  private predicate negative_lookbehind_assertion_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -703,6 +728,7 @@ abstract class RegexString extends Expr {
    end = start + 4
  }

+  /** Matches the start of a comment group, i.e. `(?#`. */
  private predicate comment_group_start(int start, int end) {
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
@@ -710,6 +736,7 @@ abstract class RegexString extends Expr {
    end = start + 3
  }

+  /** Matches the contents of a group. */
  predicate groupContents(int start, int end, int in_start, int in_end) {
    this.group_start(start, in_start) and
    end = in_end + 1 and
@@ -717,12 +744,14 @@ abstract class RegexString extends Expr {
    this.isGroupEnd(in_end)
  }

+  /** Matches a named backreference, e.g. `(?P=foo)`. */
  private predicate named_backreference(int start, int end, string name) {
    this.named_backreference_start(start, start + 4) and
    end = min(int i | i > start + 4 and this.getChar(i) = ")") + 1 and
    name = this.getText().substring(start + 4, end - 2)
  }

+  /** Matches a numbered backreference, e.g. `\1`. */
  private predicate numbered_backreference(int start, int end, int value) {
    this.escapingChar(start) and
    // starting with 0 makes it an octal escape
@@ -747,7 +776,7 @@ abstract class RegexString extends Expr {
    )
  }

-  /** Whether the text in the range start,end is a back reference */
+  /** Whether the text in the range `start,end` is a back reference */
  predicate backreference(int start, int end) {
    this.numbered_backreference(start, end, _)
    or
--- a/ruby/ql/consistency-queries/RegExpConsistency.ql
+++ b/ruby/ql/consistency-queries/RegExpConsistency.ql
@@ -1,4 +1,4 @@
-import codeql.ruby.security.performance.RegExpTreeView
+import codeql.ruby.Regexp

 query predicate nonUniqueChild(RegExpParent parent, int i, RegExpTerm child) {
  child = parent.getChild(i) and
--- a/ruby/ql/lib/codeql/ruby/Regexp.qll
+++ b/ruby/ql/lib/codeql/ruby/Regexp.qll
@@ -0,0 +1,143 @@
+/**
+ * Provides classes for working with regular expressions.
+ *
+ * Regular expression literals are represented as an abstract syntax tree of regular expression
+ * terms.
+ */
+
+import regexp.RegExpTreeView // re-export
+private import regexp.ParseRegExp
+private import codeql.ruby.ast.Literal as AST
+private import codeql.ruby.DataFlow
+private import codeql.ruby.controlflow.CfgNodes
+private import codeql.ruby.ApiGraphs
+private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl
+
+/**
+ * Provides utility predicates related to regular expressions.
+ */
+module RegExpPatterns {
+  /**
+   * Gets a pattern that matches common top-level domain names in lower case.
+   */
+  string getACommonTld() {
+    // according to ranking by http://google.com/search?q=site:.<<TLD>>
+    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
+  }
+}
+
+/**
+ * A node whose value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+abstract class RegExpPatternSource extends DataFlow::Node {
+  /**
+   * Gets a node where the pattern of this node is parsed as a part of
+   * a regular expression.
+   */
+  abstract DataFlow::Node getAParse();
+
+  /**
+   * Gets the root term of the regular expression parsed from this pattern.
+   */
+  abstract RegExpTerm getRegExpTerm();
+}
+
+/**
+ * A regular expression literal, viewed as the pattern source for itself.
+ */
+private class RegExpLiteralPatternSource extends RegExpPatternSource {
+  private AST::RegExpLiteral astNode;
+
+  RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
+
+  override DataFlow::Node getAParse() { result = this }
+
+  override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
+}
+
+/**
+ * A node whose string value may flow to a position where it is interpreted
+ * as a part of a regular expression.
+ */
+private class StringRegExpPatternSource extends RegExpPatternSource {
+  private DataFlow::Node parse;
+
+  StringRegExpPatternSource() { this = regExpSource(parse) }
+
+  override DataFlow::Node getAParse() { result = parse }
+
+  override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
+}
+
+private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
+  override predicate isDotAll() { this.hasMultilineFlag() }
+
+  override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
+
+  override string getFlags() { result = this.getFlagString() }
+}
+
+private class ParsedStringRegExp extends RegExp {
+  private DataFlow::Node parse;
+
+  ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
+
+  DataFlow::Node getAParse() { result = parse }
+
+  override predicate isDotAll() { none() }
+
+  override predicate isIgnoreCase() { none() }
+
+  override string getFlags() { none() }
+}
+
+/**
+ * Holds if `source` may be interpreted as a regular expression.
+ */
+private predicate isInterpretedAsRegExp(DataFlow::Node source) {
+  // The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
+  source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
+  or
+  // The argument of a call that coerces the argument to a regular expression.
+  exists(DataFlow::CallNode mce |
+    mce.getMethodName() = ["match", "match?"] and
+    source = mce.getArgument(0) and
+    // exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
+    not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
+  )
+}
+
+private class RegExpConfiguration extends Configuration {
+  RegExpConfiguration() { this = "RegExpConfiguration" }
+
+  override predicate isSource(DataFlow::Node source) {
+    source.asExpr() =
+      any(ExprCfgNode e |
+        e.getConstantValue().isString(_) and
+        not e instanceof ExprNodes::VariableReadAccessCfgNode and
+        not e instanceof ExprNodes::ConstantReadAccessCfgNode
+      )
+  }
+
+  override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
+
+  override predicate isSanitizer(DataFlow::Node node) {
+    // stop flow if `node` is receiver of
+    // https://ruby-doc.org/core-2.4.0/String.html#method-i-match
+    exists(DataFlow::CallNode mce |
+      mce.getMethodName() = ["match", "match?"] and
+      node = mce.getReceiver() and
+      mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
+    )
+  }
+}
+
+/**
+ * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
+ * as a part of a regular expression.
+ */
+cached
+DataFlow::Node regExpSource(DataFlow::Node re) {
+  exists(RegExpConfiguration c | c.hasFlow(result, re))
+}
--- a/ruby/ql/lib/codeql/ruby/ast/Literal.qll
+++ b/ruby/ql/lib/codeql/ruby/ast/Literal.qll
@@ -1,5 +1,5 @@
 private import codeql.ruby.AST
-private import codeql.ruby.security.performance.RegExpTreeView as RETV
+private import codeql.ruby.Regexp as RE
 private import internal.AST
 private import internal.Constant
 private import internal.Literal
@@ -594,7 +594,7 @@ class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral {
  final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" }

  /** Returns the root node of the parse tree of this regular expression. */
-  final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) }
+  final RE::RegExpTerm getParsed() { result = RE::getParsedRegExp(this) }
 }

 /**
--- a/ruby/ql/lib/codeql/ruby/printAst.qll
+++ b/ruby/ql/lib/codeql/ruby/printAst.qll
@@ -7,7 +7,7 @@
 */

 private import AST
-private import codeql.ruby.security.performance.RegExpTreeView as RETV
+private import codeql.ruby.Regexp as RE
 private import codeql.ruby.ast.internal.Synthesis

 /**
@@ -37,7 +37,7 @@ private predicate shouldPrintAstEdge(AstNode parent, string edgeName, AstNode ch

 newtype TPrintNode =
  TPrintRegularAstNode(AstNode n) { shouldPrintNode(n) } or
-  TPrintRegExpNode(RETV::RegExpTerm term) {
+  TPrintRegExpNode(RE::RegExpTerm term) {
    exists(RegExpLiteral literal |
      shouldPrintNode(literal) and
      term.getRootTerm() = literal.getParsed()
@@ -107,7 +107,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {
    or
    // If this AST node is a regexp literal, add the parsed regexp tree as a
    // child.
-    exists(RETV::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
+    exists(RE::RegExpTerm t | t = astNode.(RegExpLiteral).getParsed() |
      result = TPrintRegExpNode(t) and edgeName = "getParsed"
    )
  }
@@ -134,7 +134,7 @@ class PrintRegularAstNode extends PrintAstNode, TPrintRegularAstNode {

 /** A parsed regexp node in the output tree. */
 class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
-  RETV::RegExpTerm regexNode;
+  RE::RegExpTerm regexNode;

  PrintRegExpNode() { this = TPrintRegExpNode(regexNode) }

@@ -147,7 +147,7 @@ class PrintRegExpNode extends PrintAstNode, TPrintRegExpNode {
    exists(int i | result = TPrintRegExpNode(regexNode.getChild(i)) and edgeName = i.toString())
  }

-  override int getOrder() { exists(RETV::RegExpTerm p | p.getChild(result) = regexNode) }
+  override int getOrder() { exists(RE::RegExpTerm p | p.getChild(result) = regexNode) }

  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
--- a/ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll
@@ -7,10 +7,6 @@

 private import codeql.ruby.ast.Literal as AST
 private import codeql.Locations
-private import codeql.ruby.DataFlow
-private import codeql.ruby.controlflow.CfgNodes
-private import codeql.ruby.ApiGraphs
-private import codeql.ruby.dataflow.internal.tainttrackingforlibraries.TaintTrackingImpl

 /**
 * A `StringlikeLiteral` containing a regular expression term, that is, either
@@ -116,6 +112,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /** Holds if a character set starts between `start` and `end`. */
  predicate charSetStart(int start, int end) {
    this.charSetStart(start) = true and
    (
@@ -145,14 +142,21 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

-  predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
+  /**
+   * Holds if the `index`th token (a character or a `-`, ordered by start position) of the
+   * character set starting at `charsetStart` is found between `tokenStart` and `tokenEnd`.
+   */
+  private predicate charSetToken(int charsetStart, int index, int tokenStart, int tokenEnd) {
    tokenStart =
      rank[index](int start, int end | this.charSetToken(charsetStart, start, end) | start) and
    this.charSetToken(charsetStart, tokenStart, tokenEnd)
  }

-  /** Either a char or a - */
-  predicate charSetToken(int charsetStart, int start, int end) {
+  /**
+   * Holds if the character set starting at `charsetStart` contains either
+   * a character or a `-` found between `start` and `end`.
+   */
+  private predicate charSetToken(int charsetStart, int start, int end) {
    this.charSetStart(charsetStart, start) and
    (
      this.escapedCharacter(start, end)
@@ -174,6 +178,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /**
+   * Holds if the character set starting at `charsetStart` contains either
+   * a character or a range found between `start` and `end`.
+   */
  predicate charSetChild(int charsetStart, int start, int end) {
    this.charSetToken(charsetStart, start, end) and
    not exists(int rangeStart, int rangeEnd |
@@ -185,6 +193,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.charRange(charsetStart, start, _, _, end)
  }

+  /**
+   * Holds if the character set starting at `charsetStart` contains a character range
+   * with lower bound found between `start` and `lowerEnd`
+   * and upper bound found between `upperStart` and `end`.
+   */
  predicate charRange(int charsetStart, int start, int lowerEnd, int upperStart, int end) {
    exists(int index |
      this.charRangeEnd(charsetStart, index) = true and
@@ -193,6 +206,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /**
+   * Helper predicate for `charRange`.
+   * We can determine where character ranges end by a left to right sweep.
+   *
+   * To avoid negative recursion we return a boolean. See `escaping`,
+   * the helper for `escapingChar`, for a clean use of this pattern.
+   */
  private boolean charRangeEnd(int charsetStart, int index) {
    this.charSetToken(charsetStart, index, _, _) and
    (
@@ -216,8 +236,15 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
  predicate escapingChar(int pos) { this.escaping(pos) = true }

+  /**
+   * Helper predicate for `escapingChar`.
+   * In order to avoid negative recursion, we return a boolean.
+   * This way, we can refer to `escaping(pos - 1).booleanNot()`
+   * rather than to a negated version of `escaping(pos)`.
+   */
  private boolean escaping(int pos) {
    pos = -1 and result = false
    or
@@ -229,8 +256,10 @@ abstract class RegExp extends AST::StringlikeLiteral {
  /** Gets the text of this regex */
  string getText() { result = this.getConstantValue().getString() }

+  /** Gets the `i`th character of this regex */
  string getChar(int i) { result = this.getText().charAt(i) }

+  /** Gets the `i`th character of this regex, unless it is part of a character escape sequence. */
  string nonEscapedCharAt(int i) {
    result = this.getText().charAt(i) and
    not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
@@ -242,6 +271,9 @@ abstract class RegExp extends AST::StringlikeLiteral {

  private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }

+  /**
+   * Holds if the `i`th character could not be parsed.
+   */
  predicate failedToParse(int i) {
    exists(this.getChar(i)) and
    not exists(int start, int end |
@@ -331,6 +363,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.getChar(start + 3) = "^"
  }

+  /**
+   * Holds if an escaped character is found between `start` and `end`.
+   * Escaped characters include hex values, octal values and named escapes,
+   * but excludes backreferences.
+   */
  predicate escapedCharacter(int start, int end) {
    this.escapingChar(start) and
    not this.numberedBackreference(start, _, _) and
@@ -350,17 +387,25 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /**
+   * Holds if the character at `index` is inside a character set.
+   */
  predicate inCharSet(int index) {
    exists(int x, int y | this.charSet(x, y) and index in [x + 1 .. y - 2])
  }

+  /**
+   * Holds if the character at `index` is inside a posix bracket.
+   */
  predicate inPosixBracket(int index) {
    exists(int x, int y |
      this.posixStyleNamedCharacterProperty(x, y, _) and index in [x + 1 .. y - 2]
    )
  }

-  /** 'Simple' characters are any that don't alter the parsing of the regex. */
+  /**
+   * 'simple' characters are any that don't alter the parsing of the regex.
+   */
  private predicate simpleCharacter(int start, int end) {
    end = start + 1 and
    not this.charSet(start, _) and
@@ -391,6 +436,9 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /**
+   * Holds if a simple or escaped character is found between `start` and `end`.
+   */
  predicate character(int start, int end) {
    (
      this.simpleCharacter(start, end) and
@@ -406,12 +454,18 @@ abstract class RegExp extends AST::StringlikeLiteral {
    not exists(int x, int y | this.multiples(x, y, _, _) and x <= start and y >= end)
  }

+  /**
+   * Holds if a normal character is found between `start` and `end`.
+   */
  predicate normalCharacter(int start, int end) {
    end = start + 1 and
    this.character(start, end) and
    not this.specialCharacter(start, end, _)
  }

+  /**
+   * Holds if a special character is found between `start` and `end`.
+   */
  predicate specialCharacter(int start, int end, string char) {
    this.character(start, end) and
    not this.inCharSet(start) and
@@ -505,6 +559,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.positiveLookbehindAssertionGroup(start, end)
  }

+  /** Holds if an empty group is found between `start` and `end`. */
  predicate emptyGroup(int start, int end) {
    exists(int endm1 | end = endm1 + 1 |
      this.groupStart(start, endm1) and
@@ -538,24 +593,28 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /** Holds if a negative lookahead is found between `start` and `end` */
  predicate negativeLookaheadAssertionGroup(int start, int end) {
    exists(int inStart | this.negativeLookaheadAssertionStart(start, inStart) |
      this.groupContents(start, end, inStart, _)
    )
  }

+  /** Holds if a negative lookbehind is found between `start` and `end` */
  predicate negativeLookbehindAssertionGroup(int start, int end) {
    exists(int inStart | this.negativeLookbehindAssertionStart(start, inStart) |
      this.groupContents(start, end, inStart, _)
    )
  }

+  /** Holds if a positive lookahead is found between `start` and `end` */
  predicate positiveLookaheadAssertionGroup(int start, int end) {
    exists(int inStart | this.lookaheadAssertionStart(start, inStart) |
      this.groupContents(start, end, inStart, _)
    )
  }

+  /** Holds if a positive lookbehind is found between `start` and `end` */
  predicate positiveLookbehindAssertionGroup(int start, int end) {
    exists(int inStart | this.lookbehindAssertionStart(start, inStart) |
      this.groupContents(start, end, inStart, _)
@@ -661,6 +720,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
    end = start + 3
  }

+  /** Matches the contents of a group. */
  predicate groupContents(int start, int end, int inStart, int inEnd) {
    this.groupStart(start, inStart) and
    end = inEnd + 1 and
@@ -747,6 +807,11 @@ abstract class RegExp extends AST::StringlikeLiteral {
    )
  }

+  /**
+   * Holds if a repetition quantifier is found between `start` and `end`,
+   * with the given lower and upper bounds. If a bound is omitted, the corresponding
+   * string is empty.
+   */
  predicate multiples(int start, int end, string lower, string upper) {
    exists(string text, string match, string inner |
      text = this.getText() and
@@ -774,6 +839,13 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.qualifiedPart(start, _, end, maybeEmpty, mayRepeatForever)
  }

+  /**
+   * Holds if a qualified part is found between `start` and `partEnd` and the qualifier is
+   * found between `partEnd` and `end`.
+   *
+   * `maybeEmpty` is true if the part is optional.
+   * `mayRepeatForever` is true if the part may be repeated unboundedly.
+   */
  predicate qualifiedPart(
    int start, int partEnd, int end, boolean maybeEmpty, boolean mayRepeatForever
  ) {
@@ -781,6 +853,7 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.qualifier(partEnd, end, maybeEmpty, mayRepeatForever)
  }

+  /** Holds if the range `start`, `end` contains a character, a quantifier, a character set or a group. */
  predicate item(int start, int end) {
    this.qualifiedItem(start, end, _, _)
    or
@@ -960,75 +1033,3 @@ abstract class RegExp extends AST::StringlikeLiteral {
    this.lastPart(start, end)
  }
 }
-
-private class RegExpLiteralRegExp extends RegExp, AST::RegExpLiteral {
-  override predicate isDotAll() { this.hasMultilineFlag() }
-
-  override predicate isIgnoreCase() { this.hasCaseInsensitiveFlag() }
-
-  override string getFlags() { result = this.getFlagString() }
-}
-
-private class ParsedStringRegExp extends RegExp {
-  private DataFlow::Node parse;
-
-  ParsedStringRegExp() { this = regExpSource(parse).asExpr().getExpr() }
-
-  DataFlow::Node getAParse() { result = parse }
-
-  override predicate isDotAll() { none() }
-
-  override predicate isIgnoreCase() { none() }
-
-  override string getFlags() { none() }
-}
-
-/**
- * Holds if `source` may be interpreted as a regular expression.
- */
-private predicate isInterpretedAsRegExp(DataFlow::Node source) {
-  // The first argument to an invocation of `Regexp.new` or `Regexp.compile`.
-  source = API::getTopLevelMember("Regexp").getAMethodCall(["compile", "new"]).getArgument(0)
-  or
-  // The argument of a call that coerces the argument to a regular expression.
-  exists(DataFlow::CallNode mce |
-    mce.getMethodName() = ["match", "match?"] and
-    source = mce.getArgument(0) and
-    // exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
-    not mce.getReceiver().asExpr().getExpr() instanceof AST::RegExpLiteral
-  )
-}
-
-private class RegExpConfiguration extends Configuration {
-  RegExpConfiguration() { this = "RegExpConfiguration" }
-
-  override predicate isSource(DataFlow::Node source) {
-    source.asExpr() =
-      any(ExprCfgNode e |
-        e.getConstantValue().isString(_) and
-        not e instanceof ExprNodes::VariableReadAccessCfgNode and
-        not e instanceof ExprNodes::ConstantReadAccessCfgNode
-      )
-  }
-
-  override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
-
-  override predicate isSanitizer(DataFlow::Node node) {
-    // stop flow if `node` is receiver of
-    // https://ruby-doc.org/core-2.4.0/String.html#method-i-match
-    exists(DataFlow::CallNode mce |
-      mce.getMethodName() = ["match", "match?"] and
-      node = mce.getReceiver() and
-      mce.getArgument(0).asExpr().getExpr() instanceof AST::RegExpLiteral
-    )
-  }
-}
-
-/**
- * Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
- * as a part of a regular expression.
- */
-cached
-DataFlow::Node regExpSource(DataFlow::Node re) {
-  exists(RegExpConfiguration c | c.hasFlow(result, re))
-}
--- a/ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll
--- a/ruby/ql/lib/codeql/ruby/security/performance/PolynomialReDoSCustomizations.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/PolynomialReDoSCustomizations.qll
@@ -8,8 +8,7 @@ private import codeql.ruby.AST as AST
 private import codeql.ruby.CFG
 private import codeql.ruby.DataFlow
 private import codeql.ruby.dataflow.RemoteFlowSources
-private import codeql.ruby.security.performance.ParseRegExp as RegExp
-private import codeql.ruby.security.performance.RegExpTreeView
+private import codeql.ruby.Regexp
 private import codeql.ruby.security.performance.SuperlinearBackTracking

 module PolynomialReDoS {
--- a/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
+++ b/ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll
@@ -1,8 +1,10 @@
-private import codeql.ruby.ast.Literal as AST
-private import ParseRegExp
-private import codeql.NumberUtils
+/**
+ * This module should provide a class hierarchy corresponding to a parse tree of regular expressions.
+ */
+
+import codeql.ruby.regexp.RegExpTreeView
 import codeql.Locations
-private import codeql.ruby.DataFlow
+private import codeql.ruby.ast.Literal as AST

 /**
 * Holds if `term` is an ecape class representing e.g. `\d`.
@@ -59,776 +61,3 @@ module RegExpFlags {
    root.getLiteral().isDotAll()
  }
 }
-
-/**
- * Provides utility predicates related to regular expressions.
- */
-module RegExpPatterns {
-  /**
-   * Gets a pattern that matches common top-level domain names in lower case.
-   */
-  string getACommonTld() {
-    // according to ranking by http://google.com/search?q=site:.<<TLD>>
-    result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])"
-  }
-}
-
-/**
- * An element containing a regular expression term, that is, either
- * a string literal (parsed as a regular expression)
- * or another regular expression term.
- */
-class RegExpParent extends TRegExpParent {
-  string toString() { result = "RegExpParent" }
-
-  RegExpTerm getChild(int i) { none() }
-
-  final RegExpTerm getAChild() { result = this.getChild(_) }
-
-  int getNumChild() { result = count(this.getAChild()) }
-
-  /**
-   * Gets the name of a primary CodeQL class to which this regular
-   * expression term belongs.
-   */
-  string getAPrimaryQlClass() { result = "RegExpParent" }
-
-  /**
-   * Gets a comma-separated list of the names of the primary CodeQL classes to
-   * which this regular expression term belongs.
-   */
-  final string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
-}
-
-class RegExpLiteral extends TRegExpLiteral, RegExpParent {
-  RegExp re;
-
-  RegExpLiteral() { this = TRegExpLiteral(re) }
-
-  override RegExpTerm getChild(int i) { i = 0 and result.getRegExp() = re and result.isRootTerm() }
-
-  predicate isDotAll() { re.isDotAll() }
-
-  predicate isIgnoreCase() { re.isIgnoreCase() }
-
-  string getFlags() { result = re.getFlags() }
-
-  override string getAPrimaryQlClass() { result = "RegExpLiteral" }
-}
-
-class RegExpTerm extends RegExpParent {
-  RegExp re;
-  int start;
-  int end;
-
-  RegExpTerm() {
-    this = TRegExpAlt(re, start, end)
-    or
-    this = TRegExpBackRef(re, start, end)
-    or
-    this = TRegExpCharacterClass(re, start, end)
-    or
-    this = TRegExpCharacterRange(re, start, end)
-    or
-    this = TRegExpNormalChar(re, start, end)
-    or
-    this = TRegExpGroup(re, start, end)
-    or
-    this = TRegExpQuantifier(re, start, end)
-    or
-    this = TRegExpSequence(re, start, end) and
-    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
-    or
-    this = TRegExpSpecialChar(re, start, end)
-    or
-    this = TRegExpNamedCharacterProperty(re, start, end)
-  }
-
-  RegExpTerm getRootTerm() {
-    this.isRootTerm() and result = this
-    or
-    result = this.getParent().(RegExpTerm).getRootTerm()
-  }
-
-  predicate isUsedAsRegExp() { any() }
-
-  predicate isRootTerm() { start = 0 and end = re.getText().length() }
-
-  override RegExpTerm getChild(int i) {
-    result = this.(RegExpAlt).getChild(i)
-    or
-    result = this.(RegExpBackRef).getChild(i)
-    or
-    result = this.(RegExpCharacterClass).getChild(i)
-    or
-    result = this.(RegExpCharacterRange).getChild(i)
-    or
-    result = this.(RegExpNormalChar).getChild(i)
-    or
-    result = this.(RegExpGroup).getChild(i)
-    or
-    result = this.(RegExpQuantifier).getChild(i)
-    or
-    result = this.(RegExpSequence).getChild(i)
-    or
-    result = this.(RegExpSpecialChar).getChild(i)
-    or
-    result = this.(RegExpNamedCharacterProperty).getChild(i)
-  }
-
-  RegExpParent getParent() { result.getAChild() = this }
-
-  RegExp getRegExp() { result = re }
-
-  int getStart() { result = start }
-
-  int getEnd() { result = end }
-
-  override string toString() { result = re.getText().substring(start, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpTerm" }
-
-  Location getLocation() { result = re.getLocation() }
-
-  pragma[noinline]
-  private predicate componentHasLocationInfo(
-    int i, string filepath, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    re.getComponent(i)
-        .getLocation()
-        .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-  }
-
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    exists(int re_start, int re_end |
-      this.componentHasLocationInfo(0, filepath, startline, re_start, _, _) and
-      this.componentHasLocationInfo(re.getNumberOfComponents() - 1, filepath, _, _, endline, re_end) and
-      startcolumn = re_start + start and
-      endcolumn = re_start + end - 1
-    )
-  }
-
-  File getFile() { result = this.getLocation().getFile() }
-
-  string getRawValue() { result = this.toString() }
-
-  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
-
-  /** Gets the regular expression term that is matched (textually) before this one, if any. */
-  RegExpTerm getPredecessor() {
-    exists(RegExpTerm parent | parent = this.getParent() |
-      result = parent.(RegExpSequence).previousElement(this)
-      or
-      not exists(parent.(RegExpSequence).previousElement(this)) and
-      not parent instanceof RegExpSubPattern and
-      result = parent.getPredecessor()
-    )
-  }
-
-  /** Gets the regular expression term that is matched (textually) after this one, if any. */
-  RegExpTerm getSuccessor() {
-    exists(RegExpTerm parent | parent = this.getParent() |
-      result = parent.(RegExpSequence).nextElement(this)
-      or
-      not exists(parent.(RegExpSequence).nextElement(this)) and
-      not parent instanceof RegExpSubPattern and
-      result = parent.getSuccessor()
-    )
-  }
-}
-
-newtype TRegExpParent =
-  TRegExpLiteral(RegExp re) or
-  TRegExpQuantifier(RegExp re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
-  TRegExpSequence(RegExp re, int start, int end) { re.sequence(start, end) } or
-  TRegExpAlt(RegExp re, int start, int end) { re.alternation(start, end) } or
-  TRegExpCharacterClass(RegExp re, int start, int end) { re.charSet(start, end) } or
-  TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
-  TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
-  TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
-  TRegExpNormalChar(RegExp re, int start, int end) {
-    re.normalCharacterSequence(start, end)
-    or
-    re.escapedCharacter(start, end) and
-    not re.specialCharacter(start, end, _)
-  } or
-  TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
-  TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
-    re.namedCharacterProperty(start, end, _)
-  }
-
-class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
-  int part_end;
-  boolean may_repeat_forever;
-
-  RegExpQuantifier() {
-    this = TRegExpQuantifier(re, start, end) and
-    re.qualifiedPart(start, part_end, end, _, may_repeat_forever)
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegExp() = re and
-    result.getStart() = start and
-    result.getEnd() = part_end
-  }
-
-  predicate mayRepeatForever() { may_repeat_forever = true }
-
-  string getQualifier() { result = re.getText().substring(part_end, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpQuantifier" }
-}
-
-class InfiniteRepetitionQuantifier extends RegExpQuantifier {
-  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
-
-  override string getAPrimaryQlClass() { result = "InfiniteRepetitionQuantifier" }
-}
-
-class RegExpStar extends InfiniteRepetitionQuantifier {
-  RegExpStar() { this.getQualifier().charAt(0) = "*" }
-
-  override string getAPrimaryQlClass() { result = "RegExpStar" }
-}
-
-class RegExpPlus extends InfiniteRepetitionQuantifier {
-  RegExpPlus() { this.getQualifier().charAt(0) = "+" }
-
-  override string getAPrimaryQlClass() { result = "RegExpPlus" }
-}
-
-class RegExpOpt extends RegExpQuantifier {
-  RegExpOpt() { this.getQualifier().charAt(0) = "?" }
-
-  override string getAPrimaryQlClass() { result = "RegExpOpt" }
-}
-
-class RegExpRange extends RegExpQuantifier {
-  string upper;
-  string lower;
-
-  RegExpRange() { re.multiples(part_end, end, lower, upper) }
-
-  string getUpper() { result = upper }
-
-  string getLower() { result = lower }
-
-  /**
-   * Gets the upper bound of the range, if any.
-   *
-   * If there is no upper bound, any number of repetitions is allowed.
-   * For a term of the form `r{lo}`, both the lower and the upper bound
-   * are `lo`.
-   */
-  int getUpperBound() { result = this.getUpper().toInt() }
-
-  /** Gets the lower bound of the range. */
-  int getLowerBound() { result = this.getLower().toInt() }
-
-  override string getAPrimaryQlClass() { result = "RegExpRange" }
-}
-
-class RegExpSequence extends RegExpTerm, TRegExpSequence {
-  RegExpSequence() {
-    this = TRegExpSequence(re, start, end) and
-    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
-  }
-
-  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
-
-  /** Gets the element preceding `element` in this sequence. */
-  RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
-
-  /** Gets the element following `element` in this sequence. */
-  RegExpTerm nextElement(RegExpTerm element) {
-    exists(int i |
-      element = this.getChild(i) and
-      result = this.getChild(i + 1)
-    )
-  }
-
-  override string getAPrimaryQlClass() { result = "RegExpSequence" }
-}
-
-pragma[nomagic]
-private int seqChildEnd(RegExp re, int start, int end, int i) {
-  result = seqChild(re, start, end, i).getEnd()
-}
-
-// moved out so we can use it in the charpred
-private RegExpTerm seqChild(RegExp re, int start, int end, int i) {
-  re.sequence(start, end) and
-  (
-    i = 0 and
-    result.getRegExp() = re and
-    result.getStart() = start and
-    exists(int itemEnd |
-      re.item(start, itemEnd) and
-      result.getEnd() = itemEnd
-    )
-    or
-    i > 0 and
-    result.getRegExp() = re and
-    exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
-      result.getStart() = itemStart and
-      re.item(itemStart, result.getEnd())
-    )
-  )
-}
-
-class RegExpAlt extends RegExpTerm, TRegExpAlt {
-  RegExpAlt() { this = TRegExpAlt(re, start, end) }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegExp() = re and
-    result.getStart() = start and
-    exists(int part_end |
-      re.alternationOption(start, end, start, part_end) and
-      result.getEnd() = part_end
-    )
-    or
-    i > 0 and
-    result.getRegExp() = re and
-    exists(int part_start |
-      part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
-    |
-      result.getStart() = part_start and
-      re.alternationOption(start, end, part_start, result.getEnd())
-    )
-  }
-
-  override string getAPrimaryQlClass() { result = "RegExpAlt" }
-}
-
-class RegExpCharEscape = RegExpEscape;
-
-class RegExpEscape extends RegExpNormalChar {
-  RegExpEscape() { re.escapedCharacter(start, end) }
-
-  /**
-   * Gets the name of the escaped; for example, `w` for `\w`.
-   * TODO: Handle named escapes.
-   */
-  override string getValue() {
-    this.isIdentityEscape() and result = this.getUnescaped()
-    or
-    this.getUnescaped() = "n" and result = "\n"
-    or
-    this.getUnescaped() = "r" and result = "\r"
-    or
-    this.getUnescaped() = "t" and result = "\t"
-    or
-    this.isUnicode() and
-    result = this.getUnicode()
-  }
-
-  predicate isIdentityEscape() {
-    not this.getUnescaped() in ["n", "r", "t"] and not this.isUnicode()
-  }
-
-  /**
-   * Gets the text for this escape. That is e.g. "\w".
-   */
-  private string getText() { result = re.getText().substring(start, end) }
-
-  /**
-   * Holds if this is a unicode escape.
-   */
-  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
-
-  /**
-   * Gets the unicode char for this escape.
-   * E.g. for `\u0061` this returns "a".
-   */
-  private string getUnicode() {
-    this.isUnicode() and
-    result = parseHexInt(this.getText().suffix(2)).toUnicode()
-  }
-
-  string getUnescaped() { result = this.getText().suffix(1) }
-
-  override string getAPrimaryQlClass() { result = "RegExpEscape" }
-}
-
-/**
- * A word boundary, that is, a regular expression term of the form `\b`.
- */
-class RegExpWordBoundary extends RegExpSpecialChar {
-  RegExpWordBoundary() { this.getChar() = "\\b" }
-}
-
-/**
- * A character class escape in a regular expression.
- * That is, an escaped character that denotes multiple characters.
- *
- * Examples:
- *
- * ```
- * \w
- * \S
- * ```
- */
-class RegExpCharacterClassEscape extends RegExpEscape {
-  RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W", "h", "H"] }
-
-  /** Gets the name of the character class; for example, `w` for `\w`. */
-  // override string getValue() { result = value }
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpCharacterClassEscape" }
-}
-
-/**
- * A character class.
- *
- * Examples:
- *
- * ```rb
- * /[a-fA-F0-9]/
- * /[^abc]/
- * ```
- */
-class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
-  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
-
-  predicate isInverted() { re.getChar(start + 1) = "^" }
-
-  predicate isUniversalClass() {
-    // [^]
-    this.isInverted() and not exists(this.getAChild())
-    or
-    // [\w\W] and similar
-    not this.isInverted() and
-    exists(string cce1, string cce2 |
-      cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
-      cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
-    |
-      cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
-    )
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegExp() = re and
-    exists(int itemStart, int itemEnd |
-      result.getStart() = itemStart and
-      re.charSetStart(start, itemStart) and
-      re.charSetChild(start, itemStart, itemEnd) and
-      result.getEnd() = itemEnd
-    )
-    or
-    i > 0 and
-    result.getRegExp() = re and
-    exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
-      result.getStart() = itemStart and
-      re.charSetChild(start, itemStart, result.getEnd())
-    )
-  }
-
-  override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
-}
-
-class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
-  int lower_end;
-  int upper_start;
-
-  RegExpCharacterRange() {
-    this = TRegExpCharacterRange(re, start, end) and
-    re.charRange(_, start, lower_end, upper_start, end)
-  }
-
-  predicate isRange(string lo, string hi) {
-    lo = re.getText().substring(start, lower_end) and
-    hi = re.getText().substring(upper_start, end)
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegExp() = re and
-    result.getStart() = start and
-    result.getEnd() = lower_end
-    or
-    i = 1 and
-    result.getRegExp() = re and
-    result.getStart() = upper_start and
-    result.getEnd() = end
-  }
-
-  override string getAPrimaryQlClass() { result = "RegExpCharacterRange" }
-}
-
-class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
-  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
-
-  predicate isCharacter() { any() }
-
-  string getValue() { result = re.getText().substring(start, end) }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpNormalChar" }
-}
-
-class RegExpConstant extends RegExpTerm {
-  string value;
-
-  RegExpConstant() {
-    this = TRegExpNormalChar(re, start, end) and
-    not this instanceof RegExpCharacterClassEscape and
-    // exclude chars in qualifiers
-    // TODO: push this into regex library
-    not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
-      qstart <= start and end <= qend
-    ) and
-    value = this.(RegExpNormalChar).getValue()
-    or
-    this = TRegExpSpecialChar(re, start, end) and
-    re.inCharSet(start) and
-    value = this.(RegExpSpecialChar).getChar()
-  }
-
-  predicate isCharacter() { any() }
-
-  string getValue() { result = value }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpConstant" }
-}
-
-class RegExpGroup extends RegExpTerm, TRegExpGroup {
-  RegExpGroup() { this = TRegExpGroup(re, start, end) }
-
-  /**
-   * Gets the index of this capture group within the enclosing regular
-   * expression literal.
-   *
-   * For example, in the regular expression `/((a?).)(?:b)/`, the
-   * group `((a?).)` has index 1, the group `(a?)` nested inside it
-   * has index 2, and the group `(?:b)` has no index, since it is
-   * not a capture group.
-   */
-  int getNumber() { result = re.getGroupNumber(start, end) }
-
-  /** Holds if this is a capture group. */
-  predicate isCapture() { exists(this.getNumber()) }
-
-  /** Holds if this is a named capture group. */
-  predicate isNamed() { exists(this.getName()) }
-
-  /** Gets the name of this capture group, if any. */
-  string getName() { result = re.getGroupName(start, end) }
-
-  predicate isCharacter() { any() }
-
-  string getValue() { result = re.getText().substring(start, end) }
-
-  override RegExpTerm getChild(int i) {
-    result.getRegExp() = re and
-    i = 0 and
-    re.groupContents(start, end, result.getStart(), result.getEnd())
-  }
-
-  override string getAPrimaryQlClass() { result = "RegExpGroup" }
-}
-
-class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
-  string char;
-
-  RegExpSpecialChar() {
-    this = TRegExpSpecialChar(re, start, end) and
-    re.specialCharacter(start, end, char)
-  }
-
-  predicate isCharacter() { any() }
-
-  string getChar() { result = char }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpSpecialChar" }
-}
-
-class RegExpDot extends RegExpSpecialChar {
-  RegExpDot() { this.getChar() = "." }
-
-  override string getAPrimaryQlClass() { result = "RegExpDot" }
-}
-
-class RegExpDollar extends RegExpSpecialChar {
-  RegExpDollar() { this.getChar() = ["$", "\\Z", "\\z"] }
-
-  override string getAPrimaryQlClass() { result = "RegExpDollar" }
-}
-
-class RegExpCaret extends RegExpSpecialChar {
-  RegExpCaret() { this.getChar() = ["^", "\\A"] }
-
-  override string getAPrimaryQlClass() { result = "RegExpCaret" }
-}
-
-class RegExpZeroWidthMatch extends RegExpGroup {
-  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
-
-  override predicate isCharacter() { any() }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpZeroWidthMatch" }
-}
-
-/**
- * A zero-width lookahead or lookbehind assertion.
- *
- * Examples:
- *
- * ```
- * (?=\w)
- * (?!\n)
- * (?<=\.)
- * (?<!\\)
- * ```
- */
-class RegExpSubPattern extends RegExpZeroWidthMatch {
-  RegExpSubPattern() { not re.emptyGroup(start, end) }
-
-  /** Gets the lookahead term. */
-  RegExpTerm getOperand() {
-    exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
-      result.getRegExp() = re and
-      result.getStart() = in_start and
-      result.getEnd() = in_end
-    )
-  }
-}
-
-abstract class RegExpLookahead extends RegExpSubPattern { }
-
-class RegExpPositiveLookahead extends RegExpLookahead {
-  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpPositiveLookahead" }
-}
-
-class RegExpNegativeLookahead extends RegExpLookahead {
-  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpNegativeLookahead" }
-}
-
-abstract class RegExpLookbehind extends RegExpSubPattern { }
-
-class RegExpPositiveLookbehind extends RegExpLookbehind {
-  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpPositiveLookbehind" }
-}
-
-class RegExpNegativeLookbehind extends RegExpLookbehind {
-  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
-
-  override string getAPrimaryQlClass() { result = "RegExpNegativeLookbehind" }
-}
-
-class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
-  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
-
-  /**
-   * Gets the number of the capture group this back reference refers to, if any.
-   */
-  int getNumber() { result = re.getBackRefNumber(start, end) }
-
-  /**
-   * Gets the name of the capture group this back reference refers to, if any.
-   */
-  string getName() { result = re.getBackRefName(start, end) }
-
-  /** Gets the capture group this back reference refers to. */
-  RegExpGroup getGroup() {
-    result.getLiteral() = this.getLiteral() and
-    (
-      result.getNumber() = this.getNumber() or
-      result.getName() = this.getName()
-    )
-  }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpBackRef" }
-}
-
-/**
- * A named character property. For example, the POSIX bracket expression
- * `[[:digit:]]`.
- */
-class RegExpNamedCharacterProperty extends RegExpTerm, TRegExpNamedCharacterProperty {
-  RegExpNamedCharacterProperty() { this = TRegExpNamedCharacterProperty(re, start, end) }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getAPrimaryQlClass() { result = "RegExpNamedCharacterProperty" }
-
-  /**
-   * Gets the property name. For example, in `\p{Space}`, the result is
-   * `"Space"`.
-   */
-  string getName() { result = re.getCharacterPropertyName(start, end) }
-
-  /**
-   * Holds if the property is inverted. For example, it holds for `\p{^Digit}`,
-   * which matches non-digits.
-   */
-  predicate isInverted() { re.namedCharacterPropertyIsInverted(start, end) }
-}
-
-RegExpTerm getParsedRegExp(AST::RegExpLiteral re) {
-  result.getRegExp() = re and result.isRootTerm()
-}
-
-/**
- * A node whose value may flow to a position where it is interpreted
- * as a part of a regular expression.
- */
-abstract class RegExpPatternSource extends DataFlow::Node {
-  /**
-   * Gets a node where the pattern of this node is parsed as a part of
-   * a regular expression.
-   */
-  abstract DataFlow::Node getAParse();
-
-  /**
-   * Gets the root term of the regular expression parsed from this pattern.
-   */
-  abstract RegExpTerm getRegExpTerm();
-}
-
-/**
- * A regular expression literal, viewed as the pattern source for itself.
- */
-private class RegExpLiteralPatternSource extends RegExpPatternSource {
-  private AST::RegExpLiteral astNode;
-
-  RegExpLiteralPatternSource() { astNode = this.asExpr().getExpr() }
-
-  override DataFlow::Node getAParse() { result = this }
-
-  override RegExpTerm getRegExpTerm() { result = astNode.getParsed() }
-}
-
-/**
- * A node whose string value may flow to a position where it is interpreted
- * as a part of a regular expression.
- */
-private class StringRegExpPatternSource extends RegExpPatternSource {
-  private DataFlow::Node parse;
-
-  StringRegExpPatternSource() { this = regExpSource(parse) }
-
-  override DataFlow::Node getAParse() { result = parse }
-
-  override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
-}
--- a/ruby/ql/src/queries/security/cwe-020/HostnameRegexpSpecific.qll
+++ b/ruby/ql/src/queries/security/cwe-020/HostnameRegexpSpecific.qll
@@ -1,2 +1,2 @@
-import codeql.ruby.security.performance.RegExpTreeView
+import codeql.ruby.Regexp
 import codeql.ruby.DataFlow
--- a/ruby/ql/src/queries/security/cwe-1333/ReDoS.ql
+++ b/ruby/ql/src/queries/security/cwe-1333/ReDoS.ql
@@ -16,7 +16,7 @@

 import codeql.ruby.security.performance.ExponentialBackTracking
 import codeql.ruby.security.performance.ReDoSUtil
-import codeql.ruby.security.performance.RegExpTreeView
+import codeql.ruby.Regexp

 from RegExpTerm t, string pump, State s, string prefixMsg
 where hasReDoSResult(t, pump, s, prefixMsg)
--- a/ruby/ql/test/library-tests/regexp/parse.ql
+++ b/ruby/ql/test/library-tests/regexp/parse.ql
@@ -3,9 +3,9 @@
 */

 import codeql.Locations
-import codeql.ruby.security.performance.RegExpTreeView as RETV
+import codeql.ruby.Regexp as RE

-query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
+query predicate nodes(RE::RegExpTerm n, string attr, string val) {
  attr = "semmle.label" and
  val = "[" + concat(n.getAPrimaryQlClass(), ", ") + "] " + n.toString()
  or
@@ -13,7 +13,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
  val =
    any(int i |
      n =
-        rank[i](RETV::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
+        rank[i](RE::RegExpTerm t, string fp, int sl, int sc, int el, int ec |
          t.hasLocationInfo(fp, sl, sc, el, ec)
        |
          t order by fp, sl, sc, el, ec, t.toString()
@@ -21,7 +21,7 @@ query predicate nodes(RETV::RegExpTerm n, string attr, string val) {
    ).toString()
 }

-query predicate edges(RETV::RegExpTerm pred, RETV::RegExpTerm succ, string attr, string val) {
+query predicate edges(RE::RegExpTerm pred, RE::RegExpTerm succ, string attr, string val) {
  attr in ["semmle.label", "semmle.order"] and
  val = any(int i | succ = pred.getChild(i)).toString()
 }
--- a/ruby/ql/test/library-tests/regexp/regexp.ql
+++ b/ruby/ql/test/library-tests/regexp/regexp.ql
@@ -1,4 +1,4 @@
-import codeql.ruby.security.performance.RegExpTreeView
+import codeql.ruby.Regexp

 query predicate groupName(RegExpGroup g, string name) { name = g.getName() }

--- a/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
+++ b/ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected
@@ -33,7 +33,9 @@
 | tst.rb:137:11:137:17 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
 | tst.rb:143:11:143:18 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
 | tst.rb:146:11:146:17 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
-| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'f'. |
+| tst.rb:149:11:149:20 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
+| tst.rb:152:11:152:24 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
+| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
 | tst.rb:158:11:158:18 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
 | tst.rb:161:11:161:18 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
 | tst.rb:164:11:164:20 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |