From 5fbcbbc584c208642d08f4346e372c849d80bc39 Mon Sep 17 00:00:00 2001
From: erik-krogh <erik-krogh@github.com>
Date: Tue, 1 Nov 2022 12:00:57 +0100
Subject: [PATCH] move existing regex-tree into a module

---
 python/ql/lib/semmle/python/RegexTreeView.qll | 1874 +++++++++--------
 1 file changed, 939 insertions(+), 935 deletions(-)

diff --git a/python/ql/lib/semmle/python/RegexTreeView.qll b/python/ql/lib/semmle/python/RegexTreeView.qll
index 808a784d472..aa884c56b61 100644
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -2,6 +2,10 @@
 
 import python
 private import semmle.python.regex
+import Impl
+
+/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
+RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }
 
 /**
  * An element containing a regular expression term, that is, either
@@ -12,7 +16,7 @@ private import semmle.python.regex
  * Otherwise, we wish to represent the term differently.
  * This avoids multiple representations of the same term.
  */
-newtype TRegExpParent =
+private newtype TRegExpParent =
   /** A string literal used as a regular expression */
   TRegExpLiteral(Regex re) or
   /** A quantified term */
@@ -48,982 +52,982 @@ newtype TRegExpParent =
   /** A back reference */
   TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
 
-/**
- * An element containing a regular expression term, that is, either
- * a string literal (parsed as a regular expression)
- * or another regular expression term.
- */
-class RegExpParent extends TRegExpParent {
-  /** Gets a textual representation of this element. */
-  string toString() { result = "RegExpParent" }
+/** An implementation that satisfies the RegexTreeView signature. */
+module Impl {
+  /**
+   * An element containing a regular expression term, that is, either
+   * a string literal (parsed as a regular expression)
+   * or another regular expression term.
+   */
+  class RegExpParent extends TRegExpParent {
+    /** Gets a textual representation of this element. */
+    string toString() { result = "RegExpParent" }
 
-  /** Gets the `i`th child term. */
-  abstract RegExpTerm getChild(int i);
+    /** Gets the `i`th child term. */
+    abstract RegExpTerm getChild(int i);
 
-  /** Gets a child term . */
-  RegExpTerm getAChild() { result = this.getChild(_) }
+    /** Gets a child term. */
+    RegExpTerm getAChild() { result = this.getChild(_) }
 
-  /** Gets the number of child terms. */
-  int getNumChild() { result = count(this.getAChild()) }
+    /** Gets the number of child terms. */
+    int getNumChild() { result = count(this.getAChild()) }
 
-  /** Gets the associated regex. */
-  abstract Regex getRegex();
-}
+    /** Gets the associated regex. */
+    abstract Regex getRegex();
+  }
 
-/** A string literal used as a regular expression */
-class RegExpLiteral extends TRegExpLiteral, RegExpParent {
-  Regex re;
+  /** A string literal used as a regular expression */
+  class RegExpLiteral extends TRegExpLiteral, RegExpParent {
+    Regex re;
 
-  RegExpLiteral() { this = TRegExpLiteral(re) }
+    RegExpLiteral() { this = TRegExpLiteral(re) }
 
-  override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
+    override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
 
-  /** Holds if dot, `.`, matches all characters, including newlines. */
-  predicate isDotAll() { re.getAMode() = "DOTALL" }
+    /** Holds if dot, `.`, matches all characters, including newlines. */
+    predicate isDotAll() { re.getAMode() = "DOTALL" }
 
-  /** Holds if this regex matching is case-insensitive for this regex. */
-  predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
+    /** Holds if matching is case-insensitive for this regex. */
+    predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
 
-  /** Get a string representing all modes for this regex. */
-  string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
+    /** Gets a string representing all modes for this regex. */
+    string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
 
-  override Regex getRegex() { result = re }
+    override Regex getRegex() { result = re }
 
-  /** Gets the primary QL class for this regex. */
-  string getPrimaryQLClass() { result = "RegExpLiteral" }
-}
-
-/**
- * A regular expression term, that is, a syntactic part of a regular expression.
- */
-class RegExpTerm extends RegExpParent {
-  Regex re;
-  int start;
-  int end;
-
-  RegExpTerm() {
-    this = TRegExpAlt(re, start, end)
-    or
-    this = TRegExpBackRef(re, start, end)
-    or
-    this = TRegExpCharacterClass(re, start, end)
-    or
-    this = TRegExpCharacterRange(re, start, end)
-    or
-    this = TRegExpNormalChar(re, start, end)
-    or
-    this = TRegExpGroup(re, start, end)
-    or
-    this = TRegExpQuantifier(re, start, end)
-    or
-    this = TRegExpSequence(re, start, end)
-    or
-    this = TRegExpSpecialChar(re, start, end)
+    /** Gets the primary QL class for this regex. */
+    string getPrimaryQLClass() { result = "RegExpLiteral" }
   }
 
   /**
-   * Gets the outermost term of this regular expression.
+   * A regular expression term, that is, a syntactic part of a regular expression.
    */
-  RegExpTerm getRootTerm() {
-    this.isRootTerm() and result = this
-    or
-    result = this.getParent().(RegExpTerm).getRootTerm()
-  }
+  class RegExpTerm extends RegExpParent {
+    Regex re;
+    int start;
+    int end;
 
-  /**
-   * Holds if this term is part of a string literal
-   * that is interpreted as a regular expression.
-   */
-  predicate isUsedAsRegExp() { any() }
-
-  /**
-   * Holds if this is the root term of a regular expression.
-   */
-  predicate isRootTerm() { start = 0 and end = re.getText().length() }
-
-  override RegExpTerm getChild(int i) {
-    result = this.(RegExpAlt).getChild(i)
-    or
-    result = this.(RegExpBackRef).getChild(i)
-    or
-    result = this.(RegExpCharacterClass).getChild(i)
-    or
-    result = this.(RegExpCharacterRange).getChild(i)
-    or
-    result = this.(RegExpNormalChar).getChild(i)
-    or
-    result = this.(RegExpGroup).getChild(i)
-    or
-    result = this.(RegExpQuantifier).getChild(i)
-    or
-    result = this.(RegExpSequence).getChild(i)
-    or
-    result = this.(RegExpSpecialChar).getChild(i)
-  }
-
-  /**
-   * Gets the parent term of this regular expression term, or the
-   * regular expression literal if this is the root term.
-   */
-  RegExpParent getParent() { result.getAChild() = this }
-
-  override Regex getRegex() { result = re }
-
-  /** Gets the offset at which this term starts. */
-  int getStart() { result = start }
-
-  /** Gets the offset at which this term ends. */
-  int getEnd() { result = end }
-
-  override string toString() { result = re.getText().substring(start, end) }
-
-  /**
-   * Gets the location of the surrounding regex, as locations inside the regex do not exist.
-   * To get location information corresponding to the term inside the regex,
-   * use `hasLocationInfo`.
-   */
-  Location getLocation() { result = re.getLocation() }
-
-  /** Holds if this term is found at the specified location offsets. */
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    exists(int re_start, int re_end |
-      re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
-      startcolumn = re_start + start + 4 and
-      endcolumn = re_start + end + 3
-    )
-  }
-
-  /** Gets the file in which this term is found. */
-  File getFile() { result = this.getLocation().getFile() }
-
-  /** Gets the raw source text of this term. */
-  string getRawValue() { result = this.toString() }
-
-  /** Gets the string literal in which this term is found. */
-  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
-
-  /** Gets the regular expression term that is matched (textually) before this one, if any. */
-  RegExpTerm getPredecessor() {
-    exists(RegExpTerm parent | parent = this.getParent() |
-      result = parent.(RegExpSequence).previousElement(this)
+    RegExpTerm() {
+      this = TRegExpAlt(re, start, end)
       or
-      not exists(parent.(RegExpSequence).previousElement(this)) and
-      not parent instanceof RegExpSubPattern and
-      result = parent.getPredecessor()
-    )
-  }
-
-  /** Gets the regular expression term that is matched (textually) after this one, if any. */
-  RegExpTerm getSuccessor() {
-    exists(RegExpTerm parent | parent = this.getParent() |
-      result = parent.(RegExpSequence).nextElement(this)
+      this = TRegExpBackRef(re, start, end)
       or
-      not exists(parent.(RegExpSequence).nextElement(this)) and
-      not parent instanceof RegExpSubPattern and
-      result = parent.getSuccessor()
-    )
+      this = TRegExpCharacterClass(re, start, end)
+      or
+      this = TRegExpCharacterRange(re, start, end)
+      or
+      this = TRegExpNormalChar(re, start, end)
+      or
+      this = TRegExpGroup(re, start, end)
+      or
+      this = TRegExpQuantifier(re, start, end)
+      or
+      this = TRegExpSequence(re, start, end)
+      or
+      this = TRegExpSpecialChar(re, start, end)
+    }
+
+    /**
+     * Gets the outermost term of this regular expression.
+     */
+    RegExpTerm getRootTerm() {
+      this.isRootTerm() and result = this
+      or
+      result = this.getParent().(RegExpTerm).getRootTerm()
+    }
+
+    /**
+     * Holds if this term is part of a string literal
+     * that is interpreted as a regular expression.
+     */
+    predicate isUsedAsRegExp() { any() }
+
+    /**
+     * Holds if this is the root term of a regular expression.
+     */
+    predicate isRootTerm() { start = 0 and end = re.getText().length() }
+
+    override RegExpTerm getChild(int i) {
+      result = this.(RegExpAlt).getChild(i)
+      or
+      result = this.(RegExpBackRef).getChild(i)
+      or
+      result = this.(RegExpCharacterClass).getChild(i)
+      or
+      result = this.(RegExpCharacterRange).getChild(i)
+      or
+      result = this.(RegExpNormalChar).getChild(i)
+      or
+      result = this.(RegExpGroup).getChild(i)
+      or
+      result = this.(RegExpQuantifier).getChild(i)
+      or
+      result = this.(RegExpSequence).getChild(i)
+      or
+      result = this.(RegExpSpecialChar).getChild(i)
+    }
+
+    /**
+     * Gets the parent term of this regular expression term, or the
+     * regular expression literal if this is the root term.
+     */
+    RegExpParent getParent() { result.getAChild() = this }
+
+    override Regex getRegex() { result = re }
+
+    /** Gets the offset at which this term starts. */
+    int getStart() { result = start }
+
+    /** Gets the offset at which this term ends. */
+    int getEnd() { result = end }
+
+    override string toString() { result = re.getText().substring(start, end) }
+
+    /**
+     * Gets the location of the surrounding regex, as locations inside the regex do not exist.
+     * To get location information corresponding to the term inside the regex,
+     * use `hasLocationInfo`.
+     */
+    Location getLocation() { result = re.getLocation() }
+
+    /** Holds if this term is found at the specified location offsets. */
+    predicate hasLocationInfo(
+      string filepath, int startline, int startcolumn, int endline, int endcolumn
+    ) {
+      exists(int re_start, int re_end |
+        re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
+        startcolumn = re_start + start + 4 and
+        endcolumn = re_start + end + 3
+      )
+    }
+
+    /** Gets the file in which this term is found. */
+    File getFile() { result = this.getLocation().getFile() }
+
+    /** Gets the raw source text of this term. */
+    string getRawValue() { result = this.toString() }
+
+    /** Gets the string literal in which this term is found. */
+    RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
+
+    /** Gets the regular expression term that is matched (textually) before this one, if any. */
+    RegExpTerm getPredecessor() {
+      exists(RegExpTerm parent | parent = this.getParent() |
+        result = parent.(RegExpSequence).previousElement(this)
+        or
+        not exists(parent.(RegExpSequence).previousElement(this)) and
+        not parent instanceof RegExpSubPattern and
+        result = parent.getPredecessor()
+      )
+    }
+
+    /** Gets the regular expression term that is matched (textually) after this one, if any. */
+    RegExpTerm getSuccessor() {
+      exists(RegExpTerm parent | parent = this.getParent() |
+        result = parent.(RegExpSequence).nextElement(this)
+        or
+        not exists(parent.(RegExpSequence).nextElement(this)) and
+        not parent instanceof RegExpSubPattern and
+        result = parent.getSuccessor()
+      )
+    }
+
+    /** Gets the primary QL class for this term. */
+    string getPrimaryQLClass() { result = "RegExpTerm" }
   }
 
-  /** Gets the primary QL class for this term. */
-  string getPrimaryQLClass() { result = "RegExpTerm" }
-}
-
-/**
- * A quantified regular expression term.
- *
- * Example:
- *
- * ```
- * ((ECMA|Java)[sS]cript)*
- * ```
- */
-class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
-  int part_end;
-  boolean maybe_empty;
-  boolean may_repeat_forever;
-
-  RegExpQuantifier() {
-    this = TRegExpQuantifier(re, start, end) and
-    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegex() = re and
-    result.getStart() = start and
-    result.getEnd() = part_end
-  }
-
-  /** Hols if this term may match an unlimited number of times. */
-  predicate mayRepeatForever() { may_repeat_forever = true }
-
-  /** Gets the qualifier for this term. That is e.g "?" for "a?". */
-  string getQualifier() { result = re.getText().substring(part_end, end) }
-
-  override string getPrimaryQLClass() { result = "RegExpQuantifier" }
-}
-
-/**
- * A regular expression term that permits unlimited repetitions.
- */
-class InfiniteRepetitionQuantifier extends RegExpQuantifier {
-  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
-}
-
-/**
- * A star-quantified term.
- *
- * Example:
- *
- * ```
- * \w*
- * ```
- */
-class RegExpStar extends InfiniteRepetitionQuantifier {
-  RegExpStar() { this.getQualifier().charAt(0) = "*" }
-
-  override string getPrimaryQLClass() { result = "RegExpStar" }
-}
-
-/**
- * A plus-quantified term.
- *
- * Example:
- *
- * ```
- * \w+
- * ```
- */
-class RegExpPlus extends InfiniteRepetitionQuantifier {
-  RegExpPlus() { this.getQualifier().charAt(0) = "+" }
-
-  override string getPrimaryQLClass() { result = "RegExpPlus" }
-}
-
-/**
- * An optional term.
- *
- * Example:
- *
- * ```
- * ;?
- * ```
- */
-class RegExpOpt extends RegExpQuantifier {
-  RegExpOpt() { this.getQualifier().charAt(0) = "?" }
-
-  override string getPrimaryQLClass() { result = "RegExpOpt" }
-}
-
-/**
- * A range-quantified term
- *
- * Examples:
- *
- * ```
- * \w{2,4}
- * \w{2,}
- * \w{2}
- * ```
- */
-class RegExpRange extends RegExpQuantifier {
-  string upper;
-  string lower;
-
-  RegExpRange() { re.multiples(part_end, end, lower, upper) }
-
-  /** Gets the string defining the upper bound of this range, if any. */
-  string getUpper() { result = upper }
-
-  /** Gets the string defining the lower bound of this range, if any. */
-  string getLower() { result = lower }
-
   /**
-   * Gets the upper bound of the range, if any.
+   * A quantified regular expression term.
    *
-   * If there is no upper bound, any number of repetitions is allowed.
-   * For a term of the form `r{lo}`, both the lower and the upper bound
-   * are `lo`.
-   */
-  int getUpperBound() { result = this.getUpper().toInt() }
-
-  /** Gets the lower bound of the range. */
-  int getLowerBound() { result = this.getLower().toInt() }
-
-  override string getPrimaryQLClass() { result = "RegExpRange" }
-}
-
-/**
- * A sequence term.
- *
- * Example:
- *
- * ```
- * (ECMA|Java)Script
- * ```
- *
- * This is a sequence with the elements `(ECMA|Java)` and `Script`.
- */
-class RegExpSequence extends RegExpTerm, TRegExpSequence {
-  RegExpSequence() { this = TRegExpSequence(re, start, end) }
-
-  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
-
-  /** Gets the element preceding `element` in this sequence. */
-  RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
-
-  /** Gets the element following `element` in this sequence. */
-  RegExpTerm nextElement(RegExpTerm element) {
-    exists(int i |
-      element = this.getChild(i) and
-      result = this.getChild(i + 1)
-    )
-  }
-
-  override string getPrimaryQLClass() { result = "RegExpSequence" }
-}
-
-pragma[nomagic]
-private int seqChildEnd(Regex re, int start, int end, int i) {
-  result = seqChild(re, start, end, i).getEnd()
-}
-
-// moved out so we can use it in the charpred
-private RegExpTerm seqChild(Regex re, int start, int end, int i) {
-  re.sequence(start, end) and
-  (
-    i = 0 and
-    result.getRegex() = re and
-    result.getStart() = start and
-    exists(int itemEnd |
-      re.item(start, itemEnd) and
-      result.getEnd() = itemEnd
-    )
-    or
-    i > 0 and
-    result.getRegex() = re and
-    exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
-      result.getStart() = itemStart and
-      re.item(itemStart, result.getEnd())
-    )
-  )
-}
-
-/**
- * An alternative term, that is, a term of the form `a|b`.
- *
- * Example:
- *
- * ```
- * ECMA|Java
- * ```
- */
-class RegExpAlt extends RegExpTerm, TRegExpAlt {
-  RegExpAlt() { this = TRegExpAlt(re, start, end) }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegex() = re and
-    result.getStart() = start and
-    exists(int part_end |
-      re.alternationOption(start, end, start, part_end) and
-      result.getEnd() = part_end
-    )
-    or
-    i > 0 and
-    result.getRegex() = re and
-    exists(int part_start |
-      part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
-    |
-      result.getStart() = part_start and
-      re.alternationOption(start, end, part_start, result.getEnd())
-    )
-  }
-
-  override string getPrimaryQLClass() { result = "RegExpAlt" }
-}
-
-class RegExpCharEscape = RegExpEscape;
-
-/**
- * An escaped regular expression term, that is, a regular expression
- * term starting with a backslash, which is not a backreference.
- *
- * Example:
- *
- * ```
- * \.
- * \w
- * ```
- */
-class RegExpEscape extends RegExpNormalChar {
-  RegExpEscape() { re.escapedCharacter(start, end) }
-
-  /**
-   * Gets the name of the escaped; for example, `w` for `\w`.
-   * TODO: Handle named escapes.
-   */
-  override string getValue() {
-    not this.isUnicode() and
-    this.isIdentityEscape() and
-    result = this.getUnescaped()
-    or
-    this.getUnescaped() = "n" and result = "\n"
-    or
-    this.getUnescaped() = "r" and result = "\r"
-    or
-    this.getUnescaped() = "t" and result = "\t"
-    or
-    this.getUnescaped() = "f" and result = 12.toUnicode()
-    or
-    this.getUnescaped() = "v" and result = 11.toUnicode()
-    or
-    this.isUnicode() and
-    result = this.getUnicode()
-  }
-
-  /** Holds if this terms name is given by the part following the escape character. */
-  predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
-
-  override string getPrimaryQLClass() { result = "RegExpEscape" }
-
-  /** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
-  string getUnescaped() { result = this.getText().suffix(1) }
-
-  /**
-   * Gets the text for this escape. That is e.g. "\w".
-   */
-  private string getText() { result = re.getText().substring(start, end) }
-
-  /**
-   * Holds if this is a unicode escape.
-   */
-  private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
-
-  /**
-   * Gets the unicode char for this escape.
-   * E.g. for `\u0061` this returns "a".
-   */
-  private string getUnicode() {
-    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
-      result = codepoint.toUnicode()
-    )
-  }
-
-  /**
-   * Gets int value for the `index`th char in the hex number of the unicode escape.
-   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
-   */
-  private int getHexValueFromUnicode(int index) {
-    this.isUnicode() and
-    exists(string hex, string char | hex = this.getText().suffix(2) |
-      char = hex.charAt(index) and
-      result = 16.pow(hex.length() - index - 1) * toHex(char)
-    )
-  }
-}
-
-/**
- * Gets the hex number for the `hex` char.
- */
-private int toHex(string hex) {
-  hex = [0 .. 9].toString() and
-  result = hex.toInt()
-  or
-  result = 10 and hex = ["a", "A"]
-  or
-  result = 11 and hex = ["b", "B"]
-  or
-  result = 12 and hex = ["c", "C"]
-  or
-  result = 13 and hex = ["d", "D"]
-  or
-  result = 14 and hex = ["e", "E"]
-  or
-  result = 15 and hex = ["f", "F"]
-}
-
-/**
- * A word boundary, that is, a regular expression term of the form `\b`.
- */
-class RegExpWordBoundary extends RegExpSpecialChar {
-  RegExpWordBoundary() { this.getChar() = "\\b" }
-}
-
-/**
- * A character class escape in a regular expression.
- * That is, an escaped character that denotes multiple characters.
- *
- * Examples:
- *
- * ```
- * \w
- * \S
- * ```
- */
-class RegExpCharacterClassEscape extends RegExpEscape {
-  RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
-}
-
-/**
- * A character class in a regular expression.
- *
- * Examples:
- *
- * ```
- * [a-z_]
- * [^<>&]
- * ```
- */
-class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
-  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
-
-  /** Holds if this character class is inverted, matching the opposite of its content. */
-  predicate isInverted() { re.getChar(start + 1) = "^" }
-
-  /** Gets the `i`th char inside this charater class. */
-  string getCharThing(int i) { result = re.getChar(i + start) }
-
-  /** Holds if this character class can match anything. */
-  predicate isUniversalClass() {
-    // [^]
-    this.isInverted() and not exists(this.getAChild())
-    or
-    // [\w\W] and similar
-    not this.isInverted() and
-    exists(string cce1, string cce2 |
-      cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
-      cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
-    |
-      cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
-    )
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegex() = re and
-    exists(int itemStart, int itemEnd |
-      result.getStart() = itemStart and
-      re.char_set_start(start, itemStart) and
-      re.char_set_child(start, itemStart, itemEnd) and
-      result.getEnd() = itemEnd
-    )
-    or
-    i > 0 and
-    result.getRegex() = re and
-    exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
-      result.getStart() = itemStart and
-      re.char_set_child(start, itemStart, result.getEnd())
-    )
-  }
-
-  override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
-}
-
-/**
- * A character range in a character class in a regular expression.
- *
- * Example:
- *
- * ```
- * a-z
- * ```
- */
-class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
-  int lower_end;
-  int upper_start;
-
-  RegExpCharacterRange() {
-    this = TRegExpCharacterRange(re, start, end) and
-    re.charRange(_, start, lower_end, upper_start, end)
-  }
-
-  /** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
-  predicate isRange(string lo, string hi) {
-    lo = re.getText().substring(start, lower_end) and
-    hi = re.getText().substring(upper_start, end)
-  }
-
-  override RegExpTerm getChild(int i) {
-    i = 0 and
-    result.getRegex() = re and
-    result.getStart() = start and
-    result.getEnd() = lower_end
-    or
-    i = 1 and
-    result.getRegex() = re and
-    result.getStart() = upper_start and
-    result.getEnd() = end
-  }
-
-  override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
-}
-
-/**
- * A normal character in a regular expression, that is, a character
- * without special meaning. This includes escaped characters.
- *
- * Examples:
- * ```
- * t
- * \t
- * ```
- */
-class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
-  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
-
-  /**
-   * Holds if this constant represents a valid Unicode character (as opposed
-   * to a surrogate code point that does not correspond to a character by itself.)
-   */
-  predicate isCharacter() { any() }
-
-  /** Gets the string representation of the char matched by this term. */
-  string getValue() { result = re.getText().substring(start, end) }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getPrimaryQLClass() { result = "RegExpNormalChar" }
-}
-
-/**
- * A constant regular expression term, that is, a regular expression
- * term matching a single string. Currently, this will always be a single character.
- *
- * Example:
- *
- * ```
- * a
- * ```
- */
-class RegExpConstant extends RegExpTerm {
-  string value;
-
-  RegExpConstant() {
-    this = TRegExpNormalChar(re, start, end) and
-    not this instanceof RegExpCharacterClassEscape and
-    // exclude chars in qualifiers
-    // TODO: push this into regex library
-    not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
-      qstart <= start and end <= qend
-    ) and
-    value = this.(RegExpNormalChar).getValue()
-  }
-
-  /**
-   * Holds if this constant represents a valid Unicode character (as opposed
-   * to a surrogate code point that does not correspond to a character by itself.)
-   */
-  predicate isCharacter() { any() }
-
-  /** Gets the string matched by this constant term. */
-  string getValue() { result = value }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getPrimaryQLClass() { result = "RegExpConstant" }
-}
-
-/**
- * A grouped regular expression.
- *
- * Examples:
- *
- * ```
- * (ECMA|Java)
- * (?:ECMA|Java)
- * (?<quote>['"])
- * ```
- */
-class RegExpGroup extends RegExpTerm, TRegExpGroup {
-  RegExpGroup() { this = TRegExpGroup(re, start, end) }
-
-  /**
-   * Gets the index of this capture group within the enclosing regular
-   * expression literal.
+   * Example:
    *
-   * For example, in the regular expression `/((a?).)(?:b)/`, the
-   * group `((a?).)` has index 1, the group `(a?)` nested inside it
-   * has index 2, and the group `(?:b)` has no index, since it is
-   * not a capture group.
+   * ```
+   * ((ECMA|Java)[sS]cript)*
+   * ```
    */
-  int getNumber() { result = re.getGroupNumber(start, end) }
+  class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
+    int part_end;
+    boolean maybe_empty;
+    boolean may_repeat_forever;
 
-  /** Holds if this is a capture group. */
-  predicate isCapture() { exists(this.getNumber()) }
+    RegExpQuantifier() {
+      this = TRegExpQuantifier(re, start, end) and
+      re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
+    }
 
-  /** Holds if this is a named capture group. */
-  predicate isNamed() { exists(this.getName()) }
-
-  /** Gets the name of this capture group, if any. */
-  string getName() { result = re.getGroupName(start, end) }
-
-  override RegExpTerm getChild(int i) {
-    result.getRegex() = re and
-    i = 0 and
-    re.groupContents(start, end, result.getStart(), result.getEnd())
-  }
-
-  override string getPrimaryQLClass() { result = "RegExpGroup" }
-}
-
-/**
- * A special character in a regular expression.
- *
- * Examples:
- * ```
- * ^
- * $
- * .
- * ```
- */
-class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
-  string char;
-
-  RegExpSpecialChar() {
-    this = TRegExpSpecialChar(re, start, end) and
-    re.specialCharacter(start, end, char)
-  }
-
-  /**
-   * Holds if this constant represents a valid Unicode character (as opposed
-   * to a surrogate code point that does not correspond to a character by itself.)
-   */
-  predicate isCharacter() { any() }
-
-  /** Gets the char for this term. */
-  string getChar() { result = char }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
-}
-
-/**
- * A dot regular expression.
- *
- * Example:
- *
- * ```
- * .
- * ```
- */
-class RegExpDot extends RegExpSpecialChar {
-  RegExpDot() { this.getChar() = "." }
-
-  override string getPrimaryQLClass() { result = "RegExpDot" }
-}
-
-/**
- * A dollar assertion `$` or `\Z` matching the end of a line.
- *
- * Example:
- *
- * ```
- * $
- * ```
- */
-class RegExpDollar extends RegExpSpecialChar {
-  RegExpDollar() { this.getChar() = ["$", "\\Z"] }
-
-  override string getPrimaryQLClass() { result = "RegExpDollar" }
-}
-
-/**
- * A caret assertion `^` or `\A` matching the beginning of a line.
- *
- * Example:
- *
- * ```
- * ^
- * ```
- */
-class RegExpCaret extends RegExpSpecialChar {
-  RegExpCaret() { this.getChar() = ["^", "\\A"] }
-
-  override string getPrimaryQLClass() { result = "RegExpCaret" }
-}
-
-/**
- * A zero-width match, that is, either an empty group or an assertion.
- *
- * Examples:
- * ```
- * ()
- * (?=\w)
- * ```
- */
-class RegExpZeroWidthMatch extends RegExpGroup {
-  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
-
-  override RegExpTerm getChild(int i) { none() }
-
-  override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
-}
-
-/**
- * A zero-width lookahead or lookbehind assertion.
- *
- * Examples:
- *
- * ```
- * (?=\w)
- * (?!\n)
- * (?<=\.)
- * (?<!\\)
- * ```
- */
-class RegExpSubPattern extends RegExpZeroWidthMatch {
-  RegExpSubPattern() { not re.emptyGroup(start, end) }
-
-  /** Gets the lookahead term. */
-  RegExpTerm getOperand() {
-    exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
+    override RegExpTerm getChild(int i) {
+      i = 0 and
       result.getRegex() = re and
-      result.getStart() = in_start and
-      result.getEnd() = in_end
+      result.getStart() = start and
+      result.getEnd() = part_end
+    }
+
+    /** Holds if this term may match an unlimited number of times. */
+    predicate mayRepeatForever() { may_repeat_forever = true }
+
+    /** Gets the qualifier for this term. That is e.g "?" for "a?". */
+    string getQualifier() { result = re.getText().substring(part_end, end) }
+
+    override string getPrimaryQLClass() { result = "RegExpQuantifier" }
+  }
+
+  /**
+   * A regular expression term that permits unlimited repetitions.
+   */
+  class InfiniteRepetitionQuantifier extends RegExpQuantifier {
+    InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
+  }
+
+  /**
+   * A star-quantified term.
+   *
+   * Example:
+   *
+   * ```
+   * \w*
+   * ```
+   */
+  class RegExpStar extends InfiniteRepetitionQuantifier {
+    RegExpStar() { this.getQualifier().charAt(0) = "*" }
+
+    override string getPrimaryQLClass() { result = "RegExpStar" }
+  }
+
+  /**
+   * A plus-quantified term.
+   *
+   * Example:
+   *
+   * ```
+   * \w+
+   * ```
+   */
+  class RegExpPlus extends InfiniteRepetitionQuantifier {
+    RegExpPlus() { this.getQualifier().charAt(0) = "+" }
+
+    override string getPrimaryQLClass() { result = "RegExpPlus" }
+  }
+
+  /**
+   * An optional term.
+   *
+   * Example:
+   *
+   * ```
+   * ;?
+   * ```
+   */
+  class RegExpOpt extends RegExpQuantifier {
+    RegExpOpt() { this.getQualifier().charAt(0) = "?" }
+
+    override string getPrimaryQLClass() { result = "RegExpOpt" }
+  }
+
+  /**
+   * A range-quantified term
+   *
+   * Examples:
+   *
+   * ```
+   * \w{2,4}
+   * \w{2,}
+   * \w{2}
+   * ```
+   */
+  class RegExpRange extends RegExpQuantifier {
+    string upper;
+    string lower;
+
+    RegExpRange() { re.multiples(part_end, end, lower, upper) }
+
+    /** Gets the string defining the upper bound of this range, if any. */
+    string getUpper() { result = upper }
+
+    /** Gets the string defining the lower bound of this range, if any. */
+    string getLower() { result = lower }
+
+    /**
+     * Gets the upper bound of the range, if any.
+     *
+     * If there is no upper bound, any number of repetitions is allowed.
+     * For a term of the form `r{lo}`, both the lower and the upper bound
+     * are `lo`.
+     */
+    int getUpperBound() { result = this.getUpper().toInt() }
+
+    /** Gets the lower bound of the range. */
+    int getLowerBound() { result = this.getLower().toInt() }
+
+    override string getPrimaryQLClass() { result = "RegExpRange" }
+  }
+
+  /**
+   * A sequence term.
+   *
+   * Example:
+   *
+   * ```
+   * (ECMA|Java)Script
+   * ```
+   *
+   * This is a sequence with the elements `(ECMA|Java)` and `Script`.
+   */
+  class RegExpSequence extends RegExpTerm, TRegExpSequence {
+    RegExpSequence() { this = TRegExpSequence(re, start, end) }
+
+    override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
+
+    /** Gets the element preceding `element` in this sequence. */
+    RegExpTerm previousElement(RegExpTerm element) { element = this.nextElement(result) }
+
+    /** Gets the element following `element` in this sequence. */
+    RegExpTerm nextElement(RegExpTerm element) {
+      exists(int i |
+        element = this.getChild(i) and
+        result = this.getChild(i + 1)
+      )
+    }
+
+    override string getPrimaryQLClass() { result = "RegExpSequence" }
+  }
+
+  pragma[nomagic]
+  private int seqChildEnd(Regex re, int start, int end, int i) {
+    result = seqChild(re, start, end, i).getEnd()
+  }
+
+  // moved out so we can use it in the charpred
+  private RegExpTerm seqChild(Regex re, int start, int end, int i) {
+    re.sequence(start, end) and
+    (
+      i = 0 and
+      result.getRegex() = re and
+      result.getStart() = start and
+      exists(int itemEnd |
+        re.item(start, itemEnd) and
+        result.getEnd() = itemEnd
+      )
+      or
+      i > 0 and
+      result.getRegex() = re and
+      exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
+        result.getStart() = itemStart and
+        re.item(itemStart, result.getEnd())
+      )
     )
   }
-}
-
-/**
- * A zero-width lookahead assertion.
- *
- * Examples:
- *
- * ```
- * (?=\w)
- * (?!\n)
- * ```
- */
-abstract class RegExpLookahead extends RegExpSubPattern { }
-
-/**
- * A positive-lookahead assertion.
- *
- * Examples:
- *
- * ```
- * (?=\w)
- * ```
- */
-class RegExpPositiveLookahead extends RegExpLookahead {
-  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
-
-  override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
-}
-
-/**
- * A negative-lookahead assertion.
- *
- * Examples:
- *
- * ```
- * (?!\n)
- * ```
- */
-class RegExpNegativeLookahead extends RegExpLookahead {
-  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
-
-  override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
-}
-
-/**
- * A zero-width lookbehind assertion.
- *
- * Examples:
- *
- * ```
- * (?<=\.)
- * (?<!\\)
- * ```
- */
-abstract class RegExpLookbehind extends RegExpSubPattern { }
-
-/**
- * A positive-lookbehind assertion.
- *
- * Examples:
- *
- * ```
- * (?<=\.)
- * ```
- */
-class RegExpPositiveLookbehind extends RegExpLookbehind {
-  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
-
-  override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
-}
-
-/**
- * A negative-lookbehind assertion.
- *
- * Examples:
- *
- * ```
- * (?<!\\)
- * ```
- */
-class RegExpNegativeLookbehind extends RegExpLookbehind {
-  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
-
-  override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
-}
-
-/**
- * A back reference, that is, a term of the form `\i` or `\k<name>`
- * in a regular expression.
- *
- * Examples:
- *
- * ```
- * \1
- * (?P=quote)
- * ```
- */
-class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
-  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
 
   /**
-   * Gets the number of the capture group this back reference refers to, if any.
+   * An alternative term, that is, a term of the form `a|b`.
+   *
+   * Example:
+   *
+   * ```
+   * ECMA|Java
+   * ```
    */
-  int getNumber() { result = re.getBackrefNumber(start, end) }
+  class RegExpAlt extends RegExpTerm, TRegExpAlt {
+    RegExpAlt() { this = TRegExpAlt(re, start, end) }
+
+    override RegExpTerm getChild(int i) {
+      i = 0 and
+      result.getRegex() = re and
+      result.getStart() = start and
+      exists(int part_end |
+        re.alternationOption(start, end, start, part_end) and
+        result.getEnd() = part_end
+      )
+      or
+      i > 0 and
+      result.getRegex() = re and
+      exists(int part_start |
+        part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
+      |
+        result.getStart() = part_start and
+        re.alternationOption(start, end, part_start, result.getEnd())
+      )
+    }
+
+    override string getPrimaryQLClass() { result = "RegExpAlt" }
+  }
+
+  class RegExpCharEscape = RegExpEscape;
 
   /**
-   * Gets the name of the capture group this back reference refers to, if any.
+   * An escaped regular expression term, that is, a regular expression
+   * term starting with a backslash, which is not a backreference.
+   *
+   * Example:
+   *
+   * ```
+   * \.
+   * \w
+   * ```
    */
-  string getName() { result = re.getBackrefName(start, end) }
+  class RegExpEscape extends RegExpNormalChar {
+    RegExpEscape() { re.escapedCharacter(start, end) }
 
-  /** Gets the capture group this back reference refers to. */
-  RegExpGroup getGroup() {
-    this.hasLiteralAndNumber(result.getLiteral(), result.getNumber()) or
-    this.hasLiteralAndName(result.getLiteral(), result.getName())
+    /**
+     * Gets the name of the escaped character; for example, `w` for `\w`.
+     * TODO: Handle named escapes.
+     */
+    override string getValue() {
+      not this.isUnicode() and
+      this.isIdentityEscape() and
+      result = this.getUnescaped()
+      or
+      this.getUnescaped() = "n" and result = "\n"
+      or
+      this.getUnescaped() = "r" and result = "\r"
+      or
+      this.getUnescaped() = "t" and result = "\t"
+      or
+      this.getUnescaped() = "f" and result = 12.toUnicode()
+      or
+      this.getUnescaped() = "v" and result = 11.toUnicode()
+      or
+      this.isUnicode() and
+      result = this.getUnicode()
+    }
+
+    /** Holds if this term's name is given by the part following the escape character. */
+    predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f", "v"] }
+
+    override string getPrimaryQLClass() { result = "RegExpEscape" }
+
+    /** Gets the part of the term following the escape character. That is e.g. "w" if the term is "\w". */
+    string getUnescaped() { result = this.getText().suffix(1) }
+
+    /**
+     * Gets the text for this escape. That is e.g. "\w".
+     */
+    private string getText() { result = re.getText().substring(start, end) }
+
+    /**
+     * Holds if this is a unicode escape.
+     */
+    private predicate isUnicode() { this.getText().prefix(2) = ["\\u", "\\U"] }
+
+    /**
+     * Gets the unicode char for this escape.
+     * E.g. for `\u0061` this returns "a".
+     */
+    private string getUnicode() {
+      exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
+        result = codepoint.toUnicode()
+      )
+    }
+
+    /**
+     * Gets the int value for the `index`th char in the hex number of the unicode escape.
+     * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
+     */
+    private int getHexValueFromUnicode(int index) {
+      this.isUnicode() and
+      exists(string hex, string char | hex = this.getText().suffix(2) |
+        char = hex.charAt(index) and
+        result = 16.pow(hex.length() - index - 1) * toHex(char)
+      )
+    }
   }
 
-  /** Join-order helper for `getGroup`. */
-  pragma[nomagic]
-  private predicate hasLiteralAndNumber(RegExpLiteral literal, int number) {
-    literal = this.getLiteral() and
-    number = this.getNumber()
+  /**
+   * Gets the hex number for the `hex` char.
+   */
+  private int toHex(string hex) {
+    hex = [0 .. 9].toString() and
+    result = hex.toInt()
+    or
+    result = 10 and hex = ["a", "A"]
+    or
+    result = 11 and hex = ["b", "B"]
+    or
+    result = 12 and hex = ["c", "C"]
+    or
+    result = 13 and hex = ["d", "D"]
+    or
+    result = 14 and hex = ["e", "E"]
+    or
+    result = 15 and hex = ["f", "F"]
   }
 
-  /** Join-order helper for `getGroup`. */
-  pragma[nomagic]
-  private predicate hasLiteralAndName(RegExpLiteral literal, string name) {
-    literal = this.getLiteral() and
-    name = this.getName()
+  /**
+   * A word boundary, that is, a regular expression term of the form `\b`.
+   */
+  class RegExpWordBoundary extends RegExpSpecialChar {
+    RegExpWordBoundary() { this.getChar() = "\\b" }
   }
 
-  override RegExpTerm getChild(int i) { none() }
+  /**
+   * A character class escape in a regular expression.
+   * That is, an escaped character that denotes multiple characters.
+   *
+   * Examples:
+   *
+   * ```
+   * \w
+   * \S
+   * ```
+   */
+  class RegExpCharacterClassEscape extends RegExpEscape {
+    RegExpCharacterClassEscape() { this.getValue() in ["d", "D", "s", "S", "w", "W"] }
 
-  override string getPrimaryQLClass() { result = "RegExpBackRef" }
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
+  }
+
+  /**
+   * A character class in a regular expression.
+   *
+   * Examples:
+   *
+   * ```
+   * [a-z_]
+   * [^<>&]
+   * ```
+   */
+  class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
+    RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
+
+    /** Holds if this character class is inverted, matching the opposite of its content. */
+    predicate isInverted() { re.getChar(start + 1) = "^" }
+
+    /** Gets the `i`th char inside this character class. */
+    string getCharThing(int i) { result = re.getChar(i + start) }
+
+    /** Holds if this character class can match anything. */
+    predicate isUniversalClass() {
+      // [^]
+      this.isInverted() and not exists(this.getAChild())
+      or
+      // [\w\W] and similar
+      not this.isInverted() and
+      exists(string cce1, string cce2 |
+        cce1 = this.getAChild().(RegExpCharacterClassEscape).getValue() and
+        cce2 = this.getAChild().(RegExpCharacterClassEscape).getValue()
+      |
+        cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
+      )
+    }
+
+    override RegExpTerm getChild(int i) {
+      i = 0 and
+      result.getRegex() = re and
+      exists(int itemStart, int itemEnd |
+        result.getStart() = itemStart and
+        re.char_set_start(start, itemStart) and
+        re.char_set_child(start, itemStart, itemEnd) and
+        result.getEnd() = itemEnd
+      )
+      or
+      i > 0 and
+      result.getRegex() = re and
+      exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
+        result.getStart() = itemStart and
+        re.char_set_child(start, itemStart, result.getEnd())
+      )
+    }
+
+    override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
+  }
+
+  /**
+   * A character range in a character class in a regular expression.
+   *
+   * Example:
+   *
+   * ```
+   * a-z
+   * ```
+   */
+  class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
+    int lower_end;
+    int upper_start;
+
+    RegExpCharacterRange() {
+      this = TRegExpCharacterRange(re, start, end) and
+      re.charRange(_, start, lower_end, upper_start, end)
+    }
+
+    /** Holds if this range goes from `lo` to `hi`, in effect is `lo-hi`. */
+    predicate isRange(string lo, string hi) {
+      lo = re.getText().substring(start, lower_end) and
+      hi = re.getText().substring(upper_start, end)
+    }
+
+    override RegExpTerm getChild(int i) {
+      i = 0 and
+      result.getRegex() = re and
+      result.getStart() = start and
+      result.getEnd() = lower_end
+      or
+      i = 1 and
+      result.getRegex() = re and
+      result.getStart() = upper_start and
+      result.getEnd() = end
+    }
+
+    override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
+  }
+
+  /**
+   * A normal character in a regular expression, that is, a character
+   * without special meaning. This includes escaped characters.
+   *
+   * Examples:
+   * ```
+   * t
+   * \t
+   * ```
+   */
+  class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
+    RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
+
+    /**
+     * Holds if this constant represents a valid Unicode character (as opposed
+     * to a surrogate code point that does not correspond to a character by itself.)
+     */
+    predicate isCharacter() { any() }
+
+    /** Gets the string representation of the char matched by this term. */
+    string getValue() { result = re.getText().substring(start, end) }
+
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpNormalChar" }
+  }
+
+  /**
+   * A constant regular expression term, that is, a regular expression
+   * term matching a single string. Currently, this will always be a single character.
+   *
+   * Example:
+   *
+   * ```
+   * a
+   * ```
+   */
+  class RegExpConstant extends RegExpTerm {
+    string value;
+
+    RegExpConstant() {
+      this = TRegExpNormalChar(re, start, end) and
+      not this instanceof RegExpCharacterClassEscape and
+      // exclude chars in qualifiers
+      // TODO: push this into regex library
+      not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
+        qstart <= start and end <= qend
+      ) and
+      value = this.(RegExpNormalChar).getValue()
+    }
+
+    /**
+     * Holds if this constant represents a valid Unicode character (as opposed
+     * to a surrogate code point that does not correspond to a character by itself.)
+     */
+    predicate isCharacter() { any() }
+
+    /** Gets the string matched by this constant term. */
+    string getValue() { result = value }
+
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpConstant" }
+  }
+
+  /**
+   * A grouped regular expression.
+   *
+   * Examples:
+   *
+   * ```
+   * (ECMA|Java)
+   * (?:ECMA|Java)
+   * (?P<quote>['"])
+   * ```
+   */
+  class RegExpGroup extends RegExpTerm, TRegExpGroup {
+    RegExpGroup() { this = TRegExpGroup(re, start, end) }
+
+    /**
+     * Gets the index of this capture group within the enclosing regular
+     * expression literal.
+     *
+     * For example, in the regular expression `/((a?).)(?:b)/`, the
+     * group `((a?).)` has index 1, the group `(a?)` nested inside it
+     * has index 2, and the group `(?:b)` has no index, since it is
+     * not a capture group.
+     */
+    int getNumber() { result = re.getGroupNumber(start, end) }
+
+    /** Holds if this is a capture group. */
+    predicate isCapture() { exists(this.getNumber()) }
+
+    /** Holds if this is a named capture group. */
+    predicate isNamed() { exists(this.getName()) }
+
+    /** Gets the name of this capture group, if any. */
+    string getName() { result = re.getGroupName(start, end) }
+
+    override RegExpTerm getChild(int i) {
+      result.getRegex() = re and
+      i = 0 and
+      re.groupContents(start, end, result.getStart(), result.getEnd())
+    }
+
+    override string getPrimaryQLClass() { result = "RegExpGroup" }
+  }
+
+  /**
+   * A special character in a regular expression.
+   *
+   * Examples:
+   * ```
+   * ^
+   * $
+   * .
+   * ```
+   */
+  class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
+    string char;
+
+    RegExpSpecialChar() {
+      this = TRegExpSpecialChar(re, start, end) and
+      re.specialCharacter(start, end, char)
+    }
+
+    /**
+     * Holds if this constant represents a valid Unicode character (as opposed
+     * to a surrogate code point that does not correspond to a character by itself.)
+     */
+    predicate isCharacter() { any() }
+
+    /** Gets the char for this term. */
+    string getChar() { result = char }
+
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
+  }
+
+  /**
+   * A dot regular expression.
+   *
+   * Example:
+   *
+   * ```
+   * .
+   * ```
+   */
+  class RegExpDot extends RegExpSpecialChar {
+    RegExpDot() { this.getChar() = "." }
+
+    override string getPrimaryQLClass() { result = "RegExpDot" }
+  }
+
+  /**
+   * A dollar assertion `$` or `\Z` matching the end of a line.
+   *
+   * Example:
+   *
+   * ```
+   * $
+   * ```
+   */
+  class RegExpDollar extends RegExpSpecialChar {
+    RegExpDollar() { this.getChar() = ["$", "\\Z"] }
+
+    override string getPrimaryQLClass() { result = "RegExpDollar" }
+  }
+
+  /**
+   * A caret assertion `^` or `\A` matching the beginning of a line.
+   *
+   * Example:
+   *
+   * ```
+   * ^
+   * ```
+   */
+  class RegExpCaret extends RegExpSpecialChar {
+    RegExpCaret() { this.getChar() = ["^", "\\A"] }
+
+    override string getPrimaryQLClass() { result = "RegExpCaret" }
+  }
+
+  /**
+   * A zero-width match, that is, either an empty group or an assertion.
+   *
+   * Examples:
+   * ```
+   * ()
+   * (?=\w)
+   * ```
+   */
+  class RegExpZeroWidthMatch extends RegExpGroup {
+    RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
+
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
+  }
+
+  /**
+   * A zero-width lookahead or lookbehind assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?=\w)
+   * (?!\n)
+   * (?<=\.)
+   * (?<!\\)
+   * ```
+   */
+  class RegExpSubPattern extends RegExpZeroWidthMatch {
+    RegExpSubPattern() { not re.emptyGroup(start, end) }
+
+    /** Gets the operand of this assertion, that is, the lookahead or lookbehind term. */
+    RegExpTerm getOperand() {
+      exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
+        result.getRegex() = re and
+        result.getStart() = in_start and
+        result.getEnd() = in_end
+      )
+    }
+  }
+
+  /**
+   * A zero-width lookahead assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?=\w)
+   * (?!\n)
+   * ```
+   */
+  abstract class RegExpLookahead extends RegExpSubPattern { }
+
+  /**
+   * A positive-lookahead assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?=\w)
+   * ```
+   */
+  class RegExpPositiveLookahead extends RegExpLookahead {
+    RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
+
+    override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
+  }
+
+  /**
+   * A negative-lookahead assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?!\n)
+   * ```
+   */
+  class RegExpNegativeLookahead extends RegExpLookahead {
+    RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
+
+    override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
+  }
+
+  /**
+   * A zero-width lookbehind assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?<=\.)
+   * (?<!\\)
+   * ```
+   */
+  abstract class RegExpLookbehind extends RegExpSubPattern { }
+
+  /**
+   * A positive-lookbehind assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?<=\.)
+   * ```
+   */
+  class RegExpPositiveLookbehind extends RegExpLookbehind {
+    RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
+
+    override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
+  }
+
+  /**
+   * A negative-lookbehind assertion.
+   *
+   * Examples:
+   *
+   * ```
+   * (?<!\\)
+   * ```
+   */
+  class RegExpNegativeLookbehind extends RegExpLookbehind {
+    RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
+
+    override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
+  }
+
+  /**
+   * A back reference, that is, a term of the form `\i` or `(?P=name)`
+   * in a regular expression.
+   *
+   * Examples:
+   *
+   * ```
+   * \1
+   * (?P=quote)
+   * ```
+   */
+  class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
+    RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
+
+    /**
+     * Gets the number of the capture group this back reference refers to, if any.
+     */
+    int getNumber() { result = re.getBackrefNumber(start, end) }
+
+    /**
+     * Gets the name of the capture group this back reference refers to, if any.
+     */
+    string getName() { result = re.getBackrefName(start, end) }
+
+    /** Gets the capture group this back reference refers to. */
+    RegExpGroup getGroup() {
+      this.hasLiteralAndNumber(result.getLiteral(), result.getNumber()) or
+      this.hasLiteralAndName(result.getLiteral(), result.getName())
+    }
+
+    /** Join-order helper for `getGroup`. */
+    pragma[nomagic]
+    private predicate hasLiteralAndNumber(RegExpLiteral literal, int number) {
+      literal = this.getLiteral() and
+      number = this.getNumber()
+    }
+
+    /** Join-order helper for `getGroup`. */
+    pragma[nomagic]
+    private predicate hasLiteralAndName(RegExpLiteral literal, string name) {
+      literal = this.getLiteral() and
+      name = this.getName()
+    }
+
+    override RegExpTerm getChild(int i) { none() }
+
+    override string getPrimaryQLClass() { result = "RegExpBackRef" }
+  }
 }
-
-/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
-RegExpTerm getParsedRegExp(StrConst re) { result.getRegex() = re and result.isRootTerm() }