Merge branch 'main' into jorgectf/python/deserialization

2026-05-01 11:45:14 +02:00 · 2021-09-28 16:49:33 +02:00
parent 61a81b60e8 ee46717c76
commit 67fddda6d2
7130 changed files with 597514 additions and 224228 deletions
--- a/python/.vscode/ql.code-snippets
+++ b/python/.vscode/ql.code-snippets
@@ -106,7 +106,7 @@
        "prefix": "type tracking",
        "body": [
            "/** Gets a reference to ${3:a thing}. */",
-            "private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
+            "private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
            "  t.start() and",
            "  result = ${2:value}",
            "  or",
@@ -152,4 +152,102 @@
        ]
    },

+    "Type tracking class": {
+        "scope": "ql",
+        "prefix": "type tracking class",
+        "body": [
+            "/**",
+            " * Provides models for the `${TM_SELECTED_TEXT}` class",
+            " *",
+            " * See ${1:https://apiref (TODO)}.",
+            " */",
+            "module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
+            "  /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
+            "  private API::Node classRef() {",
+            "    result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
+            "  }",
+            "",
+            "  /**",
+            "   * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
+            "   *",
+            "   * This can include instantiations of the class, return values from function",
+            "   * calls, or a special parameter that will be set when functions are called by an external",
+            "   * library.",
+            "   *",
+            "   * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
+            "   */",
+            "  abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
+            "",
+            "  /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
+            "  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
+            "      ClassInstantiation() { this = classRef().getACall() }",
+            "  }",
+            "",
+            "  /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
+            "  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
+            "    t.start() and",
+            "    result instanceof InstanceSource",
+            "    or",
+            "    exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
+            "  }",
+            "",
+            "  /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
+            "  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
+            "",
+            "  /**",
+            "   * Taint propagation for `${TM_SELECTED_TEXT}`.",
+            "   */",
+            "  private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
+            "    InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
+            "    ",
+            "    override DataFlow::Node getInstance() { result = instance() }",
+            "    ",
+            "    override string getAttributeName() { none() }",
+            "    ",
+            "    override string getMethodName() { none() }",
+            "    ",
+            "    override string getAsyncMethodName() { none() }",
+            "  }",
+            "",
+            "  /**",
+            "   * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
+            "   */",
+            "  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
+            "    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
+            "      // TODO",
+            "      none()",
+            "    }",
+            "  }",
+            "}",
+        ],
+        "description": "Type tracking class (select full class path before inserting)",
+    },
+    "foo": {
+        "scope": "ql",
+        "prefix": "foo",
+        "body": [
+            "    /**",
+            "     * Taint propagation for `$1`.",
+            "     */",
+            "     private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
+            "        InstanceTaintSteps() { this = \"$1\" }",
+            "",
+            "        override DataFlow::Node getInstance() { result = instance() }",
+            "",
+            "        override string getAttributeName() { none() }",
+            "",
+            "        override string getMethodName() { none() }",
+            "",
+            "        override string getAsyncMethodName() { none() }",
+            "      }",
+        ],
+    },
+    "API graph .getMember chain": {
+        "scope": "ql",
+        "prefix": "api graph .getMember chain",
+        "body": [
+            "API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
+        ],
+        "description": "API graph .getMember chain (select full path before inserting)",
+    },
 }
--- a/python/change-notes/2021-04-09-split-weak-crypto-query.md
+++ b/python/change-notes/2021-04-09-split-weak-crypto-query.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Updated the _Use of a broken or weak cryptographic algorithm_ (`py/weak-cryptographic-algorithm`) query, so it alerts on any use of a weak cryptographic non-hashing algorithm. Introduced a new query _Use of a broken or weak cryptographic hashing algorithm on sensitive data_ (`py/weak-sensitive-data-hashing`) to handle weak cryptographic hashing algorithms, which only alerts when used on sensitive data.
--- a/python/change-notes/2021-05-10-idna-add-modeling.md
+++ b/python/change-notes/2021-05-10-idna-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `idna`, for encoding/decoding Internationalised Domain Names in Applications.
--- a/python/change-notes/2021-05-10-simplejson-add-modeling.md
+++ b/python/change-notes/2021-05-10-simplejson-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `simplejson`.
--- a/python/change-notes/2021-05-10-ujson-add-modeling.md
+++ b/python/change-notes/2021-05-10-ujson-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `ujson`.
--- a/python/change-notes/2021-05-21-api-graph-await.md
+++ b/python/change-notes/2021-05-21-api-graph-await.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* API graph nodes now contain a `getAwaited()` member predicate, for getting the result of awaiting an item, such as `await foo`.
--- a/python/change-notes/2021-05-25-add-ClickHouse-sql-libs.md
+++ b/python/change-notes/2021-05-25-add-ClickHouse-sql-libs.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added model of SQL execution in `clickhouse-driver` and `aioch` PyPI packages, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @japroc](https://github.com/github/codeql/pull/5889).
--- a/python/change-notes/2021-06-03-aiohttp-webserver-modeling.md
+++ b/python/change-notes/2021-06-03-aiohttp-webserver-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of sources/sinks when using the `aiohttp.web` web framework.
--- a/python/change-notes/2021-06-04-sensitive-data-modeling-expanded.md
+++ b/python/change-notes/2021-06-04-sensitive-data-modeling-expanded.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Expanded modeling of sensitive data sources to include: subscripting with a key that indicates sensitive data (`obj["password"]`), parameters whose names indicate sensitive data (`def func(password):`), and assignments to variables whose names indicate sensitive data (`password = ...`).
--- a/python/change-notes/2021-06-08-twisted-add-modeling.md
+++ b/python/change-notes/2021-06-08-twisted-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of sources/sinks when using `twisted` to create web servers.
--- a/python/change-notes/2021-06-09-add-jmespath-modeling.md
+++ b/python/change-notes/2021-06-09-add-jmespath-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `jmespath`.
--- a/python/change-notes/2021-06-09-rsa-add-modeling.md
+++ b/python/change-notes/2021-06-09-rsa-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `rsa`.
--- a/python/change-notes/2021-06-15-add-method-call-conveniences.md
+++ b/python/change-notes/2021-06-15-add-method-call-conveniences.md
@@ -0,0 +1,5 @@
+lgtm,codescanning
+* A new class `DataFlow::MethodCallNode` extends `DataFlow::CallCfgNode` with convenient methods for
+  accessing the receiver and method name of a method call.
+* The `LocalSourceNode` class now has a `getAMethodCall` method, with which one can easily access
+  method calls with the given node as a receiver.
--- a/python/change-notes/2021-06-16-MarkupSafe-add-modeling.md
+++ b/python/change-notes/2021-06-16-MarkupSafe-add-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of the PyPI package `MarkupSafe`.
--- a/python/change-notes/2021-06-24-add-CookieWrite-concept.md
+++ b/python/change-notes/2021-06-24-add-CookieWrite-concept.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added `HTTP::Server::CookieWrite` concept for statements that set a cookie in an HTTP response, along with modeling of this in supported web frameworks (aiohttp/flask/django/tornado/twisted).
--- a/python/change-notes/2021-06-24-dataflow-implicit-reads.md
+++ b/python/change-notes/2021-06-24-dataflow-implicit-reads.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* The DataFlow libraries have been augmented with support for `Configuration`-specific in-place read steps at, for example, sinks and custom taint steps. This means that it is now possible to specify sinks that accept flow with non-empty access paths.
--- a/python/change-notes/2021-06-25-add-peewee-modeling.md
+++ b/python/change-notes/2021-06-25-add-peewee-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of raw SQL execution from the PyPI package `peewee`.
--- a/python/change-notes/2021-07-12-add-typetrackingnode.md
+++ b/python/change-notes/2021-07-12-add-typetrackingnode.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.
--- a/python/change-notes/2021-07-13-path-problem-customization.md
+++ b/python/change-notes/2021-07-13-path-problem-customization.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customizations.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.
--- a/python/change-notes/2021-07-16-deprecate-importnode.md
+++ b/python/change-notes/2021-07-16-deprecate-importnode.md
@@ -0,0 +1,4 @@
+lgtm,codescanning
+* The `importNode` predicate from the data-flow library has been deprecated. In its place, we
+  recommend using the API graphs library, accessible via `import semmle.python.ApiGraphs`.
+  
--- a/python/change-notes/2021-07-28-port-RoDoS-queries.md
+++ b/python/change-notes/2021-07-28-port-RoDoS-queries.md
@@ -0,0 +1,3 @@
+lgtm,codescanning
+* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
+* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`) query, which is already available in JavaScript.
--- a/python/change-notes/2021-08-30-port-modifying-default-query.md
+++ b/python/change-notes/2021-08-30-port-modifying-default-query.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Updated _Modification of parameter with default_ (`py/modification-of-default-value`) query to use the new data flow library instead of the old taint tracking library and to remove the use of points-to analysis. You may see differences in the results found by the query, but overall this change should result in a more robust and accurate analysis.
--- a/python/change-notes/2021-09-02-add-Flask-SQLAlchemy-modeling.md
+++ b/python/change-notes/2021-09-02-add-Flask-SQLAlchemy-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of SQL execution in the `Flask-SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`).
--- a/python/change-notes/2021-09-02-add-SQLAlchemy-modeling.md
+++ b/python/change-notes/2021-09-02-add-SQLAlchemy-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of SQL execution in the `SQLAlchemy` PyPI package, resulting in additional sinks for the SQL Injection query (`py/sql-injection`). This modeling was originally [submitted as a contribution by @mrthankyou](https://github.com/github/codeql/pull/5680).
--- a/python/change-notes/2021-09-02-add-SQLAlchemyTextClauseInjection.md
+++ b/python/change-notes/2021-09-02-add-SQLAlchemyTextClauseInjection.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Expanded the query _SQL query built from user-controlled sources_ (`py/sql-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.
--- a/python/change-notes/2021-09-08-add-flow-from-default-values.md
+++ b/python/change-notes/2021-09-08-add-flow-from-default-values.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Function parameters with default values will now see flow from those values.
--- a/python/ql/examples/qlpack.lock.yml
+++ b/python/ql/examples/qlpack.lock.yml
@@ -0,0 +1,4 @@
+---
+dependencies: {}
+compiled: false
+lockVersion: 1.0.0
--- a/python/ql/examples/qlpack.yml
+++ b/python/ql/examples/qlpack.yml
@@ -1,3 +1,4 @@
-name: codeql-python-examples
-version: 0.0.0
-libraryPathDependencies: codeql-python
+name: codeql/python-examples
+version: 0.0.2
+dependencies:
+    codeql/python-all: "*"
--- a/python/ql/lib/Customizations.qll
+++ b/python/ql/lib/Customizations.qll
--- a/python/ql/lib/default.qll
+++ b/python/ql/lib/default.qll
--- a/python/ql/lib/python.qll
+++ b/python/ql/lib/python.qll
--- a/python/ql/lib/qlpack.lock.yml
+++ b/python/ql/lib/qlpack.lock.yml
@@ -0,0 +1,4 @@
+---
+dependencies: {}
+compiled: false
+lockVersion: 1.0.0
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -0,0 +1,7 @@
+name: codeql/python-all
+version: 0.0.2
+dbscheme: semmlecode.python.dbscheme
+extractor: python
+library: true
+dependencies:
+    codeql/python-upgrades: 0.0.2
--- a/python/ql/lib/semmle/crypto/Crypto.qll
+++ b/python/ql/lib/semmle/crypto/Crypto.qll
@@ -0,0 +1,3 @@
+/** DEPRECATED: Use `semmle.python.concepts.CryptoAlgorithms` instead. */
+
+import semmle.python.concepts.CryptoAlgorithms
--- a/python/ql/lib/semmle/dataflow/SSA.qll
+++ b/python/ql/lib/semmle/dataflow/SSA.qll
--- a/python/ql/lib/semmle/files/FileSystem.qll
+++ b/python/ql/lib/semmle/files/FileSystem.qll
--- a/python/ql/lib/semmle/python/ApiGraphs.qll
+++ b/python/ql/lib/semmle/python/ApiGraphs.qll
@@ -97,6 +97,11 @@ module API {
     */
    Node getASubclass() { result = getASuccessor(Label::subclass()) }

+    /**
+     * Gets a node representing the result from awaiting this node.
+     */
+    Node getAwaited() { result = getASuccessor(Label::await()) }
+
    /**
     * Gets a string representation of the lexicographically least among all shortest access paths
     * from the root to this node.
@@ -349,22 +354,135 @@ module API {
      )
    }

-    private import semmle.python.types.Builtins as Builtins
+    /** Gets the name of a known built-in. */
+    private string getBuiltInName() {
+      // These lists were created by inspecting the `builtins` and `__builtin__` modules in
+      // Python 3 and 2 respectively, using the `dir` built-in.
+      // Built-in functions and exceptions shared between Python 2 and 3
+      result in [
+          "abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
+          "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
+          "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
+          "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
+          "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
+          "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
+          "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
+          // Exceptions
+          "ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
+          "BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
+          "FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
+          "ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
+          "LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
+          "OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
+          "RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
+          "SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
+          "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
+          "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
+          // Added for compatibility
+          "exec"
+        ]
+      or
+      // Built-in constants shared between Python 2 and 3
+      result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
+      or
+      // Python 3 only
+      result in [
+          "ascii", "breakpoint", "bytes", "exec",
+          // Exceptions
+          "BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
+          "ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
+          "FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
+          "NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
+          "ResourceWarning", "StopAsyncIteration", "TimeoutError"
+        ]
+      or
+      // Python 2 only
+      result in [
+          "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
+          "unichr", "unicode", "xrange"
+        ]
+    }

    /**
     * Gets a data flow node that is likely to refer to a built-in with the name `name`.
     *
-     * Currently this is an over-approximation, and does not account for things like overwriting a
+     * Currently this is an over-approximation, and may not account for things like overwriting a
     * built-in with a different value.
     */
    private DataFlow::Node likely_builtin(string name) {
-      result.asCfgNode() =
-        any(NameNode n |
-          n.isGlobal() and
-          n.isLoad() and
-          name = n.getId() and
-          name in [any(Builtins::Builtin b).getName(), "None", "True", "False"]
-        )
+      exists(Module m |
+        result.asCfgNode() =
+          any(NameNode n |
+            possible_builtin_accessed_in_module(n, name, m) and
+            not possible_builtin_defined_in_module(name, m)
+          )
+      )
+    }
+
+    /**
+     * Holds if a global variable called `name` (which is also the name of a built-in) is assigned
+     * a value in the module `m`.
+     */
+    private predicate possible_builtin_defined_in_module(string name, Module m) {
+      global_name_defined_in_module(name, m) and
+      name = getBuiltInName()
+    }
+
+    /**
+     * Holds if `n` is an access of a global variable called `name` (which is also the name of a
+     * built-in) inside the module `m`.
+     */
+    private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
+      n.isGlobal() and
+      n.isLoad() and
+      name = n.getId() and
+      name = getBuiltInName() and
+      m = n.getEnclosingModule()
+    }
+
+    /**
+     * Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
+     * built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
+     */
+    private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
+      n.isLoad() and
+      name = n.getId() and
+      // Not already defined in an enclosing scope.
+      not exists(LocalVariable v |
+        v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
+      ) and
+      not name = getBuiltInName() and
+      s = n.getScope().getEnclosingScope*() and
+      exists(potential_import_star_base(s)) and
+      not global_name_defined_in_module(name, n.getEnclosingModule())
+    }
+
+    /** Holds if a global variable called `name` is assigned a value in the module `m`. */
+    private predicate global_name_defined_in_module(string name, Module m) {
+      exists(NameNode n |
+        not exists(LocalVariable v | n.defines(v)) and
+        n.isStore() and
+        name = n.getId() and
+        m = n.getEnclosingModule()
+      )
+    }
+
+    /**
+     * Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
+     *
+     * For example, given
+     *
+     * `from foo.bar import *`
+     *
+     * this would be the API graph node with the path
+     *
+     * `moduleImport("foo").getMember("bar")`
+     */
+    private TApiNode potential_import_star_base(Scope s) {
+      exists(DataFlow::Node ref |
+        ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
+        use(result, ref)
+      )
    }

    /**
@@ -396,11 +514,28 @@ module API {
        exists(DataFlow::Node superclass | pred.flowsTo(superclass) |
          ref.asExpr().(ClassExpr).getABase() = superclass.asExpr()
        )
+        or
+        // awaiting
+        exists(Await await, DataFlow::Node awaitedValue |
+          lbl = Label::await() and
+          ref.asExpr() = await and
+          await.getValue() = awaitedValue.asExpr() and
+          pred.flowsTo(awaitedValue)
+        )
      )
      or
      // Built-ins, treated as members of the module `builtins`
      base = MkModuleImport("builtins") and
      lbl = Label::member(any(string name | ref = likely_builtin(name)))
+      or
+      // Unknown variables that may belong to a module imported with `import *`
+      exists(Scope s |
+        base = potential_import_star_base(s) and
+        lbl =
+          Label::member(any(string name |
+              name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
+            ))
+      )
    }

    /**
@@ -426,7 +561,7 @@ module API {
     *
     * The flow from `src` to that node may be inter-procedural.
     */
-    private DataFlow::LocalSourceNode trackUseNode(
+    private DataFlow::TypeTrackingNode trackUseNode(
      DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
    ) {
      t.start() and
@@ -444,7 +579,6 @@ module API {
    cached
    DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
      result = trackUseNode(src, DataFlow::TypeTracker::end()) and
-      // We exclude module variable nodes, as these do not correspond to real uses.
      not result instanceof DataFlow::ModuleVariableNode
    }

@@ -512,5 +646,9 @@ private module Label {
  /** Gets the `return` edge label. */
  string return() { result = "getReturn()" }

+  /** Gets the `subclass` edge label. */
  string subclass() { result = "getASubclass()" }
+
+  /** Gets the `await` edge label. */
+  string await() { result = "getAwaited()" }
 }
--- a/python/ql/lib/semmle/python/AstExtended.qll
+++ b/python/ql/lib/semmle/python/AstExtended.qll
@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
  /** Whether this contains `inner` syntactically */
  predicate contains(AstNode inner) { this.getAChildNode+() = inner }

-  /** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
-  predicate containsInScope(AstNode inner) {
+  pragma[noinline]
+  private predicate containsInScope(AstNode inner, Scope scope) {
    this.contains(inner) and
-    this.getScope() = inner.getScope() and
-    not inner instanceof Scope
+    not inner instanceof Scope and
+    scope = this.getScope()
  }
+
+  /** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
+  predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
 }

 /* Parents */
--- a/python/ql/lib/semmle/python/AstGenerated.qll
+++ b/python/ql/lib/semmle/python/AstGenerated.qll
--- a/python/ql/lib/semmle/python/Class.qll
+++ b/python/ql/lib/semmle/python/Class.qll
--- a/python/ql/lib/semmle/python/Comment.qll
+++ b/python/ql/lib/semmle/python/Comment.qll
--- a/python/ql/lib/semmle/python/Comparisons.qll
+++ b/python/ql/lib/semmle/python/Comparisons.qll
--- a/python/ql/lib/semmle/python/Comprehensions.qll
+++ b/python/ql/lib/semmle/python/Comprehensions.qll
--- a/python/ql/lib/semmle/python/Concepts.qll
+++ b/python/ql/lib/semmle/python/Concepts.qll
@@ -4,7 +4,7 @@
 * provide concrete subclasses.
 */

-import python
+private import python
 private import semmle.python.dataflow.new.DataFlow
 private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.TaintTracking
@@ -72,6 +72,39 @@ module FileSystemAccess {
  }
 }

+/**
+ * A data flow node that writes data to the file system.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `FileSystemWriteAccess::Range` instead.
+ */
+class FileSystemWriteAccess extends FileSystemAccess {
+  override FileSystemWriteAccess::Range range;
+
+  /**
+   * Gets a node that represents data to be written to the file system (possibly with
+   * some transformation happening before it is written, like JSON encoding).
+   */
+  DataFlow::Node getADataNode() { result = range.getADataNode() }
+}
+
+/** Provides a class for modeling new file system writes. */
+module FileSystemWriteAccess {
+  /**
+   * A data flow node that writes data to the file system.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `FileSystemWriteAccess` instead.
+   */
+  abstract class Range extends FileSystemAccess::Range {
+    /**
+     * Gets a node that represents data to be written to the file system (possibly with
+     * some transformation happening before it is written, like JSON encoding).
+     */
+    abstract DataFlow::Node getADataNode();
+  }
+}
+
 /** Provides classes for modeling path-related APIs. */
 module Path {
  /**
@@ -235,6 +268,35 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint
  }
 }

+/**
+ * A data-flow node that logs data.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `Logging::Range` instead.
+ */
+class Logging extends DataFlow::Node {
+  Logging::Range range;
+
+  Logging() { this = range }
+
+  /** Gets an input that is logged. */
+  DataFlow::Node getAnInput() { result = range.getAnInput() }
+}
+
+/** Provides a class for modeling new logging mechanisms. */
+module Logging {
+  /**
+   * A data-flow node that logs data.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `Logging` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets an input that is logged. */
+    abstract DataFlow::Node getAnInput();
+  }
+}
+
 /**
 * A data-flow node that dynamically executes Python code.
 *
@@ -293,6 +355,78 @@ module SqlExecution {
  }
 }

+/**
+ * A data-flow node that escapes meta-characters, which could be used to prevent
+ * injection attacks.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `Escaping::Range` instead.
+ */
+class Escaping extends DataFlow::Node {
+  Escaping::Range range;
+
+  Escaping() {
+    this = range and
+    // escapes that don't have _both_ input/output defined are not valid
+    exists(range.getAnInput()) and
+    exists(range.getOutput())
+  }
+
+  /** Gets an input that will be escaped. */
+  DataFlow::Node getAnInput() { result = range.getAnInput() }
+
+  /** Gets the output that contains the escaped data. */
+  DataFlow::Node getOutput() { result = range.getOutput() }
+
+  /**
+   * Gets the context that this function escapes for, such as `html`, or `url`.
+   */
+  string getKind() { result = range.getKind() }
+}
+
+/** Provides a class for modeling new escaping APIs. */
+module Escaping {
+  /**
+   * A data-flow node that escapes meta-characters, which could be used to prevent
+   * injection attacks.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `Escaping` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /** Gets an input that will be escaped. */
+    abstract DataFlow::Node getAnInput();
+
+    /** Gets the output that contains the escaped data. */
+    abstract DataFlow::Node getOutput();
+
+    /**
+     * Gets the context that this function escapes for.
+     *
+     * While kinds are represented as strings, this should not be relied upon. Use the
+     * predicates in the `Escaping` module, such as `getHtmlKind`.
+     */
+    abstract string getKind();
+  }
+
+  /** Gets the escape-kind for escaping a string so it can safely be included in HTML. */
+  string getHtmlKind() { result = "html" }
+  // TODO: If adding an XML kind, update the modeling of the `MarkupSafe` PyPI package.
+  //
+  // Technically it claims to escape for both HTML and XML, but for now we don't have
+  // anything that relies on XML escaping, so I'm going to defer deciding whether they
+  // should be the same kind, or whether they deserve to be treated differently.
+}
+
+/**
+ * An escape of a string so it can be safely included in
+ * the body of an HTML element, for example, replacing `{}` in
+ * `<p>{}</p>`.
+ */
+class HtmlEscaping extends Escaping {
+  HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() }
+}
+
 /** Provides classes for modeling HTTP-related APIs. */
 module HTTP {
  import semmle.python.web.HttpConstants
@@ -345,7 +479,7 @@ module HTTP {
        /** Gets the URL pattern for this route, if it can be statically determined. */
        string getUrlPattern() {
          exists(StrConst str |
-            DataFlow::exprNode(str).(DataFlow::LocalSourceNode).flowsTo(this.getUrlPatternArg()) and
+            this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(str) and
            result = str.getText()
          )
        }
@@ -478,9 +612,7 @@ module HTTP {
        /** Gets the mimetype of this HTTP response, if it can be statically determined. */
        string getMimetype() {
          exists(StrConst str |
-            DataFlow::exprNode(str)
-                .(DataFlow::LocalSourceNode)
-                .flowsTo(this.getMimetypeOrContentTypeArg()) and
+            this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(str) and
            result = str.getText().splitAt(";", 0)
          )
          or
@@ -524,10 +656,73 @@ module HTTP {
        abstract DataFlow::Node getRedirectLocation();
      }
    }
+
+    /**
+     * A data-flow node that sets a cookie in an HTTP response.
+     *
+     * Extend this class to refine existing API models. If you want to model new APIs,
+     * extend `HTTP::CookieWrite::Range` instead.
+     */
+    class CookieWrite extends DataFlow::Node {
+      CookieWrite::Range range;
+
+      CookieWrite() { this = range }
+
+      /**
+       * Gets the argument, if any, specifying the raw cookie header.
+       */
+      DataFlow::Node getHeaderArg() { result = range.getHeaderArg() }
+
+      /**
+       * Gets the argument, if any, specifying the cookie name.
+       */
+      DataFlow::Node getNameArg() { result = range.getNameArg() }
+
+      /**
+       * Gets the argument, if any, specifying the cookie value.
+       */
+      DataFlow::Node getValueArg() { result = range.getValueArg() }
+    }
+
+    /** Provides a class for modeling new cookie writes on HTTP responses. */
+    module CookieWrite {
+      /**
+       * A data-flow node that sets a cookie in an HTTP response.
+       *
+       * Note: we don't require that this cookie must be sent to a client (a kind of
+       * "if a tree falls in a forest and nobody hears it" situation).
+       *
+       * Extend this class to model new APIs. If you want to refine existing API models,
+       * extend `HTTP::CookieWrite` instead.
+       */
+      abstract class Range extends DataFlow::Node {
+        /**
+         * Gets the argument, if any, specifying the raw cookie header.
+         */
+        abstract DataFlow::Node getHeaderArg();
+
+        /**
+         * Gets the argument, if any, specifying the cookie name.
+         */
+        abstract DataFlow::Node getNameArg();
+
+        /**
+         * Gets the argument, if any, specifying the cookie value.
+         */
+        abstract DataFlow::Node getValueArg();
+      }
+    }
  }
 }

-/** Provides models for cryptographic things. */
+/**
+ * Provides models for cryptographic things.
+ *
+ * Note: The `CryptographicAlgorithm` class currently doesn't take weak keys into
+ * consideration for the `isWeak` member predicate. So RSA is always considered
+ * secure, although using a low number of bits will actually make it insecure. We plan
+ * to improve our libraries in the future to more precisely capture this aspect.
+ */
 module Cryptography {
  /** Provides models for public-key cryptography, also called asymmetric cryptography. */
  module PublicKey {
@@ -563,7 +758,7 @@ module Cryptography {
    /** Provides classes for modeling new key-pair generation APIs. */
    module KeyGeneration {
      /** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
-      private DataFlow::LocalSourceNode keysizeBacktracker(
+      private DataFlow::TypeTrackingNode keysizeBacktracker(
        DataFlow::TypeBackTracker t, DataFlow::Node arg
      ) {
        t.start() and
@@ -626,4 +821,43 @@ module Cryptography {
      }
    }
  }
+
+  import semmle.python.concepts.CryptoAlgorithms
+
+  /**
+   * A data-flow node that is an application of a cryptographic algorithm. For example,
+   * encryption, decryption, signature-validation.
+   *
+   * Extend this class to refine existing API models. If you want to model new APIs,
+   * extend `CryptographicOperation::Range` instead.
+   */
+  class CryptographicOperation extends DataFlow::Node {
+    CryptographicOperation::Range range;
+
+    CryptographicOperation() { this = range }
+
+    /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
+    CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() }
+
+    /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
+    DataFlow::Node getAnInput() { result = range.getAnInput() }
+  }
+
+  /** Provides classes for modeling new applications of cryptographic algorithms. */
+  module CryptographicOperation {
+    /**
+     * A data-flow node that is an application of a cryptographic algorithm. For example,
+     * encryption, decryption, signature-validation.
+     *
+     * Extend this class to model new APIs. If you want to refine existing API models,
+     * extend `CryptographicOperation` instead.
+     */
+    abstract class Range extends DataFlow::Node {
+      /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */
+      abstract CryptographicAlgorithm getAlgorithm();
+
+      /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */
+      abstract DataFlow::Node getAnInput();
+    }
+  }
 }
--- a/python/ql/lib/semmle/python/Constants.qll
+++ b/python/ql/lib/semmle/python/Constants.qll
--- a/python/ql/lib/semmle/python/Exprs.qll
+++ b/python/ql/lib/semmle/python/Exprs.qll
--- a/python/ql/lib/semmle/python/Files.qll
+++ b/python/ql/lib/semmle/python/Files.qll
@@ -1,9 +1,7 @@
 import python

 /** A file */
-class File extends Container {
-  File() { files(this, _, _, _, _) }
-
+class File extends Container, @file {
  /** DEPRECATED: Use `getAbsolutePath` instead. */
  deprecated override string getName() { result = this.getAbsolutePath() }

@@ -34,9 +32,7 @@ class File extends Container {
  }

  /** Gets a short name for this file (just the file name) */
-  string getShortName() {
-    exists(string simple, string ext | files(this, _, simple, ext, _) | result = simple + ext)
-  }
+  string getShortName() { result = this.getBaseName() }

  private int lastLine() {
    result = max(int i | exists(Location l | l.getFile() = this and l.getEndLine() = i))
@@ -55,7 +51,7 @@ class File extends Container {
    )
  }

-  override string getAbsolutePath() { files(this, result, _, _, _) }
+  override string getAbsolutePath() { files(this, result) }

  /** Gets the URL of this file. */
  override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" }
@@ -89,7 +85,15 @@ class File extends Container {
        i.getTest().(Compare).compares(name, op, main) and
        name.getId() = "__name__" and
        main.getText() = "__main__"
-      )
+      ) and
+      // Exclude files named `__main__.py`. These are often _not_ meant to be run directly, but
+      // contain this construct anyway.
+      //
+      // Their presence in a package (say, `foo`) means one can execute the package directly using
+      // `python -m foo` (which will run the `foo/__main__.py` file). Since being an entry point for
+      // execution means treating imports as absolute, this causes trouble, since when run with
+      // `python -m`, the interpreter uses the usual package semantics.
+      not this.getShortName() = "__main__.py"
      or
      // The file contains a `#!` line referencing the python interpreter
      exists(Comment c |
@@ -110,15 +114,10 @@ private predicate occupied_line(File f, int n) {
 }

 /** A folder (directory) */
-class Folder extends Container {
-  Folder() { folders(this, _, _) }
-
+class Folder extends Container, @folder {
  /** DEPRECATED: Use `getAbsolutePath` instead. */
  deprecated override string getName() { result = this.getAbsolutePath() }

-  /** DEPRECATED: Use `getBaseName` instead. */
-  deprecated string getSimple() { folders(this, _, result) }
-
  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
@@ -136,7 +135,7 @@ class Folder extends Container {
    endcolumn = 0
  }

-  override string getAbsolutePath() { folders(this, result, _) }
+  override string getAbsolutePath() { folders(this, result) }

  /** Gets the URL of this folder. */
  override string getURL() { result = "folder://" + this.getAbsolutePath() }
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -653,6 +653,8 @@ class DefinitionNode extends ControlFlowNode {
  DefinitionNode() {
    exists(Assign a | a.getATarget().getAFlowNode() = this)
    or
+    exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue()))
+    or
    exists(Alias a | a.getAsname().getAFlowNode() = this)
    or
    augstore(_, this)
@@ -795,6 +797,9 @@ private AstNode assigned_value(Expr lhs) {
  /* lhs = result */
  exists(Assign a | a.getATarget() = lhs and result = a.getValue())
  or
+  /* lhs : annotation = result */
+  exists(AnnAssign a | a.getTarget() = lhs and result = a.getValue())
+  or
  /* import result as lhs */
  exists(Alias a | a.getAsname() = lhs and result = a.getValue())
  or
--- a/python/ql/lib/semmle/python/Frameworks.qll
+++ b/python/ql/lib/semmle/python/Frameworks.qll
@@ -0,0 +1,35 @@
+/**
+ * Helper file that imports all framework modeling.
+ */
+
+// If you add modeling of a new framework/library, remember to add it to the docs in
+// `docs/codeql/support/reusables/frameworks.rst`
+private import semmle.python.frameworks.Aioch
+private import semmle.python.frameworks.Aiohttp
+private import semmle.python.frameworks.ClickhouseDriver
+private import semmle.python.frameworks.Cryptodome
+private import semmle.python.frameworks.Cryptography
+private import semmle.python.frameworks.Dill
+private import semmle.python.frameworks.Django
+private import semmle.python.frameworks.Fabric
+private import semmle.python.frameworks.Flask
+private import semmle.python.frameworks.FlaskSqlAlchemy
+private import semmle.python.frameworks.Idna
+private import semmle.python.frameworks.Invoke
+private import semmle.python.frameworks.Jmespath
+private import semmle.python.frameworks.MarkupSafe
+private import semmle.python.frameworks.Multidict
+private import semmle.python.frameworks.Mysql
+private import semmle.python.frameworks.MySQLdb
+private import semmle.python.frameworks.Peewee
+private import semmle.python.frameworks.Psycopg2
+private import semmle.python.frameworks.PyMySQL
+private import semmle.python.frameworks.Rsa
+private import semmle.python.frameworks.Simplejson
+private import semmle.python.frameworks.SqlAlchemy
+private import semmle.python.frameworks.Stdlib
+private import semmle.python.frameworks.Tornado
+private import semmle.python.frameworks.Twisted
+private import semmle.python.frameworks.Ujson
+private import semmle.python.frameworks.Yaml
+private import semmle.python.frameworks.Yarl
--- a/python/ql/lib/semmle/python/Function.qll
+++ b/python/ql/lib/semmle/python/Function.qll
--- a/python/ql/lib/semmle/python/GuardedControlFlow.qll
+++ b/python/ql/lib/semmle/python/GuardedControlFlow.qll
--- a/python/ql/lib/semmle/python/Import.qll
+++ b/python/ql/lib/semmle/python/Import.qll
--- a/python/ql/lib/semmle/python/Keywords.qll
+++ b/python/ql/lib/semmle/python/Keywords.qll
--- a/python/ql/lib/semmle/python/Metrics.qll
+++ b/python/ql/lib/semmle/python/Metrics.qll
--- a/python/ql/lib/semmle/python/Module.qll
+++ b/python/ql/lib/semmle/python/Module.qll
--- a/python/ql/lib/semmle/python/Operations.qll
+++ b/python/ql/lib/semmle/python/Operations.qll
--- a/python/ql/lib/semmle/python/PrintAst.qll
+++ b/python/ql/lib/semmle/python/PrintAst.qll
@@ -7,6 +7,7 @@
 */

 import python
+import semmle.python.RegexTreeView

 private newtype TPrintAstConfiguration = MkPrintAstConfiguration()

@@ -53,6 +54,9 @@ private newtype TPrintAstNode =
    not list = any(Module mod).getBody() and
    not forall(AstNode child | child = list.getAnItem() | isNotNeeded(child)) and
    exists(list.getAnItem())
+  } or
+  TRegExpTermNode(RegExpTerm term) {
+    exists(StrConst str | term.getRootTerm() = getParsedRegExp(str) and shouldPrint(str, _))
  }

 /**
@@ -419,6 +423,42 @@ class ParameterNode extends AstElementNode {
  }
 }

+/**
+ * A print node for a `StrConst`.
+ *
+ * If the string is used as a regular expression, it has a single child,
+ * which is the root term of the parsed regular expression.
+ */
+class StrConstNode extends AstElementNode {
+  override StrConst element;
+
+  override PrintAstNode getChild(int childIndex) {
+    childIndex = 0 and result.(RegExpTermNode).getTerm() = getParsedRegExp(element)
+  }
+}
+
+/**
+ * A print node for a regular expression term.
+ */
+class RegExpTermNode extends TRegExpTermNode, PrintAstNode {
+  RegExpTerm term;
+
+  RegExpTermNode() { this = TRegExpTermNode(term) }
+
+  /** Gets the `RegExpTerm` for this node. */
+  RegExpTerm getTerm() { result = term }
+
+  override PrintAstNode getChild(int childIndex) {
+    result.(RegExpTermNode).getTerm() = term.getChild(childIndex)
+  }
+
+  override string toString() {
+    result = "[" + strictconcat(term.getPrimaryQLClass(), " | ") + "] " + term.toString()
+  }
+
+  override Location getLocation() { result = term.getLocation() }
+}
+
 /**
 * Gets the `i`th child from `node` ordered by location.
 */
@@ -447,7 +487,7 @@ private module PrettyPrinting {
  string getQlClass(AstNode a) {
    shouldPrint(a, _) and
    (
-      not exists(getQlCustomClass(a)) and result = a.toString()
+      not exists(getQlCustomClass(a)) and result = strictconcat(a.toString(), " | ")
      or
      result = strictconcat(getQlCustomClass(a), " | ")
    )
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -0,0 +1,986 @@
+/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
+
+import python
+private import semmle.python.regex
+
+/**
+ * An element containing a regular expression term, that is, either
+ * a string literal (parsed as a regular expression)
+ * or another regular expression term.
+ *
+ * For sequences and alternations, we require at least one child.
+ * Otherwise, we wish to represent the term differently.
+ * This avoids multiple representations of the same term.
+ */
+newtype TRegExpParent =
+  /** A string literal used as a regular expression */
+  TRegExpLiteral(Regex re) or
+  /** A quantified term */
+  TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
+  /** A sequence term */
+  TRegExpSequence(Regex re, int start, int end) {
+    re.sequence(start, end) and
+    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
+  } or
+  /** An alternation term */
+  TRegExpAlt(Regex re, int start, int end) {
+    re.alternation(start, end) and
+    exists(int part_end |
+      re.alternationOption(start, end, start, part_end) and
+      part_end < end
+    ) // if an alternation does not have more than one element, it should be treated as that element instead.
+  } or
+  /** A character class term */
+  TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
+  /** A character range term */
+  TRegExpCharacterRange(Regex re, int start, int end) { re.charRange(_, start, _, _, end) } or
+  /** A group term */
+  TRegExpGroup(Regex re, int start, int end) { re.group(start, end) } or
+  /** A special character */
+  TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
+  /** A normal character */
+  TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
+  /** A back reference */
+  TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
+
+/**
+ * An element containing a regular expression term, that is, either
+ * a string literal (parsed as a regular expression)
+ * or another regular expression term.
+ */
+class RegExpParent extends TRegExpParent {
+  string toString() { result = "RegExpParent" }
+
+  /** Gets the `i`th child term. */
+  abstract RegExpTerm getChild(int i);
+
+  /** Gets a child term. */
+  RegExpTerm getAChild() { result = getChild(_) }
+
+  /** Gets the number of child terms. */
+  int getNumChild() { result = count(getAChild()) }
+
+  /** Gets the associated regex. */
+  abstract Regex getRegex();
+}
+
+/** A string literal used as a regular expression */
+class RegExpLiteral extends TRegExpLiteral, RegExpParent {
+  Regex re;
+
+  RegExpLiteral() { this = TRegExpLiteral(re) }
+
+  override RegExpTerm getChild(int i) { i = 0 and result.getRegex() = re and result.isRootTerm() }
+
+  predicate isDotAll() { re.getAMode() = "DOTALL" }
+
+  predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
+
+  string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
+
+  override Regex getRegex() { result = re }
+
+  string getPrimaryQLClass() { result = "RegExpLiteral" }
+}
+
+/**
+ * A regular expression term, that is, a syntactic part of a regular expression.
+ */
+class RegExpTerm extends RegExpParent {
+  Regex re;
+  int start;
+  int end;
+
+  RegExpTerm() {
+    this = TRegExpAlt(re, start, end)
+    or
+    this = TRegExpBackRef(re, start, end)
+    or
+    this = TRegExpCharacterClass(re, start, end)
+    or
+    this = TRegExpCharacterRange(re, start, end)
+    or
+    this = TRegExpNormalChar(re, start, end)
+    or
+    this = TRegExpGroup(re, start, end)
+    or
+    this = TRegExpQuantifier(re, start, end)
+    or
+    this = TRegExpSequence(re, start, end)
+    or
+    this = TRegExpSpecialChar(re, start, end)
+  }
+
+  /**
+   * Gets the outermost term of this regular expression.
+   */
+  RegExpTerm getRootTerm() {
+    this.isRootTerm() and result = this
+    or
+    result = getParent().(RegExpTerm).getRootTerm()
+  }
+
+  /**
+   * Holds if this term is part of a string literal
+   * that is interpreted as a regular expression.
+   */
+  predicate isUsedAsRegExp() { any() }
+
+  /**
+   * Holds if this is the root term of a regular expression.
+   */
+  predicate isRootTerm() { start = 0 and end = re.getText().length() }
+
+  override RegExpTerm getChild(int i) {
+    result = this.(RegExpAlt).getChild(i)
+    or
+    result = this.(RegExpBackRef).getChild(i)
+    or
+    result = this.(RegExpCharacterClass).getChild(i)
+    or
+    result = this.(RegExpCharacterRange).getChild(i)
+    or
+    result = this.(RegExpNormalChar).getChild(i)
+    or
+    result = this.(RegExpGroup).getChild(i)
+    or
+    result = this.(RegExpQuantifier).getChild(i)
+    or
+    result = this.(RegExpSequence).getChild(i)
+    or
+    result = this.(RegExpSpecialChar).getChild(i)
+  }
+
+  /**
+   * Gets the parent term of this regular expression term, or the
+   * regular expression literal if this is the root term.
+   */
+  RegExpParent getParent() { result.getAChild() = this }
+
+  override Regex getRegex() { result = re }
+
+  /** Gets the offset at which this term starts. */
+  int getStart() { result = start }
+
+  /** Gets the offset at which this term ends. */
+  int getEnd() { result = end }
+
+  override string toString() { result = re.getText().substring(start, end) }
+
+  /**
+   * Gets the location of the surrounding regex, as locations inside the regex do not exist.
+   * To get location information corresponding to the term inside the regex,
+   * use `hasLocationInfo`.
+   */
+  Location getLocation() { result = re.getLocation() }
+
+  /** Holds if this term is found at the specified location offsets. */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    exists(int re_start, int re_end |
+      re.getLocation().hasLocationInfo(filepath, startline, re_start, endline, re_end) and
+      startcolumn = re_start + start + 4 and
+      endcolumn = re_start + end + 3
+    )
+  }
+
+  /** Gets the file in which this term is found. */
+  File getFile() { result = this.getLocation().getFile() }
+
+  /** Gets the raw source text of this term. */
+  string getRawValue() { result = this.toString() }
+
+  /** Gets the string literal in which this term is found. */
+  RegExpLiteral getLiteral() { result = TRegExpLiteral(re) }
+
+  /** Gets the regular expression term that is matched (textually) before this one, if any. */
+  RegExpTerm getPredecessor() {
+    exists(RegExpTerm parent | parent = getParent() |
+      result = parent.(RegExpSequence).previousElement(this)
+      or
+      not exists(parent.(RegExpSequence).previousElement(this)) and
+      not parent instanceof RegExpSubPattern and
+      result = parent.getPredecessor()
+    )
+  }
+
+  /** Gets the regular expression term that is matched (textually) after this one, if any. */
+  RegExpTerm getSuccessor() {
+    exists(RegExpTerm parent | parent = getParent() |
+      result = parent.(RegExpSequence).nextElement(this)
+      or
+      not exists(parent.(RegExpSequence).nextElement(this)) and
+      not parent instanceof RegExpSubPattern and
+      result = parent.getSuccessor()
+    )
+  }
+
+  /** Gets the primary QL class for this term. */
+  string getPrimaryQLClass() { result = "RegExpTerm" }
+}
+
+/**
+ * A quantified regular expression term.
+ *
+ * Example:
+ *
+ * ```
+ * ((ECMA|Java)[sS]cript)*
+ * ```
+ */
+class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
+  int part_end;
+  boolean maybe_empty;
+  boolean may_repeat_forever;
+
+  RegExpQuantifier() {
+    this = TRegExpQuantifier(re, start, end) and
+    re.qualifiedPart(start, part_end, end, maybe_empty, may_repeat_forever)
+  }
+
+  override RegExpTerm getChild(int i) {
+    i = 0 and
+    result.getRegex() = re and
+    result.getStart() = start and
+    result.getEnd() = part_end
+  }
+
+  predicate mayRepeatForever() { may_repeat_forever = true }
+
+  string getQualifier() { result = re.getText().substring(part_end, end) }
+
+  override string getPrimaryQLClass() { result = "RegExpQuantifier" }
+}
+
+/**
+ * A regular expression term that permits unlimited repetitions.
+ */
+class InfiniteRepetitionQuantifier extends RegExpQuantifier {
+  InfiniteRepetitionQuantifier() { this.mayRepeatForever() }
+}
+
+/**
+ * A star-quantified term.
+ *
+ * Example:
+ *
+ * ```
+ * \w*
+ * ```
+ */
+class RegExpStar extends InfiniteRepetitionQuantifier {
+  RegExpStar() { this.getQualifier().charAt(0) = "*" }
+
+  override string getPrimaryQLClass() { result = "RegExpStar" }
+}
+
+/**
+ * A plus-quantified term.
+ *
+ * Example:
+ *
+ * ```
+ * \w+
+ * ```
+ */
+class RegExpPlus extends InfiniteRepetitionQuantifier {
+  RegExpPlus() { this.getQualifier().charAt(0) = "+" }
+
+  override string getPrimaryQLClass() { result = "RegExpPlus" }
+}
+
+/**
+ * An optional term.
+ *
+ * Example:
+ *
+ * ```
+ * ;?
+ * ```
+ */
+class RegExpOpt extends RegExpQuantifier {
+  RegExpOpt() { this.getQualifier().charAt(0) = "?" }
+
+  override string getPrimaryQLClass() { result = "RegExpOpt" }
+}
+
+/**
+ * A range-quantified term
+ *
+ * Examples:
+ *
+ * ```
+ * \w{2,4}
+ * \w{2,}
+ * \w{2}
+ * ```
+ */
+class RegExpRange extends RegExpQuantifier {
+  string upper;
+  string lower;
+
+  RegExpRange() { re.multiples(part_end, end, lower, upper) }
+
+  string getUpper() { result = upper }
+
+  string getLower() { result = lower }
+
+  /**
+   * Gets the upper bound of the range, if any.
+   *
+   * If there is no upper bound, any number of repetitions is allowed.
+   * For a term of the form `r{lo}`, both the lower and the upper bound
+   * are `lo`.
+   */
+  int getUpperBound() { result = this.getUpper().toInt() }
+
+  /** Gets the lower bound of the range. */
+  int getLowerBound() { result = this.getLower().toInt() }
+
+  override string getPrimaryQLClass() { result = "RegExpRange" }
+}
+
+/**
+ * A sequence term.
+ *
+ * Example:
+ *
+ * ```
+ * (ECMA|Java)Script
+ * ```
+ *
+ * This is a sequence with the elements `(ECMA|Java)` and `Script`.
+ */
+class RegExpSequence extends RegExpTerm, TRegExpSequence {
+  RegExpSequence() { this = TRegExpSequence(re, start, end) }
+
+  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
+
+  /** Gets the element preceding `element` in this sequence. */
+  RegExpTerm previousElement(RegExpTerm element) { element = nextElement(result) }
+
+  /** Gets the element following `element` in this sequence. */
+  RegExpTerm nextElement(RegExpTerm element) {
+    exists(int i |
+      element = this.getChild(i) and
+      result = this.getChild(i + 1)
+    )
+  }
+
+  override string getPrimaryQLClass() { result = "RegExpSequence" }
+}
+
+pragma[nomagic]
+private int seqChildEnd(Regex re, int start, int end, int i) {
+  result = seqChild(re, start, end, i).getEnd()
+}
+
+// moved out so we can use it in the charpred
+private RegExpTerm seqChild(Regex re, int start, int end, int i) {
+  re.sequence(start, end) and
+  (
+    i = 0 and
+    result.getRegex() = re and
+    result.getStart() = start and
+    exists(int itemEnd |
+      re.item(start, itemEnd) and
+      result.getEnd() = itemEnd
+    )
+    or
+    i > 0 and
+    result.getRegex() = re and
+    exists(int itemStart | itemStart = seqChildEnd(re, start, end, i - 1) |
+      result.getStart() = itemStart and
+      re.item(itemStart, result.getEnd())
+    )
+  )
+}
+
+/**
+ * An alternative term, that is, a term of the form `a|b`.
+ *
+ * Example:
+ *
+ * ```
+ * ECMA|Java
+ * ```
+ */
+class RegExpAlt extends RegExpTerm, TRegExpAlt {
+  RegExpAlt() { this = TRegExpAlt(re, start, end) }
+
+  override RegExpTerm getChild(int i) {
+    i = 0 and
+    result.getRegex() = re and
+    result.getStart() = start and
+    exists(int part_end |
+      re.alternationOption(start, end, start, part_end) and
+      result.getEnd() = part_end
+    )
+    or
+    i > 0 and
+    result.getRegex() = re and
+    exists(int part_start |
+      part_start = this.getChild(i - 1).getEnd() + 1 // allow for the |
+    |
+      result.getStart() = part_start and
+      re.alternationOption(start, end, part_start, result.getEnd())
+    )
+  }
+
+  override string getPrimaryQLClass() { result = "RegExpAlt" }
+}
+
+/**
+ * An escaped regular expression term, that is, a regular expression
+ * term starting with a backslash, which is not a backreference.
+ *
+ * Example:
+ *
+ * ```
+ * \.
+ * \w
+ * ```
+ */
+class RegExpEscape extends RegExpNormalChar {
+  RegExpEscape() { re.escapedCharacter(start, end) }
+
+  /**
+   * Gets the value of this escape: the denoted character for `\n`, `\r`, `\t` and unicode
+   * escapes, otherwise the text after the backslash (for example, `w` for `\w`).
+   * TODO: Handle named escapes.
+   */
+  override string getValue() {
+    // Unicode escapes are handled by the last disjunct; don't also yield e.g. `u0061`.
+    not this.isUnicode() and this.isIdentityEscape() and result = this.getUnescaped()
+    or
+    this.getUnescaped() = "n" and result = "\n"
+    or
+    this.getUnescaped() = "r" and result = "\r"
+    or
+    this.getUnescaped() = "t" and result = "\t"
+    or
+    // TODO: Find a way to include a formfeed character
+    // this.getUnescaped() = "f" and result = ""
+    // or
+    isUnicode() and
+    result = getUnicode()
+  }
+
+  /** Holds if the text after the backslash is not one of `n`, `r`, `t` or `f`. */
+  predicate isIdentityEscape() { not this.getUnescaped() in ["n", "r", "t", "f"] }
+
+  override string getPrimaryQLClass() { result = "RegExpEscape" }
+
+  /** Gets the text of this escape after its first character, for example `w` for `\w`. */
+  string getUnescaped() { result = this.getText().suffix(1) }
+
+  /** Gets the text for this escape, for example `\w`. */
+  private string getText() { result = re.getText().substring(start, end) }
+
+  /** Holds if this is a unicode escape, that is, its text starts with `\u` or `\U`. */
+  private predicate isUnicode() { getText().prefix(2) = ["\\u", "\\U"] }
+
+  /**
+   * Gets the unicode char for this escape.
+   * E.g. for `\u0061` this returns "a".
+   */
+  private string getUnicode() {
+    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
+      result = codepoint.toUnicode()
+    )
+  }
+
+  /**
+   * Gets the positional int value of the `index`th char in the hex number of the unicode escape.
+   * E.g. for `\u0061` and `index = 2` this returns 96 (the hex digit `6` times 16).
+   */
+  private int getHexValueFromUnicode(int index) {
+    this.isUnicode() and
+    exists(string hex, string char | hex = this.getText().suffix(2) |
+      char = hex.charAt(index) and
+      result = 16.pow(hex.length() - index - 1) * toHex(char)
+    )
+  }
+}
+
+/**
+ * Gets the hex number for the `hex` char.
+ */
+private int toHex(string hex) {
+  hex = [0 .. 9].toString() and
+  result = hex.toInt()
+  or
+  result = 10 and hex = ["a", "A"]
+  or
+  result = 11 and hex = ["b", "B"]
+  or
+  result = 12 and hex = ["c", "C"]
+  or
+  result = 13 and hex = ["d", "D"]
+  or
+  result = 14 and hex = ["e", "E"]
+  or
+  result = 15 and hex = ["f", "F"]
+}
+
+/**
+ * A character class escape in a regular expression.
+ * That is, an escaped character that denotes multiple characters.
+ *
+ * Examples:
+ *
+ * ```
+ * \w
+ * \S
+ * ```
+ */
+class RegExpCharacterClassEscape extends RegExpEscape {
+  // string value;
+  RegExpCharacterClassEscape() {
+    // value = re.getText().substring(start + 1, end) and
+    // value in ["d", "D", "s", "S", "w", "W"]
+    this.getValue() in ["d", "D", "s", "S", "w", "W"]
+  }
+
+  // The name of the character class (for example, `w` for `\w`) is the inherited `getValue()`.
+  // override string getValue() { result = value }
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
+}
+
+/**
+ * A character class in a regular expression.
+ *
+ * Examples:
+ *
+ * ```
+ * [a-z_]
+ * [^<>&]
+ * ```
+ */
+class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
+  RegExpCharacterClass() { this = TRegExpCharacterClass(re, start, end) }
+
+  predicate isInverted() { re.getChar(start + 1) = "^" }
+
+  string getCharThing(int i) { result = re.getChar(i + start) }
+
+  predicate isUniversalClass() {
+    // [^]
+    isInverted() and not exists(getAChild())
+    or
+    // [\w\W] and similar
+    not isInverted() and
+    exists(string cce1, string cce2 |
+      cce1 = getAChild().(RegExpCharacterClassEscape).getValue() and
+      cce2 = getAChild().(RegExpCharacterClassEscape).getValue()
+    |
+      cce1 != cce2 and cce1.toLowerCase() = cce2.toLowerCase()
+    )
+  }
+
+  override RegExpTerm getChild(int i) {
+    i = 0 and
+    result.getRegex() = re and
+    exists(int itemStart, int itemEnd |
+      result.getStart() = itemStart and
+      re.char_set_start(start, itemStart) and
+      re.char_set_child(start, itemStart, itemEnd) and
+      result.getEnd() = itemEnd
+    )
+    or
+    i > 0 and
+    result.getRegex() = re and
+    exists(int itemStart | itemStart = this.getChild(i - 1).getEnd() |
+      result.getStart() = itemStart and
+      re.char_set_child(start, itemStart, result.getEnd())
+    )
+  }
+
+  override string getPrimaryQLClass() { result = "RegExpCharacterClass" }
+}
+
+/**
+ * A character range in a character class in a regular expression.
+ *
+ * Example:
+ *
+ * ```
+ * a-z
+ * ```
+ */
+class RegExpCharacterRange extends RegExpTerm, TRegExpCharacterRange {
+  int lower_end;
+  int upper_start;
+
+  RegExpCharacterRange() {
+    this = TRegExpCharacterRange(re, start, end) and
+    re.charRange(_, start, lower_end, upper_start, end)
+  }
+
+  predicate isRange(string lo, string hi) {
+    lo = re.getText().substring(start, lower_end) and
+    hi = re.getText().substring(upper_start, end)
+  }
+
+  override RegExpTerm getChild(int i) {
+    i = 0 and
+    result.getRegex() = re and
+    result.getStart() = start and
+    result.getEnd() = lower_end
+    or
+    i = 1 and
+    result.getRegex() = re and
+    result.getStart() = upper_start and
+    result.getEnd() = end
+  }
+
+  override string getPrimaryQLClass() { result = "RegExpCharacterRange" }
+}
+
+/**
+ * A normal character in a regular expression, that is, a character
+ * without special meaning. This includes escaped characters.
+ *
+ * Examples:
+ * ```
+ * t
+ * \t
+ * ```
+ */
+class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
+  RegExpNormalChar() { this = TRegExpNormalChar(re, start, end) }
+
+  predicate isCharacter() { any() }
+
+  string getValue() { result = re.getText().substring(start, end) }
+
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpNormalChar" }
+}
+
+/**
+ * A constant regular expression term, that is, a regular expression
+ * term matching a single string. Currently, this will always be a single character.
+ *
+ * Example:
+ *
+ * ```
+ * a
+ * ```
+ */
+class RegExpConstant extends RegExpTerm {
+  /** The text of this constant, as given by `RegExpNormalChar.getValue()`. */
+  string value;
+
+  RegExpConstant() {
+    this = TRegExpNormalChar(re, start, end) and
+    // character class escapes are normal-char terms too, but they are not constants
+    not this instanceof RegExpCharacterClassEscape and
+    // exclude chars in qualifiers
+    // TODO: push this into regex library
+    not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
+      qstart <= start and end <= qend
+    ) and
+    value = this.(RegExpNormalChar).getValue()
+    // This will never hold
+    // or
+    // this = TRegExpSpecialChar(re, start, end) and
+    // re.inCharSet(start) and
+    // value = this.(RegExpSpecialChar).getChar()
+  }
+
+  /** Holds unconditionally for every constant term. */
+  predicate isCharacter() { any() }
+
+  /** Gets the text matched by this constant. */
+  string getValue() { result = value }
+
+  /** Has no result: a constant has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpConstant" }
+}
+
+/**
+ * A grouped regular expression.
+ *
+ * Examples:
+ *
+ * ```
+ * (ECMA|Java)
+ * (?:ECMA|Java)
+ * (?<quote>['"])
+ * ```
+ */
+class RegExpGroup extends RegExpTerm, TRegExpGroup {
+  RegExpGroup() { this = TRegExpGroup(re, start, end) }
+
+  /**
+   * Gets the index of this capture group within the enclosing regular
+   * expression literal.
+   *
+   * For example, in the regular expression `/((a?).)(?:b)/`, the
+   * group `((a?).)` has index 1, the group `(a?)` nested inside it
+   * has index 2, and the group `(?:b)` has no index, since it is
+   * not a capture group.
+   */
+  int getNumber() { result = re.getGroupNumber(start, end) }
+
+  /** Holds if this is a named capture group. */
+  predicate isNamed() { exists(this.getName()) }
+
+  /** Gets the name of this capture group, if any. */
+  string getName() { result = re.getGroupName(start, end) }
+
+  /** Holds unconditionally for every group; `RegExpZeroWidthMatch` overrides this. */
+  predicate isCharacter() { any() }
+
+  /** Gets the text of this group, that is, the regex text between `start` and `end`. */
+  string getValue() { result = re.getText().substring(start, end) }
+
+  /**
+   * Gets the single child (`i = 0`) of this group: the term whose extent is
+   * the group's contents, as reported by `re.groupContents`.
+   */
+  override RegExpTerm getChild(int i) {
+    result.getRegex() = re and
+    i = 0 and
+    re.groupContents(start, end, result.getStart(), result.getEnd())
+  }
+
+  override string getPrimaryQLClass() { result = "RegExpGroup" }
+}
+
+/**
+ * A special character in a regular expression.
+ *
+ * Examples:
+ * ```
+ * ^
+ * $
+ * .
+ * ```
+ */
+class RegExpSpecialChar extends RegExpTerm, TRegExpSpecialChar {
+  /** The special character at this position, as reported by `re.specialCharacter`. */
+  string char;
+
+  RegExpSpecialChar() {
+    this = TRegExpSpecialChar(re, start, end) and
+    re.specialCharacter(start, end, char)
+  }
+
+  /** Holds unconditionally for every special character term. */
+  predicate isCharacter() { any() }
+
+  /** Gets the special character that this term represents. */
+  string getChar() { result = char }
+
+  /** Has no result: a special character has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpSpecialChar" }
+}
+
+/**
+ * A dot regular expression.
+ *
+ * Example:
+ *
+ * ```
+ * .
+ * ```
+ */
+class RegExpDot extends RegExpSpecialChar {
+  // Selects the special characters whose text is `.`.
+  RegExpDot() { this.getChar() = "." }
+
+  override string getPrimaryQLClass() { result = "RegExpDot" }
+}
+
+/**
+ * A dollar assertion `$` matching the end of a line.
+ *
+ * Example:
+ *
+ * ```
+ * $
+ * ```
+ */
+class RegExpDollar extends RegExpSpecialChar {
+  // Selects the special characters whose text is `$`.
+  RegExpDollar() { this.getChar() = "$" }
+
+  override string getPrimaryQLClass() { result = "RegExpDollar" }
+}
+
+/**
+ * A caret assertion `^` matching the beginning of a line.
+ *
+ * Example:
+ *
+ * ```
+ * ^
+ * ```
+ */
+class RegExpCaret extends RegExpSpecialChar {
+  // Selects the special characters whose text is `^`.
+  RegExpCaret() { this.getChar() = "^" }
+
+  override string getPrimaryQLClass() { result = "RegExpCaret" }
+}
+
+/**
+ * A zero-width match, that is, either an empty group or an assertion.
+ *
+ * Examples:
+ * ```
+ * ()
+ * (?=\w)
+ * ```
+ */
+class RegExpZeroWidthMatch extends RegExpGroup {
+  RegExpZeroWidthMatch() { re.zeroWidthMatch(start, end) }
+
+  /** Holds unconditionally for every zero-width match. */
+  override predicate isCharacter() { any() }
+
+  /**
+   * Has no result: unlike a plain `RegExpGroup`, a zero-width match reports no
+   * child terms. The contents of an assertion are available through
+   * `RegExpSubPattern.getOperand()` instead.
+   */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpZeroWidthMatch" }
+}
+
+/**
+ * A zero-width lookahead or lookbehind assertion, that is, a zero-width
+ * match that is not an empty group.
+ *
+ * Examples:
+ *
+ * ```
+ * (?=\w)
+ * (?!\n)
+ * (?<=\.)
+ * (?<!\\)
+ * ```
+ */
+class RegExpSubPattern extends RegExpZeroWidthMatch {
+  RegExpSubPattern() { not re.emptyGroup(start, end) }
+
+  /**
+   * Gets the operand of this assertion, that is, the term whose extent is the
+   * contents of the group.
+   */
+  RegExpTerm getOperand() {
+    result.getRegex() = re and
+    re.groupContents(start, end, result.getStart(), result.getEnd())
+  }
+}
+
+/**
+ * A zero-width lookahead assertion.
+ *
+ * This class is abstract; the concrete cases are `RegExpPositiveLookahead`
+ * and `RegExpNegativeLookahead`.
+ *
+ * Examples:
+ *
+ * ```
+ * (?=\w)
+ * (?!\n)
+ * ```
+ */
+abstract class RegExpLookahead extends RegExpSubPattern { }
+
+/**
+ * A positive-lookahead assertion.
+ *
+ * Examples:
+ *
+ * ```
+ * (?=\w)
+ * ```
+ */
+class RegExpPositiveLookahead extends RegExpLookahead {
+  // Membership is decided by the regex library's classification of the group.
+  RegExpPositiveLookahead() { re.positiveLookaheadAssertionGroup(start, end) }
+
+  override string getPrimaryQLClass() { result = "RegExpPositiveLookahead" }
+}
+
+/**
+ * A negative-lookahead assertion.
+ *
+ * Examples:
+ *
+ * ```
+ * (?!\n)
+ * ```
+ */
+class RegExpNegativeLookahead extends RegExpLookahead {
+  // Membership is decided by the regex library's classification of the group.
+  RegExpNegativeLookahead() { re.negativeLookaheadAssertionGroup(start, end) }
+
+  override string getPrimaryQLClass() { result = "RegExpNegativeLookahead" }
+}
+
+/**
+ * A zero-width lookbehind assertion.
+ *
+ * This class is abstract; the concrete cases are `RegExpPositiveLookbehind`
+ * and `RegExpNegativeLookbehind`.
+ *
+ * Examples:
+ *
+ * ```
+ * (?<=\.)
+ * (?<!\\)
+ * ```
+ */
+abstract class RegExpLookbehind extends RegExpSubPattern { }
+
+/**
+ * A positive-lookbehind assertion.
+ *
+ * Examples:
+ *
+ * ```
+ * (?<=\.)
+ * ```
+ */
+class RegExpPositiveLookbehind extends RegExpLookbehind {
+  // Membership is decided by the regex library's classification of the group.
+  RegExpPositiveLookbehind() { re.positiveLookbehindAssertionGroup(start, end) }
+
+  override string getPrimaryQLClass() { result = "RegExpPositiveLookbehind" }
+}
+
+/**
+ * A negative-lookbehind assertion.
+ *
+ * Examples:
+ *
+ * ```
+ * (?<!\\)
+ * ```
+ */
+class RegExpNegativeLookbehind extends RegExpLookbehind {
+  // Membership is decided by the regex library's classification of the group.
+  RegExpNegativeLookbehind() { re.negativeLookbehindAssertionGroup(start, end) }
+
+  override string getPrimaryQLClass() { result = "RegExpNegativeLookbehind" }
+}
+
+/**
+ * A back reference, that is, a term of the form `\i` or `(?P=name)`
+ * in a regular expression.
+ *
+ * Examples:
+ *
+ * ```
+ * \1
+ * (?P=quote)
+ * ```
+ */
+class RegExpBackRef extends RegExpTerm, TRegExpBackRef {
+  RegExpBackRef() { this = TRegExpBackRef(re, start, end) }
+
+  /**
+   * Gets the number of the capture group this back reference refers to, if any.
+   */
+  int getNumber() { result = re.getBackrefNumber(start, end) }
+
+  /**
+   * Gets the name of the capture group this back reference refers to, if any.
+   */
+  string getName() { result = re.getBackrefName(start, end) }
+
+  /**
+   * Gets the capture group this back reference refers to: a group in the same
+   * literal whose number or name equals that of this back reference.
+   */
+  RegExpGroup getGroup() {
+    result.getLiteral() = this.getLiteral() and
+    (
+      result.getNumber() = this.getNumber() or
+      result.getName() = this.getName()
+    )
+  }
+
+  /** Has no result: a back reference has no child terms. */
+  override RegExpTerm getChild(int i) { none() }
+
+  override string getPrimaryQLClass() { result = "RegExpBackRef" }
+}
+
+/** Gets the root term of the parse tree constructed for the regular expression `re`, if any. */
+RegExpTerm getParsedRegExp(StrConst re) { result.isRootTerm() and result.getRegex() = re }
--- a/python/ql/lib/semmle/python/SSA.qll
+++ b/python/ql/lib/semmle/python/SSA.qll
--- a/python/ql/lib/semmle/python/Scope.qll
+++ b/python/ql/lib/semmle/python/Scope.qll
--- a/python/ql/lib/semmle/python/SelfAttribute.qll
+++ b/python/ql/lib/semmle/python/SelfAttribute.qll
--- a/python/ql/lib/semmle/python/SpecialMethods.qll
+++ b/python/ql/lib/semmle/python/SpecialMethods.qll
--- a/python/ql/lib/semmle/python/Stmts.qll
+++ b/python/ql/lib/semmle/python/Stmts.qll
@@ -153,6 +153,12 @@ class ExceptStmt extends ExceptStmt_ {
  override Stmt getASubStatement() { result = this.getAStmt() }

  override Stmt getLastStatement() { result = this.getBody().getLastItem().getLastStatement() }
+
+  /**
+   * Gets an exception type handled by this clause. When the clause lists a
+   * tuple of types, each element of the tuple is a result rather than the
+   * tuple itself.
+   */
+  override Expr getType() {
+    exists(Expr declared | declared = super.getType() |
+      not declared instanceof Tuple and
+      result = declared
+      or
+      result = declared.(Tuple).getAnElt()
+    )
+  }
 }

 /** An assert statement, such as `assert a == b, "A is not equal to b"` */
--- a/python/ql/lib/semmle/python/TestUtils.qll
+++ b/python/ql/lib/semmle/python/TestUtils.qll
--- a/python/ql/lib/semmle/python/Unit.qll
+++ b/python/ql/lib/semmle/python/Unit.qll
--- a/python/ql/lib/semmle/python/Variables.qll
+++ b/python/ql/lib/semmle/python/Variables.qll
--- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
+++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
--- a/python/ql/lib/semmle/python/dataflow/Configuration.qll
+++ b/python/ql/lib/semmle/python/dataflow/Configuration.qll
--- a/python/ql/lib/semmle/python/dataflow/DataFlow.qll
+++ b/python/ql/lib/semmle/python/dataflow/DataFlow.qll
--- a/python/ql/lib/semmle/python/dataflow/Files.qll
+++ b/python/ql/lib/semmle/python/dataflow/Files.qll
--- a/python/ql/lib/semmle/python/dataflow/Implementation.qll
+++ b/python/ql/lib/semmle/python/dataflow/Implementation.qll
--- a/python/ql/lib/semmle/python/dataflow/Legacy.qll
+++ b/python/ql/lib/semmle/python/dataflow/Legacy.qll
--- a/python/ql/lib/semmle/python/dataflow/StateTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/StateTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/TaintTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/TaintTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow2.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow3.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow4.qll
--- a/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
--- a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll
@@ -0,0 +1,317 @@
+/**
+ * Provides an extension point for modeling sensitive data, such as secrets, certificates, or passwords.
+ * Sensitive data can be interesting to use as data-flow sources in security queries.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+// Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
+private import semmle.python.Frameworks
+private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
+
+// We export these explicitly, so we don't also export the `HeuristicNames` module.
+class SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
+
+module SensitiveDataClassification = SensitiveDataHeuristics::SensitiveDataClassification;
+
+/**
+ * A data flow source of sensitive data, such as secrets, certificates, or passwords.
+ *
+ * Extend this class to refine existing API models. If you want to model new APIs,
+ * extend `SensitiveDataSource::Range` instead.
+ */
+class SensitiveDataSource extends DataFlow::Node {
+  // The `Range` instance that this node is; all member predicates delegate to it.
+  SensitiveDataSource::Range range;
+
+  SensitiveDataSource() { this = range }
+
+  /**
+   * Gets the classification of the sensitive data.
+   */
+  SensitiveDataClassification getClassification() { result = range.getClassification() }
+}
+
+/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
+module SensitiveDataSource {
+  /**
+   * A data flow source of sensitive data, such as secrets, certificates, or passwords.
+   *
+   * Extend this class to model new APIs. If you want to refine existing API models,
+   * extend `SensitiveDataSource` instead.
+   */
+  abstract class Range extends DataFlow::Node {
+    /**
+     * Gets the classification of the sensitive data.
+     */
+    abstract SensitiveDataClassification getClassification();
+  }
+}
+
+/** Actual sensitive data modeling */
+private module SensitiveDataModeling {
+  private import SensitiveDataHeuristics::HeuristicNames
+
+  /**
+   * Gets a reference to a function that is considered to be a sensitive source of
+   * `classification`.
+   *
+   * This is the type-tracking helper: tracking starts at the definition of any
+   * function whose name is a `sensitiveString` for `classification`, and `t`
+   * summarizes the steps taken from there.
+   */
+  private DataFlow::TypeTrackingNode sensitiveFunction(
+    DataFlow::TypeTracker t, SensitiveDataClassification classification
+  ) {
+    t.start() and
+    exists(Function f |
+      f.getName() = sensitiveString(classification) and
+      result.asExpr() = f.getDefinition()
+    )
+    or
+    exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
+  }
+
+  /**
+   * Gets a reference to a function that is considered to be a sensitive source of
+   * `classification`.
+   *
+   * The result is any node that a fully tracked reference (type tracker at its
+   * end state) flows to.
+   */
+  DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
+    sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
+  }
+
+  /**
+   * Gets a node that a string constant flows to locally, where the text of that
+   * constant, if used as the key in a lookup, indicates the presence of sensitive
+   * data with `classification`.
+   */
+  DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
+    // Note: If this is implemented with type-tracking, we will get cross-talk as
+    // illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
+    exists(DataFlow::LocalSourceNode strNode, StrConst str |
+      str = strNode.asExpr() and
+      str.getText() = sensitiveString(classification) and
+      strNode.flowsTo(result)
+    )
+  }
+
+  /** A function call that is considered a source of sensitive data. */
+  class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
+    SensitiveDataClassification classification;
+
+    SensitiveFunctionCall() {
+      // the called value is a tracked reference to a function with a sensitive name
+      this.getFunction() = sensitiveFunction(classification)
+      or
+      // to cover functions that we don't have the definition for, and where the
+      // reference to the function has not already been marked as being sensitive
+      this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
+    }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+
+  /**
+   * Tracks any modeled source of sensitive data (with any classification),
+   * to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
+   *
+   * Also see `extraStepForCalls`.
+   */
+  private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
+    t.start() and
+    result instanceof SensitiveDataSource
+    or
+    exists(DataFlow::TypeTracker t2 | result = possibleSensitiveCallable(t2).track(t2, t))
+  }
+
+  /**
+   * Tracks any modeled source of sensitive data (with any classification),
+   * to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
+   *
+   * Also see `extraStepForCalls`.
+   */
+  private DataFlow::Node possibleSensitiveCallable() {
+    possibleSensitiveCallable(DataFlow::TypeTracker::end()).flowsTo(result)
+  }
+
+  /**
+   * Holds if the step from `nodeFrom` to `nodeTo` should be considered a
+   * taint-flow step for sensitive-data, to ensure calls are handled correctly.
+   *
+   * To handle calls properly, while preserving a good source for path explanations,
+   * you need to include this predicate as an additional taint step in your taint-tracking
+   * configurations.
+   *
+   * The core problem can be illustrated by the example below. If we consider the
+   * `print` a sink, what path and what source do we want to show? My initial approach
+   * would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
+   * lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
+   * like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
+   * with a non-optimal path, which starts at _bad source_, and therefore doesn't show
+   * how we figured out that `non_sensitive_name`
+   * could be a function that returns a password (and in cases where there are many calls to
+   * `my_func` it will be annoying for someone to figure this out manually).
+   *
+   * By including this additional taint-step in the taint-tracking configuration, it's possible
+   * to get a path explanation going from _good source_ to the sink.
+   *
+   * ```python
+   * def my_func(non_sensitive_name):
+   *     x = non_sensitive_name() # <-- bad source
+   *     print(x) # <-- sink
+   *
+   * import not_found
+   * f = not_found.get_passwd # <-- good source
+   * my_func(f)
+   * ```
+   */
+  predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
+    // However, we do still use the type-tracking approach to limit the size of this
+    // predicate.
+    nodeTo.getFunction() = nodeFrom and
+    nodeFrom = possibleSensitiveCallable()
+  }
+
+  /**
+   * Gets the text of a string constant that is not a docstring and that does
+   * not match `notSensitiveRegexp()`.
+   */
+  pragma[nomagic]
+  private string sensitiveStrConstCandidate() {
+    exists(StrConst s | not s.isDocString() | result = s.getText()) and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /** Gets the name of an attribute being read that does not match `notSensitiveRegexp()`. */
+  pragma[nomagic]
+  private string sensitiveAttributeNameCandidate() {
+    exists(DataFlow::AttrRead a | result = a.getAttributeName()) and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /** Gets the name of a parameter that does not match `notSensitiveRegexp()`. */
+  pragma[nomagic]
+  private string sensitiveParameterNameCandidate() {
+    exists(Parameter p | result = p.getName()) and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /** Gets the name of a function that does not match `notSensitiveRegexp()`. */
+  pragma[nomagic]
+  private string sensitiveFunctionNameCandidate() {
+    exists(Function f | result = f.getName()) and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /** Gets the identifier of a `Name` that does not match `notSensitiveRegexp()`. */
+  pragma[nomagic]
+  private string sensitiveNameCandidate() {
+    exists(Name n | result = n.getId()) and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /**
+   * Gets a string produced by any of the candidate predicates above.
+   *
+   * The purpose of this helper is deduplication: if, say, an attribute and a function
+   * parameter share a name, that name appears only once here, which greatly reduces the
+   * number of regexp matches that have to be performed afterwards.
+   *
+   * Deduplication normally only happens when a predicate is materialized, so the union of
+   * the candidate predicates has to live in a separate predicate of its own for this to
+   * take effect.
+   */
+  pragma[nomagic]
+  private string sensitiveStringCandidate() {
+    result = sensitiveNameCandidate()
+    or
+    result = sensitiveAttributeNameCandidate()
+    or
+    result = sensitiveParameterNameCandidate()
+    or
+    result = sensitiveFunctionNameCandidate()
+    or
+    result = sensitiveStrConstCandidate()
+  }
+
+  /**
+   * Gets a string (primarily the name of some program entity) that may indicate
+   * sensitive data with the classification `classification`.
+   *
+   * This helper ends up being very similar to `nameIndicatesSensitiveData`, but it is
+   * performance optimized: only the deduplicated candidates are matched against the
+   * regexp, which limits the number of regexp matches that have to be performed.
+   */
+  pragma[nomagic]
+  private string sensitiveString(SensitiveDataClassification classification) {
+    exists(string candidate | candidate = sensitiveStringCandidate() |
+      candidate.regexpMatch(maybeSensitiveRegexp(classification)) and
+      result = candidate
+    )
+  }
+
+  /**
+   * Any kind of variable assignment (also including with/for) where the name indicates
+   * it contains sensitive data.
+   *
+   * Note: We _could_ make any access to a variable with a sensitive name a source of
+   * sensitive data, but to make path explanations in data-flow/taint-tracking good,
+   * we don't want that, since it works against allowing users to understand the flow
+   * in the program (which is the whole point).
+   *
+   * Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
+   * the variable is marked as the source (as compared to marking the variable as the
+   * source).
+   */
+  class SensitiveVariableAssignment extends SensitiveDataSource::Range {
+    SensitiveDataClassification classification;
+
+    SensitiveVariableAssignment() {
+      exists(DefinitionNode def |
+        def.(NameNode).getId() = sensitiveString(classification) and
+        (
+          // the value assigned to the sensitively named variable
+          this.asCfgNode() = def.getValue()
+          or
+          // when the defining value is a `for` node, the iterated sequence is the source
+          this.asCfgNode() = def.getValue().(ForNode).getSequence()
+        ) and
+        // function and class definitions bound to a sensitive name are not sources here
+        not this.asExpr() instanceof FunctionExpr and
+        not this.asExpr() instanceof ClassExpr
+      )
+      or
+      // `with <context-expr> as <sensitive-name>`: the context expression is the source
+      exists(With with |
+        with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
+        this.asExpr() = with.getContextExpr()
+      )
+    }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+
+  /** An attribute access that is considered a source of sensitive data. */
+  class SensitiveAttributeAccess extends SensitiveDataSource::Range {
+    SensitiveDataClassification classification;
+
+    SensitiveAttributeAccess() {
+      // Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
+      // I considered excluding any `from ... import something_sensitive`, but then realized that
+      // we should flag up `from ... import password as ...` as a password
+      this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
+      or
+      // Things like `getattr(foo, <reference-to-string>)`
+      this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
+    }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+
+  /** A subscript whose index is a string constant indicating that the result is sensitive data. */
+  class SensitiveSubscript extends SensitiveDataSource::Range {
+    SensitiveDataClassification classification;
+
+    SensitiveSubscript() {
+      exists(SubscriptNode subscript | subscript = this.asCfgNode() |
+        subscript.getIndex() = sensitiveLookupStringConst(classification).asCfgNode()
+      )
+    }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+
+  /**
+   * A call to an attribute named `get` on some object, where the first argument is a
+   * key indicating that the result will be sensitive data.
+   */
+  class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
+    SensitiveDataClassification classification;
+
+    SensitiveGetCall() {
+      exists(DataFlow::AttrRef getRef |
+        getRef = this.getFunction() and
+        getRef.getAttributeName() = "get"
+      ) and
+      sensitiveLookupStringConst(classification) = this.getArg(0)
+    }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+
+  /** A parameter where the name indicates it will receive sensitive data. */
+  class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
+    SensitiveDataClassification classification;
+
+    // The classification is whichever one the parameter's name matches.
+    SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
+
+    override SensitiveDataClassification getClassification() { result = classification }
+  }
+}
+
+/**
+ * Public alias for `SensitiveDataModeling::extraStepForCalls`, which is declared inside a
+ * private module. See the QLDoc of that predicate for details.
+ */
+predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking2.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
@@ -0,0 +1,65 @@
+/**
+ * This file acts as a wrapper for `internal.TypeTracker`, exposing some of the functionality with
+ * names that are more appropriate for Python.
+ */
+
+private import python
+private import internal.TypeTracker as Internal
+
+/** Any string that may appear as the name of an attribute or access path. */
+class AttributeName = Internal::ContentName;
+
+/** Either an attribute name, or the empty string (representing no attribute). */
+class OptionalAttributeName = Internal::OptionalContentName;
+
+/**
+ * Summary of the steps needed to track a value to a given dataflow node.
+ *
+ * This can be used to track objects that implement a certain API in order to
+ * recognize calls to that API. Note that type-tracking does not by itself provide a
+ * source/sink relation, that is, it may determine that a node has a given type,
+ * but it won't determine where that type came from.
+ *
+ * It is recommended that all uses of this type are written in the following form,
+ * for tracking some type `myType`:
+ * ```ql
+ * DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
+ *   t.start() and
+ *   result = < source of myType >
+ *   or
+ *   exists (DataFlow::TypeTracker t2 |
+ *     result = myType(t2).track(t2, t)
+ *   )
+ * }
+ *
+ * DataFlow::Node myType() { myType(DataFlow::TypeTracker::end()).flowsTo(result) }
+ * ```
+ *
+ * Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
+ * `t = t2.step(myType(t2), result)`. If you additionally want to track individual
+ * intra-procedural steps, use `t = t2.smallstep(myType(t2), result)`.
+ */
+class TypeTracker extends Internal::TypeTracker {
+  /**
+   * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
+   * The type tracking only ends after the attribute has been loaded.
+   */
+  predicate startInAttr(string attrName) { this.startInContent(attrName) }
+
+  /**
+   * INTERNAL. DO NOT USE.
+   *
+   * Gets the attribute associated with this type tracker.
+   */
+  string getAttr() { result = this.getContent() }
+}
+
+/** Alias for the `Internal::TypeTracker` module, exposed alongside the `TypeTracker` class. */
+module TypeTracker = Internal::TypeTracker;
+
+/** Alias for the `Internal::StepSummary` class. */
+class StepSummary = Internal::StepSummary;
+
+/** Alias for the `Internal::StepSummary` module. */
+module StepSummary = Internal::StepSummary;
+
+/** Alias for the `Internal::TypeBackTracker` class. */
+class TypeBackTracker = Internal::TypeBackTracker;
+
+/** Alias for the `Internal::TypeBackTracker` module. */
+module TypeBackTracker = Internal::TypeBackTracker;
--- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll
@@ -17,6 +17,13 @@ abstract class AttrRef extends Node {
   */
  abstract Node getObject();

+  /**
+   * Holds if this data flow node accesses the attribute named `attrName` on the object `object`,
+   * that is, `object` is `getObject()` and `attrName` is `getAttributeName()`.
+   */
+  predicate accesses(Node object, string attrName) {
+    this.getObject() = object and this.getAttributeName() = attrName
+  }
+
  /**
   * Gets the expression node that defines the attribute being accessed, if any. This is
   * usually an identifier or literal.
@@ -191,7 +198,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode {
 * - Dynamic attribute reads using `getattr`: `getattr(object, attr)`
 * - Qualified imports: `from module import attr as name`
 */
-abstract class AttrRead extends AttrRef, Node { }
+abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }

 /** A simple attribute read, e.g. `object.attr` */
 private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplCommon.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplCommon.qll
@@ -35,22 +35,22 @@ predicate accessPathCostLimits(int apLimit, int tupleLimit) {
 * calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly.
 */
 private module LambdaFlow {
-  private predicate viableParamNonLambda(DataFlowCall call, int i, ParameterNode p) {
+  private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) {
    p.isParameterOf(viableCallable(call), i)
  }

-  private predicate viableParamLambda(DataFlowCall call, int i, ParameterNode p) {
+  private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) {
    p.isParameterOf(viableCallableLambda(call, _), i)
  }

-  private predicate viableParamArgNonLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
+  private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
    exists(int i |
      viableParamNonLambda(call, i, p) and
      arg.argumentOf(call, i)
    )
  }

-  private predicate viableParamArgLambda(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
+  private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) {
    exists(int i |
      viableParamLambda(call, i, p) and
      arg.argumentOf(call, i)
@@ -118,8 +118,8 @@ private module LambdaFlow {
    boolean toJump, DataFlowCallOption lastCall
  ) {
    revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and
-    if node instanceof CastNode or node instanceof ArgumentNode or node instanceof ReturnNode
-    then compatibleTypes(t, getNodeType(node))
+    if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode
+    then compatibleTypes(t, getNodeDataFlowType(node))
    else any()
  }

@@ -129,7 +129,7 @@ private module LambdaFlow {
    boolean toJump, DataFlowCallOption lastCall
  ) {
    lambdaCall(lambdaCall, kind, node) and
-    t = getNodeType(node) and
+    t = getNodeDataFlowType(node) and
    toReturn = false and
    toJump = false and
    lastCall = TDataFlowCallNone()
@@ -146,7 +146,7 @@ private module LambdaFlow {
        getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid)
      |
        preservesValue = false and
-        t = getNodeType(node)
+        t = getNodeDataFlowType(node)
        or
        preservesValue = true and
        t = t0
@@ -160,7 +160,7 @@ private module LambdaFlow {
      toJump = true and
      lastCall = TDataFlowCallNone()
    |
-      jumpStep(node, mid) and
+      jumpStepCached(node, mid) and
      t = t0
      or
      exists(boolean preservesValue |
@@ -168,7 +168,7 @@ private module LambdaFlow {
        getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid)
      |
        preservesValue = false and
-        t = getNodeType(node)
+        t = getNodeDataFlowType(node)
        or
        preservesValue = true and
        t = t0
@@ -176,7 +176,7 @@ private module LambdaFlow {
    )
    or
    // flow into a callable
-    exists(ParameterNode p, DataFlowCallOption lastCall0, DataFlowCall call |
+    exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call |
      revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and
      (
        if lastCall0 = TDataFlowCallNone() and toJump = false
@@ -227,7 +227,7 @@ private module LambdaFlow {

  pragma[nomagic]
  predicate revLambdaFlowIn(
-    DataFlowCall lambdaCall, LambdaCallKind kind, ParameterNode p, DataFlowType t, boolean toJump,
+    DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump,
    DataFlowCallOption lastCall
  ) {
    revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall)
@@ -242,6 +242,89 @@ private DataFlowCallable viableCallableExt(DataFlowCall call) {

 cached
 private module Cached {
+  /**
+   * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to
+   * force a stage-dependency on the `DataFlowImplCommon.qll` stage, thereby
+   * collapsing the two stages.
+   */
+  cached
+  predicate forceCachingInSameStage() { any() }
+
+  cached
+  predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() }
+
+  cached
+  predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) {
+    c = call.getEnclosingCallable()
+  }
+
+  cached
+  predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) }
+
+  cached
+  predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) }
+
+  cached
+  predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) }
+
+  cached
+  predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) }
+
+  cached
+  predicate outNodeExt(Node n) {
+    n instanceof OutNode
+    or
+    n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode
+  }
+
+  cached
+  predicate hiddenNode(Node n) { nodeIsHidden(n) }
+
+  cached
+  OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) {
+    result = getAnOutNode(call, k.(ValueReturnKind).getKind())
+    or
+    exists(ArgNode arg |
+      result.(PostUpdateNode).getPreUpdateNode() = arg and
+      arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition())
+    )
+  }
+
+  cached
+  predicate returnNodeExt(Node n, ReturnKindExt k) {
+    k = TValueReturn(n.(ReturnNode).getKind())
+    or
+    exists(ParamNode p, int pos |
+      parameterValueFlowsToPreUpdate(p, n) and
+      p.isParameterOf(_, pos) and
+      k = TParamUpdate(pos)
+    )
+  }
+
+  cached
+  predicate castNode(Node n) { n instanceof CastNode }
+
+  cached
+  predicate castingNode(Node n) {
+    castNode(n) or
+    n instanceof ParamNode or
+    n instanceof OutNodeExt or
+    // For reads, `x.f`, we want to check that the tracked type after the read (which
+    // is obtained by popping the head of the access path stack) is compatible with
+    // the type of `x.f`.
+    read(_, _, n)
+  }
+
+  cached
+  predicate parameterNode(Node n, DataFlowCallable c, int i) {
+    n.(ParameterNode).isParameterOf(c, i)
+  }
+
+  cached
+  predicate argumentNode(Node n, DataFlowCall call, int pos) {
+    n.(ArgumentNode).argumentOf(call, pos)
+  }
+
  /**
   * Gets a viable target for the lambda call `call`.
   *
@@ -261,7 +344,7 @@ private module Cached {
   * The instance parameter is considered to have index `-1`.
   */
  pragma[nomagic]
-  private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
+  private predicate viableParam(DataFlowCall call, int i, ParamNode p) {
    p.isParameterOf(viableCallableExt(call), i)
  }

@@ -270,11 +353,11 @@ private module Cached {
   * dispatch into account.
   */
  cached
-  predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
+  predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) {
    exists(int i |
      viableParam(call, i, p) and
      arg.argumentOf(call, i) and
-      compatibleTypes(getNodeType(arg), getNodeType(p))
+      compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p))
    )
  }

@@ -312,7 +395,7 @@ private module Cached {
       * `read` indicates whether it is contents of `p` that can flow to `node`.
       */
      pragma[nomagic]
-      private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
+      private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) {
        p = node and
        read = false
        or
@@ -325,30 +408,30 @@ private module Cached {
        // read
        exists(Node mid |
          parameterValueFlowCand(p, mid, false) and
-          readStep(mid, _, node) and
+          read(mid, _, node) and
          read = true
        )
        or
        // flow through: no prior read
-        exists(ArgumentNode arg |
+        exists(ArgNode arg |
          parameterValueFlowArgCand(p, arg, false) and
          argumentValueFlowsThroughCand(arg, node, read)
        )
        or
        // flow through: no read inside method
-        exists(ArgumentNode arg |
+        exists(ArgNode arg |
          parameterValueFlowArgCand(p, arg, read) and
          argumentValueFlowsThroughCand(arg, node, false)
        )
      }

      pragma[nomagic]
-      private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
+      private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) {
        parameterValueFlowCand(p, arg, read)
      }

      pragma[nomagic]
-      predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
+      predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) {
        parameterValueFlowCand(p, n.getPreUpdateNode(), false)
      }

@@ -360,7 +443,7 @@ private module Cached {
       * `read` indicates whether it is contents of `p` that can flow to the return
       * node.
       */
-      predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
+      predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) {
        exists(ReturnNode ret |
          parameterValueFlowCand(p, ret, read) and
          kind = ret.getKind()
@@ -369,9 +452,9 @@ private module Cached {

      pragma[nomagic]
      private predicate argumentValueFlowsThroughCand0(
-        DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
+        DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read
      ) {
-        exists(ParameterNode param | viableParamArg(call, param, arg) |
+        exists(ParamNode param | viableParamArg(call, param, arg) |
          parameterValueFlowReturnCand(param, kind, read)
        )
      }
@@ -382,14 +465,14 @@ private module Cached {
       *
       * `read` indicates whether it is contents of `arg` that can flow to `out`.
       */
-      predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
+      predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) {
        exists(DataFlowCall call, ReturnKind kind |
          argumentValueFlowsThroughCand0(call, arg, kind, read) and
          out = getAnOutNode(call, kind)
        )
      }

-      predicate cand(ParameterNode p, Node n) {
+      predicate cand(ParamNode p, Node n) {
        parameterValueFlowCand(p, n, _) and
        (
          parameterValueFlowReturnCand(p, _, _)
@@ -416,21 +499,21 @@ private module Cached {
       * If a read step was taken, then `read` captures the `Content`, the
       * container type, and the content type.
       */
-      predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
+      predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) {
        parameterValueFlow0(p, node, read) and
        if node instanceof CastingNode
        then
          // normal flow through
          read = TReadStepTypesNone() and
-          compatibleTypes(getNodeType(p), getNodeType(node))
+          compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node))
          or
          // getter
-          compatibleTypes(read.getContentType(), getNodeType(node))
+          compatibleTypes(read.getContentType(), getNodeDataFlowType(node))
        else any()
      }

      pragma[nomagic]
-      private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
+      private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) {
        p = node and
        Cand::cand(p, _) and
        read = TReadStepTypesNone()
@@ -447,7 +530,7 @@ private module Cached {
          readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
            read.getContentType()) and
          Cand::parameterValueFlowReturnCand(p, _, true) and
-          compatibleTypes(getNodeType(p), read.getContainerType())
+          compatibleTypes(getNodeDataFlowType(p), read.getContainerType())
        )
        or
        parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
@@ -455,34 +538,32 @@ private module Cached {

      pragma[nomagic]
      private predicate parameterValueFlow0_0(
-        ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
+        ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read
      ) {
        // flow through: no prior read
-        exists(ArgumentNode arg |
+        exists(ArgNode arg |
          parameterValueFlowArg(p, arg, mustBeNone) and
          argumentValueFlowsThrough(arg, read, node)
        )
        or
        // flow through: no read inside method
-        exists(ArgumentNode arg |
+        exists(ArgNode arg |
          parameterValueFlowArg(p, arg, read) and
          argumentValueFlowsThrough(arg, mustBeNone, node)
        )
      }

      pragma[nomagic]
-      private predicate parameterValueFlowArg(
-        ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
-      ) {
+      private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) {
        parameterValueFlow(p, arg, read) and
        Cand::argumentValueFlowsThroughCand(arg, _, _)
      }

      pragma[nomagic]
      private predicate argumentValueFlowsThrough0(
-        DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
+        DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read
      ) {
-        exists(ParameterNode param | viableParamArg(call, param, arg) |
+        exists(ParamNode param | viableParamArg(call, param, arg) |
          parameterValueFlowReturn(param, kind, read)
        )
      }
@@ -496,18 +577,18 @@ private module Cached {
       * container type, and the content type.
       */
      pragma[nomagic]
-      predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
+      predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) {
        exists(DataFlowCall call, ReturnKind kind |
          argumentValueFlowsThrough0(call, arg, kind, read) and
          out = getAnOutNode(call, kind)
        |
          // normal flow through
          read = TReadStepTypesNone() and
-          compatibleTypes(getNodeType(arg), getNodeType(out))
+          compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out))
          or
          // getter
-          compatibleTypes(getNodeType(arg), read.getContainerType()) and
-          compatibleTypes(read.getContentType(), getNodeType(out))
+          compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and
+          compatibleTypes(read.getContentType(), getNodeDataFlowType(out))
        )
      }

@@ -516,7 +597,7 @@ private module Cached {
       * value-preserving steps and a single read step, not taking call
       * contexts into account, thus representing a getter-step.
       */
-      predicate getterStep(ArgumentNode arg, Content c, Node out) {
+      predicate getterStep(ArgNode arg, Content c, Node out) {
        argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
      }

@@ -529,7 +610,7 @@ private module Cached {
       * container type, and the content type.
       */
      private predicate parameterValueFlowReturn(
-        ParameterNode p, ReturnKind kind, ReadStepTypesOption read
+        ParamNode p, ReturnKind kind, ReadStepTypesOption read
      ) {
        exists(ReturnNode ret |
          parameterValueFlow(p, ret, read) and
@@ -553,7 +634,7 @@ private module Cached {
    private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) {
      mayBenefitFromCallContext(call, callable)
      or
-      callable = call.getEnclosingCallable() and
+      callEnclosingCallable(call, callable) and
      exists(viableCallableLambda(call, TDataFlowCallSome(_)))
    }

@@ -611,7 +692,7 @@ private module Cached {
        mayBenefitFromCallContextExt(call, _) and
        c = viableCallableExt(call) and
        ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and
-        tgts = strictcount(DataFlowCall ctx | viableCallableExt(ctx) = call.getEnclosingCallable()) and
+        tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and
        ctxtgts < tgts
      )
    }
@@ -635,8 +716,7 @@ private module Cached {
   * Holds if `p` can flow to the pre-update node associated with post-update
   * node `n`, in the same callable, using only value-preserving steps.
   */
-  cached
-  predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
+  private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) {
    parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
  }

@@ -644,9 +724,8 @@ private module Cached {
    Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
  ) {
    storeStep(node1, c, node2) and
-    readStep(_, c, _) and
-    contentType = getNodeType(node1) and
-    containerType = getNodeType(node2)
+    contentType = getNodeDataFlowType(node1) and
+    containerType = getNodeDataFlowType(node2)
    or
    exists(Node n1, Node n2 |
      n1 = node1.(PostUpdateNode).getPreUpdateNode() and
@@ -654,12 +733,15 @@ private module Cached {
    |
      argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
      or
-      readStep(n2, c, n1) and
-      contentType = getNodeType(n1) and
-      containerType = getNodeType(n2)
+      read(n2, c, n1) and
+      contentType = getNodeDataFlowType(n1) and
+      containerType = getNodeDataFlowType(n2)
    )
  }

+  cached
+  predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) }
+
  /**
   * Holds if data can flow from `node1` to `node2` via a direct assignment to
   * `f`.
@@ -678,8 +760,9 @@ private module Cached {
   * are aliases. A typical example is a function returning `this`, implementing a fluent
   * interface.
   */
-  cached
-  predicate reverseStepThroughInputOutputAlias(PostUpdateNode fromNode, PostUpdateNode toNode) {
+  private predicate reverseStepThroughInputOutputAlias(
+    PostUpdateNode fromNode, PostUpdateNode toNode
+  ) {
    exists(Node fromPre, Node toPre |
      fromPre = fromNode.getPreUpdateNode() and
      toPre = toNode.getPreUpdateNode()
@@ -688,23 +771,34 @@ private module Cached {
        // Does the language-specific simpleLocalFlowStep already model flow
        // from function input to output?
        fromPre = getAnOutNode(c, _) and
-        toPre.(ArgumentNode).argumentOf(c, _) and
-        simpleLocalFlowStep(toPre.(ArgumentNode), fromPre)
+        toPre.(ArgNode).argumentOf(c, _) and
+        simpleLocalFlowStep(toPre.(ArgNode), fromPre)
      )
      or
      argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre)
    )
  }

+  cached
+  predicate simpleLocalFlowStepExt(Node node1, Node node2) {
+    simpleLocalFlowStep(node1, node2) or
+    reverseStepThroughInputOutputAlias(node1, node2)
+  }
+
  /**
-   * Holds if the call context `call` either improves virtual dispatch in
-   * `callable` or if it allows us to prune unreachable nodes in `callable`.
+   * Holds if the call context `call` improves virtual dispatch in `callable`.
   */
  cached
-  predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+  predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) {
    reducedViableImplInCallContext(_, callable, call)
-    or
-    exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCall(n, call))
+  }
+
+  /**
+   * Holds if the call context `call` allows us to prune unreachable nodes in `callable`.
+   */
+  cached
+  predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) {
+    exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call))
  }

  cached
@@ -726,12 +820,12 @@ private module Cached {
  cached
  newtype TLocalFlowCallContext =
    TAnyLocalCall() or
-    TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
+    TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) }

  cached
  newtype TReturnKindExt =
    TValueReturn(ReturnKind kind) or
-    TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
+    TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) }

  cached
  newtype TBooleanOption =
@@ -757,27 +851,28 @@ private module Cached {
    TAccessPathFrontSome(AccessPathFront apf)
 }

+/**
+ * Holds if the call context `call` either improves virtual dispatch in
+ * `callable` or if it allows us to prune unreachable nodes in `callable`.
+ */
+predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+  recordDataFlowCallSiteDispatch(call, callable) or
+  recordDataFlowCallSiteUnreachable(call, callable)
+}
+
 /**
 * A `Node` at which a cast can occur such that the type should be checked.
 */
 class CastingNode extends Node {
-  CastingNode() {
-    this instanceof ParameterNode or
-    this instanceof CastNode or
-    this instanceof OutNodeExt or
-    // For reads, `x.f`, we want to check that the tracked type after the read (which
-    // is obtained by popping the head of the access path stack) is compatible with
-    // the type of `x.f`.
-    readStep(_, _, this)
-  }
+  CastingNode() { castingNode(this) }
 }

 private predicate readStepWithTypes(
  Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
 ) {
-  readStep(n1, c, n2) and
-  container = getNodeType(n1) and
-  content = getNodeType(n2)
+  read(n1, c, n2) and
+  container = getNodeDataFlowType(n1) and
+  content = getNodeDataFlowType(n2)
 }

 private newtype TReadStepTypesOption =
@@ -854,7 +949,7 @@ class CallContextSomeCall extends CallContextCall, TSomeCall {
  override string toString() { result = "CcSomeCall" }

  override predicate relevantFor(DataFlowCallable callable) {
-    exists(ParameterNode p | getNodeEnclosingCallable(p) = callable)
+    exists(ParamNode p | getNodeEnclosingCallable(p) = callable)
  }

  override predicate matchesCall(DataFlowCall call) { any() }
@@ -866,7 +961,7 @@ class CallContextReturn extends CallContextNoCall, TReturn {
  }

  override predicate relevantFor(DataFlowCallable callable) {
-    exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
+    exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable))
  }
 }

@@ -899,7 +994,7 @@ class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall
 }

 private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
-  exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCall(n, call))
+  exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call))
 }

 /**
@@ -913,26 +1008,37 @@ LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable)
  else result instanceof LocalCallContextAny
 }

+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph.
+ */
+class ParamNode extends Node {
+  ParamNode() { parameterNode(this, _, _) }
+
+  /**
+   * Holds if this node is the parameter of callable `c` at the specified
+   * (zero-based) position.
+   */
+  predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) }
+}
+
+/** A data-flow node that represents a call argument. */
+class ArgNode extends Node {
+  ArgNode() { argumentNode(this, _, _) }
+
+  /** Holds if this argument occurs at the given position in the given call. */
+  final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) }
+}
+
 /**
 * A node from which flow can return to the caller. This is either a regular
 * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
 */
 class ReturnNodeExt extends Node {
-  ReturnNodeExt() {
-    this instanceof ReturnNode or
-    parameterValueFlowsToPreUpdate(_, this)
-  }
+  ReturnNodeExt() { returnNodeExt(this, _) }

  /** Gets the kind of this returned value. */
-  ReturnKindExt getKind() {
-    result = TValueReturn(this.(ReturnNode).getKind())
-    or
-    exists(ParameterNode p, int pos |
-      parameterValueFlowsToPreUpdate(p, this) and
-      p.isParameterOf(_, pos) and
-      result = TParamUpdate(pos)
-    )
-  }
+  ReturnKindExt getKind() { returnNodeExt(this, result) }
 }

 /**
@@ -940,11 +1046,7 @@ class ReturnNodeExt extends Node {
 * or a post-update node associated with a call argument.
 */
 class OutNodeExt extends Node {
-  OutNodeExt() {
-    this instanceof OutNode
-    or
-    this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
-  }
+  OutNodeExt() { outNodeExt(this) }
 }

 /**
@@ -957,7 +1059,7 @@ abstract class ReturnKindExt extends TReturnKindExt {
  abstract string toString();

  /** Gets a node corresponding to data flow out of `call`. */
-  abstract OutNodeExt getAnOutNode(DataFlowCall call);
+  final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) }
 }

 class ValueReturnKind extends ReturnKindExt, TValueReturn {
@@ -968,10 +1070,6 @@ class ValueReturnKind extends ReturnKindExt, TValueReturn {
  ReturnKind getKind() { result = kind }

  override string toString() { result = kind.toString() }
-
-  override OutNodeExt getAnOutNode(DataFlowCall call) {
-    result = getAnOutNode(call, this.getKind())
-  }
 }

 class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
@@ -982,13 +1080,6 @@ class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
  int getPosition() { result = pos }

  override string toString() { result = "param update " + pos }
-
-  override OutNodeExt getAnOutNode(DataFlowCall call) {
-    exists(ArgumentNode arg |
-      result.(PostUpdateNode).getPreUpdateNode() = arg and
-      arg.argumentOf(call, this.getPosition())
-    )
-  }
 }

 /** A callable tagged with a relevant return kind. */
@@ -1015,10 +1106,13 @@ class ReturnPosition extends TReturnPosition0 {
 */
 pragma[inline]
 DataFlowCallable getNodeEnclosingCallable(Node n) {
-  exists(Node n0 |
-    pragma[only_bind_into](n0) = n and
-    pragma[only_bind_into](result) = n0.getEnclosingCallable()
-  )
+  nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result))
+}
+
+/** Gets the type of `n` used for type pruning. */
+pragma[inline]
+DataFlowType getNodeDataFlowType(Node n) {
+  nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result))
 }

 pragma[noinline]
@@ -1037,17 +1131,59 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
  result = getReturnPosition0(ret, ret.getKind())
 }

+/**
+ * Checks whether `inner` can return to `call` in the call context `innercc`.
+ * Assumes a context of `inner = viableCallableExt(call)`.
+ */
+bindingset[innercc, inner, call]
+predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
+  innercc instanceof CallContextAny
+  or
+  exists(DataFlowCallable c0, DataFlowCall call0 |
+    callEnclosingCallable(call0, inner) and
+    innercc = TReturn(c0, call0) and
+    c0 = prunedViableImplInCallContextReverse(call0, call)
+  )
+}
+
+/**
+ * Checks whether `call` can resolve to `calltarget` in the call context `cc`.
+ * Assumes a context of `calltarget = viableCallableExt(call)`.
+ */
+bindingset[cc, call, calltarget]
+predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
+  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+    if reducedViableImplInCallContext(call, _, ctx)
+    then calltarget = prunedViableImplInCallContext(call, ctx)
+    else any()
+  )
+  or
+  cc instanceof CallContextSomeCall
+  or
+  cc instanceof CallContextAny
+  or
+  cc instanceof CallContextReturn
+}
+
+/**
+ * Resolves a return from `callable` in `cc` to `call`. This is equivalent to
+ * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
+ */
 bindingset[cc, callable]
 predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
  cc instanceof CallContextAny and callable = viableCallableExt(call)
  or
  exists(DataFlowCallable c0, DataFlowCall call0 |
-    call0.getEnclosingCallable() = callable and
+    callEnclosingCallable(call0, callable) and
    cc = TReturn(c0, call0) and
    c0 = prunedViableImplInCallContextReverse(call0, call)
  )
 }

+/**
+ * Resolves a call from `call` in `cc` to `result`. This is equivalent to
+ * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
+ */
 bindingset[call, cc]
 DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
@@ -1063,8 +1199,6 @@ DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
  result = viableCallableExt(call) and cc instanceof CallContextReturn
 }

-predicate read = readStep/3;
-
 /** An optional Boolean value. */
 class BooleanOption extends TBooleanOption {
  string toString() {
@@ -1116,7 +1250,7 @@ abstract class AccessPathFront extends TAccessPathFront {

  TypedContent getHead() { this = TFrontHead(result) }

-  predicate isClearedAt(Node n) { clearsContent(n, getHead().getContent()) }
+  predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) }
 }

 class AccessPathFrontNil extends AccessPathFront, TFrontNil {
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
@@ -168,7 +168,13 @@ module Consistency {
    msg = "ArgumentNode is missing PostUpdateNode."
  }

-  query predicate postWithInFlow(PostUpdateNode n, string msg) {
+  // This predicate helps the compiler forget that in some languages
+  // it is impossible for a `PostUpdateNode` to be the target of
+  // `simpleLocalFlowStep`.
+  private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() }
+
+  query predicate postWithInFlow(Node n, string msg) {
+    isPostUpdateNode(n) and
    simpleLocalFlowStep(_, n) and
    msg = "PostUpdateNode should not be the target of local flow."
  }
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -228,7 +228,6 @@ module EssaFlow {
 * data flow. It is a strict subset of the `localFlowStep` predicate, as it
 * excludes SSA flow through instance fields.
 */
-cached
 predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
  // If there is ESSA-flow out of a node `node`, we want flow
  // both out of `node` and any post-update node of `node`.
@@ -870,6 +869,9 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
    module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
    nodeTo = r
  )
+  or
+  // Default value for parameter flows to that parameter
+  defaultValueFlowStep(nodeFrom, nodeTo)
 }

 /**
@@ -1034,6 +1036,19 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
  )
 }

+predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
+  exists(Function f, Parameter p, ParameterDefinition def |
+    // Unlike `getAnArg`, `getArgByName` supports keyword-only parameters
+    p = f.getArgByName(_) and
+    nodeFrom.asExpr() = p.getDefault() and
+    // The following expresses
+    // nodeTo.(ParameterNode).getParameter() = p
+    // without non-monotonic recursion
+    def.getParameter() = p and
+    nodeTo.getNode() = def.getDefiningNode()
+  )
+}
+
 /**
 * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
 */
@@ -1559,7 +1574,6 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 */
-cached
 predicate clearsContent(Node n, Content c) {
  exists(CallNode call, CallableValue callable, string name |
    call_unpacks(call, _, callable, name, _) and
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -180,6 +180,45 @@ class CallCfgNode extends CfgNode, LocalSourceNode {
  Node getArgByName(string name) { result.asCfgNode() = node.getArgByName(name) }
 }

+/**
+ * A data-flow node corresponding to a method call, that is `foo.bar(...)`.
+ *
+ * Also covers the case where the method lookup is done separately from the call itself, as in
+ * `temp = foo.bar; temp(...)`. Note that this is only tracked through local scope.
+ */
+class MethodCallNode extends CallCfgNode {
+  AttrRead method_lookup;
+
+  MethodCallNode() { method_lookup = this.getFunction().getALocalSource() }
+
+  /**
+   * Gets the name of the method being invoked (the `bar` in `foo.bar(...)`) if it can be determined.
+   *
+   * Note that this method may have multiple results if a single call node represents calls to
+   * multiple different objects and methods. If you want to link up objects and method names
+   * accurately, use the `calls` method instead.
+   */
+  string getMethodName() { result = method_lookup.getAttributeName() }
+
+  /**
+   * Gets the data-flow node corresponding to the object receiving this call. That is, the `foo` in
+   * `foo.bar(...)`.
+   *
+   * Note that this method may have multiple results if a single call node represents calls to
+   * multiple different objects and methods. If you want to link up objects and method names
+   * accurately, use the `calls` method instead.
+   */
+  Node getObject() { result = method_lookup.getObject() }
+
+  /** Holds if this data-flow node calls method `methodName` on the object node `object`. */
+  predicate calls(Node object, string methodName) {
+    // As `getObject` and `getMethodName` may both have multiple results, we must look up the object
+    // and method name directly on `method_lookup`.
+    object = method_lookup.getObject() and
+    methodName = method_lookup.getAttributeName()
+  }
+}
+
 /**
 * An expression, viewed as a node in a data flow graph.
 *
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowUtil.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowUtil.qll
@@ -18,6 +18,10 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFr
 predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }

 /**
+ * DEPRECATED. Use the API graphs library (`semmle.python.ApiGraphs`) instead.
+ *
+ * For a drop-in replacement, use `API::moduleImport(name).getAUse()`.
+ *
 * Gets a `Node` that refers to the module referenced by `name`.
 * Note that for the statement `import pkg.mod`, the new variable introduced is `pkg` that is a
 * reference to the module `pkg`.
@@ -37,7 +41,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
 *   `mypkg/foo.py` but the variable `foo` containing `42` -- however, `import mypkg.foo` will always cause `mypkg.foo`
 *   to refer to the module.
 */
-Node importNode(string name) {
+deprecated Node importNode(string name) {
  exists(Variable var, Import imp, Alias alias |
    alias = imp.getAName() and
    alias.getAsname() = var.getAStore() and
--- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll
@@ -33,15 +33,20 @@ private import DataFlowPrivate
 class LocalSourceNode extends Node {
  cached
  LocalSourceNode() {
-    not simpleLocalFlowStep(_, this) and
-    // Currently, we create synthetic post-update nodes for
-    // - arguments to calls that may modify said argument
-    // - direct reads a writes of object attributes
-    // Both of these preserve the identity of the underlying pointer, and hence we exclude these as
-    // local source nodes.
-    // We do, however, allow the post-update nodes that arise from object creation (which are non-synthetic).
-    not this instanceof SyntheticPostUpdateNode
+    this instanceof ExprNode and
+    not simpleLocalFlowStep(_, this)
    or
+    // We include all module variable nodes, as these act as stepping stones between writes and
+    // reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
+    // unable to track across global variables.
+    //
+    // Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
+    // removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
+    // be used for type tracking instead of `LocalSourceNode`.
+    this instanceof ModuleVariableNode
+    or
+    // We explicitly include any read of a global variable, as some of these may have local flow going
+    // into them.
    this = any(ModuleVariableNode mvn).getARead()
  }

@@ -59,6 +64,11 @@ class LocalSourceNode extends Node {
   */
  AttrRead getAnAttributeRead(string attrName) { result = getAnAttributeReference(attrName) }

+  /**
+   * Gets a write of attribute `attrName` on this node.
+   *
+   * This is the subset of `getAnAttributeReference(attrName)` whose results are `AttrWrite`s.
+   */
+  AttrWrite getAnAttributeWrite(string attrName) { result = getAnAttributeReference(attrName) }
+
  /**
   * Gets a reference (read or write) of any attribute on this node.
   */
@@ -73,11 +83,26 @@ class LocalSourceNode extends Node {
   */
  AttrRead getAnAttributeRead() { result = getAnAttributeReference() }

+  /**
+   * Gets a write of any attribute on this node.
+   *
+   * This is the subset of `getAnAttributeReference()` whose results are `AttrWrite`s.
+   */
+  AttrWrite getAnAttributeWrite() { result = getAnAttributeReference() }
+
  /**
   * Gets a call to this node.
   */
  CallCfgNode getACall() { Cached::call(this, result) }

+  /**
+   * Gets a call to the method `methodName` on this node.
+   *
+   * Includes both calls that have the syntactic shape of a method call (as in `obj.m(...)`), and
+   * calls where the callee undergoes some additional local data flow (as in `tmp = obj.m; m(...)`).
+   *
+   * The result is found by taking an attribute read of `methodName` on this node and then a
+   * call to that read.
+   */
+  MethodCallNode getAMethodCall(string methodName) {
+    result = this.getAnAttributeRead(methodName).getACall()
+  }
+
  /**
   * Gets a node that this node may flow to using one heap and/or interprocedural step.
   *
@@ -95,6 +120,53 @@ class LocalSourceNode extends Node {
  LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
 }

+/**
+ * A node that can be used for type tracking or type back-tracking.
+ *
+ * All steps made during type tracking should be between instances of this class.
+ *
+ * This is currently an alias for `LocalSourceNode`.
+ */
+class TypeTrackingNode = LocalSourceNode;
+
+/** Temporary holding ground for the `TypeTrackingNode` class. */
+private module FutureWork {
+  /** A node that is either a `LocalSourceNode` or a `ModuleVariableNode`. */
+  class FutureTypeTrackingNode extends Node {
+    FutureTypeTrackingNode() {
+      this instanceof LocalSourceNode
+      or
+      this instanceof ModuleVariableNode
+    }
+
+    /**
+     * Holds if this node can flow to `node` in one or more local flow steps.
+     *
+     * For `ModuleVariableNode`s, the only "local" step is to the node itself.
+     * For `LocalSourceNode`s, this is the usual notion of local flow.
+     */
+    pragma[inline]
+    predicate flowsTo(Node node) {
+      this instanceof ModuleVariableNode and this = node
+      or
+      this.(LocalSourceNode).flowsTo(node)
+    }
+
+    /**
+     * Gets a node that this node may flow to using one heap and/or interprocedural step.
+     *
+     * See `TypeTracker` for more details about how to use this.
+     */
+    pragma[inline]
+    TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
+
+    /**
+     * Gets a node that may flow into this one using one heap and/or interprocedural step.
+     *
+     * See `TypeBackTracker` for more details about how to use this.
+     */
+    pragma[inline]
+    TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
+  }
+}
+
 cached
 private module Cached {
  /**
@@ -107,11 +179,21 @@ private module Cached {
    source = sink
    or
    exists(Node second |
-      simpleLocalFlowStep(source, second) and
-      simpleLocalFlowStep*(second, sink)
+      localSourceFlowStep(source, second) and
+      localSourceFlowStep*(second, sink)
    )
  }

+  /**
+   * Helper predicate for `hasLocalSource`. Removes any steps that go to module variable reads,
+   * as these are already local source nodes in their own right.
+   */
+  cached
+  private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
+    simpleLocalFlowStep(nodeFrom, nodeTo) and
+    not nodeTo = any(ModuleVariableNode v).getARead()
+  }
+
  /**
   * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
   */
--- a/python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll
@@ -0,0 +1,73 @@
+/**
+ * INTERNAL: Do not use.
+ *
+ * Provides helper predicates for pretty-printing `DataFlow::Node`s.
+ *
+ * Since these have not been performance optimized, please only use them for
+ * debug-queries or in tests.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+
+/**
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the Expr `e`.
+ *
+ * Numbers, string constants, subscripts, calls, and attribute accesses get a dedicated
+ * rendering (built recursively from their sub-expressions where applicable); every other
+ * expression falls back to its `toString()`.
+ */
+string prettyExpr(Expr e) {
+  // Fallback: any expression without a dedicated case below.
+  not e instanceof Num and
+  not e instanceof StrConst and
+  not e instanceof Subscript and
+  not e instanceof Call and
+  not e instanceof Attribute and
+  result = e.toString()
+  or
+  // Numeric literal.
+  result = e.(Num).getN()
+  or
+  // String constant: prefix, then text, then the prefix with all ASCII letters removed
+  // (presumably leaving only the quote characters that close the literal -- TODO confirm).
+  result =
+    e.(StrConst).getPrefix() + e.(StrConst).getText() +
+      e.(StrConst).getPrefix().regexpReplaceAll("[a-zA-Z]+", "")
+  or
+  // Subscript: `obj[index]`, with both parts pretty-printed recursively.
+  result = prettyExpr(e.(Subscript).getObject()) + "[" + prettyExpr(e.(Subscript).getIndex()) + "]"
+  or
+  // Call: arguments are elided as `(..)`; `()` is used only when the call has neither
+  // positional nor named arguments.
+  (
+    if exists(e.(Call).getAnArg()) or exists(e.(Call).getANamedArg())
+    then result = prettyExpr(e.(Call).getFunc()) + "(..)"
+    else result = prettyExpr(e.(Call).getFunc()) + "()"
+  )
+  or
+  // Attribute access: `obj.name`, with the object pretty-printed recursively.
+  result = prettyExpr(e.(Attribute).getObject()) + "." + e.(Attribute).getName()
+}
+
+/**
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the DataFlow::Node `node`.
+ *
+ * Uses `prettyExpr` when the node has an associated expression, and the node's own
+ * `toString()` otherwise.
+ */
+bindingset[node]
+string prettyNode(DataFlow::Node node) {
+  if exists(node.asExpr()) then result = prettyExpr(node.asExpr()) else result = node.toString()
+}
+
+/**
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
+ * with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
+ */
+bindingset[node]
+string prettyNodeForInlineTest(DataFlow::Node node) {
+  // Case 1: the node has an associated expression.
+  exists(node.asExpr()) and
+  result = prettyExpr(node.asExpr())
+  or
+  // Case 2: the node is a post-update node whose pre-update node has an expression.
+  exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() |
+    // The default rendering of a PostUpdateNode both contains a space (in its
+    // `[post <thing>]` annotation) and does not pretty-print the pre-update node,
+    // so we handle it specially here.
+    result = "[post]" + prettyExpr(e)
+  )
+  or
+  // Case 3: neither of the above applies, so fall back to the node's own `toString()`.
+  not exists(node.asExpr()) and
+  not exists(Expr e | e = node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
+  result = node.toString()
+}
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
@@ -1,7 +1,8 @@
 private import python
 private import semmle.python.dataflow.new.DataFlow
-private import semmle.python.dataflow.new.internal.DataFlowPrivate
+private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
 private import semmle.python.dataflow.new.internal.TaintTrackingPublic
+private import semmle.python.ApiGraphs

 /**
 * Holds if `node` should be a sanitizer in all global taint flow configurations
@@ -10,35 +11,52 @@ private import semmle.python.dataflow.new.internal.TaintTrackingPublic
 predicate defaultTaintSanitizer(DataFlow::Node node) { none() }

 /**
- * Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
- * global taint flow configurations.
+ * Holds if default `TaintTracking::Configuration`s should allow implicit reads
+ * of `c` at sinks and inputs to additional taint steps.
 */
-predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
-  localAdditionalTaintStep(nodeFrom, nodeTo)
-  or
-  any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
+bindingset[node]
+predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() }
+
+/**
+ * Groups the taint-step predicates that are individually annotated `cached`.
+ *
+ * The module itself is private; its members are made available to the rest of this file
+ * by the `import Cached` that follows the module.
+ */
+private module Cached {
+  /**
+   * Holds if the additional step from `nodeFrom` to `nodeTo` should be included in all
+   * global taint flow configurations.
+   */
+  cached
+  predicate defaultAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+    localAdditionalTaintStep(nodeFrom, nodeTo)
+    or
+    any(AdditionalTaintStep a).step(nodeFrom, nodeTo)
+  }
+
+  /**
+   * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
+   * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
+   * different objects.
+   */
+  cached
+  predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+    concatStep(nodeFrom, nodeTo)
+    or
+    subscriptStep(nodeFrom, nodeTo)
+    or
+    stringManipulation(nodeFrom, nodeTo)
+    or
+    containerStep(nodeFrom, nodeTo)
+    or
+    copyStep(nodeFrom, nodeTo)
+    or
+    // For the read/store steps below, the content argument is ignored (`_`).
+    DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
+    or
+    DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
+    or
+    DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
+    or
+    awaitStep(nodeFrom, nodeTo)
+  }
 }

-/**
- * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
- * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
- * different objects.
- */
-predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
-  concatStep(nodeFrom, nodeTo)
-  or
-  subscriptStep(nodeFrom, nodeTo)
-  or
-  stringManipulation(nodeFrom, nodeTo)
-  or
-  containerStep(nodeFrom, nodeTo)
-  or
-  copyStep(nodeFrom, nodeTo)
-  or
-  forStep(nodeFrom, nodeTo)
-  or
-  unpackingAssignmentStep(nodeFrom, nodeTo)
-}
+import Cached

 /**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
@@ -69,13 +87,13 @@ predicate subscriptStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
 */
 predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
  // transforming something tainted into a string will make the string tainted
-  exists(CallNode call | call = nodeTo.getNode() |
-    call.getFunction().(NameNode).getId() in ["str", "bytes", "unicode"] and
+  exists(DataFlow::CallCfgNode call | call = nodeTo |
    (
-      nodeFrom.getNode() = call.getArg(0)
+      call = API::builtin(["str", "bytes", "unicode"]).getACall()
      or
-      nodeFrom.getNode() = call.getArgByName("object")
-    )
+      call.getFunction().asCfgNode().(NameNode).getId() in ["str", "bytes", "unicode"]
+    ) and
+    nodeFrom in [call.getArg(0), call.getArgByName("object")]
  )
  or
  // String methods. Note that this doesn't recognize `meth = "foo".upper; meth()`
@@ -142,39 +160,37 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
 predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
  // construction by literal
  // TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
-  storeStep(nodeFrom, _, nodeTo)
+  DataFlowPrivate::storeStep(nodeFrom, _, nodeTo)
  or
  // constructor call
-  exists(CallNode call | call = nodeTo.asCfgNode() |
-    call.getFunction().(NameNode).getId() in [
-        "list", "set", "frozenset", "dict", "defaultdict", "tuple"
-      ] and
-    call.getArg(0) = nodeFrom.getNode()
+  exists(DataFlow::CallCfgNode call | call = nodeTo |
+    call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
+    call.getArg(0) = nodeFrom
+    // TODO: Properly handle defaultdict/namedtuple
  )
  or
  // functions operating on collections
-  exists(CallNode call | call = nodeTo.asCfgNode() |
-    call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
-    call.getArg(0) = nodeFrom.getNode()
+  exists(DataFlow::CallCfgNode call | call = nodeTo |
+    call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
+    call.getArg(0) = nodeFrom
  )
  or
  // methods
-  exists(CallNode call, string name | call = nodeTo.asCfgNode() |
-    name in [
+  exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
+    methodName in [
        // general
        "copy", "pop",
        // dict
        "values", "items", "get", "popitem"
      ] and
-    call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
+    call.calls(nodeFrom, methodName)
  )
  or
  // list.append, set.add
-  exists(CallNode call, string name |
-    name in ["append", "add"] and
-    call.getFunction().(AttrNode).getObject(name) =
-      nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode().asCfgNode() and
-    call.getArg(0) = nodeFrom.getNode()
+  exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
+    call.calls(obj, ["append", "add"]) and
+    obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
+    call.getArg(0) = nodeFrom
  )
 }

@@ -182,38 +198,16 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
 */
 predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
-  exists(CallNode call | call = nodeTo.getNode() |
-    // Fully qualified: copy.copy, copy.deepcopy
-    (
-      call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
-      or
-      call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
-    ) and
-    call.getArg(0) = nodeFrom.getNode()
+  exists(DataFlow::CallCfgNode call | call = nodeTo |
+    call = API::moduleImport("copy").getMember(["copy", "deepcopy"]).getACall() and
+    call.getArg(0) = nodeFrom
  )
 }

 /**
- * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
- * for example `for x in xs`, or `for x,y in points`.
+ * Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
+ * such that the whole expression `await x` is tainted if `x` is tainted.
 */
-predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
-  exists(EssaNodeDefinition defn, For for |
-    for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
-    nodeTo.getVar() = defn and
-    nodeFrom.asExpr() = for.getIter()
-  )
-}
-
-/**
- * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
- * Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
- */
-predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
-  // `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
-  exists(MultiAssignmentDefinition defn, Assign assign |
-    assign.getATarget().contains(defn.getDefiningNode().getNode()) and
-    nodeTo.getVar() = defn and
-    nodeFrom.asExpr() = assign.getValue()
-  )
+predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  // `nodeTo` is the `await` expression itself; `nodeFrom` is the expression being awaited.
+  nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
 }
--- a/Show More
+++ b/Show More