Merge branch 'main' into python-fix-exceptstmt-gettype

2026-04-30 11:15:13 +02:00 · 2021-09-07 14:21:13 +02:00
parent 5a9fca48e8 b99c075282
commit 5ac32f145f
3403 changed files with 119186 additions and 43114 deletions
--- a/python/.vscode/ql.code-snippets
+++ b/python/.vscode/ql.code-snippets
@@ -106,7 +106,7 @@
        "prefix": "type tracking",
        "body": [
            "/** Gets a reference to ${3:a thing}. */",
-            "private DataFlow::LocalSourceNode ${1:myType}(DataFlow::TypeTracker t) {",
+            "private DataFlow::TypeTrackingNode ${1:myType}(DataFlow::TypeTracker t) {",
            "  t.start() and",
            "  result = ${2:value}",
            "  or",
@@ -152,4 +152,102 @@
        ]
    },

+    "Type tracking class": {
+        "scope": "ql",
+        "prefix": "type tracking class",
+        "body": [
+            "/**",
+            " * Provides models for the `${TM_SELECTED_TEXT}` class",
+            " *",
+            " * See ${1:https://apiref (TODO)}.",
+            " */",
+            "module ${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/} {",
+            "  /** Gets a reference to the `${TM_SELECTED_TEXT}` class. */",
+            "  private API::Node classRef() {",
+            "    result = API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")",
+            "  }",
+            "",
+            "  /**",
+            "   * A source of instances of `${TM_SELECTED_TEXT}`, extend this class to model new instances.",
+            "   *",
+            "   * This can include instantiations of the class, return values from function",
+            "   * calls, or a special parameter that will be set when functions are called by an external",
+            "   * library.",
+            "   *",
+            "   * Use the predicate `${TM_SELECTED_TEXT/^(.*)\\.([^.]+)$/$2/}::instance()` to get references to instances of `${TM_SELECTED_TEXT}`.",
+            "   */",
+            "  abstract class InstanceSource extends DataFlow::LocalSourceNode { }",
+            "",
+            "  /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
+            "  private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
+            "      ClassInstantiation() { this = classRef().getACall() }",
+            "  }",
+            "",
+            "  /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
+            "  private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {",
+            "    t.start() and",
+            "    result instanceof InstanceSource",
+            "    or",
+            "    exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))",
+            "  }",
+            "",
+            "  /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
+            "  DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
+            "",
+            "  /**",
+            "   * Taint propagation for `${TM_SELECTED_TEXT}`.",
+            "   */",
+            "  private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
+            "    InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
+            "    ",
+            "    override DataFlow::Node getInstance() { result = instance() }",
+            "    ",
+            "    override string getAttributeName() { none() }",
+            "    ",
+            "    override string getMethodName() { none() }",
+            "    ",
+            "    override string getAsyncMethodName() { none() }",
+            "  }",
+            "",
+            "  /**",
+            "   * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
+            "   */",
+            "  private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
+            "    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
+            "      // TODO",
+            "      none()",
+            "    }",
+            "  }",
+            "}",
+        ],
+        "description": "Type tracking class (select full class path before inserting)",
+    },
+    "foo": {
+        "scope": "ql",
+        "prefix": "foo",
+        "body": [
+            "    /**",
+            "     * Taint propagation for `$1`.",
+            "     */",
+            "     private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
+            "        InstanceTaintSteps() { this = \"$1\" }",
+            "",
+            "        override DataFlow::Node getInstance() { result = instance() }",
+            "",
+            "        override string getAttributeName() { none() }",
+            "",
+            "        override string getMethodName() { none() }",
+            "",
+            "        override string getAsyncMethodName() { none() }",
+            "      }",
+        ],
+    },
+    "API graph .getMember chain": {
+        "scope": "ql",
+        "prefix": "api graph .getMember chain",
+        "body": [
+            "API::moduleImport(\"${TM_SELECTED_TEXT/\\.([^.]+)/\").getMember(\"$1/g}\")"
+        ],
+        "description": "API graph .getMember chain (select full path before inserting)",
+    },
 }
--- a/python/change-notes/2021-06-25-add-peewee-modeling.md
+++ b/python/change-notes/2021-06-25-add-peewee-modeling.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Added modeling of raw SQL execution from the PyPI package `peewee`.
--- a/python/change-notes/2021-07-12-add-typetrackingnode.md
+++ b/python/change-notes/2021-07-12-add-typetrackingnode.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* The `track` and `backtrack` methods on `LocalSourceNode` are in the process of being deprecated. When using type trackers, the corresponding methods on `TypeTrackingNode` should be used instead.
--- a/python/change-notes/2021-07-13-path-problem-customization.md
+++ b/python/change-notes/2021-07-13-path-problem-customization.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* Changed the way to provide extra sources/sinks for `@kind path-problem` queries, to avoid a potential performance problem due to re-evaluation of data-flow configurations. Please use the new `<query>Customizations.qll` files and extend their classes instead (such as extending the `Sink` class from `python/ql/src/semmle/python/security/dataflow/SqlInjectionCustomizations.qll`). This is relevant for the queries: `py/sql-injection`, `py/code-injection`, `py/command-line-injection`, `py/reflective-xss`, `py/url-redirection`, `py/unsafe-deserialization`, `py/stack-trace-exposure`, `py/path-injection`.
--- a/python/change-notes/2021-07-28-port-RoDoS-queries.md
+++ b/python/change-notes/2021-07-28-port-RoDoS-queries.md
@@ -1,3 +1,3 @@
 lgtm,codescanning
-* Added _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
-* Added _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
+* Added an experimental _Inefficient regular expression_ (`py/redos`) query, which is already available in JavaScript.
+* Added an experimental _Polynomial regular expression used on uncontrolled data_ (`py/polynomial-redos`), which is already available in JavaScript.
--- a/python/ql/examples/qlpack.lock.yml
+++ b/python/ql/examples/qlpack.lock.yml
@@ -0,0 +1,4 @@
+---
+dependencies: {}
+compiled: false
+lockVersion: 1.0.0
--- a/python/ql/examples/qlpack.yml
+++ b/python/ql/examples/qlpack.yml
@@ -1,3 +1,4 @@
-name: codeql-python-examples
-version: 0.0.0
-libraryPathDependencies: codeql-python
+name: codeql/python-examples
+version: 0.0.2
+dependencies:
+    codeql/python-all: "*"
--- a/python/ql/lib/Customizations.qll
+++ b/python/ql/lib/Customizations.qll
--- a/python/ql/lib/default.qll
+++ b/python/ql/lib/default.qll
--- a/python/ql/lib/python.qll
+++ b/python/ql/lib/python.qll
--- a/python/ql/lib/qlpack.lock.yml
+++ b/python/ql/lib/qlpack.lock.yml
@@ -0,0 +1,4 @@
+---
+dependencies: {}
+compiled: false
+lockVersion: 1.0.0
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -0,0 +1,7 @@
+name: codeql/python-all
+version: 0.0.2
+dbscheme: semmlecode.python.dbscheme
+extractor: python
+library: true
+dependencies:
+    codeql/python-upgrades: 0.0.2
--- a/python/ql/lib/semmle/crypto/Crypto.qll
+++ b/python/ql/lib/semmle/crypto/Crypto.qll
--- a/python/ql/lib/semmle/dataflow/SSA.qll
+++ b/python/ql/lib/semmle/dataflow/SSA.qll
--- a/python/ql/lib/semmle/files/FileSystem.qll
+++ b/python/ql/lib/semmle/files/FileSystem.qll
--- a/python/ql/lib/semmle/python/ApiGraphs.qll
+++ b/python/ql/lib/semmle/python/ApiGraphs.qll
@@ -512,7 +512,7 @@ module API {
     *
     * The flow from `src` to that node may be inter-procedural.
     */
-    private DataFlow::LocalSourceNode trackUseNode(
+    private DataFlow::TypeTrackingNode trackUseNode(
      DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
    ) {
      t.start() and
@@ -530,7 +530,6 @@ module API {
    cached
    DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
      result = trackUseNode(src, DataFlow::TypeTracker::end()) and
-      // We exclude module variable nodes, as these do not correspond to real uses.
      not result instanceof DataFlow::ModuleVariableNode
    }

--- a/python/ql/lib/semmle/python/AstExtended.qll
+++ b/python/ql/lib/semmle/python/AstExtended.qll
@@ -49,12 +49,15 @@ abstract class AstNode extends AstNode_ {
  /** Whether this contains `inner` syntactically */
  predicate contains(AstNode inner) { this.getAChildNode+() = inner }

-  /** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
-  predicate containsInScope(AstNode inner) {
+  pragma[noinline]
+  private predicate containsInScope(AstNode inner, Scope scope) {
    this.contains(inner) and
-    this.getScope() = inner.getScope() and
-    not inner instanceof Scope
+    not inner instanceof Scope and
+    scope = this.getScope()
  }
+
+  /** Whether this contains `inner` syntactically and `inner` has the same scope as `this` */
+  predicate containsInScope(AstNode inner) { this.containsInScope(inner, inner.getScope()) }
 }

 /* Parents */
--- a/python/ql/lib/semmle/python/AstGenerated.qll
+++ b/python/ql/lib/semmle/python/AstGenerated.qll
--- a/python/ql/lib/semmle/python/Class.qll
+++ b/python/ql/lib/semmle/python/Class.qll
--- a/python/ql/lib/semmle/python/Comment.qll
+++ b/python/ql/lib/semmle/python/Comment.qll
--- a/python/ql/lib/semmle/python/Comparisons.qll
+++ b/python/ql/lib/semmle/python/Comparisons.qll
--- a/python/ql/lib/semmle/python/Comprehensions.qll
+++ b/python/ql/lib/semmle/python/Comprehensions.qll
--- a/python/ql/lib/semmle/python/Concepts.qll
+++ b/python/ql/lib/semmle/python/Concepts.qll
@@ -4,7 +4,7 @@
 * provide concrete subclasses.
 */

-import python
+private import python
 private import semmle.python.dataflow.new.DataFlow
 private import semmle.python.dataflow.new.RemoteFlowSources
 private import semmle.python.dataflow.new.TaintTracking
@@ -758,7 +758,7 @@ module Cryptography {
    /** Provides classes for modeling new key-pair generation APIs. */
    module KeyGeneration {
      /** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */
-      private DataFlow::LocalSourceNode keysizeBacktracker(
+      private DataFlow::TypeTrackingNode keysizeBacktracker(
        DataFlow::TypeBackTracker t, DataFlow::Node arg
      ) {
        t.start() and
--- a/python/ql/lib/semmle/python/Constants.qll
+++ b/python/ql/lib/semmle/python/Constants.qll
--- a/python/ql/lib/semmle/python/Exprs.qll
+++ b/python/ql/lib/semmle/python/Exprs.qll
--- a/python/ql/lib/semmle/python/Files.qll
+++ b/python/ql/lib/semmle/python/Files.qll
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
--- a/python/ql/lib/semmle/python/Frameworks.qll
+++ b/python/ql/lib/semmle/python/Frameworks.qll
@@ -26,6 +26,7 @@ private import semmle.python.frameworks.Rsa
 private import semmle.python.frameworks.Simplejson
 private import semmle.python.frameworks.Stdlib
 private import semmle.python.frameworks.Tornado
+private import semmle.python.frameworks.Peewee
 private import semmle.python.frameworks.Twisted
 private import semmle.python.frameworks.Ujson
 private import semmle.python.frameworks.Yaml
--- a/python/ql/lib/semmle/python/Function.qll
+++ b/python/ql/lib/semmle/python/Function.qll
--- a/python/ql/lib/semmle/python/GuardedControlFlow.qll
+++ b/python/ql/lib/semmle/python/GuardedControlFlow.qll
--- a/python/ql/lib/semmle/python/Import.qll
+++ b/python/ql/lib/semmle/python/Import.qll
--- a/python/ql/lib/semmle/python/Keywords.qll
+++ b/python/ql/lib/semmle/python/Keywords.qll
--- a/python/ql/lib/semmle/python/Metrics.qll
+++ b/python/ql/lib/semmle/python/Metrics.qll
--- a/python/ql/lib/semmle/python/Module.qll
+++ b/python/ql/lib/semmle/python/Module.qll
--- a/python/ql/lib/semmle/python/Operations.qll
+++ b/python/ql/lib/semmle/python/Operations.qll
--- a/python/ql/lib/semmle/python/PrintAst.qll
+++ b/python/ql/lib/semmle/python/PrintAst.qll
--- a/python/ql/lib/semmle/python/RegexTreeView.qll
+++ b/python/ql/lib/semmle/python/RegexTreeView.qll
@@ -7,6 +7,10 @@ private import semmle.python.regex
 * An element containing a regular expression term, that is, either
 * a string literal (parsed as a regular expression)
 * or another regular expression term.
+ *
+ * For sequences and alternations, we require more than one child.
+ * Otherwise, we wish to represent the term differently.
+ * This avoids multiple representations of the same term.
 */
 newtype TRegExpParent =
  /** A string literal used as a regular expression */
@@ -14,9 +18,18 @@ newtype TRegExpParent =
  /** A quantified term */
  TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
  /** A sequence term */
-  TRegExpSequence(Regex re, int start, int end) { re.sequence(start, end) } or
-  /** An alternatio term */
-  TRegExpAlt(Regex re, int start, int end) { re.alternation(start, end) } or
+  TRegExpSequence(Regex re, int start, int end) {
+    re.sequence(start, end) and
+    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
+  } or
+  /** An alternation term */
+  TRegExpAlt(Regex re, int start, int end) {
+    re.alternation(start, end) and
+    exists(int part_end |
+      re.alternationOption(start, end, start, part_end) and
+      part_end < end
+    ) // if an alternation does not have more than one element, it should be treated as that element instead.
+  } or
  /** A character class term */
  TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
  /** A character range term */
@@ -61,6 +74,10 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {

  predicate isDotAll() { re.getAMode() = "DOTALL" }

+  predicate isIgnoreCase() { re.getAMode() = "IGNORECASE" }
+
+  string getFlags() { result = concat(string mode | mode = re.getAMode() | mode, " | ") }
+
  override Regex getRegex() { result = re }

  string getPrimaryQLClass() { result = "RegExpLiteral" }
@@ -89,8 +106,7 @@ class RegExpTerm extends RegExpParent {
    or
    this = TRegExpQuantifier(re, start, end)
    or
-    this = TRegExpSequence(re, start, end) and
-    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
+    this = TRegExpSequence(re, start, end)
    or
    this = TRegExpSpecialChar(re, start, end)
  }
@@ -337,10 +353,7 @@ class RegExpRange extends RegExpQuantifier {
 * This is a sequence with the elements `(ECMA|Java)` and `Script`.
 */
 class RegExpSequence extends RegExpTerm, TRegExpSequence {
-  RegExpSequence() {
-    this = TRegExpSequence(re, start, end) and
-    exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
-  }
+  RegExpSequence() { this = TRegExpSequence(re, start, end) }

  override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }

@@ -473,46 +486,44 @@ class RegExpEscape extends RegExpNormalChar {
   * E.g. for `\u0061` this returns "a".
   */
  private string getUnicode() {
-    // TODO: Enable this once a supporting CLI is released.
-    // exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
-    //   result = codepoint.toUnicode()
-    // )
-    none()
+    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
+      result = codepoint.toUnicode()
+    )
+  }
+
+  /**
+   * Gets the positional integer value of the `index`th char in the hex number of the unicode escape.
+   * E.g. for `\u0061` and `index = 2` this returns 96 (the hex digit `6` in the sixteens place, i.e. `6 * 16`).
+   */
+  private int getHexValueFromUnicode(int index) {
+    this.isUnicode() and
+    exists(string hex, string char | hex = this.getText().suffix(2) |
+      char = hex.charAt(index) and
+      result = 16.pow(hex.length() - index - 1) * toHex(char)
+    )
  }
-  // TODO: Enable this once a supporting CLI is released.
-  // /**
-  //  * Gets int value for the `index`th char in the hex number of the unicode escape.
-  //  * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
-  //  */
-  // private int getHexValueFromUnicode(int index) {
-  //   isUnicode() and
-  //   exists(string hex, string char | hex = getText().suffix(2) |
-  //     char = hex.charAt(index) and
-  //     result = 16.pow(hex.length() - index - 1) * toHex(char)
-  //   )
-  // }
 }

-// TODO: Enable this once a supporting CLI is released.
-// /**
-//  * Gets the hex number for the `hex` char.
-//  */
-// private int toHex(string hex) {
-//   hex = [0 .. 9].toString() and
-//   result = hex.toInt()
-//   or
-//   result = 10 and hex = ["a", "A"]
-//   or
-//   result = 11 and hex = ["b", "B"]
-//   or
-//   result = 12 and hex = ["c", "C"]
-//   or
-//   result = 13 and hex = ["d", "D"]
-//   or
-//   result = 14 and hex = ["e", "E"]
-//   or
-//   result = 15 and hex = ["f", "F"]
-// }
+/**
+ * Gets the hex number for the `hex` char.
+ */
+private int toHex(string hex) {
+  hex = [0 .. 9].toString() and
+  result = hex.toInt()
+  or
+  result = 10 and hex = ["a", "A"]
+  or
+  result = 11 and hex = ["b", "B"]
+  or
+  result = 12 and hex = ["c", "C"]
+  or
+  result = 13 and hex = ["d", "D"]
+  or
+  result = 14 and hex = ["e", "E"]
+  or
+  result = 15 and hex = ["f", "F"]
+}
+
 /**
 * A character class escape in a regular expression.
 * That is, an escaped charachter that denotes multiple characters.
@@ -838,6 +849,15 @@ class RegExpZeroWidthMatch extends RegExpGroup {
 */
 class RegExpSubPattern extends RegExpZeroWidthMatch {
  RegExpSubPattern() { not re.emptyGroup(start, end) }
+
+  /** Gets the lookahead term. */
+  RegExpTerm getOperand() {
+    exists(int in_start, int in_end | re.groupContents(start, end, in_start, in_end) |
+      result.getRegex() = re and
+      result.getStart() = in_start and
+      result.getEnd() = in_end
+    )
+  }
 }

 /**
--- a/python/ql/lib/semmle/python/SSA.qll
+++ b/python/ql/lib/semmle/python/SSA.qll
--- a/python/ql/lib/semmle/python/Scope.qll
+++ b/python/ql/lib/semmle/python/Scope.qll
--- a/python/ql/lib/semmle/python/SelfAttribute.qll
+++ b/python/ql/lib/semmle/python/SelfAttribute.qll
--- a/python/ql/lib/semmle/python/SpecialMethods.qll
+++ b/python/ql/lib/semmle/python/SpecialMethods.qll
--- a/python/ql/lib/semmle/python/Stmts.qll
+++ b/python/ql/lib/semmle/python/Stmts.qll
--- a/python/ql/lib/semmle/python/TestUtils.qll
+++ b/python/ql/lib/semmle/python/TestUtils.qll
--- a/python/ql/lib/semmle/python/Unit.qll
+++ b/python/ql/lib/semmle/python/Unit.qll
--- a/python/ql/lib/semmle/python/Variables.qll
+++ b/python/ql/lib/semmle/python/Variables.qll
--- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
+++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
--- a/python/ql/lib/semmle/python/dataflow/Configuration.qll
+++ b/python/ql/lib/semmle/python/dataflow/Configuration.qll
--- a/python/ql/lib/semmle/python/dataflow/DataFlow.qll
+++ b/python/ql/lib/semmle/python/dataflow/DataFlow.qll
--- a/python/ql/lib/semmle/python/dataflow/Files.qll
+++ b/python/ql/lib/semmle/python/dataflow/Files.qll
--- a/python/ql/lib/semmle/python/dataflow/Implementation.qll
+++ b/python/ql/lib/semmle/python/dataflow/Implementation.qll
--- a/python/ql/lib/semmle/python/dataflow/Legacy.qll
+++ b/python/ql/lib/semmle/python/dataflow/Legacy.qll
--- a/python/ql/lib/semmle/python/dataflow/StateTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/StateTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/TaintTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/TaintTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow2.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow3.qll
--- a/python/ql/lib/semmle/python/dataflow/new/DataFlow4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/DataFlow4.qll
--- a/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll
--- a/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll
@@ -55,12 +55,12 @@ private module SensitiveDataModeling {
   * Gets a reference to a function that is considered to be a sensitive source of
   * `classification`.
   */
-  private DataFlow::LocalSourceNode sensitiveFunction(
+  private DataFlow::TypeTrackingNode sensitiveFunction(
    DataFlow::TypeTracker t, SensitiveDataClassification classification
  ) {
    t.start() and
    exists(Function f |
-      nameIndicatesSensitiveData(f.getName(), classification) and
+      f.getName() = sensitiveString(classification) and
      result.asExpr() = f.getDefinition()
    )
    or
@@ -83,7 +83,7 @@ private module SensitiveDataModeling {
    // Note: If this is implemented with type-tracking, we will get cross-talk as
    // illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
    exists(DataFlow::LocalSourceNode source |
-      nameIndicatesSensitiveData(source.asExpr().(StrConst).getText(), classification) and
+      source.asExpr().(StrConst).getText() = sensitiveString(classification) and
      source.flowsTo(result)
    )
  }
@@ -97,7 +97,7 @@ private module SensitiveDataModeling {
      or
      // to cover functions that we don't have the definition for, and where the
      // reference to the function has not already been marked as being sensitive
-      nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
+      this.getFunction().asCfgNode().(NameNode).getId() = sensitiveString(classification)
    }

    override SensitiveDataClassification getClassification() { result = classification }
@@ -109,7 +109,7 @@ private module SensitiveDataModeling {
   *
   * Also see `extraStepForCalls`.
   */
-  private DataFlow::LocalSourceNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
+  private DataFlow::TypeTrackingNode possibleSensitiveCallable(DataFlow::TypeTracker t) {
    t.start() and
    result instanceof SensitiveDataSource
    or
@@ -164,6 +164,68 @@ private module SensitiveDataModeling {
    nodeFrom = possibleSensitiveCallable()
  }

+  pragma[nomagic]
+  private string sensitiveStrConstCandidate() {
+    result = any(StrConst s | not s.isDocString()).getText() and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  pragma[nomagic]
+  private string sensitiveAttributeNameCandidate() {
+    result = any(DataFlow::AttrRead a).getAttributeName() and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  pragma[nomagic]
+  private string sensitiveParameterNameCandidate() {
+    result = any(Parameter p).getName() and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  pragma[nomagic]
+  private string sensitiveFunctionNameCandidate() {
+    result = any(Function f).getName() and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  pragma[nomagic]
+  private string sensitiveNameCandidate() {
+    result = any(Name n).getId() and
+    not result.regexpMatch(notSensitiveRegexp())
+  }
+
+  /**
+   * This helper predicate serves to deduplicate the results of the preceding predicates. This
+   * means that if, say, an attribute and a function parameter have the same name, then that name will
+   * only be matched once, which greatly cuts down on the number of regexp matches that have to be
+   * performed.
+   *
+   * Under normal circumstances, deduplication is only performed when a predicate is materialized, and
+   * so to see the effect of this we must create a separate predicate that calculates the union of the
+   * preceding predicates.
+   */
+  pragma[nomagic]
+  private string sensitiveStringCandidate() {
+    result in [
+        sensitiveNameCandidate(), sensitiveAttributeNameCandidate(),
+        sensitiveParameterNameCandidate(), sensitiveFunctionNameCandidate(),
+        sensitiveStrConstCandidate()
+      ]
+  }
+
+  /**
+   * Returns strings (primarily the names of various program entities) that may contain sensitive data
+   * with the classification `classification`.
+   *
+   * This helper predicate ends up being very similar to `nameIndicatesSensitiveData`,
+   * but is performance optimized to limit the number of regexp matches that have to be performed.
+   */
+  pragma[nomagic]
+  private string sensitiveString(SensitiveDataClassification classification) {
+    result = sensitiveStringCandidate() and
+    result.regexpMatch(maybeSensitiveRegexp(classification))
+  }
+
  /**
   * Any kind of variable assignment (also including with/for) where the name indicates
   * it contains sensitive data.
@@ -182,7 +244,7 @@ private module SensitiveDataModeling {

    SensitiveVariableAssignment() {
      exists(DefinitionNode def |
-        nameIndicatesSensitiveData(def.(NameNode).getId(), classification) and
+        def.(NameNode).getId() = sensitiveString(classification) and
        (
          this.asCfgNode() = def.getValue()
          or
@@ -193,7 +255,7 @@ private module SensitiveDataModeling {
      )
      or
      exists(With with |
-        nameIndicatesSensitiveData(with.getOptionalVars().(Name).getId(), classification) and
+        with.getOptionalVars().(Name).getId() = sensitiveString(classification) and
        this.asExpr() = with.getContextExpr()
      )
    }
@@ -209,7 +271,7 @@ private module SensitiveDataModeling {
      // Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
      // I considered excluding any `from ... import something_sensitive`, but then realized that
      // we should flag up `form ... import password as ...` as a password
-      nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
+      this.(DataFlow::AttrRead).getAttributeName() = sensitiveString(classification)
      or
      // Things like `getattr(foo, <reference-to-string>)`
      this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
@@ -246,9 +308,7 @@ private module SensitiveDataModeling {
  class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
    SensitiveDataClassification classification;

-    SensitiveParameter() {
-      nameIndicatesSensitiveData(this.getParameter().getName(), classification)
-    }
+    SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }

    override SensitiveDataClassification getClassification() { result = classification }
  }
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking2.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking3.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TaintTracking4.qll
--- a/python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll
@@ -23,7 +23,7 @@ class OptionalAttributeName = Internal::OptionalContentName;
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```ql
- * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
+ * DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
--- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll
@@ -943,13 +943,8 @@ private module Stage2 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -1615,13 +1609,8 @@ private module Stage3 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -2364,20 +2352,16 @@ private module Stage4 {

  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
-    c = resolveCall(call, outercc) and
+    checkCallContextCall(outercc, call, c) and
    if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
  }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+    checkCallContextReturn(innercc, c, call) and
    if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
  }

-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    resolveReturn(innercc, inner, call)
-  }
-
  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
    localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll
@@ -943,13 +943,8 @@ private module Stage2 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -1615,13 +1609,8 @@ private module Stage3 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -2364,20 +2352,16 @@ private module Stage4 {

  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
-    c = resolveCall(call, outercc) and
+    checkCallContextCall(outercc, call, c) and
    if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
  }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+    checkCallContextReturn(innercc, c, call) and
    if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
  }

-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    resolveReturn(innercc, inner, call)
-  }
-
  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
    localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll
@@ -943,13 +943,8 @@ private module Stage2 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -1615,13 +1609,8 @@ private module Stage3 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -2364,20 +2352,16 @@ private module Stage4 {

  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
-    c = resolveCall(call, outercc) and
+    checkCallContextCall(outercc, call, c) and
    if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
  }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+    checkCallContextReturn(innercc, c, call) and
    if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
  }

-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    resolveReturn(innercc, inner, call)
-  }
-
  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
    localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll
@@ -943,13 +943,8 @@ private module Stage2 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1122,8 +1117,7 @@ private module Stage2 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -1615,13 +1609,8 @@ private module Stage3 {
  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
-
-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    any()
-  }
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() }

  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) { any() }
@@ -1816,8 +1805,7 @@ private module Stage3 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
@@ -2364,20 +2352,16 @@ private module Stage4 {

  bindingset[call, c, outercc]
  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) {
-    c = resolveCall(call, outercc) and
+    checkCallContextCall(outercc, call, c) and
    if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall()
  }

-  bindingset[call, c]
-  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) {
+  bindingset[call, c, innercc]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) {
+    checkCallContextReturn(innercc, c, call) and
    if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone()
  }

-  bindingset[innercc, inner, call]
-  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
-    resolveReturn(innercc, inner, call)
-  }
-
  bindingset[node, cc, config]
  private LocalCc getLocalCc(NodeEx node, Cc cc, Configuration config) {
    localFlowEntry(node, config) and
@@ -2579,8 +2563,7 @@ private module Stage4 {
      fwdFlow(ret, innercc, argAp, ap, config) and
      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
      inner = ret.getEnclosingCallable() and
-      checkCallContextReturn(innercc, inner, call) and
-      ccOut = getCallContextReturn(inner, call)
+      ccOut = getCallContextReturn(inner, call, innercc)
    |
      ap instanceof ApNil or allowsFieldFlow = true
    )
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplCommon.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplCommon.qll
@@ -1117,6 +1117,44 @@ ReturnPosition getReturnPosition(ReturnNodeExt ret) {
  result = getReturnPosition0(ret, ret.getKind())
 }

+/**
+ * Checks whether `inner` can return to `call` in the call context `innercc`.
+ * Assumes a context of `inner = viableCallableExt(call)`.
+ */
+bindingset[innercc, inner, call]
+predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) {
+  innercc instanceof CallContextAny
+  or
+  exists(DataFlowCallable c0, DataFlowCall call0 |
+    callEnclosingCallable(call0, inner) and
+    innercc = TReturn(c0, call0) and
+    c0 = prunedViableImplInCallContextReverse(call0, call)
+  )
+}
+
+/**
+ * Checks whether `call` can resolve to `calltarget` in the call context `cc`.
+ * Assumes a context of `calltarget = viableCallableExt(call)`.
+ */
+bindingset[cc, call, calltarget]
+predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) {
+  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+    if reducedViableImplInCallContext(call, _, ctx)
+    then calltarget = prunedViableImplInCallContext(call, ctx)
+    else any()
+  )
+  or
+  cc instanceof CallContextSomeCall
+  or
+  cc instanceof CallContextAny
+  or
+  cc instanceof CallContextReturn
+}
+
+/**
+ * Resolves a return from `callable` in `cc` to `call`. This is equivalent to
+ * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`.
+ */
 bindingset[cc, callable]
 predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
  cc instanceof CallContextAny and callable = viableCallableExt(call)
@@ -1128,6 +1166,10 @@ predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall
  )
 }

+/**
+ * Resolves a call from `call` in `cc` to `result`. This is equivalent to
+ * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`.
+ */
 bindingset[call, cc]
 DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplConsistency.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplSpecific.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowUtil.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowUtil.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll
@@ -36,8 +36,13 @@ class LocalSourceNode extends Node {
    this instanceof ExprNode and
    not simpleLocalFlowStep(_, this)
    or
-    // Module variable nodes must be local source nodes, otherwise type trackers cannot step through
-    // them.
+    // We include all module variable nodes, as these act as stepping stones between writes and
+    // reads of global variables. Without them, type tracking based on `LocalSourceNode`s would be
+    // unable to track across global variables.
+    //
+    // Once the `track` and `backtrack` methods have been fully deprecated, this disjunct can be
+    // removed, and the entire class can extend `ExprNode`. At that point, `TypeTrackingNode` should
+    // be used for type tracking instead of `LocalSourceNode`.
    this instanceof ModuleVariableNode
    or
    // We explicitly include any read of a global variable, as some of these may have local flow going
@@ -115,6 +120,53 @@ class LocalSourceNode extends Node {
  LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
 }

+/**
+ * A node that can be used for type tracking or type back-tracking.
+ *
+ * All steps made during type tracking should be between instances of this class.
+ */
+class TypeTrackingNode = LocalSourceNode;
+
+/** Temporary holding ground for the `TypeTrackingNode` class. */
+private module FutureWork {
+  class FutureTypeTrackingNode extends Node {
+    FutureTypeTrackingNode() {
+      this instanceof LocalSourceNode
+      or
+      this instanceof ModuleVariableNode
+    }
+
+    /**
+     * Holds if this node can flow to `node` in zero or more local flow steps.
+     *
+     * For `ModuleVariableNode`s, the only "local" step is to the node itself.
+     * For `LocalSourceNode`s, this is the usual notion of local flow.
+     */
+    pragma[inline]
+    predicate flowsTo(Node node) {
+      this instanceof ModuleVariableNode and this = node
+      or
+      this.(LocalSourceNode).flowsTo(node)
+    }
+
+    /**
+     * Gets a node that this node may flow to using one heap and/or interprocedural step.
+     *
+     * See `TypeTracker` for more details about how to use this.
+     */
+    pragma[inline]
+    TypeTrackingNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
+
+    /**
+     * Gets a node that may flow into this one using one heap and/or interprocedural step.
+     *
+     * See `TypeBackTracker` for more details about how to use this.
+     */
+    pragma[inline]
+    TypeTrackingNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
+  }
+}
+
 cached
 private module Cached {
  /**
@@ -127,11 +179,21 @@ private module Cached {
    source = sink
    or
    exists(Node second |
-      simpleLocalFlowStep(source, second) and
-      simpleLocalFlowStep*(second, sink)
+      localSourceFlowStep(source, second) and
+      localSourceFlowStep*(second, sink)
    )
  }

+  /**
+   * Helper predicate for `hasLocalSource`. Removes any steps that go to module variable reads, as
+   * these are already local source nodes in their own right.
+   */
+  cached
+  private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) {
+    simpleLocalFlowStep(nodeFrom, nodeTo) and
+    not nodeTo = any(ModuleVariableNode v).getARead()
+  }
+
  /**
   * Holds if `base` flows to the base of `ref` and `ref` has attribute name `attr`.
   */
--- a/python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll
@@ -1,6 +1,20 @@
-import python
-import semmle.python.dataflow.new.DataFlow
+/**
+ * INTERNAL: Do not use.
+ *
+ * Provides helper predicates for pretty-printing `DataFlow::Node`s.
+ *
+ * Since these have not been performance optimized, please only use them for
+ * debug-queries or in tests.
+ */

+private import python
+private import semmle.python.dataflow.new.DataFlow
+
+/**
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the Expr `e`.
+ */
 string prettyExpr(Expr e) {
  not e instanceof Num and
  not e instanceof StrConst and
@@ -27,7 +41,9 @@ string prettyExpr(Expr e) {
 }

 /**
- * Gets pretty-printed version of the DataFlow::Node `node`
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the DataFlow::Node `node`.
 */
 bindingset[node]
 string prettyNode(DataFlow::Node node) {
@@ -35,7 +51,9 @@ string prettyNode(DataFlow::Node node) {
 }

 /**
- * Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
+ * INTERNAL: Do not use.
+ *
+ * Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
 * with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
 */
 bindingset[node]
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll
@@ -46,9 +46,13 @@ private module Cached {
    or
    copyStep(nodeFrom, nodeTo)
    or
-    forStep(nodeFrom, nodeTo)
+    DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
    or
-    unpackingAssignmentStep(nodeFrom, nodeTo)
+    DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
+    or
+    DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
+    or
+    awaitStep(nodeFrom, nodeTo)
  }
 }

@@ -201,26 +205,9 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
 }

 /**
- * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
- * for example `for x in xs`, or `for x,y in points`.
+ * Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
+ * such that the whole expression `await x` is tainted if `x` is tainted.
 */
-predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
-  exists(EssaNodeDefinition defn, For for |
-    for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
-    nodeTo.getVar() = defn and
-    nodeFrom.asExpr() = for.getIter()
-  )
-}
-
-/**
- * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
- * Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
- */
-predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
-  // `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
-  exists(MultiAssignmentDefinition defn, Assign assign |
-    assign.getATarget().contains(defn.getDefiningNode().getNode()) and
-    nodeTo.getVar() = defn and
-    nodeFrom.asExpr() = assign.getValue()
-  )
+predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+  nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
 }
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPublic.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll
@@ -59,7 +59,7 @@ private module Cached {
   * Steps contained in this predicate should _not_ depend on the call graph.
   */
  cached
-  predicate stepNoCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+  predicate stepNoCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    exists(Node mid | nodeFrom.flowsTo(mid) and smallstepNoCall(mid, nodeTo, summary))
  }

@@ -68,7 +68,7 @@ private module Cached {
   * inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  cached
-  predicate stepCall(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+  predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
  }
 }
@@ -96,7 +96,7 @@ class StepSummary extends TStepSummary {
 }

 pragma[noinline]
-private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
  jumpStep(nodeFrom, nodeTo) and
  summary = LevelStep()
  or
@@ -109,7 +109,7 @@ private predicate smallstepNoCall(Node nodeFrom, LocalSourceNode nodeTo, StepSum
 }

 pragma[noinline]
-private predicate smallstepCall(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
  callStep(nodeFrom, nodeTo) and summary = CallStep()
  or
  returnStep(nodeFrom, nodeTo) and
@@ -129,7 +129,7 @@ module StepSummary {
   * call graph.
   */
  pragma[inline]
-  predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+  predicate step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    stepNoCall(nodeFrom, nodeTo, summary)
    or
    stepCall(nodeFrom, nodeTo, summary)
@@ -143,7 +143,7 @@ module StepSummary {
   * type-preserving steps.
   */
  pragma[inline]
-  predicate smallstep(Node nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
+  predicate smallstep(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
    smallstepNoCall(nodeFrom, nodeTo, summary)
    or
    smallstepCall(nodeFrom, nodeTo, summary)
@@ -174,7 +174,7 @@ module StepSummary {
   * function. This means we will track the fact that `x.attr` can have the type of `y` into the
   * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
   */
-  predicate localSourceStoreStep(Node nodeFrom, LocalSourceNode nodeTo, string content) {
+  predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) {
    exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
  }
 }
@@ -192,7 +192,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentNam
 * It is recommended that all uses of this type are written in the following form,
 * for tracking some type `myType`:
 * ```ql
- * DataFlow::LocalSourceNode myType(DataFlow::TypeTracker t) {
+ * DataFlow::TypeTrackingNode myType(DataFlow::TypeTracker t) {
 *   t.start() and
 *   result = < source of myType >
 *   or
@@ -275,7 +275,7 @@ class TypeTracker extends TTypeTracker {
   * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
   */
  pragma[inline]
-  TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+  TypeTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
      result = this.append(pragma[only_bind_into](summary))
@@ -342,7 +342,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
 * for back-tracking some callback type `myCallback`:
 *
 * ```ql
- * DataFlow::LocalSourceNode myCallback(DataFlow::TypeBackTracker t) {
+ * DataFlow::TypeTrackingNode myCallback(DataFlow::TypeBackTracker t) {
 *   t.start() and
 *   result = (< some API call >).getArgument(< n >).getALocalSource()
 *   or
@@ -351,7 +351,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
 *   )
 * }
 *
- * DataFlow::LocalSourceNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
+ * DataFlow::TypeTrackingNode myCallback() { result = myCallback(DataFlow::TypeBackTracker::end()) }
 * ```
 *
 * Instead of `result = myCallback(t2).backtrack(t2, t)`, you can also use the equivalent
@@ -418,7 +418,7 @@ class TypeBackTracker extends TTypeBackTracker {
   * heap and/or inter-procedural step from `nodeTo` to `nodeFrom`.
   */
  pragma[inline]
-  TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
+  TypeBackTracker step(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo) {
    exists(StepSummary summary |
      StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
      this = result.prepend(pragma[only_bind_into](summary))
@@ -431,7 +431,7 @@ class TypeBackTracker extends TTypeBackTracker {
   *
   * Unlike `TypeBackTracker::step`, this predicate exposes all edges
   * in the flowgraph, and not just the edges between
-   * `LocalSourceNode`s. It may therefore be less performant.
+   * `TypeTrackingNode`s. It may therefore be less performant.
   *
   * Type tracking predicates using small steps typically take the following form:
   * ```ql
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -8,7 +8,7 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPr

 class Node = DataFlowPublic::Node;

-class LocalSourceNode = DataFlowPublic::LocalSourceNode;
+class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;

 predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;

--- a/python/ql/lib/semmle/python/dataflow/new/internal/readme.md
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/readme.md
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking1/TaintTrackingImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking1/TaintTrackingImpl.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking1/TaintTrackingParameter.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking1/TaintTrackingParameter.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking2/TaintTrackingImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking2/TaintTrackingImpl.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking2/TaintTrackingParameter.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking2/TaintTrackingParameter.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking3/TaintTrackingImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking3/TaintTrackingImpl.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking3/TaintTrackingParameter.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking3/TaintTrackingParameter.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking4/TaintTrackingImpl.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking4/TaintTrackingImpl.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking4/TaintTrackingParameter.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/tainttracking4/TaintTrackingParameter.qll
--- a/python/ql/lib/semmle/python/dataflow/old/Configuration.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/Configuration.qll
--- a/python/ql/lib/semmle/python/dataflow/old/DataFlow.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/DataFlow.qll
--- a/python/ql/lib/semmle/python/dataflow/old/Files.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/Files.qll
--- a/python/ql/lib/semmle/python/dataflow/old/Implementation.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/Implementation.qll
--- a/python/ql/lib/semmle/python/dataflow/old/Legacy.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/Legacy.qll
--- a/python/ql/lib/semmle/python/dataflow/old/StateTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/StateTracking.qll
--- a/python/ql/lib/semmle/python/dataflow/old/TaintTracking.qll
+++ b/python/ql/lib/semmle/python/dataflow/old/TaintTracking.qll
--- a/python/ql/lib/semmle/python/dependencies/Dependencies.qll
+++ b/python/ql/lib/semmle/python/dependencies/Dependencies.qll
--- a/python/ql/lib/semmle/python/dependencies/DependencyKind.qll
+++ b/python/ql/lib/semmle/python/dependencies/DependencyKind.qll
--- a/Show More
+++ b/Show More