Merge branch 'main' into import-refined

2026-03-06 15:49:08 +01:00 · 2023-02-27 17:00:48 +01:00
parent 11000fd123 927c322b7b
commit be7d6689b8
781 changed files with 34418 additions and 4274 deletions
--- a/python/ql/lib/CHANGELOG.md
+++ b/python/ql/lib/CHANGELOG.md
@@ -1,3 +1,16 @@
+## 0.8.0
+
+### Breaking Changes
+
+- Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
+  the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
+  extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
+  correct version of the Python standard library is extracted.
+
+### Minor Analysis Improvements
+
+* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
+
 ## 0.7.2

 No user-facing changes.
--- a/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
+++ b/python/ql/lib/change-notes/2023-01-16-new-call-graph.md
@@ -0,0 +1,4 @@
+---
+category: majorAnalysis
+---
+* We use a new analysis for the call-graph (determining which function is called). This can lead to changed results. In most cases this is much more accurate than the old call-graph that was based on points-to, but we do lose a few valid edges in the call-graph, especially around methods that are not defined inside their class.
--- a/python/ql/lib/change-notes/2023-02-13-hmac-modeling.md
+++ b/python/ql/lib/change-notes/2023-02-13-hmac-modeling.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Added modeling of cryptographic operations in the `hmac` library.
--- a/python/ql/lib/change-notes/2023-02-15-import-star-package.md
+++ b/python/ql/lib/change-notes/2023-02-15-import-star-package.md
@@ -1,4 +0,0 @@
---
-category: minorAnalysis
---
-* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
--- a/python/ql/lib/change-notes/2023-02-14-python-2-no-longer-supported.md
+++ b/python/ql/lib/change-notes/2023-02-14-python-2-no-longer-supported.md
@@ -1,7 +1,12 @@
---
-category: breaking
---
+## 0.8.0
+
+### Breaking Changes
+
 - Python 2 is no longer supported for extracting databases using the CodeQL CLI. As a consequence,
  the previously deprecated support for `pyxl` and `spitfire` templates has also been removed. When
  extracting Python 2 code, having Python 2 installed is still recommended, as this ensures the
  correct version of the Python standard library is extracted.
+
+### Minor Analysis Improvements
+
+* Fixed module resolution so we properly recognize that in `from <pkg> import *`, where `<pkg>` is a package, the actual imports are made from the `<pkg>/__init__.py` file.
--- a/python/ql/lib/codeql-pack.release.yml
+++ b/python/ql/lib/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 0.7.2
+lastReleaseVersion: 0.8.0
--- a/python/ql/lib/qlpack.yml
+++ b/python/ql/lib/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-all
-version: 0.7.3-dev
+version: 0.8.1-dev
 groups: python
 dbscheme: semmlecode.python.dbscheme
 extractor: python
--- a/python/ql/lib/semmle/python/Flow.qll
+++ b/python/ql/lib/semmle/python/Flow.qll
@@ -125,7 +125,7 @@ class ControlFlowNode extends @py_flow_node {
  /** Gets a textual representation of this element. */
  cached
  string toString() {
-    Stages::DataFlow::ref() and
+    Stages::AST::ref() and
    exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
    or
    exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
@@ -411,6 +411,12 @@ class CallNode extends ControlFlowNode {
    result.getNode() = this.getNode().getStarArg() and
    result.getBasicBlock().dominates(this.getBasicBlock())
  }
+
+  /** Gets a dictionary (**) argument of this call, if any. */
+  ControlFlowNode getKwargs() {
+    result.getNode() = this.getNode().getKwargs() and
+    result.getBasicBlock().dominates(this.getBasicBlock())
+  }
 }

 /** A control flow corresponding to an attribute expression, such as `value.attr` */
--- a/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
+++ b/python/ql/lib/semmle/python/concepts/CryptoAlgorithms.qll
@@ -26,6 +26,26 @@ private newtype TCryptographicAlgorithm =
    isWeakPasswordHashingAlgorithm(name) and isWeak = true
  }

+/**
+ * Gets the most specific `CryptographicAlgorithm` that matches the given `name`.
+ * A matching algorithm is one where the name of the algorithm matches the start of `name`, with allowances made for different name formats.
+ * In the case that multiple `CryptographicAlgorithm`s match the given `name`, the algorithm(s) with the longest name will be selected. This is intended to select more specific versions of algorithms when multiple versions could match - for example "SHA3_224" matches against both "SHA3" and "SHA3224", but the latter is a more precise match.
+ */
+bindingset[name]
+private CryptographicAlgorithm getBestAlgorithmForName(string name) {
+  result =
+    max(CryptographicAlgorithm algorithm |
+      algorithm.getName() =
+        [
+          name.toUpperCase(), // the full name
+          name.toUpperCase().regexpCapture("^([\\w]+)(?:-.*)?$", 1), // the name prior to any dashes or spaces
+          name.toUpperCase().regexpCapture("^([A-Z0-9]+)(?:(-|_).*)?$", 1) // the name prior to any dashes, spaces, or underscores
+        ].regexpReplaceAll("[-_ ]", "") // strip dashes, underscores, and spaces
+    |
+      algorithm order by algorithm.getName().length()
+    )
+}
+
 /**
 * A cryptographic algorithm.
 */
@@ -39,15 +59,11 @@ abstract class CryptographicAlgorithm extends TCryptographicAlgorithm {
  abstract string getName();

  /**
-   * Holds if the name of this algorithm matches `name` modulo case,
-   * white space, dashes, underscores, and anything after a dash in the name
-   * (to ignore modes of operation, such as CBC or ECB).
+   * Holds if the name of this algorithm is the most specific match for `name`.
+   * This predicate matches quite liberally to account for different ways of formatting algorithm names, e.g. using dashes, underscores, or spaces as separators, including or not including block modes of operation, etc.
   */
  bindingset[name]
-  predicate matchesName(string name) {
-    [name.toUpperCase(), name.toUpperCase().regexpCapture("^(\\w+)(?:-.*)?$", 1)]
-        .regexpReplaceAll("[-_ ]", "") = getName()
-  }
+  predicate matchesName(string name) { this = getBestAlgorithmForName(name) }

  /**
   * Holds if this algorithm is weak.
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatchPointsTo.qll
@@ -1,838 +0,0 @@
-/**
- * INTERNAL: Do not use.
- *
- * Points-to based call-graph.
- */
-
-private import python
-private import DataFlowPublic
-private import semmle.python.SpecialMethods
-private import FlowSummaryImpl as FlowSummaryImpl
-
-/** A parameter position represented by an integer. */
-class ParameterPosition extends int {
-  ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
-
-  /** Holds if this position represents a positional parameter at position `pos`. */
-  predicate isPositional(int pos) { this = pos } // with the current representation, all parameters are positional
-}
-
-/** An argument position represented by an integer. */
-class ArgumentPosition extends int {
-  ArgumentPosition() { this in [-2, -1] or exists(any(Call c).getArg(this)) }
-
-  /** Holds if this position represents a positional argument at position `pos`. */
-  predicate isPositional(int pos) { this = pos } // with the current representation, all arguments are positional
-}
-
-/** Holds if arguments at position `apos` match parameters at position `ppos`. */
-pragma[inline]
-predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
-
-/**
- * Computes routing of arguments to parameters
- *
- * When a call contains more positional arguments than there are positional parameters,
- * the extra positional arguments are passed as a tuple to a starred parameter. This is
- * achieved by synthesizing a node `TPosOverflowNode(call, callable)`
- * that represents the tuple of extra positional arguments. There is a store step from each
- * extra positional argument to this node.
- *
- * CURRENTLY NOT SUPPORTED:
- * When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
- *
- * CURRENTLY NOT SUPPORTED:
- * If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
- * positional arguments are passed to the starred argument.
- *
- * When a call contains keyword arguments that do not correspond to keyword parameters, these
- * extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
- * achieved by synthesizing a node `TKwOverflowNode(call, callable)`
- * that represents the dictionary of extra keyword arguments. There is a store step from each
- * extra keyword argument to this node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
- * the value at such a key is unpacked and passed to the parameter. This is achieved
- * by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
- * value. This node is used as the argument passed to the matching keyword parameter. There is a read
- * step from the dictionary argument to the synthesized argument node.
- *
- * When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
- * entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
- * adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
- * step to clear content of that node at any unpacked keys.
- *
- * ## Examples:
- * Assume that we have the callable
- * ```python
- * def f(x, y, *t, **d):
- *   pass
- * ```
- * Then the call
- * ```python
- * f(0, 1, 2, a=3)
- * ```
- * will be modeled as
- * ```python
- * f(0, 1, [*t], [**d])
- * ```
- * where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
- * `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
- * There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
- * `a`.
- *
- * For the call
- * ```python
- * f(0, **{"y": 1, "a": 3})
- * ```
- * no tuple argument is synthesized. It is modeled as
- * ```python
- * f(0, [y=1], [**d])
- * ```
- * where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
- * a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
- * `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
- * a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
- */
-module ArgumentPassing {
-  /**
-   * Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
-   *
-   * It _may not_ be the case that `call = callable.getACall()`, i.e. if `call` represents a `ClassCall`.
-   *
-   * Used to limit the size of predicates.
-   */
-  predicate connects(CallNode call, CallableValue callable) {
-    exists(NormalCall c |
-      call = c.getNode() and
-      callable = c.getCallable().getCallableValue()
-    )
-  }
-
-  /**
-   * Gets the `n`th parameter of `callable`.
-   * If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
-   * If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
-   * Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
-   * as it is an explicit parameter at position 0.
-   */
-  NameNode getParameter(CallableValue callable, int n) {
-    // positional parameter
-    result = callable.getParameter(n)
-    or
-    // starred parameter, `*tuple`
-    exists(Function f |
-      f = callable.getScope() and
-      n = -1 and
-      result = f.getVararg().getAFlowNode()
-    )
-    or
-    // doubly starred parameter, `**dict`
-    exists(Function f |
-      f = callable.getScope() and
-      n = -2 and
-      result = f.getKwarg().getAFlowNode()
-    )
-  }
-
-  /**
-   * A type representing a mapping from argument indices to parameter indices.
-   * We currently use two mappings: NoShift, the identity, used for ordinary
-   * function calls, and ShiftOneUp which is used for calls where an extra argument
-   * is inserted. These include method calls, constructor calls and class calls.
-   * In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
-   */
-  newtype TArgParamMapping =
-    TNoShift() or
-    TShiftOneUp()
-
-  /** A mapping used for parameter passing. */
-  abstract class ArgParamMapping extends TArgParamMapping {
-    /** Gets the index of the parameter that corresponds to the argument at index `argN`. */
-    bindingset[argN]
-    abstract int getParamN(int argN);
-
-    /** Gets a textual representation of this element. */
-    abstract string toString();
-  }
-
-  /** A mapping that passes argument `n` to parameter `n`. */
-  class NoShift extends ArgParamMapping, TNoShift {
-    NoShift() { this = TNoShift() }
-
-    override string toString() { result = "NoShift [n -> n]" }
-
-    bindingset[argN]
-    override int getParamN(int argN) { result = argN }
-  }
-
-  /** A mapping that passes argument `n` to parameter `n+1`. */
-  class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
-    ShiftOneUp() { this = TShiftOneUp() }
-
-    override string toString() { result = "ShiftOneUp [n -> n+1]" }
-
-    bindingset[argN]
-    override int getParamN(int argN) { result = argN + 1 }
-  }
-
-  /**
-   * Gets the node representing the argument to `call` that is passed to the parameter at
-   * (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
-   * at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
-   *
-   * `mapping` will be the identity for function calls, but not for method- or constructor calls,
-   * where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
-   * Similarly for classmethod calls, where the first parameter is `cls`.
-   *
-   * NOT SUPPORTED: Keyword-only parameters.
-   */
-  Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
-    connects(call, callable) and
-    (
-      // positional argument
-      exists(int argN |
-        paramN = mapping.getParamN(argN) and
-        result = TCfgNode(call.getArg(argN))
-      )
-      or
-      // keyword argument
-      // TODO: Since `getArgName` have no results for keyword-only parameters,
-      // these are currently not supported.
-      exists(Function f, string argName |
-        f = callable.getScope() and
-        f.getArgName(paramN) = argName and
-        result = TCfgNode(call.getArgByName(unbind_string(argName)))
-      )
-      or
-      // a synthesized argument passed to the starred parameter (at position -1)
-      callable.getScope().hasVarArg() and
-      paramN = -1 and
-      result = TPosOverflowNode(call, callable)
-      or
-      // a synthesized argument passed to the doubly starred parameter (at position -2)
-      callable.getScope().hasKwArg() and
-      paramN = -2 and
-      result = TKwOverflowNode(call, callable)
-      or
-      // argument unpacked from dict
-      exists(string name |
-        call_unpacks(call, mapping, callable, name, paramN) and
-        result = TKwUnpackedNode(call, callable, name)
-      )
-    )
-  }
-
-  /** Currently required in `getArg` in order to prevent a bad join. */
-  bindingset[result, s]
-  private string unbind_string(string s) { result <= s and s <= result }
-
-  /** Gets the control flow node that is passed as the `n`th overflow positional argument. */
-  ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
-    connects(call, callable) and
-    exists(Function f, int posCount, int argNr |
-      f = callable.getScope() and
-      f.hasVarArg() and
-      posCount = f.getPositionalParameterCount() and
-      result = call.getArg(argNr) and
-      argNr >= posCount and
-      argNr = posCount + n
-    )
-  }
-
-  /** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
-  ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
-    connects(call, callable) and
-    exists(Function f |
-      f = callable.getScope() and
-      f.hasKwArg() and
-      not exists(f.getArgByName(key)) and
-      result = call.getArgByName(key)
-    )
-  }
-
-  /**
-   * Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
-   * It will then be passed to the parameter of `callable` at index `paramN`.
-   */
-  predicate call_unpacks(
-    CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
-  ) {
-    connects(call, callable) and
-    exists(Function f |
-      f = callable.getScope() and
-      not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
-      name = f.getArgName(paramN) and
-      // not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
-      // TODO: make the below logic respect control flow splitting (by not going to the AST).
-      not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
-      paramN >= 0 and
-      paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
-      exists(call.getNode().getKwargs()) // dict argument available
-    )
-  }
-}
-
-import ArgumentPassing
-
-/** A callable defined in library code, identified by a unique string. */
-abstract class LibraryCallable extends string {
-  bindingset[this]
-  LibraryCallable() { any() }
-
-  /** Gets a call to this library callable. */
-  abstract CallCfgNode getACall();
-
-  /** Gets a data-flow node, where this library callable is used as a call-back. */
-  abstract ArgumentNode getACallback();
-}
-
-/**
- * IPA type for DataFlowCallable.
- *
- * A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
- * A module has no calls.
- */
-newtype TDataFlowCallable =
-  TCallableValue(CallableValue callable) {
-    callable instanceof FunctionValue and
-    not callable.(FunctionValue).isLambda()
-    or
-    callable instanceof ClassValue
-  } or
-  TLambda(Function lambda) { lambda.isLambda() } or
-  TModule(Module m) or
-  TLibraryCallable(LibraryCallable callable)
-
-/** A callable. */
-class DataFlowCallable extends TDataFlowCallable {
-  /** Gets a textual representation of this element. */
-  string toString() { result = "DataFlowCallable" }
-
-  /** Gets a call to this callable. */
-  CallNode getACall() { none() }
-
-  /** Gets the scope of this callable */
-  Scope getScope() { none() }
-
-  /** Gets the specified parameter of this callable */
-  NameNode getParameter(int n) { none() }
-
-  /** Gets the name of this callable. */
-  string getName() { none() }
-
-  /** Gets a callable value for this callable, if any. */
-  CallableValue getCallableValue() { none() }
-
-  /** Gets the underlying library callable, if any. */
-  LibraryCallable asLibraryCallable() { this = TLibraryCallable(result) }
-
-  Location getLocation() { none() }
-}
-
-/** A class representing a callable value. */
-class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
-  CallableValue callable;
-
-  DataFlowCallableValue() { this = TCallableValue(callable) }
-
-  override string toString() { result = callable.toString() }
-
-  override CallNode getACall() { result = callable.getACall() }
-
-  override Scope getScope() { result = callable.getScope() }
-
-  override NameNode getParameter(int n) { result = getParameter(callable, n) }
-
-  override string getName() { result = callable.getName() }
-
-  override CallableValue getCallableValue() { result = callable }
-}
-
-/** A class representing a callable lambda. */
-class DataFlowLambda extends DataFlowCallable, TLambda {
-  Function lambda;
-
-  DataFlowLambda() { this = TLambda(lambda) }
-
-  override string toString() { result = lambda.toString() }
-
-  override CallNode getACall() { result = this.getCallableValue().getACall() }
-
-  override Scope getScope() { result = lambda.getEvaluatingScope() }
-
-  override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
-
-  override string getName() { result = "Lambda callable" }
-
-  override FunctionValue getCallableValue() {
-    result.getOrigin().getNode() = lambda.getDefinition()
-  }
-
-  Expr getDefinition() { result = lambda.getDefinition() }
-}
-
-/** A class representing the scope in which a `ModuleVariableNode` appears. */
-class DataFlowModuleScope extends DataFlowCallable, TModule {
-  Module mod;
-
-  DataFlowModuleScope() { this = TModule(mod) }
-
-  override string toString() { result = mod.toString() }
-
-  override CallNode getACall() { none() }
-
-  override Scope getScope() { result = mod }
-
-  override NameNode getParameter(int n) { none() }
-
-  override string getName() { result = mod.getName() }
-
-  override CallableValue getCallableValue() { none() }
-}
-
-class LibraryCallableValue extends DataFlowCallable, TLibraryCallable {
-  LibraryCallable callable;
-
-  LibraryCallableValue() { this = TLibraryCallable(callable) }
-
-  override string toString() { result = callable.toString() }
-
-  override CallNode getACall() { result = callable.getACall().getNode() }
-
-  /** Gets a data-flow node, where this library callable is used as a call-back. */
-  ArgumentNode getACallback() { result = callable.getACallback() }
-
-  override Scope getScope() { none() }
-
-  override NameNode getParameter(int n) { none() }
-
-  override string getName() { result = callable }
-
-  override LibraryCallable asLibraryCallable() { result = callable }
-}
-
-/**
- * IPA type for DataFlowCall.
- *
- * Calls corresponding to `CallNode`s are either to callable values or to classes.
- * The latter is directed to the callable corresponding to the `__init__` method of the class.
- *
- * An `__init__` method can also be called directly, so that the callable can be targeted by
- * different types of calls. In that case, the parameter mappings will be different,
- * as the class call will synthesize an argument node to be mapped to the `self` parameter.
- *
- * A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
- *
- * TODO: Add `TClassMethodCall` mapping `cls` appropriately.
- */
-newtype TDataFlowCall =
-  /**
-   * Includes function calls, method calls, class calls and library calls.
-   * All these will be associated with a `CallNode`.
-   */
-  TNormalCall(CallNode call) or
-  /**
-   * Includes calls to special methods.
-   * These will be associated with a `SpecialMethodCallNode`.
-   */
-  TSpecialCall(SpecialMethodCallNode special) or
-  /** A synthesized call inside a summarized callable */
-  TSummaryCall(FlowSummaryImpl::Public::SummarizedCallable c, Node receiver) {
-    FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
-  }
-
-/** A call found in the program source (as opposed to a synthesised summary call). */
-class TExtractedDataFlowCall = TSpecialCall or TNormalCall;
-
-/** A call that is taken into account by the global data flow computation. */
-abstract class DataFlowCall extends TDataFlowCall {
-  /** Gets a textual representation of this element. */
-  abstract string toString();
-
-  /** Get the callable to which this call goes, if such exists. */
-  abstract DataFlowCallable getCallable();
-
-  /**
-   * Gets the argument to this call that will be sent
-   * to the `n`th parameter of the callable, if any.
-   */
-  abstract Node getArg(int n);
-
-  /** Get the control flow node representing this call, if any. */
-  abstract ControlFlowNode getNode();
-
-  /** Gets the enclosing callable of this call. */
-  abstract DataFlowCallable getEnclosingCallable();
-
-  /** Gets the location of this dataflow call. */
-  abstract Location getLocation();
-
-  /**
-   * Holds if this element is at the specified location.
-   * The location spans column `startcolumn` of line `startline` to
-   * column `endcolumn` of line `endline` in file `filepath`.
-   * For more information, see
-   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-   */
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-  }
-}
-
-/** A call found in the program source (as opposed to a synthesised call). */
-abstract class ExtractedDataFlowCall extends DataFlowCall, TExtractedDataFlowCall {
-  final override Location getLocation() { result = this.getNode().getLocation() }
-
-  abstract override DataFlowCallable getCallable();
-
-  abstract override Node getArg(int n);
-
-  abstract override ControlFlowNode getNode();
-}
-
-/** A call associated with a `CallNode`. */
-class NormalCall extends ExtractedDataFlowCall, TNormalCall {
-  CallNode call;
-
-  NormalCall() { this = TNormalCall(call) }
-
-  override string toString() { result = call.toString() }
-
-  abstract override Node getArg(int n);
-
-  override CallNode getNode() { result = call }
-
-  abstract override DataFlowCallable getCallable();
-
-  override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
-}
-
-/**
- * A call to a function.
- * This excludes calls to bound methods, classes, and special methods.
- * Bound method calls and class calls insert an argument for the explicit
- * `self` parameter, and special method calls have special argument passing.
- */
-class FunctionCall extends NormalCall {
-  DataFlowCallableValue callable;
-
-  FunctionCall() {
-    call = any(FunctionValue f).getAFunctionCall() and
-    call = callable.getACall()
-  }
-
-  override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
-  override DataFlowCallable getCallable() { result = callable }
-}
-
-/** A call to a lambda. */
-class LambdaCall extends NormalCall {
-  DataFlowLambda callable;
-
-  LambdaCall() {
-    call = callable.getACall() and
-    callable = TLambda(any(Function f))
-  }
-
-  override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
-
-  override DataFlowCallable getCallable() { result = callable }
-}
-
-/**
- * Represents a call to a bound method call.
- * The node representing the instance is inserted as argument to the `self` parameter.
- */
-class MethodCall extends NormalCall {
-  FunctionValue bm;
-
-  MethodCall() { call = bm.getAMethodCall() }
-
-  private CallableValue getCallableValue() { result = bm }
-
-  override Node getArg(int n) {
-    n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
-    or
-    n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
-  }
-
-  override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/**
- * Represents a call to a class.
- * The pre-update node for the call is inserted as argument to the `self` parameter.
- * That makes the call node be the post-update node holding the value of the object
- * after the constructor has run.
- */
-class ClassCall extends NormalCall {
-  ClassValue c;
-
-  ClassCall() {
-    not c.isAbsent() and
-    call = c.getACall()
-  }
-
-  private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
-
-  override Node getArg(int n) {
-    n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
-    or
-    n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
-  }
-
-  override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
-}
-
-/** A call to a special method. */
-class SpecialCall extends ExtractedDataFlowCall, TSpecialCall {
-  SpecialMethodCallNode special;
-
-  SpecialCall() { this = TSpecialCall(special) }
-
-  override string toString() { result = special.toString() }
-
-  override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
-
-  override ControlFlowNode getNode() { result = special }
-
-  override DataFlowCallable getCallable() {
-    result = TCallableValue(special.getResolvedSpecialMethod())
-  }
-
-  override DataFlowCallable getEnclosingCallable() {
-    result.getScope() = special.getNode().getScope()
-  }
-}
-
-/**
- * A call to a summarized callable, a `LibraryCallable`.
- *
- * We currently exclude all resolved calls. This means that a call to, say, `map`, which
- * is a `ClassCall`, cannot currently be given a summary.
- * We hope to lift this restriction in the future and include all potential calls to summaries
- * in this class.
- */
-class LibraryCall extends NormalCall {
-  LibraryCall() {
-    // TODO: share this with `resolvedCall`
-    not (
-      call = any(DataFlowCallableValue cv).getACall()
-      or
-      call = any(DataFlowLambda l).getACall()
-      or
-      // TODO: this should be covered by `DataFlowCallableValue`, but a `ClassValue` is not a `CallableValue`.
-      call = any(ClassValue c).getACall()
-    )
-  }
-
-  // TODO: Implement Python calling convention?
-  override Node getArg(int n) { result = TCfgNode(call.getArg(n)) }
-
-  // We cannot refer to a `LibraryCallable` here,
-  // as that could in turn refer to type tracking.
-  // This call will be tied to a `LibraryCallable` via
-  // `getViableCallabe` when the global data flow is assembled.
-  override DataFlowCallable getCallable() { none() }
-}
-
-/**
- * A synthesized call inside a callable with a flow summary.
- *
- * For example, in
- * ```python
- * map(lambda x: x + 1, [1, 2, 3])
- * ```
- *
- * there is a synthesized call to the lambda argument inside `map`.
- */
-class SummaryCall extends DataFlowCall, TSummaryCall {
-  private FlowSummaryImpl::Public::SummarizedCallable c;
-  private Node receiver;
-
-  SummaryCall() { this = TSummaryCall(c, receiver) }
-
-  /** Gets the data flow node that this call targets. */
-  Node getReceiver() { result = receiver }
-
-  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
-
-  override DataFlowCallable getCallable() { none() }
-
-  override Node getArg(int n) { none() }
-
-  override ControlFlowNode getNode() { none() }
-
-  override string toString() { result = "[summary] call to " + receiver + " in " + c }
-
-  override Location getLocation() { none() }
-}
-
-/**
- * The value of a parameter at function entry, viewed as a node in a data
- * flow graph.
- */
-abstract class ParameterNodeImpl extends Node {
-  abstract Parameter getParameter();
-
-  /**
-   * Holds if this node is the parameter of callable `c` at the
-   * (zero-based) index `i`.
-   */
-  abstract predicate isParameterOf(DataFlowCallable c, int i);
-}
-
-/** A parameter for a library callable with a flow summary. */
-class SummaryParameterNode extends ParameterNodeImpl, TSummaryParameterNode {
-  private FlowSummaryImpl::Public::SummarizedCallable sc;
-  private int pos;
-
-  SummaryParameterNode() { this = TSummaryParameterNode(sc, pos) }
-
-  override Parameter getParameter() { none() }
-
-  override predicate isParameterOf(DataFlowCallable c, int i) {
-    sc = c.asLibraryCallable() and i = pos
-  }
-
-  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = sc }
-
-  override string toString() { result = "parameter " + pos + " of " + sc }
-
-  // Hack to return "empty location"
-  override predicate hasLocationInfo(
-    string file, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    file = "" and
-    startline = 0 and
-    startcolumn = 0 and
-    endline = 0 and
-    endcolumn = 0
-  }
-}
-
-/** A data-flow node used to model flow summaries. */
-class SummaryNode extends Node, TSummaryNode {
-  private FlowSummaryImpl::Public::SummarizedCallable c;
-  private FlowSummaryImpl::Private::SummaryNodeState state;
-
-  SummaryNode() { this = TSummaryNode(c, state) }
-
-  override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c }
-
-  override string toString() { result = "[summary] " + state + " in " + c }
-
-  // Hack to return "empty location"
-  override predicate hasLocationInfo(
-    string file, int startline, int startcolumn, int endline, int endcolumn
-  ) {
-    file = "" and
-    startline = 0 and
-    startcolumn = 0 and
-    endline = 0 and
-    endcolumn = 0
-  }
-}
-
-private class SummaryReturnNode extends SummaryNode, ReturnNode {
-  private ReturnKind rk;
-
-  SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this, rk) }
-
-  override ReturnKind getKind() { result = rk }
-}
-
-private class SummaryArgumentNode extends SummaryNode, ArgumentNode {
-  SummaryArgumentNode() { FlowSummaryImpl::Private::summaryArgumentNode(_, this, _) }
-
-  override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
-    FlowSummaryImpl::Private::summaryArgumentNode(call, this, pos)
-  }
-}
-
-private class SummaryPostUpdateNode extends SummaryNode, PostUpdateNode {
-  private Node pre;
-
-  SummaryPostUpdateNode() { FlowSummaryImpl::Private::summaryPostUpdateNode(this, pre) }
-
-  override Node getPreUpdateNode() { result = pre }
-}
-
-/** Gets a viable run-time target for the call `call`. */
-DataFlowCallable viableCallable(ExtractedDataFlowCall call) {
-  result = call.getCallable()
-  or
-  // A call to a library callable with a flow summary
-  // In this situation we can not resolve the callable from the call,
-  // as that would make data flow depend on type tracking.
-  // Instead we resolve the call from the summary.
-  exists(LibraryCallable callable |
-    result = TLibraryCallable(callable) and
-    call.getNode() = callable.getACall().getNode()
-  )
-}
-
-private newtype TReturnKind = TNormalReturnKind()
-
-/**
- * A return kind. A return kind describes how a value can be returned
- * from a callable. For Python, this is simply a method return.
- */
-class ReturnKind extends TReturnKind {
-  /** Gets a textual representation of this element. */
-  string toString() { result = "return" }
-}
-
-/** A data flow node that represents a value returned by a callable. */
-abstract class ReturnNode extends Node {
-  /** Gets the kind of this return node. */
-  ReturnKind getKind() { any() }
-}
-
-/** A data flow node that represents a value returned by a callable. */
-class ExtractedReturnNode extends ReturnNode, CfgNode {
-  // See `TaintTrackingImplementation::returnFlowStep`
-  ExtractedReturnNode() { node = any(Return ret).getValue().getAFlowNode() }
-
-  override ReturnKind getKind() { any() }
-}
-
-/** A data-flow node that represents the output of a call. */
-abstract class OutNode extends Node {
-  /** Gets the underlying call, where this node is a corresponding output of kind `kind`. */
-  abstract DataFlowCall getCall(ReturnKind kind);
-}
-
-private module OutNodes {
-  /**
-   * A data-flow node that reads a value returned directly by a callable.
-   */
-  class ExprOutNode extends OutNode, ExprNode {
-    private DataFlowCall call;
-
-    ExprOutNode() { call.(ExtractedDataFlowCall).getNode() = this.getNode() }
-
-    override DataFlowCall getCall(ReturnKind kind) {
-      result = call and
-      kind = kind
-    }
-  }
-
-  private class SummaryOutNode extends SummaryNode, OutNode {
-    SummaryOutNode() { FlowSummaryImpl::Private::summaryOutNode(_, this, _) }
-
-    override DataFlowCall getCall(ReturnKind kind) {
-      FlowSummaryImpl::Private::summaryOutNode(result, this, kind)
-    }
-  }
-}
-
-/**
- * Gets a node that can read the value returned from `call` with return kind
- * `kind`.
- */
-OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) }
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -16,7 +16,7 @@ private import semmle.python.Frameworks
 // make it more digestible.
 import MatchUnpacking
 import IterableUnpacking
-import DataFlowDispatchPointsTo
+import DataFlowDispatch

 /** Gets the callable in which this node occurs. */
 DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -39,162 +39,267 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
 //--------
 predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }

-/** DEPRECATED: Alias for `SyntheticPreUpdateNode` */
-deprecated module syntheticPreUpdateNode = SyntheticPreUpdateNode;
+// =============================================================================
+// SyntheticPreUpdateNode
+// =============================================================================
+class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
+  CallNode node;

-/** A module collecting the different reasons for synthesising a pre-update node. */
-module SyntheticPreUpdateNode {
-  class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
-    NeedsSyntheticPreUpdateNode post;
+  SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(node) }

-    SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
+  /** Gets the node for which this is a synthetic pre-update node. */
+  CfgNode getPostUpdateNode() { result.getNode() = node }

-    /** Gets the node for which this is a synthetic pre-update node. */
-    Node getPostUpdateNode() { result = post }
+  override string toString() { result = "[pre] " + node.toString() }

-    override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
+  override Scope getScope() { result = node.getScope() }

-    override Scope getScope() { result = post.getScope() }
-
-    override Location getLocation() { result = post.getLocation() }
-  }
-
-  /** A data flow node for which we should synthesise an associated pre-update node. */
-  class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
-    NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
-
-    override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
-
-    /**
-     * Gets the label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
-     *
-     * There is currently only one reason for needing a pre-update node, so we always use that as the label.
-     */
-    string label() { result = "objCreate" }
-  }
-
-  /**
-   * Calls to constructors are treated as post-update nodes for the synthesized argument
-   * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
-   * object after the constructor (currently only `__init__`) has run.
-   */
-  CfgNode objectCreationNode() { result.getNode() = any(ClassCall c).getNode() }
+  override Location getLocation() { result = node.getLocation() }
 }

-import SyntheticPreUpdateNode
+// =============================================================================
+// *args (StarArgs) related
+// =============================================================================
+/**
+ * A (synthetic) data-flow parameter node to capture all positional arguments that
+ * should be passed to the `*args` parameter.
+ *
+ * To handle
+ * ```py
+ * def func(*args):
+ *     for arg in args:
+ *         sink(arg)
+ *
+ * func(source1, source2, ...)
+ * ```
+ *
+ * we add a synthetic parameter to `func` that accepts any positional argument at (or
+ * after) the index for the `*args` parameter. We add a store step (at any list index) to the real
+ * `*args` parameter. This means we can handle the code above, but if the code had done `sink(args[0])`
+ * we would (wrongly) add flow for `source2` as well.
+ *
+ * To solve this more precisely, we could add a synthetic argument with position `*args`
+ * that had store steps with the correct index (like we do for mapping keyword arguments to a
+ * `**kwargs` parameter). However, if a single call could go to 2 different
+ * targets with `*args` parameters at different positions, as in the example below, it's unclear what
+ * index to store `2` at. For the `foo` callable it should be 1, for the `bar` callable it should be 0.
+ * So this information would need to be encoded in the arguments of an `ArgumentPosition` branch, and
+ * one of the arguments would be which callable is the target. However, we cannot build `ArgumentPosition`
+ * branches based on the call-graph, so this strategy doesn't work.
+ *
+ * Another approach to solving it precisely is to add multiple synthetic parameters that have store steps
+ * to the real `*args` parameter. So for the example below, `foo` would need to have synthetic parameter
+ * nodes for indexes 1 and 2 (which would have store steps for indexes 0 and 1 of the `*args` parameter),
+ * and `bar` would need it for indexes 1, 2, and 3. The question becomes how many synthetic parameters to
+ * create, which _must_ be `max(Call call, int i | exists(call.getArg(i)))`, since (again) we can't base
+ * this on the call-graph. And each function with a `*args` parameter would need this many extra synthetic
+ * nodes. My gut feeling is that this simple approach will be good enough, but if we need to get it more
+ * precise, it should be possible to do it like this.
+ *
+ * In PR review, @yoff suggested an alternative approach for more precise handling:
+ *
+ * - At the call site, all positional arguments are stored into a synthetic starArgs argument, always starting at index 0
+ * - This is sent to a synthetic star parameter
+ * - At the receiving end, we know the offset of a potential real star parameter, so we can define read steps accordingly: In foo, we read from the synthetic star parameter at index 1 and store to the real star parameter at index 0.
+ *
+ * ```py
+ * def foo(one, *args): ...
+ * def bar(*args): ...
+ *
+ * func = foo if <cond> else bar
+ * func(1, 2, 3)
+ */
+class SynthStarArgsElementParameterNode extends ParameterNodeImpl,
+  TSynthStarArgsElementParameterNode {
+  DataFlowCallable callable;

-/** DEPRECATED: Alias for `SyntheticPostUpdateNode` */
-deprecated module syntheticPostUpdateNode = SyntheticPostUpdateNode;
+  SynthStarArgsElementParameterNode() { this = TSynthStarArgsElementParameterNode(callable) }

-/** A module collecting the different reasons for synthesising a post-update node. */
-module SyntheticPostUpdateNode {
-  private import semmle.python.SpecialMethods
+  override string toString() { result = "SynthStarArgsElementParameterNode" }

-  /** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
-  class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
-    NeedsSyntheticPostUpdateNode pre;
+  override Scope getScope() { result = callable.getScope() }

-    SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
+  override Location getLocation() { result = callable.getLocation() }

-    override Node getPreUpdateNode() { result = pre }
-
-    override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
-
-    override Scope getScope() { result = pre.getScope() }
-
-    override Location getLocation() { result = pre.getLocation() }
-  }
-
-  /** A data flow node for which we should synthesise an associated post-update node. */
-  class NeedsSyntheticPostUpdateNode extends Node {
-    NeedsSyntheticPostUpdateNode() {
-      this = argumentPreUpdateNode()
-      or
-      this = storePreUpdateNode()
-      or
-      this = readPreUpdateNode()
-    }
-
-    /**
-     * Gets the label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
-     * We favour being an arguments as the reason for the post-update node in case multiple reasons apply.
-     */
-    string label() {
-      if this = argumentPreUpdateNode()
-      then result = "arg"
-      else
-        if this = storePreUpdateNode()
-        then result = "store"
-        else result = "read"
-    }
-  }
-
-  /**
-   * Gets the pre-update node for this node.
-   *
-   * An argument might have its value changed as a result of a call.
-   * Certain arguments, such as implicit self arguments are already post-update nodes
-   * and should not have an extra node synthesised.
-   */
-  Node argumentPreUpdateNode() {
-    result = any(FunctionCall c).getArg(_)
-    or
-    result = any(LambdaCall c).getArg(_)
-    or
-    // Avoid argument 0 of method calls as those have read post-update nodes.
-    exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
-    or
-    result = any(SpecialCall c).getArg(_)
-    or
-    // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
-    exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
-    or
-    // any argument of any call that we have not been able to resolve
-    exists(CallNode call | not resolvedCall(call) |
-      result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
-    )
-  }
-
-  /** Holds if `call` can be resolved as a normal call */
-  private predicate resolvedCall(CallNode call) {
-    call = any(DataFlowCallableValue cv).getACall()
-    or
-    call = any(DataFlowLambda l).getACall()
-  }
-
-  /** Gets the pre-update node associated with a store. This is used for when an object might have its value changed after a store. */
-  CfgNode storePreUpdateNode() {
-    exists(Attribute a |
-      result.getNode() = a.getObject().getAFlowNode() and
-      a.getCtx() instanceof Store
-    )
-  }
-
-  /**
-   * Gets a node marking the state change of an object after a read.
-   *
-   * A reverse read happens when the result of a read is modified, e.g. in
-   * ```python
-   * l = [ mutable ]
-   * l[0].mutate()
-   * ```
-   * we may now have changed the content of `l`. To track this, there must be
-   * a postupdate node for `l`.
-   */
-  CfgNode readPreUpdateNode() {
-    exists(Attribute a |
-      result.getNode() = a.getObject().getAFlowNode() and
-      a.getCtx() instanceof Load
-    )
-    or
-    result.getNode() = any(SubscriptNode s).getObject()
-    or
-    // The dictionary argument is read from if the callable has parameters matching the keys.
-    result.getNode().getNode() = any(Call call).getKwargs()
-  }
+  override Parameter getParameter() { none() }
 }

-import SyntheticPostUpdateNode
+predicate synthStarArgsElementParameterNodeStoreStep(
+  SynthStarArgsElementParameterNode nodeFrom, ListElementContent c, ParameterNode nodeTo
+) {
+  c = c and // suppress warning about unused parameter
+  exists(DataFlowCallable callable, ParameterPosition ppos |
+    nodeFrom = TSynthStarArgsElementParameterNode(callable) and
+    nodeTo = callable.getParameter(ppos) and
+    ppos.isStarArgs(_)
+  )
+}
+
+// =============================================================================
+// **kwargs (DictSplat) related
+// =============================================================================
+/**
+ * A (synthetic) data-flow node that represents all keyword arguments, as if they had
+ * been passed in a `**kwargs` argument.
+ */
+class SynthDictSplatArgumentNode extends Node, TSynthDictSplatArgumentNode {
+  CallNode node;
+
+  SynthDictSplatArgumentNode() { this = TSynthDictSplatArgumentNode(node) }
+
+  override string toString() { result = "SynthDictSplatArgumentNode" }
+
+  override Scope getScope() { result = node.getScope() }
+
+  override Location getLocation() { result = node.getLocation() }
+}
+
+private predicate synthDictSplatArgumentNodeStoreStep(
+  ArgumentNode nodeFrom, DictionaryElementContent c, SynthDictSplatArgumentNode nodeTo
+) {
+  exists(string name, CallNode call, ArgumentPosition keywordPos |
+    nodeTo = TSynthDictSplatArgumentNode(call) and
+    getCallArg(call, _, _, nodeFrom, keywordPos) and
+    keywordPos.isKeyword(name) and
+    c.getKey() = name
+  )
+}
+
+/**
+ * Ensures that a `**kwargs` parameter will not contain elements with names of
+ * keyword parameters.
+ *
+ * For example, for the function below, it's not possible that the `kwargs` dictionary
+ * can contain an element with the name `a`, since that parameter can be given as a
+ * keyword argument.
+ *
+ * ```py
+ * def func(a, **kwargs):
+ *     ...
+ * ```
+ */
+private predicate dictSplatParameterNodeClearStep(ParameterNode n, DictionaryElementContent c) {
+  exists(DataFlowCallable callable, ParameterPosition dictSplatPos, ParameterPosition keywordPos |
+    dictSplatPos.isDictSplat() and
+    (
+      n.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+      or
+      n = TSummaryParameterNode(callable.asLibraryCallable(), dictSplatPos)
+    ) and
+    exists(callable.getParameter(keywordPos)) and
+    keywordPos.isKeyword(c.getKey())
+  )
+}
+
+/**
+ * A synthetic data-flow node to allow flow to keyword parameters from a `**kwargs` argument.
+ *
+ * Take the code snippet below as an example. Since the call only has a `**kwargs` argument,
+ * with a `**` argument position, we add this synthetic parameter node with `**` parameter position,
+ * and a read step to the `p1` parameter.
+ *
+ * ```py
+ * def foo(p1, p2): ...
+ *
+ * kwargs = {"p1": 42, "p2": 43}
+ * foo(**kwargs)
+ * ```
+ *
+ *
+ * Note that this will introduce a bit of redundancy in cases like
+ *
+ * ```py
+ * foo(p1=taint(1), p2=taint(2))
+ * ```
+ *
+ * where direct keyword matching is possible, since we construct a synthesized dict
+ * splat argument (`SynthDictSplatArgumentNode`) at the call site, which means that
+ * `taint(1)` will flow into `p1` both via normal keyword matching and via the synthesized
+ * nodes (and similarly for `p2`). However, this redundancy is OK since
+ *  (a) it means that type-tracking through keyword arguments also works in most cases,
+ *  (b) read/store steps can be avoided when direct keyword matching is possible, and
+ *      hence access path limits are not a concern, and
+ *  (c) since the synthesized nodes are hidden, the reported data-flow paths will be
+ *      collapsed anyway.
+ */
+class SynthDictSplatParameterNode extends ParameterNodeImpl, TSynthDictSplatParameterNode {
+  DataFlowCallable callable;
+
+  SynthDictSplatParameterNode() { this = TSynthDictSplatParameterNode(callable) }
+
+  override string toString() { result = "SynthDictSplatParameterNode" }
+
+  override Scope getScope() { result = callable.getScope() }
+
+  override Location getLocation() { result = callable.getLocation() }
+
+  override Parameter getParameter() { none() }
+}
+
+/**
+ * Flow step from the synthetic `**kwargs` parameter to the real `**kwargs` parameter.
+ * Due to a restriction in the dataflow library, we can only give one of them as result for
+ * `DataFlowCallable.getParameter`, so this is a workaround to ensure there is flow to
+ * _both_ of them.
+ */
+private predicate dictSplatParameterNodeFlowStep(
+  ParameterNodeImpl nodeFrom, ParameterNodeImpl nodeTo
+) {
+  exists(DataFlowCallable callable |
+    nodeFrom = TSynthDictSplatParameterNode(callable) and
+    (
+      nodeTo.getParameter() = callable.(DataFlowFunction).getScope().getKwarg()
+      or
+      exists(ParameterPosition pos |
+        nodeTo = TSummaryParameterNode(callable.asLibraryCallable(), pos) and
+        pos.isDictSplat()
+      )
+    )
+  )
+}
+
+/**
+ * Reads from the synthetic **kwargs parameter to each keyword parameter.
+ */
+predicate synthDictSplatParameterNodeReadStep(
+  SynthDictSplatParameterNode nodeFrom, DictionaryElementContent c, ParameterNode nodeTo
+) {
+  exists(DataFlowCallable callable, ParameterPosition ppos |
+    nodeFrom = TSynthDictSplatParameterNode(callable) and
+    nodeTo = callable.getParameter(ppos) and
+    ppos.isKeyword(c.getKey())
+  )
+}
+
+// =============================================================================
+// PostUpdateNode
+// =============================================================================
+abstract class PostUpdateNodeImpl extends Node {
+  /** Gets the node before the state update. */
+  abstract Node getPreUpdateNode();
+}
+
+class SyntheticPostUpdateNode extends PostUpdateNodeImpl, TSyntheticPostUpdateNode {
+  ControlFlowNode node;
+
+  SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(node) }
+
+  override Node getPreUpdateNode() { result.(CfgNode).getNode() = node }
+
+  override string toString() { result = "[post] " + node.toString() }
+
+  override Scope getScope() { result = node.getScope() }
+
+  override Location getLocation() { result = node.getLocation() }
+}
+
+class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
+  SyntheticPreUpdateNode pre;
+
+  NonSyntheticPostUpdateNode() { this = pre.getPostUpdateNode() }
+
+  override Node getPreUpdateNode() { result = pre }
+}

 class DataFlowExpr = Expr;

@@ -274,13 +379,6 @@ module EssaFlow {
    iterableUnpackingFlowStep(nodeFrom, nodeTo)
    or
    matchFlowStep(nodeFrom, nodeTo)
-    or
-    // Overflow keyword argument
-    exists(CallNode call, CallableValue callable |
-      call = callable.getACall() and
-      nodeTo = TKwOverflowNode(call, callable) and
-      nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
-    )
  }

  predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -305,6 +403,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
  simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
  or
  summaryFlowSteps(nodeFrom, nodeTo)
+  or
+  dictSplatParameterNodeFlowStep(nodeFrom, nodeTo)
 }

 /**
@@ -521,15 +621,15 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
  or
  attributeStoreStep(nodeFrom, c, nodeTo)
  or
-  posOverflowStoreStep(nodeFrom, c, nodeTo)
-  or
-  kwOverflowStoreStep(nodeFrom, c, nodeTo)
-  or
  matchStoreStep(nodeFrom, c, nodeTo)
  or
  any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
  or
  FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom, c, nodeTo)
+  or
+  synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
+  or
+  synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
 }

 /**
@@ -669,30 +769,6 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode n
  )
 }

-/**
- * Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
- * at the position indicated by `c`.
- */
-predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
-  exists(CallNode call, CallableValue callable, int n |
-    nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
-    nodeTo = TPosOverflowNode(call, callable) and
-    c.getIndex() = n
-  )
-}
-
-/**
- * Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
- * at the key indicated by `c`.
- */
-predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
-  exists(CallNode call, CallableValue callable, string key |
-    nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
-    nodeTo = TKwOverflowNode(call, callable) and
-    c.getKey() = key
-  )
-}
-
 predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
  exists(Function f, Parameter p, ParameterDefinition def |
    // `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
@@ -722,9 +798,9 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
  or
  attributeReadStep(nodeFrom, c, nodeTo)
  or
-  kwUnpackReadStep(nodeFrom, c, nodeTo)
-  or
  FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom, c, nodeTo)
+  or
+  synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
 }

 /** Data flows from a sequence to a subscript of the sequence. */
@@ -814,43 +890,19 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
  nodeTo.accesses(nodeFrom, c.getAttribute())
 }

-/**
- * Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
- * synthesized unpacked argument with the name indicated by `c`.
- */
-predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
-  exists(CallNode call, string name |
-    nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
-    nodeTo = TKwUnpackedNode(call, _, name) and
-    name = c.getKey()
-  )
-}
-
-/**
- * Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
- * whenever `call` unpacks `name`.
- */
-predicate kwOverflowClearStep(Node n, Content c) {
-  exists(CallNode call, CallableValue callable, string name |
-    call_unpacks(call, _, callable, name, _) and
-    n = TKwOverflowNode(call, callable) and
-    c.(DictionaryElementContent).getKey() = name
-  )
-}
-
 /**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 */
 predicate clearsContent(Node n, Content c) {
-  kwOverflowClearStep(n, c)
-  or
  matchClearStep(n, c)
  or
  attributeClearStep(n, c)
  or
  FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
+  or
+  dictSplatParameterNodeClearStep(n, c)
 }

 /**
@@ -906,23 +958,24 @@ predicate nodeIsHidden(Node n) {
  n instanceof SummaryNode
  or
  n instanceof SummaryParameterNode
+  or
+  n instanceof SynthStarArgsElementParameterNode
+  or
+  n instanceof SynthDictSplatArgumentNode
+  or
+  n instanceof SynthDictSplatParameterNode
 }

 class LambdaCallKind = Unit;

 /** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
 predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) {
-  // lambda
+  // lambda and plain functions
  kind = kind and
-  creation.asExpr() = c.(DataFlowLambda).getDefinition()
-  or
-  // normal function
-  exists(FunctionDef def |
-    def.defines(creation.asVar().getSourceVariable()) and
-    def.getDefinedFunction() = c.(DataFlowCallableValue).getCallableValue().getScope()
-  )
+  creation.asExpr() = c.(DataFlowPlainFunction).getScope().getDefinition()
  or
  // summarized function
+  exists(kind) and // avoid warning on unused 'kind'
  exists(Call call |
    creation.asExpr() = call.getAnArg() and
    creation = c.(LibraryCallableValue).getACallback()
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -31,10 +31,44 @@ newtype TNode =
    or
    node.getNode() instanceof Pattern
  } or
-  /** A synthetic node representing the value of an object before a state change */
-  TSyntheticPreUpdateNode(NeedsSyntheticPreUpdateNode post) or
-  /** A synthetic node representing the value of an object after a state change. */
-  TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
+  /**
+   * A synthetic node representing the value of an object before a state change.
+   *
+   * For class calls we pass a synthetic self argument, so attribute writes in
+   * `__init__` are reflected on the resulting object (we need special logic for this
+   * since there is no `return` in `__init__`)
+   */
+  // NOTE: since we can't rely on the call graph, but we want to have synthetic
+  // pre-update nodes for class calls, we end up getting synthetic pre-update nodes for
+  // ALL calls :|
+  TSyntheticPreUpdateNode(CallNode call) or
+  /**
+   * A synthetic node representing the value of an object after a state change.
+   * See QLDoc for `PostUpdateNode`.
+   */
+  TSyntheticPostUpdateNode(ControlFlowNode node) {
+    exists(CallNode call |
+      node = call.getArg(_)
+      or
+      node = call.getArgByName(_)
+      or
+      // `self` argument when handling class instance calls (`__call__` special method)
+      node = call.getFunction()
+    )
+    or
+    node = any(AttrNode a).getObject()
+    or
+    node = any(SubscriptNode s).getObject()
+    or
+    // self parameter when used implicitly in `super()`
+    exists(Class cls, Function func, ParameterDefinition def |
+      func = cls.getAMethod() and
+      not isStaticmethod(func) and
+      // this matches what we do in ExtractedParameterNode
+      def.getDefiningNode() = node and
+      def.getParameter() = func.getArg(0)
+    )
+  } or
  /** A node representing a global (module-level) variable in a specific module. */
  TModuleVariableNode(Module m, GlobalVariable v) {
    v.getScope() = m and
@@ -45,37 +79,6 @@ newtype TNode =
      ImportStar::globalNameDefinedInModule(v.getId(), m)
    )
  } or
-  /**
-   * A node representing the overflow positional arguments to a call.
-   * That is, `call` contains more positional arguments than there are
-   * positional parameters in `callable`. The extra ones are passed as
-   * a tuple to a starred parameter; this synthetic node represents that tuple.
-   */
-  TPosOverflowNode(CallNode call, CallableValue callable) {
-    exists(getPositionalOverflowArg(call, callable, _))
-  } or
-  /**
-   * A node representing the overflow keyword arguments to a call.
-   * That is, `call` contains keyword arguments for keys that do not have
-   * keyword parameters in `callable`. These extra ones are passed as
-   * a dictionary to a doubly starred parameter; this synthetic node
-   * represents that dictionary.
-   */
-  TKwOverflowNode(CallNode call, CallableValue callable) {
-    exists(getKeywordOverflowArg(call, callable, _))
-    or
-    ArgumentPassing::connects(call, callable) and
-    exists(call.getNode().getKwargs()) and
-    callable.getScope().hasKwArg()
-  } or
-  /**
-   * A node representing an unpacked element of a dictionary argument.
-   * That is, `call` contains argument `**{"foo": bar}` which is passed
-   * to parameter `foo` of `callable`.
-   */
-  TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
-    call_unpacks(call, _, callable, name, _)
-  } or
  /**
   * A synthetic node representing that an iterable sequence flows to consumer.
   */
@@ -109,10 +112,18 @@ newtype TNode =
  } or
  TSummaryParameterNode(FlowSummaryImpl::Public::SummarizedCallable c, ParameterPosition pos) {
    FlowSummaryImpl::Private::summaryParameterNodeRange(c, pos)
+  } or
+  /** A synthetic node to capture positional arguments that are passed to a `*args` parameter. */
+  TSynthStarArgsElementParameterNode(DataFlowCallable callable) {
+    exists(ParameterPosition ppos | ppos.isStarArgs(_) | exists(callable.getParameter(ppos)))
+  } or
+  /** A synthetic node to capture keyword arguments that are passed to a `**kwargs` parameter. */
+  TSynthDictSplatArgumentNode(CallNode call) { exists(call.getArgByName(_)) } or
+  /** A synthetic node to allow flow to keyword parameters from a `**kwargs` argument. */
+  TSynthDictSplatParameterNode(DataFlowCallable callable) {
+    exists(ParameterPosition ppos | ppos.isKeyword(_) | exists(callable.getParameter(ppos)))
  }

-class TParameterNode = TCfgNode or TSummaryParameterNode;
-
 /** Helper for `Node::getEnclosingCallable`. */
 private DataFlowCallable getCallableScope(Scope s) {
  result.getScope() = s
@@ -288,7 +299,7 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
-class ParameterNode extends Node, TParameterNode instanceof ParameterNodeImpl {
+class ParameterNode extends Node instanceof ParameterNodeImpl {
  /** Gets the parameter corresponding to this node, if any. */
  final Parameter getParameter() { result = super.getParameter() }
 }
@@ -298,18 +309,8 @@ class ExtractedParameterNode extends ParameterNodeImpl, CfgNode {
  //, LocalSourceNode {
  ParameterDefinition def;

-  ExtractedParameterNode() {
-    node = def.getDefiningNode() and
-    // Disregard parameters that we cannot resolve
-    // TODO: Make this unnecessary
-    exists(DataFlowCallable c | node = c.getParameter(_))
-  }
+  ExtractedParameterNode() { node = def.getDefiningNode() }

-  override predicate isParameterOf(DataFlowCallable c, int i) { node = c.getParameter(i) }
-
-  override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
-
-  /** Gets the `Parameter` this `ParameterNode` represents. */
  override Parameter getParameter() { result = def.getParameter() }
 }

@@ -327,16 +328,24 @@ abstract class ArgumentNode extends Node {
  final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) }
 }

-/** A data flow node that represents a call argument found in the source code. */
+/**
+ * A data flow node that represents a call argument found in the source code.
+ */
 class ExtractedArgumentNode extends ArgumentNode {
-  ExtractedArgumentNode() { this = any(ExtractedDataFlowCall c).getArg(_) }
-
-  final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
-    this.extractedArgumentOf(call, pos)
+  ExtractedArgumentNode() {
+    // for resolved calls, we need to allow all argument nodes
+    getCallArg(_, _, _, this, _)
+    or
+    // for potential summaries we allow all normal call arguments
+    normalCallArg(_, this, _)
+    or
+    // and self arguments
+    this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject()
  }

-  predicate extractedArgumentOf(ExtractedDataFlowCall call, ArgumentPosition pos) {
-    this = call.getArg(pos)
+  final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
+    this = call.getArgument(pos) and
+    call instanceof ExtractedDataFlowCall
  }
 }

@@ -345,16 +354,17 @@ class ExtractedArgumentNode extends ArgumentNode {
 * changed its state.
 *
 * This can be either the argument to a callable after the callable returns
- * (which might have mutated the argument), or the qualifier of a field after
- * an update to the field.
+ * (which might have mutated the argument), the qualifier of a field after
+ * an update to the field, or a container such as a list/dictionary after an element
+ * update.
 *
 * Nodes corresponding to AST elements, for example `ExprNode`s, usually refer
- * to the value before the update with the exception of `ObjectCreationNode`s,
+ * to the value before the update with the exception of a class call,
 * which represents the value _after_ the constructor has run.
 */
-abstract class PostUpdateNode extends Node {
+class PostUpdateNode extends Node instanceof PostUpdateNodeImpl {
  /** Gets the node before the state update. */
-  abstract Node getPreUpdateNode();
+  Node getPreUpdateNode() { result = super.getPreUpdateNode() }
 }

 /**
@@ -448,70 +458,6 @@ private predicate resolved_import_star_module(Module m, string name, Node n) {
  )
 }

-/**
- * The node holding the extra positional arguments to a call. This node is passed as a tuple
- * to the starred parameter of the callable.
- */
-class PosOverflowNode extends Node, TPosOverflowNode {
-  CallNode call;
-
-  PosOverflowNode() { this = TPosOverflowNode(call, _) }
-
-  override string toString() { result = "PosOverflowNode for " + call.getNode().toString() }
-
-  override DataFlowCallable getEnclosingCallable() {
-    exists(Node node |
-      node = TCfgNode(call) and
-      result = node.getEnclosingCallable()
-    )
-  }
-
-  override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node holding the extra keyword arguments to a call. This node is passed as a dictionary
- * to the doubly starred parameter of the callable.
- */
-class KwOverflowNode extends Node, TKwOverflowNode {
-  CallNode call;
-
-  KwOverflowNode() { this = TKwOverflowNode(call, _) }
-
-  override string toString() { result = "KwOverflowNode for " + call.getNode().toString() }
-
-  override DataFlowCallable getEnclosingCallable() {
-    exists(Node node |
-      node = TCfgNode(call) and
-      result = node.getEnclosingCallable()
-    )
-  }
-
-  override Location getLocation() { result = call.getLocation() }
-}
-
-/**
- * The node representing the synthetic argument of a call that is unpacked from a dictionary
- * argument.
- */
-class KwUnpackedNode extends Node, TKwUnpackedNode {
-  CallNode call;
-  string name;
-
-  KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }
-
-  override string toString() { result = "KwUnpacked " + name }
-
-  override DataFlowCallable getEnclosingCallable() {
-    exists(Node node |
-      node = TCfgNode(call) and
-      result = node.getEnclosingCallable()
-    )
-  }
-
-  override Location getLocation() { result = call.getLocation() }
-}
-
 /**
 * A synthetic node representing an iterable sequence. Used for changing content type
 * for instance from a `ListElement` to a `TupleElement`, especially if the content is
--- a/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImplSpecific.qll
@@ -61,11 +61,11 @@ bindingset[c, rk]
 DataFlowType getReturnType(SummarizedCallable c, ReturnKind rk) { any() }

 /**
- * Gets the type of the `i`th parameter in a synthesized call that targets a
- * callback of type `t`.
+ * Gets the type of the parameter matching arguments at position `pos` in a
+ * synthesized call that targets a callback of type `t`.
 */
-bindingset[t, i]
-DataFlowType getCallbackParameterType(DataFlowType t, int i) { any() }
+bindingset[t, pos]
+DataFlowType getCallbackParameterType(DataFlowType t, ArgumentPosition pos) { any() }

 /**
 * Gets the return type of kind `rk` in a synthesized call that targets a
@@ -114,10 +114,34 @@ string getComponentSpecific(SummaryComponent sc) {
 }

 /** Gets the textual representation of a parameter position in the format used for flow summaries. */
-string getParameterPosition(ParameterPosition pos) { result = pos.toString() }
+string getParameterPosition(ParameterPosition pos) {
+  pos.isSelf() and result = "self"
+  or
+  exists(int i |
+    pos.isPositional(i) and
+    result = i.toString()
+  )
+  or
+  exists(string name |
+    pos.isKeyword(name) and
+    result = name + ":"
+  )
+}

 /** Gets the textual representation of an argument position in the format used for flow summaries. */
-string getArgumentPosition(ArgumentPosition pos) { result = pos.toString() }
+string getArgumentPosition(ArgumentPosition pos) {
+  pos.isSelf() and result = "self"
+  or
+  exists(int i |
+    pos.isPositional(i) and
+    result = i.toString()
+  )
+  or
+  exists(string name |
+    pos.isKeyword(name) and
+    result = name + ":"
+  )
+}

 /** Holds if input specification component `c` needs a reference. */
 predicate inputNeedsReferenceSpecific(string c) { none() }
@@ -197,29 +221,55 @@ module ParsePositions {
    )
  }

-  predicate isParsedParameterPosition(string c, int i) {
+  predicate isParsedPositionalParameterPosition(string c, int i) {
    isParamBody(c) and
    i = AccessPath::parseInt(c)
  }

-  predicate isParsedArgumentPosition(string c, int i) {
+  predicate isParsedKeywordParameterPosition(string c, string paramName) {
+    isParamBody(c) and
+    c = paramName + ":"
+  }
+
+  predicate isParsedPositionalArgumentPosition(string c, int i) {
    isArgBody(c) and
    i = AccessPath::parseInt(c)
  }
+
+  predicate isParsedKeywordArgumentPosition(string c, string argName) {
+    isArgBody(c) and
+    c = argName + ":"
+  }
 }

 /** Gets the argument position obtained by parsing `X` in `Parameter[X]`. */
 ArgumentPosition parseParamBody(string s) {
  exists(int i |
-    ParsePositions::isParsedParameterPosition(s, i) and
+    ParsePositions::isParsedPositionalParameterPosition(s, i) and
    result.isPositional(i)
  )
+  or
+  exists(string name |
+    ParsePositions::isParsedKeywordParameterPosition(s, name) and
+    result.isKeyword(name)
+  )
+  or
+  s = "self" and
+  result.isSelf()
 }

 /** Gets the parameter position obtained by parsing `X` in `Argument[X]`. */
 ParameterPosition parseArgBody(string s) {
  exists(int i |
-    ParsePositions::isParsedArgumentPosition(s, i) and
+    ParsePositions::isParsedPositionalArgumentPosition(s, i) and
    result.isPositional(i)
  )
+  or
+  exists(string name |
+    ParsePositions::isParsedKeywordArgumentPosition(s, name) and
+    result.isKeyword(name)
+  )
+  or
+  s = "self" and
+  result.isSelf()
 }
--- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll
@@ -60,22 +60,6 @@ string getPossibleContentName() {
  result = any(DataFlowPublic::AttrRef a).getAttributeName()
 }

-/**
- * Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
- *
- * Helper predicate to avoid bad join order experienced in `callStep`.
- * This happened when `isParameterOf` was joined _before_ `getCallable`.
- */
-pragma[nomagic]
-private DataFlowPrivate::DataFlowCallable getCallableForArgument(
-  DataFlowPublic::ExtractedArgumentNode nodeFrom, int i
-) {
-  exists(DataFlowPrivate::ExtractedDataFlowCall call |
-    nodeFrom.extractedArgumentOf(call, i) and
-    result = call.getCallable()
-  )
-}
-
 /**
 * Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
 *
@@ -83,11 +67,15 @@ private DataFlowPrivate::DataFlowCallable getCallableForArgument(
 * recursion (or, at best, terrible performance), since identifying calls to library
 * methods is done using API graphs (which uses type tracking).
 */
-predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPrivate::ParameterNodeImpl nodeTo) {
-  // TODO: Support special methods?
-  exists(DataFlowPrivate::DataFlowCallable callable, int i |
-    callable = getCallableForArgument(nodeFrom, i) and
-    nodeTo.isParameterOf(callable, i)
+predicate callStep(DataFlowPublic::ArgumentNode nodeFrom, DataFlowPublic::ParameterNode nodeTo) {
+  exists(
+    DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable callable,
+    DataFlowPrivate::ArgumentPosition apos, DataFlowPrivate::ParameterPosition ppos
+  |
+    nodeFrom = call.getArgument(apos) and
+    nodeTo = callable.getParameter(ppos) and
+    DataFlowPrivate::parameterMatch(ppos, apos) and
+    callable = call.getCallable()
  )
 }

--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -1465,7 +1465,19 @@ private module StdlibPrivate {
    t.start() and
    result = openCall and
    (
-      openCall instanceof OpenCall
+      openCall instanceof OpenCall and
+      // don't include the open call inside of Path.open in pathlib.py since
+      // the call to `path_obj.open` is covered by `PathLibOpenCall`.
+      not exists(Module mod, Class cls, Function func |
+        openCall.(OpenCall).asCfgNode().getScope() = func and
+        func.getName() = "open" and
+        func.getScope() = cls and
+        cls.getName() = "Path" and
+        cls.getScope() = mod and
+        mod.getName() = "pathlib" and
+        // do allow this call if we're analyzing pathlib.py as part of CPython though
+        not exists(mod.getFile().getRelativePath())
+      )
      or
      openCall instanceof PathLibOpenCall
    )
@@ -2669,6 +2681,7 @@ private module StdlibPrivate {

    HashlibNewCall() {
      this = hashlibNewCall(hashName) and
+      // we only want to consider it as a cryptographic operation if the input is available
      exists(this.getParameter(1, "data"))
    }

@@ -2751,6 +2764,78 @@ private module StdlibPrivate {
    }
  }

+  // ---------------------------------------------------------------------------
+  // hmac
+  // ---------------------------------------------------------------------------
+  abstract class HmacCryptographicOperation extends Cryptography::CryptographicOperation::Range,
+    API::CallNode {
+    abstract API::Node getDigestArg();
+
+    override Cryptography::CryptographicAlgorithm getAlgorithm() {
+      exists(string algorithmName | result.matchesName(algorithmName) |
+        this.getDigestArg().asSink() = hashlibMember(algorithmName).asSource()
+        or
+        this.getDigestArg().getAValueReachingSink().asExpr().(StrConst).getText() = algorithmName
+      )
+    }
+
+    override Cryptography::BlockMode getBlockMode() { none() }
+  }
+
+  API::CallNode getHmacConstructorCall(API::Node digestArg) {
+    result = API::moduleImport("hmac").getMember(["new", "HMAC"]).getACall() and
+    digestArg = result.getParameter(2, "digestmod")
+  }
+
+  /**
+   * A call to `hmac.new`/`hmac.HMAC`.
+   *
+   * See https://docs.python.org/3.11/library/hmac.html#hmac.new
+   */
+  class HmacNewCall extends HmacCryptographicOperation {
+    API::Node digestArg;
+
+    HmacNewCall() {
+      this = getHmacConstructorCall(digestArg) and
+      // we only want to consider it as a cryptographic operation if the input is available
+      exists(this.getParameter(1, "msg").asSink())
+    }
+
+    override API::Node getDigestArg() { result = digestArg }
+
+    override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
+  }
+
+  /**
+   * A call to `.update` on an HMAC object.
+   *
+   * See https://docs.python.org/3.11/library/hmac.html#hmac.HMAC.update
+   */
+  class HmacUpdateCall extends HmacCryptographicOperation {
+    API::Node digestArg;
+
+    HmacUpdateCall() {
+      this = getHmacConstructorCall(digestArg).getReturn().getMember("update").getACall()
+    }
+
+    override API::Node getDigestArg() { result = digestArg }
+
+    override DataFlow::Node getAnInput() { result = this.getParameter(0, "msg").asSink() }
+  }
+
+  /**
+   * A call to `hmac.digest`.
+   *
+   * See https://docs.python.org/3.11/library/hmac.html#hmac.digest
+   */
+  class HmacDigestCall extends HmacCryptographicOperation {
+    HmacDigestCall() { this = API::moduleImport("hmac").getMember("digest").getACall() }
+
+    override API::Node getDigestArg() { result = this.getParameter(2, "digest") }
+
+    override DataFlow::Node getAnInput() { result = this.getParameter(1, "msg").asSink() }
+  }
+
  // ---------------------------------------------------------------------------
  // logging
  // ---------------------------------------------------------------------------
--- a/python/ql/lib/semmle/python/internal/CachedStages.qll
+++ b/python/ql/lib/semmle/python/internal/CachedStages.qll
@@ -93,6 +93,8 @@ module Stages {
      exists(PyFlow::DefinitionNode b)
      or
      exists(any(PyFlow::SequenceNode n).getElement(_))
+      or
+      exists(any(PyFlow::ControlFlowNode c).toString())
    }
  }

@@ -125,6 +127,45 @@ module Stages {
    }
  }

+  /**
+   * The points-to stage.
+   */
+  cached
+  module PointsTo {
+    /**
+     * Always holds.
+     * Ensures that a predicate is evaluated as part of the points-to stage.
+     */
+    cached
+    predicate ref() { 1 = 1 }
+
+    private import semmle.python.pointsto.Base as PointsToBase
+    private import semmle.python.types.Object as TypeObject
+    private import semmle.python.objects.TObject as TObject
+    private import semmle.python.objects.ObjectInternal as ObjectInternal
+    // have to alias since this module is also called PointsTo
+    private import semmle.python.pointsto.PointsTo as RealPointsTo
+
+    /**
+     * DONT USE!
+     * Contains references to each predicate that use the above `ref` predicate.
+     */
+    cached
+    predicate backref() {
+      1 = 1
+      or
+      PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
+      or
+      exists(TypeObject::Object a)
+      or
+      exists(TObject::TObject f)
+      or
+      exists(any(ObjectInternal::ObjectInternal o).toString())
+      or
+      RealPointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
+    }
+  }
+
  /**
   * The `dataflow` stage.
   */
@@ -138,14 +179,9 @@ module Stages {
    predicate ref() { 1 = 1 }

    private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic
+    private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
    private import semmle.python.dataflow.new.internal.LocalSources as LocalSources
    private import semmle.python.internal.Awaited as Awaited
-    private import semmle.python.pointsto.Base as PointsToBase
-    private import semmle.python.types.Object as TypeObject
-    private import semmle.python.objects.TObject as TObject
-    private import semmle.python.Flow as Flow
-    private import semmle.python.objects.ObjectInternal as ObjectInternal
-    private import semmle.python.pointsto.PointsTo as PointsTo

    /**
     * DONT USE!
@@ -159,21 +195,13 @@ module Stages {
      or
      any(DataFlowPublic::Node node).hasLocationInfo(_, _, _, _, _)
      or
+      DataFlowDispatch::resolveCall(_, _, _)
+      or
+      DataFlowDispatch::getCallArg(_, _, _, _, _)
+      or
      any(LocalSources::LocalSourceNode n).flowsTo(_)
      or
      exists(Awaited::awaited(_))
-      or
-      PointsToBase::BaseFlow::scope_entry_value_transfer_from_earlier(_, _, _, _)
-      or
-      exists(TypeObject::Object a)
-      or
-      exists(TObject::TObject f)
-      or
-      exists(any(Flow::ControlFlowNode c).toString())
-      or
-      exists(any(ObjectInternal::ObjectInternal o).toString())
-      or
-      PointsTo::AttributePointsTo::variableAttributePointsTo(_, _, _, _, _)
    }
  }
 }
--- a/python/ql/lib/semmle/python/internal/ConceptsShared.qll
+++ b/python/ql/lib/semmle/python/internal/ConceptsShared.qll
@@ -45,8 +45,12 @@ module Cryptography {

    /**
     * Gets the block mode used to perform this cryptographic operation.
-     * This may have no result - for example if the `CryptographicAlgorithm` used
-     * is a stream cipher rather than a block cipher.
+     *
+     * This predicate is only expected to have a result if two conditions hold:
+     *  1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
+     *  2. The algorithm used is a block cipher (not a stream cipher).
+     *
+     * If either of these conditions does not hold, then this predicate should have no result.
     */
    BlockMode getBlockMode() { result = super.getBlockMode() }
  }
@@ -69,8 +73,12 @@ module Cryptography {

      /**
       * Gets the block mode used to perform this cryptographic operation.
-       * This may have no result - for example if the `CryptographicAlgorithm` used
-       * is a stream cipher rather than a block cipher.
+       *
+       * This predicate is only expected to have a result if two conditions hold:
+       *  1. The operation is an encryption operation, i.e. the algorithm used is an `EncryptionAlgorithm`, and
+       *  2. The algorithm used is a block cipher (not a stream cipher).
+       *
+       * If either of these conditions does not hold, then this predicate should have no result.
       */
      abstract BlockMode getBlockMode();
    }
@@ -92,6 +100,10 @@ module Cryptography {

    /** Holds if this block mode is considered to be insecure. */
    predicate isWeak() { this = "ECB" }
+
+    /** Holds if the given string appears to match this block mode. */
+    bindingset[s]
+    predicate matchesString(string s) { s.toUpperCase().matches("%" + this + "%") }
  }
 }

--- a/python/ql/lib/semmle/python/objects/ObjectInternal.qll
+++ b/python/ql/lib/semmle/python/objects/ObjectInternal.qll
@@ -216,7 +216,7 @@ class BuiltinOpaqueObjectInternal extends ObjectInternal, TBuiltinOpaqueObject {
  override Builtin getBuiltin() { this = TBuiltinOpaqueObject(result) }

  override string toString() {
-    Stages::DataFlow::ref() and
+    Stages::PointsTo::ref() and
    result = this.getBuiltin().getClass().getName() + " object"
  }

--- a/python/ql/lib/semmle/python/pointsto/Base.qll
+++ b/python/ql/lib/semmle/python/pointsto/Base.qll
@@ -318,7 +318,7 @@ module BaseFlow {
  predicate scope_entry_value_transfer_from_earlier(
    EssaVariable pred_var, Scope pred_scope, ScopeEntryDefinition succ_def, Scope succ_scope
  ) {
-    Stages::DataFlow::ref() and
+    Stages::PointsTo::ref() and
    exists(SsaSourceVariable var |
      essa_var_scope(var, pred_scope, pred_var) and
      scope_entry_def_scope(var, succ_scope, succ_def)
--- a/python/ql/lib/semmle/python/pointsto/PointsTo.qll
+++ b/python/ql/lib/semmle/python/pointsto/PointsTo.qll
@@ -2566,7 +2566,7 @@ module AttributePointsTo {
  predicate variableAttributePointsTo(
    EssaVariable var, Context context, string name, ObjectInternal value, CfgOrigin origin
  ) {
-    Stages::DataFlow::ref() and
+    Stages::PointsTo::ref() and
    definitionAttributePointsTo(var.getDefinition(), context, name, value, origin)
    or
    exists(EssaVariable prev |
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextLoggingCustomizations.qll
@@ -57,16 +57,43 @@ module CleartextLogging {
  /** A piece of data printed, considered as a flow sink. */
  class PrintedDataAsSink extends Sink {
    PrintedDataAsSink() {
-      this = API::builtin("print").getACall().getArg(_)
-      or
-      // special handling of writing to `sys.stdout` and `sys.stderr`, which is
-      // essentially the same as printing
-      this =
-        API::moduleImport("sys")
-            .getMember(["stdout", "stderr"])
-            .getMember("write")
-            .getACall()
-            .getArg(0)
+      (
+        this = API::builtin("print").getACall().getArg(_)
+        or
+        // special handling of writing to `sys.stdout` and `sys.stderr`, which is
+        // essentially the same as printing
+        this =
+          API::moduleImport("sys")
+              .getMember(["stdout", "stderr"])
+              .getMember("write")
+              .getACall()
+              .getArg(0)
+      ) and
+      // since some of the inner error handling implementation of the logging module is
+      // ```py
+      //         sys.stderr.write('Message: %r\n'
+      //         'Arguments: %s\n' % (record.msg,
+      //                              record.args))
+      // ```
+      // any time we would report flow to such a logging sink, we can ALSO report
+      // the flow to the `record.msg`/`record.args` sinks -- obviously we
+      // don't want that.
+      //
+      // However, simply removing taint edges out of a sink is not a good enough solution,
+      // since we would only flag one of the `logging.info` calls in the following example
+      // due to use-use flow
+      // ```py
+      // logging.info(user_controlled)
+      // logging.info(user_controlled)
+      // ```
+      //
+      // The same approach is used in the command injection query.
+      not exists(Module loggingInit |
+        loggingInit.getName() = "logging.__init__" and
+        this.getScope().getEnclosingModule() = loggingInit and
+        // do allow this call if we're analyzing logging/__init__.py as part of CPython though
+        not exists(loggingInit.getFile().getRelativePath())
+      )
    }
  }
 }
--- a/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll
@@ -50,7 +50,34 @@ module CleartextStorage {

  /** The data written to a file, considered as a flow sink. */
  class FileWriteDataAsSink extends Sink {
-    FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
+    FileWriteDataAsSink() {
+      this = any(FileSystemWriteAccess write).getADataNode() and
+      // since the implementation of Path.write_bytes in pathlib.py is like
+      // ```py
+      // def write_bytes(self, data):
+      //     with self.open(mode='wb') as f:
+      //         return f.write(data)
+      // ```
+      // any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
+      // the flow from the `data` parameter to the `f.write` sink -- obviously we
+      // don't want that.
+      //
+      // However, simply removing taint edges out of a sink is not a good enough solution,
+      // since we would only flag one of the `p.write` calls in the following example
+      // due to use-use flow
+      // ```py
+      // p.write(user_controlled)
+      // p.write(user_controlled)
+      // ```
+      //
+      // The same approach is used in the command injection query.
+      not exists(Module pathlib |
+        pathlib.getName() = "pathlib" and
+        this.getScope().getEnclosingModule() = pathlib and
+        // do allow this call if we're analyzing pathlib.py as part of CPython though
+        not exists(pathlib.getFile().getRelativePath())
+      )
+    }
  }

  /** The data written to a cookie on a HTTP response, considered as a flow sink. */
--- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll
@@ -76,6 +76,9 @@ module CommandInjection {
      // `subprocess`. See:
      // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
      // https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
+      //
+      // The same approach is used in the path-injection, cleartext-storage, and
+      // cleartext-logging queries.
      not this.getScope().getEnclosingModule().getName() in [
          "os", "subprocess", "platform", "popen2"
        ]
--- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll
@@ -58,7 +58,33 @@ module PathInjection {
   * A file system access, considered as a flow sink.
   */
  class FileSystemAccessAsSink extends Sink {
-    FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
+    FileSystemAccessAsSink() {
+      this = any(FileSystemAccess e).getAPathArgument() and
+      // since the implementation of Path.open in pathlib.py is like
+      // ```py
+      // def open(self, ...):
+      //     return io.open(self, ...)
+      // ```
+      // any time we would report flow to the `path_obj.open` sink, we can ALSO report
+      // the flow from the `self` parameter to the `io.open` sink -- obviously we
+      // don't want that.
+      //
+      // However, simply removing taint edges out of a sink is not a good enough solution,
+      // since we would only flag one of the `p.open` calls in the following example
+      // due to use-use flow
+      // ```py
+      // p.open()
+      // p.open()
+      // ```
+      //
+      // The same approach is used in the command injection query.
+      not exists(Module pathlib |
+        pathlib.getName() = "pathlib" and
+        this.getScope().getEnclosingModule() = pathlib and
+        // do allow this call if we're analyzing pathlib.py as part of CPython though
+        not exists(pathlib.getFile().getRelativePath())
+      )
+    }
  }

  private import semmle.python.frameworks.data.ModelsAsData
--- a/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
+++ b/python/ql/lib/semmle/python/security/dataflow/StackTraceExposureCustomizations.qll
@@ -41,7 +41,32 @@ module StackTraceExposure {
  /**
   * A source of exception info, considered as a flow source.
   */
-  class ExceptionInfoAsSource extends Source instanceof ExceptionInfo { }
+  class ExceptionInfoAsSource extends Source instanceof ExceptionInfo {
+    ExceptionInfoAsSource() {
+      // since `traceback.format_exc()` in Python 2 is internally implemented as
+      // ```py
+      // def format_exc(limit=None):
+      //     """Like print_exc() but return a string."""
+      //     try:
+      //         etype, value, tb = sys.exc_info()
+      //         return ''.join(format_exception(etype, value, tb, limit))
+      //     finally:
+      //         etype = value = tb = None
+      // ```
+      // any time we would report flow from a call to `format_exc`, we can ALSO report
+      // the flow from the `sys.exc_info()` source -- obviously we don't want that.
+      //
+      //
+      // To avoid this, we use the same approach as for sinks in the command injection
+      // query (and others).
+      not exists(Module traceback |
+        traceback.getName() = "traceback" and
+        this.getScope().getEnclosingModule() = traceback and
+        // do allow this call if we're analyzing traceback.py as part of CPython though
+        not exists(traceback.getFile().getRelativePath())
+      )
+    }
+  }

  /**
   * The body of a HTTP response that will be returned from a server, considered as a flow sink.
--- a/python/ql/lib/semmle/python/types/Object.qll
+++ b/python/ql/lib/semmle/python/types/Object.qll
@@ -5,7 +5,7 @@ private import semmle.python.internal.CachedStages

 cached
 private predicate is_an_object(@py_object obj) {
-  Stages::DataFlow::ref() and
+  Stages::PointsTo::ref() and
  /* CFG nodes for numeric literals, all of which have a @py_cobject for the value of that literal */
  obj instanceof ControlFlowNode and
  not obj.(ControlFlowNode).getNode() instanceof IntegerLiteral and
@@ -78,7 +78,7 @@ class Object extends @py_object {
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
-    Stages::DataFlow::ref() and
+    Stages::PointsTo::ref() and
    this.hasOrigin() and
    this.getOrigin()
        .getLocation()
@@ -98,7 +98,7 @@ class Object extends @py_object {
  /** Gets a textual representation of this element. */
  cached
  string toString() {
-    Stages::DataFlow::ref() and
+    Stages::PointsTo::ref() and
    not this = undefinedVariable() and
    not this = unknownValue() and
    exists(ClassObject type | type.asBuiltin() = this.asBuiltin().getClass() |
--- a/python/ql/src/CHANGELOG.md
+++ b/python/ql/src/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.6.3
+
+No user-facing changes.
+
 ## 0.6.2

 No user-facing changes.
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIs.qll
@@ -1,48 +1,36 @@
 /**
 * Definitions for reasoning about untrusted data used in APIs defined outside the
- * database.
+ * user-written code.
 */

-import python
+private import python
 import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.TaintTracking
-import semmle.python.Concepts
-import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.dataflow.new.TaintTracking
+private import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.ApiGraphs
 private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
 private import semmle.python.dataflow.new.internal.TaintTrackingPrivate as TaintTrackingPrivate
-private import semmle.python.types.Builtins
-private import semmle.python.objects.ObjectInternal

-// IMPLEMENTATION NOTES:
-//
-// This query uses *both* the new data-flow library, and points-to. Why? To get this
-// finished quickly, so it can provide value for our field team and ourselves.
-//
-// In the long run, it should not need to use points-to for anything. Possibly this can
-// even be helpful in figuring out what we need from TypeTrackers and the new data-flow
-// library to be fully operational.
-//
-// At least it will allow us to provide a baseline comparison against a solution that
-// doesn't use points-to at all
-//
-// There is a few dirty things we do here:
-// 1. DataFlowPrivate: since `DataFlowCall` and `DataFlowCallable` are not exposed
-//    publicly, but we really want access to them.
-// 2. points-to: we kinda need to do this since this is what powers `DataFlowCall` and
-//    `DataFlowCallable`
-// 3. ObjectInternal: to provide better names for built-in functions and methods. If we
-//    really wanted to polish our points-to implementation, we could move this
-//    functionality into `BuiltinFunctionValue` and `BuiltinMethodValue`, but will
-//    probably require some more work: for this query, it's totally ok to use
-//    `builtins.open` for the code `open(f)`, but well, it requires a bit of thinking to
-//    figure out if that is desirable in general. I simply skipped a corner here!
-// 4. TaintTrackingPrivate: Nothing else gives us access to `defaultAdditionalTaintStep` :(
 /**
- * A callable that is considered a "safe" external API from a security perspective.
+ * An external API that is considered "safe" from a security perspective.
 */
 class SafeExternalApi extends Unit {
-  /** Gets a callable that is considered a "safe" external API from a security perspective. */
-  abstract DataFlowPrivate::DataFlowCallable getSafeCallable();
+  /**
+   * Gets a call that is considered "safe" from a security perspective. You can use API
+   * graphs to find calls to functions you know are safe.
+   *
+   * This works even when the external library isn't extracted.
+   */
+  abstract DataFlow::CallCfgNode getSafeCall();
+
+  /**
+   * Gets a callable that is considered a "safe" external API from a security
+   * perspective.
+   *
+   * You probably want to define this as `none()` and use `getSafeCall` instead, since
+   * that can handle the external library not being extracted.
+   */
+  DataFlowPrivate::DataFlowCallable getSafeCallable() { none() }
 }

 /** DEPRECATED: Alias for SafeExternalApi */
@@ -50,42 +38,127 @@ deprecated class SafeExternalAPI = SafeExternalApi;

 /** The default set of "safe" external APIs. */
 private class DefaultSafeExternalApi extends SafeExternalApi {
-  override DataFlowPrivate::DataFlowCallable getSafeCallable() {
-    exists(CallableValue cv | cv = result.getCallableValue() |
-      cv = Value::named(["len", "isinstance", "getattr", "hasattr"])
-      or
-      exists(ClassValue cls, string attr |
-        cls = Value::named("dict") and attr in ["__getitem__", "__setitem__"]
-      |
-        cls.lookup(attr) = cv
-      )
+  override DataFlow::CallCfgNode getSafeCall() {
+    result =
+      API::builtin([
+          "len", "enumerate", "isinstance", "getattr", "hasattr", "bool", "float", "int", "repr",
+          "str", "type"
+        ]).getACall()
+  }
+}
+
+/**
+ * Gets a human readable representation of `node`.
+ *
+ * Note that this is only defined for API nodes that are allowed as external APIs,
+ * so `None.json.dumps` will for example not be allowed.
+ */
+string apiNodeToStringRepr(API::Node node) {
+  node = API::builtin(result)
+  or
+  node = API::moduleImport(result)
+  or
+  exists(API::Node base, string basename |
+    base.getDepth() < node.getDepth() and
+    basename = apiNodeToStringRepr(base) and
+    not base = API::builtin(["None", "True", "False"])
+  |
+    exists(string m | node = base.getMember(m) | result = basename + "." + m)
+    or
+    node = base.getReturn() and
+    result = basename + "()" and
+    not base.getACall() = any(SafeExternalApi safe).getSafeCall()
+    or
+    node = base.getAwaited() and
+    result = basename
+  )
+}
+
+predicate resolvedCall(CallNode call) {
+  DataFlowPrivate::resolveCall(call, _, _) or
+  DataFlowPrivate::resolveClassCall(call, _)
+}
+
+newtype TInterestingExternalApiCall =
+  TUnresolvedCall(DataFlow::CallCfgNode call) {
+    exists(call.getLocation().getFile().getRelativePath()) and
+    not resolvedCall(call.getNode()) and
+    not call = any(SafeExternalApi safe).getSafeCall()
+  } or
+  TResolvedCall(DataFlowPrivate::DataFlowCall call) {
+    exists(call.getLocation().getFile().getRelativePath()) and
+    exists(call.getCallable()) and
+    not call.getCallable() = any(SafeExternalApi safe).getSafeCallable() and
+    // ignore calls inside codebase, and ignore calls that are marked as safe. This is
+    // only needed as long as we extract dependencies. When we stop doing that, all
+    // targets of resolved calls will be from user-written code.
+    not exists(call.getCallable().getLocation().getFile().getRelativePath()) and
+    not exists(DataFlow::CallCfgNode callCfgNode | callCfgNode.getNode() = call.getNode() |
+      any(SafeExternalApi safe).getSafeCall() = callCfgNode
+    )
+  }
+
+abstract class InterestingExternalApiCall extends TInterestingExternalApiCall {
+  /** Gets the argument at position `apos`, if any */
+  abstract DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos);
+
+  /** Gets a textual representation of this element. */
+  abstract string toString();
+
+  /**
+   * Gets a human-readable name for the external API.
+   */
+  abstract string getApiName();
+}
+
+class UnresolvedCall extends InterestingExternalApiCall, TUnresolvedCall {
+  DataFlow::CallCfgNode call;
+
+  UnresolvedCall() { this = TUnresolvedCall(call) }
+
+  override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+    exists(int i | apos.isPositional(i) | result = call.getArg(i))
+    or
+    exists(string name | apos.isKeyword(name) | result = call.getArgByName(name))
+  }
+
+  override string toString() {
+    result = "ExternalAPI:UnresolvedCall: " + call.getNode().getNode().toString()
+  }
+
+  override string getApiName() {
+    exists(API::Node apiNode |
+      result = apiNodeToStringRepr(apiNode) and
+      apiNode.getACall() = call
+    )
+  }
+}
+
+class ResolvedCall extends InterestingExternalApiCall, TResolvedCall {
+  DataFlowPrivate::DataFlowCall dfCall;
+
+  ResolvedCall() { this = TResolvedCall(dfCall) }
+
+  override DataFlow::Node getArgument(DataFlowPrivate::ArgumentPosition apos) {
+    result = dfCall.getArgument(apos)
+  }
+
+  override string toString() {
+    result = "ExternalAPI:ResolvedCall: " + dfCall.getNode().getNode().toString()
+  }
+
+  override string getApiName() {
+    exists(DataFlow::CallCfgNode call, API::Node apiNode | dfCall.getNode() = call.getNode() |
+      result = apiNodeToStringRepr(apiNode) and
+      apiNode.getACall() = call
    )
  }
 }

 /** A node representing data being passed to an external API through a call. */
 class ExternalApiDataNode extends DataFlow::Node {
-  DataFlowPrivate::DataFlowCallable callable;
-  int i;
-
  ExternalApiDataNode() {
-    exists(DataFlowPrivate::DataFlowCall call |
-      exists(call.getLocation().getFile().getRelativePath())
-    |
-      callable = call.getCallable() and
-      // TODO: this ignores some complexity of keyword arguments (especially keyword-only args)
-      this = call.getArg(i)
-    ) and
-    not any(SafeExternalApi safe).getSafeCallable() = callable and
-    exists(Value cv | cv = callable.getCallableValue() |
-      cv.isAbsent()
-      or
-      cv.isBuiltin()
-      or
-      cv.(CallableValue).getScope().getLocation().getFile().inStdlib()
-      or
-      not exists(cv.(CallableValue).getScope().getLocation().getFile().getRelativePath())
-    ) and
+    exists(InterestingExternalApiCall call | this = call.getArgument(_)) and
    // Not already modeled as a taint step
    not TaintTrackingPrivate::defaultAdditionalTaintStep(this, _) and
    // for `list.append(x)`, we have a additional taint step from x -> [post] list.
@@ -95,12 +168,6 @@ class ExternalApiDataNode extends DataFlow::Node {
      TaintTrackingPrivate::defaultAdditionalTaintStep(_, post)
    )
  }
-
-  /** Gets the index for the parameter that will receive this untrusted data */
-  int getIndex() { result = i }
-
-  /** Gets the callable to which this argument is passed. */
-  DataFlowPrivate::DataFlowCallable getCallable() { result = callable }
 }

 /** DEPRECATED: Alias for ExternalApiDataNode */
@@ -133,19 +200,26 @@ deprecated class UntrustedExternalAPIDataNode = UntrustedExternalApiDataNode;

 /** An external API which is used with untrusted data. */
 private newtype TExternalApi =
-  /** An untrusted API method `m` where untrusted data is passed at `index`. */
-  TExternalApiParameter(DataFlowPrivate::DataFlowCallable callable, int index) {
-    exists(UntrustedExternalApiDataNode n |
-      callable = n.getCallable() and
-      index = n.getIndex()
+  MkExternalApi(string repr, DataFlowPrivate::ArgumentPosition apos) {
+    exists(UntrustedExternalApiDataNode ex, InterestingExternalApiCall call |
+      ex = call.getArgument(apos) and
+      repr = call.getApiName()
    )
  }

-/** An external API which is used with untrusted data. */
-class ExternalApiUsedWithUntrustedData extends TExternalApi {
+/** An argument of an external API which is used with untrusted data. */
+class ExternalApiUsedWithUntrustedData extends MkExternalApi {
+  string repr;
+  DataFlowPrivate::ArgumentPosition apos;
+
+  ExternalApiUsedWithUntrustedData() { this = MkExternalApi(repr, apos) }
+
  /** Gets a possibly untrusted use of this external API. */
  UntrustedExternalApiDataNode getUntrustedDataNode() {
-    this = TExternalApiParameter(result.getCallable(), result.getIndex())
+    exists(InterestingExternalApiCall call |
+      result = call.getArgument(apos) and
+      call.getApiName() = repr
+    )
  }

  /** Gets the number of untrusted sources used with this external API. */
@@ -154,63 +228,8 @@ class ExternalApiUsedWithUntrustedData extends TExternalApi {
  }

  /** Gets a textual representation of this element. */
-  string toString() {
-    exists(
-      DataFlowPrivate::DataFlowCallable callable, int index, string callableString,
-      string indexString
-    |
-      this = TExternalApiParameter(callable, index) and
-      indexString = "param " + index and
-      exists(CallableValue cv | cv = callable.getCallableValue() |
-        callableString =
-          cv.getScope().getEnclosingModule().getName() + "." + cv.getScope().getQualifiedName()
-        or
-        not exists(cv.getScope()) and
-        (
-          cv instanceof BuiltinFunctionValue and
-          callableString = pretty_builtin_function_value(cv)
-          or
-          cv instanceof BuiltinMethodValue and
-          callableString = pretty_builtin_method_value(cv)
-          or
-          not cv instanceof BuiltinFunctionValue and
-          not cv instanceof BuiltinMethodValue and
-          callableString = cv.toString()
-        )
-      ) and
-      result = callableString + " [" + indexString + "]"
-    )
-  }
+  string toString() { result = repr + " [" + apos + "]" }
 }

 /** DEPRECATED: Alias for ExternalApiUsedWithUntrustedData */
 deprecated class ExternalAPIUsedWithUntrustedData = ExternalApiUsedWithUntrustedData;
-
-/** Gets the fully qualified name for the `BuiltinFunctionValue` bfv. */
-private string pretty_builtin_function_value(BuiltinFunctionValue bfv) {
-  exists(Builtin b | b = bfv.(BuiltinFunctionObjectInternal).getBuiltin() |
-    result = prefix_with_module_if_found(b)
-  )
-}
-
-/** Gets the fully qualified name for the `BuiltinMethodValue` bmv. */
-private string pretty_builtin_method_value(BuiltinMethodValue bmv) {
-  exists(Builtin b | b = bmv.(BuiltinMethodObjectInternal).getBuiltin() |
-    exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b |
-      result = prefix_with_module_if_found(cls) + "." + b.getName()
-    )
-    or
-    not exists(Builtin cls | cls.isClass() and cls.getMember(b.getName()) = b) and
-    result = b.getName()
-  )
-}
-
-/** Helper predicate that tries to adds module qualifier to `b`. Will succeed even if module not found. */
-private string prefix_with_module_if_found(Builtin b) {
-  exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b |
-    result = mod.getName() + "." + b.getName()
-  )
-  or
-  not exists(Builtin mod | mod.isModule() and mod.getMember(b.getName()) = b) and
-  result = b.getName()
-}
--- a/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/ExternalAPIsUsedWithUntrustedData.qhelp
@@ -11,11 +11,9 @@ relevant for security analysis of this application.</p>

 <p>An external API is defined as a call to a method that is not defined in the source
 code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with <code>[param x]</code>, where <code>x</code> indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-<code>classmethod</code>s, parameter 0 represents the class instance or class itself
-respectively.</p>
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
+to indicate the argument passing the untrusted data.</p>

 <p>Note that an excepted sink might not be included in the results, if it also defines a
 taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
 <p>Note: Compared to the Java version of this query, we currently do not give special
 care to methods that are overridden in the source code.</p>

-<p>Note: Currently this query will only report results for external packages that are extracted.</p>
-
 </overview>
 <recommendation>

--- a/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
+++ b/python/ql/src/Security/CWE-020-ExternalAPIs/UntrustedDataToExternalAPI.qhelp
@@ -11,11 +11,9 @@ be modeled as either taint steps, or sinks for specific problems.</p>

 <p>An external API is defined as a call to a method that is not defined in the source
 code, and is not modeled as a taint step in the default taint library. External APIs may
-be from the Python standard library or dependencies. The query will report the fully qualified name,
-along with <code>[param x]</code>, where <code>x</code> indicates the position of
-the parameter receiving the untrusted data. Note that for methods and
-<code>classmethod</code>s, parameter 0 represents the class instance or class itself
-respectively.</p>
+be from the Python standard library or dependencies. The query will report the fully
+qualified name, along with <code>[position index]</code> or <code>[keyword name]</code>,
+to indicate the argument passing the untrusted data.</p>

 <p>Note that an excepted sink might not be included in the results, if it also defines a
 taint step. This is the case for <code>pickle.loads</code> which is a sink for the
@@ -24,8 +22,6 @@ Unsafe Deserialization query, but is also a taint step for other queries.</p>
 <p>Note: Compared to the Java version of this query, we currently do not give special
 care to methods that are overridden in the source code.</p>

-<p>Note: Currently this query will only report results for external packages that are extracted.</p>
-
 </overview>
 <recommendation>

--- a/python/ql/src/change-notes/released/0.6.3.md
+++ b/python/ql/src/change-notes/released/0.6.3.md
@@ -0,0 +1,3 @@
+## 0.6.3
+
+No user-facing changes.
--- a/python/ql/src/codeql-pack.release.yml
+++ b/python/ql/src/codeql-pack.release.yml
@@ -1,2 +1,2 @@
 ---
-lastReleaseVersion: 0.6.2
+lastReleaseVersion: 0.6.3
--- a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
+++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql
@@ -8,7 +8,7 @@
 * @id py/unsafe-unpacking
 * @problem.severity error
 * @security-severity 7.5
- * @precision high
+ * @precision medium
 * @tags security
 *       experimental
 *       external/cwe/cwe-022
--- a/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll
+++ b/python/ql/src/experimental/semmle/python/security/InsecureRandomnessCustomizations.qll
@@ -59,12 +59,11 @@ module InsecureRandomness {
   */
  class RandomFnSink extends Sink {
    RandomFnSink() {
-      exists(DataFlowCallable randomFn |
-        randomFn
-            .getName()
+      exists(Function func |
+        func.getName()
            .regexpMatch("(?i).*(gen(erate)?|make|mk|create).*(nonce|salt|pepper|Password).*")
      |
-        this.getEnclosingCallable() = randomFn
+        this.asExpr().getScope() = func
      )
    }
  }
--- a/python/ql/src/meta/analysis-quality/CallGraph.ql
+++ b/python/ql/src/meta/analysis-quality/CallGraph.ql
@@ -1,9 +1,9 @@
 /**
 * @name Call graph
- * @description An edge in the points-to call graph.
+ * @description An edge in the call graph.
 * @kind problem
 * @problem.severity recommendation
- * @id py/meta/points-to-call-graph
+ * @id py/meta/call-graph
 * @tags meta
 * @precision very-low
 */
@@ -12,9 +12,9 @@ import python
 import semmle.python.dataflow.new.internal.DataFlowPrivate
 import meta.MetaMetrics

-from DataFlowCall c, DataFlowCallableValue f
+from DataFlowCall call, DataFlowCallable target
 where
-  c.getCallable() = f and
-  not c.getLocation().getFile() instanceof IgnoredFile and
-  not f.getScope().getLocation().getFile() instanceof IgnoredFile
-select c, "Call to $@", f.getScope(), f.toString()
+  target = viableCallable(call) and
+  not call.getLocation().getFile() instanceof IgnoredFile and
+  not target.getScope().getLocation().getFile() instanceof IgnoredFile
+select call, "Call to $@", target.getScope(), target.toString()
--- a/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
+++ b/python/ql/src/meta/analysis-quality/CallGraphQuality.qll
@@ -1,16 +1,55 @@
 /**
 * Provides predicates for measuring the quality of the call graph, that is,
- * the number of calls that could be resolved to a callee.
+ * the number of calls that could be resolved to a target.
 */

 import python
 import meta.MetaMetrics

+newtype TTarget =
+  TFunction(Function func) or
+  TClass(Class cls)
+
+class Target extends TTarget {
+  /** Gets a textual representation of this element. */
+  abstract string toString();
+
+  /** Gets the location of this target. */
+  abstract Location getLocation();
+
+  /** Whether this target is relevant. */
+  predicate isRelevant() { exists(this.getLocation().getFile().getRelativePath()) }
+}
+
+class TargetFunction extends Target, TFunction {
+  Function func;
+
+  TargetFunction() { this = TFunction(func) }
+
+  override string toString() { result = func.toString() }
+
+  override Location getLocation() { result = func.getLocation() }
+
+  Function getFunction() { result = func }
+}
+
+class TargetClass extends Target, TClass {
+  Class cls;
+
+  TargetClass() { this = TClass(cls) }
+
+  override string toString() { result = cls.toString() }
+
+  override Location getLocation() { result = cls.getLocation() }
+
+  Class getClass() { result = cls }
+}
+
 /**
 * A call that is (possibly) relevant for analysis quality.
 * See `IgnoredFile` for details on what is excluded.
 */
-class RelevantCall extends Call {
+class RelevantCall extends CallNode {
  RelevantCall() { not this.getLocation().getFile() instanceof IgnoredFile }
 }

@@ -18,12 +57,16 @@ class RelevantCall extends Call {
 module PointsToBasedCallGraph {
  /** A call that can be resolved by points-to. */
  class ResolvableCall extends RelevantCall {
-    Value callee;
+    Value targetValue;

-    ResolvableCall() { callee.getACall() = this.getAFlowNode() }
+    ResolvableCall() { targetValue.getACall() = this }

-    /** Gets a resolved callee of this call. */
-    Value getCallee() { result = callee }
+    /** Gets a resolved target of this call. */
+    Target getTarget() {
+      result.(TargetFunction).getFunction() = targetValue.(CallableValue).getScope()
+      or
+      result.(TargetClass).getClass() = targetValue.(ClassValue).getScope()
+    }
  }

  /** A call that cannot be resolved by points-to. */
@@ -32,34 +75,79 @@ module PointsToBasedCallGraph {
  }

  /**
-   * A call that can be resolved by points-to, where the resolved callee is relevant.
-   * Relevant callees include:
-   * - builtins
-   * - standard library
+   * A call that can be resolved by points-to, where the resolved target is relevant.
+   * Relevant targets include:
   * - source code of the project
   */
-  class ResolvableCallRelevantCallee extends ResolvableCall {
-    ResolvableCallRelevantCallee() {
-      callee.isBuiltin()
-      or
-      exists(File file |
-        file = callee.(CallableValue).getScope().getLocation().getFile()
-        or
-        file = callee.(ClassValue).getScope().getLocation().getFile()
-      |
-        file.inStdlib()
-        or
-        // part of the source code of the project
-        exists(file.getRelativePath())
+  class ResolvableCallRelevantTarget extends ResolvableCall {
+    ResolvableCallRelevantTarget() {
+      exists(Target target | target = this.getTarget() |
+        exists(target.getLocation().getFile().getRelativePath())
      )
    }
  }

  /**
-   * A call that can be resolved by points-to, where the resolved callee is not considered relevant.
-   * See `ResolvableCallRelevantCallee` for the definition of relevance.
+   * A call that can be resolved by points-to, where the resolved target is not considered relevant.
+   * See `ResolvableCallRelevantTarget` for the definition of relevance.
   */
-  class ResolvableCallIrrelevantCallee extends ResolvableCall {
-    ResolvableCallIrrelevantCallee() { not this instanceof ResolvableCallRelevantCallee }
+  class ResolvableCallIrrelevantTarget extends ResolvableCall {
+    ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
+  }
+}
+
+/** Provides classes for call-graph resolution by using type-tracking. */
+module TypeTrackingBasedCallGraph {
+  private import semmle.python.dataflow.new.internal.DataFlowDispatch as TT
+
+  /** A call that can be resolved by type-tracking. */
+  class ResolvableCall extends RelevantCall {
+    ResolvableCall() {
+      exists(TT::TNormalCall(this, _, _))
+      or
+      TT::resolveClassCall(this, _)
+    }
+
+    /** Gets a resolved target of this call. */
+    Target getTarget() {
+      exists(TT::DataFlowCall call, TT::CallType ct, Function targetFunc |
+        call = TT::TNormalCall(this, targetFunc, ct) and
+        not ct instanceof TT::CallTypeClass and
+        targetFunc = result.(TargetFunction).getFunction()
+      )
+      or
+      // a TT::TNormalCall only exists when the call can be resolved to a function.
+      // Since points-to just says the call goes directly to the class itself, and
+      // type-tracking based wants to resolve this to the constructor, which might not
+      // exist. So to do a proper comparison, we don't require the call to be resolved to
+      // a specific function.
+      TT::resolveClassCall(this, result.(TargetClass).getClass())
+    }
+  }
+
+  /** A call that cannot be resolved by type-tracking. */
+  class UnresolvableCall extends RelevantCall {
+    UnresolvableCall() { not this instanceof ResolvableCall }
+  }
+
+  /**
+   * A call that can be resolved by type-tracking, where the resolved target is relevant.
+   * Relevant targets include:
+   * - source code of the project
+   */
+  class ResolvableCallRelevantTarget extends ResolvableCall {
+    ResolvableCallRelevantTarget() {
+      exists(Target target | target = this.getTarget() |
+        exists(target.getLocation().getFile().getRelativePath())
+      )
+    }
+  }
+
+  /**
+   * A call that can be resolved by type-tracking, where the resolved target is not considered relevant.
+   * See `ResolvableCallRelevantTarget` for the definition of relevance.
+   */
+  class ResolvableCallIrrelevantTarget extends ResolvableCall {
+    ResolvableCallIrrelevantTarget() { not this instanceof ResolvableCallRelevantTarget }
  }
 }
--- a/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql
+++ b/python/ql/src/meta/analysis-quality/PointsToResolvableCallsRelevantTarget.ql
@@ -11,4 +11,4 @@
 import python
 import CallGraphQuality

-select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantCallee call)
+select projectRoot(), count(PointsToBasedCallGraph::ResolvableCallRelevantTarget call)
--- a/python/ql/src/meta/analysis-quality/TTCallGraph.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraph.ql
@@ -0,0 +1,17 @@
+/**
+ * @name Call graph edge from using type-tracking
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/type-tracking-call-graph
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+  target.isRelevant() and
+  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "$@ to $@", call, "Call", target, target.toString()
--- a/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphMissing.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Missing call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-missing
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+  target.isRelevant() and
+  call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+  not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "MISSING: $@ to $@", call, "Call", target, target.toString()
--- a/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphNew.ql
@@ -0,0 +1,18 @@
+/**
+ * @name New call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-new
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+  target.isRelevant() and
+  not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "NEW: $@ to $@", call, "Call", target, target.toString()
--- a/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphNewAmbiguous.ql
@@ -0,0 +1,19 @@
+/**
+ * @name New call graph edge from using type-tracking instead of points-to, that is ambiguous
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-new-ambiguous
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+  target.isRelevant() and
+  not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target and
+  1 < count(call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget())
+select call, "NEW: $@ to $@", call, "Call", target, target.toString()
--- a/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphOverview.ql
@@ -0,0 +1,35 @@
+/**
+ * @name Call graph edge overview from using type-tracking instead of points-to
+ * @id py/meta/call-graph-overview
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from string tag, int c
+where
+  tag = "SHARED" and
+  c =
+    count(CallNode call, Target target |
+      target.isRelevant() and
+      call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+      call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+    )
+  or
+  tag = "NEW" and
+  c =
+    count(CallNode call, Target target |
+      target.isRelevant() and
+      not call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+      call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+    )
+  or
+  tag = "MISSING" and
+  c =
+    count(CallNode call, Target target |
+      target.isRelevant() and
+      call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+      not call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+    )
+select tag, c
--- a/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
+++ b/python/ql/src/meta/analysis-quality/TTCallGraphShared.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Shared call graph edge from using type-tracking instead of points-to
+ * @kind problem
+ * @problem.severity recommendation
+ * @id py/meta/call-graph-shared
+ * @tags meta
+ * @precision very-low
+ */
+
+import python
+import CallGraphQuality
+
+from CallNode call, Target target
+where
+  target.isRelevant() and
+  call.(PointsToBasedCallGraph::ResolvableCall).getTarget() = target and
+  call.(TypeTrackingBasedCallGraph::ResolvableCall).getTarget() = target
+select call, "SHARED: $@ to $@", call, "Call", target, target.toString()
--- a/python/ql/src/qlpack.yml
+++ b/python/ql/src/qlpack.yml
@@ -1,5 +1,5 @@
 name: codeql/python-queries
-version: 0.6.3-dev
+version: 0.6.4-dev
 groups: 
  - python
  - queries
--- a/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll
@@ -0,0 +1,47 @@
+import python
+import semmle.python.dataflow.new.DataFlow::DataFlow
+import semmle.python.dataflow.new.internal.DataFlowPrivate
+import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+
+// TODO: this should be promoted to be a REAL consistency query by being placed in
+// `python/ql/consistency-queries`. For now it resides here.
+private class MyConsistencyConfiguration extends ConsistencyConfiguration {
+  override predicate argHasPostUpdateExclude(ArgumentNode n) {
+    exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
+    or
+    exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
+  }
+
+  override predicate reverseReadExclude(Node n) {
+    // since `self`/`cls` parameters can be marked as implicit argument to `super()`,
+    // they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
+    // parameter, but dataflow-consistency queries should _not_ complain about there not
+    // being a post-update node for the synthetic `**kwargs` parameter.
+    n instanceof SynthDictSplatParameterNode
+  }
+
+  override predicate uniqueParameterNodeAtPositionExclude(
+    DataFlowCallable c, ParameterPosition pos, Node p
+  ) {
+    // TODO: This can be removed once we solve the overlap of dictionary splat parameters
+    c.getParameter(pos) = p and
+    pos.isDictSplat() and
+    not exists(p.getLocation().getFile().getRelativePath())
+  }
+
+  override predicate uniqueParameterNodePositionExclude(
+    DataFlowCallable c, ParameterPosition pos, Node p
+  ) {
+    // For normal parameters that can be passed either as positional arguments or as
+    // keyword arguments, we currently have parameter positions for both cases.
+    //
+    // TODO: Figure out how bad breaking this consistency check is
+    exists(Function func, Parameter param |
+      c.getScope() = func and
+      p = parameterNode(param) and
+      c.getParameter(pos) = p and
+      param = func.getArg(_) and
+      param = func.getArgByName(_)
+    )
+  }
+}
--- a/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/RoutingTest.qll
@@ -26,29 +26,30 @@ abstract class RoutingTest extends InlineExpectationsTest {
      element = fromNode.toString() and
      (
        tag = this.flowTag() and
-        if "\"" + tag + "\"" = this.fromValue(fromNode)
-        then value = ""
-        else value = this.fromValue(fromNode)
+        if "\"" + tag + "\"" = fromValue(fromNode) then value = "" else value = fromValue(fromNode)
        or
+        // only have result for `func` tag if the function where `arg<n>` is used, is
+        // different from the function name of the call where `arg<n>` was specified as
+        // an argument
        tag = "func" and
-        value = this.toFunc(toNode) and
-        not value = this.fromFunc(fromNode)
+        value = toFunc(toNode) and
+        not value = fromFunc(fromNode)
      )
    )
  }
-
-  pragma[inline]
-  private string fromValue(DataFlow::Node fromNode) {
-    result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
-  }
-
-  pragma[inline]
-  private string fromFunc(DataFlow::ArgumentNode fromNode) {
-    result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
-  }
-
-  pragma[inline]
-  private string toFunc(DataFlow::Node toNode) {
-    result = toNode.getEnclosingCallable().getCallableValue().getScope().getQualifiedName() // TODO: More robust pretty printing?
-  }
+}
+
+pragma[inline]
+private string fromValue(DataFlow::Node fromNode) {
+  result = "\"" + prettyNode(fromNode).replaceAll("\"", "'") + "\""
+}
+
+pragma[inline]
+private string fromFunc(DataFlow::ArgumentNode fromNode) {
+  result = fromNode.getCall().getNode().(CallNode).getFunction().getNode().(Name).getId()
+}
+
+pragma[inline]
+private string toFunc(DataFlow::Node toNode) {
+  result = toNode.getEnclosingCallable().getQualifiedName()
 }
--- a/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
+++ b/python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll
@@ -12,13 +12,10 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(location.getFile().getRelativePath()) and
    exists(CallNode call |
-      not exists(DataFlowPrivate::DataFlowCall dfc | dfc.getNode() = call |
-        // For every `CallNode`, there is a `DataFlowCall` in the form of a `NormalCall`.
-        // It does not really count, as it has some abstract overrides. For instance, it does not
-        // define `getCallable`, so checking for the existence of this guarantees that we are in a
-        // properly resolved call.
-        exists(dfc.getCallable())
+      not exists(DataFlowPrivate::DataFlowCall dfc |
+        exists(dfc.getCallable()) and dfc.getNode() = call
      ) and
+      not DataFlowPrivate::resolveClassCall(call, _) and
      not call = API::builtin(_).getACall().asCfgNode() and
      location = call.getLocation() and
      tag = "unresolved_call" and
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
@@ -1,3 +1,4 @@
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
 | test.py:1:19:1:19 | ControlFlowNode for x |
 | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
--- a/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/basic/global.expected
+++ b/python/ql/test/experimental/dataflow/basic/global.expected
@@ -1,4 +1,4 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
--- a/python/ql/test/experimental/dataflow/basic/globalStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected
@@ -1,4 +1,4 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -1,8 +1,8 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed | file://:0:0:0:0 | parameter 0 of builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
 | test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
 | test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
 | test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
@@ -10,6 +10,7 @@
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode | test.py:1:1:1:21 | SynthDictSplatParameterNode |
 | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
@@ -52,8 +53,10 @@
 | test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
 | test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
 | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
 | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
 | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
 | test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
 | test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a | test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/localStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/localStep.expected
@@ -1,4 +1,4 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
 | test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -1,12 +1,13 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
 | test.py:0:0:0:0 | GSSA Variable __name__ |
 | test.py:0:0:0:0 | GSSA Variable __package__ |
 | test.py:0:0:0:0 | GSSA Variable b |
 | test.py:0:0:0:0 | SSA variable $ |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
 | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:19:1:19 | ControlFlowNode for x |
@@ -24,7 +25,9 @@
 | test.py:7:1:7:1 | ControlFlowNode for b |
 | test.py:7:1:7:1 | GSSA Variable b |
 | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
 | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
 | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
 | test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -1,12 +1,13 @@
-| file://:0:0:0:0 | [summary] read: argument 0.List element in builtins.reversed |
+| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
 | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
-| file://:0:0:0:0 | parameter 0 of builtins.reversed |
+| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
 | test.py:0:0:0:0 | GSSA Variable __name__ |
 | test.py:0:0:0:0 | GSSA Variable __package__ |
 | test.py:0:0:0:0 | GSSA Variable b |
 | test.py:0:0:0:0 | SSA variable $ |
 | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | SynthDictSplatParameterNode |
 | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
 | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
 | test.py:1:19:1:19 | ControlFlowNode for x |
@@ -24,7 +25,9 @@
 | test.py:7:1:7:1 | ControlFlowNode for b |
 | test.py:7:1:7:1 | GSSA Variable b |
 | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:17 | [post] ControlFlowNode for obfuscated_id |
 | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
 | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:5:7:20 | [pre] ControlFlowNode for obfuscated_id() |
 | test.py:7:19:7:19 | ControlFlowNode for a |
-| test.py:7:19:7:19 | [post arg] ControlFlowNode for a |
+| test.py:7:19:7:19 | [post] ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.expected
@@ -0,0 +1,13 @@
+| test.py:32:8:32:23 | CrosstalkTestX() | test.py:9:5:9:23 | Function __init__ | test.py:32:8:32:23 | [pre] ControlFlowNode for CrosstalkTestX() | self |
+| test.py:33:8:33:23 | CrosstalkTestY() | test.py:21:5:21:23 | Function __init__ | test.py:33:8:33:23 | [pre] ControlFlowNode for CrosstalkTestY() | self |
+| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:36:12:36:15 | ControlFlowNode for objx | self |
+| test.py:43:1:43:8 | func() | test.py:13:5:13:26 | Function setx | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:38:12:38:15 | ControlFlowNode for objy | self |
+| test.py:43:1:43:8 | func() | test.py:25:5:25:26 | Function sety | test.py:43:6:43:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:47:12:47:15 | ControlFlowNode for objx | self |
+| test.py:51:1:51:8 | func() | test.py:16:5:16:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:49:12:49:15 | ControlFlowNode for objy | self |
+| test.py:51:1:51:8 | func() | test.py:28:5:28:30 | Function setvalue | test.py:51:6:51:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:63:12:63:12 | ControlFlowNode for a | self |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | position 0 |
+| test.py:70:1:70:8 | func() | test.py:58:5:58:33 | Function foo | test.py:70:6:70:7 | ControlFlowNode for IntegerLiteral | self |
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/Arguments.ql
@@ -0,0 +1,9 @@
+private import python
+private import semmle.python.dataflow.new.internal.DataFlowPrivate
+private import semmle.python.dataflow.new.internal.DataFlowPublic
+
+from DataFlowCall call, DataFlowCallable callable, ArgumentNode arg, ArgumentPosition apos
+where
+  callable = call.getCallable() and
+  arg = call.getArgument(apos)
+select call, callable, arg, apos
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected
@@ -0,0 +1,24 @@
+uniqueEnclosingCallable
+uniqueType
+uniqueNodeLocation
+missingLocation
+uniqueNodeToString
+missingToString
+parameterCallable
+localFlowIsLocal
+readStepIsLocal
+storeStepIsLocal
+compatibleTypesReflexive
+unreachableNodeCCtx
+localCallNodes
+postIsNotPre
+postHasUniquePre
+uniquePostUpdate
+postIsInSameCallable
+reverseRead
+argHasPostUpdate
+postWithInFlow
+viableImplInCallContextTooLarge
+uniqueParameterNodeAtPosition
+uniqueParameterNodePosition
+uniqueContentApprox
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql
@@ -0,0 +1,2 @@
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/options
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/options
@@ -0,0 +1 @@
+semmle-extractor-options: --max-import-depth=0
--- a/python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py
+++ b/python/ql/test/experimental/dataflow/callgraph_crosstalk/test.py
@@ -0,0 +1,70 @@
+import random
+cond = random.randint(0,1) == 1
+
+# ------------------------------------------------------------------------------
+# Calling different bound-methods based on conditional
+# ------------------------------------------------------------------------------
+
+class CrosstalkTestX:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def setx(self, value):
+        self.x = value
+
+    def setvalue(self, value):
+        self.x = value
+
+
+class CrosstalkTestY:
+    def __init__(self):
+        self.x = None
+        self.y = None
+
+    def sety(self ,value):
+        self.y = value
+
+    def setvalue(self, value):
+        self.y = value
+
+
+objx = CrosstalkTestX()
+objy = CrosstalkTestY()
+
+if cond:
+    func = objx.setx
+else:
+    func = objy.sety
+
+# What we're testing for is whether both objects are passed as self to both methods,
+# which is wrong.
+
+func(42)
+
+
+if cond:
+    func = objx.setvalue
+else:
+    func = objy.setvalue
+
+func(43)
+
+# ------------------------------------------------------------------------------
+# Calling methods in different ways
+# ------------------------------------------------------------------------------
+
+class A(object):
+    def foo(self, arg="Default"):
+        print("A.foo", self, arg)
+
+a = A()
+if cond:
+    func = a.foo # `44` is passed as arg
+else:
+    func = A.foo # `44` is passed as self
+
+# What we're testing for is whether a single call ends up having both `a` and `44`
+# passed as self to `A.foo`, which is wrong.
+
+func(44)
--- a/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
+++ b/python/ql/test/experimental/dataflow/calls/DataFlowCallTest.ql
@@ -1,6 +1,6 @@
 import python
 import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.internal.DataFlowPrivate
+import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
 import TestUtilities.InlineExpectationsTest
 private import semmle.python.dataflow.new.internal.PrintNode

@@ -8,26 +8,29 @@ class DataFlowCallTest extends InlineExpectationsTest {
  DataFlowCallTest() { this = "DataFlowCallTest" }

  override string getARelevantTag() {
-    result in ["call", "qlclass"]
+    result in ["call", "callType"]
    or
-    result = "arg_" + [0 .. 10]
+    result = "arg[" + any(DataFlowDispatch::ArgumentPosition pos).toString() + "]"
  }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(location.getFile().getRelativePath()) and
-    exists(DataFlowCall call |
+    exists(DataFlowDispatch::DataFlowCall call |
      location = call.getLocation() and
-      element = call.toString()
+      element = call.toString() and
+      exists(call.getCallable())
    |
      value = prettyExpr(call.getNode().getNode()) and
      tag = "call"
      or
-      value = call.getAQlClass() and
-      tag = "qlclass"
+      value = call.(DataFlowDispatch::NormalCall).getCallType().toString() and
+      tag = "callType"
      or
-      exists(int n, DataFlow::Node arg | arg = call.getArg(n) |
+      exists(DataFlowDispatch::ArgumentPosition pos, DataFlow::Node arg |
+        arg = call.getArgument(pos)
+      |
        value = prettyNodeForInlineTest(arg) and
-        tag = "arg_" + n
+        tag = "arg[" + pos + "]"
      )
    )
  }
--- a/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/calls/new_cls_param.py
+++ b/python/ql/test/experimental/dataflow/calls/new_cls_param.py
@@ -0,0 +1,16 @@
+# We want to ensure that the __new__ method is considered a classmethod even though it
+# doesn't have a decorator. This means that the `cls` parameter should be considered a
+# reference to the class (or subclass), and not an instance of the class. We can detect
+# this from looking at the arguments passed in the `cls.foo` call. If we see a `self`
+# argument, this means it has correct behavior (because we're targeting a classmethod),
+# if there is no `self` argument, this means we've only considered `cls` to be a class
+# instance, since we don't want to pass that to the `cls` parameter of the classmethod `WithNewImpl.foo`.
+
+class WithNewImpl(object):
+    def __new__(cls):
+        print("WithNewImpl.foo")
+        cls.foo() # $ call=cls.foo() callType=CallTypeClassMethod arg[self]=cls
+
+    @classmethod
+    def foo(cls):
+        print("WithNewImpl.foo")
--- a/python/ql/test/experimental/dataflow/calls/test.py
+++ b/python/ql/test/experimental/dataflow/calls/test.py
@@ -14,24 +14,69 @@ class MyClass(object):
    def my_method(self, arg):
        pass

+    def other_method(self):
+        self.my_method(42) # $ arg[self]=self call=self.my_method(..) callType=CallTypeNormalMethod arg[position 0]=42
+        self.sm(42) # $ call=self.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
+
+    @staticmethod
+    def sm(arg):
+        pass
+
+    @classmethod
+    def cm(cls, arg):
+        pass
+
+    @classmethod
+    def other_classmethod(cls):
+        cls.cm(42) # $ call=cls.cm(..) callType=CallTypeClassMethod arg[position 0]=42 arg[self]=cls
+        cls.sm(42) # $ call=cls.sm(..) callType=CallTypeStaticMethod arg[position 0]=42
+
    def __getitem__(self, key):
        pass

+func(0) # $ call=func(..) arg[position 0]=0 callType=CallTypePlainFunction

-func("foo") # $ call=func(..) qlclass=FunctionCall arg_0="foo"
-x = MyClass(1) # $ call=MyClass(..) qlclass=ClassCall arg_0=[pre]MyClass(..) arg_1=1
-x.my_method(2) # $ call=x.my_method(..) qlclass=MethodCall arg_0=x arg_1=2
+x = MyClass(1) # $ call=MyClass(..) arg[self]=[pre]MyClass(..) arg[position 0]=1 callType=CallTypeClass
+
+x.my_method(2) # $ call=x.my_method(..) arg[self]=x arg[position 0]=2 callType=CallTypeNormalMethod
 mm = x.my_method
-mm(2) # $ call=mm(..) qlclass=MethodCall arg_1=2 MISSING: arg_0=x
-x[3] # $ call=x[3] qlclass=SpecialCall arg_0=x arg_1=3
+mm(2) # $ call=mm(..) arg[self]=x arg[position 0]=2  callType=CallTypeNormalMethod
+MyClass.my_method(x, 2) # $ call=MyClass.my_method(..) arg[position 0]=2 arg[self]=x callType=CallTypeMethodAsPlainFunction
+
+x.sm(3) # $ call=x.sm(..) arg[position 0]=3  callType=CallTypeStaticMethod
+MyClass.sm(3) # $ call=MyClass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
+
+x.cm(4) # $ call=x.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
+MyClass.cm(4) # $ call=MyClass.cm(..) arg[position 0]=4 arg[self]=MyClass callType=CallTypeClassMethod
+
+x[5] # $ MISSING: call=x[5] arg[self]=x arg[position 0]=5
+
+
+class Subclass(MyClass):
+    pass
+
+y = Subclass(1) # $ call=Subclass(..) arg[self]=[pre]Subclass(..) arg[position 0]=1 callType=CallTypeClass
+
+y.my_method(2) # $ call=y.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
+mm = y.my_method
+mm(2) # $ call=mm(..) arg[self]=y arg[position 0]=2 callType=CallTypeNormalMethod
+Subclass.my_method(y, 2) # $ call=Subclass.my_method(..) arg[self]=y arg[position 0]=2 callType=CallTypeMethodAsPlainFunction
+
+y.sm(3) # $ call=y.sm(..) arg[position 0]=3  callType=CallTypeStaticMethod
+Subclass.sm(3) # $ call=Subclass.sm(..) arg[position 0]=3 callType=CallTypeStaticMethod
+
+y.cm(4) # $ call=y.cm(..) arg[position 0]=4 callType=CallTypeClassMethod
+Subclass.cm(4) # $ call=Subclass.cm(..) arg[self]=Subclass arg[position 0]=4 callType=CallTypeClassMethod
+
+y[5] # $ MISSING: call=y[5] arg[self]=y arg[position 0]=5


 try:
-    # These are included to show how we handle absent things with points-to where
-    # `mypkg.foo` is a `missing module variable`, but `mypkg.subpkg.bar` is compeltely
-    # ignored.
+    # These are included to show whether we have a DataFlowCall for things we can't
+    # resolve. Both are interesting since with points-to we used to have a DataFlowCall
+    # for _one_ but not the other
    import mypkg
-    mypkg.foo(42) # $ call=mypkg.foo(..) qlclass=NormalCall
-    mypkg.subpkg.bar(43) # $ call=mypkg.subpkg.bar(..) qlclass=LibraryCall arg_0=43
+    mypkg.foo(42)
+    mypkg.subpkg.bar(43)
 except:
    pass
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected
+++ b/python/ql/test/experimental/dataflow/consistency/modeling-consistency.expected
@@ -1 +0,0 @@
-| test.py:239:27:239:27 | Parameter | There is no `ParameterNode` associated with this parameter. |
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing.py
@@ -38,6 +38,14 @@ SINK5 = functools.partial(SINK, expected=arg5)
 SINK6 = functools.partial(SINK, expected=arg6)
 SINK7 = functools.partial(SINK, expected=arg7)

+SINK1_F = functools.partial(SINK_F, unexpected=arg1)
+SINK2_F = functools.partial(SINK_F, unexpected=arg2)
+SINK3_F = functools.partial(SINK_F, unexpected=arg3)
+SINK4_F = functools.partial(SINK_F, unexpected=arg4)
+SINK5_F = functools.partial(SINK_F, unexpected=arg5)
+SINK6_F = functools.partial(SINK_F, unexpected=arg6)
+SINK7_F = functools.partial(SINK_F, unexpected=arg7)
+

 def argument_passing(
    a,
@@ -64,12 +72,12 @@ def argument_passing(

@expects(7)
 def test_argument_passing1():
-    argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7})  #$ arg1 arg7 func=argument_passing MISSING: arg2 arg3="arg3 arg4 arg5 arg6
+    argument_passing(arg1, *(arg2, arg3, arg4), e=arg5, **{"f": arg6, "g": arg7})  #$ arg1 arg5 arg6 arg7 func=argument_passing MISSING: arg2 arg3 arg4


@expects(7)
 def test_argument_passing2():
-    argument_passing(arg1, arg2, arg3, f=arg6)  #$ arg1 arg2 arg3
+    argument_passing(arg1, arg2, arg3, f=arg6)  #$ arg1 arg2 arg3 arg6


 def with_pos_only(a, /, b):
@@ -94,7 +102,7 @@ def with_multiple_kw_args(a, b, c):
 def test_multiple_kw_args():
    with_multiple_kw_args(b=arg2, c=arg3, a=arg1)  #$ arg1 arg2 arg3
    with_multiple_kw_args(arg1, *(arg2,), arg3)  #$ arg1 MISSING: arg2 arg3
-    with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2)  #$ arg1 arg2 arg3 func=with_multiple_kw_args MISSING:
+    with_multiple_kw_args(arg1, **{"c": arg3}, b=arg2)  #$ arg1 arg2 arg3 func=with_multiple_kw_args
    with_multiple_kw_args(**{"b": arg2}, **{"c": arg3}, **{"a": arg1})  #$ arg1 arg2 arg3 func=with_multiple_kw_args


@@ -112,32 +120,6 @@ def test_default_arguments():
    with_default_arguments(**{"c": arg3})  #$ arg3 func=with_default_arguments


-# Nested constructor pattern
-def grab_foo_bar_baz(foo, **kwargs):
-    SINK1(foo)
-    grab_bar_baz(**kwargs)
-
-
-# It is not possible to pass `bar` into `kwargs`,
-# since `bar` is a valid keyword argument.
-def grab_bar_baz(bar, **kwargs):
-    SINK2(bar)
-    try:
-        SINK2_F(kwargs["bar"])
-    except:
-        print("OK")
-    grab_baz(**kwargs)
-
-
-def grab_baz(baz):
-    SINK3(baz)
-
-
-@expects(4)
-def test_grab():
-    grab_foo_bar_baz(baz=arg3, bar=arg2, foo=arg1)  #$ arg1 arg2 arg3 func=grab_bar_baz func=grab_baz
-
-
 # All combinations
 def test_pos_pos():
    def with_pos(a):
@@ -183,7 +165,95 @@ def test_kw_kw():


 def test_kw_doublestar():
-    def with_doublestar(**a):
-        SINK1(a["a"])
+    def with_doublestar(**kwargs):
+        SINK1(kwargs["a"])

    with_doublestar(a=arg1)  #$ arg1 func=test_kw_doublestar.with_doublestar
+
+
+def only_kwargs(**kwargs):
+    SINK1(kwargs["a"])
+    SINK2(kwargs["b"])
+    # testing precise content tracking, that content from `a` or `b` does not end up here.
+    SINK3_F(kwargs["c"])
+
+@expects(3)
+def test_kwargs():
+    args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=only_kwargs
+    only_kwargs(**args)
+
+
+def mixed(a, **kwargs):
+    SINK1(a)
+    try:
+        SINK1_F(kwargs["a"]) # since 'a' is a keyword argument, it cannot be part of **kwargs
+    except KeyError:
+        print("OK")
+    SINK2(kwargs["b"])
+    # testing precise content tracking, that content from `a` or `b` does not end up here.
+    SINK3_F(kwargs["c"])
+
+@expects(4*3)
+def test_mixed():
+    mixed(a=arg1, b=arg2, c="safe") # $ arg1 arg2
+
+    args = {"b": arg2, "c": "safe"} # $ arg2 func=mixed
+    mixed(a=arg1, **args) # $ arg1
+
+    args = {"a": arg1, "b": arg2, "c": "safe"} # $ arg1 arg2 func=mixed
+    mixed(**args)
+
+
+def starargs_only(*args):
+    SINK1(args[0])
+    SINK2(args[1])
+    SINK3_F(args[2])
+
+@expects(5*3)
+def test_only_starargs():
+    starargs_only(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad2,bad3="arg1" bad1,bad3="arg2"
+
+    args = (arg2, "safe") # $ MISSING: arg2
+    starargs_only(arg1, *args) # $ arg1 SPURIOUS: bad2,bad3="arg1"
+
+    args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
+    starargs_only(*args)
+
+    empty_args = ()
+
+    args = (arg1, arg2, "safe") # $ arg1 arg2 func=starargs_only
+    starargs_only(*args, *empty_args)
+    args = (arg1, arg2, "safe") # $ MISSING: arg1 arg2 func=starargs_only
+    starargs_only(*empty_args, *args)
+
+
+def starargs_mixed(a, *args):
+    SINK1(a)
+    SINK2(args[0])
+    SINK3_F(args[1])
+
+@expects(3*8)
+def test_stararg_mixed():
+    starargs_mixed(arg1, arg2, "safe") # $ arg1 arg2 SPURIOUS: bad3="arg2"
+
+    args = (arg2, "safe") # $ arg2 func=starargs_mixed
+    starargs_mixed(arg1, *args) # $ arg1
+
+    args = (arg1, arg2, "safe")
+    starargs_mixed(*args) # $ MISSING: arg1 arg2
+
+    args = (arg1, arg2, "safe")
+    more_args = ("foo", "bar")
+    starargs_mixed(*args, *more_args) # $ MISSING: arg1 arg2
+
+    empty_args = ()
+
+    # adding first/last
+    starargs_mixed(arg1, arg2, "safe", *empty_args) # $ arg1 arg2 SPURIOUS: bad3="arg2"
+    starargs_mixed(*empty_args, arg1, arg2, "safe") # $ MISSING: arg1 arg2
+
+    # adding before/after *args
+    args = (arg2, "safe") # $ arg2 func=starargs_mixed
+    starargs_mixed(arg1, *args, *empty_args) # $ arg1
+    args = (arg2, "safe")
+    starargs_mixed(arg1, *empty_args, *args) # $ arg1 MISSING: arg2
--- a/python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py
+++ b/python/ql/test/experimental/dataflow/coverage/argumentPassing_bad_flow_test.py
@@ -0,0 +1,63 @@
+import sys
+import os
+import functools
+
+sys.path.append(os.path.dirname(os.path.dirname((__file__))))
+from testlib import expects
+
+arg = "source"
+arg1 = "source1"
+arg2 = "source2"
+arg3 = "source3"
+arg4 = "source4"
+arg5 = "source5"
+arg6 = "source6"
+arg7 = "source7"
+
+
+def SINK_TEST(x, test):
+    if test(x):
+        print("OK")
+    else:
+        print("Unexpected flow", x)
+
+
+def SINK(x, expected=arg):
+    SINK_TEST(x, test=lambda x: x == expected)
+
+
+def SINK_F(x, unexpected=arg):
+    SINK_TEST(x, test=lambda x: x != unexpected)
+
+
+SINK1 = functools.partial(SINK, expected=arg1)
+SINK2 = functools.partial(SINK, expected=arg2)
+SINK3 = functools.partial(SINK, expected=arg3)
+SINK4 = functools.partial(SINK, expected=arg4)
+SINK5 = functools.partial(SINK, expected=arg5)
+SINK6 = functools.partial(SINK, expected=arg6)
+SINK7 = functools.partial(SINK, expected=arg7)
+
+SINK1_F = functools.partial(SINK_F, unexpected=arg1)
+SINK2_F = functools.partial(SINK_F, unexpected=arg2)
+SINK3_F = functools.partial(SINK_F, unexpected=arg3)
+SINK4_F = functools.partial(SINK_F, unexpected=arg4)
+SINK5_F = functools.partial(SINK_F, unexpected=arg5)
+SINK6_F = functools.partial(SINK_F, unexpected=arg6)
+SINK7_F = functools.partial(SINK_F, unexpected=arg7)
+
+
+def bad_argument_flow_func(arg):
+    SINK1_F(arg)
+
+def bad_argument_flow_func2(arg):
+    SINK2(arg)
+
+def test_bad_argument_flow():
+    # this is just a test to show that the testing setup works
+
+    # in the first one, we pretend we expected no flow for arg1
+    bad_argument_flow_func(arg1) # $ bad1="arg1"
+
+    # in the second one, we pretend we wanted flow for arg2 instead
+    bad_argument_flow_func2(arg1) # $ bad2="arg1"
--- a/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
+++ b/python/ql/test/experimental/dataflow/coverage/argumentRoutingTest.ql
@@ -9,23 +9,64 @@ class Argument1RoutingTest extends RoutingTest {
  override string flowTag() { result = "arg1" }

  override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
-    exists(Argument1RoutingConfig cfg | cfg.hasFlow(source, sink))
+    exists(Argument1ExtraRoutingConfig cfg | cfg.hasFlow(source, sink))
+    or
+    exists(ArgumentRoutingConfig cfg |
+      cfg.hasFlow(source, sink) and
+      cfg.isArgSource(source, 1) and
+      cfg.isGoodSink(sink, 1)
+    )
  }
 }

-/**
- * A configuration to check routing of arguments through magic methods.
- */
-class Argument1RoutingConfig extends DataFlow::Configuration {
-  Argument1RoutingConfig() { this = "Argument1RoutingConfig" }
+class ArgNumber extends int {
+  ArgNumber() { this in [1 .. 7] }
+}
+
+class ArgumentRoutingConfig extends DataFlow::Configuration {
+  ArgumentRoutingConfig() { this = "ArgumentRoutingConfig" }
+
+  predicate isArgSource(DataFlow::Node node, ArgNumber argNumber) {
+    node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
+  }
+
+  override predicate isSource(DataFlow::Node node) { this.isArgSource(node, _) }
+
+  predicate isGoodSink(DataFlow::Node node, ArgNumber argNumber) {
+    exists(CallNode call |
+      call.getFunction().(NameNode).getId() = "SINK" + argNumber and
+      node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+    )
+  }
+
+  predicate isBadSink(DataFlow::Node node, ArgNumber argNumber) {
+    exists(CallNode call |
+      call.getFunction().(NameNode).getId() = "SINK" + argNumber + "_F" and
+      node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+    )
+  }
+
+  override predicate isSink(DataFlow::Node node) {
+    this.isGoodSink(node, _) or this.isBadSink(node, _)
+  }
+
+  /**
+   * We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
+   * Use-use flow lets the argument to the first call reach the sink inside the second call,
+   * making it seem like we handle all cases even if we only handle the last one.
+   * We make the test honest by preventing flow into source nodes.
+   */
+  override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
+}
+
+class Argument1ExtraRoutingConfig extends DataFlow::Configuration {
+  Argument1ExtraRoutingConfig() { this = "Argument1ExtraRoutingConfig" }

  override predicate isSource(DataFlow::Node node) {
-    node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg1"
-    or
-    exists(AssignmentDefinition def, DataFlowPrivate::DataFlowCall call |
+    exists(AssignmentDefinition def, DataFlow::CallCfgNode call |
      def.getVariable() = node.(DataFlow::EssaNode).getVar() and
      def.getValue() = call.getNode() and
-      call.getNode().(CallNode).getFunction().(NameNode).getId().matches("With\\_%")
+      call.getFunction().asCfgNode().(NameNode).getId().matches("With\\_%")
    ) and
    node.(DataFlow::EssaNode).getVar().getName().matches("with\\_%")
  }
@@ -46,57 +87,59 @@ class Argument1RoutingConfig extends DataFlow::Configuration {
  override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
 }

-// for argument 2 and up, we use a generic approach. Change `maxNumArgs` below if we
-// need to increase the maximum number of arguments.
-private int maxNumArgs() { result = 7 }
-
 class RestArgumentRoutingTest extends RoutingTest {
-  int argNumber;
+  ArgNumber argNumber;

  RestArgumentRoutingTest() {
-    argNumber in [2 .. maxNumArgs()] and
+    argNumber > 1 and
    this = "Argument" + argNumber + "RoutingTest"
  }

  override string flowTag() { result = "arg" + argNumber }

  override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
-    exists(RestArgumentRoutingConfig cfg | cfg.getArgNumber() = argNumber |
-      cfg.hasFlow(source, sink)
+    exists(ArgumentRoutingConfig cfg |
+      cfg.hasFlow(source, sink) and
+      cfg.isArgSource(source, argNumber) and
+      cfg.isGoodSink(sink, argNumber)
    )
  }
 }

-/**
- * A configuration to check routing of arguments through magic methods.
- */
-class RestArgumentRoutingConfig extends DataFlow::Configuration {
-  int argNumber;
+/** Bad flow from `arg<n>` to `SINK<n>_F`. */
+class BadArgumentRoutingTestSinkF extends RoutingTest {
+  ArgNumber argNumber;

-  RestArgumentRoutingConfig() {
-    argNumber in [2 .. maxNumArgs()] and
-    this = "Argument" + argNumber + "RoutingConfig"
-  }
+  BadArgumentRoutingTestSinkF() { this = "BadArgumentRoutingTestSinkF" + argNumber }

-  /** Gets the argument number this configuration is for. */
-  int getArgNumber() { result = argNumber }
+  override string flowTag() { result = "bad" + argNumber }

-  override predicate isSource(DataFlow::Node node) {
-    node.(DataFlow::CfgNode).getNode().(NameNode).getId() = "arg" + argNumber
-  }
-
-  override predicate isSink(DataFlow::Node node) {
-    exists(CallNode call |
-      call.getFunction().(NameNode).getId() = "SINK" + argNumber and
-      node.(DataFlow::CfgNode).getNode() = call.getAnArg()
+  override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
+    exists(ArgumentRoutingConfig cfg |
+      cfg.hasFlow(source, sink) and
+      cfg.isArgSource(source, argNumber) and
+      cfg.isBadSink(sink, argNumber)
+    )
+  }
+}
+
+/** Bad flow from `arg<n>` to `SINK<m>` or `SINK<m>_F`, where `n != m`. */
+class BadArgumentRoutingTestWrongSink extends RoutingTest {
+  ArgNumber argNumber;
+
+  BadArgumentRoutingTestWrongSink() { this = "BadArgumentRoutingTestWrongSink" + argNumber }
+
+  override string flowTag() { result = "bad" + argNumber }
+
+  override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
+    exists(ArgumentRoutingConfig cfg |
+      cfg.hasFlow(source, sink) and
+      cfg.isArgSource(source, any(ArgNumber i | not i = argNumber)) and
+      (
+        cfg.isGoodSink(sink, argNumber)
+        or
+        cfg.isBadSink(sink, argNumber)
+      )
    )
  }
-
-  /**
-   * We want to be able to use `arg` in a sequence of calls such as `func(kw=arg); ... ; func(arg)`.
-   * Use-use flow lets the argument to the first call reach the sink inside the second call,
-   * making it seem like we handle all cases even if we only handle the last one.
-   * We make the test honest by preventing flow into source nodes.
-   */
-  override predicate isBarrierIn(DataFlow::Node node) { this.isSource(node) }
 }
--- a/python/ql/test/experimental/dataflow/coverage/classes.py
+++ b/python/ql/test/experimental/dataflow/coverage/classes.py
@@ -506,7 +506,7 @@ class With_call:


 def test_call():
-    with_call = With_call()  #$ MISSING: arg1="SSA variable with_call" func=With_call.__call__
+    with_call = With_call()  #$ arg1="SSA variable with_call" func=With_call.__call__
    with_call()


@@ -560,9 +560,9 @@ class With_getitem:


 def test_getitem():
-    with_getitem = With_getitem() #$ arg1="SSA variable with_getitem" func=With_getitem.__getitem__
+    with_getitem = With_getitem() #$ MISSING: arg1="SSA variable with_getitem" func=With_getitem.__getitem__
    arg2 = 0
-    with_getitem[arg2] #$ arg2 func=With_getitem.__getitem__
+    with_getitem[arg2] #$ MISSING: arg2 func=With_getitem.__getitem__


 # object.__setitem__(self, key, value)
@@ -575,10 +575,10 @@ class With_setitem:


 def test_setitem():
-    with_setitem = With_setitem()  #$ arg1="SSA variable with_setitem" func=With_setitem.__setitem__
+    with_setitem = With_setitem()  #$ MISSING: arg1="SSA variable with_setitem" func=With_setitem.__setitem__
    arg2 = 0
    arg3 = ""
-    with_setitem[arg2] = arg3  #$ arg2 arg3 func=With_setitem.__setitem__
+    with_setitem[arg2] = arg3  #$ MISSING: arg2 arg3 func=With_setitem.__setitem__


 # object.__delitem__(self, key)
@@ -590,9 +590,9 @@ class With_delitem:


 def test_delitem():
-    with_delitem = With_delitem()  #$ arg1="SSA variable with_delitem" func=With_delitem.__delitem__
+    with_delitem = With_delitem()  #$ MISSING: arg1="SSA variable with_delitem" func=With_delitem.__delitem__
    arg2 = 0
-    del with_delitem[arg2]  #$ arg2 func=With_delitem.__delitem__
+    del with_delitem[arg2]  #$ MISSING: arg2 func=With_delitem.__delitem__


 # object.__missing__(self, key)
@@ -662,9 +662,9 @@ class With_add:


 def test_add():
-    with_add = With_add()  #$ arg1="SSA variable with_add" func=With_add.__add__
+    with_add = With_add()  #$ MISSING: arg1="SSA variable with_add" func=With_add.__add__
    arg2 = with_add
-    with_add + arg2  #$ arg2 func=With_add.__add__
+    with_add + arg2  #$ MISSING: arg2 func=With_add.__add__


 # object.__sub__(self, other)
@@ -677,9 +677,9 @@ class With_sub:


 def test_sub():
-    with_sub = With_sub()  #$ arg1="SSA variable with_sub" func=With_sub.__sub__
+    with_sub = With_sub()  #$ MISSING: arg1="SSA variable with_sub" func=With_sub.__sub__
    arg2 = with_sub
-    with_sub - arg2  #$ arg2 func=With_sub.__sub__
+    with_sub - arg2  #$ MISSING: arg2 func=With_sub.__sub__


 # object.__mul__(self, other)
@@ -692,9 +692,9 @@ class With_mul:


 def test_mul():
-    with_mul = With_mul()  #$ arg1="SSA variable with_mul" func=With_mul.__mul__
+    with_mul = With_mul()  #$ MISSING: arg1="SSA variable with_mul" func=With_mul.__mul__
    arg2 = with_mul
-    with_mul * arg2  #$ arg2 func=With_mul.__mul__
+    with_mul * arg2  #$ MISSING: arg2 func=With_mul.__mul__


 # object.__matmul__(self, other)
@@ -707,9 +707,9 @@ class With_matmul:


 def test_matmul():
-    with_matmul = With_matmul()  #$ arg1="SSA variable with_matmul" func=With_matmul.__matmul__
+    with_matmul = With_matmul()  #$ MISSING: arg1="SSA variable with_matmul" func=With_matmul.__matmul__
    arg2 = with_matmul
-    with_matmul @ arg2  #$ arg2 func=With_matmul.__matmul__
+    with_matmul @ arg2  #$ MISSING: arg2 func=With_matmul.__matmul__


 # object.__truediv__(self, other)
@@ -722,9 +722,9 @@ class With_truediv:


 def test_truediv():
-    with_truediv = With_truediv()  #$ arg1="SSA variable with_truediv" func=With_truediv.__truediv__
+    with_truediv = With_truediv()  #$ MISSING: arg1="SSA variable with_truediv" func=With_truediv.__truediv__
    arg2 = with_truediv
-    with_truediv / arg2  #$ arg2 func=With_truediv.__truediv__
+    with_truediv / arg2  #$ MISSING: arg2 func=With_truediv.__truediv__


 # object.__floordiv__(self, other)
@@ -737,9 +737,9 @@ class With_floordiv:


 def test_floordiv():
-    with_floordiv = With_floordiv()  #$ arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
+    with_floordiv = With_floordiv()  #$ MISSING: arg1="SSA variable with_floordiv" func=With_floordiv.__floordiv__
    arg2 = with_floordiv
-    with_floordiv // arg2  #$ arg2 func=With_floordiv.__floordiv__
+    with_floordiv // arg2  #$ MISSING: arg2 func=With_floordiv.__floordiv__


 # object.__mod__(self, other)
@@ -752,9 +752,9 @@ class With_mod:


 def test_mod():
-    with_mod = With_mod()  #$ arg1="SSA variable with_mod" func=With_mod.__mod__
+    with_mod = With_mod()  #$ MISSING: arg1="SSA variable with_mod" func=With_mod.__mod__
    arg2 = with_mod
-    with_mod % arg2  #$ arg2 func=With_mod.__mod__
+    with_mod % arg2  #$ MISSING: arg2 func=With_mod.__mod__


 # object.__divmod__(self, other)
@@ -788,9 +788,9 @@ def test_pow():


 def test_pow_op():
-    with_pow = With_pow()  #$ arg1="SSA variable with_pow" func=With_pow.__pow__
+    with_pow = With_pow()  #$ MISSING: arg1="SSA variable with_pow" func=With_pow.__pow__
    arg2 = with_pow
-    with_pow ** arg2  #$ arg2 func=With_pow.__pow__
+    with_pow ** arg2  #$ MISSING: arg2 func=With_pow.__pow__


 # object.__lshift__(self, other)
@@ -803,9 +803,9 @@ class With_lshift:


 def test_lshift():
-    with_lshift = With_lshift()  #$ arg1="SSA variable with_lshift" func=With_lshift.__lshift__
+    with_lshift = With_lshift()  #$ MISSING: arg1="SSA variable with_lshift" func=With_lshift.__lshift__
    arg2 = with_lshift
-    with_lshift << arg2  #$ arg2 func=With_lshift.__lshift__
+    with_lshift << arg2  #$ MISSING: arg2 func=With_lshift.__lshift__


 # object.__rshift__(self, other)
@@ -818,9 +818,9 @@ class With_rshift:


 def test_rshift():
-    with_rshift = With_rshift()  #$ arg1="SSA variable with_rshift" func=With_rshift.__rshift__
+    with_rshift = With_rshift()  #$ MISSING: arg1="SSA variable with_rshift" func=With_rshift.__rshift__
    arg2 = with_rshift
-    with_rshift >> arg2  #$ arg2 func=With_rshift.__rshift__
+    with_rshift >> arg2  #$ MISSING: arg2 func=With_rshift.__rshift__


 # object.__and__(self, other)
@@ -833,9 +833,9 @@ class With_and:


 def test_and():
-    with_and = With_and()  #$ arg1="SSA variable with_and" func=With_and.__and__
+    with_and = With_and()  #$ MISSING: arg1="SSA variable with_and" func=With_and.__and__
    arg2 = with_and
-    with_and & arg2  #$ arg2 func=With_and.__and__
+    with_and & arg2  #$ MISSING: arg2 func=With_and.__and__


 # object.__xor__(self, other)
@@ -848,9 +848,9 @@ class With_xor:


 def test_xor():
-    with_xor = With_xor()  #$ arg1="SSA variable with_xor" func=With_xor.__xor__
+    with_xor = With_xor()  #$ MISSING: arg1="SSA variable with_xor" func=With_xor.__xor__
    arg2 = with_xor
-    with_xor ^ arg2  #$ arg2 func=With_xor.__xor__
+    with_xor ^ arg2  #$ MISSING: arg2 func=With_xor.__xor__


 # object.__or__(self, other)
@@ -863,9 +863,9 @@ class With_or:


 def test_or():
-    with_or = With_or()  #$ arg1="SSA variable with_or" func=With_or.__or__
+    with_or = With_or()  #$ MISSING: arg1="SSA variable with_or" func=With_or.__or__
    arg2 = with_or
-    with_or | arg2  #$ arg2 func=With_or.__or__
+    with_or | arg2  #$ MISSING: arg2 func=With_or.__or__


 # object.__radd__(self, other)
--- a/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/coverage/datamodel.py
+++ b/python/ql/test/experimental/dataflow/coverage/datamodel.py
@@ -124,6 +124,40 @@ def test_staticmethod_call():
  C.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2


+# subclass
+class SC(C):
+    pass
+sc = SC()
+
+@expects(6)
+def test_subclass_method_call():
+  func_obj = sc.method.__func__
+
+  sc.method(arg1, arg2) # $ func=C.method arg1 arg2
+  SC.method(sc, arg1, arg2) # $ func=C.method arg1 arg2
+  func_obj(sc, arg1, arg2) # $ MISSING: func=C.method arg1 arg2
+
+
+@expects(6)
+def test_subclass_classmethod_call():
+  c_func_obj = SC.classmethod.__func__
+
+  sc.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
+  SC.classmethod(arg1, arg2) # $ func=C.classmethod arg1 arg2
+  c_func_obj(SC, arg1, arg2) # $ MISSING: func=C.classmethod arg1 arg2
+
+
+@expects(5)
+def test_subclass_staticmethod_call():
+  try:
+    SC.staticmethod.__func__
+  except AttributeError:
+    print("OK")
+
+  sc.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
+  SC.staticmethod(arg1, arg2) # $ func=C.staticmethod arg1 arg2
+
+
 # Generator functions
 # A function or method which uses the yield statement (see section The yield statement) is called a generator function. Such a function, when called, always returns an iterator object which can be used to execute the body of the function: calling the iterator’s iterator.__next__() method will cause the function to execute until it provides a value using the yield statement. When the function executes a return statement or falls off the end, a StopIteration exception is raised and the iterator will have reached the end of the set of values to be returned.
 def gen(x, count):
@@ -198,5 +232,16 @@ class Customized:
 customized = Customized()
 SINK(Customized.a)  #$ MISSING:flow="SOURCE, l:-8 -> customized.a"
 SINK_F(Customized.b)
-SINK(customized.a)  #$ MISSING:flow="SOURCE, l:-10 -> customized.a"
+SINK(customized.a)  #$ MISSING: flow="SOURCE, l:-10 -> customized.a"
 SINK(customized.b)  #$ flow="SOURCE, l:-7 -> customized.b"
+
+
+class Test2:
+
+  def __init__(self, arg):
+    self.x = SOURCE
+    self.y = arg
+
+t = Test2(SOURCE)
+SINK(t.x) # $ flow="SOURCE, l:-4 -> t.x"
+SINK(t.y) # $ flow="SOURCE, l:-2 -> t.y"
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.ql
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.ql
@@ -4,5 +4,5 @@ import semmle.python.dataflow.new.DataFlow
 from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
 where
  DataFlow::localFlowStep(nodeFrom, nodeTo) and
-  nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow")
+  nodeFrom.getEnclosingCallable().getQualifiedName().matches("%\\_with\\_local\\_flow")
 select nodeFrom, nodeTo
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -697,9 +697,16 @@ def test_overflow_iteration():
  s = SOURCE
  iterate_star_args(NONSOURCE, NONSOURCE, SOURCE, s)

+@expects(6)
 def test_deep_callgraph():
    # port of python/ql/test/library-tests/taint/general/deep.py

+    # based on the fact that `test_deep_callgraph_defined_in_module` works, the problem
+    # seems to be that we're defining these functions inside another function and that
+    # the flow of these function definitions DOESN'T flow into the body of the `f<n>`
+    # functions (they DO flow into the body of `test_deep_callgraph`, otherwise the
+    # `f1` call wouldn't work).
+
    def f1(arg):
        return arg

@@ -720,8 +727,51 @@ def test_deep_callgraph():

    x = f6(SOURCE)
    SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+    x = f5(SOURCE)
+    SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+    x = f4(SOURCE)
+    SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+    x = f3(SOURCE)
+    SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+    x = f2(SOURCE)
+    SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
+    x = f1(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"


+def wat_f1(arg):
+    return arg
+
+def wat_f2(arg):
+    return wat_f1(arg)
+
+def wat_f3(arg):
+    return wat_f2(arg)
+
+def wat_f4(arg):
+    return wat_f3(arg)
+
+def wat_f5(arg):
+    return wat_f4(arg)
+
+def wat_f6(arg):
+    return wat_f5(arg)
+
+@expects(6)
+def test_deep_callgraph_defined_in_module():
+    x = wat_f6(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+    x = wat_f5(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+    x = wat_f4(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+    x = wat_f3(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+    x = wat_f2(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+    x = wat_f1(SOURCE)
+    SINK(x) #$ flow="SOURCE, l:-1 -> x"
+
@expects(2)
 def test_dynamic_tuple_creation_1():
    tup = tuple()
--- a/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
+++ b/python/ql/test/experimental/dataflow/enclosing-callable/EnclosingCallable.expected
@@ -1,24 +1,24 @@
-| file://:0:0:0:0 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
-| file://:0:0:0:0 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
-| file://:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
-| file://:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
-| file://:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
-| file://:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
-| file://:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
-| file://:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
-| file://:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:1:1:3 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:1:7:1:7 | ControlFlowNode for IntegerLiteral |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:1:3:10 | ControlFlowNode for ClassExpr |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:3:7:3:9 | ControlFlowNode for Wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:5:4:7 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:4:11:4:11 | ControlFlowNode for IntegerLiteral |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:9 | ControlFlowNode for print |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:5:5:26 | ControlFlowNode for print() |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:11:5:20 | ControlFlowNode for Str |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:5:23:5:25 | ControlFlowNode for wat |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:5 | ControlFlowNode for print |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:1:7:23 | ControlFlowNode for print() |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:7:7:17 | ControlFlowNode for Str |
+| class_example.py:0:0:0:0 | Module class_example | class_example.py:7:20:7:22 | ControlFlowNode for wat |
+| generator.py:0:0:0:0 | Module generator | generator.py:1:1:1:23 | ControlFlowNode for FunctionExpr |
+| generator.py:0:0:0:0 | Module generator | generator.py:1:5:1:18 | ControlFlowNode for generator_func |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:1:20:1:21 | ControlFlowNode for xs |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for .0 |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:12:2:26 | ControlFlowNode for ListComp |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for Yield |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:13:2:13 | ControlFlowNode for x |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:19:2:19 | ControlFlowNode for x |
+| generator.py:1:1:1:23 | Function generator_func | generator.py:2:24:2:25 | ControlFlowNode for xs |
--- a/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/exceptions/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/fieldflow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/fieldflow/test.py
+++ b/python/ql/test/experimental/dataflow/fieldflow/test.py
@@ -84,10 +84,10 @@ def test_indirect_assign_bound_method():
    sf = myobj.setFoo

    sf(SOURCE)
-    SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-1 -> myobj.foo"
+    SINK(myobj.foo) # $ flow="SOURCE, l:-1 -> myobj.foo"

    sf(NONSOURCE)
-    SINK_F(myobj.foo)
+    SINK_F(myobj.foo) # $ SPURIOUS: flow="SOURCE, l:-4 -> myobj.foo"


@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -167,6 +167,17 @@ def fields_with_local_flow(x):
 def test_fields():
    SINK(fields_with_local_flow(SOURCE)) # $ flow="SOURCE -> fields_with_local_flow(..)"

+
+def call_with_source(func):
+    func(SOURCE)
+
+
+def test_bound_method_passed_as_arg():
+    myobj = MyObj(NONSOURCE)
+    call_with_source(myobj.setFoo)
+    SINK(myobj.foo) # $ MISSING: flow="SOURCE, l:-6 -> myobj.foo"
+
+
 # ------------------------------------------------------------------------------
 # Nested Object
 # ------------------------------------------------------------------------------
@@ -244,6 +255,9 @@ class CrosstalkTestX:
    def setvalue(self, value):
        self.x = value

+    def do_nothing(self, value):
+        pass
+

 class CrosstalkTestY:
    def __init__(self):
@@ -295,10 +309,10 @@ def test_potential_crosstalk_different_name(cond=True):

    func(SOURCE)

-    SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
+    SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
    SINK_F(objx.y)
    SINK_F(objy.x)
-    SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
+    SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"


@expects(8) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -318,10 +332,10 @@ def test_potential_crosstalk_same_name(cond=True):

    func(SOURCE)

-    SINK(objx.x) # $ MISSING: flow="SOURCE, l:-2 -> objx.x"
+    SINK(objx.x) # $ flow="SOURCE, l:-2 -> objx.x"
    SINK_F(objx.y)
    SINK_F(objy.x)
-    SINK(objy.y, not_present_at_runtime=True) # $ MISSING: flow="SOURCE, l:-5 -> objy.y"
+    SINK(objy.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-5 -> objy.y"


@expects(10) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
@@ -350,6 +364,53 @@ def test_potential_crosstalk_same_name_object_reference(cond=True):
    SINK(obj.y, not_present_at_runtime=True) # $ flow="SOURCE, l:-8 -> obj.y"


+@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
+def test_potential_crosstalk_same_class(cond=True):
+    objx1 = CrosstalkTestX()
+    SINK_F(objx1.x)
+
+    objx2 = CrosstalkTestX()
+    SINK_F(objx2.x)
+
+    if cond:
+        func = objx1.setvalue
+    else:
+        func = objx2.do_nothing
+
+    # We want to ensure that objx2.x does not end up getting tainted, since that would
+    # be cross-talk between the self arguments and their functions.
+    func(SOURCE)
+
+    SINK(objx1.x) # $ flow="SOURCE, l:-2 -> objx1.x"
+    SINK_F(objx2.x)
+
+
+class NewTest(object):
+    def __new__(cls, arg):
+        cls.foo = arg
+        return super().__new__(cls) # $ unresolved_call=super().__new__(..)
+
+@expects(4) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
+def test__new__():
+    # we want to make sure that we DON'T pass the synthetic pre-update node for
+    # the class instance to __new__, like we do for __init__.
+    nt = NewTest(SOURCE)
+    # the __new__ implementation sets the foo attribute on THE CLASS itself. The
+    # attribute lookup on the class instance will go to the class itself when the
+    # attribute isn't defined on the class instance, so we will actually see `nt.foo`
+    # contain the source, but the point of this test is that we should see identical
+    # behavior between NewTest.foo and nt.foo, which we don't!
+    #
+    # Also note that we currently (October 2022) don't model writes to classes very
+    # well.
+
+    SINK(NewTest.foo) # $ MISSING: flow="SOURCE, l:-10 -> NewTest.foo"
+    SINK(nt.foo) # $ MISSING: flow="SOURCE, l:-11 -> nt.foo"
+
+    NewTest.foo = NONSOURCE
+    SINK_F(NewTest.foo)
+    SINK_F(nt.foo)
+
 # ------------------------------------------------------------------------------
 # Global scope
 # ------------------------------------------------------------------------------
@@ -400,7 +461,7 @@ SINK(obj2.foo) # $ flow="SOURCE, l:-1 -> obj2.foo"

 # apparently these if statements below makes a difference :O
 # but one is not enough
-cond = os.urandom(1)[0] > 128
+cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)

 if cond:
    pass
--- a/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/global-flow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/match/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/pep_328/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/regression/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/strange-essaflow/dataflow-consistency.ql
@@ -1 +1,2 @@
-import semmle.python.dataflow.new.internal.DataFlowImplConsistency::Consistency
+import python
+import experimental.dataflow.TestUtil.DataFlowConsistency
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/README.md
@@ -1,96 +0,0 @@
-While working on the field-flow tests, I encountered some very strange behavior. By moving some tests into a new file, they suddenly started working :O
-
-This folder contains the artifacts from investigating this problem, so we can recall the facts (but besides that, don't have much value in itself).
-
-The test files can be found in `src/`, and I have set of a bunch of different tests with different extractor options in the `test-*` folders.
-
-The core of the problem is that in _some_ configuration of extractor options, after seeing the code below, points-to gives up trying to resolve calls :flushed:
-
-```py
-import os
-cond = os.urandom(1)[0] > 128
-
-if cond:
-    pass
-
-if cond:
-    pass
-```
-
-This seems to have been caused by not allowing enough imports to be resolved. There is also some interaction with splitting, since turning that off also removes the problem.
-
-But allowing our test to see more imports is more representative of what happens when analyzing real code, so that's the better approach :+1: (and going above 3 does not seem to change anything in this case).
-
-I've thought about whether we can write a query to reliably cases such as this, but I don't see any solutions. However, we can easily try running all our tests with `--max-import-depth=100` and see if anything changes from this.
-
-# Seeing the solutions work
-
-Doing `diff -u -r test-1-normal/ test-5-max-import-depth-3/` shows that all the calls we should be able to resolve, are now resolved properly. and critically this line is added:
-
-```diff
-+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-```
-
-<details>
-<summary>full diff</summary>
-
-```diff
-diff '--color=auto' -u -r test-1-normal/NormalDataflowTest.expected test-5-max-import-depth-3/NormalDataflowTest.expected
--- test-1-normal/NormalDataflowTest.expected	2022-02-27 10:33:00.603882599 +0100
-+++ test-5-max-import-depth-3/NormalDataflowTest.expected	2022-02-28 10:10:08.930743800 +0100
-@@ -1,2 +1,3 @@
- missingAnnotationOnSink
- failures
-+| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-diff '--color=auto' -u -r test-1-normal/options test-5-max-import-depth-3/options
--- test-1-normal/options	2022-02-27 10:36:51.124793909 +0100
-+++ test-5-max-import-depth-3/options	2022-02-27 11:01:43.908098372 +0100
-@@ -1 +1 @@
-semmle-extractor-options: --max-import-depth=1 -R ../src
-+semmle-extractor-options: --max-import-depth=3 -R ../src
-diff '--color=auto' -u -r test-1-normal/UnresolvedCalls.expected test-5-max-import-depth-3/UnresolvedCalls.expected
--- test-1-normal/UnresolvedCalls.expected	2022-02-28 10:09:19.213742437 +0100
-+++ test-5-max-import-depth-3/UnresolvedCalls.expected	2022-02-28 10:10:08.638737921 +0100
-@@ -0,0 +1,5 @@
-+| ../src/isfile_no_problem.py:34:33:34:70 | Comment # $ unresolved_call=os.path.isfile(..) | Missing result:unresolved_call=os.path.isfile(..) |
-+| ../src/urandom_no_if_no_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-+| ../src/urandom_problem.py:34:31:34:64 | Comment # $ unresolved_call=os.urandom(..) | Missing result:unresolved_call=os.urandom(..) |
-+| ../src/urandom_problem.py:42:18:42:47 | Comment # $ unresolved_call=give_src() | Missing result:unresolved_call=give_src() |
-+| ../src/urandom_problem.py:43:11:43:75 | Comment # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo" | Missing result:unresolved_call=SINK(..) |
-diff '--color=auto' -u -r test-1-normal/UnresolvedPointsToCalls.expected test-5-max-import-depth-3/UnresolvedPointsToCalls.expected
--- test-1-normal/UnresolvedPointsToCalls.expected	2022-02-28 10:09:19.033738812 +0100
-+++ test-5-max-import-depth-3/UnresolvedPointsToCalls.expected	2022-02-28 10:12:48.572752108 +0100
-@@ -1,5 +1 @@
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
- | ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
-| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
-```
-
-</details>
-
-There are no benefit in increasing import depth above 3 for this test-example:
-
-```diff
-$ diff -u -r test-4-max-import-depth-100/ test-5-max-import-depth-3/
--- test-4-max-import-depth-100/options 2022-02-28 10:02:09.269071781 +0100
-+++ test-5-max-import-depth-3/options   2022-02-27 11:01:43.908098372 +0100
-@@ -1 +1 @@
-semmle-extractor-options: --max-import-depth=100 -R ../src
-+semmle-extractor-options: --max-import-depth=3 -R ../src
-```
-
-Also notice that using import depth 2 actually makes things worse, as we no longer handle the `isfile_no_problem.py` file properly :facepalm: :sweat_smile: NOTE: This was only for Python 3, for Python 2 there was no change :flushed:
-
-```diff
-diff '--color=auto' -u -r test-4-max-import-depth-100/NormalDataflowTest.expected test-6-max-import-depth-2/NormalDataflowTest.expected
--- test-4-max-import-depth-100/NormalDataflowTest.expected     2022-02-28 10:10:02.206608379 +0100
-+++ test-6-max-import-depth-2/NormalDataflowTest.expected       2022-02-28 10:10:13.882716665 +0100
-@@ -1,3 +1,5 @@
- missingAnnotationOnSink
-+| ../src/isfile_no_problem.py:43:6:43:8 | ../src/isfile_no_problem.py:43 | ERROR, you should add `# $ MISSING: flow` annotation | foo |
- failures
-+| ../src/isfile_no_problem.py:43:11:43:41 | Comment # $ flow="SOURCE, l:-15 -> foo" | Missing result:flow="SOURCE, l:-15 -> foo" |
- | ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
-```
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/eval_no_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = eval("False")
-
-if cond:
-    pass
-
-if cond:
-    pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/isfile_no_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.path.isfile(__file__) # $ unresolved_call=os.path.isfile(..)
-
-if cond:
-    pass
-
-if cond:
-    pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/simple_no_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = 1 + 1 == 2
-
-if cond:
-    pass
-
-if cond:
-    pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_if_no_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-# if cond:
-#     pass
-#
-# if cond:
-#     pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_no_import_no_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-# import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-# if cond:
-#     pass
-#
-# if cond:
-#     pass
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/src/urandom_problem.py
@@ -1,43 +0,0 @@
-# These are defined so that we can evaluate the test code.
-NONSOURCE = "not a source"
-SOURCE = "source"
-
-
-def is_source(x):
-    return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
-
-
-def SINK(x):
-    if is_source(x):
-        print("OK")
-    else:
-        print("Unexpected flow", x)
-
-
-def SINK_F(x):
-    if is_source(x):
-        print("Unexpected flow", x)
-    else:
-        print("OK")
-
-# ------------------------------------------------------------------------------
-# Actual tests
-# ------------------------------------------------------------------------------
-
-def give_src():
-    return SOURCE
-
-foo = give_src()
-SINK(foo) # $ flow="SOURCE, l:-3 -> foo"
-
-import os
-cond = os.urandom(1)[0] > 128 # $ unresolved_call=os.urandom(..)
-
-if cond:
-    pass
-
-if cond:
-    pass
-
-foo = give_src() # $ unresolved_call=give_src()
-SINK(foo) # $ unresolved_call=SINK(..) MISSING: flow="SOURCE, l:-15 -> foo"
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.expected
@@ -1,6 +0,0 @@
-| ../src/eval_no_problem.py | has splitting |
-| ../src/isfile_no_problem.py | has splitting |
-| ../src/simple_no_problem.py | has splitting |
-| ../src/urandom_no_if_no_problem.py | does not have splitting |
-| ../src/urandom_no_import_no_problem.py | does not have splitting |
-| ../src/urandom_problem.py | has splitting |
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/Splitting.ql
@@ -1,16 +0,0 @@
-import python
-
-// this can be quick-eval to see which ones have splitting. But that's basically just
-// anything from line 39 and further.
-predicate exprWithSplitting(Expr e) {
-  exists(e.getLocation().getFile().getRelativePath()) and
-  1 < count(ControlFlowNode cfn | cfn.getNode() = e)
-}
-
-from File f, string msg
-where
-  exists(f.getRelativePath()) and
-  if exists(Expr e | e.getLocation().getFile() = f and exprWithSplitting(e))
-  then msg = "has splitting"
-  else msg = "does not have splitting"
-select f.toString(), msg
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedCalls.ql
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.UnresolvedCalls
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.expected
@@ -1,5 +0,0 @@
-| ../src/urandom_no_if_no_problem.py:34:8:34:20 | ../src/urandom_no_if_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_no_import_no_problem.py:34:8:34:20 | ../src/urandom_no_import_no_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:34:8:34:20 | ../src/urandom_problem.py:34 | os.urandom(..) |
-| ../src/urandom_problem.py:42:7:42:16 | ../src/urandom_problem.py:42 | give_src() |
-| ../src/urandom_problem.py:43:1:43:9 | ../src/urandom_problem.py:43 | SINK(..) |
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/UnresolvedPointsToCalls.ql
@@ -1,10 +0,0 @@
-import python
-private import semmle.python.dataflow.new.internal.PrintNode
-
-from CallNode call
-where
-  exists(call.getLocation().getFile().getRelativePath()) and
-  not exists(Value value | call = value.getACall()) and
-  // somehow print is not resolved, but that is not the focus right now
-  not call.getFunction().(NameNode).getId() = "print"
-select call.getLocation(), prettyExpr(call.getNode())
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-1-normal/options
@@ -1 +0,0 @@
-semmle-extractor-options: --lang=3 --max-import-depth=1 -R ../src
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.expected
@@ -1,3 +0,0 @@
-missingAnnotationOnSink
-failures
-| ../src/urandom_problem.py:43:6:43:8 | ControlFlowNode for foo | Fixed missing result:flow="SOURCE, l:-15 -> foo" |
--- a/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql
+++ b/python/ql/test/experimental/dataflow/strange-pointsto-interaction-investigation/test-2-without-splitting/NormalDataflowTest.ql
@@ -1,2 +0,0 @@
-import python
-import experimental.dataflow.TestUtil.NormalDataflowTest
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`semmle-extractor-options: --max-import-depth=0`
				`@@ -1 +0,0 @@`
				\| test.py:239:27:239:27 \| Parameter \| There is no `ParameterNode` associated with this parameter. \|
				`@@ -1 +0,0 @@`
				`semmle-extractor-options: --lang=3 --max-import-depth=1 -R ../src`