// codeql/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

private import python
private import DataFlowPublic
import semmle.python.SpecialMethods
private import semmle.python.essa.SsaCompute
private import semmle.python.dataflow.new.internal.ImportStar
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
/** A parameter position represented by an integer. */
class ParameterPosition extends int {
ParameterPosition() { exists(any(DataFlowCallable c).getParameter(this)) }
}
/** An argument position represented by an integer. */
class ArgumentPosition extends int {
ArgumentPosition() { exists(any(DataFlowCall c).getArg(this)) }
}
/** Holds if arguments at position `apos` match parameters at position `ppos`. */
pragma[inline]
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos }
/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */
predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) {
p.isParameterOf(c, pos)
}
/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */
predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) {
arg.argumentOf(c, pos)
}
//--------
// Data flow graph
//--------
//--------
// Nodes
//--------
predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }
/** A module collecting the different reasons for synthesising a pre-update node. */
module syntheticPreUpdateNode {
class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
NeedsSyntheticPreUpdateNode post;
SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
/** Gets the node for which this is a synthetic pre-update node. */
Node getPostUpdateNode() { result = post }
override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
override Scope getScope() { result = post.getScope() }
override Location getLocation() { result = post.getLocation() }
}
/** A data flow node for which we should synthesise an associated pre-update node. */
class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
/**
* A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
*
* There is currently only one reason for needing a pre-update node, so we always use that as the label.
*/
string label() { result = "objCreate" }
}
/**
* Calls to constructors are treated as post-update nodes for the synthesized argument
* that is mapped to the `self` parameter. That way, constructor calls represent the value of the
* object after the constructor (currently only `__init__`) has run.
*/
CfgNode objectCreationNode() { result.getNode().(CallNode) = any(ClassCall c).getNode() }
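// For illustration (hypothetical code):
// `class C:`
// `    def __init__(self, x): self.x = x`
// `c = C(data)`
// The call `C(data)` is the post-update node for a synthetic pre-update node, so
// `c` refers to the state of the object after `__init__` has run.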
}
import syntheticPreUpdateNode
/** A module collecting the different reasons for synthesising a post-update node. */
module syntheticPostUpdateNode {
/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
NeedsSyntheticPostUpdateNode pre;
SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
override Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
/** A data flow node for which we should synthesise an associated post-update node. */
class NeedsSyntheticPostUpdateNode extends Node {
NeedsSyntheticPostUpdateNode() {
this = argumentPreUpdateNode()
or
this = storePreUpdateNode()
or
this = readPreUpdateNode()
}
/**
* A label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
* We favour being an argument as the reason for the post-update node in case multiple reasons apply.
*/
string label() {
if this = argumentPreUpdateNode()
then result = "arg"
else
if this = storePreUpdateNode()
then result = "store"
else result = "read"
}
}
/**
* An argument might have its value changed as a result of a call.
* Certain arguments, such as implicit self arguments, are already post-update nodes
* and should not have an extra node synthesised.
*/
ArgumentNode argumentPreUpdateNode() {
result = any(FunctionCall c).getArg(_)
or
// Avoid argument 0 of method calls as those have read post-update nodes.
exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
or
result = any(SpecialCall c).getArg(_)
or
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
}
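// For illustration (hypothetical code):
// `def append_one(items):  # hypothetical helper`
// `    items.append(1)`
// `lst = []`
// `append_one(lst)`
// The argument `lst` gets a synthesized post-update node, since the call may have
// modified the value it refers to.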
/** An object might have its value changed after a store. */
CfgNode storePreUpdateNode() {
exists(Attribute a |
result.getNode() = a.getObject().getAFlowNode() and
a.getCtx() instanceof Store
)
}
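// For illustration (hypothetical code), in
// `obj = SomeClass()  # hypothetical class`
// `obj.foo = tracked`
// the node for `obj` on the second line is the pre-update node; its synthesized
// post-update node represents `obj` after the attribute store.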
/**
* A node marking the state change of an object after a read.
*
* A reverse read happens when the result of a read is modified, e.g. in
* ```python
* l = [ mutable ]
* l[0].mutate()
* ```
* we may now have changed the content of `l`. To track this, there must be
* a post-update node for `l`.
*/
CfgNode readPreUpdateNode() {
exists(Attribute a |
result.getNode() = a.getObject().getAFlowNode() and
a.getCtx() instanceof Load
)
or
result.getNode() = any(SubscriptNode s).getObject()
or
// The dictionary argument is read from if the callable has parameters matching the keys.
result.getNode().getNode() = any(Call call).getKwargs()
}
}
import syntheticPostUpdateNode
class DataFlowExpr = Expr;
/**
* Flow between ESSA variables.
* This includes both local and global variables.
* Flow comes from definitions, uses and refinements.
*/
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
// If they have different enclosing callables, we get consistency errors.
module EssaFlow {
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
// Definition
// `x = f(42)`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
nodeFrom.(CfgNode).getNode() =
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
or
// With definition
// `with f(42) as x:`
// nodeFrom is `f(42)`, cfg node
// nodeTo is `x`, essa var
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = contextManager and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
with.getContextExpr() = contextManager.getNode() and
with.getOptionalVars() = var.getNode() and
not with.isAsync() and
contextManager.strictlyDominates(var)
)
or
// Async with var definition
// `async with f(42) as x:`
// nodeFrom is `x`, cfg node
// nodeTo is `x`, essa var
//
// This makes the cfg node the local source of the awaited value.
exists(With with, ControlFlowNode var |
nodeFrom.(CfgNode).getNode() = var and
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
with.getOptionalVars() = var.getNode() and
with.isAsync()
)
or
// Parameter definition
// `def foo(x):`
// nodeFrom is `x`, cfgNode
// nodeTo is `x`, essa var
exists(ParameterDefinition pd |
nodeFrom.asCfgNode() = pd.getDefiningNode() and
nodeTo.asVar() = pd.getVariable()
)
or
// First use after definition
// `y = 42`
// `x = f(y)`
// nodeFrom is `y` on first line, essa var
// nodeTo is `y` on second line, cfg node
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
or
// Next use after use
// `x = f(y)`
// `z = y + 1`
// nodeFrom is 'y' on first line, cfg node
// nodeTo is `y` on second line, cfg node
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
or
// If expressions
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
or
// boolean inline expressions such as `x or y` or `x and y`
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
or
// Flow inside an unpacking assignment
iterableUnpackingFlowStep(nodeFrom, nodeTo)
or
matchFlowStep(nodeFrom, nodeTo)
or
// Overflow keyword argument
exists(CallNode call, CallableValue callable |
call = callable.getACall() and
nodeTo = TKwOverflowNode(call, callable) and
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode()
)
}
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
AdjacentUses::adjacentUseUse(nodeFrom, nodeTo)
}
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
}
}
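// As a combined illustration of the steps above (hypothetical code):
// `def f(x):  # hypothetical function`
// `    y = g(x)`
// `    return y`
// Flow goes from the defining node of the parameter `x` to its essa variable, from
// that definition to the first use of `x` in `g(x)`, from `g(x)` to the essa
// variable `y`, and from that definition to the use of `y` in `return y`.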
//--------
// Local flow
//--------
/**
* This is the local flow predicate that is used as a building block in global
* data flow.
*
* Local flow can happen either at import time, when the module is initialised
* or at runtime when callables in the module are called.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// If there is local flow out of a node `node`, we want flow
// both out of `node` and any post-update node of `node`.
exists(Node node |
nodeFrom = update(node) and
(
importTimeLocalFlowStep(node, nodeTo) or
runtimeLocalFlowStep(node, nodeTo)
)
)
}
/**
* Holds if `node` is found at the top level of a module.
*/
pragma[inline]
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
// As a proxy for whether statements can be executed at import time,
// we check if they appear at the top level.
// This will miss statements inside functions called from the top level.
isTopLevel(nodeFrom) and
isTopLevel(nodeTo) and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
}
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Anything not at the top level can be executed at runtime.
not isTopLevel(nodeFrom) and
not isTopLevel(nodeTo) and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
}
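// For illustration (hypothetical code), in a module such as
// `CONFIG = read_config()  # hypothetical call`
// `def get():`
// `    value = compute()  # hypothetical call`
// `    return value`
// the step from `read_config()` to `CONFIG` happens at the top level and is an
// import-time step, while the steps inside `get` are runtime steps.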
/** `ModuleVariable`s are accessed via jump steps at runtime. */
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
// Module variable read
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
or
// Module variable write
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
or
// Setting the possible values of the variable at the end of import time
exists(SsaVariable def |
def = any(SsaVariable var).getAnUltimateDefinition() and
def.getDefinition() = nodeFrom.asCfgNode() and
def.getVariable() = nodeTo.(ModuleVariableNode).getVariable()
)
}
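// For illustration (hypothetical code):
// `counter = 0`
// `def bump():`
// `    return counter + 1`
// The write `counter = 0` reaches the `ModuleVariableNode` for `counter`, and the
// read of `counter` inside `bump` is connected to it via a jump step, since the two
// occurrences are in different callables.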
/**
* Holds if `result` is either `node`, or the post-update node for `node`.
*/
private Node update(Node node) {
result = node
or
result.(PostUpdateNode).getPreUpdateNode() = node
}
// TODO: Make modules for these headings
//--------
// Global flow
//--------
//
/**
* Computes routing of arguments to parameters
*
* When a call contains more positional arguments than there are positional parameters,
* the extra positional arguments are passed as a tuple to a starred parameter. This is
* achieved by synthesizing a node `TPosOverflowNode(call, callable)`
* that represents the tuple of extra positional arguments. There is a store step from each
* extra positional argument to this node.
*
* CURRENTLY NOT SUPPORTED:
* When a call contains an iterable unpacking argument, such as `func(*args)`, it is expanded into positional arguments.
*
* CURRENTLY NOT SUPPORTED:
* If a call contains an iterable unpacking argument, such as `func(*args)`, and the callee contains a starred argument, any extra
* positional arguments are passed to the starred argument.
*
* When a call contains keyword arguments that do not correspond to keyword parameters, these
* extra keyword arguments are passed as a dictionary to a doubly starred parameter. This is
* achieved by synthesizing a node `TKwOverflowNode(call, callable)`
* that represents the dictionary of extra keyword arguments. There is a store step from each
* extra keyword argument to this node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, with entries corresponding to a keyword parameter,
* the value at such a key is unpacked and passed to the parameter. This is achieved
* by synthesizing an argument node `TKwUnpacked(call, callable, name)` representing the unpacked
* value. This node is used as the argument passed to the matching keyword parameter. There is a read
* step from the dictionary argument to the synthesized argument node.
*
* When a call contains a dictionary unpacking argument, such as `func(**kwargs)`, and the callee contains a doubly starred parameter,
* entries which are not unpacked are passed to the doubly starred parameter. This is achieved by
* adding a dataflow step from the dictionary argument to `TKwOverflowNode(call, callable)` and a
* step to clear content of that node at any unpacked keys.
*
* ## Examples:
* Assume that we have the callable
* ```python
* def f(x, y, *t, **d):
* pass
* ```
* Then the call
* ```python
* f(0, 1, 2, a=3)
* ```
* will be modeled as
* ```python
* f(0, 1, [*t], [**d])
* ```
* where `[` and `]` denotes synthesized nodes, so `[*t]` is the synthesized tuple argument
* `TPosOverflowNode` and `[**d]` is the synthesized dictionary argument `TKwOverflowNode`.
* There will be a store step from `2` to `[*t]` at pos `0` and one from `3` to `[**d]` at key
* `a`.
*
* For the call
* ```python
* f(0, **{"y": 1, "a": 3})
* ```
* no tuple argument is synthesized. It is modeled as
* ```python
* f(0, [y=1], [**d])
* ```
* where `[y=1]` is the synthesized unpacked argument `TKwUnpacked` (with `name` = `y`). There is
* a read step from `**{"y": 1, "a": 3}` to `[y=1]` at key `y` to get the value passed to the parameter
* `y`. There is a dataflow step from `**{"y": 1, "a": 3}` to `[**d]` to transfer the content and
* a clearing of content at key `y` for node `[**d]`, since that value has been unpacked.
*/
module ArgumentPassing {
/**
* Holds if `call` represents a `DataFlowCall` to a `DataFlowCallable` represented by `callable`.
*
* It _may not_ be the case that `call = callable.getACall()`, e.g. if `call` represents a `ClassCall`.
*
* Used to limit the size of predicates.
*/
predicate connects(CallNode call, CallableValue callable) {
exists(DataFlowCall c |
call = c.getNode() and
callable = c.getCallable().getCallableValue()
)
}
/**
* Gets the `n`th parameter of `callable`.
* If the callable has a starred parameter, say `*tuple`, that is matched with `n=-1`.
* If the callable has a doubly starred parameter, say `**dict`, that is matched with `n=-2`.
* Note that, unlike other languages, we do _not_ use -1 for the position of `self` in Python,
* as it is an explicit parameter at position 0.
*/
NameNode getParameter(CallableValue callable, int n) {
// positional parameter
result = callable.getParameter(n)
or
// starred parameter, `*tuple`
exists(Function f |
f = callable.getScope() and
n = -1 and
result = f.getVararg().getAFlowNode()
)
or
// doubly starred parameter, `**dict`
exists(Function f |
f = callable.getScope() and
n = -2 and
result = f.getKwarg().getAFlowNode()
)
}
/**
* A type representing a mapping from argument indices to parameter indices.
* We currently use two mappings: NoShift, the identity, used for ordinary
* function calls, and ShiftOneUp which is used for calls where an extra argument
* is inserted. These include method calls, constructor calls and class calls.
* In these calls, the argument at index `n` is mapped to the parameter at position `n+1`.
*/
newtype TArgParamMapping =
TNoShift() or
TShiftOneUp()
/** A mapping used for parameter passing. */
abstract class ArgParamMapping extends TArgParamMapping {
/** Gets the index of the parameter that corresponds to the argument at index `argN`. */
bindingset[argN]
abstract int getParamN(int argN);
/** Gets a textual representation of this element. */
abstract string toString();
}
/** A mapping that passes argument `n` to parameter `n`. */
class NoShift extends ArgParamMapping, TNoShift {
NoShift() { this = TNoShift() }
override string toString() { result = "NoShift [n -> n]" }
bindingset[argN]
override int getParamN(int argN) { result = argN }
}
/** A mapping that passes argument `n` to parameter `n+1`. */
class ShiftOneUp extends ArgParamMapping, TShiftOneUp {
ShiftOneUp() { this = TShiftOneUp() }
override string toString() { result = "ShiftOneUp [n -> n+1]" }
bindingset[argN]
override int getParamN(int argN) { result = argN + 1 }
}
/**
* Gets the node representing the argument to `call` that is passed to the parameter at
* (zero-based) index `paramN` in `callable`. If this is a positional argument, it must appear
* at an index, `argN`, in `call` which satisfies `paramN = mapping.getParamN(argN)`.
*
* `mapping` will be the identity for function calls, but not for method- or constructor calls,
* where the first parameter is `self` and the first positional argument is passed to the second positional parameter.
* Similarly for classmethod calls, where the first parameter is `cls`.
*
* NOT SUPPORTED: Keyword-only parameters.
*/
Node getArg(CallNode call, ArgParamMapping mapping, CallableValue callable, int paramN) {
connects(call, callable) and
(
// positional argument
exists(int argN |
paramN = mapping.getParamN(argN) and
result = TCfgNode(call.getArg(argN))
)
or
// keyword argument
// TODO: Since `getArgName` has no results for keyword-only parameters,
// these are currently not supported.
exists(Function f, string argName |
f = callable.getScope() and
f.getArgName(paramN) = argName and
result = TCfgNode(call.getArgByName(unbind_string(argName)))
)
or
// a synthesized argument passed to the starred parameter (at position -1)
callable.getScope().hasVarArg() and
paramN = -1 and
result = TPosOverflowNode(call, callable)
or
// a synthesized argument passed to the doubly starred parameter (at position -2)
callable.getScope().hasKwArg() and
paramN = -2 and
result = TKwOverflowNode(call, callable)
or
// argument unpacked from dict
exists(string name |
call_unpacks(call, mapping, callable, name, paramN) and
result = TKwUnpackedNode(call, callable, name)
)
)
}
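// For illustration (hypothetical code), for a method call such as
// `class C:`
// `    def m(self, x): ...  # hypothetical method`
// `c = C()`
// `c.m(42)`
// the mapping is `ShiftOneUp`, so the argument `42` at index 0 is routed to the
// parameter `x` at index 1; the receiver `c` is passed to `self` separately by the
// call class (see `MethodCall.getArg`).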
/** Currently required in `getArg` in order to prevent a bad join. */
bindingset[result, s]
private string unbind_string(string s) { result <= s and s <= result }
/** Gets the control flow node that is passed as the `n`th overflow positional argument. */
ControlFlowNode getPositionalOverflowArg(CallNode call, CallableValue callable, int n) {
connects(call, callable) and
exists(Function f, int posCount, int argNr |
f = callable.getScope() and
f.hasVarArg() and
posCount = f.getPositionalParameterCount() and
result = call.getArg(argNr) and
argNr >= posCount and
argNr = posCount + n
)
}
/** Gets the control flow node that is passed as the overflow keyword argument with key `key`. */
ControlFlowNode getKeywordOverflowArg(CallNode call, CallableValue callable, string key) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
f.hasKwArg() and
not exists(f.getArgByName(key)) and
result = call.getArgByName(key)
)
}
/**
* Holds if `call` unpacks a dictionary argument in order to pass it via `name`.
* It will then be passed to the parameter of `callable` at index `paramN`.
*/
predicate call_unpacks(
CallNode call, ArgParamMapping mapping, CallableValue callable, string name, int paramN
) {
connects(call, callable) and
exists(Function f |
f = callable.getScope() and
not exists(int argN | paramN = mapping.getParamN(argN) | exists(call.getArg(argN))) and // no positional argument available
name = f.getArgName(paramN) and
// not exists(call.getArgByName(name)) and // only matches keyword arguments not preceded by **
// TODO: make the below logic respect control flow splitting (by not going to the AST).
not call.getNode().getANamedArg().(Keyword).getArg() = name and // no keyword argument available
paramN >= 0 and
paramN < f.getPositionalParameterCount() + f.getKeywordOnlyParameterCount() and
exists(call.getNode().getKwargs()) // dict argument available
)
}
}
import ArgumentPassing
/**
* IPA type for DataFlowCallable.
*
* A callable is either a function value, a class value, or a module (for enclosing `ModuleVariableNode`s).
* A module has no calls.
*/
newtype TDataFlowCallable =
TCallableValue(CallableValue callable) {
callable instanceof FunctionValue and
not callable.(FunctionValue).isLambda()
or
callable instanceof ClassValue
} or
TLambda(Function lambda) { lambda.isLambda() } or
TModule(Module m)
/** Represents a callable. */
abstract class DataFlowCallable extends TDataFlowCallable {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets a call to this callable. */
abstract CallNode getACall();
/** Gets the scope of this callable. */
abstract Scope getScope();
/** Gets the specified parameter of this callable. */
abstract NameNode getParameter(int n);
/** Gets the name of this callable. */
abstract string getName();
/** Gets a callable value for this callable, if one exists. */
abstract CallableValue getCallableValue();
}
/** A class representing a callable value. */
class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
CallableValue callable;
DataFlowCallableValue() { this = TCallableValue(callable) }
override string toString() { result = callable.toString() }
override CallNode getACall() { result = callable.getACall() }
override Scope getScope() { result = callable.getScope() }
override NameNode getParameter(int n) { result = getParameter(callable, n) }
override string getName() { result = callable.getName() }
override CallableValue getCallableValue() { result = callable }
}
/** A class representing a callable lambda. */
class DataFlowLambda extends DataFlowCallable, TLambda {
Function lambda;
DataFlowLambda() { this = TLambda(lambda) }
override string toString() { result = lambda.toString() }
override CallNode getACall() { result = this.getCallableValue().getACall() }
override Scope getScope() { result = lambda.getEvaluatingScope() }
override NameNode getParameter(int n) { result = getParameter(this.getCallableValue(), n) }
override string getName() { result = "Lambda callable" }
override FunctionValue getCallableValue() {
result.getOrigin().getNode() = lambda.getDefinition()
}
}
/** A class representing the scope in which a `ModuleVariableNode` appears. */
class DataFlowModuleScope extends DataFlowCallable, TModule {
Module mod;
DataFlowModuleScope() { this = TModule(mod) }
override string toString() { result = mod.toString() }
override CallNode getACall() { none() }
override Scope getScope() { result = mod }
override NameNode getParameter(int n) { none() }
override string getName() { result = mod.getName() }
override CallableValue getCallableValue() { none() }
}
/**
* IPA type for DataFlowCall.
*
* Calls corresponding to `CallNode`s are either to callable values or to classes.
* The latter is directed to the callable corresponding to the `__init__` method of the class.
*
* An `__init__` method can also be called directly, so that the callable can be targeted by
* different types of calls. In that case, the parameter mappings will be different,
* as the class call will synthesize an argument node to be mapped to the `self` parameter.
*
* A call corresponding to a special method call is handled by the corresponding `SpecialMethodCallNode`.
*
* TODO: Add `TClassMethodCall` mapping `cls` appropriately.
*/
newtype TDataFlowCall =
TFunctionCall(CallNode call) { call = any(FunctionValue f).getAFunctionCall() } or
/** Bound methods need to make room for the explicit self parameter. */
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
TClassCall(CallNode call) { call = any(ClassValue c).getACall() } or
TSpecialCall(SpecialMethodCallNode special)
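// For illustration (hypothetical code), with
// `class C:`
// `    def __init__(self, x): ...`
// the class call `C(42)` is a `TClassCall` targeting `__init__` and synthesizes the
// argument bound to `self`, whereas a direct call such as `C.__init__(obj, 42)`
// passes `obj` to `self` explicitly and therefore uses a different parameter mapping.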
/** Represents a call. */
abstract class DataFlowCall extends TDataFlowCall {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the callable to which this call goes. */
abstract DataFlowCallable getCallable();
/**
* Gets the argument to this call that will be sent
* to the `n`th parameter of the callable.
*/
abstract Node getArg(int n);
/** Gets the control flow node representing this call. */
abstract ControlFlowNode getNode();
/** Gets the enclosing callable of this call. */
abstract DataFlowCallable getEnclosingCallable();
/** Gets the location of this dataflow call. */
Location getLocation() { result = this.getNode().getLocation() }
}
/**
* Represents a call to a function/lambda.
* This excludes calls to bound methods, classes, and special methods.
* Bound method calls and class calls insert an argument for the explicit
* `self` parameter, and special method calls have special argument passing.
*/
class FunctionCall extends DataFlowCall, TFunctionCall {
CallNode call;
DataFlowCallable callable;
FunctionCall() {
this = TFunctionCall(call) and
call = callable.getACall()
}
override string toString() { result = call.toString() }
override Node getArg(int n) { result = getArg(call, TNoShift(), callable.getCallableValue(), n) }
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = callable }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getNode().getScope() }
}
/**
* Represents a call to a bound method.
* The node representing the instance is inserted as argument to the `self` parameter.
*/
class MethodCall extends DataFlowCall, TMethodCall {
CallNode call;
FunctionValue bm;
MethodCall() {
this = TMethodCall(call) and
call = bm.getACall()
}
private CallableValue getCallableValue() { result = bm }
override string toString() { result = call.toString() }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TCfgNode(call.getFunction().(AttrNode).getObject())
}
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
}
/**
* Represents a call to a class.
* The pre-update node for the call is inserted as argument to the `self` parameter.
* That makes the call node be the post-update node holding the value of the object
* after the constructor has run.
*/
class ClassCall extends DataFlowCall, TClassCall {
CallNode call;
ClassValue c;
ClassCall() {
this = TClassCall(call) and
call = c.getACall()
}
private CallableValue getCallableValue() { c.getScope().getInitMethod() = result.getScope() }
override string toString() { result = call.toString() }
override Node getArg(int n) {
n > 0 and result = getArg(call, TShiftOneUp(), this.getCallableValue(), n)
or
n = 0 and result = TSyntheticPreUpdateNode(TCfgNode(call))
}
override ControlFlowNode getNode() { result = call }
override DataFlowCallable getCallable() { result = TCallableValue(this.getCallableValue()) }
override DataFlowCallable getEnclosingCallable() { result.getScope() = call.getScope() }
}
/** Represents a call to a special method. */
class SpecialCall extends DataFlowCall, TSpecialCall {
SpecialMethodCallNode special;
SpecialCall() { this = TSpecialCall(special) }
override string toString() { result = special.toString() }
override Node getArg(int n) { result = TCfgNode(special.(SpecialMethod::Potential).getArg(n)) }
override ControlFlowNode getNode() { result = special }
override DataFlowCallable getCallable() {
result = TCallableValue(special.getResolvedSpecialMethod())
}
override DataFlowCallable getEnclosingCallable() {
result.getScope() = special.getNode().getScope()
}
}
/** Gets a viable run-time target for the call `call`. */
DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }
private newtype TReturnKind = TNormalReturnKind()
/**
* A return kind. A return kind describes how a value can be returned
* from a callable. For Python, this is simply a method return.
*/
class ReturnKind extends TReturnKind {
/** Gets a textual representation of this element. */
string toString() { result = "return" }
}
/** A data flow node that represents a value returned by a callable. */
class ReturnNode extends CfgNode {
Return ret;
// See `TaintTrackingImplementation::returnFlowStep`
ReturnNode() { node = ret.getValue().getAFlowNode() }
/** Gets the kind of this return node. */
ReturnKind getKind() { any() }
}
/** A data flow node that represents the output of a call. */
class OutNode extends CfgNode {
OutNode() { node instanceof CallNode }
}
/**
* Gets a node that can read the value returned from `call` with return kind
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
call.getNode() = result.getNode() and
kind = TNormalReturnKind()
}
//--------
// Type pruning
//--------
newtype TDataFlowType = TAnyFlow()
class DataFlowType extends TDataFlowType {
/** Gets a textual representation of this element. */
string toString() { result = "DataFlowType" }
}
/** A node that performs a type cast. */
class CastNode extends Node {
// We include read- and store steps here to force them to be
// shown in path explanations.
// This hack is necessary, because we have included some of these
// steps as default taint steps, making them be suppressed in path
// explanations.
// We should revert this once we can remove these steps from the
// default taint steps; this should be possible once we have
// implemented flow summaries and recursive content.
CastNode() { readStep(_, _, this) or storeStep(_, _, this) }
}
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*/
pragma[inline]
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
/**
* Gets the type of `node`.
*/
DataFlowType getNodeType(Node node) {
result = TAnyFlow() and
// Suppress unused variable warning
node = node
}
/** Gets a string representation of a type returned by `getErasedRepr`. */
string ppReprType(DataFlowType t) { none() }
//--------
// Extra flow
//--------
/**
* Holds if `pred` can flow to `succ`, by jumping from one callable to
* another. Additional steps specified by the configuration are *not*
* taken into account.
*/
predicate jumpStep(Node nodeFrom, Node nodeTo) {
runtimeJumpStep(nodeFrom, nodeTo)
or
// Read of module attribute:
exists(AttrRead r, ModuleValue mv |
r.getObject().asCfgNode().pointsTo(mv) and
module_export(mv.getScope(), r.getAttributeName(), nodeFrom) and
nodeTo = r
)
or
// Default value for parameter flows to that parameter
defaultValueFlowStep(nodeFrom, nodeTo)
}
/**
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
* not include `name`).
*/
private predicate module_export(Module m, string name, CfgNode defn) {
exists(EssaVariable v |
v.getName() = name and
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
|
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
or
defn.getNode() = v.getDefinition().(ArgumentRefinement).getArgument()
)
}
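// For illustration (hypothetical code), with a module `config.py` containing
// `SECRET = get_secret()  # hypothetical call`
// and another module doing
// `import config`
// `value = config.SECRET`
// there is a jump step from the assigned value in `config.py` to the attribute read
// `config.SECRET`. This is an overapproximation if `config` defines an `__all__`
// that omits `SECRET`.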
//--------
// Field flow
//--------
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
* content `c`.
*/
predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
tupleStoreStep(nodeFrom, c, nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
or
comprehensionStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
posOverflowStoreStep(nodeFrom, c, nodeTo)
or
kwOverflowStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
}
/** Data flows from an element of a list to the list. */
predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo) {
// List
// `[..., 42, ...]`
// nodeFrom is `42`, cfg node
// nodeTo is the list, `[..., 42, ...]`, cfg node
// c denotes element of list
nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
// Suppress unused variable warning
c = c
}
/** Data flows from an element of a set to the set. */
predicate setStoreStep(CfgNode nodeFrom, SetElementContent c, CfgNode nodeTo) {
// Set
// `{..., 42, ...}`
// nodeFrom is `42`, cfg node
// nodeTo is the set, `{..., 42, ...}`, cfg node
// c denotes element of set
nodeTo.getNode().(SetNode).getAnElement() = nodeFrom.getNode() and
// Suppress unused variable warning
c = c
}
/** Data flows from an element of a tuple to the tuple at a specific index. */
predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo) {
// Tuple
// `(..., 42, ...)`
// nodeFrom is `42`, cfg node
// nodeTo is the tuple, `(..., 42, ...)`, cfg node
// c denotes element of tuple and index of nodeFrom
exists(int n |
nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and
not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
c.getIndex() = n
)
}
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
// Dictionary
// `{..., "key" = 42, ...}`
// nodeFrom is `42`, cfg node
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
// c denotes element of dictionary and the key `"key"`
exists(KeyValuePair item |
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
nodeFrom.getNode().getNode() = item.getValue() and
c.getKey() = item.getKey().(StrConst).getS()
)
}
/** Data flows from an element expression in a comprehension to the comprehension. */
predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// Comprehension
// `[x+1 for x in l]`
// nodeFrom is `x+1`, cfg node
// nodeTo is `[x+1 for x in l]`, cfg node
// c denotes list or set or dictionary without index
//
// List
nodeTo.getNode().getNode().(ListComp).getElt() = nodeFrom.getNode().getNode() and
c instanceof ListElementContent
or
// Set
nodeTo.getNode().getNode().(SetComp).getElt() = nodeFrom.getNode().getNode() and
c instanceof SetElementContent
or
// Dictionary
nodeTo.getNode().getNode().(DictComp).getElt() = nodeFrom.getNode().getNode() and
c instanceof DictionaryElementAnyContent
or
// Generator
nodeTo.getNode().getNode().(GeneratorExp).getElt() = nodeFrom.getNode().getNode() and
c instanceof ListElementContent
}
/**
* Holds if `nodeFrom` flows into an attribute (corresponding to `c`) of `nodeTo` via an attribute assignment.
*
* For example, in
* ```python
* obj.foo = x
* ```
* data flows from `x` to (the post-update node for) `obj` via assignment to `foo`.
*/
predicate attributeStoreStep(CfgNode nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
exists(AttrNode attr |
nodeFrom.asCfgNode() = attr.(DefinitionNode).getValue() and
attr.getName() = c.getAttribute() and
attr.getObject() = nodeTo.getPreUpdateNode().(CfgNode).getNode()
)
}
/**
* Holds if `nodeFrom` flows into the synthesized positional overflow argument (`nodeTo`)
* at the position indicated by `c`.
*/
predicate posOverflowStoreStep(CfgNode nodeFrom, TupleElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, int n |
nodeFrom.asCfgNode() = getPositionalOverflowArg(call, callable, n) and
nodeTo = TPosOverflowNode(call, callable) and
c.getIndex() = n
)
}
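// For illustration (hypothetical code), given
// `def f(x, *rest): ...  # hypothetical function`
// `f(0, 1, 2)`
// the arguments `1` and `2` are stored into the synthesized `TPosOverflowNode` at
// tuple indices 0 and 1, respectively.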
/**
* Holds if `nodeFrom` flows into the synthesized keyword overflow argument (`nodeTo`)
* at the key indicated by `c`.
*/
predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, string key |
nodeFrom.asCfgNode() = getKeywordOverflowArg(call, callable, key) and
nodeTo = TKwOverflowNode(call, callable) and
c.getKey() = key
)
}
predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
exists(Function f, Parameter p, ParameterDefinition def |
// `getArgByName` supports, unlike `getAnArg`, keyword-only parameters
p = f.getArgByName(_) and
nodeFrom.asExpr() = p.getDefault() and
// The following expresses
// nodeTo.(ParameterNode).getParameter() = p
// without non-monotonic recursion
def.getParameter() = p and
nodeTo.getNode() = def.getDefiningNode()
)
}
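// For illustration (hypothetical code), in
// `def f(x, y=DEFAULT): ...  # hypothetical function and default`
// the default expression `DEFAULT` flows to the parameter node for `y`, so calls
// that omit `y` still see flow from the default value.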
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
popReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
or
kwUnpackReadStep(nodeFrom, c, nodeTo)
}
/** Data flows from a sequence to a subscript of the sequence. */
predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// Subscript
// `l[3]`
// nodeFrom is `l`, cfg node
// nodeTo is `l[3]`, cfg node
// c is compatible with 3
nodeFrom.getNode() = nodeTo.getNode().(SubscriptNode).getObject() and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
or
c instanceof DictionaryElementAnyContent
or
c.(TupleElementContent).getIndex() =
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(IntegerLiteral).getValue()
or
c.(DictionaryElementContent).getKey() =
nodeTo.getNode().(SubscriptNode).getIndex().getNode().(StrConst).getS()
)
}
/**
* The unpacking assignment takes the general form
* ```python
* sequence = iterable
* ```
* where `sequence` is either a tuple or a list and it can contain wildcards.
* The iterable can be any iterable, which means that (CodeQL modeling of) content
* will need to change type if it should be transferred from the RHS to the LHS.
*
* Note that (CodeQL modeling of) content does not have to change type on data-flow
* paths _inside_ the LHS, as the different allowed syntaxes here are merely a convenience.
* Consequently, we model all LHS sequences as tuples, which have the more precise content
* model, making flow to the elements more precise. If an element is a starred variable,
* we will have to mutate the content type to be list content.
*
* We may for instance have
* ```python
* (a, b) = ["a", SOURCE] # RHS has content `ListElementContent`
* ```
* Due to the abstraction for list content, we do not know whether `SOURCE`
* ends up in `a` or in `b`, so we want to overapproximate and see it in both.
*
* Using wildcards we may have
* ```python
* (a, *b) = ("a", "b", SOURCE) # RHS has content `TupleElementContent(2)`
* ```
* Since the starred variables are always assigned (Python-)type list, `*b` will be
* `["b", SOURCE]`, and we will again overapproximate and assign it
* content corresponding to anything found in the RHS.
*
* For a precise transfer
* ```python
* (a, b) = ("a", SOURCE) # RHS has content `TupleElementContent(1)`
* ```
* we wish to keep the precision, so only `b` receives the tuple content at index 1.
*
* Finally, `sequence` is actually a pattern and can have a more complicated structure,
* such as
* ```python
* (a, [b, *c]) = ("a", ["b", SOURCE]) # RHS has content `TupleElementContent(1); ListElementContent`
* ```
* where `a` should not receive content, but `b` and `c` should. `c` will be `[SOURCE]` so
* should have the content transferred, while `b` should read it.
*
* To transfer content from RHS to the elements of the LHS in the expression `sequence = iterable`,
* we use two synthetic nodes:
*
* - `TIterableSequence(sequence)` which captures the content-modeling the entire `sequence` will have
* (essentially just a copy of the content-modeling the RHS has)
*
* - `TIterableElement(sequence)` which captures the content-modeling that will be assigned to an element.
* Note that an empty access path means that the value we are tracking flows directly to the element.
*
*
* The `TIterableSequence(sequence)` is at this point superfluous but becomes useful when handling recursive
* structures in the LHS, where `sequence` is some internal sequence node. We can have a uniform treatment
* by always having these two synthetic nodes. So we transfer to (or, in the recursive case, read into)
* `TIterableSequence(sequence)`, from which we take a read step to `TIterableElement(sequence)` and then a
* store step to `sequence`.
*
* This allows the unknown content from the RHS to be read into `TIterableElement(sequence)` and tuple content
* to then be stored into `sequence`. If the content is already tuple content, this indirection creates crosstalk
* between indices. Therefore, tuple content is never read into `TIterableElement(sequence)`; it is instead
* transferred directly from `TIterableSequence(sequence)` to `sequence` via a flow step. Such a flow step will
* also transfer other content, but only tuple content is further read from `sequence` into its elements.
*
* The strategy is then via several read-, store-, and flow steps:
* 1. a) [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
* flow step. From here, everything happens on the LHS.
*
* b) [Read] If the unpacking happens inside a for as in
* ```python
* for sequence in iterable
* ```
* then content is read from `iterable` to `TIterableSequence(sequence)`.
*
* 2. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
* flow step. (Here only tuple content is relevant.)
*
* 3. [Read] Content is read from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
* As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
* crosstalk.
*
* 4. [Store] Content is stored from `TIterableElement(sequence)` to `sequence`.
* Content type is `TupleElementContent` with indices taken from the syntax.
* For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
*
* 5. [Read] Content is read from `sequence` to its elements.
* a) If the element is a plain variable, the target is the corresponding essa node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
*
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
* content type `ListElementContent`.
*
* 7. [Flow, Read, Store] Steps 2 through 6 are repeated for all recursive elements that are sequences.
*
*
* We illustrate the above steps on the assignment
*
* ```python
* (a, b) = ["a", SOURCE]
* ```
*
* Looking at the content propagation to `a`:
* `["a", SOURCE]`: [ListElementContent]
*
* --Step 1a-->
*
* `TIterableSequence((a, b))`: [ListElementContent]
*
* --Step 3-->
*
* `TIterableElement((a, b))`: []
*
* --Step 4-->
*
* `(a, b)`: [TupleElementContent(0)]
*
* --Step 5a-->
*
* `a`: []
*
* This means there is data flow from the RHS to `a` (an overapproximation). The same logic shows
* there is data flow to `b`. Note that _Step 3_ and _Step 4_ would not have been needed if the RHS
* had been a tuple, since then _Step 2_ could have been used instead.
*
* Another, more complicated example:
* ```python
* (a, [b, *c]) = ["a", [SOURCE]]
* ```
* where the path to `c` is
*
* `["a", [SOURCE]]`: [ListElementContent; ListElementContent]
*
* --Step 1a-->
*
* `TIterableSequence((a, [b, *c]))`: [ListElementContent; ListElementContent]
*
* --Step 3-->
*
* `TIterableElement((a, [b, *c]))`: [ListElementContent]
*
* --Step 4-->
*
* `(a, [b, *c])`: [TupleElementContent(1); ListElementContent]
*
* --Step 5b-->
*
* `TIterableSequence([b, *c])`: [ListElementContent]
*
* --Step 3-->
*
* `TIterableElement([b, *c])`: []
*
* --Step 4-->
*
* `[b, *c]`: [TupleElementContent(1)]
*
* --Step 5c-->
*
* `TIterableElement(c)`: []
*
* --Step 6-->
*
* `c`: [ListElementContent]
*/
module IterableUnpacking {
/**
* The target of a `for`, e.g. `x` in `for x in list` or in `[42 for x in list]`.
* This class also records the source, which in both above cases is `list`.
* This class abstracts away the differing representations of comprehensions and
* for statements.
*/
class ForTarget extends ControlFlowNode {
Expr source;
ForTarget() {
exists(For for |
source = for.getIter() and
this.getNode() = for.getTarget() and
not for = any(Comp comp).getNthInnerLoop(0)
)
or
exists(Comp comp |
source = comp.getIterable() and
this.getNode() = comp.getNthInnerLoop(0).getTarget()
)
}
Expr getSource() { result = source }
}
/** The LHS of an assignment. This class also records the assigned value. */
class AssignmentTarget extends ControlFlowNode {
Expr value;
AssignmentTarget() {
exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
}
Expr getValue() { result = value }
}
/** A direct (or top-level) target of an unpacking assignment. */
class UnpackingAssignmentDirectTarget extends ControlFlowNode {
Expr value;
UnpackingAssignmentDirectTarget() {
this instanceof SequenceNode and
(
value = this.(AssignmentTarget).getValue()
or
value = this.(ForTarget).getSource()
)
}
Expr getValue() { result = value }
}
/** A (possibly recursive) target of an unpacking assignment. */
class UnpackingAssignmentTarget extends ControlFlowNode {
UnpackingAssignmentTarget() {
this instanceof UnpackingAssignmentDirectTarget
or
this = any(UnpackingAssignmentSequenceTarget parent).getAnElement()
}
}
/** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget instanceof SequenceNode {
ControlFlowNode getElement(int i) { result = super.getElement(i) }
ControlFlowNode getAnElement() { result = this.getElement(_) }
}
/**
* Step 1a
* Data flows from `iterable` to `TIterableSequence(sequence)`
*/
predicate iterableUnpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
exists(AssignmentTarget target |
nodeFrom.asExpr() = target.getValue() and
nodeTo = TIterableSequenceNode(target)
)
}
/**
* Step 1b
* Data is read from `iterable` to `TIterableSequence(sequence)`
*/
predicate iterableUnpackingForReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and
target instanceof SequenceNode and
nodeTo = TIterableSequenceNode(target)
) and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
)
}
/**
* Step 2
* Data flows from `TIterableSequence(sequence)` to `sequence`
*/
predicate iterableUnpackingTupleFlowStep(Node nodeFrom, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableSequenceNode(target) and
nodeTo.asCfgNode() = target
)
}
/**
* Step 3
* Data flows from `TIterableSequence(sequence)` into `TIterableElement(sequence)`.
* As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
* crosstalk.
*/
predicate iterableUnpackingConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableSequenceNode(target) and
nodeTo = TIterableElementNode(target) and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
// TODO: dict content in iterable unpacking not handled
)
)
}
/**
* Step 4
* Data flows from `TIterableElement(sequence)` to `sequence`.
* Content type is `TupleElementContent` with indices taken from the syntax.
* For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
*/
predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(UnpackingAssignmentSequenceTarget target |
nodeFrom = TIterableElementNode(target) and
nodeTo.asCfgNode() = target and
exists(int index | exists(target.getElement(index)) |
c.(TupleElementContent).getIndex() = index
)
)
}
/**
* Step 5
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
* three cases for what `toNode` should be:
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
*
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
*
* c) If the element is a starred variable, with control-flow node `v`, `toNode` is `TIterableElement(v)`.
*/
predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex
|
target.getElement(starIndex) instanceof StarredNode
or
not exists(target.getAnElement().(StarredNode)) and
starIndex = -1
|
nodeFrom.asCfgNode() = target and
element = target.getElement(index) and
(
if starIndex = -1 or index < starIndex
then c.(TupleElementContent).getIndex() = index
else
// This could get big if big tuples exist
if index = starIndex
then c.(TupleElementContent).getIndex() >= index
else c.(TupleElementContent).getIndex() >= index - 1
) and
(
if element instanceof SequenceNode
then
// Step 5b
nodeTo = TIterableSequenceNode(element)
else
if element instanceof StarredNode
then
// Step 5c
nodeTo = TIterableElementNode(element)
else
// Step 5a
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
)
)
}
/**
* Step 6
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
* content type `ListElementContent`.
*/
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
nodeFrom = TIterableElementNode(starred) and
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
c instanceof ListElementContent
)
}
/** All read steps associated with unpacking assignment. */
predicate iterableUnpackingReadStep(Node nodeFrom, Content c, Node nodeTo) {
iterableUnpackingForReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingElementReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingConvertingReadStep(nodeFrom, c, nodeTo)
}
/** All store steps associated with unpacking assignment. */
predicate iterableUnpackingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
iterableUnpackingStarredElementStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingConvertingStoreStep(nodeFrom, c, nodeTo)
}
/** All flow steps associated with unpacking assignment. */
predicate iterableUnpackingFlowStep(Node nodeFrom, Node nodeTo) {
iterableUnpackingAssignmentFlowStep(nodeFrom, nodeTo)
or
iterableUnpackingTupleFlowStep(nodeFrom, nodeTo)
}
}
import IterableUnpacking
/**
* There are a number of patterns available for the match statement.
* Each one transfers data and content differently to its parts.
*
* Furthermore, given a successful match, we can infer some data about
* the subject. Consider the example:
* ```python
* match choice:
* case 'Y':
* ...body
* ```
* Inside `body`, we know that `choice` has the value `'Y'`.
*
* A similar thing happens with the "as pattern". Consider the example:
* ```python
* match choice:
* case ('y'|'Y') as c:
* ...body
* ```
* By the binding rules, there is data flow from `choice` to `c`. But we
* can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
*
* We will treat such inferences separately as guards. First we will model the data flow
* stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
* top-level subject of the match, but rather the part recursively matched by the current pattern.
* For instance, in the example:
* ```python
* match command:
* case ('quit' as c) | ('go', ('up'|'down') as c):
* ...body
* ```
* `command` is the subject of the first as-pattern, while the second component of `command`
* is the subject of the second as-pattern. As such, 'subject' refers to the value matched by
* the pattern under evaluation.
*
* - as pattern: subject flows to alias as well as to the interior pattern
* - or pattern: subject flows to each alternative
* - literal pattern: flow from the literal to the pattern, to add information
* - capture pattern: subject flows to the variable
* - wildcard pattern: no flow
* - value pattern: flow from the value to the pattern, to add information
* - sequence pattern: each element reads from subject at the associated index
* - star pattern: subject flows to the variable, possibly via a conversion
* - mapping pattern: each value reads from subject at the associated key
* - double star pattern: subject flows to the variable, possibly via a conversion
* - key-value pattern: the value reads from the subject at the key (see mapping pattern)
* - class pattern: all keywords read the appropriate attribute from the subject
* - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
*
* Inside the class pattern, we also find positional arguments. They are converted to
* keyword arguments using the `__match_args__` attribute on the class. We do not
* currently model this.
*/
module MatchUnpacking {
/**
* The subject of a match flows to each top-level pattern
* (a pattern directly under a `case` statement).
*
* We could consider a model closer to use-use-flow, where the subject
* only flows to the first top-level pattern and from there to the
* following ones.
*/
predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchStmt match, Expr subject, Pattern target |
subject = match.getSubject() and
target = match.getCase(_).(Case).getPattern()
|
nodeFrom.asExpr() = subject and
nodeTo.asCfgNode().getNode() = target
)
}
/**
* as pattern: subject flows to alias as well as to the interior pattern
* syntax (toplevel): `case pattern as alias:`
*/
predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchAsPattern subject, Name alias | alias = subject.getAlias() |
// We make the subject flow to the alias via the interior pattern.
// That way, information can propagate from the interior pattern to the alias.
//
// the subject flows to the interior pattern
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = subject.getPattern()
or
// the interior pattern flows to the alias
nodeFrom.asCfgNode().getNode() = subject.getPattern() and
nodeTo.asVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() = alias
)
}
/**
* or pattern: subject flows to each alternative
* syntax (toplevel): `case alt1 | alt2:`
*/
predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchOrPattern subject, Pattern pattern | pattern = subject.getAPattern() |
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* literal pattern: flow from the literal to the pattern, to add information
* syntax (toplevel): `case literal:`
*/
predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchLiteralPattern pattern, Expr literal | literal = pattern.getLiteral() |
nodeFrom.asExpr() = literal and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* capture pattern: subject flows to the variable
* syntax (toplevel): `case var:`
*/
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
nodeFrom.asCfgNode().getNode() = capture and
nodeTo.asVar().getDefinition().(PatternCaptureDefinition).getDefiningNode().getNode() = var
)
}
/**
* value pattern: flow from the value to the pattern, to add information
* syntax (toplevel): `case Dotted.value:`
*/
predicate matchValueFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchValuePattern pattern, Expr value | value = pattern.getValue() |
nodeFrom.asExpr() = value and
nodeTo.asCfgNode().getNode() = pattern
)
}
/**
* sequence pattern: each element reads from subject at the associated index
* syntax (toplevel): `case [a, b]:`
*/
predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, Pattern element |
element = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = element and
(
// tuple content
c.(TupleElementContent).getIndex() = index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
* syntax (toplevel): `case *var:`
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the read step.
*/
predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchSequencePattern subject, int index, MatchStarPattern star |
star = subject.getPattern(index)
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo = TStarPatternElementNode(star) and
(
// tuple content
c.(TupleElementContent).getIndex() >= index
or
// list content
c instanceof ListElementContent
// set content is excluded from sequence patterns,
// see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
)
)
}
/**
* star pattern: subject flows to the variable, possibly via a conversion
   * syntax (toplevel): `case [first, *var]:`
*
* We decompose this flow into a read step and a store step. The read step
* reads both tuple and list content, the store step only stores list content.
* This way, we convert all content to list content.
*
* This is the store step.
*/
predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchStarPattern star |
nodeFrom = TStarPatternElementNode(star) and
nodeTo.asCfgNode().getNode() = star.getTarget() and
c instanceof ListElementContent
)
}
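
  /*
   * For illustration, a hypothetical snippet of the code the read and store steps above
   * model together (names made up):
   * ```python
   * values = (1, 2, 3, 4)
   * match values:
   *     case [first, *rest]:    # `rest` receives the remaining elements, always as a list,
   *         print(first, rest)  # even though `values` is a tuple here
   * ```
   */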
/**
* mapping pattern: each value reads from subject at the associated key
* syntax (toplevel): `case {"color": c, "height": x}:`
*/
predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
Pattern value
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
value = keyValue.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
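
  /*
   * For illustration, a hypothetical snippet of the code this step models (names made up):
   * ```python
   * sprite = {"color": "red", "height": 32}
   * match sprite:
   *     case {"color": c, "height": h}:  # `c` and `h` read the values stored at those keys
   *         print(c, h)
   * ```
   */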
/**
   * double star pattern: the subject mapping flows to the variable
   * syntax (toplevel): `case {**var}:`
   *
   * Dictionary content flows to the double star target, but all keys matched
   * explicitly elsewhere in the mapping pattern are cleared
   * (see `matchMappingClearStep`).
*/
predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
exists(MatchMappingPattern subject, MatchDoubleStarPattern dstar |
dstar = subject.getAMapping()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = dstar.getTarget()
)
}
/**
   * Keys that are matched explicitly in a mapping pattern are not available
   * to a double star pattern in the same mapping pattern.
*/
predicate matchMappingClearStep(Node n, Content c) {
exists(
MatchMappingPattern subject, MatchKeyValuePattern keyValue, MatchLiteralPattern key,
MatchDoubleStarPattern dstar
|
keyValue = subject.getAMapping() and
key = keyValue.getKey() and
dstar = subject.getAMapping()
|
n.asCfgNode().getNode() = dstar.getTarget() and
c.(DictionaryElementContent).getKey() = key.getLiteral().(StrConst).getText()
)
}
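
  /*
   * For illustration, a hypothetical snippet of the code the flow step and the clear step
   * above model together (names made up):
   * ```python
   * options = {"color": "red", "height": 32}
   * match options:
   *     case {"color": c, **rest}:  # `rest` receives the mapping, but the key "color"
   *         print(rest)             # is cleared, since it was matched explicitly
   * ```
   */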
/**
* class pattern: all keywords read the appropriate attribute from the subject
* syntax (toplevel): `case ClassName(attr = val):`
*/
predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
exists(MatchClassPattern subject, MatchKeywordPattern keyword, Name attr, Pattern value |
keyword = subject.getKeyword(_) and
attr = keyword.getAttribute() and
value = keyword.getValue()
|
nodeFrom.asCfgNode().getNode() = subject and
nodeTo.asCfgNode().getNode() = value and
c.(AttributeContent).getAttribute() = attr.getId()
)
}
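
  /*
   * For illustration, a hypothetical snippet of the code this step models (names made up):
   * ```python
   * class Point:
   *     def __init__(self, x, y):
   *         self.x = x
   *         self.y = y
   *
   * p = Point(1, 2)
   * match p:
   *     case Point(x=a, y=b):  # `a` and `b` read the attributes `x` and `y` of the subject
   *         print(a, b)
   * ```
   */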
/** All flow steps associated with match. */
predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
matchSubjectFlowStep(nodeFrom, nodeTo)
or
matchAsFlowStep(nodeFrom, nodeTo)
or
matchOrFlowStep(nodeFrom, nodeTo)
or
matchLiteralFlowStep(nodeFrom, nodeTo)
or
matchCaptureFlowStep(nodeFrom, nodeTo)
or
matchValueFlowStep(nodeFrom, nodeTo)
or
matchMappingFlowStep(nodeFrom, nodeTo)
}
/** All read steps associated with match. */
predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
matchClassReadStep(nodeFrom, c, nodeTo)
or
matchSequenceReadStep(nodeFrom, c, nodeTo)
or
matchMappingReadStep(nodeFrom, c, nodeTo)
or
matchStarReadStep(nodeFrom, c, nodeTo)
}
/** All store steps associated with match. */
predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
matchStarStoreStep(nodeFrom, c, nodeTo)
}
  /** All clear steps associated with match. */
predicate matchClearStep(Node n, Content c) { matchMappingClearStep(n, c) }
}
import MatchUnpacking
/** Data flows from a collection (a list, set, or dictionary) to a call to `pop` on it. */
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
// set.pop or list.pop
// `s.pop()`
// nodeFrom is `s`, cfg node
// nodeTo is `s.pop()`, cfg node
// c denotes element of list or set
exists(CallNode call, AttrNode a |
call.getFunction() = a and
a.getName() = "pop" and // Should match appropriate call since we tracked a sequence here.
not exists(call.getAnArg()) and
nodeFrom.getNode() = a.getObject() and
nodeTo.getNode() = call and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
)
)
or
// dict.pop
// `d.pop("key")`
// nodeFrom is `d`, cfg node
// nodeTo is `d.pop("key")`, cfg node
// c denotes the key `"key"`
exists(CallNode call, AttrNode a |
call.getFunction() = a and
a.getName() = "pop" and // Should match appropriate call since we tracked a dictionary here.
nodeFrom.getNode() = a.getObject() and
nodeTo.getNode() = call and
c.(DictionaryElementContent).getKey() = call.getArg(0).getNode().(StrConst).getS()
)
}
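
/*
 * For illustration, a hypothetical snippet of the code this step models (names made up):
 * ```python
 * s = [1, 2, 3]
 * x = s.pop()       # element content of `s` flows to the result of `pop()`
 *
 * d = {"key": "value"}
 * v = d.pop("key")  # the value stored at "key" in `d` flows to the result of `pop("key")`
 * ```
 */
/**
 * Data flows from an iterable to the `for` target that iterates over it, by reading
 * list, set, or (small) tuple element content.
 */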
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
exists(ForTarget target |
nodeFrom.asExpr() = target.getSource() and
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
) and
(
c instanceof ListElementContent
or
c instanceof SetElementContent
or
c = small_tuple()
)
}
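
/*
 * For illustration, a hypothetical snippet of the code this step models (names made up):
 * ```python
 * for x in [1, 2, 3]:  # element content of the list flows to the loop variable `x`
 *     print(x)
 * ```
 */
/** Gets a tuple element content whose index is at most 7. */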
pragma[noinline]
TupleElementContent small_tuple() { result.getIndex() <= 7 }
/**
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
*
* For example, in
* ```python
* obj.foo
* ```
* data flows from `obj` to `obj.foo` via a read from `foo`.
*/
predicate attributeReadStep(CfgNode nodeFrom, AttributeContent c, CfgNode nodeTo) {
exists(AttrNode attr |
nodeFrom.asCfgNode() = attr.getObject() and
nodeTo.asCfgNode() = attr and
attr.getName() = c.getAttribute() and
attr.isLoad()
)
}
/**
* Holds if `nodeFrom` is a dictionary argument being unpacked and `nodeTo` is the
 * synthesized unpacked argument with the name indicated by `c`.
*/
predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
exists(CallNode call, CallableValue callable, string name |
nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
nodeTo = TKwUnpackedNode(call, callable, name) and
name = c.getKey()
)
}
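
/*
 * For illustration, a hypothetical snippet of the code this step models (names made up):
 * ```python
 * def greet(name="world"):
 *     print("hello", name)
 *
 * args = {"name": "alice"}
 * greet(**args)  # the value at key "name" in `args` flows to the unpacked argument for `name`
 * ```
 */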
/**
* Clear content at key `name` of the synthesized dictionary `TKwOverflowNode(call, callable)`,
* whenever `call` unpacks `name`.
*/
predicate kwOverflowClearStep(Node n, Content c) {
exists(CallNode call, CallableValue callable, string name |
call_unpacks(call, _, callable, name, _) and
n = TKwOverflowNode(call, callable) and
c.(DictionaryElementContent).getKey() = name
)
}
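
/*
 * For illustration, a hypothetical snippet of the code this clear step models (names made up):
 * ```python
 * def process(color, **rest):
 *     print(color, rest)
 *
 * opts = {"color": "red", "height": 32}
 * process(**opts)  # "color" is bound to the `color` parameter, so it is cleared from
 *                  # the synthesized overflow dictionary that reaches `rest`
 * ```
 */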
/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, Content c) {
kwOverflowClearStep(n, c)
or
matchClearStep(n, c)
or
attributeClearStep(n, c)
}
/**
* Holds if values stored inside attribute `c` are cleared at node `n`.
*
 * In `obj.foo = x` any old value stored in `foo` is cleared at the pre-update node
 * associated with `obj`.
*/
predicate attributeClearStep(Node n, AttributeContent c) {
exists(PostUpdateNode post | post.getPreUpdateNode() = n | attributeStoreStep(_, c, post))
}
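
/*
 * For illustration, a hypothetical snippet of the code this clear step models (names made up):
 * ```python
 * class Box:
 *     pass
 *
 * box = Box()
 * box.attr = "old"
 * box.attr = "new"  # the old value stored in `attr` is cleared at this assignment,
 * print(box.attr)   # so only "new" reaches this read
 * ```
 */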
//--------
// Fancy context-sensitive guards
//--------
/**
* Holds if the node `n` is unreachable when the call context is `call`.
*/
predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
//--------
// Virtual dispatch with call context
//--------
/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the qualifier accesses a parameter of
 * the enclosing callable `c` (including the implicit `self` parameter).
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
int accessPathLimit() { result = 5 }
/**
* Holds if access paths with `c` at their head always should be tracked at high
* precision. This disables adaptive access path precision for such access paths.
*/
predicate forceHighPrecision(Content c) { none() }
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) { none() }
class LambdaCallKind = Unit;
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }
/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
*
* One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
* by default as a heuristic.
*/
predicate allowParameterReturnInSelf(ParameterNode p) { none() }