Merge branch 'main' into port-url-redirect-query

2025-12-20 02:44:30 +01:00 · 2021-01-29 16:22:50 +01:00
parent ef831bb16f c9537f2639
commit 94e7980ca4
469 changed files with 78092 additions and 11197 deletions
--- a/python/ql/src/semmle/python/Function.qll
+++ b/python/ql/src/semmle/python/Function.qll
@@ -39,6 +39,16 @@ class Function extends Function_, Scope, AstNode {
    exists(YieldFrom y | y.getScope() = this)
  }

+  /**
+   * Holds if this function represents a lambda.
+   *
+   * The extractor reifies each lambda expression as a (local) function with the name
+   * "lambda". As `lambda` is a keyword in Python, it's impossible to create a function with this
+   * name otherwise, and so it's impossible to get a non-lambda function accidentally
+   * classified as a lambda.
+   */
+  predicate isLambda() { this.getName() = "lambda" }
+
  /** Whether this function is declared in a class and is named `__init__` */
  predicate isInitMethod() { this.isMethod() and this.getName() = "__init__" }

--- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
+++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
@@ -11,103 +11,138 @@ private import semmle.python.essa.SsaCompute
 //--------
 predicate isExpressionNode(ControlFlowNode node) { node.getNode() instanceof Expr }

-/** A data flow node for which we should synthesise an associated pre-update node. */
-abstract class NeedsSyntheticPreUpdateNode extends Node {
-  /** A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node. */
-  abstract string label();
-}
+/** A module collecting the different reasons for synthesising a pre-update node. */
+module syntheticPreUpdateNode {
+  class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
+    NeedsSyntheticPreUpdateNode post;

-class SyntheticPreUpdateNode extends Node, TSyntheticPreUpdateNode {
-  NeedsSyntheticPreUpdateNode post;
+    SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }

-  SyntheticPreUpdateNode() { this = TSyntheticPreUpdateNode(post) }
+    /** Gets the node for which this is a synthetic pre-update node. */
+    Node getPostUpdateNode() { result = post }

-  /** Gets the node for which this is a synthetic pre-update node. */
-  Node getPostUpdateNode() { result = post }
+    override string toString() { result = "[pre " + post.label() + "] " + post.toString() }

-  override string toString() { result = "[pre " + post.label() + "] " + post.toString() }
+    override Scope getScope() { result = post.getScope() }

-  override Scope getScope() { result = post.getScope() }
-
-  override Location getLocation() { result = post.getLocation() }
-}
-
-/** A data flow node for which we should synthesise an associated post-update node. */
-abstract class NeedsSyntheticPostUpdateNode extends Node {
-  /** A label for this kind of node. This will figure in the textual representation of the synthesized post-update node. */
-  abstract string label();
-}
-
-/** An argument might have its value changed as a result of a call. */
-class ArgumentPreUpdateNode extends NeedsSyntheticPostUpdateNode, ArgumentNode {
-  // Certain arguments, such as implicit self arguments are already post-update nodes
-  // and should not have an extra node synthesised.
-  ArgumentPreUpdateNode() {
-    this = any(FunctionCall c).getArg(_)
-    or
-    // Avoid argument 0 of method calls as those have read post-update nodes.
-    exists(MethodCall c, int n | n > 0 | this = c.getArg(n))
-    or
-    this = any(SpecialCall c).getArg(_)
-    or
-    // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
-    exists(ClassCall c, int n | n > 0 | this = c.getArg(n))
+    override Location getLocation() { result = post.getLocation() }
  }

-  override string label() { result = "arg" }
+  /** A data flow node for which we should synthesise an associated pre-update node. */
+  class NeedsSyntheticPreUpdateNode extends PostUpdateNode {
+    NeedsSyntheticPreUpdateNode() { this = objectCreationNode() }
+
+    override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
+
+    /**
+     * A label for this kind of node. This will figure in the textual representation of the synthesized pre-update node.
+     *
+     * There is currently only one reason for needing a pre-update node, so we always use that as the label.
+     */
+    string label() { result = "objCreate" }
+  }
+
+  /**
+   * Calls to constructors are treated as post-update nodes for the synthesized argument
+   * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
+   * object after the constructor (currently only `__init__`) has run.
+   */
+  CfgNode objectCreationNode() { result.getNode().(CallNode) = any(ClassCall c).getNode() }
 }

-/** An object might have its value changed after a store. */
-class StorePreUpdateNode extends NeedsSyntheticPostUpdateNode, CfgNode {
-  StorePreUpdateNode() {
+import syntheticPreUpdateNode
+
+/** A module collecting the different reasons for synthesising a post-update node. */
+module syntheticPostUpdateNode {
+  /** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
+  class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
+    NeedsSyntheticPostUpdateNode pre;
+
+    SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
+
+    override Node getPreUpdateNode() { result = pre }
+
+    override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
+
+    override Scope getScope() { result = pre.getScope() }
+
+    override Location getLocation() { result = pre.getLocation() }
+  }
+
+  /** A data flow node for which we should synthesise an associated post-update node. */
+  class NeedsSyntheticPostUpdateNode extends Node {
+    NeedsSyntheticPostUpdateNode() {
+      this = argumentPreUpdateNode()
+      or
+      this = storePreUpdateNode()
+      or
+      this = readPreUpdateNode()
+    }
+
+    /**
+     * A label for this kind of node. This will figure in the textual representation of the synthesized post-update node.
+     * We favour being an argument as the reason for the post-update node in case multiple reasons apply.
+     */
+    string label() {
+      if this = argumentPreUpdateNode()
+      then result = "arg"
+      else
+        if this = storePreUpdateNode()
+        then result = "store"
+        else result = "read"
+    }
+  }
+
+  /**
+   * An argument might have its value changed as a result of a call.
+   * Certain arguments, such as implicit self arguments are already post-update nodes
+   * and should not have an extra node synthesised.
+   */
+  ArgumentNode argumentPreUpdateNode() {
+    result = any(FunctionCall c).getArg(_)
+    or
+    // Avoid argument 0 of method calls as those have read post-update nodes.
+    exists(MethodCall c, int n | n > 0 | result = c.getArg(n))
+    or
+    result = any(SpecialCall c).getArg(_)
+    or
+    // Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
+    exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
+  }
+
+  /** An object might have its value changed after a store. */
+  CfgNode storePreUpdateNode() {
    exists(Attribute a |
-      node = a.getObject().getAFlowNode() and
+      result.getNode() = a.getObject().getAFlowNode() and
      a.getCtx() instanceof Store
    )
  }

-  override string label() { result = "store" }
-}
-
-/** A node marking the state change of an object after a read. */
-class ReadPreUpdateNode extends NeedsSyntheticPostUpdateNode, CfgNode {
-  ReadPreUpdateNode() {
+  /**
+   * A node marking the state change of an object after a read.
+   *
+   * A reverse read happens when the result of a read is modified, e.g. in
+   * ```python
+   * l = [ mutable ]
+   * l[0].mutate()
+   * ```
+   * we may now have changed the content of `l`. To track this, there must be
+   * a post-update node for `l`.
+   */
+  CfgNode readPreUpdateNode() {
    exists(Attribute a |
-      node = a.getObject().getAFlowNode() and
+      result.getNode() = a.getObject().getAFlowNode() and
      a.getCtx() instanceof Load
    )
+    or
+    result.getNode() = any(SubscriptNode s).getObject()
+    or
+    // The dictionary argument is read from if the callable has parameters matching the keys.
+    result.getNode().getNode() = any(Call call).getKwargs()
  }
-
-  override string label() { result = "read" }
 }

-/** A post-update node is synthesized for all nodes which satisfy `NeedsSyntheticPostUpdateNode`. */
-class SyntheticPostUpdateNode extends PostUpdateNode, TSyntheticPostUpdateNode {
-  NeedsSyntheticPostUpdateNode pre;
-
-  SyntheticPostUpdateNode() { this = TSyntheticPostUpdateNode(pre) }
-
-  override Node getPreUpdateNode() { result = pre }
-
-  override string toString() { result = "[post " + pre.label() + "] " + pre.toString() }
-
-  override Scope getScope() { result = pre.getScope() }
-
-  override Location getLocation() { result = pre.getLocation() }
-}
-
-/**
- * Calls to constructors are treated as post-update nodes for the synthesized argument
- * that is mapped to the `self` parameter. That way, constructor calls represent the value of the
- * object after the constructor (currently only `__init__`) has run.
- */
-class ObjectCreationNode extends PostUpdateNode, NeedsSyntheticPreUpdateNode, CfgNode {
-  ObjectCreationNode() { node.(CallNode) = any(ClassCall c).getNode() }
-
-  override Node getPreUpdateNode() { result.(SyntheticPreUpdateNode).getPostUpdateNode() = this }
-
-  override string label() { result = "objCreate" }
-}
+import syntheticPostUpdateNode

 class DataFlowExpr = Expr;

@@ -126,6 +161,15 @@ module EssaFlow {
    nodeFrom.(CfgNode).getNode() =
      nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
    or
+    // Definition
+    //   `[a, b] = iterable`
+    //   nodeFrom = `iterable`, cfg node
+    //   nodeTo = `TIterableSequence([a, b])`
+    exists(UnpackingAssignmentDirectTarget target |
+      nodeFrom.asExpr() = target.getValue() and
+      nodeTo = TIterableSequenceNode(target)
+    )
+    or
    // With definition
    //   `with f(42) as x:`
    //   nodeFrom is `f(42)`, cfg node
@@ -139,6 +183,10 @@ module EssaFlow {
      contextManager.strictlyDominates(var)
    )
    or
+    // Parameter definition
+    //   `def foo(x):`
+    //   nodeFrom is `x`, cfgNode
+    //   nodeTo is `x`, essa var
    exists(ParameterDefinition pd |
      nodeFrom.asCfgNode() = pd.getDefiningNode() and
      nodeTo.asVar() = pd.getVariable()
@@ -161,6 +209,9 @@ module EssaFlow {
    // If expressions
    nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
    or
+    // Flow inside an unpacking assignment
+    unpackingAssignmentFlowStep(nodeFrom, nodeTo)
+    or
    // Overflow keyword argument
    exists(CallNode call, CallableValue callable |
      call = callable.getACall() and
@@ -419,7 +470,7 @@ module ArgumentPassing {
      // argument unpacked from dict
      exists(string name |
        call_unpacks(call, mapping, callable, name, paramN) and
-        result = TKwUnpacked(call, callable, name)
+        result = TKwUnpackedNode(call, callable, name)
      )
    )
  }
@@ -484,10 +535,12 @@ import ArgumentPassing
 */
 newtype TDataFlowCallable =
  TCallableValue(CallableValue callable) {
-    callable instanceof FunctionValue
+    callable instanceof FunctionValue and
+    not callable.(FunctionValue).isLambda()
    or
    callable instanceof ClassValue
  } or
+  TLambda(Function lambda) { lambda.isLambda() } or
  TModule(Module m)

 /** Represents a callable. */
@@ -530,6 +583,27 @@ class DataFlowCallableValue extends DataFlowCallable, TCallableValue {
  override CallableValue getCallableValue() { result = callable }
 }

+/** A class representing a callable lambda. */
+class DataFlowLambda extends DataFlowCallable, TLambda {
+  Function lambda;
+
+  DataFlowLambda() { this = TLambda(lambda) }
+
+  override string toString() { result = lambda.toString() }
+
+  override CallNode getACall() { result = getCallableValue().getACall() }
+
+  override Scope getScope() { result = lambda.getEvaluatingScope() }
+
+  override NameNode getParameter(int n) { result = getParameter(getCallableValue(), n) }
+
+  override string getName() { result = "Lambda callable" }
+
+  override FunctionValue getCallableValue() {
+    result.getOrigin().getNode() = lambda.getDefinition()
+  }
+}
+
 /** A class representing the scope in which a `ModuleVariableNode` appears. */
 class DataFlowModuleScope extends DataFlowCallable, TModule {
  Module mod;
@@ -703,17 +777,6 @@ class SpecialCall extends DataFlowCall, TSpecialCall {
  }
 }

-/** A data flow node that represents a call argument. */
-class ArgumentNode extends Node {
-  ArgumentNode() { this = any(DataFlowCall c).getArg(_) }
-
-  /** Holds if this argument occurs at the given position in the given call. */
-  predicate argumentOf(DataFlowCall call, int pos) { this = call.getArg(pos) }
-
-  /** Gets the call in which this node is an argument. */
-  final DataFlowCall getCall() { this.argumentOf(result, _) }
-}
-
 /** Gets a viable run-time target for the call `call`. */
 DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }

@@ -844,6 +907,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
  or
  comprehensionStoreStep(nodeFrom, c, nodeTo)
  or
+  unpackingAssignmentStoreStep(nodeFrom, c, nodeTo)
+  or
  attributeStoreStep(nodeFrom, c, nodeTo)
  or
  posOverflowStoreStep(nodeFrom, c, nodeTo)
@@ -859,6 +924,7 @@ predicate listStoreStep(CfgNode nodeFrom, ListElementContent c, CfgNode nodeTo)
  //   nodeTo is the list, `[..., 42, ...]`, cfg node
  //   c denotes element of list
  nodeTo.getNode().(ListNode).getAnElement() = nodeFrom.getNode() and
+  not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
  // Suppress unused variable warning
  c = c
 }
@@ -884,6 +950,7 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
  //   c denotes element of tuple and index of nodeFrom
  exists(int n |
    nodeTo.getNode().(TupleNode).getElement(n) = nodeFrom.getNode() and
+    not nodeTo.getNode() instanceof UnpackingAssignmentSequenceTarget and
    c.getIndex() = n
  )
 }
@@ -974,6 +1041,8 @@ predicate kwOverflowStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node
 predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
  subscriptReadStep(nodeFrom, c, nodeTo)
  or
+  unpackingAssignmentReadStep(nodeFrom, c, nodeTo)
+  or
  popReadStep(nodeFrom, c, nodeTo)
  or
  comprehensionReadStep(nodeFrom, c, nodeTo)
@@ -1006,6 +1075,322 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
  )
 }

+/**
+ * The unpacking assignment takes the general form
+ * ```python
+ *   sequence = iterable
+ * ```
+ * where `sequence` is either a tuple or a list and it can contain wildcards.
+ * The iterable can be any iterable, which means that (CodeQL modeling of) content
+ * will need to change type if it should be transferred from the RHS to the LHS.
+ *
+ * Note that (CodeQL modeling of) content does not have to change type on data-flow
+ * paths _inside_ the LHS, as the different allowed syntaxes here are merely a convenience.
+ * Consequently, we model all LHS sequences as tuples, which have the more precise content
+ * model, making flow to the elements more precise. If an element is a starred variable,
+ * we will have to mutate the content type to be list content.
+ *
+ * We may for instance have
+ * ```python
+ *    (a, b) = ["a", SOURCE]  # RHS has content `ListElementContent`
+ * ```
+ * Due to the abstraction for list content, we do not know whether `SOURCE`
+ * ends up in `a` or in `b`, so we want to overapproximate and see it in both.
+ *
+ * Using wildcards we may have
+ * ```python
+ *   (a, *b) = ("a", "b", SOURCE)  # RHS has content `TupleElementContent(2)`
+ * ```
+ * Since the starred variables are always assigned (Python-)type list, `*b` will be
+ * `["b", SOURCE]`, and we will again overapproximate and assign it
+ * content corresponding to anything found in the RHS.
+ *
+ * For a precise transfer
+ * ```python
+ *    (a, b) = ("a", SOURCE)  # RHS has content `TupleElementContent(1)`
+ * ```
+ * we wish to keep the precision, so only `b` receives the tuple content at index 1.
+ *
+ * Finally, `sequence` is actually a pattern and can have a more complicated structure,
+ * such as
+ * ```python
+ *   (a, [b, *c]) = ("a", ["b", SOURCE])  # RHS has content `TupleElementContent(1); ListElementContent`
+ * ```
+ * where `a` should not receive content, but `b` and `c` should. `c` will be `[SOURCE]` so
+ * should have the content transferred, while `b` should read it.
+ *
+ * To transfer content from RHS to the elements of the LHS in the expression `sequence = iterable`,
+ * we use two synthetic nodes:
+ *
+ * - `TIterableSequence(sequence)` which captures the content-modeling the entire `sequence` will have
+ * (essentially just a copy of the content-modeling the RHS has)
+ *
+ * - `TIterableElement(sequence)` which captures the content-modeling that will be assigned to an element.
+ * Note that an empty access path means that the value we are tracking flows directly to the element.
+ *
+ *
+ * The `TIterableSequence(sequence)` is at this point superfluous but becomes useful when handling recursive
+ * structures in the LHS, where `sequence` is some internal sequence node. We can have a uniform treatment
+ * by always having these two synthetic nodes. So we transfer to (or, in the recursive case, read into)
+ * `TIterableSequence(sequence)`, from which we take a read step to `TIterableElement(sequence)` and then a
+ * store step to `sequence`.
+ *
+ * This allows the unknown content from the RHS to be read into `TIterableElement(sequence)` and tuple content
+ * to then be stored into `sequence`. If the content is already tuple content, this indirection creates crosstalk
+ * between indices. Therefore, tuple content is never read into `TIterableElement(sequence)`; it is instead
+ * transferred directly from `TIterableSequence(sequence)` to `sequence` via a flow step. Such a flow step will
+ * also transfer other content, but only tuple content is further read from `sequence` into its elements.
+ *
+ * The strategy is then via several read-, store-, and flow steps:
+ * 1. [Flow] Content is transferred from `iterable` to `TIterableSequence(sequence)` via a
+ *    flow step. From here, everything happens on the LHS.
+ *
+ * 2. [Flow] Content is transferred from `TIterableSequence(sequence)` to `sequence` via a
+ *    flow step. (Here only tuple content is relevant.)
+ *
+ * 3. [Read] Content is read from `TIterableSequence(sequence)` into  `TIterableElement(sequence)`.
+ *    As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
+ *    crosstalk.
+ *
+ * 4. [Store] Content is stored from `TIterableElement(sequence)` to `sequence`.
+ *    Content type is `TupleElementContent` with indices taken from the syntax.
+ *    For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
+ *    This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
+ *
+ * 5. [Read] Content is read from `sequence` to its elements.
+ *    a) If the element is a plain variable, the target is the corresponding essa node.
+ *
+ *    b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
+ *
+ *    c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
+ *
+ * 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
+ *    content type `ListElementContent`.
+ *
+ * 7. [Flow, Read, Store] Steps 2 through 6 are repeated for all recursive elements which are sequences.
+ *
+ *
+ * We illustrate the above steps on the assignment
+ *
+ * ```python
+ * (a, b) = ["a", SOURCE]
+ * ```
+ *
+ * Looking at the content propagation to `a`:
+ *   `["a", SOURCE]`: [ListElementContent]
+ *
+ * --Step 1-->
+ *
+ *   `TIterableSequence((a, b))`: [ListElementContent]
+ *
+ * --Step 3-->
+ *
+ *   `TIterableElement((a, b))`: []
+ *
+ * --Step 4-->
+ *
+ *   `(a, b)`: [TupleElementContent(0)]
+ *
+ * --Step 5a-->
+ *
+ *   `a`: []
+ *
+ * Meaning there is data-flow from the RHS to `a` (an over-approximation). The same logic would be applied to show there is data-flow to `b`. Note that _Step 3_ and _Step 4_ would not have been needed if the RHS had been a tuple (since that would have been able to use _Step 2_ instead).
+ *
+ * Another, more complicated example:
+ * ```python
+ *   (a, [b, *c]) = ["a", [SOURCE]]
+ * ```
+ * where the path to `c` is
+ *
+ *   `["a", [SOURCE]]`: [ListElementContent; ListElementContent]
+ *
+ * --Step 1-->
+ *
+ *   `TIterableSequence((a, [b, *c]))`: [ListElementContent; ListElementContent]
+ *
+ * --Step 3-->
+ *
+ *   `TIterableElement((a, [b, *c]))`: [ListElementContent]
+ *
+ * --Step 4-->
+ *
+ *   `(a, [b, *c])`: [TupleElementContent(1); ListElementContent]
+ *
+ * --Step 5b-->
+ *
+ *   `TIterableSequence([b, *c])`: [ListElementContent]
+ *
+ * --Step 3-->
+ *
+ *   `TIterableElement([b, *c])`: []
+ *
+ * --Step 4-->
+ *
+ *   `[b, *c]`: [TupleElementContent(1)]
+ *
+ * --Step 5c-->
+ *
+ *   `TIterableElement(c)`: []
+ *
+ * --Step 6-->
+ *
+ *  `c`: [ListElementContent]
+ */
+module UnpackingAssignment {
+  /** A direct (or top-level) target of an unpacking assignment. */
+  class UnpackingAssignmentDirectTarget extends ControlFlowNode {
+    Expr value;
+
+    UnpackingAssignmentDirectTarget() {
+      this instanceof SequenceNode and
+      exists(Assign assign | this.getNode() = assign.getATarget() | value = assign.getValue())
+    }
+
+    Expr getValue() { result = value }
+  }
+
+  /** A (possibly recursive) target of an unpacking assignment. */
+  class UnpackingAssignmentTarget extends ControlFlowNode {
+    UnpackingAssignmentTarget() {
+      this instanceof UnpackingAssignmentDirectTarget
+      or
+      this = any(UnpackingAssignmentSequenceTarget parent).getAnElement()
+    }
+  }
+
+  /** A (possibly recursive) target of an unpacking assignment which is also a sequence. */
+  class UnpackingAssignmentSequenceTarget extends UnpackingAssignmentTarget {
+    UnpackingAssignmentSequenceTarget() { this instanceof SequenceNode }
+
+    ControlFlowNode getElement(int i) { result = this.(SequenceNode).getElement(i) }
+
+    ControlFlowNode getAnElement() { result = this.getElement(_) }
+  }
+
+  /**
+   * Step 2
+   * Data flows from `TIterableSequence(sequence)` to `sequence`
+   */
+  predicate unpackingAssignmentFlowStep(Node nodeFrom, Node nodeTo) {
+    exists(UnpackingAssignmentSequenceTarget target |
+      nodeFrom = TIterableSequenceNode(target) and
+      nodeTo.asCfgNode() = target
+    )
+  }
+
+  /**
+   * Step 3
+   * Data flows from `TIterableSequence(sequence)` into  `TIterableElement(sequence)`.
+   * As `sequence` is modeled as a tuple, we will not read tuple content as that would allow
+   * crosstalk.
+   */
+  predicate unpackingAssignmentConvertingReadStep(Node nodeFrom, Content c, Node nodeTo) {
+    exists(UnpackingAssignmentSequenceTarget target |
+      nodeFrom = TIterableSequenceNode(target) and
+      nodeTo = TIterableElementNode(target) and
+      (
+        c instanceof ListElementContent
+        or
+        c instanceof SetElementContent
+        // TODO: dict content in iterable unpacking not handled
+      )
+    )
+  }
+
+  /**
+   * Step 4
+   * Data flows from `TIterableElement(sequence)` to `sequence`.
+   * Content type is `TupleElementContent` with indices taken from the syntax.
+   * For instance, if `sequence` is `(a, *b, c)`, content is written to index 0, 1, and 2.
+   */
+  predicate unpackingAssignmentConvertingStoreStep(Node nodeFrom, Content c, Node nodeTo) {
+    exists(UnpackingAssignmentSequenceTarget target |
+      nodeFrom = TIterableElementNode(target) and
+      nodeTo.asCfgNode() = target and
+      exists(int index | exists(target.getElement(index)) |
+        c.(TupleElementContent).getIndex() = index
+      )
+    )
+  }
+
+  /**
+   * Step 5
+   * For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
+   * three cases for what `nodeTo` should be:
+   *    a) If the element is a plain variable, `nodeTo` is the corresponding essa node.
+   *
+   *    b) If the element is itself a sequence, with control-flow node `seq`, `nodeTo` is `TIterableSequence(seq)`.
+   *
+   *    c) If the element is a starred variable, with control-flow node `v`, `nodeTo` is `TIterableElement(v)`.
+   */
+  predicate unpackingAssignmentElementReadStep(Node nodeFrom, Content c, Node nodeTo) {
+    exists(
+      UnpackingAssignmentSequenceTarget target, int index, ControlFlowNode element, int starIndex
+    |
+      target.getElement(starIndex) instanceof StarredNode
+      or
+      not exists(target.getAnElement().(StarredNode)) and
+      starIndex = -1
+    |
+      nodeFrom.asCfgNode() = target and
+      element = target.getElement(index) and
+      (
+        if starIndex = -1 or index < starIndex
+        then c.(TupleElementContent).getIndex() = index
+        else
+          // This could get big if big tuples exist
+          if index = starIndex
+          then c.(TupleElementContent).getIndex() >= index
+          else c.(TupleElementContent).getIndex() >= index - 1
+      ) and
+      (
+        if element instanceof SequenceNode
+        then
+          // Step 5b
+          nodeTo = TIterableSequenceNode(element)
+        else
+          if element instanceof StarredNode
+          then
+            // Step 5c
+            nodeTo = TIterableElementNode(element)
+          else
+            // Step 5a
+            nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
+      )
+    )
+  }
+
+  /**
+   * Step 6
+   * Data flows from `TIterableElement(v)` to the essa variable for `v`, with
+   * content type `ListElementContent`.
+   */
+  predicate unpackingAssignmentStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
+    exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
+      nodeFrom = TIterableElementNode(starred) and
+      nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
+      c instanceof ListElementContent
+    )
+  }
+
+  /** All read steps associated with unpacking assignment. */
+  predicate unpackingAssignmentReadStep(Node nodeFrom, Content c, Node nodeTo) {
+    unpackingAssignmentElementReadStep(nodeFrom, c, nodeTo)
+    or
+    unpackingAssignmentConvertingReadStep(nodeFrom, c, nodeTo)
+  }
+
+  /** All store steps associated with unpacking assignment. */
+  predicate unpackingAssignmentStoreStep(Node nodeFrom, Content c, Node nodeTo) {
+    unpackingAssignmentStarredElementStoreStep(nodeFrom, c, nodeTo)
+    or
+    unpackingAssignmentConvertingStoreStep(nodeFrom, c, nodeTo)
+  }
+}
+
+import UnpackingAssignment
+
 /** Data flows from a sequence to a call to `pop` on the sequence. */
 predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
  // set.pop or list.pop
@@ -1092,7 +1477,7 @@ predicate attributeReadStep(CfgNode nodeFrom, AttributeContent c, CfgNode nodeTo
 predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
  exists(CallNode call, CallableValue callable, string name |
    nodeFrom.asCfgNode() = call.getNode().getKwargs().getAFlowNode() and
-    nodeTo = TKwUnpacked(call, callable, name) and
+    nodeTo = TKwUnpackedNode(call, callable, name) and
    name = c.getKey()
  )
 }
--- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll
+++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll
@@ -58,9 +58,18 @@ newtype TNode =
   * That is, `call` contains argument `**{"foo": bar}` which is passed
   * to parameter `foo` of `callable`.
   */
-  TKwUnpacked(CallNode call, CallableValue callable, string name) {
+  TKwUnpackedNode(CallNode call, CallableValue callable, string name) {
    call_unpacks(call, _, callable, name, _)
-  }
+  } or
+  /**
+   * A synthetic node representing that an iterable sequence flows to `consumer`.
+   */
+  TIterableSequenceNode(UnpackingAssignmentSequenceTarget consumer) or
+  /**
+   * A synthetic node representing that there may be an iterable element
+   * for `consumer` to consume.
+   */
+  TIterableElementNode(UnpackingAssignmentTarget consumer)

 /** Helper for `Node::getEnclosingCallable`. */
 private DataFlowCallable getCallableScope(Scope s) {
@@ -179,7 +188,12 @@ ExprNode exprNode(DataFlowExpr e) { result.getNode().getNode() = e }
 class ParameterNode extends CfgNode {
  ParameterDefinition def;

-  ParameterNode() { node = def.getDefiningNode() }
+  ParameterNode() {
+    node = def.getDefiningNode() and
+    // Disregard parameters that we cannot resolve
+    // TODO: Make this unnecessary
+    exists(DataFlowCallable c | node = c.getParameter(_))
+  }

  /**
   * Holds if this node is the parameter of callable `c` at the
@@ -193,6 +207,17 @@ class ParameterNode extends CfgNode {
  Parameter getParameter() { result = def.getParameter() }
 }

+/** A data flow node that represents a call argument. */
+class ArgumentNode extends Node {
+  ArgumentNode() { this = any(DataFlowCall c).getArg(_) }
+
+  /** Holds if this argument occurs at the given position in the given call. */
+  predicate argumentOf(DataFlowCall call, int pos) { this = call.getArg(pos) }
+
+  /** Gets the call in which this node is an argument. */
+  final DataFlowCall getCall() { this.argumentOf(result, _) }
+}
+
 /**
 * A node associated with an object after an operation that might have
 * changed its state.
@@ -322,11 +347,11 @@ class KwOverflowNode extends Node, TKwOverflowNode {
 * The node representing the synthetic argument of a call that is unpacked from a dictionary
 * argument.
 */
-class KwUnpacked extends Node, TKwUnpacked {
+class KwUnpackedNode extends Node, TKwUnpackedNode {
  CallNode call;
  string name;

-  KwUnpacked() { this = TKwUnpacked(call, _, name) }
+  KwUnpackedNode() { this = TKwUnpackedNode(call, _, name) }

  override string toString() { result = "KwUnpacked " + name }

@@ -340,6 +365,42 @@ class KwUnpacked extends Node, TKwUnpacked {
  override Location getLocation() { result = call.getLocation() }
 }

+/**
+ * A synthetic node representing an iterable sequence. Used for changing content type
+ * for instance from a `ListElement` to a `TupleElement`, especially if the content is
+ * transferred via a read step which cannot be broken up into a read and a store. The
+ * read step then targets TIterableSequence, and the conversion can happen via a read
+ * step to TIterableElement followed by a store step to the target.
+ */
+class IterableSequenceNode extends Node, TIterableSequenceNode {
+  CfgNode consumer;
+
+  IterableSequenceNode() { this = TIterableSequenceNode(consumer.getNode()) }
+
+  override string toString() { result = "IterableSequence" }
+
+  override DataFlowCallable getEnclosingCallable() { result = consumer.getEnclosingCallable() }
+
+  override Location getLocation() { result = consumer.getLocation() }
+}
+
+/**
+ * A synthetic node representing an iterable element. Used for changing content type
+ * for instance from a `ListElement` to a `TupleElement`. This would happen via a
+ * read step from the list to IterableElement followed by a store step to the tuple.
+ */
+class IterableElementNode extends Node, TIterableElementNode {
+  CfgNode consumer;
+
+  IterableElementNode() { this = TIterableElementNode(consumer.getNode()) }
+
+  override string toString() { result = "IterableElement" }
+
+  override DataFlowCallable getEnclosingCallable() { result = consumer.getEnclosingCallable() }
+
+  override Location getLocation() { result = consumer.getLocation() }
+}
+
 /**
 * A node that controls whether other nodes are evaluated.
 */
--- a/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll
+++ b/python/ql/src/semmle/python/dataflow/new/internal/DataFlowUtil.qll
@@ -68,5 +68,11 @@ Node importNode(string name) {
  // Because named imports are modelled as `AttrRead`s, the statement `from foo import bar as baz`
  // is interpreted as if it was an assignment `baz = foo.bar`, which means `baz` gets tracked as a
  // reference to `foo.bar`, as desired.
-  result.asCfgNode().getNode() = any(ImportExpr i | i.getName() = name)
+  exists(ImportExpr imp_expr |
+    imp_expr.getName() = name and
+    result.asCfgNode().getNode() = imp_expr and
+    // in `import foo.bar` we DON'T want to give a result for `importNode("foo.bar")`,
+    // only for `importNode("foo")`. We exclude those cases with the following clause.
+    not exists(Import imp | imp.getAName().getValue() = imp_expr)
+  )
 }
--- a/python/ql/src/semmle/python/frameworks/Django.qll
+++ b/python/ql/src/semmle/python/frameworks/Django.qll
@@ -1938,7 +1938,23 @@ private module Django {
  private class DjangoUrlsRePathCall extends DjangoRegexRouteSetup {
    override CallNode node;

-    DjangoUrlsRePathCall() { node.getFunction() = django::urls::re_path().asCfgNode() }
+    DjangoUrlsRePathCall() {
+      node.getFunction() = django::urls::re_path().asCfgNode() and
+      // `django.conf.urls.url` (which we support directly with
+      // `DjangoConfUrlsUrlCall`) is implemented in Django 2+, for backward compatibility,
+      // using `django.urls.re_path`. See
+      // https://github.com/django/django/blob/stable/3.2.x/django/conf/urls/__init__.py#L22
+      // Since we're still installing dependencies and analyzing their source code,
+      // without explicitly filtering out this call, we would be double-counting such
+      // route-setups :( One practical negative side effect of double-counting is that,
+      // since we can't figure out the URL in the library code calling `django.urls.re_path`
+      // (because we only consider local flow), we would for all those cases mark ANY parameter
+      // as being a routed-parameter, which can lead to false positives.
+      not exists(Module mod |
+        mod.getName() = "django.conf.urls.__init__" and
+        node.getEnclosingModule() = mod
+      )
+    }

    override DataFlow::Node getUrlPatternArg() {
      result.asCfgNode() = [node.getArg(0), node.getArgByName("route")]
--- a/python/ql/src/semmle/python/frameworks/Tornado.qll
+++ b/python/ql/src/semmle/python/frameworks/Tornado.qll
@@ -227,6 +227,17 @@ private module Tornado {
        /** Gets a reference the `redirect` method. */
        DataFlow::Node redirectMethod() { result = redirectMethod(DataFlow::TypeTracker::end()) }

+        /** Gets a reference to the `write` method, tracked using type tracker `t`. */
+        private DataFlow::Node writeMethod(DataFlow::TypeTracker t) {
+          t.startInAttr("write") and
+          result = instance()
+          or
+          exists(DataFlow::TypeTracker t2 | result = writeMethod(t2).track(t2, t))
+        }
+
+        /** Gets a reference to the `write` method, as found by type tracking. */
+        DataFlow::Node writeMethod() { result = writeMethod(DataFlow::TypeTracker::end()) }
+
        private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
          override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
            // Method access
@@ -575,6 +586,26 @@ private module Tornado {
    override DataFlow::Node getBody() { none() }

    override string getMimetypeDefault() { none() }
+  }
+
+  /**
+   * A call to the `tornado.web.RequestHandler.write` method, modeled as an HTTP response
+   * whose body is the first positional argument or the `chunk` keyword argument.
+   * See https://www.tornadoweb.org/en/stable/web.html?highlight=write#tornado.web.RequestHandler.write
+   */
+  private class TornadoRequestHandlerWriteCall extends HTTP::Server::HttpResponse::Range,
+    DataFlow::CfgNode {
+    override CallNode node;
+
+    TornadoRequestHandlerWriteCall() {
+      node.getFunction() = tornado::web::RequestHandler::writeMethod().asCfgNode()
+    }
+
+    override DataFlow::Node getBody() {
+      result.asCfgNode() in [node.getArg(0), node.getArgByName("chunk")]
+    }
+
+    override string getMimetypeDefault() { result = "text/html" }

    override DataFlow::Node getMimetypeOrContentTypeArg() { none() }
  }
--- a/python/ql/src/semmle/python/objects/ObjectAPI.qll
+++ b/python/ql/src/semmle/python/objects/ObjectAPI.qll
@@ -720,6 +720,9 @@ abstract class FunctionValue extends CallableValue {

  /** Gets a class that this function may return */
  abstract ClassValue getAnInferredReturnType();
+
+  /** Holds if this function represents a lambda, i.e. its origin is a `Lambda` AST node. */
+  predicate isLambda() { this.getOrigin().getNode() instanceof Lambda }
 }

 /** Class representing Python functions */