Merge branch 'main' into python-more-complete-dataflow-tests

This commit is contained in:
Rasmus Wriedt Larsen
2020-09-03 14:58:20 +02:00
458 changed files with 11018 additions and 1637 deletions

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric counts the number of lines of commented-out code in each file. Large amounts of
commented-out code often indicate poorly maintained code.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,25 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Commented-out code is distracting and confusing for developers who read the surrounding code,
and its significance is often unclear. It will not get compiled or tested when the code around
it changes, so it's likely to break over time. For these reasons, commented-out code should be
avoided.
</p>
</overview>
<recommendation>
<p>
Remove or reinstate the commented-out code. If you want to include a snippet of example code
in a comment, consider enclosing it in quotes or marking it up as appropriate for the source
language.
</p>
</recommendation>
</qhelp>

View File

@@ -0,0 +1,12 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<references>
<li>Mark Needham: <a href="http://www.markhneedham.com/blog/2009/01/17/the-danger-of-commenting-out-code/">The danger of commenting out code</a>.</li>
<li>Los Techies: <a href="http://lostechies.com/rodpaddock/2010/12/29/commented-code-technical-debt">Commented Code == Technical Debt</a>.</li>
<li>High Integrity C++ Coding Standard: <a href="http://www.codingstandard.com/rule/2-3-2-do-not-comment-out-code/">2.3.2 Do not comment out code</a>.</li>
</references>
</qhelp>

View File

@@ -12,6 +12,5 @@ a poorly designed or hastily written code base, which typically suffers from oth
problems as well.
</p>
</overview>
</qhelp>

View File

@@ -0,0 +1,35 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This metric measures the number of lines in a file that are contained within a block that is duplicated elsewhere. These lines may include code, comments and whitespace, and the duplicate block may be in this file or in another file.
</p>
<p>
A file that contains many lines that are duplicated within the code base is problematic
for a number of reasons.
</p>
</overview>
<include src="DuplicationProblems.qhelp" />
<recommendation>
<p>
Refactor files with lots of duplicated code to extract the common code into
a shared library or module.
</p>
</recommendation>
<references>
<li>Wikipedia: <a href="http://en.wikipedia.org/wiki/Duplicate_code">Duplicate code</a>.</li>
<li>M. Fowler, <em>Refactoring</em>. Addison-Wesley, 1999.</li>
</references>
</qhelp>

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -15,7 +15,7 @@
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -8,7 +8,7 @@
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import python
private import python
/**
* Provides classes for performing local (intra-procedural) and

View File

@@ -0,0 +1,282 @@
/** Step Summaries and Type Tracking */
import python
import internal.DataFlowPublic
import internal.DataFlowPrivate
/** Any string that may appear as the name of an attribute or access path. */
class AttributeName extends string {
AttributeName() { this = any(Attribute a).getName() }
}
/** Either an attribute name, or the empty string (representing no attribute). */
class OptionalAttributeName extends string {
OptionalAttributeName() { this instanceof AttributeName or this = "" }
}
/**
* A description of a step on an inter-procedural data flow path.
*/
private newtype TStepSummary =
LevelStep() or
CallStep() or
ReturnStep() or
StoreStep(AttributeName attr) or
LoadStep(AttributeName attr)
/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
* A description of a step on an inter-procedural data flow path.
*/
class StepSummary extends TStepSummary {
/** Gets a textual representation of this step summary. */
string toString() {
this instanceof LevelStep and result = "level"
or
this instanceof CallStep and result = "call"
or
this instanceof ReturnStep and result = "return"
or
exists(string attr | this = StoreStep(attr) | result = "store " + attr)
or
exists(string attr | this = LoadStep(attr) | result = "load " + attr)
}
}
module StepSummary {
/**
 * Holds if `nodeFrom` can reach `nodeTo` in one inter-procedural/heap step
 * described by `summary`, possibly preceded by any number of local (ESSA)
 * flow steps (note the `essaFlowStep*` closure below).
 */
cached
predicate step(Node nodeFrom, Node nodeTo, StepSummary summary) {
exists(Node mid | EssaFlow::essaFlowStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
}
/**
 * Holds if `nodeFrom` can reach `nodeTo` in exactly one step, described by
 * `summary`. Unlike `step`, no intervening local flow steps are allowed;
 * a single local flow step is itself reported as a `LevelStep`.
 */
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
summary = LevelStep()
or
callStep(nodeFrom, nodeTo) and summary = CallStep()
or
returnStep(nodeFrom, nodeTo) and
summary = ReturnStep()
or
exists(string attr |
basicStoreStep(nodeFrom, nodeTo, attr) and
summary = StoreStep(attr)
or
basicLoadStep(nodeFrom, nodeTo, attr) and summary = LoadStep(attr)
)
}
}
/**
 * Holds if `nodeFrom` steps to `nodeTo` by being passed as a parameter in a call.
 *
 * That is, `nodeFrom` is the `i`th argument of some call, and `nodeTo` is the
 * `i`th parameter of the callable targeted by that call.
 */
predicate callStep(ArgumentNode nodeFrom, ParameterNode nodeTo) {
// TODO: Support special methods?
exists(DataFlowCall call, int i |
nodeFrom.argumentOf(call, i) and nodeTo.isParameterOf(call.getCallable(), i)
)
}
/**
 * Holds if `nodeFrom` steps to `nodeTo` by being returned from a call.
 *
 * That is, `nodeFrom` is a return node inside the callable targeted by some
 * call, and `nodeTo` is the control-flow node of that call expression itself.
 */
predicate returnStep(ReturnNode nodeFrom, Node nodeTo) {
exists(DataFlowCall call |
nodeFrom.getEnclosingCallable() = call.getCallable() and nodeTo.asCfgNode() = call.getNode()
)
}
/**
 * Holds if `nodeFrom` is being written to the `attr` attribute of the object in `nodeTo`.
 *
 * Note that the choice of `nodeTo` does not have to make sense "chronologically".
 * All we care about is whether the `attr` attribute of `nodeTo` can have a specific type,
 * and the assumption is that if a specific type appears here, then any access of that
 * particular attribute can yield something of that particular type.
 *
 * Thus, in an example such as
 *
 * ```python
 * def foo(y):
 * x = Foo()
 * bar(x)
 * x.attr = y
 * baz(x)
 *
 * def bar(x):
 * z = x.attr
 * ```
 * for the attribute write `x.attr = y`, we will have `attr` being the literal string `"attr"`,
 * `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
 * function. This means we will track the fact that `x.attr` can have the type of `y` into the
 * assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
 */
predicate basicStoreStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttributeAssignment a, Node var |
a.getName() = attr and
// Follow local flow forwards from `nodeTo` to the variable that the
// attribute is actually written through (see the QLDoc above for why
// `nodeTo` may be "earlier" than the write itself).
EssaFlow::essaFlowStep*(nodeTo, var) and
var.asVar() = a.getInput() and
nodeFrom.asCfgNode() = a.getValue()
)
}
/**
 * Holds if `nodeTo` is the result of accessing the `attr` attribute of `nodeFrom`.
 * That is, `nodeTo` is an expression of the form `x.attr`, and `nodeFrom` is `x`.
 */
predicate basicLoadStep(Node nodeFrom, Node nodeTo, string attr) {
exists(AttrNode s | nodeTo.asCfgNode() = s and s.getObject(attr) = nodeFrom.asCfgNode())
}
/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
private class Boolean extends boolean {
Boolean() { this = true or this = false }
}
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalAttributeName attr)
/**
* Summary of the steps needed to track a value to a given dataflow node.
*
* This can be used to track objects that implement a certain API in order to
* recognize calls to that API. Note that type-tracking does not by itself provide a
* source/sink relation, that is, it may determine that a node has a given type,
* but it won't determine where that type came from.
*
* It is recommended that all uses of this type are written in the following form,
* for tracking some type `myType`:
* ```
* Node myType(DataFlow::TypeTracker t) {
* t.start() and
* result = < source of myType >
* or
* exists (TypeTracker t2 |
* result = myType(t2).track(t2, t)
* )
* }
*
* DataFlow::SourceNode myType() { result = myType(DataFlow::TypeTracker::end()) }
* ```
*
* Instead of `result = myType(t2).track(t2, t)`, you can also use the equivalent
* `t = t2.step(myType(t2), result)`. If you additionally want to track individual
* intra-procedural steps, use `t = t2.smallstep(myCallback(t2), result)`.
*/
class TypeTracker extends TTypeTracker {
// Whether a call step has been taken on the path summarized so far.
Boolean hasCall;
// The attribute currently being tracked, or "" when tracking the value itself.
OptionalAttributeName attr;
TypeTracker() { this = MkTypeTracker(hasCall, attr) }
/** Gets the summary resulting from appending `step` to this type-tracking summary. */
cached
TypeTracker append(StepSummary step) {
// A level step leaves the summary unchanged.
step = LevelStep() and result = this
or
// Record that a call step was taken, so later return steps can be rejected.
step = CallStep() and result = MkTypeTracker(true, attr)
or
// Only step out of a call if we have not stepped into one, to avoid
// mismatched call/return pairs.
step = ReturnStep() and hasCall = false and result = this
or
// Loading the tracked attribute means we are now tracking the value itself.
step = LoadStep(attr) and result = MkTypeTracker(hasCall, "")
or
// A store into attribute `p` is only appended when no attribute is being
// tracked yet; afterwards we track the `p` attribute.
exists(string p | step = StoreStep(p) and attr = "" and result = MkTypeTracker(hasCall, p))
}
/** Gets a textual representation of this summary. */
string toString() {
exists(string withCall, string withAttr |
(if hasCall = true then withCall = "with" else withCall = "without") and
(if attr != "" then withAttr = " with attribute " + attr else withAttr = "") and
result = "type tracker " + withCall + " call steps" + withAttr
)
}
/**
 * Holds if this is the starting point of type tracking.
 */
predicate start() { hasCall = false and attr = "" }
/**
 * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
 * The type tracking only ends after the attribute has been loaded.
 */
predicate startInAttr(AttributeName attrName) { hasCall = false and attr = attrName }
/**
 * Holds if this is the starting point of type tracking
 * when tracking a parameter into a call, but not out of it.
 */
predicate call() { hasCall = true and attr = "" }
/**
 * Holds if this is the end point of type tracking.
 */
predicate end() { attr = "" }
/**
 * INTERNAL. DO NOT USE.
 *
 * Holds if this type has been tracked into a call.
 */
boolean hasCall() { result = hasCall }
/**
 * INTERNAL. DO NOT USE.
 *
 * Gets the attribute associated with this type tracker.
 */
string getAttr() { result = attr }
/**
 * Gets a type tracker that starts where this one has left off to allow continued
 * tracking.
 *
 * This predicate is only defined if the type has not been tracked into an attribute.
 */
TypeTracker continue() { attr = "" and result = this }
/**
 * Gets the summary that corresponds to having taken a forwards
 * heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
 */
pragma[inline]
TypeTracker step(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::step(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
}
/**
 * Gets the summary that corresponds to having taken a forwards
 * local, heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
 *
 * Unlike `TypeTracker::step`, this predicate exposes all edges
 * in the flow graph, and not just the edges between `Node`s.
 * It may therefore be less performant.
 *
 * Type tracking predicates using small steps typically take the following form:
 * ```ql
 * DataFlow::Node myType(DataFlow::TypeTracker t) {
 * t.start() and
 * result = < source of myType >
 * or
 * exists (DataFlow::TypeTracker t2 |
 * t = t2.smallstep(myType(t2), result)
 * )
 * }
 *
 * DataFlow::Node myType() {
 * result = myType(DataFlow::TypeTracker::end())
 * }
 * ```
 */
pragma[inline]
TypeTracker smallstep(Node nodeFrom, Node nodeTo) {
exists(StepSummary summary |
StepSummary::smallstep(nodeFrom, nodeTo, summary) and
result = this.append(summary)
)
or
// A plain local flow step leaves the summary unchanged.
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
result = this
}
}

View File

@@ -123,8 +123,18 @@ module Consistency {
n.getEnclosingCallable() != call.getEnclosingCallable()
}
// This predicate helps the compiler forget that in some languages
// it is impossible for a result of `getPreUpdateNode` to be an
// instance of `PostUpdateNode`.
private Node getPre(PostUpdateNode n) {
result = n.getPreUpdateNode()
or
none()
}
query predicate postIsNotPre(PostUpdateNode n, string msg) {
n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
getPre(n) = n and
msg = "PostUpdateNode should not equal its pre-update node."
}
query predicate postHasUniquePre(PostUpdateNode n, string msg) {
@@ -152,12 +162,6 @@ module Consistency {
msg = "Origin of readStep is missing a PostUpdateNode."
}
query predicate storeIsPostUpdate(Node n, string msg) {
storeStep(_, _, n) and
not n instanceof PostUpdateNode and
msg = "Store targets should be PostUpdateNodes."
}
query predicate argHasPostUpdate(ArgumentNode n, string msg) {
not hasPost(n) and
not isImmutableOrUnobservable(n) and

View File

@@ -15,6 +15,32 @@ class DataFlowCfgNode extends ControlFlowNode {
DataFlowCfgNode() { isExpressionNode(this) }
}
/** A data flow node which should have an associated post-update node. */
abstract class PreUpdateNode extends Node { }
/** An argument might have its value changed as a result of a call. */
class ArgumentPreUpdateNode extends PreUpdateNode, ArgumentNode { }
/** An object might have its value changed after a store. */
class StorePreUpdateNode extends PreUpdateNode, CfgNode {
StorePreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Store
)
}
}
/** A node marking the state change of an object after a read */
class ReadPreUpdateNode extends PreUpdateNode, CfgNode {
ReadPreUpdateNode() {
exists(Attribute a |
node = a.getObject().getAFlowNode() and
a.getCtx() instanceof Load
)
}
}
/**
* A node associated with an object after an operation that might have
* changed its state.
@@ -24,12 +50,21 @@ class DataFlowCfgNode extends ControlFlowNode {
* an update to the field.
*
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ObjectCreation`,
* which represents the value after the constructor has run.
* to the value before the update.
*/
abstract class PostUpdateNode extends Node {
class PostUpdateNode extends Node, TPostUpdateNode {
PreUpdateNode pre;
PostUpdateNode() { this = TPostUpdateNode(pre) }
/** Gets the node before the state update. */
abstract Node getPreUpdateNode();
Node getPreUpdateNode() { result = pre }
override string toString() { result = "[post] " + pre.toString() }
override Scope getScope() { result = pre.getScope() }
override Location getLocation() { result = pre.getLocation() }
}
class DataFlowExpr = Expr;
@@ -98,7 +133,17 @@ module EssaFlow {
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
not nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable and
not nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable and
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
EssaFlow::essaFlowStep(update(nodeFrom), nodeTo)
}
private Node update(Node node) {
exists(PostUpdateNode pun |
node = pun.getPreUpdateNode() and
result = pun
)
or
not exists(PostUpdateNode pun | node = pun.getPreUpdateNode()) and
result = node
}
// TODO: Make modules for these headings

View File

@@ -2,8 +2,9 @@
* Provides Python-specific definitions for use in the data flow library.
*/
import python
private import python
private import DataFlowPrivate
import experimental.dataflow.TypeTracker
/**
* IPA type for data flow nodes.
@@ -20,7 +21,9 @@ newtype TNode =
/** A node corresponding to an SSA variable. */
TEssaNode(EssaVariable var) or
/** A node corresponding to a control flow node. */
TCfgNode(DataFlowCfgNode node)
TCfgNode(DataFlowCfgNode node) or
/** A node representing the value of an object after a state change */
TPostUpdateNode(PreUpdateNode pre)
/**
* An element, viewed as a node in a data flow graph. Either an SSA variable
@@ -67,6 +70,14 @@ class Node extends TNode {
/** Convenience method for casting to ExprNode and calling getNode and getNode again. */
Expr asExpr() { none() }
/**
* Gets a node that this node may flow to using one heap and/or interprocedural step.
*
* See `TypeTracker` for more details about how to use this.
*/
pragma[inline]
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
}
class EssaNode extends Node, TEssaNode {

View File

@@ -30,14 +30,24 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
subscriptStep(nodeFrom, nodeTo)
or
stringManipulation(nodeFrom, nodeTo)
or
jsonStep(nodeFrom, nodeTo)
or
containerStep(nodeFrom, nodeTo)
or
copyStep(nodeFrom, nodeTo)
or
forStep(nodeFrom, nodeTo)
or
unpackingAssignmentStep(nodeFrom, nodeTo)
}
/**
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to concatenation.
*
* Note that since we cannot easily distinguish interesting types (like string, list, tuple),
* we consider any `+` operation to propagate taint. After consulting with the JS team, this
* doesn't sound like it is a big problem in practice.
* we consider any `+` operation to propagate taint. This is what is done in the JS libraries,
* and isn't a big problem in practice.
*/
predicate concatStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(BinaryExprNode add | add = nodeTo.getNode() |
@@ -118,8 +128,101 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
)
or
// f-strings
nodeTo.getNode().getNode().(Fstring).getAValue() = nodeFrom.getNode().getNode()
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
// TODO: Handle encode/decode from base64/quopri
// TODO: Handle os.path.join
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to JSON encoding/decoding.
 *
 * Only recognizes calls of the form `json.load(..)`, `json.loads(..)`, or `json.dumps(..)`
 * where the receiver is literally named `json` (a renamed import would not match).
 */
predicate jsonStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
call.getFunction().(AttrNode).getObject(["load", "loads", "dumps"]).(NameNode).getId() = "json" and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to containers
 * (lists/sets/dictionaries): literals, constructor invocation, methods. Note that this
 * is currently very imprecise; as an example, since we model `dict.get`, we treat any
 * `<tainted object>.get(<arg>)` as tainted, whether that is accurate or not.
 */
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
// construction by literal
// TODO: Not limiting the content argument here feels like a BIG hack, but we currently get nothing for free :|
storeStep(nodeFrom, _, nodeTo)
or
// constructor call
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["list", "set", "frozenset", "dict", "defaultdict",
"tuple"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// functions operating on collections
exists(CallNode call | call = nodeTo.asCfgNode() |
call.getFunction().(NameNode).getId() in ["sorted", "reversed", "iter", "next"] and
call.getArg(0) = nodeFrom.getNode()
)
or
// methods
exists(CallNode call, string name | call = nodeTo.asCfgNode() |
name in ["copy",
// general
"pop",
// dict
"values", "items", "get", "popitem"] and
call.getFunction().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
)
or
// list.append, set.add
// Here taint flows into the mutated receiver, so the target is the
// post-update node of the object the method is called on.
exists(CallNode call, string name |
name in ["append", "add"] and
call.getFunction().(AttrNode).getObject(name) =
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to copying.
 *
 * Recognizes both bare calls `copy(..)`/`deepcopy(..)` and qualified calls
 * `copy.copy(..)`/`copy.deepcopy(..)`.
 */
predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
exists(CallNode call | call = nodeTo.getNode() |
// Fully qualified: copy.copy, copy.deepcopy
(
call.getFunction().(NameNode).getId() in ["copy", "deepcopy"]
or
call.getFunction().(AttrNode).getObject(["copy", "deepcopy"]).(NameNode).getId() = "copy"
) and
call.getArg(0) = nodeFrom.getNode()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
 * for example `for x in xs`, or `for x,y in points`.
 *
 * Nested targets are handled as well, since any (transitive) child node of the
 * `for` target is considered.
 */
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
exists(EssaNodeDefinition defn, For for |
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = for.getIter()
)
}
/**
 * Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
 * Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
 */
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
// `contains` is used so that nested targets such as `[[x, *xs], ys]` also match.
exists(MultiAssignmentDefinition defn, Assign assign |
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
nodeTo.getVar() = defn and
nodeFrom.asExpr() = assign.getValue()
)
}

View File

@@ -28,11 +28,11 @@
* }
*
* override predicate hasActualResult(
* Location location, string element, string tag, string valuesasas
* Location location, string element, string tag, string value
* ) {
* exists(Expr e |
* tag = "const" and // The tag for this test.
* valuesasas = e.getValue() and // The expected value. Will only hold for constant expressions.
* value = e.getValue() and // The expected value. Will only hold for constant expressions.
* location = e.getLocation() and // The location of the result to be reported.
* element = e.toString() // The display text for the result.
* )

View File

@@ -1 +0,0 @@
experimental/CWE-074/TemplateInjection.ql

View File

@@ -1 +0,0 @@
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/

View File

@@ -1 +0,0 @@
experimental/CWE-091/Xslt.ql

View File

@@ -1 +0,0 @@
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3

View File

@@ -1 +0,0 @@
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/

View File

@@ -1 +0,0 @@
experimental/CWE-643/xpath.ql

View File

@@ -38,3 +38,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a | test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -23,3 +23,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -23,3 +23,4 @@
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
| test.py:7:5:7:20 | GSSA Variable a |
| test.py:7:19:7:19 | ControlFlowNode for a |
| test.py:7:19:7:19 | [post] ControlFlowNode for a |

View File

@@ -1,3 +1,4 @@
private import python
import experimental.dataflow.DataFlow
/**

View File

@@ -127,27 +127,4 @@ postHasUniquePre
uniquePostUpdate
postIsInSameCallable
reverseRead
storeIsPostUpdate
| test.py:172:9:172:16 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
| test.py:173:9:173:24 | ControlFlowNode for Dict | Store targets should be PostUpdateNodes. |
| test.py:212:11:212:18 | ControlFlowNode for List | Store targets should be PostUpdateNodes. |
argHasPostUpdate
| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:120:13:120:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:121:12:121:12 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:125:13:125:29 | ControlFlowNode for TAINT_FROM_ARG() | ArgumentNode is missing PostUpdateNode. |
| test.py:178:15:178:15 | ControlFlowNode for l | ArgumentNode is missing PostUpdateNode. |
| test.py:179:15:179:15 | ControlFlowNode for d | ArgumentNode is missing PostUpdateNode. |
| test.py:200:19:200:19 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
| test.py:200:22:200:24 | ControlFlowNode for str | ArgumentNode is missing PostUpdateNode. |

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -1,3 +1,4 @@
import python
import experimental.dataflow.DataFlow
/**

View File

@@ -93,7 +93,7 @@ class With_str:
def __str__(self):
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return "Awesome"
@@ -108,7 +108,7 @@ class With_bytes:
def __bytes__(self):
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return b"Awesome"
@@ -124,7 +124,7 @@ class With_format:
def __format__(self, format_spec):
SINK2(format_spec) # Flow not found
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return "Awesome"
@@ -151,7 +151,7 @@ class With_lt:
def __lt__(self, other):
SINK2(other) # Flow not found
SINK1(self) # Flow not found
OK() # Call not found # Call not found
OK() # Call not found
return ""

View File

@@ -2,6 +2,7 @@
* @kind path-problem
*/
import python
import experimental.dataflow.testConfig
import DataFlow::PathGraph

View File

@@ -5,6 +5,7 @@
* hope to remove the false positive.
*/
import python
import experimental.dataflow.testConfig
from DataFlow::Node source, DataFlow::Node sink

View File

@@ -6,7 +6,8 @@ class TestTaintTrackingConfiguration extends TaintTracking::Configuration {
TestTaintTrackingConfiguration() { this = "TestTaintTrackingConfiguration" }
override predicate isSource(DataFlow::Node source) {
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES"]
source.(DataFlow::CfgNode).getNode().(NameNode).getId() in ["TAINTED_STRING", "TAINTED_BYTES",
"TAINTED_LIST", "TAINTED_DICT"]
}
override predicate isSink(DataFlow::Node sink) {
@@ -44,7 +45,8 @@ private string repr(Expr e) {
query predicate test_taint(string arg_location, string test_res, string function_name, string repr) {
exists(Call call, Expr arg, boolean expected_taint, boolean has_taint |
call.getLocation().getFile().getShortName() = "test.py" and
// only consider files that are extracted as part of the test
exists(call.getLocation().getFile().getRelativePath()) and
(
call.getFunc().(Name).getId() = "ensure_tainted" and
expected_taint = true

View File

@@ -0,0 +1,22 @@
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
| test_collections.py:24 | ok | list_clear | tainted_list |
| test_collections.py:27 | fail | list_clear | tainted_list |
| test_string.py:17 | ok | str_methods | ts.casefold() |
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |
| test_string.py:31 | fail | binary_decode_encode | base64.a85encode(..) |
| test_string.py:32 | fail | binary_decode_encode | base64.a85decode(..) |
| test_string.py:35 | fail | binary_decode_encode | base64.b85encode(..) |
| test_string.py:36 | fail | binary_decode_encode | base64.b85decode(..) |
| test_string.py:39 | fail | binary_decode_encode | base64.encodebytes(..) |
| test_string.py:40 | fail | binary_decode_encode | base64.decodebytes(..) |
| test_string.py:48 | ok | f_strings | Fstring |
| test_unpacking.py:18 | ok | extended_unpacking | first |
| test_unpacking.py:18 | ok | extended_unpacking | last |
| test_unpacking.py:18 | ok | extended_unpacking | rest |
| test_unpacking.py:23 | ok | also_allowed | a |
| test_unpacking.py:31 | ok | also_allowed | b |
| test_unpacking.py:31 | ok | also_allowed | c |
| test_unpacking.py:39 | ok | nested | x |
| test_unpacking.py:39 | ok | nested | xs |
| test_unpacking.py:39 | ok | nested | ys |

View File

@@ -0,0 +1,32 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *

# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from ..taintlib import *


# Actual tests

def test_access():
    # A copy of a tainted list should itself be tainted.
    tainted_list = TAINTED_LIST
    ensure_tainted(
        tainted_list.copy(),
    )


def list_clear():
    # After `clear()` the list no longer contains the tainted element,
    # so it should not be reported as tainted.
    tainted_string = TAINTED_STRING
    tainted_list = [tainted_string]
    ensure_tainted(tainted_list)
    tainted_list.clear()
    ensure_not_tainted(tainted_list)


# Make tests runable
test_access()
list_clear()

View File

@@ -1,20 +1,11 @@
# Python 3 specific taint tracking for string
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
def ensure_tainted(*args):
print("- ensure_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
def ensure_not_tainted(*args):
print("- ensure_not_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests

View File

@@ -0,0 +1,46 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *

# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from ..taintlib import *


# Actual tests

# Extended Iterable Unpacking -- PEP 3132
# https://www.python.org/dev/peps/pep-3132/

def extended_unpacking():
    # Starred assignment should propagate taint to every target.
    first, *rest, last = TAINTED_LIST
    ensure_tainted(first, rest, last)


def also_allowed():
    # A lone starred target (with trailing comma) is valid syntax per PEP 3132.
    *a, = TAINTED_LIST
    ensure_tainted(a)

    # for b, *c in [(1, 2, 3), (4, 5, 6, 7)]:
    #     print(c)
    # i=0; c=[2,3]
    # i=1; c=[5,6,7]
    for b, *c in [TAINTED_LIST, TAINTED_LIST]:
        ensure_tainted(b, c)


def nested():
    # Nested unpacking targets should also receive taint.
    l = TAINTED_LIST
    ll = [l,l]
    [[x, *xs], ys] = ll
    ensure_tainted(x, xs, ys)


# Make tests runable
extended_unpacking()
also_allowed()
nested()

View File

@@ -0,0 +1,172 @@
| test_collections.py:23 | ok | test_construction | tainted_string |
| test_collections.py:24 | ok | test_construction | tainted_list |
| test_collections.py:25 | ok | test_construction | tainted_tuple |
| test_collections.py:26 | ok | test_construction | tainted_set |
| test_collections.py:27 | ok | test_construction | tainted_dict |
| test_collections.py:31 | ok | test_construction | list(..) |
| test_collections.py:32 | ok | test_construction | list(..) |
| test_collections.py:33 | ok | test_construction | list(..) |
| test_collections.py:34 | ok | test_construction | list(..) |
| test_collections.py:35 | ok | test_construction | list(..) |
| test_collections.py:37 | ok | test_construction | tuple(..) |
| test_collections.py:38 | ok | test_construction | set(..) |
| test_collections.py:39 | ok | test_construction | frozenset(..) |
| test_collections.py:47 | ok | test_access | tainted_list[0] |
| test_collections.py:48 | ok | test_access | tainted_list[x] |
| test_collections.py:49 | ok | test_access | tainted_list[Slice] |
| test_collections.py:51 | ok | test_access | sorted(..) |
| test_collections.py:52 | ok | test_access | reversed(..) |
| test_collections.py:53 | ok | test_access | iter(..) |
| test_collections.py:54 | ok | test_access | next(..) |
| test_collections.py:58 | ok | test_access | a |
| test_collections.py:58 | ok | test_access | b |
| test_collections.py:58 | ok | test_access | c |
| test_collections.py:61 | ok | test_access | h |
| test_collections.py:63 | ok | test_access | i |
| test_collections.py:70 | ok | test_dict_access | tainted_dict["name"] |
| test_collections.py:71 | ok | test_dict_access | tainted_dict.get(..) |
| test_collections.py:72 | ok | test_dict_access | tainted_dict[x] |
| test_collections.py:73 | ok | test_dict_access | tainted_dict.copy() |
| test_collections.py:77 | ok | test_dict_access | v |
| test_collections.py:79 | ok | test_dict_access | v |
| test_collections.py:87 | fail | test_named_tuple | point[0] |
| test_collections.py:88 | fail | test_named_tuple | point.x |
| test_collections.py:92 | ok | test_named_tuple | point[1] |
| test_collections.py:93 | ok | test_named_tuple | point.y |
| test_collections.py:97 | fail | test_named_tuple | a |
| test_collections.py:98 | ok | test_named_tuple | b |
| test_collections.py:106 | fail | test_defaultdict | tainted_default_dict["name"] |
| test_collections.py:107 | fail | test_defaultdict | tainted_default_dict.get(..) |
| test_collections.py:108 | fail | test_defaultdict | tainted_default_dict[x] |
| test_collections.py:109 | fail | test_defaultdict | tainted_default_dict.copy() |
| test_collections.py:112 | fail | test_defaultdict | v |
| test_collections.py:114 | fail | test_defaultdict | v |
| test_collections.py:121 | ok | test_copy_1 | copy(..) |
| test_collections.py:122 | ok | test_copy_1 | deepcopy(..) |
| test_collections.py:130 | ok | test_copy_2 | copy.copy(..) |
| test_collections.py:131 | ok | test_copy_2 | copy.deepcopy(..) |
| test_collections.py:139 | ok | list_index_assign | my_list |
| test_collections.py:142 | fail | list_index_assign | my_list |
| test_collections.py:149 | ok | list_index_aug_assign | my_list |
| test_collections.py:152 | fail | list_index_aug_assign | my_list |
| test_collections.py:159 | ok | list_append | my_list |
| test_collections.py:162 | fail | list_append | my_list |
| test_collections.py:169 | ok | list_extend | my_list |
| test_collections.py:172 | fail | list_extend | my_list |
| test_collections.py:179 | ok | dict_update_dict | my_dict |
| test_collections.py:182 | fail | dict_update_dict | my_dict |
| test_collections.py:189 | ok | dict_update_kv_list | my_dict |
| test_collections.py:192 | fail | dict_update_kv_list | my_dict |
| test_collections.py:198 | ok | dict_update_kv_arg | my_dict |
| test_collections.py:201 | fail | dict_update_kv_arg | my_dict |
| test_collections.py:208 | ok | dict_manual_update | my_dict |
| test_collections.py:212 | fail | dict_manual_update | my_dict |
| test_collections.py:220 | fail | dict_merge | merged |
| test_collections.py:227 | ok | set_add | my_set |
| test_collections.py:230 | fail | set_add | my_set |
| test_json.py:26 | ok | test | json.dumps(..) |
| test_json.py:27 | ok | test | json.loads(..) |
| test_json.py:34 | fail | test | tainted_filelike |
| test_json.py:35 | fail | test | json.load(..) |
| test_json.py:48 | fail | non_syntacical | dumps(..) |
| test_json.py:49 | fail | non_syntacical | dumps_alias(..) |
| test_json.py:50 | fail | non_syntacical | loads(..) |
| test_json.py:57 | fail | non_syntacical | tainted_filelike |
| test_json.py:58 | fail | non_syntacical | load(..) |
| test_string.py:25 | ok | str_operations | ts |
| test_string.py:26 | ok | str_operations | BinaryExpr |
| test_string.py:27 | ok | str_operations | BinaryExpr |
| test_string.py:28 | ok | str_operations | BinaryExpr |
| test_string.py:29 | ok | str_operations | ts[Slice] |
| test_string.py:30 | ok | str_operations | ts[Slice] |
| test_string.py:31 | ok | str_operations | ts[Slice] |
| test_string.py:32 | ok | str_operations | ts[0] |
| test_string.py:33 | ok | str_operations | str(..) |
| test_string.py:34 | ok | str_operations | bytes(..) |
| test_string.py:35 | ok | str_operations | unicode(..) |
| test_string.py:39 | ok | str_operations | aug_assignment |
| test_string.py:41 | ok | str_operations | aug_assignment |
| test_string.py:49 | ok | str_methods | ts.capitalize() |
| test_string.py:50 | ok | str_methods | ts.center(..) |
| test_string.py:51 | ok | str_methods | ts.expandtabs() |
| test_string.py:53 | ok | str_methods | ts.format() |
| test_string.py:54 | ok | str_methods | "{}".format(..) |
| test_string.py:55 | ok | str_methods | "{unsafe}".format(..) |
| test_string.py:57 | ok | str_methods | ts.join(..) |
| test_string.py:58 | ok | str_methods | "".join(..) |
| test_string.py:60 | ok | str_methods | ts.ljust(..) |
| test_string.py:61 | ok | str_methods | ts.lstrip() |
| test_string.py:62 | ok | str_methods | ts.lower() |
| test_string.py:64 | ok | str_methods | ts.replace(..) |
| test_string.py:65 | ok | str_methods | "safe".replace(..) |
| test_string.py:67 | ok | str_methods | ts.rjust(..) |
| test_string.py:68 | ok | str_methods | ts.rstrip() |
| test_string.py:69 | ok | str_methods | ts.strip() |
| test_string.py:70 | ok | str_methods | ts.swapcase() |
| test_string.py:71 | ok | str_methods | ts.title() |
| test_string.py:72 | ok | str_methods | ts.upper() |
| test_string.py:73 | ok | str_methods | ts.zfill(..) |
| test_string.py:75 | ok | str_methods | ts.encode(..) |
| test_string.py:76 | ok | str_methods | ts.encode(..).decode(..) |
| test_string.py:78 | ok | str_methods | tb.decode(..) |
| test_string.py:79 | ok | str_methods | tb.decode(..).encode(..) |
| test_string.py:82 | ok | str_methods | ts.partition(..) |
| test_string.py:83 | ok | str_methods | ts.rpartition(..) |
| test_string.py:84 | ok | str_methods | ts.rsplit(..) |
| test_string.py:85 | ok | str_methods | ts.split(..) |
| test_string.py:86 | ok | str_methods | ts.splitlines() |
| test_string.py:91 | ok | str_methods | "safe".replace(..) |
| test_string.py:93 | fail | str_methods | ts.join(..) |
| test_string.py:94 | fail | str_methods | ts.join(..) |
| test_string.py:104 | fail | non_syntactic | meth() |
| test_string.py:105 | fail | non_syntactic | _str(..) |
| test_string.py:114 | ok | percent_fmt | BinaryExpr |
| test_string.py:115 | ok | percent_fmt | BinaryExpr |
| test_string.py:116 | ok | percent_fmt | BinaryExpr |
| test_string.py:126 | fail | binary_decode_encode | base64.b64encode(..) |
| test_string.py:127 | fail | binary_decode_encode | base64.b64decode(..) |
| test_string.py:129 | fail | binary_decode_encode | base64.standard_b64encode(..) |
| test_string.py:130 | fail | binary_decode_encode | base64.standard_b64decode(..) |
| test_string.py:132 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
| test_string.py:133 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
| test_string.py:135 | fail | binary_decode_encode | base64.b32encode(..) |
| test_string.py:136 | fail | binary_decode_encode | base64.b32decode(..) |
| test_string.py:138 | fail | binary_decode_encode | base64.b16encode(..) |
| test_string.py:139 | fail | binary_decode_encode | base64.b16decode(..) |
| test_string.py:142 | fail | binary_decode_encode | base64.encodestring(..) |
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
| test_unpacking.py:16 | ok | unpacking | a |
| test_unpacking.py:16 | ok | unpacking | b |
| test_unpacking.py:16 | ok | unpacking | c |
| test_unpacking.py:22 | ok | unpacking_to_list | a |
| test_unpacking.py:22 | ok | unpacking_to_list | b |
| test_unpacking.py:22 | ok | unpacking_to_list | c |
| test_unpacking.py:31 | ok | nested | a1 |
| test_unpacking.py:31 | ok | nested | a2 |
| test_unpacking.py:31 | ok | nested | a3 |
| test_unpacking.py:31 | ok | nested | b |
| test_unpacking.py:31 | ok | nested | c |
| test_unpacking.py:35 | ok | nested | a1 |
| test_unpacking.py:35 | ok | nested | a2 |
| test_unpacking.py:35 | ok | nested | a3 |
| test_unpacking.py:35 | ok | nested | b |
| test_unpacking.py:35 | ok | nested | c |
| test_unpacking.py:39 | ok | nested | a1 |
| test_unpacking.py:39 | ok | nested | a2 |
| test_unpacking.py:39 | ok | nested | a3 |
| test_unpacking.py:39 | ok | nested | b |
| test_unpacking.py:39 | ok | nested | c |
| test_unpacking.py:46 | ok | unpack_from_set | a |
| test_unpacking.py:46 | ok | unpack_from_set | b |
| test_unpacking.py:46 | ok | unpack_from_set | c |
| test_unpacking.py:55 | ok | contrived_1 | a |
| test_unpacking.py:55 | ok | contrived_1 | b |
| test_unpacking.py:55 | ok | contrived_1 | c |
| test_unpacking.py:56 | fail | contrived_1 | d |
| test_unpacking.py:56 | fail | contrived_1 | e |
| test_unpacking.py:56 | fail | contrived_1 | f |
| test_unpacking.py:65 | ok | contrived_2 | a |
| test_unpacking.py:65 | ok | contrived_2 | b |
| test_unpacking.py:65 | ok | contrived_2 | c |

View File

@@ -0,0 +1,254 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
from collections import defaultdict, namedtuple
def test_construction():
    # Building a container from a tainted element should taint the container,
    # and converting between container types should keep the taint.
    tainted_string = TAINTED_STRING
    tainted_list = [tainted_string]
    tainted_tuple = (tainted_string,)
    tainted_set = {tainted_string}
    tainted_dict = {'key': tainted_string}

    ensure_tainted(
        tainted_string,
        tainted_list,
        tainted_tuple,
        tainted_set,
        tainted_dict,
    )

    ensure_tainted(
        list(tainted_list),
        list(tainted_tuple),
        list(tainted_set),
        list(tainted_dict.values()),
        list(tainted_dict.items()),
        tuple(tainted_list),
        set(tainted_list),
        frozenset(tainted_list),
    )
def test_access(x, y, z):
    # Reading elements out of a tainted list -- by constant index, variable
    # index, slice, sorting, reversal, or iteration -- should yield taint.
    tainted_list = TAINTED_LIST
    ensure_tainted(
        tainted_list[0],
        tainted_list[x],
        tainted_list[y:z],
        sorted(tainted_list),
        reversed(tainted_list),
        iter(tainted_list),
        next(iter(tainted_list)),
    )

    # Unpacking a tainted slice taints every target.
    a, b, c = tainted_list[0:3]
    ensure_tainted(a, b, c)

    for h in tainted_list:
        ensure_tainted(h)

    for i in reversed(tainted_list):
        ensure_tainted(i)
def test_dict_access(x):
    # Dict reads by constant key, `.get`, variable key, and `.copy`, plus
    # iteration over values/items, should all propagate taint.
    tainted_dict = TAINTED_DICT
    ensure_tainted(
        tainted_dict["name"],
        tainted_dict.get("name"),
        tainted_dict[x],
        tainted_dict.copy(),
    )

    for v in tainted_dict.values():
        ensure_tainted(v)

    for k, v in tainted_dict.items():
        ensure_tainted(v)
def test_named_tuple(): # TODO: namedtuple currently not handled
    Point = namedtuple('Point', ['x', 'y'])
    # Only the first field is tainted; index access, attribute access, and
    # unpacking should all distinguish the tainted field from the safe one.
    point = Point(TAINTED_STRING, 'safe')
    ensure_tainted(
        point[0],
        point.x,
    )
    ensure_not_tainted(
        point[1],
        point.y,
    )
    a, b = point
    ensure_tainted(a)
    ensure_not_tainted(b)
def test_defaultdict(key, x): # TODO: defaultdict currently not handled
    tainted_default_dict = defaultdict(str)
    # Taint enters via augmented assignment on an arbitrary key.
    tainted_default_dict[key] += TAINTED_STRING
    ensure_tainted(
        tainted_default_dict["name"],
        tainted_default_dict.get("name"),
        tainted_default_dict[x],
        tainted_default_dict.copy(),
    )

    for v in tainted_default_dict.values():
        ensure_tainted(v)

    for k, v in tainted_default_dict.items():
        ensure_tainted(v)
def test_copy_1():
    # `copy`/`deepcopy` imported as bare names should preserve taint.
    from copy import copy, deepcopy
    ensure_tainted(
        copy(TAINTED_LIST),
        deepcopy(TAINTED_LIST),
    )

def test_copy_2():
    # Same functions, accessed as attributes of the `copy` module.
    import copy
    ensure_tainted(
        copy.copy(TAINTED_LIST),
        copy.deepcopy(TAINTED_LIST),
    )
def list_index_assign():
    # Storing a tainted element through an index taints the whole list.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list[0] = tainted_string
    ensure_tainted(my_list)

def list_index_aug_assign():
    # Augmented assignment through an index should also taint the list.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list[0] += tainted_string
    ensure_tainted(my_list)

def list_append():
    # `list.append` with a tainted argument taints the receiver.
    tainted_string = TAINTED_STRING
    my_list = ["safe"]
    ensure_not_tainted(my_list)
    my_list.append(tainted_string)
    ensure_tainted(my_list)

def list_extend():
    # `list.extend` with a tainted iterable taints the receiver.
    my_list = ["safe"]
    tainted_list = [TAINTED_STRING]
    ensure_not_tainted(my_list)
    my_list.extend(tainted_list)
    ensure_tainted(my_list)

def dict_update_dict():
    # `dict.update` with a tainted mapping taints the receiver.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    ensure_not_tainted(my_dict)
    my_dict.update(tainted_dict)
    ensure_tainted(my_dict)

def dict_update_kv_list():
    # `dict.update` with a tainted list of (key, value) pairs.
    my_dict = {"key1": "safe"}
    tainted_kv_list = [("key2", TAINTED_STRING)]
    ensure_not_tainted(my_dict)
    my_dict.update(tainted_kv_list)
    ensure_tainted(my_dict)

def dict_update_kv_arg():
    # `dict.update` with a tainted keyword argument.
    my_dict = {"key1": "safe"}
    ensure_not_tainted(my_dict)
    my_dict.update(key2=TAINTED_STRING)
    ensure_tainted(my_dict)

def dict_manual_update():
    # Element-by-element copy from a tainted dict should taint the target.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    ensure_not_tainted(my_dict)
    for k in tainted_dict:
        my_dict[k] = tainted_dict[k]
    ensure_tainted(my_dict)

def dict_merge():
    # Double-star merge of a tainted dict into a new dict literal.
    my_dict = {"key1": "safe"}
    tainted_dict = {"key2": TAINTED_STRING}
    merged = {**my_dict, **tainted_dict}
    ensure_tainted(merged)

def set_add():
    # `set.add` with a tainted element taints the receiver.
    tainted_string = TAINTED_STRING
    my_set = {"safe"}
    ensure_not_tainted(my_set)
    my_set.add(tainted_string)
    ensure_tainted(my_set)
# Make tests runnable: importing the module executes every scenario once.
test_construction()
test_access(0, 0, 2)
test_dict_access("name")
test_named_tuple()
test_defaultdict("key", "key")
test_copy_1()
test_copy_2()
list_index_assign()
list_index_aug_assign()
list_append()
list_extend()
dict_update_dict()
dict_update_kv_list()
dict_update_kv_arg()
dict_manual_update()
dict_merge()
set_add()

View File

@@ -0,0 +1,64 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
from io import StringIO
# Workaround for Python3 not having unicode
import sys
if sys.version_info[0] == 3:
unicode = str
def test():
    # `json.dumps`/`json.loads` round-trips should keep taint, and so should
    # `json.load` from a tainted file-like object.
    print("\n# test")
    ts = TAINTED_STRING

    import json
    ensure_tainted(
        json.dumps(ts),
        json.loads(json.dumps(ts)),
    )

    # For Python2, need to convert to unicode for StringIO to work
    tainted_filelike = StringIO(unicode(json.dumps(ts)))
    ensure_tainted(
        tainted_filelike,
        json.load(tainted_filelike),
    )
def non_syntacical():
    # NOTE(review): name is presumably a typo for "non_syntactical", but it is
    # referenced by the .expected result files, so it must stay as-is.
    # Same checks as `test`, but with the json functions bound to local names
    # (and an alias), so the flow is not recognizable purely syntactically.
    print("\n# non_syntacical")
    ts = TAINTED_STRING

    # a less syntactical approach
    from json import load, loads, dumps
    dumps_alias = dumps

    ensure_tainted(
        dumps(ts),
        dumps_alias(ts),
        loads(dumps(ts)),
    )

    # For Python2, need to convert to unicode for StringIO to work
    tainted_filelike = StringIO(unicode(dumps(ts)))
    ensure_tainted(
        tainted_filelike,
        load(tainted_filelike),
    )
# Make tests runnable: importing the module executes every scenario once.
test()
non_syntacical()

View File

@@ -1,27 +1,20 @@
import sys
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
if sys.version_info[0] == 3:
unicode = str
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
def ensure_tainted(*args):
print("- ensure_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
def ensure_not_tainted(*args):
print("- ensure_not_tainted")
for i, arg in enumerate(args):
print("arg {}: {!r}".format(i, arg))
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
# Workaround for Python3 not having unicode
import sys
if sys.version_info[0] == 3:
unicode = str
def str_operations():
print("\n# str_operations")
@@ -42,6 +35,11 @@ def str_operations():
unicode(ts),
)
aug_assignment = "safe"
ensure_not_tainted(aug_assignment)
aug_assignment += TAINTED_STRING
ensure_tainted(aug_assignment)
def str_methods():
print("\n# str_methods")
@@ -140,18 +138,6 @@ def binary_decode_encode():
base64.b16encode(tb),
base64.b16decode(base64.b16encode(tb)),
# # New in Python 3.4
# base64.a85encode(tb),
# base64.a85decode(base64.a85encode(tb)),
# # New in Python 3.4
# base64.b85encode(tb),
# base64.b85decode(base64.b85encode(tb)),
# # New in Python 3.1
# base64.encodebytes(tb),
# base64.decodebytes(base64.encodebytes(tb)),
# deprecated since Python 3.1, but still works
base64.encodestring(tb),
base64.decodestring(base64.encodestring(tb)),

View File

@@ -0,0 +1,75 @@
# Add taintlib to PATH so it can be imported during runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
def unpacking():
    # Plain tuple-target unpacking of a tainted slice.
    l = TAINTED_LIST[0:3]
    a, b, c = l
    ensure_tainted(a, b, c)

def unpacking_to_list():
    # Same, but with a list as the assignment target.
    l = TAINTED_LIST[0:3]
    [a, b, c] = l
    ensure_tainted(a, b, c)
def nested():
    # Nested unpacking patterns: list, tuple, and mixed targets should all
    # propagate taint into every bound name.
    l = TAINTED_LIST[0:3]
    ll = [l, l, l]

    # list
    [[a1, a2, a3], b, c] = ll
    ensure_tainted(a1, a2, a3, b, c)

    # tuple
    ((a1, a2, a3), b, c) = ll
    ensure_tainted(a1, a2, a3, b, c)

    # mixed
    [(a1, a2, a3), b, c] = ll
    ensure_tainted(a1, a2, a3, b, c)
def unpack_from_set():
    # no guarantee on ordering ... don't know why you would ever do this
    a, b, c = {"foo", "bar", TAINTED_STRING}
    # either all should be tainted, or none of them
    ensure_tainted(a, b, c)

def contrived_1():
    # A contrived example. Don't know why anyone would ever actually do this.
    # Parallel unpacking of a tainted and an untainted sequence.
    tainted_list = TAINTED_LIST[0:3]
    no_taint_list = [1,2,3]
    (a, b, c), (d, e, f) = tainted_list, no_taint_list
    ensure_tainted(a, b, c)
    ensure_not_tainted(d, e, f) # FP: we mark `d`, `e` and `f` as tainted.

def contrived_2():
    # A contrived example. Don't know why anyone would ever actually do this.
    # Old taint tracking was only able to handle taint nested 2 levels in sequences,
    # so would not mark a, b, c as tainted
    [[[ (a, b, c) ]]] = [[[ TAINTED_LIST[0:3] ]]]
    ensure_tainted(a, b, c)
# Make tests runnable: importing the module executes every scenario once.
unpacking()
unpacking_to_list()
nested()
unpack_from_set()
contrived_1()
contrived_2()

View File

@@ -1,10 +0,0 @@
| test.py:26 | ok | str_methods | ts.casefold() |
| test.py:28 | ok | str_methods | ts.format_map(..) |
| test.py:29 | fail | str_methods | "{unsafe}".format_map(..) |
| test.py:40 | fail | binary_decode_encode | base64.a85encode(..) |
| test.py:41 | fail | binary_decode_encode | base64.a85decode(..) |
| test.py:44 | fail | binary_decode_encode | base64.b85encode(..) |
| test.py:45 | fail | binary_decode_encode | base64.b85decode(..) |
| test.py:48 | fail | binary_decode_encode | base64.encodebytes(..) |
| test.py:49 | fail | binary_decode_encode | base64.decodebytes(..) |
| test.py:57 | ok | f_strings | Fstring |

View File

@@ -1,62 +0,0 @@
| test.py:32 | ok | str_operations | ts |
| test.py:33 | ok | str_operations | BinaryExpr |
| test.py:34 | ok | str_operations | BinaryExpr |
| test.py:35 | ok | str_operations | BinaryExpr |
| test.py:36 | ok | str_operations | ts[Slice] |
| test.py:37 | ok | str_operations | ts[Slice] |
| test.py:38 | ok | str_operations | ts[Slice] |
| test.py:39 | ok | str_operations | ts[0] |
| test.py:40 | ok | str_operations | str(..) |
| test.py:41 | ok | str_operations | bytes(..) |
| test.py:42 | ok | str_operations | unicode(..) |
| test.py:51 | ok | str_methods | ts.capitalize() |
| test.py:52 | ok | str_methods | ts.center(..) |
| test.py:53 | ok | str_methods | ts.expandtabs() |
| test.py:55 | ok | str_methods | ts.format() |
| test.py:56 | ok | str_methods | "{}".format(..) |
| test.py:57 | ok | str_methods | "{unsafe}".format(..) |
| test.py:59 | ok | str_methods | ts.join(..) |
| test.py:60 | fail | str_methods | "".join(..) |
| test.py:62 | ok | str_methods | ts.ljust(..) |
| test.py:63 | ok | str_methods | ts.lstrip() |
| test.py:64 | ok | str_methods | ts.lower() |
| test.py:66 | ok | str_methods | ts.replace(..) |
| test.py:67 | ok | str_methods | "safe".replace(..) |
| test.py:69 | ok | str_methods | ts.rjust(..) |
| test.py:70 | ok | str_methods | ts.rstrip() |
| test.py:71 | ok | str_methods | ts.strip() |
| test.py:72 | ok | str_methods | ts.swapcase() |
| test.py:73 | ok | str_methods | ts.title() |
| test.py:74 | ok | str_methods | ts.upper() |
| test.py:75 | ok | str_methods | ts.zfill(..) |
| test.py:77 | ok | str_methods | ts.encode(..) |
| test.py:78 | ok | str_methods | ts.encode(..).decode(..) |
| test.py:80 | ok | str_methods | tb.decode(..) |
| test.py:81 | ok | str_methods | tb.decode(..).encode(..) |
| test.py:84 | ok | str_methods | ts.partition(..) |
| test.py:85 | ok | str_methods | ts.rpartition(..) |
| test.py:86 | ok | str_methods | ts.rsplit(..) |
| test.py:87 | ok | str_methods | ts.split(..) |
| test.py:88 | ok | str_methods | ts.splitlines() |
| test.py:93 | ok | str_methods | "safe".replace(..) |
| test.py:95 | fail | str_methods | ts.join(..) |
| test.py:96 | fail | str_methods | ts.join(..) |
| test.py:106 | fail | non_syntactic | meth() |
| test.py:107 | fail | non_syntactic | _str(..) |
| test.py:116 | ok | percent_fmt | BinaryExpr |
| test.py:117 | ok | percent_fmt | BinaryExpr |
| test.py:118 | fail | percent_fmt | BinaryExpr |
| test.py:128 | fail | binary_decode_encode | base64.b64encode(..) |
| test.py:129 | fail | binary_decode_encode | base64.b64decode(..) |
| test.py:131 | fail | binary_decode_encode | base64.standard_b64encode(..) |
| test.py:132 | fail | binary_decode_encode | base64.standard_b64decode(..) |
| test.py:134 | fail | binary_decode_encode | base64.urlsafe_b64encode(..) |
| test.py:135 | fail | binary_decode_encode | base64.urlsafe_b64decode(..) |
| test.py:137 | fail | binary_decode_encode | base64.b32encode(..) |
| test.py:138 | fail | binary_decode_encode | base64.b32decode(..) |
| test.py:140 | fail | binary_decode_encode | base64.b16encode(..) |
| test.py:141 | fail | binary_decode_encode | base64.b16decode(..) |
| test.py:156 | fail | binary_decode_encode | base64.encodestring(..) |
| test.py:157 | fail | binary_decode_encode | base64.decodestring(..) |
| test.py:162 | fail | binary_decode_encode | quopri.encodestring(..) |
| test.py:163 | fail | binary_decode_encode | quopri.decodestring(..) |

View File

@@ -0,0 +1,15 @@
# Shared taint sources for the runtime taint-tracking tests. The literal
# values are what the test drivers print, so they must not change.
TAINTED_STRING = "TAINTED_STRING"
TAINTED_BYTES = b"TAINTED_BYTES"
TAINTED_LIST = ["tainted-{}".format(i) for i in range(5)]
TAINTED_DICT = {"name": TAINTED_STRING, "some key": "foo"}
def ensure_tainted(*args):
    """Print each value that the static analysis is expected to flag as
    tainted. Performs no checking itself -- the output is inspected by the
    test harness."""
    print("- ensure_tainted")
    index = 0
    for value in args:
        print("arg {}: {!r}".format(index, value))
        index += 1
def ensure_not_tainted(*args):
    """Print each value that the static analysis is expected to consider
    safe. Performs no checking itself -- the output is inspected by the
    test harness."""
    print("- ensure_not_tainted")
    for position, value in enumerate(args):
        line = "arg {}: {!r}".format(position, value)
        print(line)

View File

@@ -20,6 +20,7 @@
* complex | `42j` (not supported yet)
*/
private import python
import experimental.dataflow.DataFlow
class TestConfiguration extends DataFlow::Configuration {

View File

@@ -0,0 +1,31 @@
class SomeClass:
    pass

# NOTE(review): the `$...` trailing comments throughout this file are inline
# expectation annotations consumed by the CodeQL test harness -- they are part
# of the test and must not be edited or moved.
def simple_read_write():
    x = SomeClass() # $tracked=foo
    x.foo = tracked # $tracked $tracked=foo
    y = x.foo # $tracked=foo $tracked
    do_stuff(y) # $tracked
# Attribute written after a call: `bar` should still see the tracked value
# on its parameter's `attr` field.
def foo():
    x = SomeClass() # $tracked=attr
    bar(x) # $tracked=attr
    x.attr = tracked # $tracked=attr $tracked
    baz(x) # $tracked=attr

def bar(x): # $tracked=attr
    z = x.attr # $tracked $tracked=attr
    do_stuff(z) # $tracked
# A field reassigned with two incompatible types: each callee should see the
# type current at its call site, with the other type only on the `f+:` edge.
def expects_int(x): # $int=field $f+:str=field
    do_int_stuff(x.field) # $int $f+:str $int=field $f+:str=field

def expects_string(x): # $f+:int=field $str=field
    do_string_stuff(x.field) # $f+:int $str $f+:int=field $str=field

def test_incompatible_types():
    x = SomeClass() # $int,str=field
    x.field = int(5) # $int=field $f+:str=field $int $f+:str
    expects_int(x) # $int=field $f+:str=field
    x.field = str("Hello") # $f+:int=field $str=field $f+:int $str
    expects_string(x) # $f+:int=field $str=field

View File

@@ -0,0 +1,61 @@
# NOTE(review): the `$...` trailing comments in this file are inline
# expectation annotations consumed by the CodeQL test harness -- they are part
# of the test and must not be edited or moved.

# Tracked value flowing out of a call, into a call, and both combined.
def get_tracked():
    x = tracked # $tracked
    return x # $tracked

def use_tracked_foo(x): # $tracked
    do_stuff(x) # $tracked

def foo():
    use_tracked_foo(
        get_tracked() # $tracked
    )

def use_tracked_bar(x): # $tracked
    do_stuff(x) # $tracked

def bar():
    x = get_tracked() # $tracked
    use_tracked_bar(x) # $tracked

def use_tracked_baz(x): # $tracked
    do_stuff(x) # $tracked

def baz():
    x = tracked # $tracked
    use_tracked_baz(x) # $tracked
# Flow through an identity function: tracking should not continue out of the
# call to `id` into `use_tracked_quux` (hence the `f-:` annotation).
def id(x): # $tracked
    return x # $tracked

def use_tracked_quux(x): # $f-:tracked
    # NOTE(review): `do_stuff(y)` references a name not defined in this scope;
    # presumably intentional for this test, but it looks like it may have
    # meant `x` -- verify against the test's purpose.
    do_stuff(y) # call after return -- not tracked in here.

def quux():
    x = tracked # $tracked
    y = id(x) # $tracked
    use_tracked_quux(y) # not tracked out of call to id.
g = None

# NOTE(review): `write_g` assigns a local `g` (no `global` statement), so at
# runtime the module-level `g` is never updated; the `f-:tracked` annotation
# in `use_g` documents that global flow is deliberately not tracked.
def write_g(x): # $tracked
    g = x # $tracked

def use_g():
    do_stuff(g) # $f-:tracked // no global flow for now.

def global_var_write_test():
    x = tracked # $tracked
    write_g(x) # $tracked
    use_g()
# Reassigning a variable to a different type: each callee should only see the
# type the variable holds at that call site.
def expects_int(x): # $int
    do_int_stuff(x) # $int

def expects_string(x): # $str
    do_string_stuff(x) # $str

def redefine_test():
    x = int(5) # $int
    expects_int(x) # $int
    x = str("Hello") # $str
    expects_string(x) # $str

View File

@@ -0,0 +1,72 @@
import python
import experimental.dataflow.TypeTracker
import TestUtilities.InlineExpectationsTest
/**
 * Gets a data-flow node reachable, under type-tracker state `t`, from any
 * reference to a name called `tracked` -- the seed used by the inline tests.
 */
Node tracked(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(NameNode n | n.getId() = "tracked")
  or
  exists(TypeTracker t2 | result = tracked(t2).track(t2, t))
}
/**
 * Inline-expectation test for the `tracked` tag: every node reached by
 * `tracked/1` must be matched by a `$tracked` annotation in the test source.
 * The expectation value is the attribute component of the tracker state.
 */
class TrackedTest extends InlineExpectationsTest {
  TrackedTest() { this = "TrackedTest" }

  override string getARelevantTag() { result = "tracked" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = tracked(t) and
      tag = "tracked" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}
/**
 * Gets a node reachable, under tracker state `t`, from a call to the
 * built-in `int` -- used as the seed for the `int` tag.
 */
Node int_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "int")
  or
  exists(TypeTracker t2 | result = int_type(t2).track(t2, t))
}

/**
 * Gets a node reachable, under tracker state `t`, from a call to the
 * built-in `str` -- used as the seed for the `str` tag.
 */
Node string_type(TypeTracker t) {
  t.start() and
  result.asCfgNode() = any(CallNode c | c.getFunction().(NameNode).getId() = "str")
  or
  exists(TypeTracker t2 | result = string_type(t2).track(t2, t))
}
/**
 * Inline-expectation test for the `int` tag: nodes reached by `int_type/1`
 * must match `$int` annotations in the test source.
 */
class TrackedIntTest extends InlineExpectationsTest {
  TrackedIntTest() { this = "TrackedIntTest" }

  override string getARelevantTag() { result = "int" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = int_type(t) and
      tag = "int" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}

/**
 * Inline-expectation test for the `str` tag: nodes reached by
 * `string_type/1` must match `$str` annotations in the test source.
 */
class TrackedStringTest extends InlineExpectationsTest {
  TrackedStringTest() { this = "TrackedStringTest" }

  override string getARelevantTag() { result = "str" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(Node e, TypeTracker t |
      e = string_type(t) and
      tag = "str" and
      location = e.getLocation() and
      value = t.getAttr() and
      element = e.toString()
    )
  }
}

View File

@@ -0,0 +1 @@
experimental/Security/CWE-074/TemplateInjection.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
experimental/Security/CWE-091/Xslt.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=3 -p ../../../../query-tests/Security/lib/

View File

@@ -0,0 +1 @@
experimental/Security/CWE-643/xpath.ql

View File

@@ -0,0 +1 @@
semmle-extractor-options: --max-import-depth=1