Merge pull request #3701 from yoff/SharedDataflow

Python: Start using the shared data flow libraries
2026-04-30 11:15:13 +02:00 · 2020-07-03 16:03:20 +02:00
parent 04a0d47ab9 fe9520b50b
commit 01c4852360
53 changed files with 8975 additions and 8 deletions
--- a/python/ql/src/experimental/dataflow/DataFlow.qll
+++ b/python/ql/src/experimental/dataflow/DataFlow.qll
@@ -0,0 +1,26 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing. To track these types of flow, where the
+ * exact value may not be preserved, import
+ * `experimental.dataflow.TaintTracking`.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow, call `DataFlow::localFlow` or
+ * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
+ */
+
+import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data flow analyses.
+ *
+ * The members of this module are those exported by
+ * `experimental.dataflow.internal.DataFlowImpl`.
+ */
+module DataFlow {
+  import experimental.dataflow.internal.DataFlowImpl
+}
--- a/python/ql/src/experimental/dataflow/DataFlow2.qll
+++ b/python/ql/src/experimental/dataflow/DataFlow2.qll
@@ -0,0 +1,26 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing. To track these types of flow, where the
+ * exact value may not be preserved, import
+ * `experimental.dataflow.TaintTracking`.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow2::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow, call `DataFlow::localFlow` or
+ * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
+ */
+
+import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) data flow analyses.
+ *
+ * The members of this module are those exported by
+ * `experimental.dataflow.internal.DataFlowImpl2`.
+ */
+module DataFlow2 {
+  import experimental.dataflow.internal.DataFlowImpl2
+}
--- a/python/ql/src/experimental/dataflow/TaintTracking.qll
+++ b/python/ql/src/experimental/dataflow/TaintTracking.qll
@@ -0,0 +1,19 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * To use global (interprocedural) taint tracking, extend the class
+ * `TaintTracking::Configuration` as documented on that class. To use local
+ * (intraprocedural) taint tracking, call `TaintTracking::localTaint` or
+ * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
+ */
+
+import python
+
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ *
+ * The members of this module are those exported by
+ * `experimental.dataflow.internal.tainttracking1.TaintTrackingImpl`.
+ */
+module TaintTracking {
+  import experimental.dataflow.internal.tainttracking1.TaintTrackingImpl
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl.qll
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImpl2.qll
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplCommon.qll
@@ -0,0 +1,812 @@
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+import Cached
+
+cached
+private module Cached {
+  /**
+   * Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
+   * The instance parameter is considered to have index `-1`.
+   *
+   * The dispatch targets are those given by `viableCallable(call)`.
+   */
+  pragma[nomagic]
+  private predicate viableParam(DataFlowCall call, int i, ParameterNode p) {
+    p.isParameterOf(viableCallable(call), i)
+  }
+
+  /**
+   * Holds if `arg` is a possible argument to `p` in `call`, taking virtual
+   * dispatch into account.
+   *
+   * `arg` and `p` must be at the same position of `call` and of one of its
+   * viable targets, respectively, and their node types must be compatible.
+   */
+  cached
+  predicate viableParamArg(DataFlowCall call, ParameterNode p, ArgumentNode arg) {
+    exists(int i |
+      viableParam(call, i, p) and
+      arg.argumentOf(call, i) and
+      compatibleTypes(getNodeType(arg), getNodeType(p))
+    )
+  }
+
+  /**
+   * Gets a return position of kind `kind` whose callable is a viable
+   * dispatch target of `call`.
+   */
+  pragma[nomagic]
+  private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) {
+    viableCallable(call) = result.getCallable() and
+    kind = result.getKind()
+  }
+
+  /**
+   * Holds if a value at return position `pos` can be returned to `out` via `call`,
+   * taking virtual dispatch into account.
+   *
+   * `out` is an out node of `call` for the return kind of `pos`.
+   */
+  cached
+  predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) {
+    exists(ReturnKindExt kind |
+      pos = viableReturnPos(call, kind) and
+      out = kind.getAnOutNode(call)
+    )
+  }
+
+  /** Provides predicates for calculating flow-through summaries. */
+  private module FlowThrough {
+    /**
+     * The first flow-through approximation:
+     *
+     * - Input access paths are abstracted with a Boolean parameter
+     *   that indicates (non-)emptiness.
+     */
+    private module Cand {
+      /**
+       * Holds if `p` can flow to `node` in the same callable using only
+       * value-preserving steps.
+       *
+       * `read` indicates whether it is contents of `p` that can flow to `node`.
+       * At most one read step is taken along any path: a read, or flow through
+       * a call that reads, is only allowed when no read has happened before.
+       */
+      pragma[nomagic]
+      private predicate parameterValueFlowCand(ParameterNode p, Node node, boolean read) {
+        p = node and
+        read = false
+        or
+        // local flow
+        exists(Node mid |
+          parameterValueFlowCand(p, mid, read) and
+          simpleLocalFlowStep(mid, node)
+        )
+        or
+        // read
+        exists(Node mid |
+          parameterValueFlowCand(p, mid, false) and
+          readStep(mid, _, node) and
+          read = true
+        )
+        or
+        // flow through: no prior read
+        exists(ArgumentNode arg |
+          parameterValueFlowArgCand(p, arg, false) and
+          argumentValueFlowsThroughCand(arg, node, read)
+        )
+        or
+        // flow through: no read inside method
+        exists(ArgumentNode arg |
+          parameterValueFlowArgCand(p, arg, read) and
+          argumentValueFlowsThroughCand(arg, node, false)
+        )
+      }
+
+      /**
+       * Holds if `parameterValueFlowCand(p, arg, read)` holds, where the target
+       * node `arg` is an argument node.
+       */
+      pragma[nomagic]
+      private predicate parameterValueFlowArgCand(ParameterNode p, ArgumentNode arg, boolean read) {
+        parameterValueFlowCand(p, arg, read)
+      }
+
+      /**
+       * Holds if `p` can flow, without a read step, to the pre-update node
+       * associated with post-update node `n`.
+       */
+      pragma[nomagic]
+      predicate parameterValueFlowsToPreUpdateCand(ParameterNode p, PostUpdateNode n) {
+        parameterValueFlowCand(p, n.getPreUpdateNode(), false)
+      }
+
+      /**
+       * Holds if `p` can flow to a return node of kind `kind` in the same
+       * callable using only value-preserving steps, not taking call contexts
+       * into account.
+       *
+       * `read` indicates whether it is contents of `p` that can flow to the return
+       * node.
+       */
+      predicate parameterValueFlowReturnCand(ParameterNode p, ReturnKind kind, boolean read) {
+        exists(ReturnNode ret |
+          parameterValueFlowCand(p, ret, read) and
+          kind = ret.getKind()
+        )
+      }
+
+      /**
+       * Holds if `arg` is an argument of `call` for some viable parameter that
+       * can flow to a return node of kind `kind`.
+       *
+       * `read` indicates whether it is contents of the parameter that can flow
+       * to the return node.
+       */
+      pragma[nomagic]
+      private predicate argumentValueFlowsThroughCand0(
+        DataFlowCall call, ArgumentNode arg, ReturnKind kind, boolean read
+      ) {
+        exists(ParameterNode param | viableParamArg(call, param, arg) |
+          parameterValueFlowReturnCand(param, kind, read)
+        )
+      }
+
+      /**
+       * Holds if `arg` flows to `out` through a call using only value-preserving steps,
+       * not taking call contexts into account.
+       *
+       * `read` indicates whether it is contents of `arg` that can flow to `out`.
+       */
+      predicate argumentValueFlowsThroughCand(ArgumentNode arg, Node out, boolean read) {
+        exists(DataFlowCall call, ReturnKind kind |
+          argumentValueFlowsThroughCand0(call, arg, kind, read) and
+          out = getAnOutNode(call, kind)
+        )
+      }
+
+      /**
+       * Holds if `p` can flow to `n` (with or without a read step) and `p` can
+       * also flow to some return node or to the pre-update node of some
+       * post-update node.
+       */
+      predicate cand(ParameterNode p, Node n) {
+        parameterValueFlowCand(p, n, _) and
+        (
+          parameterValueFlowReturnCand(p, _, _)
+          or
+          parameterValueFlowsToPreUpdateCand(p, _)
+        )
+      }
+    }
+
+    /**
+     * The final flow-through calculation:
+     *
+     * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`)
+     *   or summarized as a single read step with before and after types recorded
+     *   in the `ReadStepTypesOption` parameter.
+     * - Types are checked using the `compatibleTypes()` relation.
+     */
+    private module Final {
+      /**
+       * Holds if `p` can flow to `node` in the same callable using only
+       * value-preserving steps and possibly a single read step, not taking
+       * call contexts into account.
+       *
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
+       *
+       * Type compatibility is only checked when `node` is a `CastingNode`.
+       */
+      predicate parameterValueFlow(ParameterNode p, Node node, ReadStepTypesOption read) {
+        parameterValueFlow0(p, node, read) and
+        if node instanceof CastingNode
+        then
+          // normal flow through
+          read = TReadStepTypesNone() and
+          compatibleTypes(getNodeType(p), getNodeType(node))
+          or
+          // getter
+          compatibleTypes(read.getContentType(), getNodeType(node))
+        else any()
+      }
+
+      /**
+       * Holds if `p` can flow to `node` as described for `parameterValueFlow`,
+       * but without the type check that `parameterValueFlow` applies when
+       * `node` is a `CastingNode`.
+       *
+       * The base case is restricted to parameters satisfying `Cand::cand`.
+       */
+      pragma[nomagic]
+      private predicate parameterValueFlow0(ParameterNode p, Node node, ReadStepTypesOption read) {
+        p = node and
+        Cand::cand(p, _) and
+        read = TReadStepTypesNone()
+        or
+        // local flow
+        exists(Node mid |
+          parameterValueFlow(p, mid, read) and
+          simpleLocalFlowStep(mid, node)
+        )
+        or
+        // read
+        exists(Node mid |
+          parameterValueFlow(p, mid, TReadStepTypesNone()) and
+          readStepWithTypes(mid, read.getContainerType(), read.getContent(), node,
+            read.getContentType()) and
+          Cand::parameterValueFlowReturnCand(p, _, true) and
+          compatibleTypes(getNodeType(p), read.getContainerType())
+        )
+        or
+        parameterValueFlow0_0(TReadStepTypesNone(), p, node, read)
+      }
+
+      /**
+       * Holds if `p` can flow to `node` through a call, where at most one of
+       * the flow before the call and the flow through the call involves a read
+       * step; `read` describes that read step, if any.
+       *
+       * `mustBeNone` marks the part of the flow that must not involve a read;
+       * the only caller, `parameterValueFlow0`, passes `TReadStepTypesNone()`.
+       */
+      pragma[nomagic]
+      private predicate parameterValueFlow0_0(
+        ReadStepTypesOption mustBeNone, ParameterNode p, Node node, ReadStepTypesOption read
+      ) {
+        // flow through: no prior read
+        exists(ArgumentNode arg |
+          parameterValueFlowArg(p, arg, mustBeNone) and
+          argumentValueFlowsThrough(arg, read, node)
+        )
+        or
+        // flow through: no read inside method
+        exists(ArgumentNode arg |
+          parameterValueFlowArg(p, arg, read) and
+          argumentValueFlowsThrough(arg, mustBeNone, node)
+        )
+      }
+
+      /**
+       * Holds if `p` can flow to the argument node `arg` according to
+       * `parameterValueFlow`, and `arg` is a candidate for flow through a call
+       * according to `Cand::argumentValueFlowsThroughCand`.
+       */
+      pragma[nomagic]
+      private predicate parameterValueFlowArg(
+        ParameterNode p, ArgumentNode arg, ReadStepTypesOption read
+      ) {
+        parameterValueFlow(p, arg, read) and
+        Cand::argumentValueFlowsThroughCand(arg, _, _)
+      }
+
+      /**
+       * Holds if `arg` is an argument of `call` for some viable parameter that
+       * can flow to a return node of kind `kind` according to
+       * `parameterValueFlowReturn`, with read step information `read`.
+       */
+      pragma[nomagic]
+      private predicate argumentValueFlowsThrough0(
+        DataFlowCall call, ArgumentNode arg, ReturnKind kind, ReadStepTypesOption read
+      ) {
+        exists(ParameterNode param | viableParamArg(call, param, arg) |
+          parameterValueFlowReturn(param, kind, read)
+        )
+      }
+
+      /**
+       * Holds if `arg` flows to `out` through a call using only
+       * value-preserving steps and possibly a single read step, not taking
+       * call contexts into account.
+       *
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
+       */
+      pragma[nomagic]
+      predicate argumentValueFlowsThrough(ArgumentNode arg, ReadStepTypesOption read, Node out) {
+        exists(DataFlowCall call, ReturnKind kind |
+          argumentValueFlowsThrough0(call, arg, kind, read) and
+          out = getAnOutNode(call, kind)
+        |
+          // normal flow through
+          read = TReadStepTypesNone() and
+          compatibleTypes(getNodeType(arg), getNodeType(out))
+          or
+          // getter
+          compatibleTypes(getNodeType(arg), read.getContainerType()) and
+          compatibleTypes(read.getContentType(), getNodeType(out))
+        )
+      }
+
+      /**
+       * Holds if `arg` flows to `out` through a call using only
+       * value-preserving steps and a single read step, not taking call
+       * contexts into account, thus representing a getter-step.
+       *
+       * `c` is the content that is read.
+       */
+      predicate getterStep(ArgumentNode arg, Content c, Node out) {
+        argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out)
+      }
+
+      /**
+       * Holds if `p` can flow to a return node of kind `kind` in the same
+       * callable using only value-preserving steps and possibly a single read
+       * step.
+       *
+       * If a read step was taken, then `read` captures the `Content`, the
+       * container type, and the content type.
+       */
+      private predicate parameterValueFlowReturn(
+        ParameterNode p, ReturnKind kind, ReadStepTypesOption read
+      ) {
+        exists(ReturnNode ret |
+          parameterValueFlow(p, ret, read) and
+          kind = ret.getKind()
+        )
+      }
+    }
+
+    import Final
+  }
+
+  import FlowThrough
+
+  /**
+   * Provides predicates that use call contexts to restrict the viable
+   * dispatch targets of a call, and the call sites that flow can return to.
+   */
+  cached
+  private module DispatchWithCallContext {
+    /**
+     * Holds if the call context `ctx` reduces the set of viable run-time
+     * dispatch targets of call `call` in `c`.
+     *
+     * That is, there are strictly fewer targets of `call` in context `ctx`
+     * than there are viable targets of `call` overall.
+     */
+    cached
+    predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) {
+      exists(int tgts, int ctxtgts |
+        mayBenefitFromCallContext(call, c) and
+        c = viableCallable(ctx) and
+        ctxtgts = count(viableImplInCallContext(call, ctx)) and
+        tgts = strictcount(viableCallable(call)) and
+        ctxtgts < tgts
+      )
+    }
+
+    /**
+     * Gets a viable run-time dispatch target for the call `call` in the
+     * context `ctx`. This is restricted to those calls for which a context
+     * makes a difference.
+     */
+    cached
+    DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
+      result = viableImplInCallContext(call, ctx) and
+      reducedViableImplInCallContext(call, _, ctx)
+    }
+
+    /**
+     * Holds if flow returning from callable `c` to call `call` might return
+     * further and if this path restricts the set of call sites that can be
+     * returned to.
+     *
+     * That is, there are strictly fewer contexts in which `call` can dispatch
+     * to `c` than there are calls that target the enclosing callable of `call`.
+     */
+    cached
+    predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) {
+      exists(int tgts, int ctxtgts |
+        mayBenefitFromCallContext(call, _) and
+        c = viableCallable(call) and
+        ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContext(call, ctx)) and
+        tgts = strictcount(DataFlowCall ctx | viableCallable(ctx) = call.getEnclosingCallable()) and
+        ctxtgts < tgts
+      )
+    }
+
+    /**
+     * Gets a viable run-time dispatch target for the call `call` in the
+     * context `ctx`. This is restricted to those calls and results for which
+     * the return flow from the result to `call` restricts the possible context
+     * `ctx`.
+     */
+    cached
+    DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) {
+      result = viableImplInCallContext(call, ctx) and
+      reducedViableImplInReturn(result, call)
+    }
+  }
+
+  import DispatchWithCallContext
+
+  /**
+   * Holds if `p` can flow to the pre-update node associated with post-update
+   * node `n`, in the same callable, using only value-preserving steps.
+   *
+   * This is `parameterValueFlow` restricted to flow without a read step.
+   */
+  cached
+  predicate parameterValueFlowsToPreUpdate(ParameterNode p, PostUpdateNode n) {
+    parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone())
+  }
+
+  /**
+   * Holds if data can flow from `node1` to `node2` via a store into content
+   * `c`, where `contentType` is the type of the stored value and
+   * `containerType` is the type of the container.
+   *
+   * This is either a `storeStep` for a content that is also the target of
+   * some `readStep`, or a reverse read: `node1` and `node2` are post-update
+   * nodes whose pre-update nodes are related by a read of `c`, either
+   * directly or through a call (`argumentValueFlowsThrough`).
+   */
+  private predicate store(
+    Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType
+  ) {
+    storeStep(node1, c, node2) and
+    readStep(_, c, _) and
+    contentType = getNodeType(node1) and
+    containerType = getNodeType(node2)
+    or
+    exists(Node n1, Node n2 |
+      n1 = node1.(PostUpdateNode).getPreUpdateNode() and
+      n2 = node2.(PostUpdateNode).getPreUpdateNode()
+    |
+      argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1)
+      or
+      readStep(n2, c, n1) and
+      contentType = getNodeType(n1) and
+      containerType = getNodeType(n2)
+    )
+  }
+
+  /**
+   * Holds if data can flow from `node1` to `node2` via a direct assignment to
+   * the content of `tc`. The container type is the one recorded in `tc`, and
+   * `contentType` is the type of the stored value.
+   *
+   * This includes reverse steps through reads when the result of the read has
+   * been stored into, in order to handle cases like `x.f1.f2 = y`.
+   */
+  cached
+  predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) {
+    store(node1, tc.getContent(), node2, contentType, tc.getContainerType())
+  }
+
+  /**
+   * Holds if the call context `call` either improves virtual dispatch in
+   * `callable` or if it allows us to prune unreachable nodes in `callable`.
+   */
+  cached
+  predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) {
+    // `call` reduces the dispatch targets of some call in `callable`
+    reducedViableImplInCallContext(_, callable, call)
+    or
+    // some node in `callable` is unreachable when entered through `call`
+    exists(Node n | n.getEnclosingCallable() = callable | isUnreachableInCall(n, call))
+  }
+
+  /**
+   * The algebraic type underlying `CallContext`.
+   *
+   * `TSpecificCall` only exists for calls recorded by `recordDataFlowCallSite`,
+   * and `TReturn` only for pairs satisfying `reducedViableImplInReturn`.
+   */
+  cached
+  newtype TCallContext =
+    TAnyCallContext() or
+    TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or
+    TSomeCall() or
+    TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) }
+
+  /**
+   * The algebraic type underlying `ReturnPosition`: a callable `c` paired
+   * with a return kind `kind` such that `c` encloses a `ReturnNodeExt` of
+   * that kind.
+   */
+  cached
+  newtype TReturnPosition =
+    TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) {
+      exists(ReturnNodeExt ret |
+        c = returnNodeGetEnclosingCallable(ret) and
+        kind = ret.getKind()
+      )
+    }
+
+  /**
+   * The algebraic type underlying `LocalCallContext`.
+   *
+   * `TSpecificLocalCall` only exists for calls that make some node
+   * unreachable according to `isUnreachableInCall`.
+   */
+  cached
+  newtype TLocalFlowCallContext =
+    TAnyLocalCall() or
+    TSpecificLocalCall(DataFlowCall call) { isUnreachableInCall(_, call) }
+
+  /**
+   * The algebraic type underlying `ReturnKindExt`: either an ordinary return
+   * kind, or an update of the parameter at position `pos`, for every position
+   * at which some parameter node exists.
+   */
+  cached
+  newtype TReturnKindExt =
+    TValueReturn(ReturnKind kind) or
+    TParamUpdate(int pos) { exists(ParameterNode p | p.isParameterOf(_, pos)) }
+
+  /** The algebraic type underlying `BooleanOption`: no value, `true`, or `false`. */
+  cached
+  newtype TBooleanOption =
+    TBooleanNone() or
+    TBooleanSome(boolean b) { b = true or b = false }
+
+  /**
+   * The algebraic type underlying `TypedContent`: a content `c` paired with a
+   * container type `t` such that some store into `c` has container type `t`.
+   */
+  cached
+  newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) }
+
+  /**
+   * The algebraic type underlying `AccessPathFront`: either a nil front
+   * carrying a type, or a head carrying a typed content.
+   */
+  cached
+  newtype TAccessPathFront =
+    TFrontNil(DataFlowType t) or
+    TFrontHead(TypedContent tc)
+
+  /** The algebraic type underlying `AccessPathFrontOption`. */
+  cached
+  newtype TAccessPathFrontOption =
+    TAccessPathFrontNone() or
+    TAccessPathFrontSome(AccessPathFront apf)
+}
+
+/**
+ * A `Node` at which a cast can occur such that the type should be checked.
+ *
+ * This is a parameter node, a `CastNode`, an `OutNodeExt`, or the target of
+ * a read step.
+ */
+class CastingNode extends Node {
+  CastingNode() {
+    this instanceof ParameterNode or
+    this instanceof CastNode or
+    this instanceof OutNodeExt or
+    // For reads, `x.f`, we want to check that the tracked type after the read (which
+    // is obtained by popping the head of the access path stack) is compatible with
+    // the type of `x.f`.
+    readStep(_, _, this)
+  }
+}
+
+/**
+ * Holds if there is a read step of content `c` from `n1` to `n2`, where
+ * `container` is the node type of `n1` and `content` is the node type of `n2`.
+ */
+private predicate readStepWithTypes(
+  Node n1, DataFlowType container, Content c, Node n2, DataFlowType content
+) {
+  readStep(n1, c, n2) and
+  container = getNodeType(n1) and
+  content = getNodeType(n2)
+}
+
+/**
+ * The algebraic type underlying `ReadStepTypesOption`: either no read step,
+ * or the container type, content, and content type of some existing read
+ * step (as given by `readStepWithTypes`).
+ */
+private newtype TReadStepTypesOption =
+  TReadStepTypesNone() or
+  TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) {
+    readStepWithTypes(_, container, c, _, content)
+  }
+
+/**
+ * An optional description of a read step: the content that is read together
+ * with the container type and the content type.
+ */
+private class ReadStepTypesOption extends TReadStepTypesOption {
+  /** Holds if this option describes a read step. */
+  predicate isSome() { this instanceof TReadStepTypesSome }
+
+  /** Gets the container type of the read step, if any. */
+  DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) }
+
+  /** Gets the content that is read, if any. */
+  Content getContent() { this = TReadStepTypesSome(_, result, _) }
+
+  /** Gets the content type of the read step, if any. */
+  DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) }
+
+  /** Gets a textual representation of this option. */
+  string toString() { if this.isSome() then result = "Some(..)" else result = "None()" }
+}
+
+/**
+ * A call context to restrict the targets of virtual dispatch, prune local flow,
+ * and match the call sites of flow into a method with flow out of a method.
+ *
+ * There are four cases:
+ * - `TAnyCallContext()` : No restrictions on method flow.
+ * - `TSpecificCall(DataFlowCall call)` : Flow entered through the
+ *    given `call`. This call improves the set of viable
+ *    dispatch targets for at least one method call in the current callable
+ *    or helps prune unreachable nodes in the current callable.
+ * - `TSomeCall()` : Flow entered through a parameter. The
+ *    originating call does not improve the set of dispatch targets for any
+ *    method call in the current callable and was therefore not recorded.
+ * - `TReturn(DataFlowCallable c, DataFlowCall call)` : Flow reached `call` from `c` and
+ *    this dispatch target of `call` implies a reduced set of dispatch origins
+ *    to which data may flow if it should reach a `return` statement.
+ */
+abstract class CallContext extends TCallContext {
+  /** Gets a textual representation of this call context. */
+  abstract string toString();
+
+  /** Holds if this call context is relevant for `callable`. */
+  abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+/** The call context that places no restrictions on flow; it is relevant for every callable. */
+class CallContextAny extends CallContext, TAnyCallContext {
+  override string toString() { result = "CcAny" }
+
+  override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+/** A call context representing flow that entered through a call: either a specific call or some unrecorded call. */
+abstract class CallContextCall extends CallContext { }
+
+/** A call context representing flow that entered through a specific, recorded call. */
+class CallContextSpecificCall extends CallContextCall, TSpecificCall {
+  override string toString() {
+    exists(DataFlowCall call | this = TSpecificCall(call) | result = "CcCall(" + call + ")")
+  }
+
+  /** Holds if the call of this context was recorded for `callable` by `recordDataFlowCallSite`. */
+  override predicate relevantFor(DataFlowCallable callable) {
+    recordDataFlowCallSite(getCall(), callable)
+  }
+
+  /** Gets the call through which flow entered. */
+  DataFlowCall getCall() { this = TSpecificCall(result) }
+}
+
+/** The call context representing flow that entered through a parameter from an unrecorded call. */
+class CallContextSomeCall extends CallContextCall, TSomeCall {
+  override string toString() { result = "CcSomeCall" }
+
+  /** Holds if `callable` has at least one parameter node. */
+  override predicate relevantFor(DataFlowCallable callable) {
+    exists(ParameterNode p | p.getEnclosingCallable() = callable)
+  }
+}
+
+/** A call context representing flow that returned to a call from one of its dispatch targets. */
+class CallContextReturn extends CallContext, TReturn {
+  override string toString() {
+    exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")")
+  }
+
+  /** Holds if `callable` is the enclosing callable of the call that flow returned to. */
+  override predicate relevantFor(DataFlowCallable callable) {
+    exists(DataFlowCall call | this = TReturn(_, call) and call.getEnclosingCallable() = callable)
+  }
+}
+
+/**
+ * A call context that is relevant for pruning local flow.
+ */
+abstract class LocalCallContext extends TLocalFlowCallContext {
+  /** Gets a textual representation of this local call context. */
+  abstract string toString();
+
+  /** Holds if this call context is relevant for `callable`. */
+  abstract predicate relevantFor(DataFlowCallable callable);
+}
+
+/** The local call context that places no restrictions on local flow; it is relevant for every callable. */
+class LocalCallContextAny extends LocalCallContext, TAnyLocalCall {
+  override string toString() { result = "LocalCcAny" }
+
+  override predicate relevantFor(DataFlowCallable callable) { any() }
+}
+
+/** A local call context for a specific call that makes some node unreachable. */
+class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall {
+  LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) }
+
+  DataFlowCall call;
+
+  /** Gets the call of this local call context. */
+  DataFlowCall getCall() { result = call }
+
+  override string toString() { result = "LocalCcCall(" + call + ")" }
+
+  /** Holds if the call of this context makes some node in `callable` unreachable. */
+  override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) }
+}
+
+/** Holds if some node in `callable` is unreachable in the context of `call`. */
+private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) {
+  exists(Node n | n.getEnclosingCallable() = callable and isUnreachableInCall(n, call))
+}
+
+/**
+ * Gets the local call context given the call context and the callable that
+ * the contexts apply to.
+ *
+ * This only has a result if `ctx` is relevant for `callable`.
+ */
+LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) {
+  ctx.relevantFor(callable) and
+  // Keep the specific call only when it makes some node in `callable`
+  // unreachable; every other context maps to the unrestricted local context.
+  if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable)
+  then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall()
+  else result instanceof LocalCallContextAny
+}
+
+/**
+ * A node from which flow can return to the caller. This is either a regular
+ * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
+ */
+class ReturnNodeExt extends Node {
+  ReturnNodeExt() {
+    this instanceof ReturnNode or
+    parameterValueFlowsToPreUpdate(_, this)
+  }
+
+  /**
+   * Gets the kind of this returned value.
+   *
+   * This is a value return for a regular `ReturnNode`, and a parameter update
+   * at the position of each parameter that flows to the pre-update node of
+   * this node.
+   */
+  ReturnKindExt getKind() {
+    result = TValueReturn(this.(ReturnNode).getKind())
+    or
+    exists(ParameterNode p, int pos |
+      parameterValueFlowsToPreUpdate(p, this) and
+      p.isParameterOf(_, pos) and
+      result = TParamUpdate(pos)
+    )
+  }
+}
+
+/**
+ * A node to which data can flow from a call. Either an ordinary out node
+ * or a post-update node associated with a call argument.
+ */
+class OutNodeExt extends Node {
+  OutNodeExt() {
+    this instanceof OutNode
+    or
+    // a post-update node whose pre-update node is an argument
+    this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode
+  }
+}
+
+/**
+ * An extended return kind. A return kind describes how data can be returned
+ * from a callable. This can either be through a returned value or an updated
+ * parameter.
+ */
+abstract class ReturnKindExt extends TReturnKindExt {
+  /** Gets a textual representation of this return kind. */
+  abstract string toString();
+
+  /** Gets a node corresponding to data flow out of `call`. */
+  abstract OutNodeExt getAnOutNode(DataFlowCall call);
+}
+
+/** An extended return kind that wraps an ordinary `ReturnKind`. */
+class ValueReturnKind extends ReturnKindExt, TValueReturn {
+  private ReturnKind kind;
+
+  ValueReturnKind() { this = TValueReturn(kind) }
+
+  /** Gets the underlying return kind. */
+  ReturnKind getKind() { result = kind }
+
+  override string toString() { result = kind.toString() }
+
+  override OutNodeExt getAnOutNode(DataFlowCall call) {
+    result = getAnOutNode(call, this.getKind())
+  }
+}
+
+/** An extended return kind representing an update of the parameter at a given position. */
+class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate {
+  private int pos;
+
+  ParamUpdateReturnKind() { this = TParamUpdate(pos) }
+
+  /** Gets the position of the updated parameter. */
+  int getPosition() { result = pos }
+
+  override string toString() { result = "param update " + pos }
+
+  /** Gets the post-update node of the argument of `call` at the position of this kind. */
+  override OutNodeExt getAnOutNode(DataFlowCall call) {
+    exists(ArgumentNode arg |
+      result.(PostUpdateNode).getPreUpdateNode() = arg and
+      arg.argumentOf(call, this.getPosition())
+    )
+  }
+}
+
+/** A callable tagged with a relevant return kind. */
+class ReturnPosition extends TReturnPosition0 {
+  // The callable and return kind that make up this position.
+  private DataFlowCallable c;
+  private ReturnKindExt kind;
+
+  ReturnPosition() { this = TReturnPosition0(c, kind) }
+
+  /** Gets the callable. */
+  DataFlowCallable getCallable() { result = c }
+
+  /** Gets the return kind. */
+  ReturnKindExt getKind() { result = kind }
+
+  /** Gets a textual representation of this return position. */
+  string toString() { result = "[" + kind + "] " + c }
+}
+
+/** Gets the enclosing callable of the return node `ret`. */
+pragma[noinline]
+private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) {
+  result = ret.getEnclosingCallable()
+}
+
+/**
+ * Gets the return position of kind `kind` whose callable is the enclosing
+ * callable of `ret`. Note that `kind` is not required to be a kind of `ret`
+ * here; `getReturnPosition` supplies that restriction.
+ */
+pragma[noinline]
+private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) {
+  result.getCallable() = returnNodeGetEnclosingCallable(ret) and
+  kind = result.getKind()
+}
+
+/**
+ * Gets the return position of `ret`: its enclosing callable tagged with the
+ * kind of `ret`.
+ */
+pragma[noinline]
+ReturnPosition getReturnPosition(ReturnNodeExt ret) {
+  result = getReturnPosition0(ret, ret.getKind())
+}
+
+/**
+ * Holds if flow returning from `callable` in call context `cc` may return to
+ * the call `call`.
+ *
+ * With an unrestricted context, `call` is any call that has `callable` as a
+ * viable target. With a return context `TReturn(c0, call0)`, where `call0` is
+ * in `callable`, `call` must be a context in which `call0` can dispatch to
+ * `c0` according to `prunedViableImplInCallContextReverse`. No other kind of
+ * call context is handled.
+ */
+bindingset[cc, callable]
+predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) {
+  cc instanceof CallContextAny and callable = viableCallable(call)
+  or
+  exists(DataFlowCallable c0, DataFlowCall call0 |
+    call0.getEnclosingCallable() = callable and
+    cc = TReturn(c0, call0) and
+    c0 = prunedViableImplInCallContextReverse(call0, call)
+  )
+}
+
+/**
+ * Gets a dispatch target of `call` in the call context `cc`.
+ *
+ * The targets are only pruned when `cc` is a specific call that reduces the
+ * set of viable targets of `call`; in every other case the result is any
+ * viable callable of `call`.
+ */
+bindingset[call, cc]
+DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) {
+  exists(DataFlowCall ctx | cc = TSpecificCall(ctx) |
+    if reducedViableImplInCallContext(call, _, ctx)
+    then result = prunedViableImplInCallContext(call, ctx)
+    else result = viableCallable(call)
+  )
+  or
+  result = viableCallable(call) and cc instanceof CallContextSomeCall
+  or
+  result = viableCallable(call) and cc instanceof CallContextAny
+  or
+  result = viableCallable(call) and cc instanceof CallContextReturn
+}
+
+/** An alias for the three-argument `readStep` predicate. */
+predicate read = readStep/3;
+
+/** An optional Boolean value. */
+class BooleanOption extends TBooleanOption {
+  /**
+   * Gets a textual representation of this option: `<none>` if there is no
+   * value, and otherwise the string form of the Boolean value.
+   */
+  string toString() {
+    this = TBooleanNone() and result = "<none>"
+    or
+    this = TBooleanSome(any(boolean b | result = b.toString()))
+  }
+}
+
+/** Content tagged with the type of a containing object. */
+class TypedContent extends MkTypedContent {
+  // The content and the container type that make up this typed content.
+  private Content c;
+  private DataFlowType t;
+
+  TypedContent() { this = MkTypedContent(c, t) }
+
+  /** Gets the content. */
+  Content getContent() { result = c }
+
+  /** Gets the container type. */
+  DataFlowType getContainerType() { result = t }
+
+  /** Gets a textual representation of this content. The container type is not included. */
+  string toString() { result = c.toString() }
+}
+
+/**
+ * The front of an access path. This is either a head or a nil.
+ */
+abstract class AccessPathFront extends TAccessPathFront {
+  /** Gets a textual representation of this access path front. */
+  abstract string toString();
+
+  /** Gets the type associated with this access path front. */
+  abstract DataFlowType getType();
+
+  /** Gets `true` if this front is a head, and `false` if it is a nil. */
+  abstract boolean toBoolNonEmpty();
+
+  /** Holds if this front is a head with typed content `tc`. */
+  predicate headUsesContent(TypedContent tc) { this = TFrontHead(tc) }
+
+  /** Holds if this front is a head whose content is cleared at node `n`. */
+  predicate isClearedAt(Node n) {
+    exists(TypedContent tc |
+      this.headUsesContent(tc) and
+      clearsContent(n, tc.getContent())
+    )
+  }
+}
+
+/** The front of an empty access path, carrying a type. */
+class AccessPathFrontNil extends AccessPathFront, TFrontNil {
+  private DataFlowType t;
+
+  AccessPathFrontNil() { this = TFrontNil(t) }
+
+  override string toString() { result = ppReprType(t) }
+
+  override DataFlowType getType() { result = t }
+
+  override boolean toBoolNonEmpty() { result = false }
+}
+
+/** The front of a non-empty access path, carrying the typed content at its head. */
+class AccessPathFrontHead extends AccessPathFront, TFrontHead {
+  private TypedContent tc;
+
+  AccessPathFrontHead() { this = TFrontHead(tc) }
+
+  override string toString() { result = tc.toString() }
+
+  /** Gets the container type of the head content. */
+  override DataFlowType getType() { result = tc.getContainerType() }
+
+  override boolean toBoolNonEmpty() { result = true }
+}
+
+/** An optional access path front. */
+class AccessPathFrontOption extends TAccessPathFrontOption {
+  /**
+   * Gets a textual representation of this option: `<none>` if there is no
+   * front, and otherwise the textual representation of the front.
+   */
+  string toString() {
+    this = TAccessPathFrontNone() and result = "<none>"
+    or
+    this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString()))
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplConsistency.qll
@@ -0,0 +1,166 @@
+/**
+ * Provides consistency queries for checking invariants in the language-specific
+ * data-flow classes and predicates.
+ */
+
+private import DataFlowImplSpecific::Private
+private import DataFlowImplSpecific::Public
+private import tainttracking1.TaintTrackingParameter::Private
+private import tainttracking1.TaintTrackingParameter::Public
+
+/**
+ * Consistency queries. Each `query predicate` reports violations of one
+ * invariant; an empty result means the invariant holds.
+ */
+module Consistency {
+  /**
+   * A node that takes part in the data-flow graph: an argument, parameter,
+   * return or out node, or an end point of a local, jump, store, read or
+   * default additional taint step.
+   */
+  private class RelevantNode extends Node {
+    RelevantNode() {
+      this instanceof ArgumentNode or
+      this instanceof ParameterNode or
+      this instanceof ReturnNode or
+      this = getAnOutNode(_, _) or
+      simpleLocalFlowStep(this, _) or
+      simpleLocalFlowStep(_, this) or
+      jumpStep(this, _) or
+      jumpStep(_, this) or
+      storeStep(this, _, _) or
+      storeStep(_, _, this) or
+      readStep(this, _, _) or
+      readStep(_, _, this) or
+      defaultAdditionalTaintStep(this, _) or
+      defaultAdditionalTaintStep(_, this)
+    }
+  }
+
+  /** Reports relevant nodes that do not have exactly one enclosing callable. */
+  query predicate uniqueEnclosingCallable(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(n.getEnclosingCallable()) and
+      c != 1 and
+      msg = "Node should have one enclosing callable but has " + c + "."
+    )
+  }
+
+  /** Reports relevant nodes that do not have exactly one type. */
+  query predicate uniqueType(Node n, string msg) {
+    exists(int c |
+      n instanceof RelevantNode and
+      c = count(getNodeType(n)) and
+      c != 1 and
+      msg = "Node should have one type but has " + c + "."
+    )
+  }
+
+  /** Reports nodes (relevant or not) that do not have exactly one location. */
+  query predicate uniqueNodeLocation(Node n, string msg) {
+    exists(int c |
+      c =
+        count(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+          n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+        ) and
+      c != 1 and
+      msg = "Node should have one location but has " + c + "."
+    )
+  }
+
+  /**
+   * Reports the number of nodes without any location as a single row.
+   * `strictcount` has no result for zero, so nothing is reported when every
+   * node has a location.
+   */
+  query predicate missingLocation(string msg) {
+    exists(int c |
+      c =
+        strictcount(Node n |
+          not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn |
+            n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+          )
+        ) and
+      msg = "Nodes without location: " + c
+    )
+  }
+
+  /** Reports nodes that do not have exactly one `toString` result. */
+  query predicate uniqueNodeToString(Node n, string msg) {
+    exists(int c |
+      c = count(n.toString()) and
+      c != 1 and
+      msg = "Node should have one toString but has " + c + "."
+    )
+  }
+
+  /**
+   * Reports the number of nodes without a `toString` result as a single row;
+   * nothing is reported when there are none.
+   */
+  query predicate missingToString(string msg) {
+    exists(int c |
+      c = strictcount(Node n | not exists(n.toString())) and
+      msg = "Nodes without toString: " + c
+    )
+  }
+
+  /** Reports parameters whose callable (per `isParameterOf`) differs from their enclosing callable. */
+  query predicate parameterCallable(ParameterNode p, string msg) {
+    exists(DataFlowCallable c | p.isParameterOf(c, _) and c != p.getEnclosingCallable()) and
+    msg = "Callable mismatch for parameter."
+  }
+
+  /** Reports local flow steps whose end points have different enclosing callables. */
+  query predicate localFlowIsLocal(Node n1, Node n2, string msg) {
+    simpleLocalFlowStep(n1, n2) and
+    n1.getEnclosingCallable() != n2.getEnclosingCallable() and
+    msg = "Local flow step does not preserve enclosing callable."
+  }
+
+  /** Gets a type that is the type of some node. */
+  private DataFlowType typeRepr() { result = getNodeType(_) }
+
+  /** Reports node types that are not compatible with themselves. */
+  query predicate compatibleTypesReflexive(DataFlowType t, string msg) {
+    t = typeRepr() and
+    not compatibleTypes(t, t) and
+    msg = "Type compatibility predicate is not reflexive."
+  }
+
+  /**
+   * Reports `isUnreachableInCall(n, call)` facts where the enclosing callable
+   * of `n` is not a viable target of `call`.
+   */
+  query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) {
+    isUnreachableInCall(n, call) and
+    exists(DataFlowCallable c |
+      c = n.getEnclosingCallable() and
+      not viableCallable(call) = c
+    ) and
+    msg = "Call context for isUnreachableInCall is inconsistent with call graph."
+  }
+
+  /** Reports out nodes and argument nodes whose enclosing callable differs from that of their call. */
+  query predicate localCallNodes(DataFlowCall call, Node n, string msg) {
+    (
+      n = getAnOutNode(call, _) and
+      msg = "OutNode and call does not share enclosing callable."
+      or
+      n.(ArgumentNode).argumentOf(call, _) and
+      msg = "ArgumentNode and call does not share enclosing callable."
+    ) and
+    n.getEnclosingCallable() != call.getEnclosingCallable()
+  }
+
+  /** Reports post-update nodes that are their own pre-update node. */
+  query predicate postIsNotPre(PostUpdateNode n, string msg) {
+    n.getPreUpdateNode() = n and msg = "PostUpdateNode should not equal its pre-update node."
+  }
+
+  /** Reports post-update nodes that do not have exactly one pre-update node. */
+  query predicate postHasUniquePre(PostUpdateNode n, string msg) {
+    exists(int c |
+      c = count(n.getPreUpdateNode()) and
+      c != 1 and
+      msg = "PostUpdateNode should have one pre-update node but has " + c + "."
+    )
+  }
+
+  /** Reports nodes that are the pre-update node of more than one post-update node. */
+  query predicate uniquePostUpdate(Node n, string msg) {
+    1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and
+    msg = "Node has multiple PostUpdateNodes."
+  }
+
+  /** Reports post-update nodes whose enclosing callable differs from that of their pre-update node. */
+  query predicate postIsInSameCallable(PostUpdateNode n, string msg) {
+    n.getEnclosingCallable() != n.getPreUpdateNode().getEnclosingCallable() and
+    msg = "PostUpdateNode does not share callable with its pre-update node."
+  }
+
+  /** Holds if `n` is the pre-update node of some post-update node. */
+  private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) }
+
+  /**
+   * Reports read-step origins without a post-update node where the target of
+   * the read does have one.
+   */
+  query predicate reverseRead(Node n, string msg) {
+    exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and
+    msg = "Origin of readStep is missing a PostUpdateNode."
+  }
+
+  /** Reports store-step targets that are not post-update nodes. */
+  query predicate storeIsPostUpdate(Node n, string msg) {
+    storeStep(_, _, n) and
+    not n instanceof PostUpdateNode and
+    msg = "Store targets should be PostUpdateNodes."
+  }
+
+  /**
+   * Reports argument nodes that have no post-update node and are not exempted
+   * by `isImmutableOrUnobservable`.
+   */
+  query predicate argHasPostUpdate(ArgumentNode n, string msg) {
+    not hasPost(n) and
+    not isImmutableOrUnobservable(n) and
+    msg = "ArgumentNode is missing PostUpdateNode."
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowImplSpecific.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowImplSpecific.qll
@@ -0,0 +1,12 @@
+/**
+ * Provides Python-specific definitions for use in the data flow library.
+ */
+module Private {
+  // Definitions intended for use by the data flow library itself.
+  import DataFlowPrivate
+  // Disabled: the dispatch predicates (such as `viableCallable`) are currently
+  // defined in `DataFlowPrivate`.
+//   import DataFlowDispatch
+}
+
+module Public {
+  // Definitions that data flow queries can refer to.
+  import DataFlowPublic
+  import DataFlowUtil
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowPrivate.qll
@@ -0,0 +1,280 @@
+private import python
+private import DataFlowPublic
+
+//--------
+// Data flow graph
+//--------
+//--------
+// Nodes
+//--------
+/**
+ * A node associated with an object after an operation that might have
+ * changed its state.
+ *
+ * This can be either the argument to a callable after the callable returns
+ * (which might have mutated the argument), or the qualifier of a field after
+ * an update to the field.
+ *
+ * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
+ * to the value before the update with the exception of `ObjectCreation`,
+ * which represents the value after the constructor has run.
+ *
+ * NOTE(review): `ObjectCreation` does not appear anywhere in this file; the
+ * wording looks inherited from another language's implementation - confirm
+ * the Python equivalent. This file declares no concrete subclasses.
+ */
+abstract class PostUpdateNode extends Node {
+  /** Gets the node before the state update. */
+  abstract Node getPreUpdateNode();
+}
+
+/** An expression as seen by the data flow library. This is an alias for `Expr`. */
+class DataFlowExpr = Expr;
+
+/**
+ * Flow between ESSA variables.
+ * This includes both local and global variables.
+ * Flow comes from definitions, uses and refinements.
+ */
+// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
+module EssaFlow {
+  /**
+   * Holds if there is a single flow step from `nodeFrom` to `nodeTo` that is
+   * derived from ESSA: a definition, a `with` definition, a use, a
+   * refinement, or a phi function input.
+   */
+  predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
+    // Definition
+    //   `x = f(42)`
+    //   nodeFrom is `f(42)`, cfg node
+    //   nodeTo is `x`, essa var
+    nodeFrom.(CfgNode).getNode() =
+      nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
+    or
+    // With definition
+    //   `with f(42) as x:`
+    //   nodeFrom is `f(42)`, cfg node
+    //   nodeTo is `x`, essa var
+    exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
+      nodeFrom.(CfgNode).getNode() = contextManager and
+      nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
+      // see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
+      with.getContextExpr() = contextManager.getNode() and
+      with.getOptionalVars() = var.getNode() and
+      contextManager.strictlyDominates(var)
+    )
+    or
+    // Use
+    //   `y = 42`
+    //   `x = f(y)`
+    //   nodeFrom is `y` on first line, essa var
+    //   nodeTo is `y` on second line, cfg node
+    nodeFrom.(EssaNode).getVar().getAUse() = nodeTo.(CfgNode).getNode()
+    or
+    // Refinements
+    //   nodeFrom is the input of the refinement, essa var
+    //   nodeTo is the refined variable, essa var
+    exists(EssaEdgeRefinement r |
+      nodeTo.(EssaNode).getVar() = r.getVariable() and
+      nodeFrom.(EssaNode).getVar() = r.getInput()
+    )
+    or
+    exists(EssaNodeRefinement r |
+      nodeTo.(EssaNode).getVar() = r.getVariable() and
+      nodeFrom.(EssaNode).getVar() = r.getInput()
+    )
+    or
+    // Phi functions
+    //   nodeFrom is any input of the phi function, essa var
+    //   nodeTo is the variable defined by the phi function, essa var
+    exists(PhiFunction p |
+      nodeTo.(EssaNode).getVar() = p.getVariable() and
+      nodeFrom.(EssaNode).getVar() = p.getAnInput()
+    )
+  }
+}
+
+//--------
+// Local flow
+//--------
+/**
+ * This is the local flow predicate that is used as a building block in global
+ * data flow. It consists of the ESSA flow steps in which neither end point is
+ * a global SSA variable; steps that do involve a global SSA variable are
+ * covered by `jumpStep` instead.
+ */
+predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
+  EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
+  not (
+    nodeFrom.(EssaNode).getVar() instanceof GlobalSsaVariable
+    or
+    nodeTo.(EssaNode).getVar() instanceof GlobalSsaVariable
+  )
+}
+
+// TODO: Make modules for these headings
+//--------
+// Global flow
+//--------
+/** Represents a callable. This is an alias for `CallableValue`. */
+class DataFlowCallable = CallableValue;
+
+/**
+ * Represents a call to a callable. Only call nodes that are a call of some
+ * `DataFlowCallable` (per `getACall`) are included.
+ */
+class DataFlowCall extends CallNode {
+  // A callable that this node is a call of; bound by the characteristic predicate.
+  DataFlowCallable callable;
+
+  DataFlowCall() { this = callable.getACall() }
+
+  /** Get the callable to which this call goes. */
+  DataFlowCallable getCallable() { result = callable }
+
+  /** Gets the enclosing callable of this call, that is, a callable whose scope is the scope of the call. */
+  DataFlowCallable getEnclosingCallable() { result.getScope() = this.getNode().getScope() }
+}
+
+/** A data flow node that represents a call argument: a positional argument of some `DataFlowCall`. */
+class ArgumentNode extends CfgNode {
+  ArgumentNode() { node = any(DataFlowCall c).getArg(_) }
+
+  /** Holds if this argument occurs at the given position in the given call. */
+  predicate argumentOf(DataFlowCall call, int pos) { call.getArg(pos) = node }
+
+  /** Gets the call in which this node is an argument. */
+  final DataFlowCall getCall() { this.argumentOf(result, _) }
+}
+
+/**
+ * Gets a viable run-time target for the call `call`. This is currently just
+ * the callable recorded on the call itself.
+ */
+DataFlowCallable viableCallable(DataFlowCall call) { result = call.getCallable() }
+
+// The only return kind is the normal return.
+private newtype TReturnKind = TNormalReturnKind()
+
+/**
+ * A return kind. A return kind describes how a value can be returned
+ * from a callable. For Python, this is simply a method return.
+ */
+class ReturnKind extends TReturnKind {
+  /** Gets a textual representation of this element. */
+  string toString() { result = "return" }
+}
+
+/** A data flow node that represents a value returned by a callable. */
+class ReturnNode extends CfgNode {
+  // The `return` statement whose returned value this node represents.
+  Return ret;
+
+  // See `TaintTrackingImplementation::returnFlowStep`
+  ReturnNode() { node = ret.getValue().getAFlowNode() }
+
+  /** Gets the kind of this return node. There is only the normal return kind. */
+  ReturnKind getKind() { result = TNormalReturnKind() }
+
+  /**
+   * Gets the callable whose scope immediately encloses the `return` statement.
+   *
+   * The scope is taken from the statement itself rather than by matching the
+   * statement against the top-level statements of a scope. This way a
+   * `return` nested inside a compound statement (`if`, `for`, `try`, ...)
+   * still has an enclosing callable, and a `return` in a nested function is
+   * attributed to that nested function.
+   */
+  override DataFlowCallable getEnclosingCallable() { result.getScope() = ret.getScope() }
+}
+
+/**
+ * A data flow node that represents the output of a call. Every control flow
+ * node that is a `CallNode` is included.
+ */
+class OutNode extends CfgNode {
+  OutNode() { node instanceof CallNode }
+}
+
+/**
+ * Gets a node that can read the value returned from `call` with return kind
+ * `kind`. This is the node for the call itself, and the kind is always the
+ * normal return kind.
+ */
+OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
+  call = result.getNode() and
+  kind = TNormalReturnKind()
+}
+
+//--------
+// Type pruning
+//--------
+// A single placeholder type; no type information is tracked.
+newtype TDataFlowType = TAnyFlow()
+
+/** A data flow type. There is currently only one, shared by all nodes. */
+class DataFlowType extends TDataFlowType {
+  /** Gets a textual representation of this element. */
+  string toString() { result = "DataFlowType" }
+}
+
+/** A node that performs a type cast. No nodes are currently considered casts. */
+class CastNode extends Node {
+  CastNode() { none() }
+}
+
+/**
+ * Holds if `t1` and `t2` are compatible, that is, whether data can flow from
+ * a node of type `t1` to a node of type `t2`.
+ *
+ * This always holds, so no flow is pruned on the grounds of type.
+ */
+pragma[inline]
+predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
+
+/**
+ * Gets the type of `node`. Every node has the single placeholder type
+ * `TAnyFlow()`.
+ */
+DataFlowType getNodeType(Node node) { result = TAnyFlow() }
+
+/**
+ * Gets a string representation of the type `t`. This is not implemented, so
+ * there is never a result.
+ */
+string ppReprType(DataFlowType t) { none() }
+
+//--------
+// Extra flow
+//--------
+/**
+ * Holds if `pred` can flow to `succ`, by jumping from one callable to
+ * another. Additional steps specified by the configuration are *not*
+ * taken into account.
+ *
+ * Currently these are exactly the ESSA flow steps in which at least one end
+ * point is a global SSA variable.
+ */
+predicate jumpStep(Node pred, Node succ) {
+  // As we have ESSA variables for global variables,
+  // we include ESSA flow steps involving global variables.
+  (
+    pred.(EssaNode).getVar() instanceof GlobalSsaVariable
+    or
+    succ.(EssaNode).getVar() instanceof GlobalSsaVariable
+  ) and
+  EssaFlow::essaFlowStep(pred, succ)
+}
+
+//--------
+// Field flow
+//--------
+/**
+ * Holds if data can flow from `node1` to `node2` via an assignment to
+ * content `c`.
+ *
+ * Not yet implemented: this never holds.
+ */
+predicate storeStep(Node node1, Content c, Node node2) { none() }
+
+/**
+ * Holds if data can flow from `node1` to `node2` via a read of content `c`.
+ *
+ * Not yet implemented: this never holds.
+ */
+predicate readStep(Node node1, Content c, Node node2) { none() }
+
+/**
+ * Holds if values stored inside content `c` are cleared at node `n`. For example,
+ * any value stored inside `f` is cleared at the pre-update node associated with `x`
+ * in `x.f = newValue`.
+ *
+ * Not yet implemented: this never holds.
+ */
+cached
+predicate clearsContent(Node n, Content c) { none() }
+
+//--------
+// Fancy context-sensitive guards
+//--------
+/**
+ * Holds if the node `n` is unreachable when the call context is `call`.
+ *
+ * Not yet implemented: this never holds.
+ */
+predicate isUnreachableInCall(Node n, DataFlowCall call) { none() }
+
+//--------
+// Virtual dispatch with call context
+//--------
+/**
+ * Gets a viable dispatch target of `call` in the context `ctx`. This is
+ * restricted to those `call`s for which a context might make a difference.
+ *
+ * Not yet implemented: there is never a result.
+ */
+DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
+
+/**
+ * Holds if the set of viable implementations that can be called by `call`
+ * might be improved by knowing the call context. This is the case if the qualifier accesses a parameter of
+ * the enclosing callable `c` (including the implicit `this` parameter).
+ *
+ * Not yet implemented: this never holds.
+ */
+predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
+
+//--------
+// Misc
+//--------
+/**
+ * Holds if `n` does not require a `PostUpdateNode` as it either cannot be
+ * modified or its modification cannot be observed, for example if it is a
+ * freshly created object that is not saved in a variable.
+ *
+ * This predicate is only used for consistency checks.
+ *
+ * Not yet implemented: this never holds, so no node is exempted.
+ */
+predicate isImmutableOrUnobservable(Node n) { none() }
+
+/**
+ * Gets the access path limit, which is 5.
+ *
+ * NOTE(review): presumably the maximum length of the access paths tracked by
+ * the shared library - confirm against `DataFlowImpl`.
+ */
+int accessPathLimit() { result = 5 }
+
+/**
+ * Holds if `n` should be hidden from path explanations. This never holds,
+ * so no node is hidden.
+ */
+predicate nodeIsHidden(Node n) { none() }
--- a/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowPublic.qll
@@ -0,0 +1,143 @@
+/**
+ * Provides Python-specific definitions for use in the data flow library.
+ */
+
+import python
+private import DataFlowPrivate
+
+/**
+ * IPA type for data flow nodes.
+ *
+ * Flow between SSA variables is computed in `Essa.qll`.
+ *
+ * Flow from SSA variables to control flow nodes is generally via uses.
+ *
+ * Flow from control flow nodes to SSA variables is generally via assignments.
+ *
+ * The current implementation of these cross flows can be seen in `EssaTaintTracking`.
+ */
+newtype TNode =
+  /** A node corresponding to an SSA variable. */
+  TEssaNode(EssaVariable var) or
+  /** A node corresponding to a control flow node. */
+  TCfgNode(ControlFlowNode node)
+
+/**
+ * An element, viewed as a node in a data flow graph. Either an SSA variable
+ * (`EssaNode`) or a control flow node (`CfgNode`).
+ */
+class Node extends TNode {
+  /** Gets a textual representation of this element. */
+  string toString() { result = "Data flow node" }
+
+  /** Gets the scope of this node. Has no result here; `EssaNode` and `CfgNode` override it. */
+  Scope getScope() { none() }
+
+  /** Gets the enclosing callable of this node, that is, a callable whose scope is the scope of this node. */
+  DataFlowCallable getEnclosingCallable() { result.getScope() = this.getScope() }
+
+  /** Gets the location of this node. Has no result here; `EssaNode` and `CfgNode` override it. */
+  Location getLocation() { none() }
+
+  /**
+   * Holds if this element is at the specified location.
+   * The location spans column `startcolumn` of line `startline` to
+   * column `endcolumn` of line `endline` in file `filepath`.
+   * For more information, see
+   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
+   */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/** A data flow node corresponding to an ESSA variable. */
+class EssaNode extends Node, TEssaNode {
+  // The variable wrapped by `TEssaNode(var)`.
+  EssaVariable var;
+
+  EssaNode() { this = TEssaNode(var) }
+
+  /** Gets the ESSA variable that this node corresponds to. */
+  EssaVariable getVar() { result = var }
+
+  /** Gets a textual representation of this element. */
+  override string toString() { result = var.toString() }
+
+  override Scope getScope() { result = var.getScope() }
+
+  // The node is located at the definition of its variable.
+  override Location getLocation() { result = var.getDefinition().getLocation() }
+}
+
+/** A data flow node corresponding to a control flow node. */
+class CfgNode extends Node, TCfgNode {
+  // The control flow node wrapped by `TCfgNode(node)`.
+  ControlFlowNode node;
+
+  CfgNode() { this = TCfgNode(node) }
+
+  /** Gets the control flow node that this node corresponds to. */
+  ControlFlowNode getNode() { result = node }
+
+  /** Gets a textual representation of this element. */
+  override string toString() { result = node.toString() }
+
+  override Scope getScope() { result = node.getScope() }
+
+  override Location getLocation() { result = node.getLocation() }
+}
+
+/**
+ * An expression, viewed as a node in a data flow graph.
+ *
+ * Note that because of control-flow splitting, one `Expr` may correspond
+ * to multiple `ExprNode`s, just like it may correspond to multiple
+ * `ControlFlow::Node`s.
+ *
+ * This class has no characteristic predicate yet, so it currently contains
+ * every `Node`.
+ */
+class ExprNode extends Node { }
+
+/**
+ * Gets a node corresponding to expression `e`. Not yet implemented: there is
+ * never a result.
+ */
+ExprNode exprNode(DataFlowExpr e) { none() }
+
+/**
+ * The value of a parameter at function entry, viewed as a node in a data
+ * flow graph. These are the ESSA nodes whose variable is a
+ * `ParameterDefinition`.
+ */
+class ParameterNode extends EssaNode {
+  ParameterNode() { var instanceof ParameterDefinition }
+
+  /**
+   * Holds if this node is the parameter of callable `c` at the
+   * (zero-based) index `i`.
+   */
+  predicate isParameterOf(DataFlowCallable c, int i) {
+    var.(ParameterDefinition).getDefiningNode() = c.getParameter(i)
+  }
+
+  // The enclosing callable is a callable that this node is a parameter of,
+  // rather than one found via the scope of the variable.
+  override DataFlowCallable getEnclosingCallable() { this.isParameterOf(result, _) }
+}
+
+/**
+ * A guard that validates some expression.
+ *
+ * To use this in a configuration, extend the class and provide a
+ * characteristic predicate precisely specifying the guard, and override
+ * `checks` to specify what is being validated and in which branch.
+ *
+ * It is important that all extending classes in scope are disjoint.
+ *
+ * Barrier guards are not yet implemented: the `checks` predicate is
+ * commented out and `getAGuardedNode` has no results.
+ */
+class BarrierGuard extends Expr {
+  // /** Holds if this guard validates `e` upon evaluating to `v`. */
+  // abstract predicate checks(Expr e, AbstractValue v);
+  /** Gets a node guarded by this guard. Currently there is never a result. */
+  final ExprNode getAGuardedNode() {
+    none()
+    // exists(Expr e, AbstractValue v |
+    //   this.checks(e, v) and
+    //   this.controlsNode(result.getControlFlowNode(), e, v)
+    // )
+  }
+}
+
+/**
+ * A reference contained in an object. This is either a field or a property.
+ *
+ * This is currently a placeholder with the single value `"Content"`.
+ */
+class Content extends string {
+  Content() { this = "Content" }
+}
--- a/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll
+++ b/python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll
@@ -0,0 +1,18 @@
+/**
+ * Contains utility functions for writing data flow queries
+ */
+
+import DataFlowPrivate
+import DataFlowPublic
+
+/**
+ * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
+ * (intra-procedural) step. This currently coincides with
+ * `simpleLocalFlowStep`.
+ */
+predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
+
+/**
+ * Holds if data flows from `source` to `sink` in zero or more local
+ * (intra-procedural) steps. This is the reflexive transitive closure of
+ * `localFlowStep`.
+ */
+predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) }
--- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll
+++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPrivate.qll
@@ -0,0 +1,21 @@
+private import python
+private import TaintTrackingPublic
+private import experimental.dataflow.DataFlow
+private import experimental.dataflow.internal.DataFlowPrivate
+
+/**
+ * Holds if `node` should be a barrier in all global taint flow configurations
+ * but not in local taint.
+ *
+ * There are currently no default barriers: this never holds.
+ */
+predicate defaultTaintBarrier(DataFlow::Node node) { none() }
+
+/**
+ * Holds if the additional step from `pred` to `succ` should be included in all
+ * global taint flow configurations.
+ *
+ * There are currently no default additional steps: this never holds. The
+ * commented-out body sketches an implementation that is not enabled.
+ */
+predicate defaultAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
+  none()
+  // localAdditionalTaintStep(pred, succ)
+  // or
+  // succ = pred.(DataFlow::NonLocalJumpNode).getAJumpSuccessor(false)
+}
--- a/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll
+++ b/python/ql/src/experimental/dataflow/internal/TaintTrackingPublic.qll
@@ -0,0 +1,36 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ */
+
+private import python
+private import TaintTrackingPrivate
+private import experimental.dataflow.DataFlow
+
+// /**
+//  * Holds if taint propagates from `source` to `sink` in zero or more local
+//  * (intra-procedural) steps.
+//  */
+// predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) }
+
+// // /**
+// //  * Holds if taint can flow from `e1` to `e2` in zero or more
+// //  * local (intra-procedural) steps.
+// //  */
+// // predicate localExprTaint(Expr e1, Expr e2) {
+// //   localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2))
+// // }
+
+// // /** A member (property or field) that is tainted if its containing object is tainted. */
+// // abstract class TaintedMember extends AssignableMember { }
+
+// /**
+//  * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
+//  * (intra-procedural) step.
+//  */
+// predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+//   // Ordinary data flow
+//   DataFlow::localFlowStep(nodeFrom, nodeTo)
+//   or
+//   localAdditionalTaintStep(nodeFrom, nodeTo)
+// }
--- a/python/ql/src/experimental/dataflow/internal/readme.md
+++ b/python/ql/src/experimental/dataflow/internal/readme.md
@@ -0,0 +1,138 @@
+# Using the shared dataflow library
+
+## File organisation
+
+The files currently live in `experimental` (whereas the existing implementation lives in `semmle\python\dataflow`).
+
+In there is found `DataFlow.qll`, `DataFlow2.qll` etc. which refer to `internal\DataFlowImpl`, `internal\DataFlowImpl2` etc. respectively. The `DataFlowImplN`-files are all identical copies to avoid mutual recursion. They start off by including two files `internal\DataFlowImplCommon` and `internal\DataFlowImplSpecific`. The former contains all the language-agnostic definitions, while the latter is where we describe our favorite language. `Specific` simply forwards to two other files `internal\DataFlowPrivate.qll` and `internal\DataFlowPublic.qll`. Definitions in the former will be hidden behind a `private` modifier, while those in the latter can be referred to in data flow queries. For instance, the definition of `DataFlow::Node` should likely be in `DataFlowPublic.qll`.
+
+## Define the dataflow graph
+
+In order to use the dataflow library, we need to define the dataflow graph,
+that is define the nodes and the edges.
+
+### Define the nodes
+
+The nodes are defined in the type `DataFlow::Node` (found in `DataFlowPublic.qll`).
+This should likely be an IPA type, so we can extend it as needed.
+
+Typical cases needed to construct the call graph include
+ - argument node
+ - parameter node
+ - return node
+
+Typical extensions include
+ - postupdate nodes
+ - implicit `this`-nodes
+
+### Define the edges
+
+The edges split into local flow (within a function) and global flow (the call graph, between functions/procedures).
+
+Extra flow, such as reading from and writing to global variables, can be captured in `jumpStep`.
+The local flow should be obtainable from an SSA computation.
+Local flow nodes are generally either control flow nodes or SSA variables.
+Flow from control flow nodes to SSA variables comes from SSA variable definitions, while flow from SSA variables to control flow nodes comes from def-use pairs.
+
+The global flow should be obtainable from a `PointsTo` analysis. It is specified via `viableCallable` and
+`getAnOutNode`. Consider making `ReturnKind` a singleton IPA type as in java.
+
+Global flow includes local flow within a consistent call context. Thus, for local flow to count as global flow, all relevant nodes should implement `getEnclosingCallable`.
+
+If complicated dispatch needs to be modelled, try using the `[reduced|pruned]viable*` predicates.
+
+## Field flow
+
+To track flow through fields we need to provide a model of fields, that is the `Content` class.
+
+Field access is specified via `readStep` and `storeStep`.
+
+Work is being done to make field flow handle lists and dictionaries and the like.
+
+`PostUpdateNode`s become important when field flow is used, as they track modifications to fields resulting from function calls.
+
+## Type pruning
+
+If type information is available, flows can be discarded on the grounds of type mismatch.
+
+Tracked types are given by the class `DataFlowType` and the predicate `getNodeType`, and compatibility is recorded in the predicate `compatibleTypes`.
+If type pruning is not used, `compatibleTypes` should be implemented as `any`; if it is implemented, say, as `none`, all flows will be pruned.
+
+Further, possible casts are given by the class `CastNode`.
+
+---
+
+# Plan
+
+## Stage I, data flow
+
+### Phase 0, setup
+Define minimal IPA type for `DataFlow::Node`
+Define all required predicates empty (via `none()`),
+except `compatibleTypes` which should be `any()`.
+Define `ReturnKind`, `DataFlowType`, and `Content` as singleton IPA types.
+
+
+### Phase 1, local flow
+Implement `simpleLocalFlowStep` based on the existing SSA computation
+
+### Phase 2, global flow
+Implement `viableCallable` and `getAnOutNode` based on the existing predicate `PointsTo`.
+
+### Phase 3, field flow
+Redefine `Content` and implement `readStep` and `storeStep`.
+
+Review use of post-update nodes.
+
+### Phase 4, type pruning
+Use type trackers to obtain relevant type information and redefine `DataFlowType` to contain appropriate cases. Record the type information in `getNodeType`.
+
+Implement `compatibleTypes` (perhaps simply as the identity).
+
+If necessary, re-implement `getErasedRepr` and `ppReprType`.
+
+If necessary, redefine `CastNode`.
+
+### Phase 5, bonus
+Review possible use of `[reduced|pruned]viable*` predicates.
+
+Review need for more elaborate `ReturnKind`.
+
+Review need for non-empty `jumpStep`.
+
+Review need for non-empty `isUnreachableInCall`.
+
+## Stage II, taint tracking
+
+### Phase 0, setup
+Implement all predicates empty.
+
+### Phase 1, experiments
+Try recovering an existing taint tracking query by implementing sources, sinks, sanitizers, and barriers.
+
+---
+
+# Status
+
+## Achieved
+
+- Copy of shared library; implemented enough predicates to make it compile.
+- Simple flow into, out of, and through functions.
+- Some tests, in particular a skeleton for something comprehensive.
+
+## TODO
+
+- Implementation has largely been done by finding a plausibly-sounding predicate in the python library to refer to. We should review that we actually have the intended semantics in all places.
+- Comprehensive testing.
+- The regression tests track the value of guards in order to eliminate impossible data flow. We currently have regressions because of this. We cannot readily replicate the existing method, as it uses the interdefinedness of data flow and taint tracking (there is a boolean taint kind). C++ [does something similar](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/controlflow/internal/ConstantExprs.qll#L27-L36) for eliminating impossible control flow, which we might be able to replicate (they infer values of "interesting" control flow nodes, which are those needed to determine values of guards).
+- Flow for some syntactic constructs is done via extra taint steps in the existing implementation; we should find a way to get data flow for it. Some of this should be covered by field flow.
+- A document is being written about proper use of the shared data flow library, this should be adhered to. In particular, we should consider replacing def-use with def-to-first-use and use-to-next-use in local flow.
+- We seem to get duplicated results for global flow, as well as flow with and without type (so four times the "unique" results).
+- We currently consider control flow nodes like exit nodes for functions, we should probably filter down which ones are of interest.
+- We should probably override ToString for a number of data flow nodes.
+- Test flow through classes, constructors and methods.
+- What happens with named arguments? What does C# do?
+- What should the enclosing callable for global variables be? C++ [makes it the variable itself](https://github.com/github/codeql/blob/master/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll#L417), C# seems to not have nodes for these but only for their reads and writes.
+- Is `yield` another return type? If not, how is it handled?
+- Should `OutNode` include magic function calls?
+- Consider creating an internal abstract class for nodes as C# does. Among other things, this can help the optimizer by stating that `getEnclosingCallable` [is functional](https://github.com/github/codeql/blob/master/csharp/ql/src/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll#L62).
--- a/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingImpl.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    isSanitizer(node) or
+    defaultTaintBarrier(node)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
+++ b/python/ql/src/experimental/dataflow/internal/tainttracking1/TaintTrackingParameter.qll
@@ -0,0 +1,6 @@
+import experimental.dataflow.internal.TaintTrackingPublic as Public
+
+module Private {
+  import experimental.dataflow.DataFlow::DataFlow as DataFlow
+  import experimental.dataflow.internal.TaintTrackingPrivate
+}
--- a/python/ql/test/experimental/dataflow/basic/allFlowsConfig.qll
+++ b/python/ql/test/experimental/dataflow/basic/allFlowsConfig.qll
@@ -0,0 +1,13 @@
+import experimental.dataflow.DataFlow
+
+/**
+ * A configuration in which every node is both a source and a sink,
+ * so that all flows are found. To be used on tiny programs.
+ */
+class AllFlowsConfig extends DataFlow::Configuration {
+  AllFlowsConfig() { this = "AllFlowsConfig" }
+
+  override predicate isSource(DataFlow::Node node) { any() }
+
+  override predicate isSink(DataFlow::Node node) { any() }
+}
--- a/python/ql/test/experimental/dataflow/basic/callGraph.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraph.expected
@@ -0,0 +1,3 @@
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
--- a/python/ql/test/experimental/dataflow/basic/callGraph.ql
+++ b/python/ql/test/experimental/dataflow/basic/callGraph.ql
@@ -0,0 +1,9 @@
+import callGraphConfig
+
+from
+  DataFlow::Node source,
+  DataFlow::Node sink
+where
+  any(CallGraphConfig config).hasFlow(source, sink)
+select
+  source, sink
--- a/python/ql/test/experimental/dataflow/basic/callGraphConfig.qll
+++ b/python/ql/test/experimental/dataflow/basic/callGraphConfig.qll
@@ -0,0 +1,20 @@
+import experimental.dataflow.DataFlow
+
+/**
+ * A configuration to find the call graph edges: arguments and returns are sources, parameters and out nodes are sinks.
+ */
+class CallGraphConfig extends DataFlow::Configuration {
+  CallGraphConfig() { this = "CallGraphConfig" }
+
+  override predicate isSource(DataFlow::Node node) {
+    node instanceof DataFlow::ReturnNode
+    or
+    node instanceof DataFlow::ArgumentNode
+  }
+
+  override predicate isSink(DataFlow::Node node) {
+    node instanceof DataFlow::OutNode
+    or
+    node instanceof DataFlow::ParameterNode
+  }
+}
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.expected
@@ -0,0 +1,2 @@
+| test.py:1:19:1:19 | SSA variable x |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
--- a/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSinks.ql
@@ -0,0 +1,5 @@
+import callGraphConfig
+
+from DataFlow::Node sink
+where any(CallGraphConfig config).isSink(sink)
+select sink
--- a/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.expected
@@ -0,0 +1,2 @@
+| test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:7:19:7:19 | ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/callGraphSources.ql
+++ b/python/ql/test/experimental/dataflow/basic/callGraphSources.ql
@@ -0,0 +1,5 @@
+import callGraphConfig
+
+from DataFlow::Node source
+where any(CallGraphConfig config).isSource(source)
+select source
--- a/python/ql/test/experimental/dataflow/basic/global.expected
+++ b/python/ql/test/experimental/dataflow/basic/global.expected
@@ -0,0 +1,95 @@
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:19:1:19 | SSA variable x | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:1:19:1:19 | SSA variable x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:2:3:2:3 | SSA variable y | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:2:3:2:3 | SSA variable y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:3:3:3:3 | SSA variable z | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:3:3:3:3 | SSA variable z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:1:19:1:19 | SSA variable x |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:2:3:2:3 | SSA variable y |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:3:3:3:3 | SSA variable z |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | SSA variable x |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | SSA variable y |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:3:3:3 | SSA variable z |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
+| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | SSA variable y |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:3:3:3 | SSA variable z |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
--- a/python/ql/test/experimental/dataflow/basic/global.ql
+++ b/python/ql/test/experimental/dataflow/basic/global.ql
@@ -0,0 +1,10 @@
+import allFlowsConfig
+
+from
+  DataFlow::Node source,
+  DataFlow::Node sink
+where
+  source != sink and
+  any(AllFlowsConfig config).hasFlow(source, sink)
+select
+  source, sink
--- a/python/ql/test/experimental/dataflow/basic/globalStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/globalStep.expected
@@ -0,0 +1,118 @@
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
+| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:1:7:1 | GSSA Variable b | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:5:7:20 | GSSA Variable a | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | SSA variable x |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
--- a/python/ql/test/experimental/dataflow/basic/globalStep.ql
+++ b/python/ql/test/experimental/dataflow/basic/globalStep.ql
@@ -0,0 +1,9 @@
+import allFlowsConfig
+
+from
+  DataFlow::PathNode nodeFrom,
+  DataFlow::PathNode nodeTo
+where
+  nodeFrom.getASuccessor() = nodeTo
+select
+  nodeFrom, nodeTo
--- a/python/ql/test/experimental/dataflow/basic/local.expected
+++ b/python/ql/test/experimental/dataflow/basic/local.expected
@@ -0,0 +1,51 @@
+| test.py:0:0:0:0 | Entry node for Module test | test.py:0:0:0:0 | Entry node for Module test |
+| test.py:0:0:0:0 | Exit node for Module test | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |
+| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | SSA variable $ |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | Entry node for Function obfuscated_id | test.py:1:1:1:21 | Entry node for Function obfuscated_id |
+| test.py:1:1:1:21 | Exit node for Function obfuscated_id | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:19:1:19 | SSA variable x |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
+| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | ControlFlowNode for y | test.py:2:3:2:3 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | ControlFlowNode for z | test.py:3:3:3:3 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:4:3:4:10 | ControlFlowNode for Return | test.py:4:3:4:10 | ControlFlowNode for Return |
+| test.py:4:10:4:10 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:1:6:1 | ControlFlowNode for a | test.py:6:1:6:1 | ControlFlowNode for a |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:6:1:6:1 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
+| test.py:7:1:7:1 | ControlFlowNode for b | test.py:7:1:7:1 | ControlFlowNode for b |
+| test.py:7:1:7:1 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:7:5:7:20 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:19:7:19 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/local.ql
+++ b/python/ql/test/experimental/dataflow/basic/local.ql
@@ -0,0 +1,9 @@
+import experimental.dataflow.DataFlow
+
+from
+  DataFlow::Node fromNode,
+  DataFlow::Node toNode
+where
+  DataFlow::localFlow(fromNode, toNode) // keep every pair of nodes related by `DataFlow::localFlow`
+select
+  fromNode, toNode
--- a/python/ql/test/experimental/dataflow/basic/localStep.expected
+++ b/python/ql/test/experimental/dataflow/basic/localStep.expected
@@ -0,0 +1,9 @@
+| test.py:0:0:0:0 | SSA variable $ | test.py:0:0:0:0 | Exit node for Module test |
+| test.py:1:19:1:19 | SSA variable x | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:2:3:2:3 | SSA variable y | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
+| test.py:3:3:3:3 | SSA variable z | test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
--- a/python/ql/test/experimental/dataflow/basic/localStep.ql
+++ b/python/ql/test/experimental/dataflow/basic/localStep.ql
@@ -0,0 +1,9 @@
+import experimental.dataflow.DataFlow
+
+from
+  DataFlow::Node fromNode,
+  DataFlow::Node toNode
+where
+  DataFlow::localFlowStep(fromNode, toNode) // keep every pair of nodes related by `DataFlow::localFlowStep`
+select
+  fromNode, toNode
--- a/python/ql/test/experimental/dataflow/basic/maximalFlows.expected
+++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.expected
@@ -0,0 +1,13 @@
+| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:0:0:0:0 | GSSA Variable b | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:1:19:1:19 | SSA variable x | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:2:3:2:3 | SSA variable y | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:1:7:1 | GSSA Variable b |
+| test.py:6:1:6:1 | GSSA Variable a | test.py:7:5:7:20 | GSSA Variable a |
--- a/python/ql/test/experimental/dataflow/basic/maximalFlows.ql
+++ b/python/ql/test/experimental/dataflow/basic/maximalFlows.ql
@@ -0,0 +1,10 @@
+import maximalFlowsConfig
+
+from
+  DataFlow::Node source,
+  DataFlow::Node sink
+where
+  source != sink and // drop trivial results where a node is paired with itself
+  exists(MaximalFlowsConfig cfg | cfg.hasFlow(source, sink)) // flow reported by `MaximalFlowsConfig`
+select
+  source, sink
--- a/python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll
+++ b/python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll
@@ -0,0 +1,25 @@
+import experimental.dataflow.DataFlow
+
+/**
+ * A configuration to find all "maximal" flows.
+ * To be used on small programs.
+ */
+class MaximalFlowsConfig extends DataFlow::Configuration {
+  MaximalFlowsConfig() { this = "MaximalFlowsConfig" } // configuration names should be unique; use the class name
+
+  override predicate isSource(DataFlow::Node node) { // parameters, and ESSA nodes with no ESSA local-flow predecessor
+    node instanceof DataFlow::ParameterNode
+    or
+    node instanceof DataFlow::EssaNode and
+    not exists(DataFlow::EssaNode pred |
+      DataFlow::localFlowStep(pred, node)
+    )
+  }
+
+  override predicate isSink(DataFlow::Node node) { // return nodes, and ESSA nodes whose variable has no source use
+    node instanceof DataFlow::ReturnNode
+    or
+    node instanceof DataFlow::EssaNode and
+    not exists(node.(DataFlow::EssaNode).getVar().getASourceUse())
+  }
+}
--- a/python/ql/test/experimental/dataflow/basic/sinks.expected
+++ b/python/ql/test/experimental/dataflow/basic/sinks.expected
@@ -0,0 +1,30 @@
+| test.py:0:0:0:0 | Entry node for Module test |
+| test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ |
+| test.py:0:0:0:0 | GSSA Variable __package__ |
+| test.py:0:0:0:0 | GSSA Variable b |
+| test.py:0:0:0:0 | SSA variable $ |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | Entry node for Function obfuscated_id |
+| test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:19:1:19 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x |
+| test.py:2:3:2:3 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:3:3:3:3 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:4:3:4:10 | ControlFlowNode for Return |
+| test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:1:6:1 | ControlFlowNode for a |
+| test.py:6:1:6:1 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
+| test.py:7:1:7:1 | ControlFlowNode for b |
+| test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:19:7:19 | ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/sinks.ql
+++ b/python/ql/test/experimental/dataflow/basic/sinks.ql
@@ -0,0 +1,5 @@
+import allFlowsConfig
+
+from DataFlow::Node sink
+where exists(AllFlowsConfig cfg | cfg.isSink(sink)) // every node that `AllFlowsConfig` accepts as a sink
+select sink
--- a/python/ql/test/experimental/dataflow/basic/sources.expected
+++ b/python/ql/test/experimental/dataflow/basic/sources.expected
@@ -0,0 +1,30 @@
+| test.py:0:0:0:0 | Entry node for Module test |
+| test.py:0:0:0:0 | Exit node for Module test |
+| test.py:0:0:0:0 | GSSA Variable __name__ |
+| test.py:0:0:0:0 | GSSA Variable __package__ |
+| test.py:0:0:0:0 | GSSA Variable b |
+| test.py:0:0:0:0 | SSA variable $ |
+| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
+| test.py:1:1:1:21 | Entry node for Function obfuscated_id |
+| test.py:1:1:1:21 | Exit node for Function obfuscated_id |
+| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id |
+| test.py:1:5:1:17 | GSSA Variable obfuscated_id |
+| test.py:1:19:1:19 | ControlFlowNode for x |
+| test.py:1:19:1:19 | SSA variable x |
+| test.py:2:3:2:3 | ControlFlowNode for y |
+| test.py:2:3:2:3 | SSA variable y |
+| test.py:2:7:2:7 | ControlFlowNode for x |
+| test.py:3:3:3:3 | ControlFlowNode for z |
+| test.py:3:3:3:3 | SSA variable z |
+| test.py:3:7:3:7 | ControlFlowNode for y |
+| test.py:4:3:4:10 | ControlFlowNode for Return |
+| test.py:4:10:4:10 | ControlFlowNode for z |
+| test.py:6:1:6:1 | ControlFlowNode for a |
+| test.py:6:1:6:1 | GSSA Variable a |
+| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral |
+| test.py:7:1:7:1 | ControlFlowNode for b |
+| test.py:7:1:7:1 | GSSA Variable b |
+| test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
+| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
+| test.py:7:5:7:20 | GSSA Variable a |
+| test.py:7:19:7:19 | ControlFlowNode for a |
--- a/python/ql/test/experimental/dataflow/basic/sources.ql
+++ b/python/ql/test/experimental/dataflow/basic/sources.ql
@@ -0,0 +1,5 @@
+import allFlowsConfig
+
+from DataFlow::Node source
+where exists(AllFlowsConfig cfg | cfg.isSource(source)) // every node that `AllFlowsConfig` accepts as a source
+select source
--- a/python/ql/test/experimental/dataflow/basic/test.py
+++ b/python/ql/test/experimental/dataflow/basic/test.py
@@ -0,0 +1,7 @@
+def obfuscated_id(x):
+  y = x
+  z = y
+  return z
+
+a = 42
+b = obfuscated_id(a)
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected
@@ -0,0 +1,103 @@
+uniqueEnclosingCallable
+| test.py:0:0:0:0 | Exit node for Module test | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable __name__ | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable __package__ | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable test23 | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable test24 | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. |
+| test.py:0:0:0:0 | SSA variable $ | Node should have one enclosing callable but has 0. |
+| test.py:6:1:6:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:6:5:6:9 | GSSA Variable test1 | Node should have one enclosing callable but has 0. |
+| test.py:9:1:9:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:9:5:9:9 | GSSA Variable test2 | Node should have one enclosing callable but has 0. |
+| test.py:13:1:13:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:13:5:13:10 | GSSA Variable source | Node should have one enclosing callable but has 0. |
+| test.py:16:1:16:14 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:16:5:16:8 | GSSA Variable sink | Node should have one enclosing callable but has 0. |
+| test.py:19:1:19:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:19:5:19:9 | GSSA Variable test3 | Node should have one enclosing callable but has 0. |
+| test.py:23:1:23:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:23:5:23:9 | GSSA Variable test4 | Node should have one enclosing callable but has 0. |
+| test.py:27:1:27:12 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:27:5:27:9 | GSSA Variable test5 | Node should have one enclosing callable but has 0. |
+| test.py:31:1:31:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:31:5:31:9 | GSSA Variable test6 | Node should have one enclosing callable but has 0. |
+| test.py:39:1:39:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:39:5:39:9 | GSSA Variable test7 | Node should have one enclosing callable but has 0. |
+| test.py:47:1:47:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:47:5:47:11 | GSSA Variable source2 | Node should have one enclosing callable but has 0. |
+| test.py:50:1:50:15 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:50:5:50:9 | GSSA Variable sink2 | Node should have one enclosing callable but has 0. |
+| test.py:53:1:53:21 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:53:5:53:9 | GSSA Variable sink3 | Node should have one enclosing callable but has 0. |
+| test.py:57:1:57:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:57:5:57:9 | GSSA Variable test8 | Node should have one enclosing callable but has 0. |
+| test.py:62:1:62:16 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:62:5:62:9 | GSSA Variable test9 | Node should have one enclosing callable but has 0. |
+| test.py:69:1:69:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:69:5:69:10 | GSSA Variable test10 | Node should have one enclosing callable but has 0. |
+| test.py:76:1:76:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:76:5:76:7 | GSSA Variable hub | Node should have one enclosing callable but has 0. |
+| test.py:79:1:79:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:79:5:79:10 | GSSA Variable test11 | Node should have one enclosing callable but has 0. |
+| test.py:84:1:84:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:84:5:84:10 | GSSA Variable test12 | Node should have one enclosing callable but has 0. |
+| test.py:89:8:89:13 | ControlFlowNode for ImportExpr | Node should have one enclosing callable but has 0. |
+| test.py:89:8:89:13 | GSSA Variable module | Node should have one enclosing callable but has 0. |
+| test.py:91:1:91:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:91:5:91:10 | GSSA Variable test13 | Node should have one enclosing callable but has 0. |
+| test.py:95:1:95:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:95:5:95:10 | GSSA Variable test14 | Node should have one enclosing callable but has 0. |
+| test.py:99:1:99:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:99:5:99:10 | GSSA Variable test15 | Node should have one enclosing callable but has 0. |
+| test.py:103:1:103:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:103:5:103:10 | GSSA Variable test16 | Node should have one enclosing callable but has 0. |
+| test.py:108:1:108:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:108:5:108:10 | GSSA Variable test20 | Node should have one enclosing callable but has 0. |
+| test.py:118:1:118:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:118:5:118:10 | GSSA Variable test21 | Node should have one enclosing callable but has 0. |
+| test.py:128:1:128:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:128:5:128:10 | GSSA Variable test22 | Node should have one enclosing callable but has 0. |
+| test.py:139:20:139:38 | ControlFlowNode for ImportMember | Node should have one enclosing callable but has 0. |
+| test.py:139:33:139:38 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. |
+| test.py:140:1:140:12 | ControlFlowNode for SINK() | Node should have one enclosing callable but has 0. |
+| test.py:140:1:140:12 | GSSA Variable unsafe | Node should have one enclosing callable but has 0. |
+| test.py:140:6:140:11 | ControlFlowNode for unsafe | Node should have one enclosing callable but has 0. |
+| test.py:142:1:142:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:142:5:142:10 | GSSA Variable test23 | Node should have one enclosing callable but has 0. |
+| test.py:146:1:146:13 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:146:5:146:10 | GSSA Variable test24 | Node should have one enclosing callable but has 0. |
+| test.py:151:1:151:29 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:151:5:151:22 | GSSA Variable test_update_extend | Node should have one enclosing callable but has 0. |
+| test.py:161:1:161:17 | ControlFlowNode for FunctionExpr | Node should have one enclosing callable but has 0. |
+| test.py:161:5:161:14 | GSSA Variable test_truth | Node should have one enclosing callable but has 0. |
+uniqueType
+uniqueNodeLocation
+missingLocation
+uniqueNodeToString
+missingToString
+parameterCallable
+localFlowIsLocal
+compatibleTypesReflexive
+unreachableNodeCCtx
+localCallNodes
+postIsNotPre
+postHasUniquePre
+uniquePostUpdate
+postIsInSameCallable
+reverseRead
+storeIsPostUpdate
+argHasPostUpdate
+| test.py:25:10:25:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:29:10:29:10 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:48:19:48:21 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
+| test.py:51:10:51:12 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
+| test.py:55:14:55:16 | ControlFlowNode for arg | ArgumentNode is missing PostUpdateNode. |
+| test.py:59:11:59:11 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:67:11:67:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
+| test.py:67:17:67:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:74:11:74:14 | ControlFlowNode for cond | ArgumentNode is missing PostUpdateNode. |
+| test.py:74:17:74:17 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:81:13:81:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
+| test.py:86:13:86:13 | ControlFlowNode for t | ArgumentNode is missing PostUpdateNode. |
--- a/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
+++ b/python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql
@@ -0,0 +1 @@
+import experimental.dataflow.internal.DataFlowImplConsistency::Consistency
--- a/python/ql/test/experimental/dataflow/consistency/test.py
+++ b/python/ql/test/experimental/dataflow/consistency/test.py
@@ -0,0 +1,171 @@
+# This is currently a copy of the integration tests.
+# It should contain many syntactic constructs, so should
+# perhaps be taken from coverage once that is done.
+# (We might even put the consistency check in there.)
+
+def test1():
+    SINK(SOURCE)
+
+def test2():
+    s = SOURCE
+    SINK(s)
+
+def source():
+    return SOURCE
+
+def sink(arg):
+    SINK(arg)
+
+def test3():
+    t = source()
+    SINK(t)
+
+def test4():
+    t = SOURCE
+    sink(t)
+
+def test5():
+    t = source()
+    sink(t)
+
+def test6(cond):
+    if cond:
+        t = "Safe"
+    else:
+        t = SOURCE
+    if cond:
+        SINK(t)
+
+def test7(cond):
+    if cond:
+        t = SOURCE
+    else:
+        t = "Safe"
+    if cond:
+        SINK(t)
+
+def source2(arg):
+    return source(arg)
+
+def sink2(arg):
+    sink(arg)
+
+def sink3(cond, arg):
+    if cond:
+        sink(arg)
+
+def test8(cond):
+    t = source2()
+    sink2(t)
+
+#False positive
+def test9(cond):
+    if cond:
+        t  = "Safe"
+    else:
+        t = SOURCE
+    sink3(cond, t)
+
+def test10(cond):
+    if cond:
+        t = SOURCE
+    else:
+        t = "Safe"
+    sink3(cond, t)
+
+def hub(arg):
+    return arg
+
+def test11():
+    t = SOURCE
+    t = hub(t)
+    SINK(t)
+
+def test12():
+    t = "safe"
+    t = hub(t)
+    SINK(t)
+
+import module
+
+def test13():
+    t = module.dangerous
+    SINK(t)
+
+def test14():
+    t = module.safe
+    SINK(t)
+
+def test15():
+    t = module.safe2
+    SINK(t)
+
+def test16():
+    t = module.dangerous_func()
+    SINK(t)
+
+
+def test20(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    if cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+def test21(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    if not cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+def test22(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    t = TAINT_FROM_ARG(t)
+    if cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+from module import dangerous as unsafe
+SINK(unsafe)
+
+def test23():
+    with SOURCE as t:
+        SINK(t)
+
+def test24():
+    s = SOURCE
+    SANITIZE(s)
+    SINK(s)
+
+def test_update_extend(x, y):
+    l = [SOURCE]
+    d = {"key" : SOURCE}
+    x.extend(l)
+    y.update(d)
+    SINK(x[0])
+    SINK(y["key"])
+    l2 = list(l)
+    d2 = dict(d)
+
+def test_truth():
+    t = SOURCE
+    if t:
+        SINK(t)
+    else:
+        SINK(t) # Regression: FP here
+    if not t:
+        SINK(t) # Regression: FP here
+    else:
+        SINK(t)
+
--- a/python/ql/test/experimental/dataflow/coverage/dataflow.expected
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow.expected
@@ -0,0 +1,6 @@
+| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:21:10:21:10 | ControlFlowNode for x |
+| test.py:25:9:25:16 | ControlFlowNode for Str | test.py:26:10:26:10 | ControlFlowNode for x |
+| test.py:29:9:29:17 | ControlFlowNode for Str | test.py:30:10:30:10 | ControlFlowNode for x |
+| test.py:33:9:33:10 | ControlFlowNode for IntegerLiteral | test.py:34:10:34:10 | ControlFlowNode for x |
+| test.py:37:9:37:12 | ControlFlowNode for FloatLiteral | test.py:38:10:38:10 | ControlFlowNode for x |
+| test.py:46:10:46:15 | ControlFlowNode for SOURCE | test.py:47:10:47:10 | ControlFlowNode for x |
--- a/python/ql/test/experimental/dataflow/coverage/dataflow.ql
+++ b/python/ql/test/experimental/dataflow/coverage/dataflow.ql
@@ -0,0 +1,9 @@
+import experimental.dataflow.testConfig
+
+from
+  DataFlow::Node source,
+  DataFlow::Node sink
+where
+  exists(TestConfiguration cfg | cfg.hasFlow(source, sink)) // source/sink pairs with flow under `TestConfiguration`
+select
+  source, sink
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.expected
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.expected
@@ -0,0 +1,7 @@
+| test.py:13:5:13:5 | SSA variable x | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow |
+| test.py:13:5:13:5 | SSA variable x | test.py:14:9:14:9 | ControlFlowNode for x |
+| test.py:13:10:13:18 | ControlFlowNode for Tuple | test.py:13:5:13:5 | SSA variable x |
+| test.py:14:5:14:5 | SSA variable y | test.py:15:5:15:11 | SSA variable y |
+| test.py:14:5:14:5 | SSA variable y | test.py:15:10:15:10 | ControlFlowNode for y |
+| test.py:14:9:14:12 | ControlFlowNode for Subscript | test.py:14:5:14:5 | SSA variable y |
+| test.py:15:5:15:11 | SSA variable y | test.py:12:1:12:33 | Exit node for Function test_tuple_with_local_flow |
--- a/python/ql/test/experimental/dataflow/coverage/localFlow.ql
+++ b/python/ql/test/experimental/dataflow/coverage/localFlow.ql
@@ -0,0 +1,8 @@
+import python
+import experimental.dataflow.DataFlow
+
+from DataFlow::Node nodeFrom, DataFlow::Node nodeTo
+where // local steps whose origin is in a callable named `..._with_local_flow`; `\\_` escapes the `_` wildcard of `matches`
+  DataFlow::localFlowStep(nodeFrom, nodeTo) and
+  nodeFrom.getEnclosingCallable().getName().matches("%\\_with\\_local\\_flow")
+select nodeFrom, nodeTo
--- a/python/ql/test/experimental/dataflow/coverage/test.py
+++ b/python/ql/test/experimental/dataflow/coverage/test.py
@@ -0,0 +1,131 @@
+# This should cover all the syntactical constructs that we hope to support.
+# Intended sources should be the variable `SOURCE` and intended sinks should be
+# arguments to the function `SINK` (see python/ql/test/experimental/dataflow/testConfig.qll).
+#
+# Functions whose name ends with "_with_local_flow" will also be tested for local flow.
+
+# These are included so that we can easily evaluate the test code
+SOURCE = "source"
+def SINK(x):
+    print(x)
+
+def test_tuple_with_local_flow():
+    x = (3, SOURCE)
+    y = x[1]
+    SINK(y)
+
+# List taken from https://docs.python.org/3/reference/expressions.html
+# 6.2.1. Identifiers (Names)
+def test_names():
+    x = SOURCE
+    SINK(x)
+
+# 6.2.2. Literals
+def test_string_literal():
+    x = "source"
+    SINK(x)
+
+def test_bytes_literal():
+    x = b"source"
+    SINK(x)
+
+def test_integer_literal():
+    x = 42
+    SINK(x)
+
+def test_floatnumber_literal():
+    x = 42.0
+    SINK(x)
+
+def test_imagnumber_literal():
+    x = 42j
+    SINK(x)
+
+# 6.2.3. Parenthesized forms
+def test_parenthesized_form():
+    x = (SOURCE)
+    SINK(x)
+
+# 6.2.5. List displays
+def test_list_display():
+    x = [SOURCE]
+    SINK(x[0])
+
+def test_list_comprehension():
+    x = [SOURCE for y in [3]]
+    SINK(x[0])
+
+def test_nested_list_display():
+    x = [* [SOURCE]]
+    SINK(x[0])
+
+# 6.2.6. Set displays
+def test_set_display():
+    x = {SOURCE}
+    SINK(x.pop())
+
+def test_set_comprehension():
+    x = {SOURCE for y in [3]}
+    SINK(x.pop())
+
+def test_nested_set_display():
+    x = {* {SOURCE}}
+    SINK(x.pop())
+
+# 6.2.7. Dictionary displays
+def test_dict_display():
+    x = {"s": SOURCE}
+    SINK(x["s"])
+
+def test_dict_comprehension():
+    x = {y: SOURCE for y in ["s"]}
+    SINK(x["s"])
+
+def test_nested_dict_display():
+    x = {** {"s": SOURCE}}
+    SINK(x["s"])
+
+# 6.2.8. Generator expressions
+def test_generator():
+    x = (SOURCE for y in [3])
+    SINK([*x][0])
+
+# List taken from https://docs.python.org/3/reference/expressions.html
+# 6. Expressions
+# 6.1. Arithmetic conversions
+# 6.2. Atoms
+# 6.2.1. Identifiers (Names)
+# 6.2.2. Literals
+# 6.2.3. Parenthesized forms
+# 6.2.4. Displays for lists, sets and dictionaries
+# 6.2.5. List displays
+# 6.2.6. Set displays
+# 6.2.7. Dictionary displays
+# 6.2.8. Generator expressions
+# 6.2.9. Yield expressions
+# 6.2.9.1. Generator-iterator methods
+# 6.2.9.2. Examples
+# 6.2.9.3. Asynchronous generator functions
+# 6.2.9.4. Asynchronous generator-iterator methods
+# 6.3. Primaries
+# 6.3.1. Attribute references
+# 6.3.2. Subscriptions
+# 6.3.3. Slicings
+# 6.3.4. Calls
+# 6.4. Await expression
+# 6.5. The power operator
+# 6.6. Unary arithmetic and bitwise operations
+# 6.7. Binary arithmetic operations
+# 6.8. Shifting operations
+# 6.9. Binary bitwise operations
+# 6.10. Comparisons
+# 6.10.1. Value comparisons
+# 6.10.2. Membership test operations
+# 6.10.3. Identity comparisons
+# 6.11. Boolean operations
+# 6.12. Assignment expressions
+# 6.13. Conditional expressions
+# 6.14. Lambdas
+# 6.15. Expression lists
+# 6.16. Evaluation order
+# 6.17. Operator precedence
--- a/python/ql/test/experimental/dataflow/regression/dataflow.expected
+++ b/python/ql/test/experimental/dataflow/regression/dataflow.expected
@@ -0,0 +1,16 @@
+| test.py:3:10:3:15 | ControlFlowNode for SOURCE | test.py:3:10:3:15 | ControlFlowNode for SOURCE |
+| test.py:6:9:6:14 | ControlFlowNode for SOURCE | test.py:7:10:7:10 | ControlFlowNode for s |
+| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
+| test.py:10:12:10:17 | ControlFlowNode for SOURCE | test.py:17:10:17:10 | ControlFlowNode for t |
+| test.py:20:9:20:14 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
+| test.py:37:13:37:18 | ControlFlowNode for SOURCE | test.py:41:14:41:14 | ControlFlowNode for t |
+| test.py:62:13:62:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
+| test.py:67:13:67:18 | ControlFlowNode for SOURCE | test.py:13:10:13:12 | ControlFlowNode for arg |
+| test.py:76:9:76:14 | ControlFlowNode for SOURCE | test.py:78:10:78:10 | ControlFlowNode for t |
+| test.py:108:13:108:18 | ControlFlowNode for SOURCE | test.py:112:14:112:14 | ControlFlowNode for t |
+| test.py:139:10:139:15 | ControlFlowNode for SOURCE | test.py:140:14:140:14 | ControlFlowNode for t |
+| test.py:143:9:143:14 | ControlFlowNode for SOURCE | test.py:145:10:145:10 | ControlFlowNode for s |
+| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:160:14:160:14 | ControlFlowNode for t |
+| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:162:14:162:14 | ControlFlowNode for t |
+| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:164:14:164:14 | ControlFlowNode for t |
+| test.py:158:9:158:14 | ControlFlowNode for SOURCE | test.py:166:14:166:14 | ControlFlowNode for t |
--- a/python/ql/test/experimental/dataflow/regression/dataflow.ql
+++ b/python/ql/test/experimental/dataflow/regression/dataflow.ql
@@ -0,0 +1,16 @@
+/**
+ * This should be compared to
+ * python/ql/test/library-tests/taint/dataflow/Dataflow.ql
+ * A first goal is to have identical results; after that we
+ * hope to remove the false positive.
+ */
+
+import experimental.dataflow.testConfig
+
+from
+  DataFlow::Node source,
+  DataFlow::Node sink
+where
+  exists(TestConfiguration cfg | cfg.hasFlow(source, sink)) // source/sink pairs with flow under `TestConfiguration`
+select
+  source, sink
--- a/python/ql/test/experimental/dataflow/regression/test.py
+++ b/python/ql/test/experimental/dataflow/regression/test.py
@@ -0,0 +1,167 @@
+
+def test1():
+    SINK(SOURCE)
+
+def test2():
+    s = SOURCE
+    SINK(s)
+
+def source():
+    return SOURCE
+
+def sink(arg):
+    SINK(arg)
+
+def test3():
+    t = source()
+    SINK(t)
+
+def test4():
+    t = SOURCE
+    sink(t)
+
+def test5():
+    t = source()
+    sink(t)
+
+def test6(cond):
+    if cond:
+        t = "Safe"
+    else:
+        t = SOURCE
+    if cond:
+        SINK(t)
+
+def test7(cond):
+    if cond:
+        t = SOURCE
+    else:
+        t = "Safe"
+    if cond:
+        SINK(t)
+
+def source2(arg):
+    return source(arg)
+
+def sink2(arg):
+    sink(arg)
+
+def sink3(cond, arg):
+    if cond:
+        sink(arg)
+
+def test8(cond):
+    t = source2()
+    sink2(t)
+
+#False positive
+def test9(cond):
+    if cond:
+        t  = "Safe"
+    else:
+        t = SOURCE
+    sink3(cond, t)
+
+def test10(cond):
+    if cond:
+        t = SOURCE
+    else:
+        t = "Safe"
+    sink3(cond, t)
+
+def hub(arg):
+    return arg
+
+def test11():
+    t = SOURCE
+    t = hub(t)
+    SINK(t)
+
+def test12():
+    t = "safe"
+    t = hub(t)
+    SINK(t)
+
+import module
+
+def test13():
+    t = module.dangerous
+    SINK(t)
+
+def test14():
+    t = module.safe
+    SINK(t)
+
+def test15():
+    t = module.safe2
+    SINK(t)
+
+def test16():
+    t = module.dangerous_func()
+    SINK(t)
+
+
+def test20(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    if cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+def test21(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    if not cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+def test22(cond):
+    if cond:
+        t = CUSTOM_SOURCE
+    else:
+        t = SOURCE
+    t = TAINT_FROM_ARG(t)
+    if cond:
+        CUSTOM_SINK(t)
+    else:
+        SINK(t)
+
+from module import dangerous as unsafe
+SINK(unsafe)
+
+def test23():
+    with SOURCE as t:
+        SINK(t)
+
+def test24():
+    s = SOURCE
+    SANITIZE(s)
+    SINK(s)
+
+def test_update_extend(x, y):
+    l = [SOURCE]
+    d = {"key" : SOURCE}
+    x.extend(l)
+    y.update(d)
+    SINK(x[0])
+    SINK(y["key"])
+    l2 = list(l)
+    d2 = dict(d)
+
+def test_truth():
+    t = SOURCE
+    if t:
+        SINK(t)
+    else:
+        SINK(t) # Regression: FP here
+    if not t:
+        SINK(t) # Regression: FP here
+    else:
+        SINK(t)
+
--- a/python/ql/test/experimental/dataflow/testConfig.qll
+++ b/python/ql/test/experimental/dataflow/testConfig.qll
@@ -0,0 +1,45 @@
+/**
+ * Configuration to test selected data flow.
+ * Sources in the source code are denoted by the special name `SOURCE`,
+ * and sinks are denoted by arguments to the special function `SINK`.
+ * For example, given the test code
+ * ```python
+ *  def test():
+ *      s = SOURCE
+ *      SINK(s)
+ * ```
+ * `SOURCE` will be a source and the second occurrence of `s` will be a sink.
+ * 
+ * In order to test literals, alternative sources are defined for each type:
+ * 
+ *  for | use
+ * ----------
+ * string | `"source"`
+ * integer | `42`
+ * float | `42.0`
+ * complex | `42j` (not supported yet)
+ */
+
+import experimental.dataflow.DataFlow
+
+class TestConfiguration extends DataFlow::Configuration {
+  TestConfiguration() { this = "TestConfiguration" }
+
+  override predicate isSource(DataFlow::Node node) { // the name `SOURCE`, or one of the marker literals
+    exists(ControlFlowNode cfg | cfg = node.(DataFlow::CfgNode).getNode() |
+      cfg.(NameNode).getId() = "SOURCE"
+      or
+      cfg.getNode().(StrConst).getS() = "source"
+      or
+      cfg.getNode().(IntegerLiteral).getN() = "42"
+      or
+      cfg.getNode().(FloatLiteral).getN() = "42.0" // no support for complex numbers
+    )
+  }
+
+  override predicate isSink(DataFlow::Node node) { // any argument of a call to `SINK` or `SINK_F`
+    exists(CallNode sinkCall | sinkCall.getFunction().(NameNode).getId() in ["SINK", "SINK_F"] |
+      sinkCall.getAnArg() = node.(DataFlow::CfgNode).getNode()
+    )
+  }
+}
				`@@ -0,0 +1 @@`
				`import experimental.dataflow.internal.DataFlowImplConsistency::Consistency`