Merge pull request #13863 from aschackmull/dataflow/pack4

Dataflow: Move the shared library to a properly shared qlpack.
2025-12-21 03:06:31 +01:00 · 2023-08-02 14:19:49 +02:00
parent 7bea18773c 1ad51e754e
commit 7bc8bf616f
72 changed files with 7177 additions and 53563 deletions
--- a/cpp/ql/lib/qlpack.yml
+++ b/cpp/ql/lib/qlpack.yml
@@ -6,6 +6,7 @@ extractor: cpp
 library: true
 upgrades: upgrades
 dependencies:
+  codeql/dataflow: ${workspace}
  codeql/ssa: ${workspace}
  codeql/tutorial: ${workspace}
  codeql/util: ${workspace}
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/DataFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/DataFlow.qll
@@ -26,6 +26,8 @@ import cpp
 * global (inter-procedural) data flow analyses.
 */
 deprecated module DataFlow {
-  import semmle.code.cpp.dataflow.internal.DataFlow
+  private import semmle.code.cpp.dataflow.internal.DataFlowImplSpecific
+  private import codeql.dataflow.DataFlow
+  import DataFlowMake<CppOldDataFlow>
  import semmle.code.cpp.dataflow.internal.DataFlowImpl1
 }
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlow.qll
@@ -1,450 +0,0 @@
-/**
- * Provides an implementation of global (interprocedural) data flow. This file
- * re-exports the local (intraprocedural) data flow analysis from
- * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
- * through the `Global` and `GlobalWithState` modules.
- */
-
-private import DataFlowImplCommon
-private import DataFlowImplSpecific::Private
-import DataFlowImplSpecific::Public
-import DataFlowImplCommonPublic
-private import DataFlowImpl
-
-/** An input configuration for data flow. */
-signature module ConfigSig {
-  /**
-   * Holds if `source` is a relevant data flow source.
-   */
-  predicate isSource(Node source);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink.
-   */
-  predicate isSink(Node sink);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or isAdditionalFlowStep(_, node)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/** An input configuration for data flow using flow state. */
-signature module StateConfigSig {
-  bindingset[this]
-  class FlowState;
-
-  /**
-   * Holds if `source` is a relevant data flow source with the given initial
-   * `state`.
-   */
-  predicate isSource(Node source, FlowState state);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink accepting `state`.
-   */
-  predicate isSink(Node sink, FlowState state);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /**
-   * Holds if data flow through `node` is prohibited when the flow state is
-   * `state`.
-   */
-  default predicate isBarrier(Node node, FlowState state) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   * This step is only applicable in `state1` and updates the flow state to `state2`.
-   */
-  default predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
-    none()
-  }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or
-    isAdditionalFlowStep(_, node) or
-    isAdditionalFlowStep(node, _, _, _) or
-    isAdditionalFlowStep(_, _, node, _)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/**
- * Gets the exploration limit for `partialFlow` and `partialFlowRev`
- * measured in approximate number of interprocedural steps.
- */
-signature int explorationLimitSig();
-
-/**
- * The output of a global data flow computation.
- */
-signature module GlobalFlowSig {
-  /**
-   * A `Node` augmented with a call context (except for sinks) and an access path.
-   * Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
-   */
-  class PathNode;
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   *
-   * The corresponding paths are generated from the end-points and the graph
-   * included in the module `PathGraph`.
-   */
-  predicate flowPath(PathNode source, PathNode sink);
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   */
-  predicate flow(Node source, Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowTo(Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowToExpr(DataFlowExpr sink);
-}
-
-/**
- * Constructs a global data flow computation.
- */
-module Global<ConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import DefaultState<Config>
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `Global` instead. */
-deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
-  import Global<Config>
-}
-
-/**
- * Constructs a global data flow computation using flow state.
- */
-module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `GlobalWithState` instead. */
-deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
-  import GlobalWithState<Config>
-}
-
-signature class PathNodeSig {
-  /** Gets a textual representation of this element. */
-  string toString();
-
-  /**
-   * Holds if this element is at the specified location.
-   * The location spans column `startcolumn` of line `startline` to
-   * column `endcolumn` of line `endline` in file `filepath`.
-   * For more information, see
-   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-   */
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  );
-
-  /** Gets the underlying `Node`. */
-  Node getNode();
-}
-
-signature module PathGraphSig<PathNodeSig PathNode> {
-  /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-  predicate edges(PathNode a, PathNode b);
-
-  /** Holds if `n` is a node in the graph of data flow path explanations. */
-  predicate nodes(PathNode n, string key, string val);
-
-  /**
-   * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-   * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-   * `ret -> out` is summarized as the edge `arg -> out`.
-   */
-  predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
-}
-
-/**
- * Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
- */
-module MergePathGraph<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
-  PathGraphSig<PathNode2> Graph2>
-{
-  private newtype TPathNode =
-    TPathNode1(PathNode1 p) or
-    TPathNode2(PathNode2 p)
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
-  class PathNode extends TPathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { this = TPathNode1(result) }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { this = TPathNode2(result) }
-
-    /** Gets a textual representation of this element. */
-    string toString() {
-      result = this.asPathNode1().toString() or
-      result = this.asPathNode2().toString()
-    }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
-      this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() {
-      result = this.asPathNode1().getNode() or
-      result = this.asPathNode2().getNode()
-    }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) {
-      Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
-      Graph2::edges(a.asPathNode2(), b.asPathNode2())
-    }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Graph1::nodes(n.asPathNode1(), key, val) or
-      Graph2::nodes(n.asPathNode2(), key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
-      Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
-    }
-  }
-}
-
-/**
- * Constructs a `PathGraph` from three `PathGraph`s by disjoint union.
- */
-module MergePathGraph3<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
-  PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
-{
-  private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;
-
-  private module Merged =
-    MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the three given graphs. */
-  class PathNode instanceof Merged::PathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { result = super.asPathNode1().asPathNode1() }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { result = super.asPathNode1().asPathNode2() }
-
-    /** Gets this as a projection on the third given `PathGraph`. */
-    PathNode3 asPathNode3() { result = super.asPathNode2() }
-
-    /** Gets a textual representation of this element. */
-    string toString() { result = super.toString() }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() { result = super.getNode() }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) { Merged::PathGraph::edges(a, b) }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Merged::PathGraph::nodes(n, key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Merged::PathGraph::subpaths(arg, par, ret, out)
-    }
-  }
-}
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowDispatch.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowDispatch.qll
@@ -5,8 +5,8 @@ private import DataFlowUtil
 /**
 * Gets a function that might be called by `call`.
 */
-Function viableCallable(Call call) {
-  result = call.getTarget()
+Function viableCallable(DataFlowCall call) {
+  result = call.(Call).getTarget()
  or
  // If the target of the call does not have a body in the snapshot, it might
  // be because the target is just a header declaration, and the real target
@@ -58,13 +58,13 @@ private predicate functionSignature(Function f, string qualifiedName, int nparam
 * Holds if the set of viable implementations that can be called by `call`
 * might be improved by knowing the call context.
 */
-predicate mayBenefitFromCallContext(Call call, Function f) { none() }
+predicate mayBenefitFromCallContext(DataFlowCall call, Function f) { none() }

 /**
 * Gets a viable dispatch target of `call` in the context `ctx`. This is
 * restricted to those `call`s for which a context might make a difference.
 */
-Function viableImplInCallContext(Call call, Call ctx) { none() }
+Function viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }

 /** A parameter position represented by an integer. */
 class ParameterPosition extends int {
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplSpecific.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplSpecific.qll
@@ -1,6 +1,9 @@
 /**
 * Provides C++-specific definitions for use in the data flow library.
 */
+
+private import codeql.dataflow.DataFlowParameter
+
 module Private {
  import DataFlowPrivate
  import DataFlowDispatch
@@ -9,3 +12,10 @@ module Private {
 module Public {
  import DataFlowUtil
 }
+
+module CppOldDataFlow implements DataFlowParameter {
+  import Private
+  import Public
+
+  Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
+}
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll
@@ -153,10 +153,11 @@ predicate jumpStep(Node n1, Node n2) { none() }
 * Thus, `node2` references an object with a field `f` that contains the
 * value of `node1`.
 */
-predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
+predicate storeStep(Node node1, ContentSet f, Node node2) {
  exists(ClassAggregateLiteral aggr, Field field |
-    // The following line requires `node2` to be both an `ExprNode` and a
+    // The following lines require `node2` to be both an `ExprNode` and a
    // `PostUpdateNode`, which means it must be an `ObjectInitializerNode`.
+    node2 instanceof PostUpdateNode and
    node2.asExpr() = aggr and
    f.(FieldContent).getField() = field and
    aggr.getAFieldExpr(field) = node1.asExpr()
@@ -167,12 +168,13 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
      node1.asExpr() = a and
      a.getLValue() = fa
    ) and
-    node2.getPreUpdateNode().asExpr() = fa.getQualifier() and
+    node2.(PostUpdateNode).getPreUpdateNode().asExpr() = fa.getQualifier() and
    f.(FieldContent).getField() = fa.getTarget()
  )
  or
  exists(ConstructorFieldInit cfi |
-    node2.getPreUpdateNode().(PreConstructorInitThis).getConstructorFieldInit() = cfi and
+    node2.(PostUpdateNode).getPreUpdateNode().(PreConstructorInitThis).getConstructorFieldInit() =
+      cfi and
    f.(FieldContent).getField() = cfi.getTarget() and
    node1.asExpr() = cfi.getExpr()
  )
@@ -183,7 +185,7 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
 * Thus, `node1` references an object with a field `f` whose value ends up in
 * `node2`.
 */
-predicate readStep(Node node1, Content f, Node node2) {
+predicate readStep(Node node1, ContentSet f, Node node2) {
  exists(FieldAccess fr |
    node1.asExpr() = fr.getQualifier() and
    fr.getTarget() = f.(FieldContent).getField() and
@@ -195,7 +197,7 @@ predicate readStep(Node node1, Content f, Node node2) {
 /**
 * Holds if values stored inside content `c` are cleared at node `n`.
 */
-predicate clearsContent(Node n, Content c) {
+predicate clearsContent(Node n, ContentSet c) {
  none() // stub implementation
 }

@@ -235,12 +237,6 @@ class CastNode extends Node {
  CastNode() { none() } // stub implementation
 }

-/**
- * Holds if `n` should never be skipped over in the `PathGraph` and in path
- * explanations.
- */
-predicate neverSkipInPathGraph(Node n) { none() }
-
 class DataFlowCallable = Function;

 class DataFlowExpr = Expr;
@@ -265,8 +261,6 @@ class DataFlowCall extends Expr instanceof Call {

 predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } // stub implementation

-int accessPathLimit() { result = 5 }
-
 /**
 * Holds if access paths with `c` at their head always should be tracked at high
 * precision. This disables adaptive access path precision for such access paths.
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/new/DataFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/new/DataFlow.qll
@@ -26,6 +26,8 @@ import cpp
 * global (inter-procedural) data flow analyses.
 */
 module DataFlow {
-  import semmle.code.cpp.ir.dataflow.internal.DataFlow
+  private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplSpecific
+  private import codeql.dataflow.DataFlow
+  import DataFlowMake<CppDataFlow>
  import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl1
 }
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DataFlow.qll
@@ -22,6 +22,8 @@
 import cpp

 module DataFlow {
-  import semmle.code.cpp.ir.dataflow.internal.DataFlow
+  private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplSpecific
+  private import codeql.dataflow.DataFlow
+  import DataFlowMake<CppDataFlow>
  import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl1
 }
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlow.qll
@@ -1,450 +0,0 @@
-/**
- * Provides an implementation of global (interprocedural) data flow. This file
- * re-exports the local (intraprocedural) data flow analysis from
- * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
- * through the `Global` and `GlobalWithState` modules.
- */
-
-private import DataFlowImplCommon
-private import DataFlowImplSpecific::Private
-import DataFlowImplSpecific::Public
-import DataFlowImplCommonPublic
-private import DataFlowImpl
-
-/** An input configuration for data flow. */
-signature module ConfigSig {
-  /**
-   * Holds if `source` is a relevant data flow source.
-   */
-  predicate isSource(Node source);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink.
-   */
-  predicate isSink(Node sink);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or isAdditionalFlowStep(_, node)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/** An input configuration for data flow using flow state. */
-signature module StateConfigSig {
-  bindingset[this]
-  class FlowState;
-
-  /**
-   * Holds if `source` is a relevant data flow source with the given initial
-   * `state`.
-   */
-  predicate isSource(Node source, FlowState state);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink accepting `state`.
-   */
-  predicate isSink(Node sink, FlowState state);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /**
-   * Holds if data flow through `node` is prohibited when the flow state is
-   * `state`.
-   */
-  default predicate isBarrier(Node node, FlowState state) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   * This step is only applicable in `state1` and updates the flow state to `state2`.
-   */
-  default predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
-    none()
-  }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or
-    isAdditionalFlowStep(_, node) or
-    isAdditionalFlowStep(node, _, _, _) or
-    isAdditionalFlowStep(_, _, node, _)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/**
- * Gets the exploration limit for `partialFlow` and `partialFlowRev`
- * measured in approximate number of interprocedural steps.
- */
-signature int explorationLimitSig();
-
-/**
- * The output of a global data flow computation.
- */
-signature module GlobalFlowSig {
-  /**
-   * A `Node` augmented with a call context (except for sinks) and an access path.
-   * Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
-   */
-  class PathNode;
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   *
-   * The corresponding paths are generated from the end-points and the graph
-   * included in the module `PathGraph`.
-   */
-  predicate flowPath(PathNode source, PathNode sink);
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   */
-  predicate flow(Node source, Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowTo(Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowToExpr(DataFlowExpr sink);
-}
-
-/**
- * Constructs a global data flow computation.
- */
-module Global<ConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import DefaultState<Config>
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `Global` instead. */
-deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
-  import Global<Config>
-}
-
-/**
- * Constructs a global data flow computation using flow state.
- */
-module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `GlobalWithState` instead. */
-deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
-  import GlobalWithState<Config>
-}
-
-signature class PathNodeSig {
-  /** Gets a textual representation of this element. */
-  string toString();
-
-  /**
-   * Holds if this element is at the specified location.
-   * The location spans column `startcolumn` of line `startline` to
-   * column `endcolumn` of line `endline` in file `filepath`.
-   * For more information, see
-   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-   */
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  );
-
-  /** Gets the underlying `Node`. */
-  Node getNode();
-}
-
-signature module PathGraphSig<PathNodeSig PathNode> {
-  /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-  predicate edges(PathNode a, PathNode b);
-
-  /** Holds if `n` is a node in the graph of data flow path explanations. */
-  predicate nodes(PathNode n, string key, string val);
-
-  /**
-   * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-   * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-   * `ret -> out` is summarized as the edge `arg -> out`.
-   */
-  predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
-}
-
-/**
- * Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
- */
-module MergePathGraph<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
-  PathGraphSig<PathNode2> Graph2>
-{
-  private newtype TPathNode =
-    TPathNode1(PathNode1 p) or
-    TPathNode2(PathNode2 p)
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
-  class PathNode extends TPathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { this = TPathNode1(result) }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { this = TPathNode2(result) }
-
-    /** Gets a textual representation of this element. */
-    string toString() {
-      result = this.asPathNode1().toString() or
-      result = this.asPathNode2().toString()
-    }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
-      this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() {
-      result = this.asPathNode1().getNode() or
-      result = this.asPathNode2().getNode()
-    }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) {
-      Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
-      Graph2::edges(a.asPathNode2(), b.asPathNode2())
-    }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Graph1::nodes(n.asPathNode1(), key, val) or
-      Graph2::nodes(n.asPathNode2(), key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
-      Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
-    }
-  }
-}
-
-/**
- * Constructs a `PathGraph` from three `PathGraph`s by disjoint union.
- */
-module MergePathGraph3<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
-  PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
-{
-  private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;
-
-  private module Merged =
-    MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the three given graphs. */
-  class PathNode instanceof Merged::PathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { result = super.asPathNode1().asPathNode1() }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { result = super.asPathNode1().asPathNode2() }
-
-    /** Gets this as a projection on the third given `PathGraph`. */
-    PathNode3 asPathNode3() { result = super.asPathNode2() }
-
-    /** Gets a textual representation of this element. */
-    string toString() { result = super.toString() }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() { result = super.getNode() }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) { Merged::PathGraph::edges(a, b) }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Merged::PathGraph::nodes(n, key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Merged::PathGraph::subpaths(arg, par, ret, out)
-    }
-  }
-}
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll
@@ -9,7 +9,7 @@ private import DataFlowImplCommon as DataFlowImplCommon
 * Gets a function that might be called by `call`.
 */
 cached
-Function viableCallable(CallInstruction call) {
+DataFlowCallable viableCallable(DataFlowCall call) {
  DataFlowImplCommon::forceCachingInSameStage() and
  result = call.getStaticCallTarget()
  or
@@ -235,7 +235,7 @@ private predicate functionSignature(Function f, string qualifiedName, int nparam
 * Holds if the set of viable implementations that can be called by `call`
 * might be improved by knowing the call context.
 */
-predicate mayBenefitFromCallContext(CallInstruction call, Function f) {
+predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable f) {
  mayBenefitFromCallContext(call, f, _)
 }

@@ -259,7 +259,7 @@ private predicate mayBenefitFromCallContext(
 * Gets a viable dispatch target of `call` in the context `ctx`. This is
 * restricted to those `call`s for which a context might make a difference.
 */
-Function viableImplInCallContext(CallInstruction call, CallInstruction ctx) {
+DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
  result = viableCallable(call) and
  exists(int i, Function f |
    mayBenefitFromCallContext(pragma[only_bind_into](call), f, i) and
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll
@@ -1,6 +1,9 @@
 /**
 * Provides IR-specific definitions for use in the data flow library.
 */
+
+private import codeql.dataflow.DataFlowParameter
+
 module Private {
  import DataFlowPrivate
  import DataFlowDispatch
@@ -9,3 +12,10 @@ module Private {
 module Public {
  import DataFlowUtil
 }
+
+module CppDataFlow implements DataFlowParameter {
+  import Private
+  import Public
+
+  Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
+}
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
@@ -681,9 +681,7 @@ predicate storeStepImpl(Node node1, Content c, PostFieldUpdateNode node2, boolea
 * Thus, `node2` references an object with a field `f` that contains the
 * value of `node1`.
 */
-predicate storeStep(Node node1, Content c, PostFieldUpdateNode node2) {
-  storeStepImpl(node1, c, node2, _)
-}
+predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) }

 /**
 * Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like
@@ -744,7 +742,7 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex)
 * Thus, `node1` references an object with a field `f` whose value ends up in
 * `node2`.
 */
-predicate readStep(Node node1, Content c, Node node2) {
+predicate readStep(Node node1, ContentSet c, Node node2) {
  exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 |
    nodeHasOperand(node2, operand, indirectionIndex2) and
    // The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
@@ -767,7 +765,7 @@ predicate readStep(Node node1, Content c, Node node2) {
 /**
 * Holds if values stored inside content `c` are cleared at node `n`.
 */
-predicate clearsContent(Node n, Content c) {
+predicate clearsContent(Node n, ContentSet c) {
  n =
    any(PostUpdateNode pun, Content d | d.impliesClearOf(c) and storeStepImpl(_, d, pun, true) | pun)
        .getPreUpdateNode() and
@@ -792,7 +790,7 @@ predicate clearsContent(Node n, Content c) {
      storeStepImpl(_, d, pun, true) and
      pun.getPreUpdateNode() = n
    |
-      c.getIndirectionIndex() = d.getIndirectionIndex()
+      c.(Content).getIndirectionIndex() = d.getIndirectionIndex()
    )
  )
 }
@@ -833,12 +831,6 @@ class CastNode extends Node {
  CastNode() { none() } // stub implementation
 }

-/**
- * Holds if `n` should never be skipped over in the `PathGraph` and in path
- * explanations.
- */
-predicate neverSkipInPathGraph(Node n) { none() }
-
 /**
 * A function that may contain code or a variable that may contain itself. When
 * flow crosses from one _enclosing callable_ to another, the interprocedural
@@ -853,7 +845,7 @@ class DataFlowType = Type;

 /** A function call relevant for data flow. */
 class DataFlowCall extends CallInstruction {
-  Function getEnclosingCallable() { result = this.getEnclosingFunction() }
+  DataFlowCallable getEnclosingCallable() { result = this.getEnclosingFunction() }
 }

 module IsUnreachableInCall {
@@ -924,8 +916,6 @@ module IsUnreachableInCall {

 import IsUnreachableInCall

-int accessPathLimit() { result = 5 }
-
 /**
 * Holds if access paths with `c` at their head always should be tracked at high
 * precision. This disables adaptive access path precision for such access paths.