Merge pull request #13863 from aschackmull/dataflow/pack4

Dataflow: Move the shared library to a properly shared qlpack.
2026-04-23 07:45:17 +02:00 · 2023-08-02 14:19:49 +02:00
parent 7bea18773c 1ad51e754e
commit 7bc8bf616f
72 changed files with 7177 additions and 53563 deletions
--- a/java/ql/lib/qlpack.yml
+++ b/java/ql/lib/qlpack.yml
@@ -6,6 +6,7 @@ extractor: java
 library: true
 upgrades: upgrades
 dependencies:
+  codeql/dataflow: ${workspace}
  codeql/mad: ${workspace}
  codeql/regex: ${workspace}
  codeql/tutorial: ${workspace}
--- a/java/ql/lib/semmle/code/java/Type.qll
+++ b/java/ql/lib/semmle/code/java/Type.qll
@@ -1261,6 +1261,7 @@ predicate notHaveIntersection(RefType t1, RefType t2) {
 * Holds if there is a common (reflexive, transitive) subtype of the erased
 * types `t1` and `t2`.
 */
+pragma[nomagic]
 predicate erasedHaveIntersection(RefType t1, RefType t2) {
  exists(SrcRefType commonSub |
    commonSub.getASourceSupertype*() = t1 and commonSub.getASourceSupertype*() = t2
--- a/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll
@@ -6,6 +6,8 @@
 import java

 module DataFlow {
-  import semmle.code.java.dataflow.internal.DataFlow
+  // The per-language `internal.DataFlow` import is replaced by an instantiation
+  // of `DataFlowMake` with the `JavaDataFlow` module.
+  // NOTE(review): `DataFlowMake` is presumably provided by the
+  // `codeql.dataflow.DataFlow` import below, and `JavaDataFlow` by
+  // `DataFlowImplSpecific` - confirm.
+  private import semmle.code.java.dataflow.internal.DataFlowImplSpecific
+  private import codeql.dataflow.DataFlow
+  import DataFlowMake<JavaDataFlow>
  import semmle.code.java.dataflow.internal.DataFlowImpl1
 }
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlow.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlow.qll
@@ -1,450 +0,0 @@
-/**
- * Provides an implementation of global (interprocedural) data flow. This file
- * re-exports the local (intraprocedural) data flow analysis from
- * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
- * through the `Global` and `GlobalWithState` modules.
- */
-
-private import DataFlowImplCommon
-private import DataFlowImplSpecific::Private
-import DataFlowImplSpecific::Public
-import DataFlowImplCommonPublic
-private import DataFlowImpl
-
-/** An input configuration for data flow. */
-signature module ConfigSig {
-  /**
-   * Holds if `source` is a relevant data flow source.
-   */
-  predicate isSource(Node source);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink.
-   */
-  predicate isSink(Node sink);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or isAdditionalFlowStep(_, node)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/** An input configuration for data flow using flow state. */
-signature module StateConfigSig {
-  bindingset[this]
-  class FlowState;
-
-  /**
-   * Holds if `source` is a relevant data flow source with the given initial
-   * `state`.
-   */
-  predicate isSource(Node source, FlowState state);
-
-  /**
-   * Holds if `sink` is a relevant data flow sink accepting `state`.
-   */
-  predicate isSink(Node sink, FlowState state);
-
-  /**
-   * Holds if data flow through `node` is prohibited. This completely removes
-   * `node` from the data flow graph.
-   */
-  default predicate isBarrier(Node node) { none() }
-
-  /**
-   * Holds if data flow through `node` is prohibited when the flow state is
-   * `state`.
-   */
-  default predicate isBarrier(Node node, FlowState state) { none() }
-
-  /** Holds if data flow into `node` is prohibited. */
-  default predicate isBarrierIn(Node node) { none() }
-
-  /** Holds if data flow out of `node` is prohibited. */
-  default predicate isBarrierOut(Node node) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   */
-  default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
-
-  /**
-   * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
-   * This step is only applicable in `state1` and updates the flow state to `state2`.
-   */
-  default predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) {
-    none()
-  }
-
-  /**
-   * Holds if an arbitrary number of implicit read steps of content `c` may be
-   * taken at `node`.
-   */
-  default predicate allowImplicitRead(Node node, ContentSet c) { none() }
-
-  /**
-   * Holds if `node` should never be skipped over in the `PathGraph` and in path
-   * explanations.
-   */
-  default predicate neverSkip(Node node) {
-    isAdditionalFlowStep(node, _) or
-    isAdditionalFlowStep(_, node) or
-    isAdditionalFlowStep(node, _, _, _) or
-    isAdditionalFlowStep(_, _, node, _)
-  }
-
-  /**
-   * Gets the virtual dispatch branching limit when calculating field flow.
-   * This can be overridden to a smaller value to improve performance (a
-   * value of 0 disables field flow), or a larger value to get more results.
-   */
-  default int fieldFlowBranchLimit() { result = 2 }
-
-  /**
-   * Gets a data flow configuration feature to add restrictions to the set of
-   * valid flow paths.
-   *
-   * - `FeatureHasSourceCallContext`:
-   *    Assume that sources have some existing call context to disallow
-   *    conflicting return-flow directly following the source.
-   * - `FeatureHasSinkCallContext`:
-   *    Assume that sinks have some existing call context to disallow
-   *    conflicting argument-to-parameter flow directly preceding the sink.
-   * - `FeatureEqualSourceSinkCallContext`:
-   *    Implies both of the above and additionally ensures that the entire flow
-   *    path preserves the call context.
-   *
-   * These features are generally not relevant for typical end-to-end data flow
-   * queries, but should only be used for constructing paths that need to
-   * somehow be pluggable in another path context.
-   */
-  default FlowFeature getAFeature() { none() }
-
-  /** Holds if sources should be grouped in the result of `flowPath`. */
-  default predicate sourceGrouping(Node source, string sourceGroup) { none() }
-
-  /** Holds if sinks should be grouped in the result of `flowPath`. */
-  default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
-
-  /**
-   * Holds if hidden nodes should be included in the data flow graph.
-   *
-   * This feature should only be used for debugging or when the data flow graph
-   * is not visualized (as it is in a `path-problem` query).
-   */
-  default predicate includeHiddenNodes() { none() }
-}
-
-/**
- * Gets the exploration limit for `partialFlow` and `partialFlowRev`
- * measured in approximate number of interprocedural steps.
- */
-signature int explorationLimitSig();
-
-/**
- * The output of a global data flow computation.
- */
-signature module GlobalFlowSig {
-  /**
-   * A `Node` augmented with a call context (except for sinks) and an access path.
-   * Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
-   */
-  class PathNode;
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   *
-   * The corresponding paths are generated from the end-points and the graph
-   * included in the module `PathGraph`.
-   */
-  predicate flowPath(PathNode source, PathNode sink);
-
-  /**
-   * Holds if data can flow from `source` to `sink`.
-   */
-  predicate flow(Node source, Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowTo(Node sink);
-
-  /**
-   * Holds if data can flow from some source to `sink`.
-   */
-  predicate flowToExpr(DataFlowExpr sink);
-}
-
-/**
- * Constructs a global data flow computation.
- */
-module Global<ConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import DefaultState<Config>
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `Global` instead. */
-deprecated module Make<ConfigSig Config> implements GlobalFlowSig {
-  import Global<Config>
-}
-
-/**
- * Constructs a global data flow computation using flow state.
- */
-module GlobalWithState<StateConfigSig Config> implements GlobalFlowSig {
-  private module C implements FullStateConfigSig {
-    import Config
-  }
-
-  import Impl<C>
-}
-
-/** DEPRECATED: Use `GlobalWithState` instead. */
-deprecated module MakeWithState<StateConfigSig Config> implements GlobalFlowSig {
-  import GlobalWithState<Config>
-}
-
-signature class PathNodeSig {
-  /** Gets a textual representation of this element. */
-  string toString();
-
-  /**
-   * Holds if this element is at the specified location.
-   * The location spans column `startcolumn` of line `startline` to
-   * column `endcolumn` of line `endline` in file `filepath`.
-   * For more information, see
-   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-   */
-  predicate hasLocationInfo(
-    string filepath, int startline, int startcolumn, int endline, int endcolumn
-  );
-
-  /** Gets the underlying `Node`. */
-  Node getNode();
-}
-
-signature module PathGraphSig<PathNodeSig PathNode> {
-  /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-  predicate edges(PathNode a, PathNode b);
-
-  /** Holds if `n` is a node in the graph of data flow path explanations. */
-  predicate nodes(PathNode n, string key, string val);
-
-  /**
-   * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-   * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-   * `ret -> out` is summarized as the edge `arg -> out`.
-   */
-  predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out);
-}
-
-/**
- * Constructs a `PathGraph` from two `PathGraph`s by disjoint union.
- */
-module MergePathGraph<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathGraphSig<PathNode1> Graph1,
-  PathGraphSig<PathNode2> Graph2>
-{
-  private newtype TPathNode =
-    TPathNode1(PathNode1 p) or
-    TPathNode2(PathNode2 p)
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the two given graphs. */
-  class PathNode extends TPathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { this = TPathNode1(result) }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { this = TPathNode2(result) }
-
-    /** Gets a textual representation of this element. */
-    string toString() {
-      result = this.asPathNode1().toString() or
-      result = this.asPathNode2().toString()
-    }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) or
-      this.asPathNode2().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() {
-      result = this.asPathNode1().getNode() or
-      result = this.asPathNode2().getNode()
-    }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) {
-      Graph1::edges(a.asPathNode1(), b.asPathNode1()) or
-      Graph2::edges(a.asPathNode2(), b.asPathNode2())
-    }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Graph1::nodes(n.asPathNode1(), key, val) or
-      Graph2::nodes(n.asPathNode2(), key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Graph1::subpaths(arg.asPathNode1(), par.asPathNode1(), ret.asPathNode1(), out.asPathNode1()) or
-      Graph2::subpaths(arg.asPathNode2(), par.asPathNode2(), ret.asPathNode2(), out.asPathNode2())
-    }
-  }
-}
-
-/**
- * Constructs a `PathGraph` from three `PathGraph`s by disjoint union.
- */
-module MergePathGraph3<
-  PathNodeSig PathNode1, PathNodeSig PathNode2, PathNodeSig PathNode3,
-  PathGraphSig<PathNode1> Graph1, PathGraphSig<PathNode2> Graph2, PathGraphSig<PathNode3> Graph3>
-{
-  private module MergedInner = MergePathGraph<PathNode1, PathNode2, Graph1, Graph2>;
-
-  private module Merged =
-    MergePathGraph<MergedInner::PathNode, PathNode3, MergedInner::PathGraph, Graph3>;
-
-  /** A node in a graph of path explanations that is formed by disjoint union of the three given graphs. */
-  class PathNode instanceof Merged::PathNode {
-    /** Gets this as a projection on the first given `PathGraph`. */
-    PathNode1 asPathNode1() { result = super.asPathNode1().asPathNode1() }
-
-    /** Gets this as a projection on the second given `PathGraph`. */
-    PathNode2 asPathNode2() { result = super.asPathNode1().asPathNode2() }
-
-    /** Gets this as a projection on the third given `PathGraph`. */
-    PathNode3 asPathNode3() { result = super.asPathNode2() }
-
-    /** Gets a textual representation of this element. */
-    string toString() { result = super.toString() }
-
-    /**
-     * Holds if this element is at the specified location.
-     * The location spans column `startcolumn` of line `startline` to
-     * column `endcolumn` of line `endline` in file `filepath`.
-     * For more information, see
-     * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-     */
-    predicate hasLocationInfo(
-      string filepath, int startline, int startcolumn, int endline, int endcolumn
-    ) {
-      super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
-    }
-
-    /** Gets the underlying `Node`. */
-    Node getNode() { result = super.getNode() }
-  }
-
-  /**
-   * Provides the query predicates needed to include a graph in a path-problem query.
-   */
-  module PathGraph implements PathGraphSig<PathNode> {
-    /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
-    query predicate edges(PathNode a, PathNode b) { Merged::PathGraph::edges(a, b) }
-
-    /** Holds if `n` is a node in the graph of data flow path explanations. */
-    query predicate nodes(PathNode n, string key, string val) {
-      Merged::PathGraph::nodes(n, key, val)
-    }
-
-    /**
-     * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through
-     * a subpath between `par` and `ret` with the connecting edges `arg -> par` and
-     * `ret -> out` is summarized as the edge `arg -> out`.
-     */
-    query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
-      Merged::PathGraph::subpaths(arg, par, ret, out)
-    }
-  }
-}
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll
@@ -1,6 +1,9 @@
 /**
 * Provides Java-specific definitions for use in the data flow library.
 */
+
+private import codeql.dataflow.DataFlowParameter
+
 module Private {
  import DataFlowPrivate
  import DataFlowDispatch
@@ -9,3 +12,10 @@ module Private {
 module Public {
  import DataFlowUtil
 }
+
+/**
+ * The Java implementation of the `DataFlowParameter` signature.
+ *
+ * It re-exports the contents of the `Private` and `Public` modules defined in
+ * this file, so that both are available through a single module.
+ */
+module JavaDataFlow implements DataFlowParameter {
+  import Private
+  import Public
+
+  /**
+   * Gets the node corresponding to expression `e`, as given by `Public::exprNode`.
+   *
+   * NOTE(review): presumably declared explicitly so that `exprNode` resolves
+   * unambiguously to the `Public` definition - confirm.
+   */
+  Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
+}
--- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll
+++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll
@@ -106,7 +106,7 @@ private predicate instanceFieldAssign(Expr src, FieldAccess fa) {
 * Thus, `node2` references an object with a field `f` that contains the
 * value of `node1`.
 */
-predicate storeStep(Node node1, Content f, Node node2) {
+predicate storeStep(Node node1, ContentSet f, Node node2) {
  exists(FieldAccess fa |
    instanceFieldAssign(node1.asExpr(), fa) and
    node2.(PostUpdateNode).getPreUpdateNode() = getFieldQualifier(fa) and
@@ -124,7 +124,7 @@ predicate storeStep(Node node1, Content f, Node node2) {
 * Thus, `node1` references an object with a field `f` whose value ends up in
 * `node2`.
 */
-predicate readStep(Node node1, Content f, Node node2) {
+predicate readStep(Node node1, ContentSet f, Node node2) {
  exists(FieldRead fr |
    node1 = getFieldQualifier(fr) and
    fr.getField() = f.(FieldContent).getField() and
@@ -156,7 +156,7 @@ predicate readStep(Node node1, Content f, Node node2) {
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 */
-predicate clearsContent(Node n, Content c) {
+predicate clearsContent(Node n, ContentSet c) {
  exists(FieldAccess fa |
    instanceFieldAssign(_, fa) and
    n = getFieldQualifier(fa) and
@@ -207,47 +207,25 @@ DataFlowType getNodeType(Node n) {
 }

 /** Gets a string representation of a type returned by `getErasedRepr`. */
-string ppReprType(Type t) {
+string ppReprType(DataFlowType t) {
  if t.(BoxedType).getPrimitiveType().getName() = "double"
  then result = "Number"
  else result = t.toString()
 }

-private predicate canContainBool(Type t) {
-  t instanceof BooleanType or
-  any(BooleanType b).(RefType).getASourceSupertype+() = t
-}
-
 /**
 * Holds if `t1` and `t2` are compatible, that is, whether data can flow from
 * a node of type `t1` to a node of type `t2`.
 */
-pragma[inline]
-predicate compatibleTypes(Type t1, Type t2) {
-  exists(Type e1, Type e2 |
-    e1 = getErasedRepr(t1) and
-    e2 = getErasedRepr(t2)
-  |
-    // Because of `getErasedRepr`, `erasedHaveIntersection` is a sufficient
-    // compatibility check, but `conContainBool` is kept as a dummy disjunct
-    // to get the proper join-order.
-    erasedHaveIntersection(e1, e2)
-    or
-    canContainBool(e1) and canContainBool(e2)
-  )
-}
+// Both arguments are passed to `erasedHaveIntersection` as-is; no
+// `getErasedRepr` normalisation is applied in this predicate.
+// NOTE(review): this presumably relies on every `DataFlowType` already being an
+// erased representation - confirm against the definition of `DataFlowType`.
+bindingset[t1, t2]
+pragma[inline_late]
+predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { erasedHaveIntersection(t1, t2) }

 /** A node that performs a type cast. */
 class CastNode extends ExprNode {
  CastNode() { this.getExpr() instanceof CastingExpr }
 }

-/**
- * Holds if `n` should never be skipped over in the `PathGraph` and in path
- * explanations.
- */
-predicate neverSkipInPathGraph(Node n) { none() }
-
 private newtype TDataFlowCallable =
  TSrcCallable(Callable c) or
  TSummarizedCallable(SummarizedCallable c) or
@@ -381,8 +359,6 @@ predicate isUnreachableInCall(Node n, DataFlowCall call) {
  )
 }

-int accessPathLimit() { result = 5 }
-
 /**
 * Holds if access paths with `c` at their head always should be tracked at high
 * precision. This disables adaptive access path precision for such access paths.
--- a/java/ql/src/Telemetry/AutomodelAlertSinkUtil.qll
+++ b/java/ql/src/Telemetry/AutomodelAlertSinkUtil.qll
@@ -1,6 +1,5 @@
 private import java
 private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
-private import semmle.code.java.dataflow.internal.DataFlow
 private import semmle.code.java.dataflow.TaintTracking
 private import semmle.code.java.security.RequestForgeryConfig
 private import semmle.code.java.security.CommandLineQuery