From e9a029cba32de0f058fd968e6e668a3b24d9c1ab Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 21 Aug 2019 10:19:31 +0200 Subject: [PATCH] C++: Local field flow using global library This commit removes fields from the responsibilities of `FlowVar.qll`. The treatment of fields in that file was slow and imprecise. It then adds another copy of the shared global data flow library, used only to find local field flow, and it exposes that local field flow through `localFlow` and `localFlowStep`. This has a performance cost. It adds two cached stages to any query that uses `localFlow`: the stage from `DataFlowImplCommon`, which is shared with all queries that use global data flow, and a new stage just for `localFlowStep`. --- config/identical-files.json | 1 + .../dataflow/internal/DataFlowImplLocal.qll | 2369 +++++++++++++++++ .../cpp/dataflow/internal/DataFlowUtil.qll | 51 + .../code/cpp/dataflow/internal/FlowVar.qll | 1 + .../dataflow/dataflow-tests/test.cpp | 2 +- .../dataflow/dataflow-tests/test.expected | 1 - .../dataflow-tests/test_diff.expected | 1 - .../dataflow/fields/aliasing.cpp | 8 +- .../dataflow/fields/flow.expected | 9 - 9 files changed, 2427 insertions(+), 16 deletions(-) create mode 100644 cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll diff --git a/config/identical-files.json b/config/identical-files.json index f42bf41ccba..63f4945680b 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -9,6 +9,7 @@ "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll", "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll", + "cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll", "cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll", diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll new file mode 100644 index 00000000000..11447e1cb61 --- /dev/null +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll @@ -0,0 +1,2369 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ``` + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ``` + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** DEPRECATED: override `isBarrierIn` and `isBarrierOut` instead. */ + deprecated predicate isBarrierEdge(Node node1, Node node2) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` measured in approximate + * number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for dataflow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowForward(Node source, Node sink) { hasFlow(source, sink) } + + /** DEPRECATED: use `hasFlow` instead. */ + deprecated predicate hasFlowBackward(Node source, Node sink) { hasFlow(source, sink) } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +private class AdditionalFlowStepSource extends Node { + AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + exists(DataFlowCallable callable1 | + isAdditionalFlowStep(node1, node2, callable1, config) and + node2.getEnclosingCallable() != callable1 and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +pragma[noinline] +private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKind kind) { + viableImpl(call) = result.getCallable() and + kind = result.getKind() +} + +/** + * Holds if `node` is reachable from a source in the given configuration + * ignoring call contexts. + */ +private predicate nodeCandFwd1(Node node, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + jumpStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + additionalJumpStep(mid, node, config) + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, config) and + store(mid, _, node) and + not outBarrier(mid, config) + ) + or + // read + exists(Content f | + nodeCandFwd1Read(f, node, config) and + storeCandFwd1(f, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, config) and + viableParamArg(_, node, arg) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + nodeCandFwd1(mid, config) and + parameterValueFlowsToUpdate(p, mid) and + viableParamArg(_, p, node.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(DataFlowCall call, ReturnNode ret, ReturnKind kind | + nodeCandFwd1(ret, config) and + getReturnPosition(ret) = viableReturnPos(call, kind) and + node = getAnOutNode(call, kind) + ) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1Read(Content f, Node node, Configuration config) { + exists(Node mid | + nodeCandFwd1(mid, config) and + read(mid, f, node) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +pragma[noinline] +private predicate storeCandFwd1(Content f, Configuration config) { + exists(Node mid, Node node | + not fullBarrier(node, config) and + useFieldFlow(config) and + nodeCandFwd1(mid, config) and + store(mid, f, node) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration ignoring call contexts. + */ +pragma[nomagic] +private predicate nodeCand1(Node node, Configuration config) { + nodeCandFwd1(node, config) and + config.isSink(node) + or + nodeCandFwd1(node, unbind(config)) and + ( + exists(Node mid | + localFlowStep(node, mid, config) and + nodeCand1(mid, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + nodeCand1(mid, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand1(mid, config) + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand1(mid, config) + ) + or + // store + exists(Content f | + nodeCand1Store(f, node, config) and + readCand1(f, config) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, config) + ) + or + // flow into a callable + exists(Node param | + viableParamArg(_, param, node) and + nodeCand1(param, config) + ) + or + // flow out of an argument + exists(PostUpdateNode mid, ParameterNode p | + parameterValueFlowsToUpdate(p, node) and + viableParamArg(_, p, mid.getPreUpdateNode()) and + nodeCand1(mid, config) + ) + or + // flow out of a callable + exists(DataFlowCall call, ReturnKind kind, OutNode out | + nodeCand1(out, config) and + getReturnPosition(node) = viableReturnPos(call, kind) and + out = getAnOutNode(call, kind) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand1`. + */ +pragma[noinline] +private predicate readCand1(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, unbind(config)) and + read(node, f, mid) and + storeCandFwd1(f, unbind(config)) and + nodeCand1(mid, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1Store(Content f, Node node, Configuration config) { + exists(Node mid | + nodeCand1(mid, config) and + storeCandFwd1(f, unbind(config)) and + store(node, f, mid) + ) +} + +private predicate throughFlowNodeCand(Node node, Configuration config) { + nodeCand1(node, config) and + not fullBarrier(node, config) and + not inBarrier(node, config) and + not outBarrier(node, config) +} + +/** + * Holds if there is a path from `p` to `node` in the same callable that is + * part of a path from a source to a sink taking simple call contexts into + * consideration. This is restricted to paths that do not necessarily + * preserve the value of `p` by making use of at least one additional step + * from the configuration. + */ +pragma[nomagic] +private predicate simpleParameterFlow( + ParameterNode p, Node node, DataFlowType t, Configuration config +) { + throughFlowNodeCand(node, config) and + p = node and + t = getErasedRepr(node.getType()) and + exists(ReturnNode ret, ReturnKind kind | + returnNodeGetEnclosingCallable(ret) = p.getEnclosingCallable() and + kind = ret.getKind() and + not parameterValueFlowsThrough(p, kind, _) + ) + or + throughFlowNodeCand(node, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localFlowStep(mid, node, config) and + compatibleTypes(t, node.getType()) + ) + or + throughFlowNodeCand(node, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, _, config) and + additionalLocalFlowStep(mid, node, config) and + t = getErasedRepr(node.getType()) + ) + or + throughFlowNodeCand(node, unbind(config)) and + exists(Node mid | + simpleParameterFlow(p, mid, t, config) and + localStoreReadStep(mid, node) and + compatibleTypes(t, node.getType()) + ) + or + // value flow through a callable + throughFlowNodeCand(node, unbind(config)) and + exists(Node arg | + simpleParameterFlow(p, arg, t, config) and + argumentValueFlowsThrough(arg, node, _) and + compatibleTypes(t, node.getType()) + ) + or + // flow through a callable + throughFlowNodeCand(node, unbind(config)) and + exists(Node arg | + simpleParameterFlow(p, arg, _, config) and + simpleArgumentFlowsThrough(arg, node, t, config) + ) +} + +pragma[noinline] +private predicate simpleArgumentFlowsThrough0( + DataFlowCall call, ArgumentNode arg, ReturnKind kind, DataFlowType t, Configuration config +) { + nodeCand1(arg, unbind(config)) and + not outBarrier(arg, config) and + exists(ParameterNode p, ReturnNode ret | + simpleParameterFlow(p, ret, t, config) and + kind = ret.getKind() and + viableParamArg(call, p, arg) + ) +} + +/** + * Holds if data can flow from `arg` to `out` through a call, taking simple + * call contexts into consideration, and that this is part of a path from a + * source to a sink. This is restricted to paths through calls that do not + * necessarily preserve the value of `arg` by making use of at least one + * additional step from the configuration. + */ +private predicate simpleArgumentFlowsThrough( + ArgumentNode arg, Node out, DataFlowType t, Configuration config +) { + exists(DataFlowCall call, ReturnKind kind | + nodeCand1(out, unbind(config)) and + not inBarrier(out, config) and + simpleArgumentFlowsThrough0(call, arg, kind, t, config) and + out = getAnOutNode(call, kind) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a callable. + */ +pragma[noinline] +private predicate localFlowStepOrFlowThroughCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + localFlowStep(node1, node2, config) + or + nodeCand1(node1, config) and + argumentValueFlowsThrough(node1, node2, _) +} + +/** + * Holds if data can flow from `node1` to `node2` in one local step or a step + * through a callable, in both cases using an additional flow step from the + * configuration. + */ +pragma[noinline] +private predicate additionalLocalFlowStepOrFlowThroughCallable( + Node node1, Node node2, Configuration config +) { + nodeCand1(node1, config) and + additionalLocalFlowStep(node1, node2, config) + or + simpleArgumentFlowsThrough(node1, node2, _, config) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +private predicate flowOutOfCallable(Node node1, Node node2, Configuration config) { + nodeCand1(node1, unbind(config)) and + nodeCand1(node2, config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + ( + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(_, p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(DataFlowCall call, ReturnKind kind | + getReturnPosition(node1) = viableReturnPos(call, kind) and + node2 = getAnOutNode(call, kind) + ) + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. + */ +private predicate flowIntoCallable(Node node1, Node node2, Configuration config) { + viableParamArg(_, node2, node1) and + nodeCand1(node1, unbind(config)) and + nodeCand1(node2, config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n1, n, conf) or flowIntoCallable(n1, n, conf)) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = strictcount(Node n | flowOutOfCallable(n, n2, conf) or flowIntoCallable(n, n2, conf)) +} + +/** + * Holds if data can flow out of a callable from `node1` to `node2`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +private predicate flowOutOfCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into a callable and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +private predicate flowIntoCallable( + Node node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallable(node1, node2, config) and + exists(int b, int j | + b = branch(node1, config) and + j = join(node2, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCandFwd2(Node node, boolean fromArg, boolean stored, Configuration config) { + nodeCand1(node, config) and + config.isSource(node) and + fromArg = false and + stored = false + or + nodeCand1(node, unbind(config)) and + ( + exists(Node mid | + nodeCandFwd2(mid, fromArg, stored, config) and + localFlowStepOrFlowThroughCallable(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd2(mid, fromArg, stored, config) and + additionalLocalFlowStepOrFlowThroughCallable(mid, node, config) and + stored = false + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, stored, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, stored, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + stored = false + ) + or + // store + exists(Node mid, Content f | + nodeCandFwd2(mid, fromArg, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) and + stored = true + ) + or + // read + exists(Content f | + nodeCandFwd2Read(f, node, fromArg, config) and + storeCandFwd2(f, config) and + (stored = false or stored = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, _, stored, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + nodeCandFwd2(mid, false, stored, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a store in the flow covered by `nodeCandFwd2`. + */ +pragma[noinline] +private predicate storeCandFwd2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, unbind(config)) and + nodeCandFwd2(mid, _, _, config) and + store(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Read(Content f, Node node, boolean fromArg, Configuration config) { + exists(Node mid | + nodeCandFwd2(mid, fromArg, true, config) and + read(mid, f, node) and + readCand1(f, unbind(config)) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the given + * configuration taking simple call contexts into consideration. + */ +private predicate nodeCand2(Node node, boolean toReturn, boolean stored, Configuration config) { + nodeCandFwd2(node, _, false, config) and + config.isSink(node) and + toReturn = false and + stored = false + or + nodeCandFwd2(node, _, unbindBool(stored), unbind(config)) and + ( + exists(Node mid | + localFlowStepOrFlowThroughCallable(node, mid, config) and + nodeCand2(mid, toReturn, stored, config) + ) + or + exists(Node mid | + additionalLocalFlowStepOrFlowThroughCallable(node, mid, config) and + nodeCand2(mid, toReturn, stored, config) and + stored = false + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand2(mid, _, stored, config) and + toReturn = false and + stored = false + ) + or + // store + exists(Content f | + nodeCand2Store(f, node, toReturn, config) and + readCand2(f, config) and + (stored = false or stored = true) + ) + or + // read + exists(Node mid, Content f | + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, toReturn, _, config) and + stored = true + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, false, stored, config) and + toReturn = false and + (stored = false or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + nodeCand2(mid, _, stored, config) and + toReturn = true and + (stored = false or allowsFieldFlow = true) + ) + ) +} + +/** + * Holds if `f` is the target of a read in the flow covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readCand2(Content f, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, true, unbind(config)) and + read(node, f, mid) and + storeCandFwd2(f, unbind(config)) and + nodeCand2(mid, _, _, config) + ) +} + +pragma[noinline] +private predicate nodeCand2Store(Content f, Node node, boolean toReturn, Configuration config) { + exists(Node mid | + store(node, f, mid) and + nodeCand2(mid, toReturn, true, config) + ) +} + +pragma[nomagic] +private predicate storeCand(Content f, Configuration conf) { + exists(Node node | + nodeCand2Store(f, node, _, conf) and + nodeCand2(node, _, _, conf) + ) +} + +/** + * Holds if `f` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate readStoreCand(Content f, Configuration conf) { + storeCand(f, conf) and + readCand2(f, conf) +} + +private predicate nodeCand(Node node, Configuration config) { nodeCand2(node, _, _, config) } + +/** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowEntry(Node node, Configuration config) { + nodeCand(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParameterNode or + node instanceof OutNode or + node instanceof PostUpdateNode or + read(_, _, node) or + node instanceof CastNode + ) +} + +/** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ +private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallable(node, next, config) or + flowOutOfCallable(node, next, config) or + simpleArgumentFlowsThrough(node, next, _, config) or + argumentValueFlowsThrough(node, next, _) or + store(node, _, next) or + read(node, _, next) + ) + or + node instanceof CastNode + or + config.isSink(node) +} + +/** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ +pragma[nomagic] +private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowEntry(node1, config) and + ( + localFlowStep(node1, node2, config) and preservesValue = true + or + additionalLocalFlowStep(node1, node2, config) and preservesValue = false + ) and + node1 != node2 and + nodeCand(node2, unbind(config)) + or + exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, config) and + localFlowStep(mid, node2, config) and + not mid instanceof CastNode and + nodeCand(node2, unbind(config)) + ) + or + exists(Node mid | + localFlowStepPlus(node1, mid, _, config) and + additionalLocalFlowStep(mid, node2, config) and + not mid instanceof CastNode and + preservesValue = false and + nodeCand(node2, unbind(config)) + ) +} + +/** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ +pragma[noinline] +private predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, Configuration config +) { + localFlowStepPlus(node1, node2, preservesValue, config) and + localFlowExit(node2, config) +} + +private newtype TAccessPathFront = + TFrontNil(DataFlowType t) or + TFrontHead(Content f) + +/** + * The front of an `AccessPath`. This is either a head or a nil. + */ +private class AccessPathFront extends TAccessPathFront { + string toString() { + exists(DataFlowType t | this = TFrontNil(t) | result = ppReprType(t)) + or + exists(Content f | this = TFrontHead(f) | result = f.toString()) + } + + DataFlowType getType() { + this = TFrontNil(result) + or + exists(Content head | this = TFrontHead(head) | result = head.getContainerType()) + } + + predicate headUsesContent(Content f) { this = TFrontHead(f) } +} + +private class AccessPathFrontNil extends AccessPathFront, TFrontNil { } + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +private class CastingNode extends Node { + CastingNode() { + this instanceof ParameterNode or + this instanceof CastNode or + this instanceof OutNode or + this.(PostUpdateNode).getPreUpdateNode() instanceof ArgumentNode + } +} + +/** + * Holds if data can flow from a source to `node` with the given `apf`. + */ +private predicate flowCandFwd(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + flowCandFwd0(node, fromArg, apf, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), apf.getType()) else any() +} + +/** + * A node that requires an empty access path and should have its tracked type + * (re-)computed. This is either a source or a node reached through an + * additional step. + */ +private class AccessPathFrontNilNode extends Node { + AccessPathFrontNilNode() { + nodeCand(this, _) and + ( + any(Configuration c).isSource(this) + or + localFlowBigStep(_, this, false, _) + or + additionalJumpStep(_, this, _) + ) + } + + pragma[noinline] + private DataFlowType getErasedReprType() { result = getErasedRepr(this.getType()) } + + /** Gets the `nil` path front for this node. */ + AccessPathFrontNil getApf() { result = TFrontNil(this.getErasedReprType()) } +} + +private predicate flowCandFwd0(Node node, boolean fromArg, AccessPathFront apf, Configuration config) { + nodeCand2(node, _, false, config) and + config.isSource(node) and + fromArg = false and + apf = node.(AccessPathFrontNilNode).getApf() + or + nodeCand(node, unbind(config)) and + ( + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, fromArg, nil, config) and + localFlowBigStep(mid, node, false, config) and + apf = node.(AccessPathFrontNilNode).getApf() + ) + or + exists(Node mid | + flowCandFwd(mid, _, apf, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, _, nil, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + apf = node.(AccessPathFrontNilNode).getApf() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, _, apf, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowCandFwd(mid, false, apf, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid | + flowCandFwd(mid, fromArg, apf, config) and + argumentValueFlowsThrough(mid, node, _) + ) + or + exists(Node mid, AccessPathFrontNil nil, DataFlowType t | + flowCandFwd(mid, fromArg, nil, config) and + simpleArgumentFlowsThrough(mid, node, t, config) and + apf = TFrontNil(t) + ) + ) + or + exists(Node mid, Content f | + flowCandFwd(mid, fromArg, _, config) and + store(mid, f, node) and + nodeCand(node, unbind(config)) and + apf.headUsesContent(f) + ) + or + exists(Content f | + flowCandFwdRead(f, node, fromArg, config) and + consCandFwd(f, apf, config) + ) +} + +pragma[noinline] +private predicate consCandFwd(Content f, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n | + flowCandFwd(mid, _, apf, config) and + store(mid, f, n) and + nodeCand(n, unbind(config)) and + readStoreCand(f, unbind(config)) and + compatibleTypes(apf.getType(), f.getType()) + ) +} + +pragma[nomagic] +private predicate flowCandFwdRead(Content f, Node node, boolean fromArg, Configuration config) { + exists(Node mid, AccessPathFront apf | + flowCandFwd(mid, fromArg, apf, config) and + read(mid, f, node) and + apf.headUsesContent(f) and + nodeCand(node, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `apf` and + * from there flow to a sink. + */ +private predicate flowCand(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCand0(node, toReturn, apf, config) and + flowCandFwd(node, _, apf, config) +} + +private predicate flowCand0(Node node, boolean toReturn, AccessPathFront apf, Configuration config) { + flowCandFwd(node, _, apf, config) and + config.isSink(node) and + toReturn = false and + apf instanceof AccessPathFrontNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, apf, config) and + localFlowBigStep(node, mid, false, config) and + flowCand(mid, toReturn, nil, config) and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flowCand(mid, _, apf, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, apf, config) and + additionalJumpStep(node, mid, config) and + flowCand(mid, _, nil, config) and + toReturn = false and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, false, apf, config) and + toReturn = false and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flowCand(mid, _, apf, config) and + toReturn = true and + (apf instanceof AccessPathFrontNil or allowsFieldFlow = true) + ) + or + exists(Node mid | + argumentValueFlowsThrough(node, mid, _) and + flowCand(mid, toReturn, apf, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + simpleArgumentFlowsThrough(node, mid, _, config) and + flowCand(mid, toReturn, nil, config) and + apf instanceof AccessPathFrontNil and + flowCandFwd(node, _, apf, config) + ) + or + exists(Content f, AccessPathFront apf0 | + flowCandStore(node, f, toReturn, apf0, config) and + apf0.headUsesContent(f) and + consCand(f, apf, unbind(config)) + ) + or + exists(Content f, AccessPathFront apf0 | + flowCandRead(node, f, toReturn, apf0, config) and + consCandFwd(f, apf0, unbind(config)) and + apf.headUsesContent(f) + ) +} + +pragma[nomagic] +private predicate flowCandRead( + Node node, Content f, boolean toReturn, AccessPathFront apf0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flowCand(mid, toReturn, apf0, config) + ) +} + +pragma[nomagic] +private predicate flowCandStore( + Node node, Content f, boolean toReturn, AccessPathFront apf0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flowCand(mid, toReturn, apf0, config) + ) +} + +pragma[noinline] +private predicate consCand(Content f, AccessPathFront apf, Configuration config) { + consCandFwd(f, apf, config) and + exists(Node n, AccessPathFront apf0 | + flowCandFwd(n, _, apf0, config) and + apf0.headUsesContent(f) and + flowCandRead(n, f, _, apf, config) + ) +} + +private newtype TAccessPath = + TNil(DataFlowType t) or + TCons(Content f, int len) { len in [1 .. 5] } + +/** + * Conceptually a list of `Content`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + abstract string toString(); + + Content getHead() { this = TCons(result, _) } + + int len() { + this = TNil(_) and result = 0 + or + this = TCons(_, result) + } + + DataFlowType getType() { + this = TNil(result) + or + exists(Content head | this = TCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); +} + +private class AccessPathNil extends AccessPath, TNil { + override string toString() { exists(DataFlowType t | this = TNil(t) | result = ppReprType(t)) } + + override AccessPathFront getFront() { + exists(DataFlowType t | this = TNil(t) | result = TFrontNil(t)) + } +} + +private class AccessPathCons extends AccessPath, TCons { + override string toString() { + exists(Content f, int len | this = TCons(f, len) | + result = f.toString() + ", ... (" + len.toString() + ")" + ) + } + + override AccessPathFront getFront() { + exists(Content f | this = TCons(f, _) | result = TFrontHead(f)) + } +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap`. */ +private predicate pop(AccessPath ap0, Content f, AccessPath ap) { + ap0.getFront().headUsesContent(f) and + consCand(f, ap.getFront(), _) and + ap0.len() = 1 + ap.len() +} + +/** Holds if `ap0` corresponds to the cons of `f` and `ap` and `apf` is the front of `ap`. */ +pragma[noinline] +private predicate popWithFront(AccessPath ap0, Content f, AccessPathFront apf, AccessPath ap) { + pop(ap0, f, ap) and apf = ap.getFront() +} + +/** Holds if `ap` corresponds to the cons of `f` and `ap0`. */ +private predicate push(AccessPath ap0, Content f, AccessPath ap) { pop(ap, f, ap0) } + +/** + * A node that requires an empty access path and should have its tracked type + * (re-)computed. This is either a source or a node reached through an + * additional step. + */ +private class AccessPathNilNode extends Node { + AccessPathNilNode() { flowCand(this.(AccessPathFrontNilNode), _, _, _) } + + pragma[noinline] + private DataFlowType getErasedReprType() { result = getErasedRepr(this.getType()) } + + /** Gets the `nil` path for this node. */ + AccessPathNil getAp() { result = TNil(this.getErasedReprType()) } +} + +/** + * Holds if data can flow from a source to `node` with the given `ap`. + */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowFwd0(node, fromArg, apf, ap, config) and + flowCand(node, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathFront apf, AccessPath ap, Configuration config +) { + flowCand(node, _, _, config) and + config.isSource(node) and + fromArg = false and + ap = node.(AccessPathNilNode).getAp() and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + localFlowBigStep(mid, node, true, config) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, fromArg, _, nil, config) and + localFlowBigStep(mid, node, false, config) and + ap = node.(AccessPathNilNode).getAp() and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, apf, ap, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, _, _, nil, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + ap = node.(AccessPathNilNode).getAp() and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, _, apf, ap, config) and + flowIntoCallable(mid, node, allowsFieldFlow, config) and + fromArg = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowFwd(mid, false, apf, ap, config) and + flowOutOfCallable(mid, node, allowsFieldFlow, config) and + fromArg = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid | + flowFwd(mid, fromArg, apf, ap, config) and + argumentValueFlowsThrough(mid, node, _) + ) + or + exists(Node mid, AccessPathNil nil, DataFlowType t | + flowFwd(mid, fromArg, _, nil, config) and + simpleArgumentFlowsThrough(mid, node, t, config) and + ap = TNil(t) and + apf = ap.(AccessPathNil).getFront() + ) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdStore(node, f, ap0, apf, fromArg, config) and + push(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowFwdRead(node, f, ap0, fromArg, config) and + popWithFront(ap0, f, apf, ap) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, Content f, AccessPath ap0, AccessPathFront apf, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + flowFwdStoreAux(mid, f, node, apf0, apf, config) + ) +} + +private predicate flowFwdStoreAux( + Node mid, Content f, Node node, AccessPathFront apf0, AccessPathFront apf, Configuration config +) { + store(mid, f, node) and + consCand(f, apf0, config) and + apf.headUsesContent(f) and + flowCand(node, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, Content f, AccessPath ap0, boolean fromArg, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, apf0, ap0, config) and + read(mid, f, node) and + apf0.headUsesContent(f) and + flowCand(node, _, _, unbind(config)) + ) +} + +/** + * Holds if data can flow from a source to `node` with the given `ap` and + * from there flow to a sink. + */ +private predicate flow(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flow0(node, toReturn, ap, config) and + flowFwd(node, _, _, ap, config) +} + +private predicate flow0(Node node, boolean toReturn, AccessPath ap, Configuration config) { + flowFwd(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + ap instanceof AccessPathNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, config) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, ap, config) and + localFlowBigStep(node, mid, false, config) and + flow(mid, toReturn, nil, config) and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flow(mid, _, ap, config) and + toReturn = false + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, ap, config) and + additionalJumpStep(node, mid, config) and + flow(mid, _, nil, config) and + toReturn = false and + ap instanceof AccessPathNil + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowIntoCallable(node, mid, allowsFieldFlow, config) and + flow(mid, false, ap, config) and + toReturn = false and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid, boolean allowsFieldFlow | + flowOutOfCallable(node, mid, allowsFieldFlow, config) and + flow(mid, _, ap, config) and + toReturn = true and + (ap instanceof AccessPathNil or allowsFieldFlow = true) + ) + or + exists(Node mid | + argumentValueFlowsThrough(node, mid, _) and + flow(mid, toReturn, ap, config) + ) + or + exists(Node mid, AccessPathNil ap0 | + simpleArgumentFlowsThrough(node, mid, _, config) and + flow(mid, toReturn, ap0, config) and + ap instanceof AccessPathNil and + flowFwd(node, _, _, ap, config) + ) + or + exists(Content f, AccessPath ap0 | + flowStore(node, f, toReturn, ap0, config) and + pop(ap0, f, ap) + ) + or + exists(Content f, AccessPath ap0 | + flowRead(node, f, toReturn, ap0, config) and + push(ap0, f, ap) + ) +} + +pragma[nomagic] +private predicate flowStore( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + store(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +pragma[nomagic] +private predicate flowRead( + Node node, Content f, boolean toReturn, AccessPath ap0, Configuration config +) { + exists(Node mid | + read(node, f, mid) and + flow(mid, toReturn, ap0, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, config) } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + ap = node.(AccessPathNilNode).getAp() + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, ap) and + config = mid.getConfiguration() and + flow(node, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + // The AccessPath on a sink is empty. + config.isSink(node) and + flow(node, config) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +abstract class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = getNode().toString() + ppAp() + ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration. */ + abstract Configuration getConfiguration(); + + /** Gets a successor. */ + deprecated final PathNode getSucc() { result = this.getASuccessor() } + + /** Gets a successor of this node, if any. */ + abstract PathNode getASuccessor(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " [" + s + "]" + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNode, TPathNodeMid { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNode getASuccessor() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via one or more local steps + localFlowStepPlus(node, result.getNode(), _, config) and + ap instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(this.getConfiguration()) + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid | + mid = getSuccMid() and + mid.getNode() = result.getNode() and + mid.getAp() instanceof AccessPathNil and + result instanceof PathNodeSink and + result.getConfiguration() = unbind(mid.getConfiguration()) + ) + or + // a direct step from a source to a sink if a node is both + this instanceof PathNodeSource and + result instanceof PathNodeSink and + this.getNode() = result.getNode() and + result.getConfiguration() = unbind(this.getConfiguration()) + } +} + +/** + * A flow graph node corresponding to a source. + */ +private class PathNodeSource extends PathNodeMid { + PathNodeSource() { + getConfiguration().isSource(getNode()) and + getCallContext() instanceof CallContextAny and + getAp() instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNode, TPathNodeSink { + Node node; + + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getASuccessor() { none() } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, AccessPath ap) { + localFlowBigStep(mid.getNode(), node, true, mid.getConfiguration()) and + cc = mid.getCallContext() and + ap = mid.getAp() + or + localFlowBigStep(mid.getNode(), node, false, mid.getConfiguration()) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = node.(AccessPathNilNode).getAp() + or + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + ap = mid.getAp() + or + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = node.(AccessPathNilNode).getAp() + or + contentReadStep(mid, node, ap) and cc = mid.getCallContext() + or + exists(Content f, AccessPath ap0 | contentStoreStep(mid, node, ap0, f, cc) and push(ap0, f, ap)) + or + pathOutOfArgument(mid, node, cc) and ap = mid.getAp() + or + pathIntoCallable(mid, node, _, cc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() + or + pathThroughCallable(mid, node, cc, ap) + or + valuePathThroughCallable(mid, node, cc) and ap = mid.getAp() +} + +pragma[noinline] +private predicate contentReadStep(PathNodeMid mid, Node node, AccessPath ap) { + exists(Content f, AccessPath ap0 | + ap0 = mid.getAp() and + read(mid.getNode(), f, node) and + pop(ap0, f, ap) + ) +} + +pragma[noinline] +private predicate contentStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, Content f, CallContext cc +) { + ap0 = mid.getAp() and + store(mid.getNode(), f, node) and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0(PathNodeMid mid, ReturnPosition pos, CallContext innercc) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall +} + +pragma[noinline] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKind kind, CallContext cc +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, OutNode out, CallContext cc) { + exists(ReturnKind kind, DataFlowCall call | pathOutOfCallable1(mid, call, kind, cc) | + out = getAnOutNode(call, kind) + ) +} + +private predicate pathOutOfArgument(PathNodeMid mid, PostUpdateNode node, CallContext cc) { + exists( + PostUpdateNode n, ParameterNode p, DataFlowCallable callable, CallContext innercc, int i, + DataFlowCall call, ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + flow(node, unbind(mid.getConfiguration())) + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, boolean emptyAp +) { + exists(ArgumentNode arg, AccessPath ap | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) +} + +pragma[noinline] +private predicate parameterCand(DataFlowCallable callable, int i, Configuration config) { + exists(ParameterNode p | + flow(p, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + boolean emptyAp +) { + pathIntoArg(mid, i, outercc, call, emptyAp) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, DataFlowCall call +) { + exists(int i, DataFlowCallable callable, boolean emptyAp | + pathIntoCallable0(mid, callable, i, outercc, call, emptyAp) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) +} + +/** Holds if data may flow from `p` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ParameterNode p, ReturnKind kind, CallContextCall cc, AccessPathNil apnil, Configuration config +) { + exists(PathNodeMid mid, ReturnNode ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + apnil = mid.getAp() + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) +} + +pragma[noinline] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKind kind, CallContext cc, AccessPathNil apnil +) { + exists(ParameterNode p, CallContext innercc | + pathIntoCallable(mid, p, cc, innercc, call) and + paramFlowsThrough(p, kind, innercc, apnil, unbind(mid.getConfiguration())) and + not parameterValueFlowsThrough(p, kind, innercc) and + mid.getAp() instanceof AccessPathNil + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, OutNode out, CallContext cc, AccessPathNil apnil +) { + exists(DataFlowCall call, ReturnKind kind | + pathThroughCallable0(call, mid, kind, cc, apnil) and + out = getAnOutNode(call, kind) + ) +} + +pragma[noinline] +private predicate valuePathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKind kind, CallContext cc +) { + exists(ParameterNode p, CallContext innercc | + pathIntoCallable(mid, p, cc, innercc, call) and + parameterValueFlowsThrough(p, kind, innercc) + ) +} + +private predicate valuePathThroughCallable(PathNodeMid mid, OutNode out, CallContext cc) { + exists(DataFlowCall call, ReturnKind kind | + valuePathThroughCallable0(call, mid, kind, cc) and + out = getAnOutNode(call, kind) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNodeSource flowsource, PathNodeSink flowsink, Node source, Node sink, + Configuration configuration +) { + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + pathSuccPlus(flowsource, flowsink) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of an argument + exists(ParameterNode p | + parameterValueFlowsToUpdate(p, node1) and + viableParamArg(_, p, node2.(PostUpdateNode).getPreUpdateNode()) + ) + or + // flow out of a callable + exists(DataFlowCall call, ReturnKind kind | + getReturnPosition(node1) = viableReturnPos(call, kind) and + node2 = getAnOutNode(call, kind) + ) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { interestingCallableSrc(c, config) } or + TCallableSrc() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, config) and + ce2 = TCallable(c2, unbind(config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(n.getEnclosingCallable(), config) + ) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private newtype TPartialPathNode = + TPartialPathNodeMk(Node node, CallContext cc, AccessPath ap, Configuration config) { + config.isSource(node) and + cc instanceof CallContextAny and + ap = TNil(getErasedRepr(node.getType())) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, AccessPath ap, Configuration config + ) { + exists(PartialPathNode mid | + partialPathStep(mid, node, cc, ap, config) and + not fullBarrier(node, config) and + if node instanceof CastingNode then compatibleTypes(node.getType(), ap.getType()) else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = getNode().toString() + ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = getNode().toString() + ppAp() + ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + abstract Node getNode(); + + /** Gets the associated configuration. */ + abstract Configuration getConfiguration(); + + /** Gets a successor of this node, if any. */ + abstract PartialPathNode getASuccessor(); + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | s = this.(PartialPathNodePriv).getAp().toString() | + if s = "" then result = "" else result = " [" + s + "]" + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodePriv).getCallContext().toString() + ">" + } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodePriv extends PartialPathNode { + Node node; + + CallContext cc; + + AccessPath ap; + + Configuration config; + + PartialPathNodePriv() { this = TPartialPathNodeMk(node, cc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PartialPathNodePriv getSuccMid() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getAp(), + result.getConfiguration()) + } + + override PartialPathNode getASuccessor() { result = getSuccMid() } + } + + private predicate partialPathStep( + PartialPathNodePriv mid, Node node, CallContext cc, AccessPath ap, Configuration config + ) { + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + config = mid.getConfiguration() + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getErasedRepr(node.getType())) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + config = mid.getConfiguration() + or + exists(AccessPath ap0, Content f | + partialPathReadStep(mid, ap0, f, node, cc, config) and + apConsFwd(ap, f, ap0, config) + ) + or + partialPathOutOfArgument(mid, node, cc, ap, config) + or + partialPathIntoCallable(mid, node, _, cc, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) + or + partialPathThroughCallable(mid, node, cc, ap, config) + or + valuePartialPathThroughCallable(mid, node, cc, ap, config) + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodePriv mid, AccessPath ap1, Content f, Node node, AccessPath ap2 + ) { + ap1 = mid.getAp() and + store(mid.getNode(), f, node) and + ap2.getHead() = f and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), f.getType()) + } + + pragma[nomagic] + private predicate apConsFwd(AccessPath ap1, Content f, AccessPath ap2, Configuration config) { + exists(PartialPathNodePriv mid | + partialPathStoreStep(mid, ap1, f, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodePriv mid, AccessPath ap, Content f, Node node, CallContext cc, + Configuration config + ) { + ap = mid.getAp() and + read(mid.getNode(), f, node) and + ap.getHead() = f and + config = mid.getConfiguration() and + cc = mid.getCallContext() + } + + private predicate partialPathOutOfCallable0( + PartialPathNodePriv mid, ReturnPosition pos, CallContext innercc, AccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[noinline] + private predicate partialPathOutOfCallable1( + PartialPathNodePriv mid, DataFlowCall call, ReturnKind kind, CallContext cc, AccessPath ap, + Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodePriv mid, OutNode out, CallContext cc, AccessPath ap, Configuration config + ) { + exists(ReturnKind kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = getAnOutNode(call, kind) + ) + } + + private predicate partialPathOutOfArgument( + PartialPathNodePriv mid, PostUpdateNode node, CallContext cc, AccessPath ap, + Configuration config + ) { + exists( + PostUpdateNode n, ParameterNode p, DataFlowCallable callable, CallContext innercc, int i, + DataFlowCall call, ArgumentNode arg + | + mid.getNode() = n and + parameterValueFlowsToUpdate(p, n) and + innercc = mid.getCallContext() and + p.isParameterOf(callable, i) and + resolveReturn(innercc, callable, call) and + node.getPreUpdateNode() = arg and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + | + if reducedViableImplInReturn(callable, call) + then cc = TReturn(callable, call) + else cc = TAnyCallContext() + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodePriv mid, int i, CallContext cc, DataFlowCall call, boolean emptyAp, + AccessPath ap, Configuration config + ) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + | + ap instanceof AccessPathNil and emptyAp = true + or + ap instanceof AccessPathCons and emptyAp = false + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodePriv mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, boolean emptyAp, AccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, emptyAp, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodePriv mid, ParameterNode p, CallContext outercc, CallContextCall innercc, + DataFlowCall call, AccessPath ap, Configuration config + ) { + exists(int i, DataFlowCallable callable, boolean emptyAp | + partialPathIntoCallable0(mid, callable, i, outercc, call, emptyAp, ap, config) and + p.isParameterOf(callable, i) + | + if reducedViableImplInCallContext(_, callable, call) + then innercc = TSpecificCall(call, i, emptyAp) + else innercc = TSomeCall(p, emptyAp) + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ParameterNode p, ReturnKind kind, CallContextCall cc, AccessPathNil apnil, Configuration config + ) { + exists(PartialPathNodePriv mid, ReturnNode ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + config = mid.getConfiguration() and + apnil = mid.getAp() + | + cc = TSomeCall(p, true) + or + exists(int i | cc = TSpecificCall(_, i, true) | + p.isParameterOf(returnNodeGetEnclosingCallable(ret), i) + ) + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodePriv mid, ReturnKind kind, CallContext cc, + AccessPathNil apnil, Configuration config + ) { + exists(ParameterNode p, CallContext innercc, AccessPathNil midapnil | + partialPathIntoCallable(mid, p, cc, innercc, call, midapnil, config) and + paramFlowsThroughInPartialPath(p, kind, innercc, apnil, config) and + not parameterValueFlowsThrough(p, kind, innercc) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodePriv mid, OutNode out, CallContext cc, AccessPathNil apnil, Configuration config + ) { + exists(DataFlowCall call, ReturnKind kind | + partialPathThroughCallable0(call, mid, kind, cc, apnil, config) and + out = getAnOutNode(call, kind) + ) + } + + pragma[noinline] + private predicate valuePartialPathThroughCallable0( + DataFlowCall call, PartialPathNodePriv mid, ReturnKind kind, CallContext cc, AccessPath ap, + Configuration config + ) { + exists(ParameterNode p, CallContext innercc | + partialPathIntoCallable(mid, p, cc, innercc, call, ap, config) and + parameterValueFlowsThrough(p, kind, innercc) + ) + } + + private predicate valuePartialPathThroughCallable( + PartialPathNodePriv mid, OutNode out, CallContext cc, AccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKind kind | + valuePartialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = getAnOutNode(call, kind) + ) + } +} +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + configuration.isSource(source.getNode()) and + node = source.getASuccessor+() +} diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll index 13f5db09b6c..f906f989b48 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll @@ -443,8 +443,14 @@ private module ThisFlow { * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local * (intra-procedural) step. */ +cached predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) + or + // Field flow is not strictly a "step" but covers the whole function + // transitively. There's no way to get a step-like relation out of the global + // data flow library, so we just have to accept some big steps here. + FieldFlow::fieldFlow(nodeFrom, nodeTo) } /** @@ -571,6 +577,51 @@ private predicate exprToDefinitionByReferenceStep(Expr exprIn, Expr argOut) { ) } +private module FieldFlow { + private import DataFlowImplLocal + private import DataFlowPrivate + + /** + * A configuration for finding local-only flow through fields. This uses the + * `Configuration` class in the dedicated `DataFlowImplLocal` copy of the + * shared library that's not user-exposed directly. + * + * To keep the flow local to a single function, we put barriers on parameters + * and return statements. Sources and sinks are the values that go into and + * out of fields, respectively. + */ + private class FieldConfiguration extends Configuration { + FieldConfiguration() { this = "FieldConfiguration" } + + override predicate isSource(Node source) { + storeStep(source, _, _) + } + + override predicate isSink(Node sink) { + readStep(_, _, sink) + } + + override predicate isBarrier(Node node) { + node instanceof ParameterNode + } + + override predicate isBarrierOut(Node node) { + node.asExpr().getParent() instanceof ReturnStmt + or + node.asExpr().getParent() instanceof ThrowExpr + } + } + + predicate fieldFlow(Node node1, Node node2) { + exists(FieldConfiguration cfg | + cfg.hasFlow(node1, node2) + ) and + // This configuration should not be able to cross function boundaries, but + // we double-check here just to be sure. + node1.getFunction() = node2.getFunction() + } +} + VariableAccess getAnAccessToAssignedVariable(Expr assign) { ( assign instanceof Assignment diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll index fa47c5af49f..3130e64d7d6 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/FlowVar.qll @@ -212,6 +212,7 @@ module FlowVar_internal { or TBlockVar(SubBasicBlock sbb, Variable v) { not fullySupportedSsaVariable(v) and + not v instanceof Field and // Fields are interprocedural data flow, not local reachable(sbb) and ( initializer(sbb.getANode(), v, _) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index 16b2468c06f..9b8eb8ca577 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -137,7 +137,7 @@ void following_pointers( sink(sourceStruct1_ptr->m1); // flow sink(sourceStruct1_ptr->getFirst()); // flow [NOT DETECTED with IR] sink(sourceStruct1_ptr->m2); // no flow - sink(sourceStruct1.m1); // flow (due to lack of no-alias tracking) + sink(sourceStruct1.m1); // no flow twoIntFields s = { source(), source() }; diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected index 044028f0930..bca0bfbca69 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected @@ -22,7 +22,6 @@ | test.cpp:126:8:126:19 | sourceArray1 | test.cpp:120:9:120:20 | sourceArray1 | | test.cpp:137:27:137:28 | m1 | test.cpp:136:27:136:32 | call to source | | test.cpp:138:27:138:34 | call to getFirst | test.cpp:136:27:136:32 | call to source | -| test.cpp:140:22:140:23 | m1 | test.cpp:136:27:136:32 | call to source | | test.cpp:145:10:145:11 | m2 | test.cpp:142:32:142:37 | call to source | | test.cpp:153:17:153:18 | m2 | test.cpp:151:35:151:40 | call to source | | test.cpp:188:8:188:8 | y | test.cpp:186:27:186:32 | call to source | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected index 907fb5151c2..1228f8f5816 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -9,7 +9,6 @@ | test.cpp:109:9:109:14 | test.cpp:110:10:110:12 | IR only | | test.cpp:136:27:136:32 | test.cpp:137:27:137:28 | AST only | | test.cpp:136:27:136:32 | test.cpp:138:27:138:34 | AST only | -| test.cpp:136:27:136:32 | test.cpp:140:22:140:23 | AST only | | test.cpp:147:42:147:47 | test.cpp:149:18:149:19 | IR only | | test.cpp:395:17:395:22 | test.cpp:397:10:397:18 | AST only | | test.cpp:407:13:407:18 | test.cpp:413:10:413:14 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/fields/aliasing.cpp b/cpp/ql/test/library-tests/dataflow/fields/aliasing.cpp index 364db7aae73..cdcade3f39d 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/aliasing.cpp +++ b/cpp/ql/test/library-tests/dataflow/fields/aliasing.cpp @@ -35,24 +35,24 @@ void assignAfterAlias() { S s1 = { 0, 0 }; S &ref1 = s1; ref1.m1 = user_input(); - sink(s1.m1); // flow + sink(s1.m1); // flow [FALSE NEGATIVE] S s2 = { 0, 0 }; S &ref2 = s2; s2.m1 = user_input(); - sink(ref2.m1); // flow + sink(ref2.m1); // flow [FALSE NEGATIVE] } void assignAfterCopy() { S s1 = { 0, 0 }; S copy1 = s1; copy1.m1 = user_input(); - sink(s1.m1); // no flow [FALSE POSITIVE] + sink(s1.m1); // no flow S s2 = { 0, 0 }; S copy2 = s2; s2.m1 = user_input(); - sink(copy2.m1); // no flow [FALSE POSITIVE] + sink(copy2.m1); // no flow } void assignBeforeCopy() { diff --git a/cpp/ql/test/library-tests/dataflow/fields/flow.expected b/cpp/ql/test/library-tests/dataflow/fields/flow.expected index 885ce2aaec5..c7c18cc4c37 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/flow.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/flow.expected @@ -98,14 +98,9 @@ edges | aliasing.cpp:26:19:26:20 | ref arg s2 [m1, ... (1)] | aliasing.cpp:30:8:30:9 | s2 [m1, ... (1)] | | aliasing.cpp:29:8:29:9 | s1 [m1, ... (1)] | aliasing.cpp:29:11:29:12 | m1 | | aliasing.cpp:30:8:30:9 | s2 [m1, ... (1)] | aliasing.cpp:30:11:30:12 | m1 | -| aliasing.cpp:37:13:37:22 | call to user_input [void] | aliasing.cpp:38:11:38:12 | m1 | -| aliasing.cpp:42:11:42:20 | call to user_input [void] | aliasing.cpp:43:13:43:14 | m1 | -| aliasing.cpp:49:14:49:23 | call to user_input [void] | aliasing.cpp:50:11:50:12 | m1 | -| aliasing.cpp:54:11:54:20 | call to user_input [void] | aliasing.cpp:55:14:55:15 | m1 | | aliasing.cpp:60:3:60:4 | s2 [post update] [m1, ... (1)] | aliasing.cpp:62:8:62:12 | copy2 [m1, ... (1)] | | aliasing.cpp:60:3:60:22 | ... = ... [void] | aliasing.cpp:60:3:60:4 | s2 [post update] [m1, ... (1)] | | aliasing.cpp:60:11:60:20 | call to user_input [void] | aliasing.cpp:60:3:60:22 | ... = ... [void] | -| aliasing.cpp:60:11:60:20 | call to user_input [void] | aliasing.cpp:62:14:62:15 | m1 | | aliasing.cpp:62:8:62:12 | copy2 [m1, ... (1)] | aliasing.cpp:62:14:62:15 | m1 | | constructors.cpp:26:15:26:15 | f [a_, ... (1)] | constructors.cpp:28:10:28:10 | f [a_, ... (1)] | | constructors.cpp:26:15:26:15 | f [b_, ... (1)] | constructors.cpp:29:10:29:10 | f [b_, ... (1)] | @@ -169,10 +164,6 @@ edges | C.cpp:31:10:31:11 | s3 | C.cpp:24:16:24:25 | new [void] | C.cpp:31:10:31:11 | s3 | s3 flows from $@ | C.cpp:24:16:24:25 | new [void] | new [void] | | aliasing.cpp:29:11:29:12 | m1 | aliasing.cpp:9:11:9:20 | call to user_input [void] | aliasing.cpp:29:11:29:12 | m1 | m1 flows from $@ | aliasing.cpp:9:11:9:20 | call to user_input [void] | call to user_input [void] | | aliasing.cpp:30:11:30:12 | m1 | aliasing.cpp:13:10:13:19 | call to user_input [void] | aliasing.cpp:30:11:30:12 | m1 | m1 flows from $@ | aliasing.cpp:13:10:13:19 | call to user_input [void] | call to user_input [void] | -| aliasing.cpp:38:11:38:12 | m1 | aliasing.cpp:37:13:37:22 | call to user_input [void] | aliasing.cpp:38:11:38:12 | m1 | m1 flows from $@ | aliasing.cpp:37:13:37:22 | call to user_input [void] | call to user_input [void] | -| aliasing.cpp:43:13:43:14 | m1 | aliasing.cpp:42:11:42:20 | call to user_input [void] | aliasing.cpp:43:13:43:14 | m1 | m1 flows from $@ | aliasing.cpp:42:11:42:20 | call to user_input [void] | call to user_input [void] | -| aliasing.cpp:50:11:50:12 | m1 | aliasing.cpp:49:14:49:23 | call to user_input [void] | aliasing.cpp:50:11:50:12 | m1 | m1 flows from $@ | aliasing.cpp:49:14:49:23 | call to user_input [void] | call to user_input [void] | -| aliasing.cpp:55:14:55:15 | m1 | aliasing.cpp:54:11:54:20 | call to user_input [void] | aliasing.cpp:55:14:55:15 | m1 | m1 flows from $@ | aliasing.cpp:54:11:54:20 | call to user_input [void] | call to user_input [void] | | aliasing.cpp:62:14:62:15 | m1 | aliasing.cpp:60:11:60:20 | call to user_input [void] | aliasing.cpp:62:14:62:15 | m1 | m1 flows from $@ | aliasing.cpp:60:11:60:20 | call to user_input [void] | call to user_input [void] | | constructors.cpp:28:12:28:12 | call to a | constructors.cpp:34:11:34:20 | call to user_input [void] | constructors.cpp:28:12:28:12 | call to a | call to a flows from $@ | constructors.cpp:34:11:34:20 | call to user_input [void] | call to user_input [void] | | constructors.cpp:28:12:28:12 | call to a | constructors.cpp:36:11:36:20 | call to user_input [void] | constructors.cpp:28:12:28:12 | call to a | call to a flows from $@ | constructors.cpp:36:11:36:20 | call to user_input [void] | call to user_input [void] |