From 83a7411a055295424240e71d69410acb68957341 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 12 Oct 2020 16:56:10 +0100 Subject: [PATCH] Improve accuracy of allocation-size-overflow by excluding len(...) calls that never see a large operand This is achieved by splitting the query into two pieces: (1) trace flow from indefinitely large object creation to len(...) calls, then (2) considering those particular len(...) calls as taint propagators, trace taint from the same sources all the way to an allocation call. This is more accurate than the previous solution, which considered any len(...) call to propagate taint, potentially confusing an array that stored a large value in one of its cells for an array which is itself of large size. --- Makefile | 2 +- ...2020-10-14-allocation-overflow-accuracy.md | 2 + ql/src/go.qll | 2 + ql/src/semmle/go/dataflow/DataFlow2.qll | 27 + ql/src/semmle/go/dataflow/TaintTracking2.qll | 7 + .../go/dataflow/internal/DataFlowImpl2.qll | 2985 +++++++++++++++++ .../tainttracking2/TaintTrackingImpl.qll | 115 + .../tainttracking2/TaintTrackingParameter.qll | 5 + .../go/security/AllocationSizeOverflow.qll | 29 + .../AllocationSizeOverflowCustomizations.qll | 6 - .../Security/CWE-190/array_vs_contents.go | 36 + 11 files changed, 3209 insertions(+), 7 deletions(-) create mode 100644 change-notes/2020-10-14-allocation-overflow-accuracy.md create mode 100644 ql/src/semmle/go/dataflow/DataFlow2.qll create mode 100644 ql/src/semmle/go/dataflow/TaintTracking2.qll create mode 100644 ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll create mode 100644 ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll create mode 100644 ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll create mode 100644 ql/test/query-tests/Security/CWE-190/array_vs_contents.go diff --git a/Makefile b/Makefile index e1668ccc848..5f53c3286ef 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ build/testdb/go.dbscheme: upgrades/initial/go.dbscheme .PHONY: sync-dataflow-libraries sync-dataflow-libraries: - for f in DataFlowImpl.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll;\ + for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll;\ do\ curl -s -o ./ql/src/semmle/go/dataflow/internal/$$f https://raw.githubusercontent.com/github/codeql/$(DATAFLOW_BRANCH)/java/ql/src/semmle/code/java/dataflow/internal/$$f;\ done diff --git a/change-notes/2020-10-14-allocation-overflow-accuracy.md b/change-notes/2020-10-14-allocation-overflow-accuracy.md new file mode 100644 index 00000000000..c6b99061c95 --- /dev/null +++ b/change-notes/2020-10-14-allocation-overflow-accuracy.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* The accuracy of the `go/allocation-size-overflow` query was removed, excluding more false-positives in which a small array could be mistaken for one of unbounded size. diff --git a/ql/src/go.qll b/ql/src/go.qll index b52f07ef723..340f3d82d93 100644 --- a/ql/src/go.qll +++ b/ql/src/go.qll @@ -23,9 +23,11 @@ import semmle.go.controlflow.BasicBlocks import semmle.go.controlflow.ControlFlowGraph import semmle.go.controlflow.IR import semmle.go.dataflow.DataFlow +import semmle.go.dataflow.DataFlow2 import semmle.go.dataflow.GlobalValueNumbering import semmle.go.dataflow.SSA import semmle.go.dataflow.TaintTracking +import semmle.go.dataflow.TaintTracking2 import semmle.go.frameworks.Chi import semmle.go.frameworks.Echo import semmle.go.frameworks.Email diff --git a/ql/src/semmle/go/dataflow/DataFlow2.qll b/ql/src/semmle/go/dataflow/DataFlow2.qll new file mode 100644 index 00000000000..a2bae8bd939 --- /dev/null +++ b/ql/src/semmle/go/dataflow/DataFlow2.qll @@ -0,0 +1,27 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `semmle.code.go.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, invoke `DataFlow::localFlow` or + * `DataFlow::LocalFlowStep` with arguments of type `DataFlow::Node`. + */ + +import go + +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis. + */ +module DataFlow2 { + import semmle.go.dataflow.internal.DataFlowImpl2 + import Properties +} diff --git a/ql/src/semmle/go/dataflow/TaintTracking2.qll b/ql/src/semmle/go/dataflow/TaintTracking2.qll new file mode 100644 index 00000000000..1e6639d8539 --- /dev/null +++ b/ql/src/semmle/go/dataflow/TaintTracking2.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ +module TaintTracking2 { + import semmle.go.dataflow.internal.tainttracking2.TaintTrackingImpl +} diff --git a/ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll b/ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll new file mode 100644 index 00000000000..5042dce683f --- /dev/null +++ b/ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll @@ -0,0 +1,2985 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` measured in approximate + * number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for dataflow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +private class AdditionalFlowStepSource extends Node { + AdditionalFlowStepSource() { any(Configuration c).isAdditionalFlowStep(this, _) } +} + +pragma[noinline] +private predicate isAdditionalFlowStep( + AdditionalFlowStepSource node1, Node node2, DataFlowCallable callable1, Configuration config +) { + config.isAdditionalFlowStep(node1, node2) and + callable1 = node1.getEnclosingCallable() +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + isAdditionalFlowStep(node1, node2, node2.getEnclosingCallable(), config) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStep(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + exists(DataFlowCallable callable1 | + isAdditionalFlowStep(node1, node2, callable1, config) and + node2.getEnclosingCallable() != callable1 and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) + ) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call. + */ +private predicate nodeCandFwd1(Node node, boolean fromArg, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) and + fromArg = false + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + jumpStep(mid, node, config) and + fromArg = false + ) + or + exists(Node mid | + nodeCandFwd1(mid, config) and + additionalJumpStep(mid, node, config) and + fromArg = false + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + nodeCandFwd1(mid, fromArg, config) and + store(mid, _, node, _) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + nodeCandFwd1Read(c, node, fromArg, config) and + nodeCandFwd1IsStored(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + nodeCandFwd1(arg, config) and + viableParamArg(_, node, arg) and + fromArg = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd1Out(call, node, false, config) and + fromArg = false + or + nodeCandFwd1OutFromArg(call, node, config) and + nodeCandFwd1IsEntered(call, fromArg, config) + ) + ) +} + +private predicate nodeCandFwd1(Node node, Configuration config) { nodeCandFwd1(node, _, config) } + +pragma[nomagic] +private predicate nodeCandFwd1Read(Content c, Node node, boolean fromArg, Configuration config) { + exists(Node mid | + nodeCandFwd1(mid, fromArg, config) and + read(mid, c, node) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsStored(Content c, Configuration config) { + exists(Node mid, Node node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + nodeCandFwd1(mid, config) and + store(mid, tc, node, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1ReturnPosition( + ReturnPosition pos, boolean fromArg, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCandFwd1(ret, fromArg, config) and + getReturnPosition(ret) = pos + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1Out(DataFlowCall call, Node out, boolean fromArg, Configuration config) { + exists(ReturnPosition pos | + nodeCandFwd1ReturnPosition(pos, fromArg, config) and + viableReturnPosOut(call, pos, out) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd1OutFromArg(DataFlowCall call, Node node, Configuration config) { + nodeCandFwd1Out(call, node, true, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd1`. + */ +pragma[nomagic] +private predicate nodeCandFwd1IsEntered(DataFlowCall call, boolean fromArg, Configuration config) { + exists(ArgumentNode arg | + nodeCandFwd1(arg, fromArg, config) and + viableParamArg(call, _, arg) + ) +} + +bindingset[result, b] +private boolean unbindBool(boolean b) { result != b.booleanNot() } + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ +pragma[nomagic] +private predicate nodeCand1(Node node, boolean toReturn, Configuration config) { + nodeCand1_0(node, toReturn, config) and + nodeCandFwd1(node, config) +} + +pragma[nomagic] +private predicate nodeCand1_0(Node node, boolean toReturn, Configuration config) { + nodeCandFwd1(node, config) and + config.isSink(node) and + toReturn = false + or + exists(Node mid | + localFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + nodeCand1(mid, toReturn, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand1(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + nodeCand1Store(c, node, toReturn, config) and + nodeCand1IsRead(c, config) + ) + or + // read + exists(Node mid, Content c | + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, toReturn, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand1In(call, node, false, config) and + toReturn = false + or + nodeCand1InToReturn(call, node, config) and + nodeCand1IsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + nodeCand1Out(pos, config) and + getReturnPosition(node) = pos and + toReturn = true + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsRead(Content c, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd1(node, unbind(config)) and + read(node, c, mid) and + nodeCandFwd1IsStored(c, unbind(config)) and + nodeCand1(mid, _, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1Store(Content c, Node node, boolean toReturn, Configuration config) { + exists(Node mid, TypedContent tc | + nodeCand1(mid, toReturn, config) and + nodeCandFwd1IsStored(c, unbind(config)) and + store(node, tc, mid, _) and + c = tc.getContent() + ) +} + +/** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `nodeCand1`. + */ +private predicate nodeCand1IsReadAndStored(Content c, Configuration conf) { + nodeCand1IsRead(c, conf) and + nodeCand1Store(c, _, _, conf) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCandFwd1ReturnPosition(pos, _, config) and + viableReturnPosOut(call, pos, out) +} + +pragma[nomagic] +private predicate nodeCand1Out(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, Node out | + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) +} + +pragma[nomagic] +private predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArg(call, p, arg) and + nodeCandFwd1(arg, config) +} + +pragma[nomagic] +private predicate nodeCand1In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, Configuration config +) { + exists(ParameterNode p | + nodeCand1(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1InToReturn(DataFlowCall call, ArgumentNode arg, Configuration config) { + nodeCand1In(call, arg, true, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand1`. + */ +pragma[nomagic] +private predicate nodeCand1IsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(Node out | + nodeCand1(out, toReturn, config) and + nodeCandFwd1OutFromArg(call, out, config) + ) +} + +pragma[nomagic] +private predicate nodeCand1(Node node, Configuration config) { nodeCand1(node, _, config) } + +private predicate throughFlowNodeCand1(Node node, Configuration config) { + nodeCand1(node, true, config) and + not fullBarrier(node, config) and + not inBarrier(node, config) and + not outBarrier(node, config) +} + +/** Holds if flow may return from `callable`. */ +pragma[nomagic] +private predicate returnFlowCallableNodeCand1( + DataFlowCallable callable, ReturnKindExt kind, Configuration config +) { + exists(ReturnNodeExt ret | + throughFlowNodeCand1(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) +} + +/** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ +private predicate parameterThroughFlowNodeCand1(ParameterNode p, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand1(p, config) and + returnFlowCallableNodeCand1(p.getEnclosingCallable(), kind, config) and + // we don't expect a parameter to return stored in itself + not exists(int pos | + kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) + ) + ) +} + +pragma[nomagic] +private predicate storeCand1(Node n1, Content c, Node n2, Configuration config) { + exists(TypedContent tc | + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + store(n1, tc, n2, _) and + c = tc.getContent() + ) +} + +pragma[nomagic] +private predicate read(Node n1, Content c, Node n2, Configuration config) { + nodeCand1IsReadAndStored(c, config) and + nodeCand1(n2, unbind(config)) and + read(n1, c, n2) +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + nodeCand1(node1, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + nodeCand1(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config +) { + viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and + nodeCand1(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParameterNode p, ArgumentNode arg, Configuration config +) { + viableParamArgNodeCandFwd1(call, p, arg, config) and + nodeCand1(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + nodeCand1(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgumentNode arg, ParameterNode p, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if `node` is reachable from a source in the configuration `config`. + * The Boolean `stored` records whether the tracked value is stored into a + * field of `node`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argStored` records whether the tracked + * value was stored into a field of the argument. + */ +private predicate nodeCandFwd2( + Node node, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + nodeCand1(node, config) and + config.isSource(node) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + or + nodeCand1(node, unbind(config)) and + ( + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + localFlowStepNodeCand1(mid, node, config) + ) + or + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, stored, config) and + additionalLocalFlowStepNodeCand1(mid, node, config) and + stored = false + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + jumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() + ) + or + exists(Node mid | + nodeCandFwd2(mid, _, _, stored, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argStored = TBooleanNone() and + stored = false + ) + or + // store + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, _, config) and + storeCand1(mid, _, node, config) and + stored = true + ) + or + // read + exists(Content c | + nodeCandFwd2Read(c, node, fromArg, argStored, config) and + nodeCandFwd2IsStored(c, stored, config) + ) + or + // flow into a callable + nodeCandFwd2In(_, node, _, _, stored, config) and + fromArg = true and + if parameterThroughFlowNodeCand1(node, config) + then argStored = TBooleanSome(stored) + else argStored = TBooleanNone() + or + // flow out of a callable + exists(DataFlowCall call | + nodeCandFwd2Out(call, node, fromArg, argStored, stored, config) and + fromArg = false + or + exists(boolean argStored0 | + nodeCandFwd2OutFromArg(call, node, argStored0, stored, config) and + nodeCandFwd2IsEntered(call, fromArg, argStored, argStored0, config) + ) + ) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCandFwd2`. + */ +pragma[noinline] +private predicate nodeCandFwd2IsStored(Content c, boolean stored, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCand1(node, unbind(config)) and + nodeCandFwd2(mid, _, _, stored, config) and + storeCand1(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Read( + Content c, Node node, boolean fromArg, BooleanOption argStored, Configuration config +) { + exists(Node mid | + nodeCandFwd2(mid, fromArg, argStored, true, config) and + read(mid, c, node, config) + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2In( + DataFlowCall call, ParameterNode p, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + nodeCandFwd2(arg, fromArg, argStored, stored, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2Out( + DataFlowCall call, Node out, boolean fromArg, BooleanOption argStored, boolean stored, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + nodeCandFwd2(ret, fromArg, argStored, stored, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + stored = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCandFwd2OutFromArg( + DataFlowCall call, Node out, boolean argStored, boolean stored, Configuration config +) { + nodeCandFwd2Out(call, out, true, TBooleanSome(argStored), stored, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `nodeCandFwd2`. + */ +pragma[nomagic] +private predicate nodeCandFwd2IsEntered( + DataFlowCall call, boolean fromArg, BooleanOption argStored, boolean stored, Configuration config +) { + exists(ParameterNode p | + nodeCandFwd2In(call, p, fromArg, argStored, stored, config) and + parameterThroughFlowNodeCand1(p, config) + ) +} + +/** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. The Boolean `read` records whether the tracked + * value must be read from a field of `node` in order to reach a sink. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnRead` + * records whether a field must be read from the returned value. + */ +private predicate nodeCand2( + Node node, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + nodeCandFwd2(node, _, _, false, config) and + config.isSink(node) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + or + nodeCandFwd2(node, _, _, unbindBool(read), unbind(config)) and + ( + exists(Node mid | + localFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) + ) + or + exists(Node mid | + additionalLocalFlowStepNodeCand1(node, mid, config) and + nodeCand2(mid, toReturn, returnRead, read, config) and + read = false + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + nodeCand2(mid, _, _, read, config) and + toReturn = false and + returnRead = TBooleanNone() and + read = false + ) + or + // store + exists(Content c | + nodeCand2Store(c, node, toReturn, returnRead, read, config) and + nodeCand2IsRead(c, read, config) + ) + or + // read + exists(Node mid, Content c, boolean read0 | + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read0), unbind(config)) and + nodeCand2(mid, toReturn, returnRead, read0, config) and + read = true + ) + or + // flow into a callable + exists(DataFlowCall call | + nodeCand2In(call, node, toReturn, returnRead, read, config) and + toReturn = false + or + exists(boolean returnRead0 | + nodeCand2InToReturn(call, node, returnRead0, read, config) and + nodeCand2IsReturned(call, toReturn, returnRead, returnRead0, config) + ) + ) + or + // flow out of a callable + nodeCand2Out(_, node, _, _, read, config) and + toReturn = true and + if nodeCandFwd2(node, true, TBooleanSome(_), unbindBool(read), config) + then returnRead = TBooleanSome(read) + else returnRead = TBooleanNone() + ) +} + +/** + * Holds if `c` is the target of a read in the flow covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsRead(Content c, boolean read, Configuration config) { + exists(Node mid, Node node | + useFieldFlow(config) and + nodeCandFwd2(node, _, _, true, unbind(config)) and + read(node, c, mid, config) and + nodeCandFwd2IsStored(c, unbindBool(read), unbind(config)) and + nodeCand2(mid, _, _, read, config) + ) +} + +pragma[nomagic] +private predicate nodeCand2Store( + Content c, Node node, boolean toReturn, BooleanOption returnRead, boolean stored, + Configuration config +) { + exists(Node mid | + storeCand1(node, c, mid, config) and + nodeCand2(mid, toReturn, returnRead, true, config) and + nodeCandFwd2(node, _, _, stored, unbind(config)) + ) +} + +/** + * Holds if `c` is the target of a store in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsStored(Content c, boolean stored, Configuration conf) { + exists(Node node | + nodeCand2Store(c, node, _, _, stored, conf) and + nodeCand2(node, _, _, stored, conf) + ) +} + +/** + * Holds if `c` is the target of both a store and a read in the path graph + * covered by `nodeCand2`. + */ +pragma[noinline] +private predicate nodeCand2IsReadAndStored(Content c, Configuration conf) { + exists(boolean apNonEmpty | + nodeCand2IsStored(c, apNonEmpty, conf) and + nodeCand2IsRead(c, apNonEmpty, conf) + ) +} + +pragma[nomagic] +private predicate nodeCand2Out( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + nodeCand2(out, toReturn, returnRead, read, config) and + flowOutOfCallNodeCand1(call, ret, out, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2In( + DataFlowCall call, ArgumentNode arg, boolean toReturn, BooleanOption returnRead, boolean read, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + nodeCand2(p, toReturn, returnRead, read, config) and + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow, config) + | + read = false or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate nodeCand2InToReturn( + DataFlowCall call, ArgumentNode arg, boolean returnRead, boolean read, Configuration config +) { + nodeCand2In(call, arg, true, TBooleanSome(returnRead), read, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `nodeCand2`. + */ +pragma[nomagic] +private predicate nodeCand2IsReturned( + DataFlowCall call, boolean toReturn, BooleanOption returnRead, boolean read, Configuration config +) { + exists(ReturnNodeExt ret | + nodeCand2Out(call, ret, toReturn, returnRead, read, config) and + nodeCandFwd2(ret, true, TBooleanSome(_), read, config) + ) +} + +private predicate nodeCand2(Node node, Configuration config) { nodeCand2(node, _, _, _, config) } + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgumentNode node1, ParameterNode node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + nodeCand2(node2, config) and + nodeCand2(node1, unbind(config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends Node { + FlowCheckNode() { + this instanceof CastNode or + clearsContent(this, _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(Node node, Configuration config) { + nodeCand2(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParameterNode or + node instanceof OutNodeExt or + store(_, _, node, _) or + read(_, _, node) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | nodeCand2(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _) or + read(node, _, next) + ) + or + node instanceof FlowCheckNode + or + config.isSink(node) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + nodeCand2(node1, _, _, false, config) and + nodeCand2(node2, _, _, false, unbind(config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + not isUnreachableInCall(node2, cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, config) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = getNodeType(node1) + or + additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = getNodeType(node2) + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCall(node1, cc.(LocalCallContextSpecificCall).getCall()) and + nodeCand2(node2, unbind(config)) + or + exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, t, config, cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + nodeCand2(node2, unbind(config)) + ) + or + exists(Node mid | + localFlowStepPlus(node1, mid, _, _, config, cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = getNodeType(node2) and + nodeCand2(node2, unbind(config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, + LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +pragma[nomagic] +private predicate readCand2(Node node1, Content c, Node node2, Configuration config) { + read(node1, c, node2, config) and + nodeCand2(node1, _, _, true, unbind(config)) and + nodeCand2(node2, config) and + nodeCand2IsReadAndStored(c, unbind(config)) +} + +pragma[nomagic] +private predicate storeCand2( + Node node1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config +) { + store(node1, tc, node2, contentType) and + nodeCand2(node1, config) and + nodeCand2(node2, _, _, true, unbind(config)) and + nodeCand2IsReadAndStored(tc.getContent(), unbind(config)) +} + +/** + * Holds if `node` is reachable with access path front `apf` from a + * source in the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argApf` records the front of the + * access path of that argument. + */ +pragma[nomagic] +private predicate flowCandFwd( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd0(node, fromArg, argApf, apf, config) and + not apf.isClearedAt(node) and + if node instanceof CastingNode then compatibleTypes(getNodeType(node), apf.getType()) else any() +} + +pragma[nomagic] +private predicate flowCandFwd0( + Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + nodeCand2(node, _, _, false, config) and + config.isSource(node) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + or + exists(Node mid | + flowCandFwd(mid, fromArg, argApf, apf, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, fromArg, argApf, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) + ) + or + exists(Node mid | + flowCandFwd(mid, _, _, apf, config) and + nodeCand2(node, unbind(config)) and + jumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(mid, _, _, nil, config) and + nodeCand2(node, unbind(config)) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argApf = TAccessPathFrontNone() and + apf = TFrontNil(getNodeType(node)) + ) + or + // store + exists(Node mid, TypedContent tc, AccessPathFront apf0, DataFlowType contentType | + flowCandFwd(mid, fromArg, argApf, apf0, config) and + storeCand2(mid, tc, node, contentType, config) and + nodeCand2(node, _, _, true, unbind(config)) and + apf.headUsesContent(tc) and + compatibleTypes(apf0.getType(), contentType) + ) + or + // read + exists(TypedContent tc | + flowCandFwdRead(tc, node, fromArg, argApf, config) and + flowCandFwdConsCand(tc, apf, config) and + nodeCand2(node, _, _, unbindBool(apf.toBoolNonEmpty()), unbind(config)) + ) + or + // flow into a callable + flowCandFwdIn(_, node, _, _, apf, config) and + fromArg = true and + if nodeCand2(node, true, _, unbindBool(apf.toBoolNonEmpty()), config) + then argApf = TAccessPathFrontSome(apf) + else argApf = TAccessPathFrontNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowCandFwdOut(call, node, fromArg, argApf, apf, config) and + fromArg = false + or + exists(AccessPathFront argApf0 | + flowCandFwdOutFromArg(call, node, argApf0, apf, config) and + flowCandFwdIsEntered(call, fromArg, argApf, argApf0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowCandFwdConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + exists(Node mid, Node n, DataFlowType contentType | + flowCandFwd(mid, _, _, apf, config) and + storeCand2(mid, tc, n, contentType, config) and + nodeCand2(n, _, _, true, unbind(config)) and + compatibleTypes(apf.getType(), contentType) + ) +} + +pragma[nomagic] +private predicate flowCandFwdRead0( + Node node1, TypedContent tc, Content c, Node node2, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFrontHead apf, Configuration config +) { + flowCandFwd(node1, fromArg, argApf, apf, config) and + readCand2(node1, c, node2, config) and + apf.headUsesContent(tc) +} + +pragma[nomagic] +private predicate flowCandFwdRead( + TypedContent tc, Node node, boolean fromArg, AccessPathFrontOption argApf, Configuration config +) { + flowCandFwdRead0(_, tc, tc.getContent(), node, fromArg, argApf, _, config) +} + +pragma[nomagic] +private predicate flowCandFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathFrontOption argApf, + AccessPathFront apf, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowCandFwd(arg, fromArg, argApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowCandFwd(ret, fromArg, argApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandFwdOutFromArg( + DataFlowCall call, Node node, AccessPathFront argApf, AccessPathFront apf, Configuration config +) { + flowCandFwdOut(call, node, true, TAccessPathFrontSome(argApf), apf, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowCandFwd`. + */ +pragma[nomagic] +private predicate flowCandFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathFrontOption argApf, AccessPathFront apf, + Configuration config +) { + exists(ParameterNode p | + flowCandFwdIn(call, p, fromArg, argApf, apf, config) and + nodeCand2(p, true, TBooleanSome(_), unbindBool(apf.toBoolNonEmpty()), config) + ) +} + +/** + * Holds if `node` with access path front `apf` is part of a path from a + * source to a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnApf` + * records the front of the access path of the returned value. + */ +pragma[nomagic] +private predicate flowCand( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCand0(node, toReturn, returnApf, apf, config) and + flowCandFwd(node, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCand0( + Node node, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + flowCandFwd(node, _, _, apf, config) and + config.isSink(node) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flowCand(mid, toReturn, returnApf, apf, config) + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flowCand(mid, toReturn, returnApf, nil, config) and + apf instanceof AccessPathFrontNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flowCand(mid, _, _, apf, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() + ) + or + exists(Node mid, AccessPathFrontNil nil | + flowCandFwd(node, _, _, apf, config) and + additionalJumpStep(node, mid, config) and + flowCand(mid, _, _, nil, config) and + toReturn = false and + returnApf = TAccessPathFrontNone() and + apf instanceof AccessPathFrontNil + ) + or + // store + exists(TypedContent tc | + flowCandStore(node, tc, apf, toReturn, returnApf, config) and + flowCandConsCand(tc, apf, config) + ) + or + // read + exists(TypedContent tc, AccessPathFront apf0 | + flowCandRead(node, tc, apf, toReturn, returnApf, apf0, config) and + flowCandFwdConsCand(tc, apf0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowCandIn(call, node, toReturn, returnApf, apf, config) and + toReturn = false + or + exists(AccessPathFront returnApf0 | + flowCandInToReturn(call, node, returnApf0, apf, config) and + flowCandIsReturned(call, toReturn, returnApf, returnApf0, config) + ) + ) + or + // flow out of a callable + flowCandOut(_, node, _, _, apf, config) and + toReturn = true and + if flowCandFwd(node, true, _, apf, config) + then returnApf = TAccessPathFrontSome(apf) + else returnApf = TAccessPathFrontNone() +} + +pragma[nomagic] +private predicate readCandFwd( + Node node1, TypedContent tc, AccessPathFront apf, Node node2, Configuration config +) { + flowCandFwdRead0(node1, tc, tc.getContent(), node2, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandRead( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, AccessPathFront apf0, Configuration config +) { + exists(Node mid | + readCandFwd(node, tc, apf, mid, config) and + flowCand(mid, toReturn, returnApf, apf0, config) + ) +} + +pragma[nomagic] +private predicate flowCandStore( + Node node, TypedContent tc, AccessPathFront apf, boolean toReturn, + AccessPathFrontOption returnApf, Configuration config +) { + exists(Node mid | + flowCandFwd(node, _, _, apf, config) and + storeCand2(node, tc, mid, _, unbind(config)) and + flowCand(mid, toReturn, returnApf, TFrontHead(tc), unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowCandConsCand(TypedContent tc, AccessPathFront apf, Configuration config) { + flowCandFwdConsCand(tc, apf, config) and + flowCandRead(_, tc, _, _, _, apf, config) +} + +pragma[nomagic] +private predicate flowCandOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flowCand(out, toReturn, returnApf, apf, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathFrontOption returnApf, + AccessPathFront apf, Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flowCand(p, toReturn, returnApf, apf, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + apf instanceof AccessPathFrontNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowCandInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPathFront returnApf, AccessPathFront apf, + Configuration config +) { + flowCandIn(call, arg, true, TAccessPathFrontSome(returnApf), apf, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flowCand`. + */ +pragma[nomagic] +private predicate flowCandIsReturned( + DataFlowCall call, boolean toReturn, AccessPathFrontOption returnApf, AccessPathFront apf, + Configuration config +) { + exists(ReturnNodeExt ret | + flowCandOut(call, ret, toReturn, returnApf, apf, config) and + flowCandFwd(ret, true, TAccessPathFrontSome(_), apf, config) + ) +} + +private newtype TAccessPath = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { flowCandConsCand(tc, TFrontNil(t), _) } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + flowCandConsCand(tc1, TFrontHead(tc2), _) and len in [2 .. accessPathLimit()] + } + +/** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first two + * elements of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** + * Holds if this access path has `head` at the front and may be followed by `tail`. + */ + abstract predicate pop(TypedContent head, AccessPath tail); +} + +private class AccessPathNil extends AccessPath, TNil { + private DataFlowType t; + + AccessPathNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override predicate pop(TypedContent head, AccessPath tail) { none() } +} + +abstract private class AccessPathCons extends AccessPath { } + +private class AccessPathConsNil extends AccessPathCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override predicate pop(TypedContent head, AccessPath tail) { head = tc and tail = TNil(t) } +} + +private class AccessPathConsCons extends AccessPathCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override predicate pop(TypedContent head, AccessPath tail) { + head = tc1 and + ( + tail = TConsCons(tc2, _, len - 1) + or + len = 2 and + tail = TConsNil(tc2, _) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPath pop(TypedContent tc, AccessPath ap) { ap.pop(tc, result) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPath push(TypedContent tc, AccessPath ap) { ap = pop(tc, result) } + +private newtype TAccessPathOption = + TAccessPathNone() or + TAccessPathSome(AccessPath ap) + +private class AccessPathOption extends TAccessPathOption { + string toString() { + this = TAccessPathNone() and result = "" + or + this = TAccessPathSome(any(AccessPath ap | result = ap.toString())) + } +} + +/** + * Holds if `node` is reachable with access path `ap` from a source in + * the configuration `config`. + * + * The Boolean `fromArg` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ +private predicate flowFwd( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwd0(node, fromArg, argAp, apf, ap, config) and + flowCand(node, _, _, apf, config) +} + +private predicate flowFwd0( + Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowCand(node, _, _, _, config) and + config.isSource(node) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + or + flowCand(node, _, _, _, unbind(config)) and + ( + exists(Node mid | + flowFwd(mid, fromArg, argAp, apf, ap, config) and + localFlowBigStep(mid, node, true, _, config, _) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, fromArg, argAp, _, nil, config) and + localFlowBigStep(mid, node, false, apf, config, _) and + apf = ap.(AccessPathNil).getFront() + ) + or + exists(Node mid | + flowFwd(mid, _, _, apf, ap, config) and + jumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(mid, _, _, _, nil, config) and + additionalJumpStep(mid, node, config) and + fromArg = false and + argAp = TAccessPathNone() and + ap = TNil(getNodeType(node)) and + apf = ap.(AccessPathNil).getFront() + ) + ) + or + // store + exists(TypedContent tc | flowFwdStore(node, tc, pop(tc, ap), apf, fromArg, argAp, config)) + or + // read + exists(TypedContent tc | + flowFwdRead(node, _, push(tc, ap), apf, fromArg, argAp, config) and + flowFwdConsCand(tc, apf, ap, config) + ) + or + // flow into a callable + flowFwdIn(_, node, _, _, apf, ap, config) and + fromArg = true and + if flowCand(node, true, _, apf, config) + then argAp = TAccessPathSome(ap) + else argAp = TAccessPathNone() + or + // flow out of a callable + exists(DataFlowCall call | + flowFwdOut(call, node, fromArg, argAp, apf, ap, config) and + fromArg = false + or + exists(AccessPath argAp0 | + flowFwdOutFromArg(call, node, argAp0, apf, ap, config) and + flowFwdIsEntered(call, fromArg, argAp, argAp0, config) + ) + ) +} + +pragma[nomagic] +private predicate flowFwdStore( + Node node, TypedContent tc, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, AccessPathFront apf0 | + flowFwd(mid, fromArg, argAp, apf0, ap0, config) and + flowFwdStore0(mid, tc, node, apf0, apf, config) + ) +} + +pragma[nomagic] +private predicate storeCand( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFront apf, + Configuration config +) { + storeCand2(mid, tc, node, _, config) and + flowCand(mid, _, _, apf0, config) and + apf.headUsesContent(tc) +} + +pragma[noinline] +private predicate flowFwdStore0( + Node mid, TypedContent tc, Node node, AccessPathFront apf0, AccessPathFrontHead apf, + Configuration config +) { + storeCand(mid, tc, node, apf0, apf, config) and + flowCandConsCand(tc, apf0, config) and + flowCand(node, _, _, apf, unbind(config)) +} + +pragma[nomagic] +private predicate flowFwdRead0( + Node node1, TypedContent tc, AccessPathFrontHead apf0, AccessPath ap0, Node node2, + boolean fromArg, AccessPathOption argAp, Configuration config +) { + flowFwd(node1, fromArg, argAp, apf0, ap0, config) and + readCandFwd(node1, tc, apf0, node2, config) +} + +pragma[nomagic] +private predicate flowFwdRead( + Node node, AccessPathFrontHead apf0, AccessPath ap0, AccessPathFront apf, boolean fromArg, + AccessPathOption argAp, Configuration config +) { + exists(Node mid, TypedContent tc | + flowFwdRead0(mid, tc, apf0, ap0, node, fromArg, argAp, config) and + flowCand(node, _, _, apf, unbind(config)) and + flowCandConsCand(tc, apf, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowFwdConsCand( + TypedContent tc, AccessPathFront apf, AccessPath ap, Configuration config +) { + exists(Node n | + flowFwd(n, _, _, apf, ap, config) and + flowFwdStore0(n, tc, _, apf, _, config) + ) +} + +pragma[nomagic] +private predicate flowFwdIn( + DataFlowCall call, ParameterNode p, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ArgumentNode arg, boolean allowsFieldFlow | + flowFwd(arg, fromArg, argAp, apf, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) and + flowCand(p, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOut( + DataFlowCall call, Node node, boolean fromArg, AccessPathOption argAp, AccessPathFront apf, + AccessPath ap, Configuration config +) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow | + flowFwd(ret, fromArg, argAp, apf, ap, config) and + flowOutOfCallNodeCand2(call, ret, node, allowsFieldFlow, config) and + flowCand(node, _, _, _, unbind(config)) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowFwdOutFromArg( + DataFlowCall call, Node node, AccessPath argAp, AccessPathFront apf, AccessPath ap, + Configuration config +) { + flowFwdOut(call, node, true, TAccessPathSome(argAp), apf, ap, config) +} + +/** + * Holds if an argument to `call` is reached in the flow covered by `flowFwd`. + */ +pragma[nomagic] +private predicate flowFwdIsEntered( + DataFlowCall call, boolean fromArg, AccessPathOption argAp, AccessPath ap, Configuration config +) { + exists(ParameterNode p, AccessPathFront apf | + flowFwdIn(call, p, fromArg, argAp, apf, ap, config) and + flowCand(p, true, TAccessPathFrontSome(_), apf, config) + ) +} + +/** + * Holds if `node` with access path `ap` is part of a path from a source to + * a sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink, and if so, `returnAp` + * records the access path of the returned value. + */ +private predicate flow( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flow0(node, toReturn, returnAp, ap, config) and + flowFwd(node, _, _, _, ap, config) +} + +private predicate flow0( + Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, Configuration config +) { + flowFwd(node, _, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + or + exists(Node mid | + localFlowBigStep(node, mid, true, _, config, _) and + flow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + localFlowBigStep(node, mid, false, _, config, _) and + flow(mid, toReturn, returnAp, nil, config) and + ap instanceof AccessPathNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + flow(mid, _, _, ap, config) and + toReturn = false and + returnAp = TAccessPathNone() + ) + or + exists(Node mid, AccessPathNil nil | + flowFwd(node, _, _, _, ap, config) and + additionalJumpStep(node, mid, config) and + flow(mid, _, _, nil, config) and + toReturn = false and + returnAp = TAccessPathNone() and + ap instanceof AccessPathNil + ) + or + // store + exists(TypedContent tc | + flowStore(tc, node, toReturn, returnAp, ap, config) and + flowConsCand(tc, ap, config) + ) + or + // read + exists(Node mid, AccessPath ap0 | + readFlowFwd(node, _, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + flowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(AccessPath returnAp0 | + flowInToReturn(call, node, returnAp0, ap, config) and + flowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + flowOut(_, node, _, _, ap, config) and + toReturn = true and + if flowFwd(node, true, TAccessPathSome(_), _, ap, config) + then returnAp = TAccessPathSome(ap) + else returnAp = TAccessPathNone() +} + +pragma[nomagic] +private predicate storeFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + storeCand2(node1, tc, node2, _, config) and + flowFwdStore(node2, tc, ap, _, _, _, config) and + ap0 = push(tc, ap) +} + +pragma[nomagic] +private predicate flowStore( + TypedContent tc, Node node, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node mid, AccessPath ap0 | + storeFlowFwd(node, tc, mid, ap, ap0, config) and + flow(mid, toReturn, returnAp, ap0, config) + ) +} + +pragma[nomagic] +private predicate readFlowFwd( + Node node1, TypedContent tc, Node node2, AccessPath ap, AccessPath ap0, Configuration config +) { + exists(AccessPathFrontHead apf | + readCandFwd(node1, tc, apf, node2, config) and + flowFwdRead(node2, apf, ap, _, _, _, config) and + ap0 = pop(tc, ap) and + flowFwdConsCand(tc, _, ap0, unbind(config)) + ) +} + +pragma[nomagic] +private predicate flowConsCand(TypedContent tc, AccessPath ap, Configuration config) { + exists(Node n, Node mid | + flow(mid, _, _, ap, config) and + readFlowFwd(n, tc, mid, _, ap, config) + ) +} + +pragma[nomagic] +private predicate flowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(Node out, boolean allowsFieldFlow | + flow(out, toReturn, returnAp, ap, config) and + flowOutOfCallNodeCand2(call, ret, out, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowIn( + DataFlowCall call, ArgumentNode arg, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ParameterNode p, boolean allowsFieldFlow | + flow(p, toReturn, returnAp, ap, config) and + flowIntoCallNodeCand2(call, arg, p, allowsFieldFlow, config) + | + ap instanceof AccessPathNil or allowsFieldFlow = true + ) +} + +pragma[nomagic] +private predicate flowInToReturn( + DataFlowCall call, ArgumentNode arg, AccessPath returnAp, AccessPath ap, Configuration config +) { + flowIn(call, arg, true, TAccessPathSome(returnAp), ap, config) +} + +/** + * Holds if an output from `call` is reached in the flow covered by `flow`. + */ +pragma[nomagic] +private predicate flowIsReturned( + DataFlowCall call, boolean toReturn, AccessPathOption returnAp, AccessPath ap, + Configuration config +) { + exists(ReturnNodeExt ret | + flowOut(call, ret, toReturn, returnAp, ap, config) and + flowFwd(ret, true, TAccessPathSome(_), _, ap, config) + ) +} + +bindingset[conf, result] +private Configuration unbind(Configuration conf) { result >= conf and result <= conf } + +private predicate flow(Node n, Configuration config) { flow(n, _, _, _, config) } + +pragma[noinline] +private predicate parameterFlow( + ParameterNode p, AccessPath ap, DataFlowCallable c, Configuration config +) { + flow(p, true, _, ap, config) and + c = p.getEnclosingCallable() +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParameterNode p, AccessPath ap) { + exists(ReturnNodeExt ret, Configuration config, AccessPath ap0 | + parameterFlow(p, ap, ret.getEnclosingCallable(), config) and + flow(ret, true, TAccessPathSome(_), ap0, config) and + flowFwd(ret, true, TAccessPathSome(ap), _, ap0, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParameterNode p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + flow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TNil(getNodeType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + config = mid.getConfiguration() and + flow(node, _, _, ap, unbind(config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + config.isSink(node) and + flow(node, unbind(config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, any(AccessPathNil nil)) and + config = unbind(mid.getConfiguration()) + ) + ) + } + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private predicate isHidden() { + nodeIsHidden(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + Node node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbind(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNode() = sink.getNode() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbind(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + Node node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { config.isSource(node) } +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { + exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNode() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = getLocalCallContext(cc, midnode.getEnclosingCallable()) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TNil(getNodeType(node)) + or + exists(TypedContent tc | pathStoreStep(mid, node, pop(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, push(tc, ap), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate readCand(Node node1, TypedContent tc, Node node2, Configuration config) { + readCandFwd(node1, tc, _, node2, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + readCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate storeCand(Node node1, TypedContent tc, Node node2, Configuration config) { + storeCand2(node1, tc, node2, _, config) and + flow(node2, config) +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + storeCand(mid.getNode(), tc, node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPath ap, Configuration config +) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPath ap, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private Node getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config +) { + result = kind.getAnOutNode(call) and + flow(result, _, _, ap, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPath ap, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = getAnOutNodeFlow(kind, call, ap, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap +) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() + ) +} + +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPath ap, Configuration config +) { + exists(ParameterNode p | + flow(p, _, _, ap, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + pathIntoArg(mid, i, outercc, call, ap) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), ap, mid.getConfiguration()) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParameterNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, unbind(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind | + pathThroughCallable0(call, mid, kind, cc, ap) and + out = getAnOutNodeFlow(kind, call, ap, mid.getConfiguration()) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = n.getEnclosingCallable()) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { interestingCallableSrc(c, config) } or + TCallableSrc() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, config) and + ce2 = TCallable(c2, unbind(config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(n.getEnclosingCallable(), config) + ) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + + abstract AccessPathFront getFront(); + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + + override AccessPathFront getFront() { + exists(DataFlowType t | this = TPartialNil(t) | result = TFrontNil(t)) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + + override AccessPathFront getFront() { + exists(TypedContent tc | this = TPartialCons(tc, _) | result = TFrontHead(tc)) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParameterNode p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeMk( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNode mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + if node instanceof CastingNode + then compatibleTypes(getNodeType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNode().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | s = this.(PartialPathNodePriv).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodePriv).getCallContext().toString() + ">" + } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodePriv extends PartialPathNode { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodePriv() { this = TPartialPathNodeMk(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodePriv getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + } + + private predicate partialPathStep( + PartialPathNodePriv mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCall(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeType(node)) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodePriv mid, PartialAccessPath ap1, TypedContent tc, Node node, + PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodePriv mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodePriv mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodePriv mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + not innercc instanceof CallContextCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodePriv mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodePriv mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgumentNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodePriv mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodePriv mid, ParameterNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodePriv mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodePriv mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParameterNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodePriv mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + configuration.isSource(source.getNode()) and + node = source.getASuccessor+() +} diff --git a/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll new file mode 100644 index 00000000000..b509fad9cd2 --- /dev/null +++ b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll @@ -0,0 +1,115 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** + * Holds if `sink` is a relevant taint sink. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + + /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */ + predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) } + + /** + * Holds if the additional taint propagation step from `node1` to `node2` + * must be taken into account in the analysis. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll new file mode 100644 index 00000000000..1130c2e42e1 --- /dev/null +++ b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import semmle.go.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import semmle.go.dataflow.DataFlow2::DataFlow2 as DataFlow +} diff --git a/ql/src/semmle/go/security/AllocationSizeOverflow.qll b/ql/src/semmle/go/security/AllocationSizeOverflow.qll index f970154ee59..ff55b887bc3 100644 --- a/ql/src/semmle/go/security/AllocationSizeOverflow.qll +++ b/ql/src/semmle/go/security/AllocationSizeOverflow.qll @@ -13,6 +13,29 @@ import go module AllocationSizeOverflow { import AllocationSizeOverflowCustomizations::AllocationSizeOverflow + /** + * A taint-tracking configuration for identifying `len(...)` calls whose argument may be large. + */ + class FindLargeLensConfiguration extends TaintTracking2::Configuration { + FindLargeLensConfiguration() { this = "AllocationSizeOverflow::FindLargeLens" } + + override predicate isSource(DataFlow::Node nd) { nd instanceof Source } + + override predicate isSink(DataFlow::Node nd) { nd = Builtin::len().getACall().getArgument(0) } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate isSanitizer(DataFlow::Node nd) { nd instanceof Sanitizer } + } + + private DataFlow::CallNode getALargeLenCall() { + exists(FindLargeLensConfiguration config, DataFlow::Node lenArg | config.hasFlow(_, lenArg) | + result.getArgument(0) = lenArg + ) + } + /** * A taint-tracking configuration for identifying allocation-size overflows. */ @@ -33,6 +56,12 @@ module AllocationSizeOverflow { override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { additionalStep(pred, succ) + or + exists(DataFlow::CallNode c | + c = getALargeLenCall() and + pred = c.getArgument(0) and + succ = c + ) } override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { diff --git a/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll b/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll index dffd5ac098e..62bfd40f347 100644 --- a/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll +++ b/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll @@ -178,12 +178,6 @@ module AllocationSizeOverflow { * or through an arithmetic operation (other than remainder). */ predicate additionalStep(DataFlow::Node pred, DataFlow::Node succ) { - exists(DataFlow::CallNode c | - c = Builtin::len().getACall() and - pred = c.getArgument(0) and - succ = c - ) - or succ.asExpr().(ArithmeticExpr).getAnOperand() = pred.asExpr() and not succ.asExpr() instanceof RemExpr } diff --git a/ql/test/query-tests/Security/CWE-190/array_vs_contents.go b/ql/test/query-tests/Security/CWE-190/array_vs_contents.go new file mode 100644 index 00000000000..a07727c38bc --- /dev/null +++ b/ql/test/query-tests/Security/CWE-190/array_vs_contents.go @@ -0,0 +1,36 @@ +package main + +import ( + "io/ioutil" + "net/http" +) + +// Two cases exposing the difference between tainted array contents +// and the array itself being tainted. One uses an explicitly allocated +// array, the other the implicit array allocated to hold varargs; in both +// cases the array has fixed small size but has a tainted integer written +// to one of its cells. + +func f(request *http.Request) []byte { + + f, _ := ioutil.ReadAll(request.Body) + array := make([]int, 1) + array[0] = len(f) + alloc := make([]byte, len(array)+3) // GOOD: len(array) == 1 + return alloc + +} + +func takesBoundedVarargs(args ...interface{}) []byte { + + alloc := make([]byte, len(args)+1) // GOOD (when called from `callsVarargs`): len(args) == 1 + return alloc + +} + +func callsVarargs(request *http.Request) []byte { + + f, _ := ioutil.ReadAll(request.Body) + return takesBoundedVarargs(len(f)) + +}