diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll index 65034ee08b9..97f5020142e 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll @@ -6,6 +6,7 @@ module; private import semmle.code.Location private import codeql.dataflow.DataFlow +private import semmle.code.java.Overlay module Private { import DataFlowPrivate @@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig { predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1; predicate viableImplInCallContext = Private::viableImplInCallContext/2; + + predicate isEvaluatingInOverlay = isOverlay/0; } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 54263ebea94..f2fd4979c57 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -349,6 +349,18 @@ signature module InputSig { /** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */ default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() } + + /** + * Holds if the evaluator is currently evaluating with an overlay. The + * implementation of this predicate needs to be `overlay[local]`. For a + * language with no overlay support, `none()` is a valid implementation. + * + * When called from a local predicate, this predicate holds if we are in the + * overlay-only local evaluation. When called from a global predicate, this + * predicate holds if we are evaluating globally with overlay and base both + * visible. 
+ */ + default predicate isEvaluatingInOverlay() { none() } } module Configs Lang> { @@ -1101,6 +1113,8 @@ module DataFlowMake Lang> { predicate isAdditionalFlowStep(Node node1, Node node2, string model) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module Stage1 = ImplStage1; @@ -1130,6 +1144,8 @@ module DataFlowMake Lang> { ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module Stage1 = ImplStage1; diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index 5a4235407b2..1f6cabbbba1 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -167,6 +167,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -201,6 +203,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -232,6 +236,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -270,6 +276,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll 
b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a7e0736432a..c21858cc167 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -143,6 +143,14 @@ module MakeImpl Lang> { */ predicate observeDiffInformedIncrementalMode(); + /** + * Holds if sources and sinks should be filtered to only include those that + * are in the overlay database. This only has an effect when running + * in overlay-informed incremental mode. This should be used in conjunction + * with the `OverlayImpl` implementation to merge the base results back in. + */ + predicate observeOverlayInformedIncrementalMode(); + Location getASelectedSourceLocation(Node source); Location getASelectedSinkLocation(Node sink); diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index c7883df0de1..07ebe057f02 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -4,7 +4,7 @@ * Provides an implementation of a fast initial pruning of global * (interprocedural) data flow reachability (Stage 1). */ -overlay[local?] +overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`. module; private import codeql.util.Unit @@ -129,23 +129,75 @@ module MakeImplStage1 Lang> { private module AlertFiltering = AlertFilteringImpl; + /** + * Holds if the given node is visible in overlay-only local evaluation. + * + * This predicate needs to be `overlay[local?]`, either directly or + * through annotations from an outer scope. If `Node` is global for the + * language under analysis, then every node is considered an overlay + * node, which means there will effectively be no overlay-based + * filtering of sources and sinks. 
+ */ + private predicate isOverlayNode(Node node) { + isEvaluatingInOverlay() and + // Any local node is an overlay node if we are evaluating in overlay mode + exists(node) + } + + /** + * The filtering applied when the configuration is not meant to be diff-informed. + * + * Shared between sources and sinks. + */ + pragma[inline] + private predicate nonDiffInformedFilter(Node node) { + // If we are in base-only global evaluation, do not filter out any sources or sinks. + not isEvaluatingInOverlay() + or + // If the configuration doesn't merge overlays, do not filter out any sources or sinks. + not Config::observeOverlayInformedIncrementalMode() + or + // If we are in global evaluation with an overlay present, restrict + // sources and sinks to those visible in the overlay. + isOverlayNode(node) + } + + overlay[global] pragma[nomagic] private predicate isFilteredSource(Node source) { Config::isSource(source, _) and + // Data flow can be incremental in one of two ways. + // 1. If the configuration is diff-informed, we filter to only include nodes in the diff, + // which gives the smallest set of nodes. + // If diff information is not available, we do not filter at all. + // 2. If not, and the configuration observes overlay-informed incremental mode, then in global evaluation with overlay we filter to only + // include nodes from files in the overlay; flow from + // other nodes will be added back later. + // We start by seeing if we should be in case 1. if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) - else any() + then + // Case 1: We are meant to be diff-informed. + // We still only filter if we have diff information. 
+ AlertFiltering::diffInformationAvailable() + implies + AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source)) + else nonDiffInformedFilter(source) } + overlay[global] pragma[nomagic] private predicate isFilteredSink(Node sink) { ( Config::isSink(sink, _) or Config::isSink(sink) ) and + // See the comments in `isFilteredSource` for the reasoning behind the following. if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) - else any() + then + AlertFiltering::diffInformationAvailable() + implies + AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink)) + else nonDiffInformedFilter(sink) } private predicate hasFilteredSource() { isFilteredSource(_) }