From 8c02130bcfa11d152dc656f383de32abe5bf23fe Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Thu, 18 Sep 2025 18:46:11 +0100 Subject: [PATCH 1/7] DataFlow: Rearrange files to group the parts that can be shared with overlay aware. --- shared/dataflow/codeql/dataflow/DataFlow.qll | 106 +++++++-------- .../codeql/dataflow/TaintTracking.qll | 124 +++++++++--------- 2 files changed, 115 insertions(+), 115 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 3483287e3b3..54263ebea94 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -691,59 +691,6 @@ module DataFlowMake Lang> { predicate flowToExpr(DataFlowExpr sink); } - /** - * Constructs a global data flow computation. - */ - module Global implements GlobalFlowSig { - private module C implements FullStateConfigSig { - import DefaultState - import Config - - predicate accessPathLimit = Config::accessPathLimit/0; - - predicate isAdditionalFlowStep(Node node1, Node node2, string model) { - Config::isAdditionalFlowStep(node1, node2) and model = "Config" - } - } - - private module Stage1 = ImplStage1; - - import Stage1::PartialFlow - - private module Flow = Impl; - - import Flow - } - - /** - * Constructs a global data flow computation using flow state. - */ - module GlobalWithState implements GlobalFlowSig { - private module C implements FullStateConfigSig { - import Config - - predicate accessPathLimit = Config::accessPathLimit/0; - - predicate isAdditionalFlowStep(Node node1, Node node2, string model) { - Config::isAdditionalFlowStep(node1, node2) and model = "Config" - } - - predicate isAdditionalFlowStep( - Node node1, FlowState state1, Node node2, FlowState state2, string model - ) { - Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" - } - } - - private module Stage1 = ImplStage1; - - import Stage1::PartialFlow - - private module Flow = Impl; - - import Flow - } - signature class PathNodeSig { /** Gets a textual representation of this element. */ string toString(); @@ -1140,4 +1087,57 @@ module DataFlowMake Lang> { // Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates import PathGraph } + + /** + * Constructs a global data flow computation. + */ + module Global implements GlobalFlowSig { + private module C implements FullStateConfigSig { + import DefaultState + import Config + + predicate accessPathLimit = Config::accessPathLimit/0; + + predicate isAdditionalFlowStep(Node node1, Node node2, string model) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + } + + private module Stage1 = ImplStage1; + + import Stage1::PartialFlow + + private module Flow = Impl; + + import Flow + } + + /** + * Constructs a global data flow computation using flow state. + */ + module GlobalWithState implements GlobalFlowSig { + private module C implements FullStateConfigSig { + import Config + + predicate accessPathLimit = Config::accessPathLimit/0; + + predicate isAdditionalFlowStep(Node node1, Node node2, string model) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate isAdditionalFlowStep( + Node node1, FlowState state1, Node node2, FlowState state2, string model + ) { + Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" + } + } + + private module Stage1 = ImplStage1; + + import Stage1::PartialFlow + + private module Flow = Impl; + + import Flow + } } diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index bd4b4ecd6ca..5a4235407b2 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -83,68 +83,6 @@ module TaintFlowMake< } } - /** - * Constructs a global taint tracking computation. - */ - module Global implements DataFlow::GlobalFlowSig { - private module Config0 implements DataFlowInternal::FullStateConfigSig { - import DataFlowInternal::DefaultState - import Config - - predicate isAdditionalFlowStep( - DataFlowLang::Node node1, DataFlowLang::Node node2, string model - ) { - Config::isAdditionalFlowStep(node1, node2) and model = "Config" - } - } - - private module C implements DataFlowInternal::FullStateConfigSig { - import AddTaintDefaults - } - - private module Stage1 = DataFlowInternalStage1::ImplStage1; - - import Stage1::PartialFlow - - private module Flow = DataFlowInternal::Impl; - - import Flow - } - - /** - * Constructs a global taint tracking computation using flow state. - */ - module GlobalWithState implements DataFlow::GlobalFlowSig { - private module Config0 implements DataFlowInternal::FullStateConfigSig { - import Config - - predicate isAdditionalFlowStep( - DataFlowLang::Node node1, DataFlowLang::Node node2, string model - ) { - Config::isAdditionalFlowStep(node1, node2) and model = "Config" - } - - predicate isAdditionalFlowStep( - DataFlowLang::Node node1, FlowState state1, DataFlowLang::Node node2, FlowState state2, - string model - ) { - Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" - } - } - - private module C implements DataFlowInternal::FullStateConfigSig { - import AddTaintDefaults - } - - private module Stage1 = DataFlowInternalStage1::ImplStage1; - - import Stage1::PartialFlow - - private module Flow = DataFlowInternal::Impl; - - import Flow - } - signature int speculationLimitSig(); private module AddSpeculativeTaintSteps< @@ -216,6 +154,68 @@ module TaintFlowMake< } } + /** + * Constructs a global taint tracking computation. + */ + module Global implements DataFlow::GlobalFlowSig { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::DefaultState + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::Impl; + + import Flow + } + + /** + * Constructs a global taint tracking computation using flow state. + */ + module GlobalWithState implements DataFlow::GlobalFlowSig { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, FlowState state1, DataFlowLang::Node node2, FlowState state2, + string model + ) { + Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::Impl; + + import Flow + } + /** * Constructs a global taint tracking computation that also allows a given * maximum number of speculative taint steps. From 82e4fc9f0ff21042bd39ade62910c4070a9a2b33 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Thu, 18 Sep 2025 18:48:01 +0100 Subject: [PATCH 2/7] AlertFiltering: Expose filtering further. --- shared/util/codeql/util/AlertFiltering.qll | 26 +++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 1480421ae59..60597327c3b 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -82,6 +82,21 @@ module AlertFilteringImpl { ) } + /** Holds if diff information is available in this evaluation. */ + predicate diffInformationAvailable() { + restrictAlertsTo(_, _, _) or restrictAlertsToExactLocation(_, _, _, _, _) + } + + /** + * Holds if diff information is available, and `filePath` is in the diff + * range. + */ + predicate fileIsInDiff(string filePath) { + restrictAlertsTo(filePath, _, _) + or + restrictAlertsToExactLocation(filePath, _, _, _, _) + } + /** * Holds if the given location is a match for one of the active filtering * predicates in this module, or if all filtering predicates are inactive @@ -92,8 +107,17 @@ module AlertFilteringImpl { */ bindingset[location] predicate filterByLocation(Location location) { - not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) + not diffInformationAvailable() or + locationIsInDiff(location) + } + + /** + * Like `filterByLocation`, except that if there is no diff range, this + * predicate never holds. + */ + bindingset[location] + predicate locationIsInDiff(Location location) { exists(string filePath | restrictAlertsToEntireFile(filePath) and location.hasLocationInfo(filePath, _, _, _, _) From c49e2ab2dacc12f396db0eb34341bb8acf51adc0 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Thu, 18 Sep 2025 18:55:18 +0100 Subject: [PATCH 3/7] DataFlow: Add code to do overlay informed dataflow. --- .../internal/DataFlowImplSpecific.qll | 3 + shared/dataflow/codeql/dataflow/DataFlow.qll | 16 +++++ .../codeql/dataflow/TaintTracking.qll | 8 +++ .../codeql/dataflow/internal/DataFlowImpl.qll | 8 +++ .../dataflow/internal/DataFlowImplStage1.qll | 62 +++++++++++++++++-- 5 files changed, 92 insertions(+), 5 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll index 65034ee08b9..97f5020142e 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll @@ -6,6 +6,7 @@ module; private import semmle.code.Location private import codeql.dataflow.DataFlow +private import semmle.code.java.Overlay module Private { import DataFlowPrivate @@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig { predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1; predicate viableImplInCallContext = Private::viableImplInCallContext/2; + + predicate isEvaluatingInOverlay = isOverlay/0; } diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 54263ebea94..f2fd4979c57 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -349,6 +349,18 @@ signature module InputSig { /** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */ default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() } + + /** + * Holds if the evaluator is currently evaluating with an overlay. The + * implementation of this predicate needs to be `overlay[local]`. For a + * language with no overlay support, `none()` is a valid implementation. + * + * When called from a local predicate, this predicate holds if we are in the + * overlay-only local evaluation. When called from a global predicate, this + * predicate holds if we are evaluating globally with overlay and base both + * visible. + */ + default predicate isEvaluatingInOverlay() { none() } } module Configs Lang> { @@ -1101,6 +1113,8 @@ module DataFlowMake Lang> { predicate isAdditionalFlowStep(Node node1, Node node2, string model) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module Stage1 = ImplStage1; @@ -1130,6 +1144,8 @@ module DataFlowMake Lang> { ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module Stage1 = ImplStage1; diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index 5a4235407b2..1f6cabbbba1 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -167,6 +167,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -201,6 +203,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -232,6 +236,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, node2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { @@ -270,6 +276,8 @@ module TaintFlowMake< ) { Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" } + + predicate observeOverlayInformedIncrementalMode() { none() } } private module C implements DataFlowInternal::FullStateConfigSig { diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index a7e0736432a..c21858cc167 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -143,6 +143,14 @@ module MakeImpl Lang> { */ predicate observeDiffInformedIncrementalMode(); + /** + * Holds if sources and sinks should be filtered to only include those that + * are in the overlay database. This only has an effect when running + * in overlay-informed incremental mode. This should be used in conjunction + * with the `OverlayImpl` implementation to merge the base results back in. + */ + predicate observeOverlayInformedIncrementalMode(); + Location getASelectedSourceLocation(Node source); Location getASelectedSinkLocation(Node sink); diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index c7883df0de1..07ebe057f02 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -4,7 +4,7 @@ * Provides an implementation of a fast initial pruning of global * (interprocedural) data flow reachability (Stage 1). */ -overlay[local?] +overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`. module; private import codeql.util.Unit @@ -129,23 +129,75 @@ module MakeImplStage1 Lang> { private module AlertFiltering = AlertFilteringImpl; + /** + * Holds if the given node is visible in overlay-only local evaluation. + * + * This predicate needs to be `overlay[local?]`, either directly or + * through annotations from an outer scope. If `Node` is global for the + * language under analysis, then every node is considered an overlay + * node, which means there will effectively be no overlay-based + * filtering of sources and sinks. + */ + private predicate isOverlayNode(Node node) { + isEvaluatingInOverlay() and + // Any local node is an overlay node if we are evaluating in overlay mode + exists(node) + } + + /** + * The filtering if we aren't meant to be diff-informed. + * + * Shared between sources and sinks. + */ + pragma[inline] + private predicate nonDiffInformedFilter(Node node) { + // If we are in base-only global evaluation, do not filter out any sources. + not isEvaluatingInOverlay() + or + // If the configuration doesn't merge overlays, do not filter out any sources. + not Config::observeOverlayInformedIncrementalMode() + or + // If we are in global evaluation with an overlay present, restrict + // sources to those visible in the overlay. + isOverlayNode(node) + } + + overlay[global] pragma[nomagic] private predicate isFilteredSource(Node source) { Config::isSource(source, _) and + // Data flow is always incremental in one of two ways. + // 1. If the configuration is diff-informed, we filter to only include nodes in the diff, + // which gives the smallest set of nodes. + // If diff information is not available, we do not filter at all. + // 2. If not, in global evaluation with overlay, we filter to only + // include nodes from files in the overlay; flow from + // other nodes will be added back later. + // We start by seeing if we should be in case 1. if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source)) - else any() + then + // Case 1: We are meant to be diff-informed. + // We still only filter if we have diff information. + AlertFiltering::diffInformationAvailable() + implies + AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source)) + else nonDiffInformedFilter(source) } + overlay[global] pragma[nomagic] private predicate isFilteredSink(Node sink) { ( Config::isSink(sink, _) or Config::isSink(sink) ) and + // See the comments in `isFilteredSource` for the reasoning behind the following. if Config::observeDiffInformedIncrementalMode() - then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink)) - else any() + then + AlertFiltering::diffInformationAvailable() + implies + AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink)) + else nonDiffInformedFilter(sink) } private predicate hasFilteredSource() { isFilteredSource(_) } From 3c110f2eb80ea316e3b9f0e60feba02e65abf983 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Fri, 19 Sep 2025 15:56:02 +0100 Subject: [PATCH 4/7] DataFlow: Add code for merging base with overlay. --- shared/dataflow/codeql/dataflow/DataFlow.qll | 77 +++++++- .../codeql/dataflow/TaintTracking.qll | 177 +++++++++++++++++- .../codeql/dataflow/internal/DataFlowImpl.qll | 52 ++++- 3 files changed, 296 insertions(+), 10 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index f2fd4979c57..94bae26e7aa 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -657,7 +657,7 @@ private module PathGraphSigMod { } } -module DataFlowMake Lang> { +module DataFlowMakeCore Lang> { private import Lang private import internal.DataFlowImpl::MakeImpl private import internal.DataFlowImplStage1::MakeImplStage1 @@ -1099,6 +1099,13 @@ module DataFlowMake Lang> { // Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates import PathGraph } +} + +module DataFlowMake Lang> { + import DataFlowMakeCore + private import Lang + private import internal.DataFlowImpl::MakeImpl + private import internal.DataFlowImplStage1::MakeImplStage1 /** * Constructs a global data flow computation. @@ -1157,3 +1164,71 @@ module DataFlowMake Lang> { import Flow } } + +module DataFlowMakeOverlay Lang> { + import DataFlowMakeCore + private import Lang + private import internal.DataFlowImpl::MakeImpl + private import internal.DataFlowImplStage1::MakeImplStage1 + + /** + * Constructs a global data flow computation. + */ + module Global implements GlobalFlowSig { + private module C implements FullStateConfigSig { + import DefaultState + import Config + + predicate accessPathLimit = Config::accessPathLimit/0; + + predicate isAdditionalFlowStep(Node node1, Node node2, string model) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module Stage1 = ImplStage1; + + import Stage1::PartialFlow + + private module Flow = OverlayImpl; + + import Flow + } + + /** + * Constructs a global data flow computation using flow state. + */ + module GlobalWithState implements GlobalFlowSig { + private module C implements FullStateConfigSig { + import Config + + predicate accessPathLimit = Config::accessPathLimit/0; + + predicate isAdditionalFlowStep(Node node1, Node node2, string model) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate isAdditionalFlowStep( + Node node1, FlowState state1, Node node2, FlowState state2, string model + ) { + Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module Stage1 = ImplStage1; + + import Stage1::PartialFlow + + private module Flow = OverlayImpl; + + import Flow + } +} diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index 1f6cabbbba1..cb4fad7c8ce 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -47,16 +47,16 @@ signature module InputSig Lang> { /** * Construct the modules for taint-tracking analyses. */ -module TaintFlowMake< +private module TaintFlowMakeCore< LocationSig Location, DF::InputSig DataFlowLang, InputSig TaintTrackingLang> { - private import TaintTrackingLang - private import DF::DataFlowMake as DataFlow - private import MakeImpl as DataFlowInternal - private import MakeImplStage1 as DataFlowInternalStage1 + import TaintTrackingLang + import DF::DataFlowMakeCore as DataFlow + import MakeImpl as DataFlowInternal + import MakeImplStage1 as DataFlowInternalStage1 - private module AddTaintDefaults implements + module AddTaintDefaults implements DataFlowInternal::FullStateConfigSig { import Config @@ -85,7 +85,7 @@ module TaintFlowMake< signature int speculationLimitSig(); - private module AddSpeculativeTaintSteps< + module AddSpeculativeTaintSteps< DataFlowInternal::FullStateConfigSig Config, speculationLimitSig/0 speculationLimit> implements DataFlowInternal::FullStateConfigSig { @@ -153,6 +153,13 @@ module TaintFlowMake< state1.getState() = state2.getState() } } +} + +module TaintFlowMake< + LocationSig Location, DF::InputSig DataFlowLang, + InputSig TaintTrackingLang> +{ + private import TaintFlowMakeCore /** * Constructs a global taint tracking computation. @@ -288,7 +295,161 @@ module TaintFlowMake< import Stage1::PartialFlow - private module Flow = DataFlowInternal::Impl; + private module Flow = DataFlowInternal::OverlayImpl; + + import Flow + } +} + +module TaintFlowMakeOverlay< + LocationSig Location, DF::InputSig DataFlowLang, + InputSig TaintTrackingLang> +{ + private import TaintFlowMakeCore + + /** + * Constructs a global taint tracking computation. + */ + module Global implements DataFlow::GlobalFlowSig { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::DefaultState + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::OverlayImpl; + + import Flow + } + + /** + * Constructs a global taint tracking computation using flow state. + */ + module GlobalWithState implements DataFlow::GlobalFlowSig { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, FlowState state1, DataFlowLang::Node node2, FlowState state2, + string model + ) { + Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::OverlayImpl; + + import Flow + } + + /** + * Constructs a global taint tracking computation that also allows a given + * maximum number of speculative taint steps. + */ + module SpeculativeGlobal + implements DataFlow::GlobalFlowSig + { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import DataFlowInternal::DefaultState + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults> + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::Impl; + + import Flow + } + + /** + * Constructs a global taint tracking computation using flow state that also + * allows a given maximum number of speculative taint steps. + */ + module SpeculativeGlobalWithState< + DataFlow::StateConfigSig Config, speculationLimitSig/0 speculationLimit> implements + DataFlow::GlobalFlowSig + { + private module Config0 implements DataFlowInternal::FullStateConfigSig { + import Config + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, DataFlowLang::Node node2, string model + ) { + Config::isAdditionalFlowStep(node1, node2) and model = "Config" + } + + predicate isAdditionalFlowStep( + DataFlowLang::Node node1, FlowState state1, DataFlowLang::Node node2, FlowState state2, + string model + ) { + Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config" + } + + predicate observeOverlayInformedIncrementalMode() { + not Config::observeDiffInformedIncrementalMode() + } + } + + private module C implements DataFlowInternal::FullStateConfigSig { + import AddTaintDefaults> + } + + private module Stage1 = DataFlowInternalStage1::ImplStage1; + + import Stage1::PartialFlow + + private module Flow = DataFlowInternal::OverlayImpl; import Flow } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index c21858cc167..81ac268b8b3 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -15,7 +15,7 @@ private import DataFlowImplStage1 module MakeImpl Lang> { private import Lang - private import DataFlowMake + private import DataFlowMakeCore private import MakeImplStage1 private import DataFlowImplCommon::MakeImplCommon private import DataFlowImplCommonPublic @@ -179,6 +179,56 @@ module MakeImpl Lang> { } } + /** + * Constructs a data flow computation given a full input configuration, and + * an initial stage 1 pruning with merging of overlay and base results. + */ + module OverlayImpl Stage1> { + module Base = Impl; + + import Base + + /** + * Holds if data can flow from `source` to `sink`. + * + * This is a local predicate that only has results local to the overlay/base database. + */ + predicate flowLocal(Node source, Node sink) = forceLocal(Base::flow/2)(source, sink) + + /** + * Holds if data can flow from `source` to `sink`. + */ + predicate flow(Node source, Node sink) { + Base::flow(source, sink) + or + // If we are overlay informed (i.e. we are not diff-informed), we + // merge in the local results which includes the base database results. + flowLocal(source, sink) and Config::observeOverlayInformedIncrementalMode() + } + + /** + * Holds if data can flow from some source to `sink`. + * This predicate that only has results local to the overlay/base database. + */ + predicate flowToLocal(Node sink) = forceLocal(Base::flowTo/1)(sink) + + /** + * Holds if data can flow from some source to `sink`. + */ + predicate flowTo(Node sink) { + Base::flowTo(sink) + or + // If we are overlay informed (i.e. we are not diff-informed), we + // merge in the local results which includes the base database results. + flowToLocal(sink) and Config::observeOverlayInformedIncrementalMode() + } + + /** + * Holds if data can flow from some source to `sink`. + */ + predicate flowToExpr(Lang::DataFlowExpr sink) { flowTo(exprNode(sink)) } + } + /** * Constructs a data flow computation given a full input configuration, and * an initial stage 1 pruning. From 542bdf079291f5ef12ceffcd2bb3142f71ed08e8 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Wed, 24 Sep 2025 14:51:34 +0100 Subject: [PATCH 5/7] Java: Use Overlay dataflow in java. --- java/ql/lib/semmle/code/java/dataflow/DataFlow.qll | 2 +- java/ql/lib/semmle/code/java/dataflow/TaintTracking.qll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll b/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll index 54eb809c7b9..c99a8a5a58e 100644 --- a/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/DataFlow.qll @@ -10,6 +10,6 @@ import java module DataFlow { private import semmle.code.java.dataflow.internal.DataFlowImplSpecific private import codeql.dataflow.DataFlow - import DataFlowMake + import DataFlowMakeOverlay import Public } diff --git a/java/ql/lib/semmle/code/java/dataflow/TaintTracking.qll b/java/ql/lib/semmle/code/java/dataflow/TaintTracking.qll index 159604a95bd..34c7b717428 100644 --- a/java/ql/lib/semmle/code/java/dataflow/TaintTracking.qll +++ b/java/ql/lib/semmle/code/java/dataflow/TaintTracking.qll @@ -13,5 +13,5 @@ module TaintTracking { private import semmle.code.java.dataflow.internal.DataFlowImplSpecific private import semmle.code.java.dataflow.internal.TaintTrackingImplSpecific private import codeql.dataflow.TaintTracking - import TaintFlowMake + import TaintFlowMakeOverlay } From 7a8d2392eeac95ed7010bfacb22c49e3b7470331 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Thu, 25 Sep 2025 17:21:50 +0100 Subject: [PATCH 6/7] Daatflow: Fix bug --- shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index 07ebe057f02..982e4a1c6af 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -150,6 +150,7 @@ module MakeImplStage1 Lang> { * Shared between sources and sinks. */ pragma[inline] + overlay[global] private predicate nonDiffInformedFilter(Node node) { // If we are in base-only global evaluation, do not filter out any sources. not isEvaluatingInOverlay() From 193cd46a767a0854396cf66b31296263ffc45686 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Fri, 3 Oct 2025 20:39:41 +0100 Subject: [PATCH 7/7] DataFlow: Adress comments on overlay informed dataflow --- shared/dataflow/codeql/dataflow/DataFlow.qll | 6 ++---- .../codeql/dataflow/TaintTracking.qll | 6 +++--- .../codeql/dataflow/internal/DataFlowImpl.qll | 19 ++++++++++--------- .../dataflow/internal/DataFlowImplStage1.qll | 8 ++++---- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index 94bae26e7aa..49f84d45b2a 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -657,10 +657,8 @@ private module PathGraphSigMod { } } -module DataFlowMakeCore Lang> { +private module DataFlowMakeCore Lang> { private import Lang - private import internal.DataFlowImpl::MakeImpl - private import internal.DataFlowImplStage1::MakeImplStage1 import Configs /** @@ -1166,7 +1164,7 @@ module DataFlowMake Lang> { } module DataFlowMakeOverlay Lang> { - import DataFlowMakeCore + import DataFlowMake private import Lang private import internal.DataFlowImpl::MakeImpl private import internal.DataFlowImplStage1::MakeImplStage1 diff --git a/shared/dataflow/codeql/dataflow/TaintTracking.qll b/shared/dataflow/codeql/dataflow/TaintTracking.qll index cb4fad7c8ce..7bb9535d096 100644 --- a/shared/dataflow/codeql/dataflow/TaintTracking.qll +++ b/shared/dataflow/codeql/dataflow/TaintTracking.qll @@ -52,7 +52,7 @@ private module TaintFlowMakeCore< InputSig TaintTrackingLang> { import TaintTrackingLang - import DF::DataFlowMakeCore as DataFlow + import DF::DataFlowMake as DataFlow import MakeImpl as DataFlowInternal import MakeImplStage1 as DataFlowInternalStage1 @@ -295,7 +295,7 @@ module TaintFlowMake< import Stage1::PartialFlow - private module Flow = DataFlowInternal::OverlayImpl; + private module Flow = DataFlowInternal::Impl; import Flow } @@ -407,7 +407,7 @@ module TaintFlowMakeOverlay< import Stage1::PartialFlow - private module Flow = DataFlowInternal::Impl; + private module Flow = DataFlowInternal::OverlayImpl; import Flow } diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 81ac268b8b3..099866ab6bd 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -15,7 +15,7 @@ private import DataFlowImplStage1 module MakeImpl Lang> { private import Lang - private import DataFlowMakeCore + private import DataFlowMake private import MakeImplStage1 private import DataFlowImplCommon::MakeImplCommon private import DataFlowImplCommonPublic @@ -145,7 +145,8 @@ module MakeImpl Lang> { /** * Holds if sources and sinks should be filtered to only include those that - * are in the overlay database. This only has an effect when running + * may lead to a flow path with either a source or a sink in the overlay database. + * This only has an effect when running * in overlay-informed incremental mode. This should be used in conjunction * with the `OverlayImpl` implementation to merge the base results back in. */ @@ -184,22 +185,22 @@ module MakeImpl Lang> { * an initial stage 1 pruning with merging of overlay and base results. */ module OverlayImpl Stage1> { - module Base = Impl; + private module Flow = Impl; - import Base + import Flow /** * Holds if data can flow from `source` to `sink`. * * This is a local predicate that only has results local to the overlay/base database. */ - predicate flowLocal(Node source, Node sink) = forceLocal(Base::flow/2)(source, sink) + private predicate flowLocal(Node source, Node sink) = forceLocal(Flow::flow/2)(source, sink) /** * Holds if data can flow from `source` to `sink`. */ predicate flow(Node source, Node sink) { - Base::flow(source, sink) + Flow::flow(source, sink) or // If we are overlay informed (i.e. we are not diff-informed), we // merge in the local results which includes the base database results. @@ -208,15 +209,15 @@ module MakeImpl Lang> { /** * Holds if data can flow from some source to `sink`. - * This predicate that only has results local to the overlay/base database. + * This is a local predicate that only has results local to the overlay/base database. */ - predicate flowToLocal(Node sink) = forceLocal(Base::flowTo/1)(sink) + predicate flowToLocal(Node sink) = forceLocal(Flow::flowTo/1)(sink) /** * Holds if data can flow from some source to `sink`. */ predicate flowTo(Node sink) { - Base::flowTo(sink) + Flow::flowTo(sink) or // If we are overlay informed (i.e. we are not diff-informed), we // merge in the local results which includes the base database results. diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll index 982e4a1c6af..bb79ff62f5b 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplStage1.qll @@ -149,17 +149,17 @@ module MakeImplStage1 Lang> { * * Shared between sources and sinks. */ - pragma[inline] overlay[global] + pragma[inline] private predicate nonDiffInformedFilter(Node node) { - // If we are in base-only global evaluation, do not filter out any sources. + // If we are in base-only global evaluation, do not filter out any sources/sinks. not isEvaluatingInOverlay() or - // If the configuration doesn't merge overlays, do not filter out any sources. + // If the configuration doesn't merge overlays, do not filter out any sources/sinks. not Config::observeOverlayInformedIncrementalMode() or // If we are in global evaluation with an overlay present, restrict - // sources to those visible in the overlay. + // sources/sinks to those visible in the overlay. isOverlayNode(node) }