DataFlow: Add code to do overlay-informed dataflow.

This commit is contained in:
Alex Eyers-Taylor
2025-09-18 18:55:18 +01:00
committed by Alexander Eyers-Taylor
parent 82e4fc9f0f
commit c49e2ab2da
5 changed files with 92 additions and 5 deletions

View File

@@ -6,6 +6,7 @@ module;
private import semmle.code.Location
private import codeql.dataflow.DataFlow
private import semmle.code.java.Overlay
module Private {
import DataFlowPrivate
@@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig<Location> {
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
predicate viableImplInCallContext = Private::viableImplInCallContext/2;
// Forwards to `isOverlay/0`. NOTE(review): presumably provided by the
// `semmle.code.java.Overlay` import — confirm.
predicate isEvaluatingInOverlay = isOverlay/0;
}

View File

@@ -349,6 +349,18 @@ signature module InputSig<LocationSig Location> {
/** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */
default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() }
/**
* Holds if the evaluator is currently evaluating with an overlay. The
* implementation of this predicate needs to be `overlay[local]`. For a
* language with no overlay support, `none()` is a valid implementation,
* and it is what this default provides.
*
* The meaning depends on where the predicate is called from:
* - When called from a local predicate, this predicate holds if we are in
*   the overlay-only local evaluation.
* - When called from a global predicate, this predicate holds if we are
*   evaluating globally with overlay and base both visible.
*/
default predicate isEvaluatingInOverlay() { none() }
}
module Configs<LocationSig Location, InputSig<Location> Lang> {
@@ -1101,6 +1113,8 @@ module DataFlowMake<LocationSig Location, InputSig<Location> Lang> {
/**
* Holds if `Config::isAdditionalFlowStep(node1, node2)` holds; `model` is
* always the fixed label "Config".
*/
predicate isAdditionalFlowStep(Node node1, Node node2, string model) {
Config::isAdditionalFlowStep(node1, node2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module Stage1 = ImplStage1<C>;
@@ -1130,6 +1144,8 @@ module DataFlowMake<LocationSig Location, InputSig<Location> Lang> {
) {
Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module Stage1 = ImplStage1<C>;

View File

@@ -167,6 +167,8 @@ module TaintFlowMake<
) {
Config::isAdditionalFlowStep(node1, node2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module C implements DataFlowInternal::FullStateConfigSig {
@@ -201,6 +203,8 @@ module TaintFlowMake<
) {
Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module C implements DataFlowInternal::FullStateConfigSig {
@@ -232,6 +236,8 @@ module TaintFlowMake<
) {
Config::isAdditionalFlowStep(node1, node2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module C implements DataFlowInternal::FullStateConfigSig {
@@ -270,6 +276,8 @@ module TaintFlowMake<
) {
Config::isAdditionalFlowStep(node1, state1, node2, state2) and model = "Config"
}
/** Never holds (`none()`): this configuration does not opt in to overlay-informed incremental mode. */
predicate observeOverlayInformedIncrementalMode() { none() }
}
private module C implements DataFlowInternal::FullStateConfigSig {

View File

@@ -143,6 +143,14 @@ module MakeImpl<LocationSig Location, InputSig<Location> Lang> {
*/
predicate observeDiffInformedIncrementalMode();
/**
* Holds if sources and sinks should be filtered to only include those that
* are in the overlay database. This only has an effect when running in
* overlay-informed incremental mode.
*
* This should be used in conjunction with the `OverlayImpl` implementation
* so that the base results are merged back in.
*/
predicate observeOverlayInformedIncrementalMode();
/**
* Gets a selected location for `source`.
* NOTE(review): appears to be the location checked against the diff when
* sources are filtered in diff-informed incremental mode — confirm.
*/
Location getASelectedSourceLocation(Node source);
/**
* Gets a selected location for `sink`.
* NOTE(review): appears to be the location checked against the diff when
* sinks are filtered in diff-informed incremental mode — confirm.
*/
Location getASelectedSinkLocation(Node sink);

View File

@@ -4,7 +4,7 @@
* Provides an implementation of a fast initial pruning of global
* (interprocedural) data flow reachability (Stage 1).
*/
overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`.
module;
private import codeql.util.Unit
@@ -129,23 +129,75 @@ module MakeImplStage1<LocationSig Location, InputSig<Location> Lang> {
private module AlertFiltering = AlertFilteringImpl<Location>;
/**
 * Holds if `node` is visible in overlay-only local evaluation.
 *
 * This predicate must be `overlay[local?]`, whether it is annotated
 * directly or inherits the annotation from an enclosing scope. When `Node`
 * is global for the language under analysis, every node counts as an
 * overlay node, so sources and sinks are effectively not filtered by
 * overlay at all.
 */
private predicate isOverlayNode(Node node) {
  // In overlay mode, every locally visible node is an overlay node.
  exists(node) and
  isEvaluatingInOverlay()
}
/**
 * The filter applied to `node` when the configuration is not meant to be
 * diff-informed.
 *
 * Used for both sources and sinks.
 */
pragma[inline]
private predicate nonDiffInformedFilter(Node node) {
  // Nodes are only restricted when both of the following hold:
  //  - we are in global evaluation with an overlay present, and
  //  - the configuration merges overlays.
  // In that case `node` must be visible in the overlay. In base-only global
  // evaluation, or for a configuration that does not merge overlays, no
  // node is filtered out.
  (
    isEvaluatingInOverlay() and
    Config::observeOverlayInformedIncrementalMode()
  )
  implies
  isOverlayNode(node)
}
/**
 * Holds if `source` is a source of the configuration that remains after
 * incremental filtering (diff-informed or overlay-informed).
 */
overlay[global]
pragma[nomagic]
private predicate isFilteredSource(Node source) {
  Config::isSource(source, _) and
  // Data flow is always incremental in one of two ways.
  // 1. If the configuration is diff-informed, we filter to only include nodes in the diff,
  //    which gives the smallest set of nodes.
  //    If diff information is not available, we do not filter at all.
  // 2. If not, in global evaluation with overlay, we filter to only
  //    include nodes from files in the overlay; flow from
  //    other nodes will be added back later.
  // We start by seeing if we should be in case 1.
  if Config::observeDiffInformedIncrementalMode()
  then
    // Case 1: We are meant to be diff-informed.
    // We still only filter if we have diff information.
    AlertFiltering::diffInformationAvailable()
    implies
    AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source))
  else
    // Case 2: fall back to the overlay-based filter shared with sinks.
    nonDiffInformedFilter(source)
}
/**
 * Holds if `sink` is a sink of the configuration (with or without flow
 * state) that remains after incremental filtering (diff-informed or
 * overlay-informed).
 */
overlay[global]
pragma[nomagic]
private predicate isFilteredSink(Node sink) {
  (
    Config::isSink(sink, _) or
    Config::isSink(sink)
  ) and
  // See the comments in `isFilteredSource` for the reasoning behind the following.
  if Config::observeDiffInformedIncrementalMode()
  then
    // Diff-informed: only filter when diff information is actually available.
    AlertFiltering::diffInformationAvailable()
    implies
    AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink))
  else
    // Not diff-informed: fall back to the overlay-based filter shared with sources.
    nonDiffInformedFilter(sink)
}
/** Holds if at least one source remains after filtering. */
private predicate hasFilteredSource() { exists(Node source | isFilteredSource(source)) }