Compare commits

...

7 Commits

Author SHA1 Message Date
Alex Eyers-Taylor
652c048af5 Dataflow: Remove diff from overlay informed dataflow. 2025-08-21 19:49:48 +01:00
Alex Eyers-Taylor
eb31b1c0d3 Jave: Use force local to make parsing local after global regex finding. 2025-08-21 19:49:48 +01:00
Alex Eyers-Taylor
979df35109 QL: make a fastTC overlay_caller. 2025-08-21 19:49:47 +01:00
Jonas Jensen
93e3702555 TEMP: DataFlow: Java-only flow including base
This won't work for other languages until we can annotate a `forceLocal`
call with `local?`.
2025-08-21 19:49:47 +01:00
Jonas Jensen
1f265efac8 DataFlow: overlay-informed flow includes the diff 2025-08-21 19:49:47 +01:00
Jonas Jensen
8616943794 DataFlow:Run overlay-informed if not diff-informed
To ensure good performance, always run data flow overlay-informed unless
the configuration has opted in to being diff-informed. This change
affects only databases with an overlay and therefore has no immediate
production consequences.
2025-08-21 19:49:47 +01:00
Alex Eyers-Taylor
ee2de0170c SSA global annotation 2025-07-25 12:17:17 +01:00
12 changed files with 209 additions and 20 deletions

View File

@@ -10,7 +10,7 @@
* of the field in case the field is not amenable to a non-trivial SSA
* representation.
*/
overlay[local?]
overlay[local]
module;
import java

View File

@@ -6,6 +6,7 @@ module;
private import semmle.code.Location
private import codeql.dataflow.DataFlow
private import semmle.code.java.Overlay
module Private {
import DataFlowPrivate
@@ -29,4 +30,6 @@ module JavaDataFlow implements InputSig<Location> {
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
predicate viableImplInCallContext = Private::viableImplInCallContext/2;
predicate isEvaluatingInOverlay = isOverlay/0;
}

View File

@@ -83,6 +83,7 @@ overlay[caller?]
pragma[inline]
predicate localFlow(Node node1, Node node2) { node1 = node2 or localFlowStepPlus(node1, node2) }
overlay[caller?]
private predicate localFlowStepPlus(Node node1, Node node2) = fastTC(localFlowStep/2)(node1, node2)
/**

View File

@@ -1,4 +1,4 @@
overlay[local?]
overlay[local]
module;
import java
@@ -157,15 +157,20 @@ private predicate hasEntryDef(TrackedVar v, BasicBlock b) {
}
/** Holds if `n` might update the locally tracked variable `v`. */
overlay[global]
pragma[nomagic]
private predicate uncertainVariableUpdate(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) {
private predicate uncertainVariableUpdateImpl(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) {
exists(Call c | c = n.asCall() | updatesNamedField(c, v, _)) and
b.getNode(i) = n and
hasDominanceInformation(b)
or
uncertainVariableUpdate(v.getQualifier(), n, b, i)
uncertainVariableUpdateImpl(v.getQualifier(), n, b, i)
}
/** Holds if `n` might update the locally tracked variable `v`. */
predicate uncertainVariableUpdate(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) =
forceLocal(uncertainVariableUpdateImpl/4)(v, n, b, i)
private module SsaInput implements SsaImplCommon::InputSig<Location> {
private import java as J
@@ -345,6 +350,7 @@ private module Cached {
* Constructor --(intraInstanceCallEdge)-->+ Method(setter of this.f)
* ```
*/
overlay[global]
private predicate intraInstanceCallEdge(Callable c1, Method m2) {
exists(MethodCall ma, RefType t1 |
ma.getCaller() = c1 and
@@ -365,6 +371,7 @@ private module Cached {
)
}
overlay[global]
private Callable tgt(Call c) {
result = viableImpl_v2(c)
or
@@ -374,11 +381,13 @@ private module Cached {
}
/** Holds if `(c1,c2)` is an edge in the call graph. */
overlay[global]
private predicate callEdge(Callable c1, Callable c2) {
exists(Call c | c.getCaller() = c1 and c2 = tgt(c))
}
/** Holds if `(c1,c2)` is an edge in the call graph excluding `intraInstanceCallEdge`. */
overlay[global]
private predicate crossInstanceCallEdge(Callable c1, Callable c2) {
callEdge(c1, c2) and not intraInstanceCallEdge(c1, c2)
}
@@ -392,6 +401,7 @@ private module Cached {
relevantFieldUpdate(_, f.getField(), _)
}
overlay[global]
private predicate source(Call call, TrackedField f, Field field, Callable c, boolean fresh) {
relevantCall(call, f) and
field = f.getField() and
@@ -405,9 +415,11 @@ private module Cached {
* `fresh` indicates whether the instance `this` in `c` has been freshly
* allocated along the call-chain.
*/
overlay[global]
private newtype TCallableNode =
MkCallableNode(Callable c, boolean fresh) { source(_, _, _, c, fresh) or edge(_, c, fresh) }
overlay[global]
private predicate edge(TCallableNode n, Callable c2, boolean f2) {
exists(Callable c1, boolean f1 | n = MkCallableNode(c1, f1) |
intraInstanceCallEdge(c1, c2) and f2 = f1
@@ -417,6 +429,7 @@ private module Cached {
)
}
overlay[global]
private predicate edge(TCallableNode n1, TCallableNode n2) {
exists(Callable c2, boolean f2 |
edge(n1, c2, f2) and
@@ -424,6 +437,7 @@ private module Cached {
)
}
overlay[global]
pragma[noinline]
private predicate source(Call call, TrackedField f, Field field, TCallableNode n) {
exists(Callable c, boolean fresh |
@@ -432,24 +446,28 @@ private module Cached {
)
}
overlay[global]
private predicate sink(Callable c, Field f, TCallableNode n) {
setsOwnField(c, f) and n = MkCallableNode(c, false)
or
setsOtherField(c, f) and n = MkCallableNode(c, _)
}
overlay[global]
private predicate prunedNode(TCallableNode n) {
sink(_, _, n)
or
exists(TCallableNode mid | edge(n, mid) and prunedNode(mid))
}
overlay[global]
private predicate prunedEdge(TCallableNode n1, TCallableNode n2) {
prunedNode(n1) and
prunedNode(n2) and
edge(n1, n2)
}
overlay[global]
private predicate edgePlus(TCallableNode c1, TCallableNode c2) = fastTC(prunedEdge/2)(c1, c2)
/**
@@ -457,6 +475,7 @@ private module Cached {
* where `f` and `call` share the same enclosing callable in which a
* `FieldRead` of `f` is reachable from `call`.
*/
overlay[global]
pragma[noopt]
private predicate updatesNamedFieldImpl(Call call, TrackedField f, Callable setter) {
exists(TCallableNode src, TCallableNode sink, Field field |
@@ -467,17 +486,23 @@ private module Cached {
}
bindingset[call, f]
overlay[global]
pragma[inline_late]
private predicate updatesNamedField0(Call call, TrackedField f, Callable setter) {
updatesNamedField(call, f, setter)
}
overlay[global]
cached
predicate defUpdatesNamedField(SsaImplicitUpdate def, TrackedField f, Callable setter) {
predicate defUpdatesNamedFieldImpl(SsaImplicitUpdate def, TrackedField f, Callable setter) {
f = def.getSourceVariable() and
updatesNamedField0(def.getCfgNode().asCall(), f, setter)
}
cached
predicate defUpdatesNamedField(SsaImplicitUpdate def, TrackedField f, Callable setter) =
forceLocal(defUpdatesNamedFieldImpl/3)(def, f, setter)
cached
predicate ssaUncertainImplicitUpdate(SsaImplicitUpdate def) {
exists(SsaSourceVariable v, BasicBlock bb, int i |
@@ -545,6 +570,7 @@ private module Cached {
}
cached
overlay[global]
module DataFlowIntegration {
import DataFlowIntegrationImpl

View File

@@ -2,7 +2,6 @@
* Provides predicates for reasoning about runtime call targets through virtual
* dispatch.
*/
overlay[local?]
module;
import java

View File

@@ -170,12 +170,15 @@ private module RegexFlow = DataFlow::Global<RegexFlowConfig>;
* As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
* and therefore may be relevant for ReDoS queries are considered.
*/
predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) {
predicate usedAsRegexG(StringLiteral regex, string mode, boolean match_full_string) {
RegexFlow::flow(DataFlow::exprNode(regex), _) and
mode = "None" and // TODO: proper mode detection
mode = "None" and // TODO: proper mode detection
(if matchesFullString(regex) then match_full_string = true else match_full_string = false)
}
overlay[local]
predicate usedAsRegex(StringLiteral regex, string mode, boolean match_full_string) = forceLocal(usedAsRegexG/3)(regex, mode, match_full_string)
/**
* Holds if `regex` is used as a regular expression that is matched against a full string,
* as though it was implicitly surrounded by ^ and $.

View File

@@ -1,5 +1,5 @@
/** Provides a class hierarchy corresponding to a parse tree of regular expressions. */
overlay[local?]
overlay[local]
module;
private import semmle.code.java.regex.regex as RE // importing under a namescape to avoid naming conflict for `Top`.

View File

@@ -936,6 +936,7 @@ abstract class RegexString extends StringLiteral {
}
/** A string literal used as a regular expression */
overlay[local]
class Regex extends RegexString {
boolean matches_full_string;

View File

@@ -349,6 +349,18 @@ signature module InputSig<LocationSig Location> {
/** Holds if `fieldFlowBranchLimit` should be ignored for flow going into/out of `c`. */
default predicate ignoreFieldFlowBranchLimit(DataFlowCallable c) { none() }
/**
* Holds if the evaluator is currently evaluating with an overlay. The
* implementation of this predicate needs to be `overlay[local]`. For a
* language with no overlay support, `none()` is a valid implementation.
*
* When called from a local predicate, this predicate holds if we are in the
* overlay-only local evaluation. When called from a global predicate, this
* predicate holds if we are evaluating globally with overlay and base both
* visible.
*/
default predicate isEvaluatingInOverlay() { none() }
}
module Configs<LocationSig Location, InputSig<Location> Lang> {

View File

@@ -3376,24 +3376,81 @@ module MakeImpl<LocationSig Location, InputSig<Location> Lang> {
import S6
/**
* Holds if data can flow from `source` to `sink`.
* Holds if data can flow from `source` to `sink`. When running in overlay
* incremental mode on a supported language, this predicate only has
* results where either the source or sink is in the overlay database.
*/
predicate flow(Node source, Node sink) {
private predicate flowIncremental(Node source, Node sink) {
exists(PathNode source0, PathNode sink0 |
flowPath(source0, sink0) and source0.getNode() = source and sink0.getNode() = sink
)
}
overlay[local]
private predicate flowLocal(Node source, Node sink) =
forceLocal(flowIncremental/2)(source, sink)
/**
* Holds if data can flow from `source` to `sink`.
*/
predicate flow(Node source, Node sink) {
flowIncremental(source, sink)
or
flowLocal(source, sink)
}
/**
* Holds if data can flow from some source to `sink`. When running in
* overlay incremental mode on a supported language, this predicate only
* has results where either the source or sink is in the overlay database.
*/
private predicate flowToIncremental(Node sink) {
exists(PathNode n | n.isSink() and n.getNode() = sink)
}
overlay[local]
private predicate flowToLocal(Node sink) = forceLocal(flowToIncremental/1)(sink)
/**
* Holds if data can flow from some source to `sink`.
*/
predicate flowTo(Node sink) { exists(PathNode n | n.isSink() and n.getNode() = sink) }
predicate flowTo(Node sink) {
flowToIncremental(sink)
or
flowToLocal(sink)
}
/**
* Holds if data can flow from some source to `sink`.
*/
predicate flowToExpr(Expr sink) { flowTo(exprNode(sink)) }
/**
* Holds if data can flow from `source` to some sink. When running in
* overlay incremental mode on a supported language, this predicate only
* has results where either the source or sink is in the overlay database.
*/
private predicate flowFromIncremental(Node source) {
exists(PathNode n | n.isSource() and n.getNode() = source)
}
overlay[local]
private predicate flowFromLocal(Node source) = forceLocal(flowFromIncremental/1)(source)
/**
* Holds if data can flow from `source` to some sink.
*/
predicate flowFrom(Node source) {
flowFromIncremental(source)
or
flowFromLocal(source)
}
/**
* Holds if data can flow from `source` to some sink.
*/
predicate flowFromExpr(Expr source) { flowFrom(exprNode(source)) }
/**
* INTERNAL: Only for debugging.
*

View File

@@ -4,7 +4,7 @@
* Provides an implementation of a fast initial pruning of global
* (interprocedural) data flow reachability (Stage 1).
*/
overlay[local?]
overlay[local?] // when this is removed, put `overlay[local?]` on `isOverlayNode`.
module;
private import codeql.util.Unit
@@ -129,23 +129,86 @@ module MakeImplStage1<LocationSig Location, InputSig<Location> Lang> {
private module AlertFiltering = AlertFilteringImpl<Location>;
/**
* Holds if the given node is visible in overlay-only local evaluation.
*
* This predicate needs to be `overlay[local?]`, either directly or
* through annotations from an outer scope. If `Node` is global for the
* language under analysis, then every node is considered an overlay
* node, which means there will effectively be no overlay-based
* filtering of sources and sinks.
*/
private predicate isOverlayNode(Node node) {
isEvaluatingInOverlay() and
// Any local node is an overlay node if we are evaluating in overlay mode
exists(node)
}
/**
* Holds if `node` comes from a file that was in the diff for which we
* are producing results.
*/
overlay[global]
private predicate isDiffFileNode(Node node) {
exists(string filePath |
node.getLocation().hasLocationInfo(filePath, _, _, _, _) and
AlertFiltering::fileIsInDiff(filePath)
)
}
overlay[global]
pragma[nomagic]
private predicate isFilteredSource(Node source) {
Config::isSource(source, _) and
if Config::observeDiffInformedIncrementalMode()
then AlertFiltering::filterByLocation(Config::getASelectedSourceLocation(source))
else any()
// Data flow is always incremental in one of two ways.
// 1. If the configuration is diff-informed and diff information is
// available, we filter to only include nodes in the diff, which
// gives the smallest set of nodes.
// 2. If not, in global evaluation with overlay, we filter to only
// include nodes from files in the overlay or the diff; flow from
// other nodes will be added back later. There can be two reasons
// why we are in this case:
// 1. This could be the primary configuration for a query that
// hasn't yet become diff-informed. In that case, the
// `getASelectedSourceLocation` information is probably just the
// default, and it's a fairly safe overapproximation to
// effectively expand to all nodes in the file (via
// `isDiffFileNode`).
// 2. This could be a secondary configuration, like a helper
// configuration for finding sources or sinks of a primary
// configuration. In that case, the results of this configuration
// are typically in the same file as the final alert.
if
Config::observeDiffInformedIncrementalMode() and
AlertFiltering::diffInformationAvailable()
then AlertFiltering::locationIsInDiff(Config::getASelectedSourceLocation(source))
else (
// If we are in base-only global evaluation, do not filter out any sources.
not isEvaluatingInOverlay()
or
// If we are in global evaluation with an overlay present, restrict
// sources to those visible in the overlay or
isOverlayNode(source)
)
}
overlay[global]
pragma[nomagic]
private predicate isFilteredSink(Node sink) {
(
Config::isSink(sink, _) or
Config::isSink(sink)
) and
if Config::observeDiffInformedIncrementalMode()
then AlertFiltering::filterByLocation(Config::getASelectedSinkLocation(sink))
else any()
// See the comments in `isFilteredSource` for the reasoning behind the following.
if
Config::observeDiffInformedIncrementalMode() and
AlertFiltering::diffInformationAvailable()
then AlertFiltering::locationIsInDiff(Config::getASelectedSinkLocation(sink))
else (
not isEvaluatingInOverlay()
or
isOverlayNode(sink)
)
}
private predicate hasFilteredSource() { isFilteredSource(_) }

View File

@@ -82,6 +82,21 @@ module AlertFilteringImpl<LocationSig Location> {
)
}
/** Holds if diff information is available in this evaluation. */
predicate diffInformationAvailable() {
restrictAlertsTo(_, _, _) or restrictAlertsToExactLocation(_, _, _, _, _)
}
/**
* Holds if diff information is available, and `filePath` is in the diff
* range.
*/
predicate fileIsInDiff(string filePath) {
restrictAlertsTo(filePath, _, _)
or
restrictAlertsToExactLocation(filePath, _, _, _, _)
}
/**
* Holds if the given location intersects the diff range. When that is the
* case, ensuring that alerts mentioning this location are included in the
@@ -103,8 +118,17 @@ module AlertFilteringImpl<LocationSig Location> {
*/
bindingset[location]
predicate filterByLocation(Location location) {
not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _)
not diffInformationAvailable()
or
locationIsInDiff(location)
}
/**
* Like `filterByLocation`, except that if there is no diff range, this
* predicate never holds.
*/
bindingset[location]
predicate locationIsInDiff(Location location) {
exists(string filePath |
restrictAlertsToEntireFile(filePath) and
location.hasLocationInfo(filePath, _, _, _, _)