From 1eccb8ea930e73c8ddb4f1b797e68f112eeb91a2 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 27 Feb 2026 12:05:48 +0000 Subject: [PATCH] C++: Add a cache module to taint-tracking and ensure its cached predicates are evaluated in the same stage as the dataflow stage. --- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 1 + .../dataflow/internal/TaintTrackingUtil.qll | 115 ++++++++++-------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 06bff260254..4955c5a2c86 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -124,6 +124,7 @@ private module Cached { cached predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) { ( + TaintTrackingUtil::forceCachingInSameStage() and // Def-use/Use-use flow SsaImpl::ssaFlow(nodeFrom, nodeTo) or diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll index f190569330f..4b98469c418 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll @@ -9,60 +9,71 @@ private import SsaImpl as Ssa private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.code.cpp.ir.dataflow.FlowSteps -/** - * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local - * (intra-procedural) step. This relation is only used for local taint flow - * (for example `TaintTracking::localTaint(source, sink)`) so it may contain - * special cases that should only apply to local taint flow. 
- */ -predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - // dataflow step - DataFlow::localFlowStep(nodeFrom, nodeTo) - or - // taint flow step - localAdditionalTaintStep(nodeFrom, nodeTo, _) - or - // models-as-data summarized flow for local data flow (i.e. special case for flow - // through calls to modeled functions, without relying on global dataflow to join - // the dots). - FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _) +cached +private module Cached { + private import DataFlowImplCommon as DataFlowImplCommon + + cached + predicate forceCachingInSameStage() { DataFlowImplCommon::forceCachingInSameStage() } + + /** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. This relation is only used for local taint flow + * (for example `TaintTracking::localTaint(source, sink)`) so it may contain + * special cases that should only apply to local taint flow. + */ + cached + predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // dataflow step + DataFlow::localFlowStep(nodeFrom, nodeTo) + or + // taint flow step + localAdditionalTaintStep(nodeFrom, nodeTo, _) + or + // models-as-data summarized flow for local data flow (i.e. special case for flow + // through calls to modeled functions, without relying on global dataflow to join + // the dots). + FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _) + } + + /** + * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding + * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent + * different objects. 
+ */ + cached + predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) { + operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and + model = "" + or + modeledTaintStep(nodeFrom, nodeTo, model) + or + // Flow from (the indirection of) an operand of a pointer arithmetic instruction to the + // indirection of the pointer arithmetic instruction. This provides flow from `source` + // in `x[source]` to the result of the associated load instruction. + exists(PointerArithmeticInstruction pai, int indirectionIndex | + nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and + hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1) + ) and + model = "" + or + any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and + model = "" + or + // models-as-data summarized flow + FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), + nodeTo.(FlowSummaryNode).getSummaryNode(), false, model) + or + // object->field conflation for content that is a `TaintInheritingContent`. + exists(DataFlow::ContentSet f | + readStep(nodeFrom, f, nodeTo) and + f.getAReadContent() instanceof TaintInheritingContent + ) and + model = "" + } } -/** - * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding - * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent - * different objects. - */ -cached -predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) { - operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and - model = "" - or - modeledTaintStep(nodeFrom, nodeTo, model) - or - // Flow from (the indirection of) an operand of a pointer arithmetic instruction to the - // indirection of the pointer arithmetic instruction. This provides flow from `source` - // in `x[source]` to the result of the associated load instruction. 
- exists(PointerArithmeticInstruction pai, int indirectionIndex | - nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and - hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1) - ) and - model = "" - or - any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and - model = "" - or - // models-as-data summarized flow - FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), - nodeTo.(FlowSummaryNode).getSummaryNode(), false, model) - or - // object->field conflation for content that is a `TaintInheritingContent`. - exists(DataFlow::ContentSet f | - readStep(nodeFrom, f, nodeTo) and - f.getAReadContent() instanceof TaintInheritingContent - ) and - model = "" -} +import Cached /** * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local