Merge pull request #21185 from MathiasVP/mad-cpp-guards

C++: Support models-as-data barriers and barrier guards
This commit is contained in:
Mathias Vorreiter Pedersen
2026-01-21 12:13:49 +00:00
committed by GitHub
8 changed files with 317 additions and 56 deletions

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* Added modules `DataFlow::ParameterizedBarrierGuard` and `DataFlow::ParameterizedInstructionBarrierGuard`. These modules provide the same features as `DataFlow::BarrierGuard` and `DataFlow::InstructionBarrierGuard`, but allow for an additional parameter to support properly using them in dataflow configurations that uses flow states.

View File

@@ -95,6 +95,7 @@
import cpp
private import new.DataFlow
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate as Private
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import internal.FlowSummaryImpl
@@ -367,6 +368,8 @@ private predicate elementSpec(
) {
sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or
sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or
barrierModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or
barrierGuardModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _) or
summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _)
}
@@ -1028,6 +1031,84 @@ private module Cached {
isSinkNode(n, kind, model) and n.asNode() = node
)
}
private newtype TKindModelPair =
TMkPair(string kind, string model) { isBarrierGuardNode(_, _, kind, model) }
private GuardValue convertAcceptingValue(Public::AcceptingValue av) {
av.isTrue() and result.asBooleanValue() = true
or
av.isFalse() and result.asBooleanValue() = false
or
// NOTE: The below cases don't contribute anything currently since the
// callers immediately use `.asBooleanValue()` to convert the `GuardValue`
// to a boolean. Once we're willing to accept the breaking change of
// converting the barrier guard API to use `GuardValue`s instead `Boolean`s
// we can remove this restriction.
av.isNoException() and result.getDualValue().isThrowsException()
or
av.isZero() and result.asIntValue() = 0
or
av.isNotZero() and result.getDualValue().asIntValue() = 0
or
av.isNull() and result.isNullValue()
or
av.isNotNull() and result.isNonNullValue()
}
private predicate barrierGuardChecks(IRGuardCondition g, Expr e, boolean gv, TKindModelPair kmp) {
exists(
SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingvalue,
string kind, string model
|
isBarrierGuardNode(n, acceptingvalue, kind, model) and
n.asNode().asExpr() = e and
kmp = TMkPair(kind, model) and
gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and
n.asNode().(Private::ArgumentNode).getCall().asCallInstruction() = g
)
}
private newtype TKindModelPairIntPair =
MkKindModelPairIntPair(TKindModelPair pair, int indirectionIndex) {
indirectionIndex > 0 and
Private::nodeHasInstruction(_, _, indirectionIndex) and
exists(pair)
}
private predicate indirectBarrierGuardChecks(
IRGuardCondition g, Expr e, boolean gv, TKindModelPairIntPair kmp
) {
exists(
SourceSinkInterpretationInput::InterpretNode interpretNode,
Public::AcceptingValue acceptingvalue, string kind, string model, int indirectionIndex,
Private::ArgumentNode arg
|
isBarrierGuardNode(interpretNode, acceptingvalue, kind, model) and
arg = interpretNode.asNode() and
arg.asIndirectExpr(indirectionIndex) = e and
kmp = MkKindModelPairIntPair(TMkPair(kind, model), indirectionIndex) and
gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and
arg.getCall().asCallInstruction() = g
)
}
/**
* Holds if `node` is specified as a barrier with the given kind in a MaD flow
* model.
*/
cached
predicate barrierNode(DataFlow::Node node, string kind, string model) {
exists(SourceSinkInterpretationInput::InterpretNode n |
isBarrierNode(n, kind, model) and n.asNode() = node
)
or
DataFlow::ParameterizedBarrierGuard<TKindModelPair, barrierGuardChecks/4>::getABarrierNode(TMkPair(kind,
model)) = node
or
DataFlow::ParameterizedBarrierGuard<TKindModelPairIntPair, indirectBarrierGuardChecks/4>::getAnIndirectBarrierNode(MkKindModelPairIntPair(TMkPair(kind,
model), _)) = node
}
}
import Cached
@@ -1044,6 +1125,12 @@ predicate sourceNode(DataFlow::Node node, string kind) { sourceNode(node, kind,
*/
predicate sinkNode(DataFlow::Node node, string kind) { sinkNode(node, kind, _) }
/**
* Holds if `node` is specified as a barrier with the given kind in a MaD flow
* model.
*/
predicate barrierNode(DataFlow::Node node, string kind) { barrierNode(node, kind, _) }
private predicate interpretSummary(
Function f, string input, string output, string kind, string provenance, string model
) {

View File

@@ -149,16 +149,27 @@ module SourceSinkInterpretationInput implements
}
predicate barrierElement(
Element n, string output, string kind, Public::Provenance provenance, string model
Element e, string output, string kind, Public::Provenance provenance, string model
) {
none()
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
barrierModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, model) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
predicate barrierGuardElement(
Element n, string input, Public::AcceptingValue acceptingvalue, string kind,
Element e, string input, Public::AcceptingValue acceptingvalue, string kind,
Public::Provenance provenance, string model
) {
none()
exists(
string package, string type, boolean subtypes, string name, string signature, string ext
|
barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingvalue, kind,
provenance, model) and
e = interpretElement(package, type, subtypes, name, signature, ext)
)
}
private newtype TInterpretNode =

View File

@@ -2417,6 +2417,19 @@ class ContentSet instanceof Content {
}
}
private signature class ParamSig;
private module WithParam<ParamSig P> {
/**
* Holds if the guard `g` validates the expression `e` upon evaluating to `branch`.
*
* The expression `e` is expected to be a syntactic part of the guard `g`.
* For example, the guard `g` might be a call `isSafe(x)` and the expression `e`
* the argument `x`.
*/
signature predicate guardChecksSig(IRGuardCondition g, Expr e, boolean branch, P param);
}
/**
* Holds if the guard `g` validates the expression `e` upon evaluating to `branch`.
*
@@ -2438,7 +2451,7 @@ private predicate controls(IRGuardCondition g, Node n, boolean edge) {
* This is expected to be used in `isBarrier`/`isSanitizer` definitions
* in data flow and taint tracking.
*/
module BarrierGuard<guardChecksSig/3 guardChecks> {
module ParameterizedBarrierGuard<ParamSig P, WithParam<P>::guardChecksSig/4 guardChecks> {
bindingset[value, n]
pragma[inline_late]
private predicate convertedExprHasValueNumber(ValueNumber value, Node n) {
@@ -2448,12 +2461,13 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
)
}
private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch) {
guardChecks(g, n.asOperand().getDef().getConvertedResultExpression(), branch)
private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch, P p) {
guardChecks(g, n.asOperand().getDef().getConvertedResultExpression(), branch, p)
}
/**
* Gets an expression node that is safely guarded by the given guard check.
* Gets an expression node that is safely guarded by the given guard check
* when the parameter is `p`.
*
* For example, given the following code:
* ```cpp
@@ -2484,19 +2498,27 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
*
* NOTE: If an indirect expression is tracked, use `getAnIndirectBarrierNode` instead.
*/
Node getABarrierNode() {
Node getABarrierNode(P p) {
exists(IRGuardCondition g, ValueNumber value, boolean edge |
convertedExprHasValueNumber(value, result) and
guardChecks(g,
pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge) and
pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge, p) and
controls(g, result, edge)
)
or
result = SsaImpl::BarrierGuard<guardChecksNode/3>::getABarrierNode()
result = SsaImpl::BarrierGuard<P, guardChecksNode/4>::getABarrierNode(p)
}
/**
* Gets an indirect expression node that is safely guarded by the given guard check.
* Gets an expression node that is safely guarded by the given guard check.
*
* See `getABarrierNode/1` for examples.
*/
Node getABarrierNode() { result = getABarrierNode(_) }
/**
* Gets an indirect expression node that is safely guarded by the given
* guard check with parameter `p`.
*
* For example, given the following code:
* ```cpp
@@ -2528,6 +2550,13 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
*
* NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead.
*/
Node getAnIndirectBarrierNode(P p) { result = getAnIndirectBarrierNode(_, p) }
/**
* Gets an indirect expression node that is safely guarded by the given guard check.
*
* See `getAnIndirectBarrierNode/1` for examples.
*/
Node getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) }
bindingset[value, n]
@@ -2542,10 +2571,10 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
}
private predicate guardChecksIndirectNode(
IRGuardCondition g, Node n, boolean branch, int indirectionIndex
IRGuardCondition g, Node n, boolean branch, int indirectionIndex, P p
) {
guardChecks(g, n.asIndirectOperand(indirectionIndex).getDef().getConvertedResultExpression(),
branch)
branch, p)
}
/**
@@ -2582,19 +2611,44 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
*
* NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead.
*/
Node getAnIndirectBarrierNode(int indirectionIndex) {
Node getAnIndirectBarrierNode(int indirectionIndex, P p) {
exists(IRGuardCondition g, ValueNumber value, boolean edge |
indirectConvertedExprHasValueNumber(indirectionIndex, value, result) and
guardChecks(g,
pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge) and
pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge, p) and
controls(g, result, edge)
)
or
result =
SsaImpl::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
SsaImpl::BarrierGuardWithIntParam<P, guardChecksIndirectNode/5>::getABarrierNode(indirectionIndex,
p)
}
}
/**
* Provides a set of barrier nodes for a guard that validates an expression.
*
* This is expected to be used in `isBarrier`/`isSanitizer` definitions
* in data flow and taint tracking.
*/
module BarrierGuard<guardChecksSig/3 guardChecks> {
private predicate guardChecks(IRGuardCondition g, Expr e, boolean branch, Unit unit) {
guardChecks(g, e, branch) and
exists(unit)
}
import ParameterizedBarrierGuard<Unit, guardChecks/4>
}
private module InstrWithParam<ParamSig P> {
/**
* Holds if the guard `g` validates the instruction `instr` upon evaluating to `branch`.
*/
signature predicate instructionGuardChecksSig(
IRGuardCondition g, Instruction instr, boolean branch, P p
);
}
/**
* Holds if the guard `g` validates the instruction `instr` upon evaluating to `branch`.
*/
@@ -2606,7 +2660,9 @@ signature predicate instructionGuardChecksSig(IRGuardCondition g, Instruction in
* This is expected to be used in `isBarrier`/`isSanitizer` definitions
* in data flow and taint tracking.
*/
module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardChecks> {
module ParameterizedInstructionBarrierGuard<
ParamSig P, InstrWithParam<P>::instructionGuardChecksSig/4 instructionGuardChecks>
{
bindingset[value, n]
pragma[inline_late]
private predicate operandHasValueNumber(ValueNumber value, Node n) {
@@ -2616,21 +2672,27 @@ module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardCheck
)
}
private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch) {
instructionGuardChecks(g, n.asOperand().getDef(), branch)
private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch, P p) {
instructionGuardChecks(g, n.asOperand().getDef(), branch, p)
}
/** Gets a node that is safely guarded by the given guard check. */
Node getABarrierNode() {
/**
* Gets a node that is safely guarded by the given guard check with
* parameter `p`.
*/
Node getABarrierNode(P p) {
exists(IRGuardCondition g, ValueNumber value, boolean edge |
instructionGuardChecks(g, pragma[only_bind_into](value.getAnInstruction()), edge) and
instructionGuardChecks(g, pragma[only_bind_into](value.getAnInstruction()), edge, p) and
operandHasValueNumber(value, result) and
controls(g, result, edge)
)
or
result = SsaImpl::BarrierGuard<guardChecksNode/3>::getABarrierNode()
result = SsaImpl::BarrierGuard<P, guardChecksNode/4>::getABarrierNode(p)
}
/** Gets a node that is safely guarded by the given guard check. */
Node getABarrierNode() { result = getABarrierNode(_) }
bindingset[value, n]
pragma[inline_late]
private predicate indirectOperandHasValueNumber(ValueNumber value, int indirectionIndex, Node n) {
@@ -2641,25 +2703,52 @@ module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardCheck
}
private predicate guardChecksIndirectNode(
IRGuardCondition g, Node n, boolean branch, int indirectionIndex
IRGuardCondition g, Node n, boolean branch, int indirectionIndex, P p
) {
instructionGuardChecks(g, n.asIndirectOperand(indirectionIndex).getDef(), branch)
instructionGuardChecks(g, n.asIndirectOperand(indirectionIndex).getDef(), branch, p)
}
/**
* Gets an indirect node with indirection index `indirectionIndex` that is
* safely guarded by the given guard check.
* safely guarded by the given guard check with parameter `p`.
*/
Node getAnIndirectBarrierNode(int indirectionIndex) {
Node getAnIndirectBarrierNode(int indirectionIndex, P p) {
exists(IRGuardCondition g, ValueNumber value, boolean edge |
instructionGuardChecks(g, pragma[only_bind_into](value.getAnInstruction()), edge) and
instructionGuardChecks(g, pragma[only_bind_into](value.getAnInstruction()), edge, p) and
indirectOperandHasValueNumber(value, indirectionIndex, result) and
controls(g, result, edge)
)
or
result =
SsaImpl::BarrierGuardWithIntParam<guardChecksIndirectNode/4>::getABarrierNode(indirectionIndex)
SsaImpl::BarrierGuardWithIntParam<P, guardChecksIndirectNode/5>::getABarrierNode(indirectionIndex,
p)
}
/**
* Gets an indirect node that is safely guarded by the given guard check
* with parameter `p`.
*/
Node getAnIndirectBarrierNode(P p) { result = getAnIndirectBarrierNode(_, p) }
/** Gets an indirect node that is safely guarded by the given guard check. */
Node getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) }
}
/**
* Provides a set of barrier nodes for a guard that validates an instruction.
*
* This is expected to be used in `isBarrier`/`isSanitizer` definitions
* in data flow and taint tracking.
*/
module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardChecks> {
private predicate instructionGuardChecks(
IRGuardCondition g, Instruction i, boolean branch, Unit unit
) {
instructionGuardChecks(g, i, branch) and
exists(unit)
}
import ParameterizedInstructionBarrierGuard<Unit, instructionGuardChecks/4>
}
/**

View File

@@ -1035,13 +1035,23 @@ class SynthNode extends DataFlowIntegrationImpl::SsaNode {
SynthNode() { not this.asDefinition() instanceof SsaImpl::WriteDefinition }
}
signature predicate guardChecksNodeSig(IRGuards::IRGuardCondition g, Node e, boolean branch);
private signature class ParamSig;
signature predicate guardChecksNodeSig(
IRGuards::IRGuardCondition g, Node e, boolean branch, int indirectionIndex
);
private module ParamIntPair<ParamSig P> {
newtype TPair = MkPair(P p, int indirectionIndex) { nodeHasInstruction(_, _, indirectionIndex) }
}
module BarrierGuardWithIntParam<guardChecksNodeSig/4 guardChecksNode> {
private module WithParam<ParamSig P> {
signature predicate guardChecksNodeSig(IRGuards::IRGuardCondition g, Node e, boolean gv, P param);
}
private module IntWithParam<ParamSig P> {
signature predicate guardChecksNodeSig(
IRGuards::IRGuardCondition g, Node e, boolean gv, int indirectionIndex, P param
);
}
module BarrierGuardWithIntParam<ParamSig P, IntWithParam<P>::guardChecksNodeSig/5 guardChecksNode> {
private predicate ssaDefReachesCertainUse(Definition def, UseImpl use) {
exists(SourceVariable v, IRBlock bb, int i |
use.hasIndexInBlock(bb, i, v) and
@@ -1052,21 +1062,23 @@ module BarrierGuardWithIntParam<guardChecksNodeSig/4 guardChecksNode> {
private predicate guardChecksInstr(
IRGuards::Guards_v1::Guard g, IRGuards::GuardsInput::Expr instr, IRGuards::GuardValue gv,
int indirectionIndex
ParamIntPair<P>::TPair pair
) {
exists(Node node |
exists(Node node, int indirectionIndex, P p |
pair = ParamIntPair<P>::MkPair(p, indirectionIndex) and
nodeHasInstruction(node, instr, indirectionIndex) and
guardChecksNode(g, node, gv.asBooleanValue(), indirectionIndex)
guardChecksNode(g, node, gv.asBooleanValue(), indirectionIndex, p)
)
}
private predicate guardChecksWithWrappers(
DataFlowIntegrationInput::Guard g, SsaImpl::Definition def, IRGuards::GuardValue val,
int indirectionIndex
ParamIntPair<P>::MkPair pair
) {
exists(Instruction e |
IRGuards::Guards_v1::ParameterizedValidationWrapper<int, guardChecksInstr/4>::guardChecks(g,
e, val, indirectionIndex)
exists(Instruction e, int indirectionIndex |
IRGuards::Guards_v1::ParameterizedValidationWrapper<ParamIntPair<P>::TPair, guardChecksInstr/4>::guardChecks(g,
e, val, pair) and
pair = ParamIntPair<P>::MkPair(_, indirectionIndex)
|
indirectionIndex = 0 and
def.(Definition).getAUse().getDef() = e
@@ -1075,18 +1087,19 @@ module BarrierGuardWithIntParam<guardChecksNodeSig/4 guardChecksNode> {
)
}
Node getABarrierNode(int indirectionIndex) {
Node getABarrierNode(int indirectionIndex, P p) {
// Only get the SynthNodes from the shared implementation, as the ExprNodes cannot
// be matched on SourceVariable.
result.(SsaSynthNode).getSynthNode() =
DataFlowIntegrationImpl::BarrierGuardDefWithState<int, guardChecksWithWrappers/4>::getABarrierNode(indirectionIndex)
DataFlowIntegrationImpl::BarrierGuardDefWithState<ParamIntPair<P>::MkPair, guardChecksWithWrappers/4>::getABarrierNode(ParamIntPair<P>::MkPair(p,
indirectionIndex))
or
// Calculate the guarded UseImpls corresponding to ExprNodes directly.
exists(
DataFlowIntegrationInput::Guard g, IRGuards::GuardValue branch, Definition def, IRBlock bb
|
guardChecksWithWrappers(g, def, branch, indirectionIndex) and
exists(UseImpl use |
guardChecksWithWrappers(g, def, branch, ParamIntPair<P>::MkPair(p, indirectionIndex)) and
ssaDefReachesCertainUse(def, use) and
use.getBlock() = bb and
DataFlowIntegrationInput::guardControlsBlock(g, bb, branch) and
@@ -1096,15 +1109,16 @@ module BarrierGuardWithIntParam<guardChecksNodeSig/4 guardChecksNode> {
}
}
module BarrierGuard<guardChecksNodeSig/3 guardChecksNode> {
module BarrierGuard<ParamSig P, WithParam<P>::guardChecksNodeSig/4 guardChecksNode> {
private predicate guardChecksNode(
IRGuards::IRGuardCondition g, Node e, boolean branch, int indirectionIndex
IRGuards::IRGuardCondition g, Node e, boolean gv, int indirectionIndex, P p
) {
guardChecksNode(g, e, branch) and indirectionIndex = 0
indirectionIndex = 0 and
guardChecksNode(g, e, gv, p)
}
Node getABarrierNode() {
result = BarrierGuardWithIntParam<guardChecksNode/4>::getABarrierNode(0)
Node getABarrierNode(P p) {
result = BarrierGuardWithIntParam<P, guardChecksNode/5>::getABarrierNode(0, p)
}
}

View File

@@ -12,4 +12,38 @@ void testCheckArgument(int p) {
if (checkArgument(&p)) {
sink(p); // $ barrier=glval<int> indirect_barrier=int
}
}
int* get_clean_value(int* x) { return x; }
bool is_clean_value(int*);
int* get_clean_pointer(int* x) { return x; }
bool is_clean_pointer(int*);
void sink(int*);
void test_mad(int x, int* p) {
{
if(is_clean_value(&x)) {
sink(x); // $ external=int
}
}
{
if(is_clean_value(p)) {
sink(*p); // $ external=int
}
}
{
if(is_clean_pointer(p)) {
sink(p); // $ external=int*
}
}
{
if(is_clean_pointer(&x)) {
sink(x); // $ external=glval<int>
}
}
}

View File

@@ -0,0 +1,13 @@
extensions:
- addsTo:
pack: codeql/cpp-all
extensible: barrierModel
data:
- ["", "", False, "get_clean_pointer", "", "", "ReturnValue", "test-barrier", "manual"]
- ["", "", False, "get_clean_data", "", "", "ReturnValue[*]", "test-barrier", "manual"]
- addsTo:
pack: codeql/cpp-all
extensible: barrierGuardModel
data:
- ["", "", False, "is_clean_value", "", "", "Argument[*0]", "true", "test-barrier", "manual"]
- ["", "", False, "is_clean_pointer", "", "", "Argument[0]", "true", "test-barrier", "manual"]

View File

@@ -2,6 +2,7 @@ import cpp
import semmle.code.cpp.dataflow.new.DataFlow
import semmle.code.cpp.controlflow.IRGuards
import utils.test.InlineExpectationsTest
import semmle.code.cpp.dataflow.ExternalFlow
predicate instructionGuardChecks(IRGuardCondition gc, Instruction checked, boolean branch) {
exists(CallInstruction call |
@@ -27,18 +28,26 @@ predicate barrierGuard(DataFlow::Node node, string s) {
else s = node.getType().toString().replaceAll(" ", "")
}
predicate externalBarrierGuard(DataFlow::Node node, string s) {
barrierNode(node, "test-barrier") and
if node.isGLValue()
then s = "glval<" + node.getType().toString().replaceAll(" ", "") + ">"
else s = node.getType().toString().replaceAll(" ", "")
}
module Test implements TestSig {
string getARelevantTag() { result = ["barrier", "indirect_barrier"] }
string getARelevantTag() { result = ["barrier", "indirect_barrier", "external"] }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::Node node, string s |
indirectBarrierGuard(node, s) and
value = s and
exists(DataFlow::Node node |
indirectBarrierGuard(node, value) and
tag = "indirect_barrier"
or
barrierGuard(node, s) and
value = s and
barrierGuard(node, value) and
tag = "barrier"
or
externalBarrierGuard(node, value) and
tag = "external"
|
element = node.toString() and
location = node.getLocation()