C++: Extract the barriers from 'cpp/invalid-pointer-deref' into a library.

This commit is contained in:
Mathias Vorreiter Pedersen
2025-07-22 18:15:10 +01:00
parent 92a730c9ac
commit a1f4246c5f
2 changed files with 183 additions and 156 deletions

View File

@@ -53,44 +53,12 @@
private import cpp
private import semmle.code.cpp.ir.dataflow.internal.ProductFlow
private import semmle.code.cpp.security.ProductFlowUtils.ProductFlowUtils
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.controlflow.IRGuards
private import codeql.util.Unit
private import semmle.code.cpp.rangeanalysis.new.RangeAnalysisUtil
private VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result }
/**
* Gets a (sub)expression that may be the result of evaluating `size`.
*
* For example, `getASizeCandidate(a ? b : c)` gives `a ? b : c`, `b` and `c`.
*/
bindingset[size]
pragma[inline_late]
private Expr getASizeCandidate(Expr size) {
result = size
or
result = [size.(ConditionalExpr).getThen(), size.(ConditionalExpr).getElse()]
}
/**
* Holds if the `(n, state)` pair represents the source of flow for the size
* expression associated with `alloc`.
*/
predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
exists(VariableAccess va, Expr size, int delta, Expr s |
size = alloc.getSizeExpr() and
s = getASizeCandidate(size) and
// Get the unique variable in a size expression like `x` in `malloc(x + 1)`.
va = unique( | | getAVariableAccess(s)) and
// Compute `delta` as the constant difference between `x` and `x + 1`.
bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = s),
any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and
n.asExpr() = va and
state = delta
)
}
/**
* Gets the virtual dispatch branching limit when calculating field flow while searching
* for flow from an allocation to the construction of an out-of-bounds pointer.
@@ -100,125 +68,6 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
*/
int allocationToInvalidPointerFieldFlowBranchLimit() { result = 0 }
/**
* A module that encapsulates a barrier guard to remove false positives from flow like:
* ```cpp
* char *p = new char[size];
* // ...
* unsigned n = size;
* // ...
* if(n < size) {
* use(*p[n]);
* }
* ```
* In this case, the sink pair identified by the product flow library (without any additional barriers)
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
* instruction `pai = a + b` such that:
* 1. the allocation flows to `a`, and
* 2. `b <= n` where `n` is the `n` in `p[n]`
* but because there's a strict comparison that compares `n` against the size of the allocation this
* snippet is fine.
*/
private module SizeBarrier {
private module SizeBarrierConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
// The sources is the same as in the sources for the second
// projection in the `AllocToInvalidPointerConfig` module.
hasSize(_, source, _) and
InterestingPointerAddInstruction::isInterestingSize(source)
}
int fieldFlowBranchLimit() { result = allocationToInvalidPointerFieldFlowBranchLimit() }
/**
* Holds if `small <= large + k` holds if `g` evaluates to `testIsTrue`.
*/
additional predicate isSink(
DataFlow::Node small, DataFlow::Node large, IRGuardCondition g, int k, boolean testIsTrue
) {
// The sink is any "large" side of a relational comparison. i.e., the `large` expression
// in a guard such as `small <= large + k`.
g.comparesLt(small.asOperand(), large.asOperand(), k + 1, true, testIsTrue)
}
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
}
module SizeBarrierFlow = DataFlow::Global<SizeBarrierConfig>;
private int getASizeAddend(DataFlow::Node node) {
exists(DataFlow::Node source |
SizeBarrierFlow::flow(source, node) and
hasSize(_, source, result)
)
}
/**
* Holds if `small <= large + k` holds if `g` evaluates to `edge`.
*/
private predicate operandGuardChecks(
IRGuardCondition g, Operand small, DataFlow::Node large, int k, boolean edge
) {
SizeBarrierFlow::flowTo(large) and
SizeBarrierConfig::isSink(DataFlow::operandNode(small), large, g, k, edge)
}
/**
* Gets an instruction `instr` that is guarded by a check such as `instr <= small + delta` where
* `small <= _ + k` and `small` is the "small side" of of a relational comparison that checks
* whether `small <= size` where `size` is the size of an allocation.
*/
Instruction getABarrierInstruction0(int delta, int k) {
exists(
IRGuardCondition g, ValueNumber value, Operand small, boolean edge, DataFlow::Node large
|
// We know:
// 1. result <= value + delta (by `bounded`)
// 2. value <= large + k (by `operandGuardChecks`).
// So:
// result <= value + delta (by 1.)
// <= large + k + delta (by 2.)
small = value.getAUse() and
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](small), large,
pragma[only_bind_into](k), pragma[only_bind_into](edge)) and
bounded(result, value.getAnInstruction(), delta) and
g.controls(result.getBlock(), edge) and
k < getASizeAddend(large)
)
}
/**
* Gets an instruction that is guarded by a guard condition which ensures that
* the value of the instruction is upper-bounded by size of some allocation.
*/
bindingset[state]
pragma[inline_late]
Instruction getABarrierInstruction(int state) {
exists(int delta, int k |
state > k + delta and
// result <= "size of allocation" + delta + k
// < "size of allocation" + state
result = getABarrierInstruction0(delta, k)
)
}
/**
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
* the value of the node is upper-bounded by size of some allocation.
*/
DataFlow::Node getABarrierNode(int state) {
exists(DataFlow::Node source, int delta, int k |
SizeBarrierFlow::flow(source, result) and
hasSize(_, source, state) and
result.asInstruction() = SizeBarrier::getABarrierInstruction0(delta, k) and
state > k + delta
// so now we have:
// result <= "size of allocation" + delta + k
// < "size of allocation" + state
)
}
}
private module InterestingPointerAddInstruction {
private module PointerAddInstructionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
@@ -227,7 +76,7 @@ private module InterestingPointerAddInstruction {
hasSize(source.asExpr(), _, _)
}
int fieldFlowBranchLimit() { result = allocationToInvalidPointerFieldFlowBranchLimit() }
predicate fieldFlowBranchLimit = allocationToInvalidPointerFieldFlowBranchLimit/0;
predicate isSink(DataFlow::Node sink) {
sink.asInstruction() = any(PointerAddInstruction pai).getLeft()
@@ -263,6 +112,17 @@ private module InterestingPointerAddInstruction {
}
}
private module SizeBarrierInput implements SizeBarrierInputSig {
predicate fieldFlowBranchLimit = allocationToInvalidPointerFieldFlowBranchLimit/0;
predicate isSource(DataFlow::Node source) {
// The sources is the same as in the sources for the second
// projection in the `AllocToInvalidPointerConfig` module.
hasSize(_, source, _) and
InterestingPointerAddInstruction::isInterestingSize(source)
}
}
/**
* A product-flow configuration for flow from an `(allocation, size)` pair to a
* pointer-arithmetic operation `pai` such that `pai <= allocation + size`.
@@ -301,7 +161,7 @@ private module Config implements ProductFlow::StateConfigSig {
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
node = SizeBarrier::getABarrierNode(state)
node = SizeBarrier<SizeBarrierInput>::getABarrierNode(state)
}
predicate isBarrier2(DataFlow::Node node) {
@@ -357,8 +217,8 @@ private predicate pointerAddInstructionHasBounds0(
sizeInstr = sizeSink.asInstruction() and
// pai.getRight() <= sizeSink + delta
bounded1(right, sizeInstr, delta) and
not right = SizeBarrier::getABarrierInstruction(delta) and
not sizeInstr = SizeBarrier::getABarrierInstruction(delta)
not right = SizeBarrier<SizeBarrierInput>::getABarrierInstruction(delta) and
not sizeInstr = SizeBarrier<SizeBarrierInput>::getABarrierInstruction(delta)
)
}

View File

@@ -0,0 +1,167 @@
/**
* This file provies the `SizeBarrier` module which provides barriers for
* both the `cpp/invalid-pointer-deref` query and the `cpp/overrun-write`
* query.
*/
private import cpp
private import semmle.code.cpp.dataflow.new.DataFlow
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.rangeanalysis.new.RangeAnalysisUtil
private VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result }
/**
* Gets a (sub)expression that may be the result of evaluating `size`.
*
* For example, `getASizeCandidate(a ? b : c)` gives `a ? b : c`, `b` and `c`.
*/
bindingset[size]
pragma[inline_late]
private Expr getASizeCandidate(Expr size) {
result = size
or
result = [size.(ConditionalExpr).getThen(), size.(ConditionalExpr).getElse()]
}
/**
* Holds if the `(n, state)` pair represents the source of flow for the size
* expression associated with `alloc`.
*/
predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
exists(VariableAccess va, Expr size, int delta, Expr s |
size = alloc.getSizeExpr() and
s = getASizeCandidate(size) and
// Get the unique variable in a size expression like `x` in `malloc(x + 1)`.
va = unique( | | getAVariableAccess(s)) and
// Compute `delta` as the constant difference between `x` and `x + 1`.
bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = s),
any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and
n.asExpr() = va and
state = delta
)
}
/** Provides the input specification of the `SizeBarrier` module. */
signature module SizeBarrierInputSig {
/** Gets the virtual dispatch branching limit when calculating field flow. */
int fieldFlowBranchLimit();
/** Holds if `source` is a relevant data flow source. */
predicate isSource(DataFlow::Node source);
}
/**
* A module that encapsulates a barrier guard to remove false positives from flow like:
* ```cpp
* char *p = new char[size];
* // ...
* unsigned n = size;
* // ...
* if(n < size) {
* use(*p[n]);
* }
* ```
* In this case, the sink pair identified by the product flow library (without any additional barriers)
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
* instruction `pai = a + b` such that:
* 1. the allocation flows to `a`, and
* 2. `b <= n` where `n` is the `n` in `p[n]`
* but because there's a strict comparison that compares `n` against the size of the allocation this
* snippet is fine.
*/
module SizeBarrier<SizeBarrierInputSig Input> {
private module SizeBarrierConfig implements DataFlow::ConfigSig {
predicate isSource = Input::isSource/1;
predicate fieldFlowBranchLimit = Input::fieldFlowBranchLimit/0;
/**
* Holds if `small <= large + k` holds if `g` evaluates to `testIsTrue`.
*/
additional predicate isSink(
DataFlow::Node small, DataFlow::Node large, IRGuardCondition g, int k, boolean testIsTrue
) {
// The sink is any "large" side of a relational comparison. i.e., the `large` expression
// in a guard such as `small <= large + k`.
g.comparesLt(small.asOperand(), large.asOperand(), k + 1, true, testIsTrue)
}
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
}
private module SizeBarrierFlow = DataFlow::Global<SizeBarrierConfig>;
private int getASizeAddend(DataFlow::Node node) {
exists(DataFlow::Node source |
SizeBarrierFlow::flow(source, node) and
hasSize(_, source, result)
)
}
/**
* Holds if `small <= large + k` holds if `g` evaluates to `edge`.
*/
private predicate operandGuardChecks(
IRGuardCondition g, Operand small, DataFlow::Node large, int k, boolean edge
) {
SizeBarrierFlow::flowTo(large) and
SizeBarrierConfig::isSink(DataFlow::operandNode(small), large, g, k, edge)
}
/**
* Gets an instruction `instr` that is guarded by a check such as `instr <= small + delta` where
* `small <= _ + k` and `small` is the "small side" of of a relational comparison that checks
* whether `small <= size` where `size` is the size of an allocation.
*/
private Instruction getABarrierInstruction0(int delta, int k) {
exists(
IRGuardCondition g, ValueNumber value, Operand small, boolean edge, DataFlow::Node large
|
// We know:
// 1. result <= value + delta (by `bounded`)
// 2. value <= large + k (by `operandGuardChecks`).
// So:
// result <= value + delta (by 1.)
// <= large + k + delta (by 2.)
small = value.getAUse() and
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](small), large,
pragma[only_bind_into](k), pragma[only_bind_into](edge)) and
bounded(result, value.getAnInstruction(), delta) and
g.controls(result.getBlock(), edge) and
k < getASizeAddend(large)
)
}
/**
* Gets an instruction that is guarded by a guard condition which ensures that
* the value of the instruction is upper-bounded by size of some allocation.
*/
bindingset[state]
pragma[inline_late]
Instruction getABarrierInstruction(int state) {
exists(int delta, int k |
state > k + delta and
// result <= "size of allocation" + delta + k
// < "size of allocation" + state
result = getABarrierInstruction0(delta, k)
)
}
/**
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
* the value of the node is upper-bounded by size of some allocation.
*/
DataFlow::Node getABarrierNode(int state) {
exists(DataFlow::Node source, int delta, int k |
SizeBarrierFlow::flow(source, result) and
hasSize(_, source, state) and
result.asInstruction() = getABarrierInstruction0(delta, k) and
state > k + delta
// so now we have:
// result <= "size of allocation" + delta + k
// < "size of allocation" + state
)
}
}