Merge branch 'main' into print-global

This commit is contained in:
Jeroen Ketema
2023-07-25 14:10:33 +02:00
committed by GitHub
93 changed files with 3141 additions and 917 deletions

View File

@@ -1,6 +1,54 @@
/**
* This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow
* from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds.
*
* Consider the following snippet:
* ```cpp
* 1. char* base = (char*)malloc(size);
* 2. char* end = base + size;
* 3. for(int *p = base; p <= end; p++) {
* 4. use(*p); // BUG: Should have been bounded by `p < end`.
* 5. }
* ```
* this file identifies the flow from `new int[size]` to `base + size`.
*
* This is done using the product-flow library. The configuration tracks flow from the pair
* `(allocation, size of allocation)` to a pair `(a, b)` where there exists a pointer-arithmetic instruction
* `pai = a + r` such that `b` is a dataflow node where `b <= r`. Because there will be a dataflow-path from
* `allocation` to `a` this means that the `pai` will compute a pointer that is some number of elements beyond
* the end position of the allocation. See `pointerAddInstructionHasBounds` for the implementation of this.
*
* In the above example, the pair `(a, b)` is `(base, size)` with `base` and `size` coming from the expression
* `base + size` on line 2, which is also the pointer-arithmetic instruction. In general, the pair does not necessarily
* correspond directly to the operands of the pointer-arithmetic instruction.
* In the following example, the pair is again `(base, size)`, but with `base` coming from line 3 and `size` from line 2,
* and the pointer-arithmetic instruction being `base + n` on line 3:
* ```cpp
* 1. int* base = new int[size];
* 2. if(n <= size) {
* 3. int* end = base + n;
* 4. for(int* p = base; p <= end; ++p) {
* 5. *p = 0; // BUG: Should have been bounded by `p < end`.
* 6. }
* 7. }
* ```
*
* Handling false positives:
*
* Consider a snippet such as:
* ```cpp
* 1. int* base = new int[size];
* 2. int n = condition() ? size : 0;
* 3. if(n >= size) return;
* 4. int* end = base + n;
* 5. for(int* p = base; p <= end; ++p) {
* 6. *p = 0; // This is fine since `end < base + size`
* 7. }
* ```
* In order to remove this false positive we define a barrier (see `SizeBarrier::SizeBarrierConfig`) that finds the
* possible guards that compares a value to the size of the allocation. In the above example, this is the `(n >= size)`
* guard on line 3. `SizeBarrier::getABarrierNode` then defines any node that is guarded by such a guard as a barrier
* in the dataflow configuration.
*/
private import cpp
@@ -42,16 +90,14 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
* ```
* In this case, the sink pair identified by the product flow library (without any additional barriers)
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
* instruction `pai` such that:
* 1. The left-hand of `pai` flows from the allocation, and
* 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`)
* instruction `pai = a + b` such that:
* 1. the allocation flows to `a`, and
* 2. `b <= n` where `n` is the `n` in `p[n]`
* but because there's a strict comparison that compares `n` against the size of the allocation this
* snippet is fine.
*/
module Barrier2 {
private class FlowState2 = int;
private module BarrierConfig2 implements DataFlow::ConfigSig {
module SizeBarrier {
private module SizeBarrierConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
// The sources is the same as in the sources for the second
// projection in the `AllocToInvalidPointerConfig` module.
@@ -59,19 +105,19 @@ module Barrier2 {
}
additional predicate isSink(
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state,
boolean testIsTrue
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue
) {
// The sink is any "large" side of a relational comparison.
g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue)
// The sink is any "large" side of a relational comparison. i.e., the `right` expression
// in a guard such as `left < right + k`.
g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue)
}
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
}
private import DataFlow::Global<BarrierConfig2>
private import DataFlow::Global<SizeBarrierConfig>
private FlowState2 getAFlowStateForNode(DataFlow::Node node) {
private int getAFlowStateForNode(DataFlow::Node node) {
exists(DataFlow::Node source |
flow(source, node) and
hasSize(_, source, result)
@@ -79,14 +125,14 @@ module Barrier2 {
}
private predicate operandGuardChecks(
IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge
IRGuardCondition g, Operand left, Operand right, int state, boolean edge
) {
exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 |
exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k |
nRight.asOperand() = right and
nLeft.asOperand() = left and
BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and
SizeBarrierConfig::isSink(nLeft, nRight, g, k, edge) and
state = getAFlowStateForNode(nRight) and
state0 <= state
k <= state
)
}
@@ -94,7 +140,7 @@ module Barrier2 {
* Gets an instruction that is guarded by a guard condition which ensures that
* the value of the instruction is upper-bounded by size of some allocation.
*/
Instruction getABarrierInstruction(FlowState2 state) {
Instruction getABarrierInstruction(int state) {
exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge |
use = value.getAUse() and
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _,
@@ -108,7 +154,7 @@ module Barrier2 {
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
* the value of the node is upper-bounded by size of some allocation.
*/
DataFlow::Node getABarrierNode(FlowState2 state) {
DataFlow::Node getABarrierNode(int state) {
result.asOperand() = getABarrierInstruction(state).getAUse()
}
@@ -116,9 +162,7 @@ module Barrier2 {
* Gets the block of a node that is guarded (see `getABarrierInstruction` or
* `getABarrierNode` for the definition of what it means to be guarded).
*/
IRBlock getABarrierBlock(FlowState2 state) {
result.getAnInstruction() = getABarrierInstruction(state)
}
IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) }
}
private module InterestingPointerAddInstruction {
@@ -151,24 +195,8 @@ private module InterestingPointerAddInstruction {
}
/**
* A product-flow configuration for flow from an (allocation, size) pair to a
* pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
*
* The goal of this query is to find patterns such as:
* ```cpp
* 1. char* begin = (char*)malloc(size);
* 2. char* end = begin + size;
* 3. for(int *p = begin; p <= end; p++) {
* 4. use(*p);
* 5. }
* ```
*
* We do this by splitting the task up into two configurations:
* 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
* 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
*
* Finally, the range-analysis library will find a load from (or store to) an address that
* is non-strictly upper-bounded by `end` (which in this case is `*p`).
* A product-flow configuration for flow from an `(allocation, size)` pair to a
* pointer-arithmetic operation `pai` such that `pai <= allocation + size`.
*/
private module Config implements ProductFlow::StateConfigSig {
class FlowState1 = Unit;
@@ -176,7 +204,7 @@ private module Config implements ProductFlow::StateConfigSig {
class FlowState2 = int;
predicate isSourcePair(
DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2
DataFlow::Node allocSource, FlowState1 unit, DataFlow::Node sizeSource, FlowState2 sizeAddend
) {
// In the case of an allocation like
// ```cpp
@@ -184,21 +212,21 @@ private module Config implements ProductFlow::StateConfigSig {
// ```
// we use `state2` to remember that there was an offset (in this case an offset of `1`) added
// to the size of the allocation. This state is then checked in `isSinkPair`.
exists(state1) and
hasSize(source1.asConvertedExpr(), source2, state2)
exists(unit) and
hasSize(allocSource.asConvertedExpr(), sizeSource, sizeAddend)
}
predicate isSinkPair(
DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2
DataFlow::Node allocSink, FlowState1 unit, DataFlow::Node sizeSink, FlowState2 sizeAddend
) {
exists(state1) and
exists(unit) and
// We check that the delta computed by the range analysis matches the
// state value that we set in `isSourcePair`.
pointerAddInstructionHasBounds0(_, sink1, sink2, state2)
pointerAddInstructionHasBounds0(_, allocSink, sizeSink, sizeAddend)
}
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
node = Barrier2::getABarrierNode(state)
node = SizeBarrier::getABarrierNode(state)
}
predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) }
@@ -211,7 +239,7 @@ private module Config implements ProductFlow::StateConfigSig {
private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
/**
* Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the
* Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink` is the
* left operand of the pointer-arithmetic operation.
*
* For example in,
@@ -220,37 +248,37 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
* ```
* We will have:
* - `pai` is `p + (size + 1)`,
* - `sink1` is `p`
* - `sink2` is `size`
* - `allocSink` is `p`
* - `sizeSink` is `size`
* - `delta` is `1`.
*/
pragma[nomagic]
private predicate pointerAddInstructionHasBounds0(
PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta
PointerAddInstruction pai, DataFlow::Node allocSink, DataFlow::Node sizeSink, int delta
) {
InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and
exists(Instruction right, Instruction instr2 |
exists(Instruction right, Instruction sizeInstr |
pai.getRight() = right and
pai.getLeft() = sink1.asInstruction() and
instr2 = sink2.asInstruction() and
// pai.getRight() <= sink2 + delta
bounded1(right, instr2, delta) and
not right = Barrier2::getABarrierInstruction(delta) and
not instr2 = Barrier2::getABarrierInstruction(delta)
pai.getLeft() = allocSink.asInstruction() and
sizeInstr = sizeSink.asInstruction() and
// pai.getRight() <= sizeSink + delta
bounded1(right, sizeInstr, delta) and
not right = SizeBarrier::getABarrierInstruction(delta) and
not sizeInstr = SizeBarrier::getABarrierInstruction(delta)
)
}
/**
* Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand
* side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai`
* is non-strictly upper bounded by the size of `alllocation` + `delta`.
* Holds if `allocation` flows to `allocSink` and `allocSink` represents the left operand
* of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and
* `b <= allocation + delta`.
*/
pragma[nomagic]
predicate pointerAddInstructionHasBounds(
DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta
DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node allocSink, int delta
) {
exists(DataFlow::Node sink2 |
AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and
pointerAddInstructionHasBounds0(pai, sink1, sink2, delta)
exists(DataFlow::Node sizeSink |
AllocToInvalidPointerFlow::flow(allocation, _, allocSink, sizeSink) and
pointerAddInstructionHasBounds0(pai, allocSink, sizeSink, delta)
)
}

View File

@@ -2,6 +2,75 @@
* This file provides the second phase of the `cpp/invalid-pointer-deref` query that identifies flow
* from the out-of-bounds pointer identified by the `AllocationToInvalidPointer.qll` library to
* a dereference of the out-of-bounds pointer.
*
* Consider the following snippet:
* ```cpp
* 1. char* base = (char*)malloc(size);
* 2. char* end = base + size;
* 3. for(char *p = base; p <= end; p++) {
* 4. use(*p); // BUG: Should have been bounded by `p < end`.
* 5. }
* ```
* this file identifies the flow from `base + size` to `end`. We call `base + size` the "dereference source" and `end`
* the "dereference sink" (even though `end` is not actually dereferenced we will use this term because we will perform
* dataflow to find a use of a pointer `x` such that `x <= end` which is dereferenced. In the above example, `x` is `p`
* on line 4).
*
* Merely _constructing_ a pointer that's out-of-bounds is fine if the pointer is never dereferenced (in reality, the
* standard only guarantees that it is safe to move the pointer one element past the last element, but we ignore that
* here). So this step is about identifying which of the out-of-bounds pointers found by `pointerAddInstructionHasBounds`
* in `AllocationToInvalidPointer.qll` are actually being dereferenced. We do this using a regular dataflow
* configuration (see `InvalidPointerToDerefConfig`).
*
* The dataflow traversal defines the set of sources as any dataflow node `n` such that there exists a pointer-arithmetic
* instruction `pai` found by `AllocationToInvalidPointer.qll` and a `n.asInstruction() >= pai + deltaDerefSourceAndPai`.
* Here, `deltaDerefSourceAndPai` is the constant difference between the source we track for finding a dereference and the
* pointer-arithmetic instruction.
*
* The set of sinks is defined as any dataflow node `n` such that `addr <= n.asInstruction() + deltaDerefSinkAndDerefAddress`
* for some address operand `addr` and constant difference `deltaDerefSinkAndDerefAddress`. Since an address operand is
* always consumed by an instruction that performs a dereference this lets us identify a "bad dereference". We call the
* instruction that consumes the address operand the "operation".
*
* For example, consider the flow from `base + size` to `end` above. The sink is `end` on line 3 because
* `p <= end.asInstruction() + deltaDerefSinkAndDerefAddress`, where `p` is the address operand in `use(*p)` and
* `deltaDerefSinkAndDerefAddress >= 0`. The load attached to `*p` is the "operation". To ensure that the path makes
* intuitive sense, we only pick operations that are control-flow reachable from the dereference sink.
*
* To compute how many elements the dereference is beyond the end position of the allocation, we sum the two deltas
* `deltaDerefSourceAndPai` and `deltaDerefSinkAndDerefAddress`. This is done in the `operationIsOffBy` predicate
* (which is the only predicate exposed by this file).
*
* Handling false positives:
*
* Consider the following snippet:
* ```cpp
* 1. char *p = new char[size];
* 2. char *end = p + size;
* 3. if (p < end) {
* 4. p += 1;
* 5. }
* 6. if (p < end) {
* 7. int val = *p; // GOOD
* 8. }
* ```
* this is safe because `p` is guarded to be strictly less than `end` on line 6 before the dereference on line 7. However, if we
* run the query on the above without further modifications we would see an alert on line 7. This is because range analysis infers
* that `p <= end` after the increment on line 4, and thus the result of `p += 1` is seen as a valid dereference source. This
* node then flows to `p` on line 6 (which is a valid dereference sink since it non-strictly upper bounds an address operand), and
* range analysis then infers that the address operand of `*p` (i.e., `p`) is non-strictly upper bounded by `p`, and thus reports
* an alert on line 7.
*
* In order to handle the above false positive, we define a barrier that identifies guards such as `p < end` that ensures that a value
* is less than the pointer-arithmetic instruction that computed the invalid pointer. This is done in the `InvalidPointerToDerefBarrier`
* module. Since the node we are tracking is not necessarily _equal_ to the pointer-arithmetic instruction, but rather satisfies
* `node.asInstruction() <= pai + deltaDerefSourceAndPai`, we need to account for the delta when checking if a guard is sufficiently
* strong to infer that a future dereference is safe. To do this, we check that the guard guarantees that a node `n` satisfies
* `n < node + k` where `node` is a node we know is equal to the value of the dereference source (i.e., it satisfies
* `node.asInstruction() <= pai + deltaDerefSourceAndPai`) and `k <= deltaDerefSourceAndPai`. Combining this we have
* `n < node + k <= node + deltaDerefSourceAndPai <= pai + 2*deltaDerefSourceAndPai` (TODO: Oops. This math doesn't quite work out.
* I think this is because we need to redefine the `BarrierConfig` to start flow at the pointer-arithmetic instruction instead of
* at the dereference source. When combined with TODO above it's easy to show that this guard ensures that the dereference is safe).
*/
private import cpp
@@ -19,10 +88,10 @@ private module InvalidPointerToDerefBarrier {
}
additional predicate isSink(
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int state, boolean testIsTrue
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int k, boolean testIsTrue
) {
// The sink is any "large" side of a relational comparison.
g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue)
g.comparesLt(left.asOperand(), right.asOperand(), k, true, testIsTrue)
}
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
@@ -40,12 +109,12 @@ private module InvalidPointerToDerefBarrier {
private predicate operandGuardChecks(
IRGuardCondition g, Operand left, Operand right, int state, boolean edge
) {
exists(DataFlow::Node nLeft, DataFlow::Node nRight, int state0 |
exists(DataFlow::Node nLeft, DataFlow::Node nRight, int k |
nRight.asOperand() = right and
nLeft.asOperand() = left and
BarrierConfig::isSink(nLeft, nRight, g, state0, edge) and
BarrierConfig::isSink(nLeft, nRight, g, k, edge) and
state = getInvalidPointerToDerefSourceDelta(nRight) and
state0 <= state
k <= state
)
}
@@ -85,47 +154,50 @@ private module InvalidPointerToDerefConfig implements DataFlow::ConfigSig {
private import DataFlow::Global<InvalidPointerToDerefConfig>
/**
* Holds if `source1` is dataflow node that represents an allocation that flows to the
* left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with
* a pointer-value that is non-strictly upper bounded by `pai + delta`.
* Holds if `allocSource` is dataflow node that represents an allocation that flows to the
* left-hand side of the pointer-arithmetic `pai`, and `derefSource <= pai + derefSourcePaiDelta`.
*
* For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such
* as `(p + size) + 1` and `derefSource` is the node representing `(p + size) + 1`. In this
* case `delta` is 1.
* case `derefSourcePaiDelta` is 1.
*/
private predicate invalidPointerToDerefSource(
DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta
DataFlow::Node allocSource, PointerArithmeticInstruction pai, DataFlow::Node derefSource,
int deltaDerefSourceAndPai
) {
exists(int delta0 |
// Note that `delta` is not necessarily equal to `delta0`:
// `delta0` is the constant offset added to the size of the allocation, and
// delta is the constant difference between the pointer-arithmetic instruction
exists(int rhsSizeDelta |
// Note that `deltaDerefSourceAndPai` is not necessarily equal to `rhsSizeDelta`:
// `rhsSizeDelta` is the constant offset added to the size of the allocation, and
// `deltaDerefSourceAndPai` is the constant difference between the pointer-arithmetic instruction
// and the instruction computing the address for which we will search for a dereference.
AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and
bounded2(derefSource.asInstruction(), pai, delta) and
delta >= 0 and
AllocToInvalidPointer::pointerAddInstructionHasBounds(allocSource, pai, _, rhsSizeDelta) and
// pai <= derefSource + deltaDerefSourceAndPai and deltaDerefSourceAndPai <= 0 is equivalent to
// derefSource >= pai + deltaDerefSourceAndPai and deltaDerefSourceAndPai >= 0
bounded1(pai, derefSource.asInstruction(), deltaDerefSourceAndPai) and
deltaDerefSourceAndPai <= 0 and
// TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2`
// private to `AllocationToInvalidPointer.qll`.
not derefSource.getBasicBlock() = AllocToInvalidPointer::Barrier2::getABarrierBlock(delta0)
not derefSource.getBasicBlock() =
AllocToInvalidPointer::SizeBarrier::getABarrierBlock(rhsSizeDelta)
)
}
/**
* Holds if `sink` is a sink for `InvalidPointerToDerefConfig` and `i` is a `StoreInstruction` that
* writes to an address that non-strictly upper-bounds `sink`, or `i` is a `LoadInstruction` that
* reads from an address that non-strictly upper-bounds `sink`.
* writes to an address `addr` such that `addr <= sink`, or `i` is a `LoadInstruction` that
* reads from an address `addr` such that `addr <= sink`.
*/
pragma[inline]
private predicate isInvalidPointerDerefSink(
DataFlow::Node sink, Instruction i, string operation, int delta
DataFlow::Node sink, Instruction i, string operation, int deltaDerefSinkAndDerefAddress
) {
exists(AddressOperand addr, Instruction s, IRBlock b |
s = sink.asInstruction() and
bounded(addr.getDef(), s, delta) and
delta >= 0 and
bounded(addr.getDef(), s, deltaDerefSinkAndDerefAddress) and
deltaDerefSinkAndDerefAddress >= 0 and
i.getAnOperand() = addr and
b = i.getBlock() and
not b = InvalidPointerToDerefBarrier::getABarrierBlock(delta)
not b = InvalidPointerToDerefBarrier::getABarrierBlock(deltaDerefSinkAndDerefAddress)
|
i instanceof StoreInstruction and
operation = "write"
@@ -165,13 +237,13 @@ private predicate paiForDereferenceSink(PointerArithmeticInstruction pai, DataFl
*/
private predicate derefSinkToOperation(
DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation,
string description, int delta
string description, int deltaDerefSinkAndDerefAddress
) {
exists(Instruction i |
exists(Instruction operationInstr |
paiForDereferenceSink(pai, pragma[only_bind_into](derefSink)) and
isInvalidPointerDerefSink(derefSink, i, description, delta) and
i = getASuccessor(derefSink.asInstruction()) and
operation.asInstruction() = i
isInvalidPointerDerefSink(derefSink, operationInstr, description, deltaDerefSinkAndDerefAddress) and
operationInstr = getASuccessor(derefSink.asInstruction()) and
operation.asInstruction() = operationInstr
)
}

View File

@@ -35,14 +35,5 @@ bindingset[i]
pragma[inline_late]
predicate bounded1(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) }
/**
* Holds if `i <= b + delta`.
*
* This predicate enforces a join-order that ensures that `b` has already been bound.
*/
bindingset[b]
pragma[inline_late]
predicate bounded2(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) }
/** Holds if `i <= b + delta`. */
predicate bounded = boundedImpl/3;