diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll new file mode 100644 index 00000000000..50e13945e29 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/AllocationToInvalidPointer.qll @@ -0,0 +1,228 @@ +private import cpp +private import semmle.code.cpp.ir.dataflow.internal.ProductFlow +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.ir.IR +private import codeql.util.Unit +private import RangeAnalysisUtil + +private VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result } + +/** + * Holds if the `(n, state)` pair represents the source of flow for the size + * expression associated with `alloc`. + */ +predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { + exists(VariableAccess va, Expr size, int delta | + size = alloc.getSizeExpr() and + // Get the unique variable in a size expression like `x` in `malloc(x + 1)`. + va = unique( | | getAVariableAccess(size)) and + // Compute `delta` as the constant difference between `x` and `x + 1`. + bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = size), + any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and + n.asConvertedExpr() = va.getFullyConverted() and + state = delta + ) +} + +/** + * A module that encapsulates a barrier guard to remove false positives from flow like: + * ```cpp + * char *p = new char[size]; + * // ... + * unsigned n = size; + * // ... + * if(n < size) { + * use(p[n]); + * } + * ``` + * In this case, the sink pair identified by the product flow library (without any additional barriers) + * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic + * instruction `pai` such that: + * 1. 
The left-hand of `pai` flows from the allocation, and + * 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`) + * but because there's a strict comparison that compares `n` against the size of the allocation this + * snippet is fine. + */ +module Barrier2 { + private class FlowState2 = int; + + private module BarrierConfig2 implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + // The sources are the same as the sources for the second + // projection in the `AllocToInvalidPointerConfig` module. + hasSize(_, source, _) + } + + additional predicate isSink( + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state, + boolean testIsTrue + ) { + // The sink is any "large" side of a relational comparison. + g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + } + + predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } + } + + private import DataFlow::Global + + private FlowState2 getAFlowStateForNode(DataFlow::Node node) { + exists(DataFlow::Node source | + flow(source, node) and + hasSize(_, source, result) + ) + } + + private predicate operandGuardChecks( + IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge + ) { + exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 | + nRight.asOperand() = right and + nLeft.asOperand() = left and + BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and + state = getAFlowStateForNode(nRight) and + state0 <= state + ) + } + + Instruction getABarrierInstruction(FlowState2 state) { + exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | + use = value.getAUse() and + operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, + pragma[only_bind_into](state), pragma[only_bind_into](edge)) and + result = value.getAnInstruction() and + g.controls(result.getBlock(), edge) + ) + } + + DataFlow::Node getABarrierNode(FlowState2 
state) { + result.asOperand() = getABarrierInstruction(state).getAUse() + } + + IRBlock getABarrierBlock(FlowState2 state) { + result.getAnInstruction() = getABarrierInstruction(state) + } +} + +module InterestingPointerAddInstruction { + private module PointerAddInstructionConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + // The sources are the same as the sources for the first + // projection in the `AllocToInvalidPointerConfig` module. + hasSize(source.asConvertedExpr(), _, _) + } + + predicate isSink(DataFlow::Node sink) { + sink.asInstruction() = any(PointerAddInstruction pai).getLeft() + } + } + + private import DataFlow::Global + + predicate isInteresting(PointerAddInstruction pai) { + exists(DataFlow::Node n | + n.asInstruction() = pai.getLeft() and + flowTo(n) + ) + } +} + +/** + * A product-flow configuration for flow from an (allocation, size) pair to a + * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`. + * + * The goal of this query is to find patterns such as: + * ```cpp + * 1. char* begin = (char*)malloc(size); + * 2. char* end = begin + size; + * 3. for(char *p = begin; p <= end; p++) { + * 4. use(*p); + * 5. } + * ``` + * + * We do this by splitting the task up into two configurations: + * 1. `AllocToInvalidPointerConfig` finds flow from `malloc(size)` to `begin + size`, and + * 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3). + * + * Finally, the range-analysis library will find a load from (or store to) an address that + * is non-strictly upper-bounded by `end` (which in this case is `*p`). 
+ */ +private module Config implements ProductFlow::StateConfigSig { + class FlowState1 = Unit; + + class FlowState2 = int; + + predicate isSourcePair( + DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2 + ) { + // In the case of an allocation like + // ```cpp + // malloc(size + 1); + // ``` + // we use `state2` to remember that there was an offset (in this case an offset of `1`) added + // to the size of the allocation. This state is then checked in `isSinkPair`. + exists(state1) and + hasSize(source1.asConvertedExpr(), source2, state2) + } + + predicate isSinkPair( + DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2 + ) { + exists(state1) and + // We check that the delta computed by the range analysis matches the + // state value that we set in `isSourcePair`. + pointerAddInstructionHasBounds0(_, sink1, sink2, state2) + } + + predicate isBarrier2(DataFlow::Node node, FlowState2 state) { + node = Barrier2::getABarrierNode(state) + } + + predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) } + + predicate isBarrierOut2(DataFlow::Node node) { + node = any(DataFlow::SsaPhiNode phi).getAnInput(true) + } +} + +private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; + +/** + * Holds if the right operand of `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the + * left operand of the pointer-arithmetic operation. + * + * For example, in + * ```cpp + * char* end = p + (size + 1); + * ``` + * We will have: + * - `pai` is `p + (size + 1)`, + * - `sink1` is `p` + * - `sink2` is `size` + * - `delta` is `1`. 
+ */ +pragma[nomagic] +private predicate pointerAddInstructionHasBounds0( + PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta +) { + InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and + exists(Instruction right, Instruction instr2 | + pai.getRight() = right and + pai.getLeft() = sink1.asInstruction() and + instr2 = sink2.asInstruction() and + bounded1(right, instr2, delta) and + not right = Barrier2::getABarrierInstruction(delta) and + not instr2 = Barrier2::getABarrierInstruction(delta) + ) +} + +pragma[nomagic] +predicate pointerAddInstructionHasBounds( + DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta +) { + exists(DataFlow::Node sink2 | + AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and + pointerAddInstructionHasBounds0(pai, sink1, sink2, delta) + ) +} diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll new file mode 100644 index 00000000000..4aacce01acc --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/InvalidPointerToDereference.qll @@ -0,0 +1,191 @@ +private import cpp +private import semmle.code.cpp.dataflow.new.DataFlow +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.ir.IR +private import AllocationToInvalidPointer as AllocToInvalidPointer +private import RangeAnalysisUtil + +private module InvalidPointerToDerefBarrier { + private module BarrierConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + // The sources are the same as the sources for `InvalidPointerToDerefConfig`. 
+ invalidPointerToDerefSource(_, _, source, _) + } + + additional predicate isSink( + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int state, boolean testIsTrue + ) { + // The sink is any "large" side of a relational comparison. + g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) + } + + predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } + } + + private module BarrierFlow = DataFlow::Global; + + private int getInvalidPointerToDerefSourceDelta(DataFlow::Node node) { + exists(DataFlow::Node source | + BarrierFlow::flow(source, node) and + invalidPointerToDerefSource(_, _, source, result) + ) + } + + private predicate operandGuardChecks( + IRGuardCondition g, Operand left, Operand right, int state, boolean edge + ) { + exists(DataFlow::Node nLeft, DataFlow::Node nRight, int state0 | + nRight.asOperand() = right and + nLeft.asOperand() = left and + BarrierConfig::isSink(nLeft, nRight, g, state0, edge) and + state = getInvalidPointerToDerefSourceDelta(nRight) and + state0 <= state + ) + } + + Instruction getABarrierInstruction(int state) { + exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | + use = value.getAUse() and + operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, state, + pragma[only_bind_into](edge)) and + result = value.getAnInstruction() and + g.controls(result.getBlock(), edge) + ) + } + + DataFlow::Node getABarrierNode() { result.asOperand() = getABarrierInstruction(_).getAUse() } + + pragma[nomagic] + IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) } +} + +/** + * A configuration to track flow from a pointer-arithmetic operation found + * by `AllocToInvalidPointerConfig` to a dereference of the pointer. 
+ */ +private module InvalidPointerToDerefConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { invalidPointerToDerefSource(_, _, source, _) } + + pragma[inline] + predicate isSink(DataFlow::Node sink) { isInvalidPointerDerefSink(sink, _, _, _) } + + predicate isBarrier(DataFlow::Node node) { + node = any(DataFlow::SsaPhiNode phi | not phi.isPhiRead()).getAnInput(true) + or + node = InvalidPointerToDerefBarrier::getABarrierNode() + } +} + +private import DataFlow::Global + +/** + * Holds if `source1` is a dataflow node that represents an allocation that flows to the + * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with + * a pointer-value that is non-strictly upper bounded by `pai + delta`. + * + * For example, `pai` may be the pointer-arithmetic operation `p + size` in an expression such + * as `(p + size) + 1`, and `derefSource` the node representing `(p + size) + 1`. In this + * case `delta` is 1. + */ +private predicate invalidPointerToDerefSource( + DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta +) { + exists(int delta0 | + // Note that `delta` is not necessarily equal to `delta0`: + // `delta0` is the constant offset added to the size of the allocation, and + // `delta` is the constant difference between the pointer-arithmetic instruction + // and the instruction computing the address for which we will search for a dereference. + AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and + bounded2(derefSource.asInstruction(), pai, delta) and + delta >= 0 and + // TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2` + // private to `AllocationToInvalidPointer.qll`. 
+ not derefSource.getBasicBlock() = AllocToInvalidPointer::Barrier2::getABarrierBlock(delta0) + ) +} + +/** + * Holds if `sink` is a sink for `InvalidPointerToDerefConfig` and `i` is a `StoreInstruction` that + * writes to an address that is non-strictly upper-bounded by `sink + delta`, or `i` is a + * `LoadInstruction` that reads from an address that is non-strictly upper-bounded by `sink + delta`. + */ +pragma[inline] +private predicate isInvalidPointerDerefSink( + DataFlow::Node sink, Instruction i, string operation, int delta +) { + exists(AddressOperand addr, Instruction s, IRBlock b | + s = sink.asInstruction() and + bounded(addr.getDef(), s, delta) and + delta >= 0 and + i.getAnOperand() = addr and + b = i.getBlock() and + not b = InvalidPointerToDerefBarrier::getABarrierBlock(delta) + | + i instanceof StoreInstruction and + operation = "write" + or + i instanceof LoadInstruction and + operation = "read" + ) +} + +/** + * Gets an instruction that is control-flow reachable from `instr`, including `instr` itself. + */ +bindingset[instr, result] +pragma[inline_late] +private Instruction getASuccessor(Instruction instr) { + exists(IRBlock b, int instrIndex, int resultIndex | + b.getInstruction(instrIndex) = instr and + b.getInstruction(resultIndex) = result + | + resultIndex >= instrIndex + ) + or + instr.getBlock().getASuccessor+() = result.getBlock() +} + +private predicate paiForDereferenceSink(PointerArithmeticInstruction pai, DataFlow::Node derefSink) { + exists(DataFlow::Node derefSource | + invalidPointerToDerefSource(_, pai, derefSource, _) and + flow(derefSource, derefSink) + ) +} + +/** + * Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to + * be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and + * `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds. 
+ */ +private predicate derefSinkToOperation( + DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation, + string description, int delta +) { + exists(Instruction i | + paiForDereferenceSink(pai, pragma[only_bind_into](derefSink)) and + isInvalidPointerDerefSink(derefSink, i, description, delta) and + i = getASuccessor(derefSink.asInstruction()) and + operation.asInstruction() = i + ) +} + +/** + * Holds if `allocation` is the result of an allocation that flows to the left-hand side of `pai`, and where + * the right-hand side of `pai` is an offset such that the result of `pai` is an out-of-bounds pointer. + * + * Furthermore, `derefSource` is at least as large as `pai` and flows to `derefSink` before being dereferenced + * by `operation` (which is either a `StoreInstruction` or `LoadInstruction`). The result is that `operation` + * dereferences a pointer that's "off by `delta`" number of elements. + */ +predicate operationIsOffBy( + DataFlow::Node allocation, PointerArithmeticInstruction pai, DataFlow::Node derefSource, + DataFlow::Node derefSink, string description, DataFlow::Node operation, int delta +) { + exists(int deltaDerefSourceAndPai, int deltaDerefSinkAndDerefAddress | + invalidPointerToDerefSource(allocation, pai, derefSource, deltaDerefSourceAndPai) and + flow(derefSource, derefSink) and + derefSinkToOperation(derefSink, pai, operation, description, deltaDerefSinkAndDerefAddress) and + delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress + ) +} diff --git a/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll new file mode 100644 index 00000000000..0a6b8cdcbb0 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/security/InvalidPointerDereference/RangeAnalysisUtil.qll @@ -0,0 +1,36 @@ +private import cpp +private import semmle.code.cpp.ir.dataflow.internal.ProductFlow +private import 
semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.RangeAnalysis +private import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticExprSpecific +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.ir.IR +private import codeql.util.Unit + +pragma[nomagic] +private Instruction getABoundIn(SemBound b, IRFunction func) { + getSemanticExpr(result) = b.getExpr(0) and + result.getEnclosingIRFunction() = func +} + +/** + * Holds if `i <= b + delta`. + */ +pragma[inline] +private predicate boundedImpl(Instruction i, Instruction b, int delta) { + exists(SemBound bound, IRFunction func | + semBounded(getSemanticExpr(i), bound, delta, true, _) and + b = getABoundIn(bound, func) and + i.getEnclosingIRFunction() = func + ) +} + +bindingset[i] +pragma[inline_late] +predicate bounded1(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } + +bindingset[b] +pragma[inline_late] +predicate bounded2(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } + +predicate bounded = boundedImpl/3; diff --git a/cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql b/cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql index 7a55178c775..93cac5939d4 100644 --- a/cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql +++ b/cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql @@ -16,447 +16,11 @@ */ import cpp -import semmle.code.cpp.ir.dataflow.internal.ProductFlow -import semmle.code.cpp.rangeanalysis.new.internal.semantic.analysis.RangeAnalysis -import semmle.code.cpp.rangeanalysis.new.internal.semantic.SemanticExprSpecific -import semmle.code.cpp.ir.ValueNumbering -import semmle.code.cpp.controlflow.IRGuards +import semmle.code.cpp.dataflow.new.DataFlow import semmle.code.cpp.ir.IR -import codeql.util.Unit import FinalFlow::PathGraph - -pragma[nomagic] -Instruction getABoundIn(SemBound b, IRFunction func) 
{ - getSemanticExpr(result) = b.getExpr(0) and - result.getEnclosingIRFunction() = func -} - -/** - * Holds if `i <= b + delta`. - */ -pragma[inline] -predicate boundedImpl(Instruction i, Instruction b, int delta) { - exists(SemBound bound, IRFunction func | - semBounded(getSemanticExpr(i), bound, delta, true, _) and - b = getABoundIn(bound, func) and - i.getEnclosingIRFunction() = func - ) -} - -bindingset[i] -pragma[inline_late] -predicate bounded1(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } - -bindingset[b] -pragma[inline_late] -predicate bounded2(Instruction i, Instruction b, int delta) { boundedImpl(i, b, delta) } - -VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result } - -/** - * Holds if `(n, state)` pair represents the source of flow for the size - * expression associated with `alloc`. - */ -predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { - exists(VariableAccess va, Expr size, int delta | - size = alloc.getSizeExpr() and - // Get the unique variable in a size expression like `x` in `malloc(x + 1)`. - va = unique( | | getAVariableAccess(size)) and - // Compute `delta` as the constant difference between `x` and `x + 1`. - bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = size), - any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and - n.asConvertedExpr() = va.getFullyConverted() and - state = delta - ) -} - -/** - * A module that encapsulates a barrier guard to remove false positives from flow like: - * ```cpp - * char *p = new char[size]; - * // ... - * unsigned n = size; - * // ... - * if(n < size) { - * use(*p[n]); - * } - * ``` - * In this case, the sink pair identified by the product flow library (without any additional barriers) - * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic - * instruction `pai` such that: - * 1. The left-hand of `pai` flows from the allocation, and - * 2. 
The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`) - * but because there's a strict comparison that compares `n` against the size of the allocation this - * snippet is fine. - */ -module Barrier2 { - private class FlowState2 = int; - - private module BarrierConfig2 implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - // The sources is the same as in the sources for the second - // projection in the `AllocToInvalidPointerConfig` module. - hasSize(_, source, _) - } - - additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state, - boolean testIsTrue - ) { - // The sink is any "large" side of a relational comparison. - g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) - } - - predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } - } - - private import DataFlow::Global - - private FlowState2 getAFlowStateForNode(DataFlow::Node node) { - exists(DataFlow::Node source | - flow(source, node) and - hasSize(_, source, result) - ) - } - - private predicate operandGuardChecks( - IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge - ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 | - nRight.asOperand() = right and - nLeft.asOperand() = left and - BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and - state = getAFlowStateForNode(nRight) and - state0 <= state - ) - } - - Instruction getABarrierInstruction(FlowState2 state) { - exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | - use = value.getAUse() and - operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, - pragma[only_bind_into](state), pragma[only_bind_into](edge)) and - result = value.getAnInstruction() and - g.controls(result.getBlock(), edge) - ) - } - - DataFlow::Node getABarrierNode(FlowState2 state) { - result.asOperand() = 
getABarrierInstruction(state).getAUse() - } - - IRBlock getABarrierBlock(FlowState2 state) { - result.getAnInstruction() = getABarrierInstruction(state) - } -} - -module AllocToInvalidPointer { - /** - * A product-flow configuration for flow from an (allocation, size) pair to a - * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`. - * - * The goal of this query is to find patterns such as: - * ```cpp - * 1. char* begin = (char*)malloc(size); - * 2. char* end = begin + size; - * 3. for(int *p = begin; p <= end; p++) { - * 4. use(*p); - * 5. } - * ``` - * - * We do this by splitting the task up into two configurations: - * 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and - * 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3). - * - * Finally, the range-analysis library will find a load from (or store to) an address that - * is non-strictly upper-bounded by `end` (which in this case is `*p`). - */ - private module Config implements ProductFlow::StateConfigSig { - class FlowState1 = Unit; - - class FlowState2 = int; - - predicate isSourcePair( - DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2 - ) { - // In the case of an allocation like - // ```cpp - // malloc(size + 1); - // ``` - // we use `state2` to remember that there was an offset (in this case an offset of `1`) added - // to the size of the allocation. This state is then checked in `isSinkPair`. - exists(state1) and - hasSize(source1.asConvertedExpr(), source2, state2) - } - - predicate isSinkPair( - DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2 - ) { - exists(state1) and - // We check that the delta computed by the range analysis matches the - // state value that we set in `isSourcePair`. 
- pointerAddInstructionHasBounds0(_, sink1, sink2, state2) - } - - predicate isBarrier2(DataFlow::Node node, FlowState2 state) { - node = Barrier2::getABarrierNode(state) - } - - predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) } - - predicate isBarrierOut2(DataFlow::Node node) { - node = any(DataFlow::SsaPhiNode phi).getAnInput(true) - } - } - - private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState; - - /** - * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the - * left operand of the pointer-arithmetic operation. - * - * For example in, - * ```cpp - * char* end = p + (size + 1); - * ``` - * We will have: - * - `pai` is `p + (size + 1)`, - * - `sink1` is `p` - * - `sink2` is `size` - * - `delta` is `1`. - */ - pragma[nomagic] - private predicate pointerAddInstructionHasBounds0( - PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta - ) { - InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and - exists(Instruction right, Instruction instr2 | - pai.getRight() = right and - pai.getLeft() = sink1.asInstruction() and - instr2 = sink2.asInstruction() and - bounded1(right, instr2, delta) and - not right = Barrier2::getABarrierInstruction(delta) and - not instr2 = Barrier2::getABarrierInstruction(delta) - ) - } - - pragma[nomagic] - predicate pointerAddInstructionHasBounds( - DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta - ) { - exists(DataFlow::Node sink2 | - AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and - pointerAddInstructionHasBounds0(pai, sink1, sink2, delta) - ) - } -} - -module InterestingPointerAddInstruction { - private module PointerAddInstructionConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - // The sources is the same as in the sources for the second - // projection in the `AllocToInvalidPointerConfig` module. 
- hasSize(source.asConvertedExpr(), _, _) - } - - predicate isSink(DataFlow::Node sink) { - sink.asInstruction() = any(PointerAddInstruction pai).getLeft() - } - } - - private import DataFlow::Global - - predicate isInteresting(PointerAddInstruction pai) { - exists(DataFlow::Node n | - n.asInstruction() = pai.getLeft() and - flowTo(n) - ) - } -} - -/** - * Yields any instruction that is control-flow reachable from `instr`. - */ -bindingset[instr, result] -pragma[inline_late] -Instruction getASuccessor(Instruction instr) { - exists(IRBlock b, int instrIndex, int resultIndex | - b.getInstruction(instrIndex) = instr and - b.getInstruction(resultIndex) = result - | - resultIndex >= instrIndex - ) - or - instr.getBlock().getASuccessor+() = result.getBlock() -} - -/** - * Holds if `sink` is a sink for `InvalidPointerToDerefConfig` and `i` is a `StoreInstruction` that - * writes to an address that non-strictly upper-bounds `sink`, or `i` is a `LoadInstruction` that - * reads from an address that non-strictly upper-bounds `sink`. - */ -pragma[inline] -predicate isInvalidPointerDerefSink(DataFlow::Node sink, Instruction i, string operation, int delta) { - exists(AddressOperand addr, Instruction s, IRBlock b | - s = sink.asInstruction() and - boundedImpl(addr.getDef(), s, delta) and - delta >= 0 and - i.getAnOperand() = addr and - b = i.getBlock() and - not b = InvalidPointerToDerefBarrier::getABarrierBlock(delta) - | - i instanceof StoreInstruction and - operation = "write" - or - i instanceof LoadInstruction and - operation = "read" - ) -} - -module InvalidPointerToDerefBarrier { - private module BarrierConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - // The sources is the same as in the sources for `InvalidPointerToDerefConfig`. 
- InvalidPointerToDeref::invalidPointerToDerefSource(_, _, source, _) - } - - additional predicate isSink( - DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, int state, boolean testIsTrue - ) { - // The sink is any "large" side of a relational comparison. - g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) - } - - predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } - } - - private import DataFlow::Global - - private int getInvalidPointerToDerefSourceDelta(DataFlow::Node node) { - exists(DataFlow::Node source | - flow(source, node) and - InvalidPointerToDeref::invalidPointerToDerefSource(_, _, source, result) - ) - } - - private predicate operandGuardChecks( - IRGuardCondition g, Operand left, Operand right, int state, boolean edge - ) { - exists(DataFlow::Node nLeft, DataFlow::Node nRight, int state0 | - nRight.asOperand() = right and - nLeft.asOperand() = left and - BarrierConfig::isSink(nLeft, nRight, g, state0, edge) and - state = getInvalidPointerToDerefSourceDelta(nRight) and - state0 <= state - ) - } - - Instruction getABarrierInstruction(int state) { - exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | - use = value.getAUse() and - operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, state, - pragma[only_bind_into](edge)) and - result = value.getAnInstruction() and - g.controls(result.getBlock(), edge) - ) - } - - DataFlow::Node getABarrierNode() { result.asOperand() = getABarrierInstruction(_).getAUse() } - - pragma[nomagic] - IRBlock getABarrierBlock(int state) { result.getAnInstruction() = getABarrierInstruction(state) } -} - -module InvalidPointerToDeref { - /** - * A configuration to track flow from a pointer-arithmetic operation found - * by `AllocToInvalidPointerConfig` to a dereference of the pointer. 
- */ - private module InvalidPointerToDerefConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { invalidPointerToDerefSource(_, _, source, _) } - - pragma[inline] - predicate isSink(DataFlow::Node sink) { isInvalidPointerDerefSink(sink, _, _, _) } - - predicate isBarrier(DataFlow::Node node) { - node = any(DataFlow::SsaPhiNode phi | not phi.isPhiRead()).getAnInput(true) - or - node = InvalidPointerToDerefBarrier::getABarrierNode() - } - } - - import DataFlow::Global - - /** - * Holds if `source1` is a dataflow node that represents an allocation that flows to the - * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with - * a pointer-value that is non-strictly upper bounded by `pai + delta`. - * - * For example, if `pai` is the pointer-arithmetic operation `p + size` in an expression such - * as `(p + size) + 1`, and `derefSource` is the node representing `(p + size) + 1`, then - * `delta` is 1. - */ - predicate invalidPointerToDerefSource( - DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta - ) { - exists(int delta0 | - // Note that `delta` is not necessarily equal to `delta0`: - // `delta0` is the constant offset added to the size of the allocation, and - // `delta` is the constant difference between the pointer-arithmetic instruction - // and the instruction computing the address for which we will search for a dereference. - AllocToInvalidPointer::pointerAddInstructionHasBounds(source1, pai, _, delta0) and - bounded2(derefSource.asInstruction(), pai, delta) and - delta >= 0 and - // TODO: This condition will go away once #13725 is merged, and then we can make `Barrier2` - // private to `AllocationToInvalidPointer.qll`.
- not derefSource.getBasicBlock() = Barrier2::getABarrierBlock(delta0) - ) - } - - private predicate paiForDereferenceSink(PointerArithmeticInstruction pai, DataFlow::Node derefSink) { - exists(DataFlow::Node derefSource | - invalidPointerToDerefSource(_, pai, derefSource, _) and - flow(derefSource, derefSink) - ) - } - - /** - * Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to - * be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and - * `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds. - */ - private predicate derefSinkToOperation( - DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation, - string description, int delta - ) { - exists(Instruction i | - paiForDereferenceSink(pai, pragma[only_bind_into](derefSink)) and - isInvalidPointerDerefSink(derefSink, i, description, delta) and - i = getASuccessor(derefSink.asInstruction()) and - operation.asInstruction() = i - ) - } - - /** - * Holds if `allocation` is the result of an allocation that flows to the left-hand side of `pai`, and where - * the right-hand side of `pai` is an offset such that the result of `pai` points to an out-of-bounds pointer. - * - * Furthermore, `derefSource` is at least as large as `pai` and flows to `derefSink` before being dereferenced - * by `operation` (which is either a `StoreInstruction` or `LoadInstruction`). The result is that `operation` - * dereferences a pointer that's "off by `delta`" number of elements.
- */ - predicate operationIsOffBy( - DataFlow::Node allocation, PointerArithmeticInstruction pai, DataFlow::Node derefSource, - DataFlow::Node derefSink, string description, DataFlow::Node operation, int delta - ) { - exists(int deltaDerefSourceAndPai, int deltaDerefSinkAndDerefAddress | - invalidPointerToDerefSource(allocation, pai, derefSource, deltaDerefSourceAndPai) and - flow(derefSource, derefSink) and - derefSinkToOperation(derefSink, pai, operation, description, deltaDerefSinkAndDerefAddress) and - delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress - ) - } -} +import semmle.code.cpp.security.InvalidPointerDereference.AllocationToInvalidPointer +import semmle.code.cpp.security.InvalidPointerDereference.InvalidPointerToDereference /** * A configuration that represents the full dataflow path all the way from @@ -470,17 +34,17 @@ module FinalConfig implements DataFlow::StateConfigSig { newtype FlowState = additional TInitial() or additional TPointerArith(PointerArithmeticInstruction pai) { - InvalidPointerToDeref::operationIsOffBy(_, pai, _, _, _, _, _) + operationIsOffBy(_, pai, _, _, _, _, _) } predicate isSource(DataFlow::Node source, FlowState state) { state = TInitial() and - InvalidPointerToDeref::operationIsOffBy(source, _, _, _, _, _, _) + operationIsOffBy(source, _, _, _, _, _, _) } predicate isSink(DataFlow::Node sink, FlowState state) { exists(PointerArithmeticInstruction pai | - InvalidPointerToDeref::operationIsOffBy(_, pai, _, _, _, sink, _) and + operationIsOffBy(_, pai, _, _, _, sink, _) and state = TPointerArith(pai) ) } @@ -492,8 +56,8 @@ module FinalConfig implements DataFlow::StateConfigSig { // identified as creating an out-of-bounds pointer to the result of the pointer-arithmetic // operation.
exists(PointerArithmeticInstruction pai | - AllocToInvalidPointer::pointerAddInstructionHasBounds(_, pai, node1, _) and - InvalidPointerToDeref::operationIsOffBy(_, pai, node2, _, _, _, _) and + pointerAddInstructionHasBounds(_, pai, node1, _) and + operationIsOffBy(_, pai, node2, _, _, _, _) and state1 = TInitial() and state2 = TPointerArith(pai) ) @@ -505,7 +69,7 @@ module FinalConfig implements DataFlow::StateConfigSig { state1 = state2 and exists(PointerArithmeticInstruction pai | state1 = TPointerArith(pai) and - InvalidPointerToDeref::operationIsOffBy(_, pai, _, node1, _, node2, _) + operationIsOffBy(_, pai, _, node1, _, node2, _) ) } } @@ -526,8 +90,7 @@ predicate hasFlowPath( string operation, int delta ) { FinalFlow::flowPath(source, sink) and - InvalidPointerToDeref::operationIsOffBy(source.getNode(), pai, _, _, operation, sink.getNode(), - delta) and + operationIsOffBy(source.getNode(), pai, _, _, operation, sink.getNode(), delta) and sink.getState() = FinalConfig::TPointerArith(pai) }