Packaging: Migrate cpp experimental/semmle folder to lib

Also, fix up some library path dependencies.
This commit is contained in:
Andrew Eisenberg
2021-08-17 13:39:21 -07:00
parent d8e4e25c1e
commit 88ceb42356
14 changed files with 3 additions and 1 deletions

View File

@@ -0,0 +1,65 @@
/**
* EXPERIMENTAL: The API of this module may change without notice.
*
* Provides a class for modeling `RangeSsaDefinition`s with a restricted range.
*/
import cpp
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
/**
* EXPERIMENTAL: The API of this class may change without notice.
*
* An SSA definition for which a range can be deduced. As with
* `RangeSsaDefinition` and `SsaDefinition`, instances of this class
* correspond to points in the program where one or more variables are defined
* or have their value constrained in some way.
*
* Extend this class to add functionality to the range analysis library. All
* four member predicates are abstract, so every concrete subclass has to
* implement each of them.
*/
abstract class SimpleRangeAnalysisDefinition extends RangeSsaDefinition {
/**
* Holds if this `SimpleRangeAnalysisDefinition` adds range information for
* `v`. Because a `SimpleRangeAnalysisDefinition` is just a point in the
* program, it's possible that more than one variable might be defined at
* this point. This predicate clarifies which variable(s) should get range
* information from `this`.
*
* This predicate **must be overridden** to hold for any `v` that can show
* up in the other members of `SimpleRangeAnalysisDefinition`. Conversely,
* the other members **must be accurate** for any `v` in this predicate.
*/
abstract predicate hasRangeInformationFor(StackVariable v);
/**
* Holds if `(this, v)` depends on the range of the unconverted expression
* `e`. This information is used to inform the range analysis about cyclic
* dependencies. Without this information, range analysis might work for
* simple cases but will go into infinite loops on complex code.
*
* For example, when modeling the definition by reference in a call to an
* overloaded `operator=`, written as `v = e`, the definition of `(this, v)`
* depends on `e`.
*/
abstract predicate dependsOnExpr(StackVariable v, Expr e);
/**
* Gets the lower bound (as a `float`) of the variable `v` defined by this
* definition.
*
* Implementations of this predicate should use
* `getFullyConvertedLowerBounds` and `getFullyConvertedUpperBounds` for
* recursive calls to get the bounds of their dependencies.
*/
abstract float getLowerBounds(StackVariable v);
/**
* Gets the upper bound (as a `float`) of the variable `v` defined by this
* definition.
*
* Implementations of this predicate should use
* `getFullyConvertedLowerBounds` and `getFullyConvertedUpperBounds` for
* recursive calls to get the bounds of their dependencies.
*/
abstract float getUpperBounds(StackVariable v);
}
import SimpleRangeAnalysisInternal

View File

@@ -0,0 +1,78 @@
/**
* EXPERIMENTAL: The API of this module may change without notice.
*
* Provides a class for modeling `Expr`s with a restricted range.
*/
import cpp
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
/**
* EXPERIMENTAL: The API of this class may change without notice.
*
* An expression for which a range can be deduced. Extend this class to add
* functionality to the range analysis library.
*
* `getLowerBounds`, `getUpperBounds` and `dependsOnChild` are abstract and
* must be implemented by every concrete subclass; `dependsOnDef` has a default
* implementation that never holds.
*/
abstract class SimpleRangeAnalysisExpr extends Expr {
/**
* Gets the lower bound of the expression.
*
* Implementations of this predicate should use
* `getFullyConvertedLowerBounds` and `getFullyConvertedUpperBounds` for
* recursive calls to get the bounds of their children.
*/
abstract float getLowerBounds();
/**
* Gets the upper bound of the expression.
*
* Implementations of this predicate should use
* `getFullyConvertedLowerBounds` and `getFullyConvertedUpperBounds` for
* recursive calls to get the bounds of their children.
*/
abstract float getUpperBounds();
/**
* Holds if the range of this expression depends on the definition `srcDef`
* for `StackVariable` `srcVar`.
*
* The default implementation is `none()`, i.e. no such dependency is
* reported unless a subclass overrides this predicate.
*
* Because this predicate cannot be recursive, most implementations should
* override `dependsOnChild` instead.
*/
predicate dependsOnDef(RangeSsaDefinition srcDef, StackVariable srcVar) { none() }
/**
* Holds if this expression depends on the range of its unconverted
* subexpression `child`. This information is used to inform the range
* analysis about cyclic dependencies. Without this information, range
* analysis might work for simple cases but will go into infinite loops on
* complex code.
*
* For example, when modeling a function call whose return value depends on
* all of its arguments, implement this predicate as
* `child = this.getAnArgument()`.
*/
abstract predicate dependsOnChild(Expr child);
}
import SimpleRangeAnalysisInternal
/**
* This class exists to prevent the QL front end from emitting compile errors
* inside `SimpleRangeAnalysis.qll` about certain conjuncts being empty
* because the overrides of `SimpleRangeAnalysisExpr` that happen to be in
* scope do not make use of every feature it offers.
*
* The class has no instances: its characteristic predicate contains `none()`,
* and every overridden member is `none()` as well.
*/
private class Empty extends SimpleRangeAnalysisExpr {
Empty() {
// This predicate is complicated enough that the QL type checker doesn't
// see it as empty but simple enough that the optimizer should.
// Do not simplify it to a bare `none()`.
this = this and none()
}
// The overrides below only exist to satisfy the abstract members of
// `SimpleRangeAnalysisExpr`; none of them ever has a result.
override float getLowerBounds() { none() }
override float getUpperBounds() { none() }
override predicate dependsOnChild(Expr child) { none() }
}

View File

@@ -0,0 +1,282 @@
/**
* Provides precise tracking of how big the memory pointed to by pointers is.
* For each pointer, we start tracking (starting from the allocation or an array declaration)
* 1) how long is the chunk of memory allocated
* 2) where the current pointer is in this chunk of memory
* As computing this information is obviously not possible for all pointers,
* we do not guarantee the existence of length/offset information for all pointers.
* However, when it exists it is guaranteed to be accurate.
*
* The length and offset are tracked in a similar way to the Rangeanalysis.
* Each length is a `ValueNumber + delta`, and each Offset is an `Operand + delta`.
* We choose to track a `ValueNumber` for length, because the Rangeanalysis offers
* integer bounds on instructions and operands in terms of `ValueNumber`s,
* and `Operand` for offset because integer bounds on `Operand`s are
* tighter than bounds on `Instruction`s.
*/
import cpp
import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.ir.internal.CppType
private import semmle.code.cpp.models.interfaces.Allocation
private import experimental.semmle.code.cpp.rangeanalysis.RangeUtils
/**
 * The algebraic type behind `Length`: either the constant zero, or a value
 * number that contains at least one integer-typed instruction of an
 * "interesting" kind and no constant instruction.
 */
private newtype TLength =
  TZeroLength() or
  TVNLength(ValueNumber vn) {
    exists(Instruction instr |
      instr = vn.getAnInstruction() and
      // only signed or unsigned integer results can describe a length
      (
        instr.getResultIRType() instanceof IRUnsignedIntegerType or
        instr.getResultIRType() instanceof IRSignedIntegerType
      ) and
      // the kinds of instructions that are tracked as symbolic lengths
      (
        instr.getAUse() instanceof ArgumentOperand
        or
        instr.(LoadInstruction).getSourceAddress() instanceof FieldAddressInstruction
        or
        instr.(LoadInstruction).getSourceAddress() instanceof VariableAddressInstruction
        or
        instr instanceof CallInstruction
        or
        instr instanceof InitializeParameterInstruction
        or
        instr instanceof PhiInstruction
      )
    ) and
    // constants are represented by `TZeroLength` plus a delta instead
    not vn.getAnInstruction() instanceof ConstantInstruction
  }
/**
* Array lengths are represented in a ValueNumber | Zero + delta format.
* This class keeps track of the ValueNumber or Zero.
* The delta is tracked in the predicate `knownArrayLength`.
*
* The `toString` defined here has no result; each subclass overrides it.
*/
class Length extends TLength {
string toString() { none() } // overridden in subclasses
}
/**
* This length class corresponds to an array having a constant length
* that is tracked by the delta value.
*/
class ZeroLength extends Length, TZeroLength {
/** Gets the fixed textual representation `"ZeroLength"`. */
override string toString() { result = "ZeroLength" }
}
/**
 * A length that is not a plain constant but is described by a value number,
 * for instance an array whose length is the integer variable `count`.
 * Any constant adjustment is carried separately as a delta.
 */
class VNLength extends Length, TVNLength {
  ValueNumber vn;

  VNLength() { TVNLength(vn) = this }

  /** Gets an instruction whose value number is the one tracked by this length. */
  Instruction getInstruction() { valueNumber(result) = vn }

  /** Gets the value number tracked by this length. */
  ValueNumber getValueNumber() { result = vn }

  override string toString() {
    exists(string example | example = vn.getExampleInstruction().toString() |
      result = "VNLength(" + example + ")"
    )
  }
}
/**
 * The algebraic type behind `Offset`: either the constant zero, or an operand
 * whose definition produces a signed or unsigned integer.
 */
private newtype TOffset =
  TZeroOffset() or
  TOpOffset(Operand op) {
    exists(Instruction def | def = op.getAnyDef() |
      def.getResultIRType() instanceof IRUnsignedIntegerType
      or
      def.getResultIRType() instanceof IRSignedIntegerType
    )
  }
/**
* This class describes the offset of a pointer in a chunk of memory.
* It is either an `Operand` or zero; an additional integer delta is added later.
*
* The `toString` defined here has no result; each subclass overrides it.
*/
class Offset extends TOffset {
string toString() { none() } // overridden in subclasses
}
/**
* This class represents a fixed offset, only specified by a delta.
*/
class ZeroOffset extends Offset, TZeroOffset {
/** Gets the fixed textual representation `"ZeroOffset"`. */
override string toString() { result = "ZeroOffset" }
}
/**
 * This class represents an offset given by an operand (plus a delta that is
 * tracked separately).
 */
class OpOffset extends Offset, TOpOffset {
  Operand op;

  OpOffset() { this = TOpOffset(op) }

  /** Gets the operand that this offset is relative to. */
  Operand getOperand() { result = op }

  /**
   * Gets a textual representation of this offset.
   *
   * `TOpOffset` is defined in terms of `op.getAnyDef()`, so the same
   * definition is used here. Using `getDef()` would leave offsets whose
   * operand only has an inexact definition without any `toString()` result.
   */
  override string toString() { result = "OpOffset(" + op.getAnyDef().toString() + ")" }
}
/**
* Gets the size of the base type of the pointer type `type`, as reported by
* `getSize()` (presumably in bytes; confirm against the `Type` API).
* Has no result if the base type has no size.
*/
private int getBaseSizeForPointerType(PointerType type) { result = type.getBaseType().getSize() }
/**
 * Holds if the pointer `prev`, located at `prevOffset + prevOffsetDelta`,
 * flows in a single non-recursive step to `array`, which is then located at
 * `offset + offsetDelta` in the same chunk of memory.
 *
 * The supported steps are: copies, pointer addition (constant or variable
 * right-hand side), pointer subtraction of a constant, array-to-pointer decay
 * and pointer-to-pointer casts that keep the element size.
 */
bindingset[prevOffset, prevOffsetDelta]
predicate simpleArrayLengthStep(
  Instruction array, Offset offset, int offsetDelta, Instruction prev, Offset prevOffset,
  int prevOffsetDelta
) {
  // copy: position is unchanged
  prev = array.(CopyInstruction).getSourceValue() and
  offsetDelta = prevOffsetDelta and
  offset = prevOffset
  or
  exists(PointerAddInstruction add | add = array and prev = add.getLeft() |
    // adding a constant: only the delta moves
    offsetDelta = prevOffsetDelta + getConstantValue(add.getRight()) and
    offset = prevOffset
    or
    // adding a non-constant: the right operand becomes the symbolic offset,
    // which is only possible if there was no symbolic offset before
    not exists(getConstantValue(add.getRight())) and
    offset.(OpOffset).getOperand() = add.getRightOperand() and
    prevOffset instanceof ZeroOffset and
    offsetDelta = prevOffsetDelta
  )
  or
  // subtracting a constant: only the delta moves
  exists(PointerSubInstruction sub | sub = array and prev = sub.getLeft() |
    offsetDelta = prevOffsetDelta - getConstantValue(sub.getRight()) and
    offset = prevOffset
  )
  or
  // array-to-pointer decay: position is unchanged
  prev = array.(ConvertInstruction).getUnary() and
  array.getConvertedResultExpression() instanceof ArrayToPointerConversion and
  offsetDelta = prevOffsetDelta and
  offset = prevOffset
  or
  // pointer-to-pointer cast: position is unchanged, provided that the element
  // size stays the same (a `void` pointer is compatible with element size 1)
  exists(PointerType sourceType, PointerType targetType |
    prev = array.(PtrToPtrCastInstruction).getUnary() and
    array.getResultLanguageType().hasType(targetType, false) and
    prev.getResultLanguageType().hasType(sourceType, false) and
    offsetDelta = prevOffsetDelta and
    offset = prevOffset and
    if sourceType instanceof VoidPointerType
    then getBaseSizeForPointerType(targetType) = 1
    else (
      if targetType instanceof VoidPointerType
      then getBaseSizeForPointerType(sourceType) = 1
      else getBaseSizeForPointerType(targetType) = getBaseSizeForPointerType(sourceType)
    )
  )
}
/**
 * Splits the allocation size expression `sizeExpr` into a variable part
 * `lengthExpr` and a constant integer `delta`.
 *
 * - For `a + b`, either operand may be `lengthExpr` as long as the other one
 *   has a constant value, which becomes `delta`.
 * - For `a - b`, `lengthExpr` is `a` and `delta` is the negated constant
 *   value of `b`.
 */
private predicate deconstructMallocSizeExpr(Expr sizeExpr, Expr lengthExpr, int delta) {
  exists(AddExpr sum, Expr constantOperand |
    sum = sizeExpr and
    constantOperand = sum.getAnOperand() and
    lengthExpr = sum.getAnOperand() and
    constantOperand != lengthExpr and
    delta = constantOperand.getValue().toInt()
  )
  or
  exists(SubExpr difference, Expr subtrahend |
    difference = sizeExpr and
    subtrahend = difference.getRightOperand() and
    lengthExpr = difference.getLeftOperand() and
    delta = -subtrahend.getValue().toInt()
  )
}
/**
* Holds if the instruction `array` is a dynamic memory allocation of `length`+`delta` elements.
*
* Three shapes of size expression are recognized:
* 1. a compile-time constant: `length` is `ZeroLength` and `delta` is the constant;
* 2. a sum/difference accepted by `deconstructMallocSizeExpr`: `length` is the
*    value number of the variable part and `delta` the constant part;
* 3. any other expression: `length` is the value number of the whole size
*    expression and `delta` is 0.
*/
private predicate allocation(Instruction array, Length length, int delta) {
exists(AllocationExpr alloc, Type ptrTyp |
// `array` is the instruction for the (unconverted) allocation expression
array.getUnconvertedResultExpression() = alloc and
array.getResultLanguageType().hasType(ptrTyp, false) and
// ensure that we have the same type of the allocation and the pointer
ptrTyp.stripTopLevelSpecifiers().(PointerType).getBaseType().getUnspecifiedType() =
alloc.getAllocatedElementType().getUnspecifiedType() and
// ensure that the size multiplier of the allocation is the same as the
// size of the type we are allocating
alloc.getSizeMult() = getBaseSizeForPointerType(ptrTyp) and
(
// case 1: constant size
length instanceof ZeroLength and
delta = alloc.getSizeExpr().getValue().toInt()
or
// cases 2 and 3 only apply when the size is not a constant
not exists(alloc.getSizeExpr().getValue().toInt()) and
(
// case 2: `variable +/- constant`
exists(Expr lengthExpr |
deconstructMallocSizeExpr(alloc.getSizeExpr(), lengthExpr, delta) and
length.(VNLength).getInstruction().getConvertedResultExpression() = lengthExpr
)
or
// case 3: the size expression cannot be deconstructed at all
not exists(int d | deconstructMallocSizeExpr(alloc.getSizeExpr(), _, d)) and
length.(VNLength).getInstruction().getConvertedResultExpression() = alloc.getSizeExpr() and
delta = 0
)
)
)
}
/**
 * Holds if `array` is the address of a variable or field of array type whose
 * declared size is `lengthDelta`. Declared sizes are constants, so `length`
 * is always a `ZeroLength`.
 */
private predicate arrayDeclaration(Instruction array, Length length, int lengthDelta) {
  length instanceof ZeroLength and
  exists(ArrayType arrayType |
    array.getResultLanguageType().hasType(arrayType, _) and
    lengthDelta = arrayType.getArraySize()
  ) and
  (
    array instanceof FieldAddressInstruction
    or
    array instanceof VariableAddressInstruction
  )
}
/**
 * Holds if `array` is the origin of a chunk of memory holding
 * `length + lengthDelta` elements, i.e. it is either a variable/field of
 * array type or a dynamic allocation.
 */
predicate arrayAllocationOrDeclaration(Instruction array, Length length, int lengthDelta) {
  // variable or field declared with an array type
  arrayDeclaration(array, length, lengthDelta)
  or
  // dynamic allocation
  allocation(array, length, lengthDelta)
}
/**
 * Holds if, using only local analysis, the instruction `array` is known to
 * point into a chunk of memory of `length + lengthDelta` elements, at
 * position `offset + offsetDelta` within that chunk.
 *
 * - In-bounds: `0 <= offset + offsetDelta < length + lengthDelta`.
 * - Out-of-bounds: `offset + offsetDelta < 0` or
 *   `offset + offsetDelta >= length + lengthDelta`.
 *
 * All pointers in this predicate are guaranteed to be non-null, but they are
 * not guaranteed to be live.
 */
predicate knownArrayLength(
  Instruction array, Length length, int lengthDelta, Offset offset, int offsetDelta
) {
  // base case: the allocation/declaration itself points at the very start
  offsetDelta = 0 and
  offset instanceof ZeroOffset and
  arrayAllocationOrDeclaration(array, length, lengthDelta)
  or
  // control-flow merge: a phi node is only known if it has at least one
  // input and every input agrees on all four components
  forex(Instruction phiInput | phiInput = array.(PhiInstruction).getAnInput() |
    knownArrayLength(phiInput, length, lengthDelta, offset, offsetDelta)
  )
  or
  // a single non-phi step from an already known pointer
  exists(Instruction source, Offset sourceOffset, int sourceOffsetDelta |
    simpleArrayLengthStep(array, offset, offsetDelta, source, sourceOffset, sourceOffsetDelta) and
    knownArrayLength(source, length, lengthDelta, sourceOffset, sourceOffsetDelta)
  )
}

View File

@@ -0,0 +1,82 @@
import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.ValueNumbering
/**
 * The algebraic type behind `Bound`: either the constant zero, or a value
 * number that contains no constant instruction and at least one integer- or
 * address-typed instruction of an "interesting" kind.
 */
private newtype TBound =
  TBoundZero() or
  TBoundValueNumber(ValueNumber vn) {
    // constants are expressed relative to `TBoundZero` instead
    not vn.getAnInstruction() instanceof ConstantInstruction and
    exists(Instruction instr | instr = vn.getAnInstruction() |
      (
        instr.getResultIRType() instanceof IRAddressType or
        instr.getResultIRType() instanceof IRIntegerType
      ) and
      (
        instr.getAUse() instanceof ArgumentOperand
        or
        instr.(LoadInstruction).getSourceAddress() instanceof FieldAddressInstruction
        or
        instr.(LoadInstruction).getSourceAddress() instanceof VariableAddressInstruction
        or
        instr instanceof FieldAddressInstruction
        or
        instr instanceof VariableAddressInstruction
        or
        instr instanceof CallInstruction
        or
        instr instanceof InitializeParameterInstruction
        or
        instr instanceof PhiInstruction
      )
    )
  }
/**
* A bound that may be inferred for an expression plus/minus an integer delta.
*/
abstract class Bound extends TBound {
/** Gets a textual representation of this bound. */
abstract string toString();
/** Gets an instruction that equals this bound plus `delta`. */
abstract Instruction getInstruction(int delta);
/** Gets an instruction that equals this bound, i.e. `getInstruction(0)`. */
Instruction getInstruction() { result = getInstruction(0) }
/** Gets the location associated with this bound. */
abstract Location getLocation();
}
/**
 * The bound corresponding to the integer 0. Since every bound comes with an
 * added integer delta, this bound is what represents all constant bounds.
 */
class ZeroBound extends Bound, TBoundZero {
  override string toString() { result = "0" }

  /** Gets a constant-value instruction whose value, read as an integer, is `delta`. */
  override Instruction getInstruction(int delta) {
    delta = result.(ConstantValueInstruction).getValue().toInt()
  }

  /** Gets the unknown default location; the constant zero has no source position. */
  override Location getLocation() { result instanceof UnknownDefaultLocation }
}
/**
 * A bound given by the value shared by all `Instruction`s of one value number.
 */
class ValueNumberBound extends Bound, TBoundValueNumber {
  ValueNumber vn;

  ValueNumberBound() { TBoundValueNumber(vn) = this }

  /**
   * Gets an `Instruction` belonging to the value number of this bound.
   * Only a `delta` of 0 is supported.
   */
  override Instruction getInstruction(int delta) {
    delta = 0 and
    valueNumber(result) = vn
  }

  override string toString() { result = vn.getExampleInstruction().toString() }

  override Location getLocation() { result = vn.getLocation() }

  /** Gets the value number that this bound stands for. */
  ValueNumber getValueNumber() { result = vn }
}

View File

@@ -0,0 +1,5 @@
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
//
// Import each extension we want to enable
import extensions.SubtractSelf
import extensions.ConstantBitwiseAndExprRange

View File

@@ -0,0 +1,105 @@
/**
* This library proves that a subset of pointer dereferences in a program are
* safe, i.e. in-bounds.
* It does so by first defining what a pointer dereference is (on the IR
* `Instruction` level), and then using the array length analysis and the range
* analysis together to prove that some of these pointer dereferences are safe.
*
* The analysis is soundy, i.e. it is sound if no undefined behaviour is present
* in the program.
* Furthermore, it crucially depends on the soundiness of the range analysis and
* the array length analysis.
*/
import cpp
private import experimental.semmle.code.cpp.rangeanalysis.ArrayLengthAnalysis
private import experimental.semmle.code.cpp.rangeanalysis.RangeAnalysis
/**
 * Gets the instruction computing the address of the memory accessed by `i`,
 * where `i` is a field-address computation, a load or a store.
 *
 * There is no result when that address is itself a field address, the
 * address of a variable, or a constant; those are not considered pointer
 * dereferences.
 */
private Instruction getMemoryAddressInstruction(Instruction i) {
  not result instanceof ConstantValueInstruction and
  not result instanceof VariableAddressInstruction and
  not result instanceof FieldAddressInstruction and
  (
    result = i.(StoreInstruction).getDestinationAddress()
    or
    result = i.(LoadInstruction).getSourceAddress()
    or
    result = i.(FieldAddressInstruction).getObjectAddress()
  )
}
/**
* All instructions that dereference a pointer, i.e. every instruction for
* which `getMemoryAddressInstruction` has a result.
*/
class PointerDereferenceInstruction extends Instruction {
PointerDereferenceInstruction() { exists(getMemoryAddressInstruction(this)) }
/** Gets the instruction that computes the address being dereferenced. */
Instruction getAddress() { result = getMemoryAddressInstruction(this) }
}
/**
* Holds if `ptrDeref` can be proven to always access allocated memory.
*
* The proof combines `knownArrayLength` (which gives the chunk length as
* `length + lengthDelta` and the pointer position as `offset + offsetDelta`)
* with the bounds from the range analysis, and requires both
* `offset + offsetDelta >= 0` and `offset + offsetDelta < length + lengthDelta`.
*/
predicate inBounds(PointerDereferenceInstruction ptrDeref) {
exists(Length length, int lengthDelta, Offset offset, int offsetDelta |
knownArrayLength(ptrDeref.getAddress(), length, lengthDelta, offset, offsetDelta) and
// lower bound - note that we treat a pointer that accesses an array of
// length 0 as an upper-bound violation, but not as a lower-bound violation
(
// constant offset: the delta alone must be non-negative
offset instanceof ZeroOffset and
offsetDelta >= 0
or
// symbolic offset: use a constant lower bound on the operand
offset instanceof OpOffset and
exists(int lowerBoundDelta |
boundedOperand(offset.(OpOffset).getOperand(), any(ZeroBound b), lowerBoundDelta,
/*upper*/ false, _) and
lowerBoundDelta + offsetDelta >= 0
)
) and
// upper bound
(
// both offset and length are only integers
length instanceof ZeroLength and
offset instanceof ZeroOffset and
offsetDelta < lengthDelta
or
exists(int lengthBound |
// array length is variable+integer, and there's a fixed (integer-only)
// lower bound on the variable, so we can guarantee this access is always in-bounds
length instanceof VNLength and
offset instanceof ZeroOffset and
boundedInstruction(length.(VNLength).getInstruction(), any(ZeroBound b), lengthBound,
/* upper*/ false, _) and
offsetDelta < lengthBound + lengthDelta
)
or
exists(int offsetBoundDelta |
length instanceof ZeroLength and
offset instanceof OpOffset and
boundedOperand(offset.(OpOffset).getOperand(), any(ZeroBound b), offsetBoundDelta,
/* upper */ true, _) and
// offset <= offsetBoundDelta, so offset + offsetDelta <= offsetDelta + offsetBoundDelta
// Thus, in-bounds if offsetDelta + offsetBoundDelta < lengthDelta
// as we have length instanceof ZeroLength
offsetDelta + offsetBoundDelta < lengthDelta
)
or
exists(ValueNumberBound b, int offsetBoundDelta |
length instanceof VNLength and
offset instanceof OpOffset and
b.getValueNumber() = length.(VNLength).getValueNumber() and
// It holds that offset <= length + offsetBoundDelta
boundedOperand(offset.(OpOffset).getOperand(), b, offsetBoundDelta, /*upper*/ true, _) and
// it also holds that
offsetDelta < lengthDelta - offsetBoundDelta
// taking both inequalities together we get
// offset <= length + offsetBoundDelta
// => offset + offsetDelta <= length + offsetBoundDelta + offsetDelta < length + offsetBoundDelta + lengthDelta - offsetBoundDelta
// as required
)
)
)
}

View File

@@ -0,0 +1,624 @@
/**
* Provides classes and predicates for range analysis.
*
* An inferred bound can either be a specific integer or a `ValueNumber`
* representing the abstract value of a set of `Instruction`s.
*
* If an inferred bound relies directly on a condition, then this condition is
* reported as the reason for the bound.
*/
/*
* This library tackles range analysis as a flow problem. Consider e.g.:
* ```
* len = arr.length;
* if (x < len) { ... y = x-1; ... y ... }
* ```
* In this case we would like to infer `y <= arr.length - 2`, and this is
* accomplished by tracking the bound through a sequence of steps:
* ```
* arr.length --> len = .. --> x < len --> x-1 --> y = .. --> y
* ```
*
* In its simplest form the step relation `I1 --> I2` relates two `Instruction`s
* such that `I1 <= B` implies `I2 <= B` for any `B` (with a second separate
* step relation handling lower bounds). Examples of such steps include
* assignments `I2 = I1` and conditions `x <= I1` where `I2` is a use of `x`
* guarded by the condition.
*
* In order to handle subtractions and additions with constants, and strict
* comparisons, the step relation is augmented with an integer delta. With this
* generalization `I1 --(delta)--> I2` relates two `Instruction`s and an integer
* such that `I1 <= B` implies `I2 <= B + delta` for any `B`. This corresponds
* to the predicate `boundFlowStep`.
*
* The complete range analysis is then implemented as the transitive closure of
* the step relation summing the deltas along the way. If `I1` transitively
* steps to `I2`, `delta` is the sum of deltas along the path, and `B` is an
* interesting bound equal to the value of `I1` then `I2 <= B + delta`. This
* corresponds to the predicate `boundedInstruction`.
*
* Bounds come in two forms: either they are relative to zero (and thus provide
* a constant bound), or they are relative to some program value. This value is
* represented by the `ValueNumber` class, each instance of which represents a
* set of `Instructions` that must have the same value.
*
* Phi nodes need a little bit of extra handling. Consider `x0 = phi(x1, x2)`.
* There are essentially two cases:
* - If `x1 <= B + d1` and `x2 <= B + d2` then `x0 <= B + max(d1,d2)`.
* - If `x1 <= B + d1` and `x2 <= x0 + d2` with `d2 <= 0` then `x0 <= B + d1`.
* The first case is for whenever a bound can be proven without taking looping
* into account. The second case is relevant when `x2` comes from a back-edge
* where we can prove that the variable has been non-increasing through the
* loop-iteration as this means that any upper bound that holds prior to the
* loop also holds for the variable during the loop.
* This generalizes to a phi node with `n` inputs, so if
* `x0 = phi(x1, ..., xn)` and `xi <= B + delta` for one of the inputs, then we
* also have `x0 <= B + delta` if we can prove either:
* - `xj <= B + d` with `d <= delta` or
* - `xj <= x0 + d` with `d <= 0`
* for each input `xj`.
*
* As all inferred bounds can be related directly to a path in the source code
* the only source of non-termination is if successive redundant (and thereby
* increasingly worse) bounds are calculated along a loop in the source code.
* We prevent this by weakening the bound to a small finite set of bounds when
* a path follows a second back-edge (we postpone weakening till the second
* back-edge as a precise bound might require traversing a loop once).
*/
import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.ValueNumbering
private import RangeUtils
private import SignAnalysis
import Bound
/**
* Groups the cached predicates of the range analysis: the two public entry
* points (`boundedInstruction` and `boundedOperand`, exposed through the
* nested module `RangeAnalysisPublic`) and the helper `possibleReason`.
*/
cached
private module RangeAnalysisCache {
/** The publicly visible part of the range analysis cache. */
cached
module RangeAnalysisPublic {
/**
* Holds if `b + delta` is a valid bound for `i` and this is the best such delta.
* - `upper = true` : `i <= b + delta`
* - `upper = false` : `i >= b + delta`
*
* The reason for the bound is given by `reason` and may be either a condition
* or `NoReason` if the bound was proven directly without the use of a bounding
* condition.
*/
cached
predicate boundedInstruction(Instruction i, Bound b, int delta, boolean upper, Reason reason) {
boundedInstruction(i, b, delta, upper, _, _, reason) and
bestInstructionBound(i, b, delta, upper)
}
/**
* Holds if `b + delta` is a valid bound for `op` and this is the best such delta.
* - `upper = true` : `op <= b + delta`
* - `upper = false` : `op >= b + delta`
*
* The reason for the bound is given by `reason` and may be either a condition
* or `NoReason` if the bound was proven directly without the use of a bounding
* condition.
*/
cached
predicate boundedOperand(Operand op, Bound b, int delta, boolean upper, Reason reason) {
boundedOperandCand(op, b, delta, upper, reason) and
bestOperandBound(op, b, delta, upper)
}
}
/**
* Holds if `guard = boundFlowCond(_, _, _, _, _) or guard = eqFlowCond(_, _, _, _, _)`.
*
* This restricts the guards that can appear in a `TCondReason`.
*/
cached
predicate possibleReason(IRGuardCondition guard) {
guard = boundFlowCond(_, _, _, _, _)
or
guard = eqFlowCond(_, _, _, _, _)
}
}
private import RangeAnalysisCache
import RangeAnalysisPublic
/**
 * Holds if `b + delta` is a valid bound for `i` and no other valid delta is
 * tighter.
 * - `upper = true` : `i <= b + delta`, where `delta` is the smallest valid delta
 * - `upper = false` : `i >= b + delta`, where `delta` is the largest valid delta
 */
private predicate bestInstructionBound(Instruction i, Bound b, int delta, boolean upper) {
  upper = true and
  delta = min(int candidate | boundedInstruction(i, b, candidate, upper, _, _, _))
  or
  upper = false and
  delta = max(int candidate | boundedInstruction(i, b, candidate, upper, _, _, _))
}
/**
 * Holds if `b + delta` is a valid (not necessarily best) bound for `op`.
 * - `upper = true` : `op <= b + delta`
 * - `upper = false` : `op >= b + delta`
 *
 * `reason` is either the condition that justifies the bound, or `NoReason`
 * when the bound was established without a bounding condition.
 */
private predicate boundedOperandCand(Operand op, Bound b, int delta, boolean upper, Reason reason) {
  // operands that are inputs of a phi node
  boundedPhiOperand(op, b, delta, upper, _, _, reason)
  or
  // all other operands
  boundedNonPhiOperand(op, b, delta, upper, _, _, reason)
}
/**
 * Holds if `b + delta` is a valid bound for `op` and no other valid delta is
 * tighter.
 * - `upper = true` : `op <= b + delta`, where `delta` is the smallest valid delta
 * - `upper = false` : `op >= b + delta`, where `delta` is the largest valid delta
 */
private predicate bestOperandBound(Operand op, Bound b, int delta, boolean upper) {
  upper = true and
  delta = min(int candidate | boundedOperandCand(op, b, candidate, upper, _))
  or
  upper = false and
  delta = max(int candidate | boundedOperandCand(op, b, candidate, upper, _))
}
/**
* Gets a condition that tests whether `vn` equals `bound + delta`.
*
* If the condition evaluates to `testIsTrue`:
* - `isEq = true` : `vn == bound + delta`
* - `isEq = false` : `vn != bound + delta`
*
* The comparison is matched against any use of `vn` via
* `IRGuardCondition.comparesEq`.
*/
private IRGuardCondition eqFlowCond(
ValueNumber vn, Operand bound, int delta, boolean isEq, boolean testIsTrue
) {
result.comparesEq(vn.getAUse(), bound, delta, isEq, testIsTrue)
}
/**
 * Holds if a guard controlling the use of `op2` shows that `op1 + delta` is a
 * valid bound for `op2`; the guard is reported as `reason`.
 * - `upper = true` : `op2 <= op1 + delta`
 * - `upper = false` : `op2 >= op1 + delta`
 */
private predicate boundFlowStepSsa(
  NonPhiOperand op2, Operand op1, int delta, boolean upper, Reason reason
) {
  // the guard compares a safe cast of the value of `op2`
  exists(SafeCastInstruction cast, IRGuardCondition cond, boolean branch |
    reason = TCondReason(cond) and
    cond.controls(op2.getUse().getBlock(), branch) and
    cond = boundFlowCond(valueNumber(cast), op1, delta, upper, branch) and
    valueNumber(cast.getUnary()) = valueNumberOfOperand(op2)
  )
  or
  // the guard compares the value of `op2` directly
  exists(IRGuardCondition cond, boolean branch |
    reason = TCondReason(cond) and
    cond.controls(op2.getUse().getBlock(), branch) and
    cond = boundFlowCond(valueNumberOfOperand(op2), op1, delta, upper, branch)
  )
}
/**
 * Gets a condition that, when it evaluates to `testIsTrue`, bounds `vn` by
 * `bound + delta`:
 * - `upper = true` : `vn <= bound + delta`
 * - `upper = false` : `vn >= bound + delta`
 */
private IRGuardCondition boundFlowCond(
  ValueNumber vn, NonPhiOperand bound, int delta, boolean upper, boolean testIsTrue
) {
  // an equality test bounds `vn` from both sides
  (upper = false or upper = true) and
  result = eqFlowCond(vn, bound, delta, true, testIsTrue)
  or
  // `comparesLt` yields `x < y + k` or `x >= y + k`. The strict upper bound is
  // turned into the non-strict `x <= y + (k - 1)`; the lower bound is used
  // as is. `testIsTrue` means the same here as in `comparesLt`.
  exists(int strictDelta |
    result.comparesLt(vn.getAUse(), bound, strictDelta, upper, testIsTrue)
  |
    upper = true and delta = strictDelta - 1
    or
    not upper = true and delta = strictDelta
  )
}
/**
* The algebraic type behind `Reason`: either no reason, or a guard condition
* for which `possibleReason` holds.
*/
private newtype TReason =
TNoReason() or
TCondReason(IRGuardCondition guard) { possibleReason(guard) }
/**
* A reason for an inferred bound. This can either be `CondReason` if the bound
* is due to a specific condition, or `NoReason` if the bound is inferred
* without going through a bounding condition.
*/
abstract class Reason extends TReason {
/** Gets a textual representation of this reason. */
abstract string toString();
}
/**
* The reason used for a bound that was inferred without going through a
* bounding condition.
*/
class NoReason extends Reason, TNoReason {
override string toString() { result = "NoReason" }
}
/**
* The reason used for a bound that is due to a specific guard condition.
*/
class CondReason extends Reason, TCondReason {
/** Gets the guard condition that is the reason for the bound. */
IRGuardCondition getCond() { this = TCondReason(result) }
/** Gets the textual representation of the underlying guard condition. */
override string toString() { result = getCond().toString() }
}
/**
 * Holds if `typ` is an integer type of one or two bytes, whose representable
 * values range from `lowerbound` to `upperbound` (both inclusive).
 */
private predicate typeBound(IRIntegerType typ, int lowerbound, int upperbound) {
  // one-byte types
  typ.getByteSize() = 1 and
  (
    typ.isSigned() and lowerbound = -128 and upperbound = 127
    or
    typ.isUnsigned() and lowerbound = 0 and upperbound = 255
  )
  or
  // two-byte types
  typ.getByteSize() = 2 and
  (
    typ.isSigned() and lowerbound = -32768 and upperbound = 32767
    or
    typ.isUnsigned() and lowerbound = 0 and upperbound = 65535
  )
}
/**
 * A conversion to a one- or two-byte integer type that is not a
 * `SafeCastInstruction`, and may therefore overflow or underflow.
 */
private class NarrowingCastInstruction extends ConvertInstruction {
  NarrowingCastInstruction() {
    typeBound(this.getResultIRType(), _, _) and
    not this instanceof SafeCastInstruction
  }

  /** Gets the smallest value representable in the result type. */
  int getLowerBound() { typeBound(this.getResultIRType(), result, _) }

  /** Gets the largest value representable in the result type. */
  int getUpperBound() { typeBound(this.getResultIRType(), _, result) }
}
/**
 * Holds if `op + delta` is a valid bound for `i`.
 * - `upper = true` : `i <= op + delta`
 * - `upper = false` : `i >= op + delta`
 *
 * The steps are:
 * - value-preserving steps from `valueFlowStep` and safe casts (both
 *   directions);
 * - additions and subtractions with a non-constant operand whose sign is
 *   known (one direction, depending on the sign);
 * - remainder, bitwise-and and bitwise-or with operands/results of known
 *   sign (one direction).
 */
private predicate boundFlowStep(Instruction i, NonPhiOperand op, int delta, boolean upper) {
  valueFlowStep(i, op, delta) and
  (upper = true or upper = false)
  or
  i.(SafeCastInstruction).getAnOperand() = op and
  delta = 0 and
  (upper = true or upper = false)
  or
  // `i = op + x` where the sign of `x` is known
  exists(Operand x |
    i.(AddInstruction).getAnOperand() = op and
    i.(AddInstruction).getAnOperand() = x and
    op != x
  |
    // Additions with a constant operand are covered by `valueFlowStep`. The
    // constant check must look at the *definition* of each operand:
    // `getUse()` is the addition itself and is never a constant.
    not exists(getValue(getConstantValue(op.getDef()))) and
    not exists(getValue(getConstantValue(x.getDef()))) and
    if strictlyPositive(x)
    then upper = false and delta = 1
    else
      if positive(x)
      then upper = false and delta = 0
      else
        if strictlyNegative(x)
        then upper = true and delta = -1
        else
          if negative(x)
          then upper = true and delta = 0
          else none()
  )
  or
  // `i = op - x` where the sign of `x` is known
  exists(Operand x |
    exists(SubInstruction sub |
      i = sub and
      sub.getLeftOperand() = op and
      sub.getRightOperand() = x
    )
  |
    // `x` with constant value is covered by valueFlowStep; as above, the
    // check is made on the definition of `x`, not on its use
    not exists(getValue(getConstantValue(x.getDef()))) and
    if strictlyPositive(x)
    then upper = true and delta = -1
    else
      if positive(x)
      then upper = true and delta = 0
      else
        if strictlyNegative(x)
        then upper = false and delta = 1
        else
          if negative(x)
          then upper = false and delta = 0
          else none()
  )
  or
  // `i = _ % op` with positive `op`: `i <= op - 1`
  i.(RemInstruction).getRightOperand() = op and positive(op) and delta = -1 and upper = true
  or
  // `i = op % _` with positive `op`: `i <= op`
  i.(RemInstruction).getLeftOperand() = op and positive(op) and delta = 0 and upper = true
  or
  // `i = op & _` with positive `op`: `i <= op`
  i.(BitAndInstruction).getAnOperand() = op and positive(op) and delta = 0 and upper = true
  or
  // `i = op | _` with positive result: `i >= op`
  i.(BitOrInstruction).getAnOperand() = op and
  positiveInstruction(i) and
  delta = 0 and
  upper = false
  // TODO: min, max, rand
}
/**
 * Holds if `i1` multiplies `op` by the positive constant `factor`, either
 * through a multiplication by a constant `k > 0` (`factor = k`) or through a
 * left shift by a constant `k > 0` (`factor = 2^k`).
 */
private predicate boundFlowStepMul(Instruction i1, Operand op, int factor) {
  exists(Instruction constInstr, int k |
    k > 0 and
    k = getValue(getConstantValue(constInstr))
  |
    // `op << k`
    i1.(ShiftLeftInstruction).getLeftOperand() = op and
    i1.(ShiftLeftInstruction).getRightOperand() = constInstr.getAUse() and
    factor = 2.pow(k)
    or
    // `op * k` (in either operand order)
    factor = k and
    i1.(MulInstruction).hasOperands(op, constInstr.getAUse())
  )
}
/**
 * Holds if `i1` divides `op` by the positive constant `factor`, either
 * through a division by a constant `k > 0` (`factor = k`) or through a
 * right shift by a constant `k > 0` (`factor = 2^k`).
 */
private predicate boundFlowStepDiv(Instruction i1, Operand op, int factor) {
  exists(Instruction divisor, int k |
    k > 0 and
    k = getValue(getConstantValue(divisor))
  |
    // `op >> k`
    i1.(ShiftRightInstruction).getLeftOperand() = op and
    i1.(ShiftRightInstruction).getRight() = divisor and
    factor = 2.pow(k)
    or
    // `op / k`
    i1.(DivInstruction).getLeftOperand() = op and
    i1.(DivInstruction).getRight() = divisor and
    factor = k
  )
}
/**
* Holds if `b + delta` is a valid bound for `op`.
* - `upper = true` : `op <= b + delta`
* - `upper = false` : `op >= b + delta`
*
* `fromBackEdge`, `origdelta` and `reason` are carried along from the
* underlying `boundedInstruction`/`boundedNonPhiOperand` facts.
*/
pragma[noinline]
private predicate boundedNonPhiOperand(
NonPhiOperand op, Bound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
Reason reason
) {
// `op` is bounded by another operand `op2` (via `boundFlowStepSsa`), which is
// in turn bounded by `b`; the deltas add up and the reason of the step is kept.
exists(NonPhiOperand op2, int d1, int d2 |
boundFlowStepSsa(op, op2, d1, upper, reason) and
boundedNonPhiOperand(op2, b, d2, upper, fromBackEdge, origdelta, _) and
delta = d1 + d2
)
or
// The operand inherits every bound of its defining instruction.
boundedInstruction(op.getDef(), b, delta, upper, fromBackEdge, origdelta, reason)
or
// An existing bound `b + d` that `op` is known to be unequal to can be
// tightened by one.
exists(int d, Reason r1, Reason r2 |
boundedNonPhiOperand(op, b, d, upper, fromBackEdge, origdelta, r2)
|
unequalOperand(op, b, d, r1) and
(
upper = true and delta = d - 1
or
upper = false and delta = d + 1
) and
// Either reason may be reported, except that `NoReason` is never taken from
// the original bound.
(
reason = r1
or
reason = r2 and not r2 instanceof NoReason
)
)
}
/**
 * Holds if `op1 + delta` is a valid bound for the phi input `op2`.
 * - `upper = true` : `op2 <= op1 + delta`
 * - `upper = false` : `op2 >= op1 + delta`
 *
 * `reason` is `NoReason` for a plain copy, and the controlling guard otherwise.
 */
private predicate boundFlowStepPhi(
  PhiInputOperand op2, Operand op1, int delta, boolean upper, Reason reason
) {
  // The phi input is defined by a copy of `op1`: an exact bound both ways.
  delta = 0 and
  reason = TNoReason() and
  (upper = false or upper = true) and
  op1 = op2.getDef().(CopyInstruction).getSourceValueOperand()
  or
  // A guard bounding the value of `op2` controls the edge into the phi's block.
  exists(IRGuardCondition cond, boolean branch |
    cond = boundFlowCond(valueNumberOfOperand(op2), op1, delta, upper, branch) and
    cond.controlsEdge(op2.getPredecessorBlock(), op2.getUse().getBlock(), branch) and
    reason = TCondReason(cond)
  )
}
/**
* Holds if `b + delta` is a valid bound for the phi input operand `op`.
* - `upper = true` : `op <= b + delta`
* - `upper = false` : `op >= b + delta`
*/
private predicate boundedPhiOperand(
PhiInputOperand op, Bound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
Reason reason
) {
// `op` is bounded by `op2` through `boundFlowStepPhi`, and `op2` is bounded
// by `b`. The reason of the step wins unless it is `NoReason`.
exists(NonPhiOperand op2, int d1, int d2, Reason r1, Reason r2 |
boundFlowStepPhi(op, op2, d1, upper, r1) and
boundedNonPhiOperand(op2, b, d2, upper, fromBackEdge, origdelta, r2) and
delta = d1 + d2 and
(if r1 instanceof NoReason then reason = r2 else reason = r1)
)
or
// The operand inherits every bound of its defining instruction.
boundedInstruction(op.getDef(), b, delta, upper, fromBackEdge, origdelta, reason)
or
// A bound `b + d` of the definition that `op` is known to be unequal to can
// be tightened by one.
exists(int d, Reason r1, Reason r2 |
boundedInstruction(op.getDef(), b, d, upper, fromBackEdge, origdelta, r2)
|
unequalOperand(op, b, d, r1) and
(
upper = true and delta = d - 1
or
upper = false and delta = d + 1
) and
(
reason = r1
or
reason = r2 and not r2 instanceof NoReason
)
)
}
/**
 * Holds if `op2 != op1 + delta` in the block where `op2` is used, as ensured
 * by the guard condition recorded in `reason`.
 */
private predicate unequalFlowStep(Operand op2, Operand op1, int delta, Reason reason) {
  exists(IRGuardCondition cond, boolean branch |
    reason = TCondReason(cond) and
    cond.controls(op2.getUse().getBlock(), branch) and
    cond = eqFlowCond(valueNumberOfOperand(op2), op1, delta, false, branch)
  )
}
/**
 * Holds if `op != b + delta`, because `op` differs from some operand plus a
 * constant, and that operand is bounded both above and below by the same
 * `b + d2` (i.e. it is equal to it).
 */
private predicate unequalOperand(Operand op, Bound b, int delta, Reason reason) {
  exists(Operand mid, int stepDelta, int midDelta |
    delta = stepDelta + midDelta and
    unequalFlowStep(op, mid, stepDelta, reason) and
    // `mid` is exactly `b + midDelta`: bounded from below and from above.
    boundedNonPhiOperand(mid, b, midDelta, false, _, _, _) and
    boundedNonPhiOperand(mid, b, midDelta, true, _, _, _)
  )
}
/**
 * Holds if the candidate bound `b + delta` for `phi` (see `boundedPhiCand`)
 * is also valid for the input `op`: either `op` has a bound relative to `b`
 * that is at least as tight, or `op` is bounded by `phi` itself.
 */
private predicate boundedPhiCandValidForEdge(
  PhiInstruction phi, Bound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
  Reason reason, PhiInputOperand op
) {
  boundedPhiCand(phi, upper, b, delta, fromBackEdge, origdelta, reason) and
  (
    selfBoundedPhiInp(phi, op, upper)
    or
    exists(int inputDelta | boundedPhiInp1(phi, op, b, inputDelta, upper) |
      upper = false and inputDelta >= delta
      or
      upper = true and inputDelta <= delta
    )
  )
}
/**
 * Gets `delta` weakened to lie in the range `[-1..1]`.
 *
 * Values already in range are returned unchanged. An upper-bound delta below
 * `-1` becomes `-1` and a lower-bound delta above `1` becomes `1`; in every
 * other case there is no result.
 */
bindingset[delta, upper]
private int weakenDelta(boolean upper, int delta) {
  delta > 1 and upper = false and result = 1
  or
  delta < -1 and upper = true and result = -1
  or
  delta >= -1 and delta <= 1 and result = delta
}
/**
* Holds if `op` is an input to `phi` and `b + delta` is a valid bound for `op`.
* - `upper = true` : `op <= b + delta`
* - `upper = false` : `op >= b + delta`
*
* `fromBackEdge` is `true` when `op` enters `phi` along a back edge, and is
* otherwise inherited from the bound on `op`.
*/
private predicate boundedPhiInp(
PhiInstruction phi, PhiInputOperand op, Bound b, int delta, boolean upper, boolean fromBackEdge,
int origdelta, Reason reason
) {
phi.getAnOperand() = op and
exists(int d, boolean fromBackEdge0 |
boundedPhiOperand(op, b, d, upper, fromBackEdge0, origdelta, reason)
or
// The definition of `op` trivially bounds `op` in both directions.
b.(ValueNumberBound).getInstruction() = op.getDef() and
d = 0 and
(upper = true or upper = false) and
fromBackEdge0 = false and
origdelta = 0 and
reason = TNoReason()
|
if backEdge(phi, op)
then
fromBackEdge = true and
(
// The bound has already passed a back edge: restrict how far the delta
// may move away from `origdelta` to a single step (see `weakenDelta`).
fromBackEdge0 = true and delta = weakenDelta(upper, d - origdelta) + origdelta
or
fromBackEdge0 = false and delta = d
)
else (
delta = d and fromBackEdge = fromBackEdge0
)
)
}
/**
* Holds if `boundedPhiInp` holds for `phi`, `op`, `b`, `delta` and `upper`,
* ignoring the back-edge, original-delta and reason columns.
*/
pragma[noinline]
private predicate boundedPhiInp1(
PhiInstruction phi, PhiInputOperand op, Bound b, int delta, boolean upper
) {
boundedPhiInp(phi, op, b, delta, upper, _, _, _)
}
/**
 * Holds if the input `op` of `phi` is bounded by `phi` itself: `op <= phi + d`
 * with `d <= 0` when `upper = true`, or `op >= phi + d` with `d >= 0` when
 * `upper = false`.
 */
private predicate selfBoundedPhiInp(PhiInstruction phi, PhiInputOperand op, boolean upper) {
  exists(ValueNumberBound self, int d |
    self.getInstruction() = phi and
    boundedPhiInp(phi, op, self, d, upper, _, _, _)
  |
    upper = false and d >= 0
    or
    upper = true and d <= 0
  )
}
/**
* Holds if `b + delta` is a candidate bound for `phi`, i.e. it is a valid
* bound for at least one input of `phi` (see `boundedPhiInp`).
*/
pragma[noinline]
private predicate boundedPhiCand(
PhiInstruction phi, boolean upper, Bound b, int delta, boolean fromBackEdge, int origdelta,
Reason reason
) {
exists(PhiInputOperand op |
boundedPhiInp(phi, op, b, delta, upper, fromBackEdge, origdelta, reason)
)
}
/**
 * Holds if the operand of `cast` has a constant upper bound (`upper = true`)
 * or lower bound (`upper = false`) that lies within the range of the result
 * type. With `upper = true` the cast cannot overflow; with `upper = false`
 * it cannot underflow.
 */
private predicate safeNarrowingCast(NarrowingCastInstruction cast, boolean upper) {
  exists(int limit |
    boundedNonPhiOperand(cast.getAnOperand(), any(ZeroBound zero), limit, upper, _, _, _)
  |
    upper = false and limit >= cast.getLowerBound()
    or
    upper = true and limit <= cast.getUpperBound()
  )
}
/**
* Holds if `b + delta` is a valid bound for an operand of the narrowing cast
* `cast`. The remaining columns are those of `boundedNonPhiOperand`.
*/
pragma[noinline]
private predicate boundedCastExpr(
NarrowingCastInstruction cast, Bound b, int delta, boolean upper, boolean fromBackEdge,
int origdelta, Reason reason
) {
boundedNonPhiOperand(cast.getAnOperand(), b, delta, upper, fromBackEdge, origdelta, reason)
}
/**
* Holds if `b + delta` is a valid bound for `i`.
* - `upper = true` : `i <= b + delta`
* - `upper = false` : `i >= b + delta`
*
* Instructions with a known constant value are excluded from the flow-step,
* multiplication and division cases.
*/
private predicate boundedInstruction(
Instruction i, Bound b, int delta, boolean upper, boolean fromBackEdge, int origdelta,
Reason reason
) {
// A phi node: the candidate bound must be valid for every input (and there
// must be at least one input, since `forex` is used).
i instanceof PhiInstruction and
forex(PhiInputOperand op | op = i.getAnOperand() |
boundedPhiCandValidForEdge(i, b, delta, upper, fromBackEdge, origdelta, reason, op)
)
or
// Base case: `i` is the instruction that `b` denotes at offset `delta`.
i = b.getInstruction(delta) and
(upper = true or upper = false) and
fromBackEdge = false and
origdelta = delta and
reason = TNoReason()
or
// One `boundFlowStep` from an operand that is itself bounded by `b`.
exists(Operand mid, int d1, int d2 |
boundFlowStep(i, mid, d1, upper) and
boundedNonPhiOperand(mid, b, d2, upper, fromBackEdge, origdelta, reason) and
delta = d1 + d2 and
not exists(getValue(getConstantValue(i)))
)
or
// Multiplication by a positive constant scales a bound relative to zero.
exists(Operand mid, int factor, int d |
boundFlowStepMul(i, mid, factor) and
boundedNonPhiOperand(mid, b, d, upper, fromBackEdge, origdelta, reason) and
b instanceof ZeroBound and
delta = d * factor and
not exists(getValue(getConstantValue(i)))
)
or
// Division by a positive constant scales a non-negative bound relative to zero.
exists(Operand mid, int factor, int d |
boundFlowStepDiv(i, mid, factor) and
boundedNonPhiOperand(mid, b, d, upper, fromBackEdge, origdelta, reason) and
d >= 0 and
b instanceof ZeroBound and
delta = d / factor and
not exists(getValue(getConstantValue(i)))
)
or
// A narrowing cast keeps a bound of its operand provided the cast is known
// to be safe in the opposite direction (`safeNarrowingCast` with `upper`
// negated).
exists(NarrowingCastInstruction cast |
cast = i and
safeNarrowingCast(cast, upper.booleanNot()) and
boundedCastExpr(cast, b, delta, upper, fromBackEdge, origdelta, reason)
)
}

View File

@@ -0,0 +1,134 @@
import cpp
private import semmle.code.cpp.ir.IR
// TODO: move this dependency
import semmle.code.cpp.ir.internal.IntegerConstant
// TODO: move this out of test code
/**
* Gets the constant value of `instr`, if it can be determined by folding
* integer constants through `+`, `-`, `*`, `/`, copies and phi nodes.
*/
language[monotonicAggregates]
IntValue getConstantValue(Instruction instr) {
// An integer literal.
result = instr.(IntegerConstantInstruction).getValue().toInt()
or
// A binary arithmetic instruction whose two inputs are both constant.
exists(BinaryInstruction binInstr, IntValue left, IntValue right |
binInstr = instr and
left = getConstantValue(binInstr.getLeft()) and
right = getConstantValue(binInstr.getRight()) and
(
binInstr instanceof AddInstruction and result = add(left, right)
or
binInstr instanceof SubInstruction and result = sub(left, right)
or
binInstr instanceof MulInstruction and result = mul(left, right)
or
binInstr instanceof DivInstruction and result = div(left, right)
)
)
or
// A copy of a constant.
result = getConstantValue(instr.(CopyInstruction).getSourceValue())
or
// A phi node: `result` must be both the maximum and the minimum of the
// constant values of the inputs, i.e. those values must all coincide.
// NOTE(review): this presumably relies on `monotonicAggregates` making the
// aggregates undefined when some input has no constant value - confirm.
exists(PhiInstruction phi |
phi = instr and
result =
max(PhiInputOperand operand |
operand = phi.getAnOperand()
|
getConstantValue(operand.getDef())
) and
result =
min(PhiInputOperand operand |
operand = phi.getAnOperand()
|
getConstantValue(operand.getDef())
)
)
}
/**
 * Holds if the value of `i` is exactly the value of its operand `op` plus the
 * constant `delta`. This covers copies, integer additions and subtractions of
 * a constant, and pointer additions and subtractions of a constant (scaled by
 * the element size).
 */
predicate valueFlowStep(Instruction i, Operand op, int delta) {
  // `i = op`
  delta = 0 and op = i.(CopyInstruction).getSourceValueOperand()
  or
  // `i = op + c` (in either operand order)
  exists(AddInstruction add, Operand other |
    add = i and
    op = add.getAnOperand() and
    other = add.getAnOperand() and
    other != op
  |
    delta = getValue(getConstantValue(other.getDef()))
  )
  or
  // `i = op - c`
  exists(SubInstruction sub, Operand other |
    sub = i and
    op = sub.getLeftOperand() and
    other = sub.getRightOperand()
  |
    delta = -getValue(getConstantValue(other.getDef()))
  )
  or
  // `i = op + c` on pointers (in either operand order)
  exists(PointerAddInstruction padd, Operand other |
    padd = i and
    op = padd.getAnOperand() and
    other = padd.getAnOperand() and
    other != op
  |
    delta = padd.getElementSize() * getValue(getConstantValue(other.getDef()))
  )
  or
  // `i = op - c` on pointers
  exists(PointerSubInstruction psub, Operand other |
    psub = i and
    op = psub.getLeftOperand() and
    other = psub.getRightOperand()
  |
    delta = psub.getElementSize() * -getValue(getConstantValue(other.getDef()))
  )
}
/**
 * Holds if `op` is an input of `phi` whose predecessor block has the block of
 * `phi` as a back-edge successor.
 */
predicate backEdge(PhiInstruction phi, PhiInputOperand op) {
  op.getPredecessorBlock().getBackEdgeSuccessor(_) = phi.getBlock() and
  op = phi.getAnOperand()
}
/**
 * Holds if a cast from `fromtyp` to `totyp` can be ignored for the purpose of
 * range analysis: either the target is strictly wider and the source is
 * unsigned or the target is signed, or the target is at least as wide and has
 * the same signedness as the source.
 */
pragma[inline]
private predicate safeCast(IRIntegerType fromtyp, IRIntegerType totyp) {
  // Same signedness, not narrowing.
  fromtyp.getByteSize() <= totyp.getByteSize() and
  (
    fromtyp.isUnsigned() and totyp.isUnsigned()
    or
    fromtyp.isSigned() and totyp.isSigned()
  )
  or
  // Strictly widening.
  fromtyp.getByteSize() < totyp.getByteSize() and
  (
    totyp.isSigned()
    or
    fromtyp.isUnsigned()
  )
}
/**
 * A `ConvertInstruction` whose operand and result both have an address type,
 * i.e. a cast from one pointer type to another.
 */
class PtrToPtrCastInstruction extends ConvertInstruction {
  PtrToPtrCastInstruction() {
    this.getUnary().getResultIRType() instanceof IRAddressType and
    this.getResultIRType() instanceof IRAddressType
  }
}
/**
 * A `ConvertInstruction` between integer types for which `safeCast` holds,
 * i.e. one that cannot overflow or underflow.
 */
class SafeIntCastInstruction extends ConvertInstruction {
  SafeIntCastInstruction() {
    safeCast(this.getUnary().getResultIRType(), this.getResultIRType())
  }
}
/**
 * A `ConvertInstruction` that does not invalidate bounds determined by range
 * analysis: a safe integer cast or a pointer-to-pointer cast.
 */
class SafeCastInstruction extends ConvertInstruction {
  SafeCastInstruction() {
    this instanceof SafeIntCastInstruction
    or
    this instanceof PtrToPtrCastInstruction
  }
}

View File

@@ -0,0 +1,583 @@
/**
* Provides sign analysis to determine whether expression are always positive
* or negative.
*
* The analysis is implemented as an abstract interpretation over the
* three-valued domain `{negative, zero, positive}`.
*/
import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.ir.ValueNumbering
private import SignAnalysisCached
/** The three-valued abstract domain of the analysis: negative, zero and positive. */
private newtype TSign =
TNeg() or
TZero() or
TPos()
/**
* A sign (negative, zero or positive), together with the abstract transfer
* functions used by `instructionSign`. Each member may have several results:
* every result is a sign the operation might produce. Where a disjunct leaves
* `result` unconstrained, any sign is possible.
*/
private class Sign extends TSign {
/** Gets a textual representation of this sign: `-`, `0` or `+`. */
string toString() {
result = "-" and this = TNeg()
or
result = "0" and this = TZero()
or
result = "+" and this = TPos()
}
/** Gets a possible sign after incrementing a value with this sign by one. */
Sign inc() {
this = TNeg() and result = TNeg()
or
this = TNeg() and result = TZero()
or
this = TZero() and result = TPos()
or
this = TPos() and result = TPos()
}
/** Gets a possible sign after decrementing a value with this sign; the inverse of `inc`. */
Sign dec() { result.inc() = this }
/** Gets the sign of the arithmetic negation of a value with this sign. */
Sign neg() {
this = TNeg() and result = TPos()
or
this = TZero() and result = TZero()
or
this = TPos() and result = TNeg()
}
/** Gets a possible sign of the bitwise complement of a value with this sign. */
Sign bitnot() {
this = TNeg() and result = TPos()
or
this = TNeg() and result = TZero()
or
this = TZero() and result = TNeg()
or
this = TPos() and result = TNeg()
}
/** Gets a possible sign of the sum of a value with this sign and a value with sign `s`. */
Sign add(Sign s) {
this = TZero() and result = s
or
s = TZero() and result = this
or
this = s and this = result
or
// Operands of opposite sign: `result` is unconstrained, so any sign is possible.
this = TPos() and s = TNeg()
or
this = TNeg() and s = TPos()
}
/** Gets a possible sign of the product of a value with this sign and a value with sign `s`. */
Sign mul(Sign s) {
result = TZero() and this = TZero()
or
result = TZero() and s = TZero()
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/**
* Gets a possible sign of a value with this sign divided by a value with
* sign `s`. There is no result when `s` is zero.
*/
Sign div(Sign s) {
result = TZero() and s = TNeg()
or
result = TZero() and s = TPos()
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/**
* Gets a possible sign of the remainder of a value with this sign divided by
* a value with sign `s`: zero or the sign of the dividend. There is no result
* when `s` is zero.
*/
Sign rem(Sign s) {
result = TZero() and s = TNeg()
or
result = TZero() and s = TPos()
or
result = this and s = TNeg()
or
result = this and s = TPos()
}
/** Gets a possible sign of the bitwise and of a value with this sign and a value with sign `s`. */
Sign bitand(Sign s) {
result = TZero() and this = TZero()
or
result = TZero() and s = TZero()
or
result = TZero() and this = TPos()
or
result = TZero() and s = TPos()
or
result = TNeg() and this = TNeg() and s = TNeg()
or
result = TPos() and this = TNeg() and s = TPos()
or
result = TPos() and this = TPos() and s = TNeg()
or
result = TPos() and this = TPos() and s = TPos()
}
/** Gets a possible sign of the bitwise or of a value with this sign and a value with sign `s`. */
Sign bitor(Sign s) {
result = TZero() and this = TZero() and s = TZero()
or
result = TNeg() and this = TNeg()
or
result = TNeg() and s = TNeg()
or
result = TPos() and this = TPos() and s = TZero()
or
result = TPos() and this = TZero() and s = TPos()
or
result = TPos() and this = TPos() and s = TPos()
}
/** Gets a possible sign of the bitwise xor of a value with this sign and a value with sign `s`. */
Sign bitxor(Sign s) {
result = TZero() and this = s
or
result = this and s = TZero()
or
result = s and this = TZero()
or
result = TPos() and this = TPos() and s = TPos()
or
result = TNeg() and this = TNeg() and s = TPos()
or
result = TNeg() and this = TPos() and s = TNeg()
or
result = TPos() and this = TNeg() and s = TNeg()
}
/** Gets a possible sign of a value with this sign shifted left by a value with sign `s`. */
Sign lshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
// Non-zero value shifted by a non-zero amount: `result` is unconstrained,
// so any sign is possible.
this != TZero() and s != TZero()
}
/** Gets a possible sign of a value with this sign shifted right (signed) by a value with sign `s`. */
Sign rshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
result = TNeg() and this = TNeg()
or
result != TNeg() and this = TPos() and s != TZero()
}
/** Gets a possible sign of a value with this sign shifted right (unsigned) by a value with sign `s`. */
Sign urshift(Sign s) {
result = TZero() and this = TZero()
or
result = this and s = TZero()
or
result != TZero() and this = TNeg() and s != TZero()
or
result != TNeg() and this = TPos() and s != TZero()
}
}
/**
 * Gets the sign of `inst` when `inst` is an integer or floating-point
 * constant, determined from the constant's value.
 */
private Sign certainInstructionSign(Instruction inst) {
  // Floating-point constants.
  exists(float value | value = inst.(FloatConstantInstruction).getValue().toFloat() |
    value > 0 and result = TPos()
    or
    value = 0 and result = TZero()
    or
    value < 0 and result = TNeg()
  )
  or
  // Integer constants.
  exists(int value | value = inst.(IntegerConstantInstruction).getValue().toInt() |
    value > 0 and result = TPos()
    or
    value = 0 and result = TZero()
    or
    value < 0 and result = TNeg()
  )
}
/**
* How a conversion changes the size of its operand: to a larger size
* (`TWiden`), the same size (`TSame`) or a smaller size (`TNarrow`).
*/
private newtype CastKind =
TWiden() or
TSame() or
TNarrow()
/**
 * Gets the kind of the conversion `ci`, obtained by comparing the result size
 * of its operand with the result size of `ci` itself.
 */
private CastKind getCastKind(ConvertInstruction ci) {
  exists(int before, int after |
    before = ci.getUnary().getResultSize() and
    after = ci.getResultSize()
  |
    before > after and result = TNarrow()
    or
    before = after and result = TSame()
    or
    before < after and result = TWiden()
  )
}
/** Holds for both boolean values; used to bind an otherwise unconstrained boolean. */
private predicate bindBool(boolean bool) {
  bool = false
  or
  bool = true
}
/**
* Gets a possible sign of the result of casting a value with sign `s`, where
* `fromSigned` and `toSigned` give the signedness of the source and target
* types and `ck` says whether the cast widens, narrows or keeps the size.
*
* NOTE(review): a narrowing signed-to-signed cast of a positive value (for
* example a 32-bit `200` converted to a signed 8-bit type) does not appear to
* yield `TNeg()` below, although the unsigned-to-signed case does. Confirm
* whether that omission is intended.
*/
private Sign castSign(Sign s, boolean fromSigned, boolean toSigned, CastKind ck) {
// Zero stays zero, and any narrowing cast may produce zero.
result = TZero() and
(
bindBool(fromSigned) and
bindBool(toSigned) and
s = TZero()
or
bindBool(fromSigned) and
bindBool(toSigned) and
ck = TNarrow()
)
or
result = TPos() and
(
// A positive value may stay positive under any cast.
bindBool(fromSigned) and
bindBool(toSigned) and
s = TPos()
or
// Narrowing a negative value may produce a positive one.
bindBool(fromSigned) and
bindBool(toSigned) and
s = TNeg() and
ck = TNarrow()
or
// A negative value converted to an unsigned type is positive.
fromSigned = true and
toSigned = false and
s = TNeg()
)
or
result = TNeg() and
(
// Signed to signed: a negative value may stay negative.
fromSigned = true and
toSigned = true and
s = TNeg()
or
// Unsigned to signed without widening: a positive value may become negative.
fromSigned = false and
toSigned = true and
s = TPos() and
ck != TWiden()
)
}
/**
 * Holds if the sign of `i` is too complicated to determine, so that every
 * sign has to be considered possible.
 */
private predicate unknownSign(Instruction i) {
  // REVIEW: This should probably be a list of the instructions that we _do_ understand, rather
  // than the ones we don't understand. Currently, if we try to compute the sign of an
  // instruction that we don't understand, and it isn't on this list, we incorrectly compute the
  // sign as "none" instead of "+,0,-".
  // Even better, we could track the state of each instruction as a power set of {non-negative,
  // non-positive, non-zero}, which would mean that the representation of the sign of an unknown
  // value would be the empty set.
  i instanceof ChiInstruction
  or
  i instanceof CallInstruction
  or
  i instanceof BuiltInOperationInstruction
  or
  i instanceof InitializeParameterInstruction
  or
  i instanceof UninitializedInstruction
}
/**
 * Holds if the guard `comp` ensures, in the block where `bounded` is used,
 * that `lowerbound` is a lower bound for an operand with the same value number
 * as `bounded`. `isStrict` tells whether the bound is strict. This is
 * restricted to only include bounds for which we might determine a sign.
 */
private predicate lowerBound(
  IRGuardCondition comp, Operand lowerbound, Operand bounded, boolean isStrict
) {
  exists(int k, Operand sameValue |
    (
      isStrict = false and k = 1
      or
      isStrict = true and k = 0
    ) and
    valueNumberOfOperand(sameValue) = valueNumberOfOperand(bounded) and
    comp.ensuresLt(lowerbound, sameValue, k, bounded.getUse().getBlock(), true)
  )
}
/**
 * Holds if the guard `comp` ensures, in the block where `bounded` is used,
 * that `upperbound` is an upper bound for an operand with the same value
 * number as `bounded`. `isStrict` tells whether the bound is strict. This is
 * restricted to only include bounds for which we might determine a sign.
 */
private predicate upperBound(
  IRGuardCondition comp, Operand upperbound, Operand bounded, boolean isStrict
) {
  exists(int k, Operand sameValue |
    (
      isStrict = false and k = 1
      or
      isStrict = true and k = 0
    ) and
    valueNumberOfOperand(sameValue) = valueNumberOfOperand(bounded) and
    comp.ensuresLt(sameValue, upperbound, k, bounded.getUse().getBlock(), true)
  )
}
/**
 * Holds if the guard `guard` ensures, in the block where `bounded` is used, an
 * equality or inequality between `eqbound` and an operand with the same value
 * number as `bounded`. This is restricted to only include bounds for which we
 * might determine a sign. The boolean `isEq` gives the polarity:
 * - `isEq = true` : `bounded = eqbound`
 * - `isEq = false` : `bounded != eqbound`
 */
private predicate eqBound(IRGuardCondition guard, Operand eqbound, Operand bounded, boolean isEq) {
  exists(Operand sameValue |
    guard.ensuresEq(sameValue, eqbound, 0, bounded.getUse().getBlock(), isEq) and
    valueNumberOfOperand(sameValue) = valueNumberOfOperand(bounded)
  )
}
/**
 * Holds if `bound` is a bound for `op`, established by `comp`, that needs to
 * be positive in order for `op` to be positive: an upper bound or an equality.
 */
private predicate posBound(IRGuardCondition comp, Operand bound, Operand op) {
  eqBound(comp, bound, op, true)
  or
  upperBound(comp, bound, op, _)
}
/**
 * Holds if `bound` is a bound for `op`, established by `comp`, that needs to
 * be negative in order for `op` to be negative: a lower bound or an equality.
 */
private predicate negBound(IRGuardCondition comp, Operand bound, Operand op) {
  eqBound(comp, bound, op, true)
  or
  lowerBound(comp, bound, op, _)
}
/**
 * Holds if `bound` is a bound for `op`, established by `comp`, that can
 * restrict whether `op` can be zero: a lower bound, an upper bound, an
 * equality or an inequality.
 */
private predicate zeroBound(IRGuardCondition comp, Operand bound, Operand op) {
  eqBound(comp, bound, op, _)
  or
  upperBound(comp, bound, op, _)
  or
  lowerBound(comp, bound, op, _)
}
/** Holds if `bound` is a `posBound` for `op` that may itself be positive, so it allows `op` to be positive. */
private predicate posBoundOk(IRGuardCondition comp, Operand bound, Operand op) {
  operandSign(bound) = TPos() and
  posBound(comp, bound, op)
}
/** Holds if `bound` is a `negBound` for `op` that may itself be negative, so it allows `op` to be negative. */
private predicate negBoundOk(IRGuardCondition comp, Operand bound, Operand op) {
  operandSign(bound) = TNeg() and
  negBound(comp, bound, op)
}
/** Holds if `bound`, a bound for `op` established by `comp`, allows `op` to be zero. */
private predicate zeroBoundOk(IRGuardCondition comp, Operand bound, Operand op) {
  // Equality with a possibly-zero value, or inequality with a possibly-non-zero value.
  eqBound(comp, bound, op, false) and operandSign(bound) != TZero()
  or
  eqBound(comp, bound, op, true) and operandSign(bound) = TZero()
  or
  // A non-strict upper bound that may be zero, or any upper bound that may be positive.
  upperBound(comp, bound, op, false) and operandSign(bound) = TZero()
  or
  upperBound(comp, bound, op, _) and operandSign(bound) = TPos()
  or
  // A non-strict lower bound that may be zero, or any lower bound that may be negative.
  lowerBound(comp, bound, op, false) and operandSign(bound) = TZero()
  or
  lowerBound(comp, bound, op, _) and operandSign(bound) = TNeg()
}
/** Gets a possible sign of the left operand of `i`. */
private Sign binaryOpLhsSign(BinaryInstruction i) {
  operandSign(i.getLeftOperand()) = result
}
/** Gets a possible sign of the right operand of `i`. */
private Sign binaryOpRhsSign(BinaryInstruction i) {
  operandSign(i.getRightOperand()) = result
}
/**
* Holds if `lhs` is a possible sign of the left operand of `i` and `rhs` is a
* possible sign of its right operand.
*/
pragma[noinline]
private predicate binaryOpSigns(BinaryInstruction i, Sign lhs, Sign rhs) {
lhs = binaryOpLhsSign(i) and
rhs = binaryOpRhsSign(i)
}
/**
 * Gets a possible sign of the definition of `operand` for which no guard
 * restricts `operand` (see `hasGuard`).
 */
private Sign unguardedOperandSign(Operand operand) {
  not hasGuard(operand, result) and
  result = instructionSign(operand.getDef())
}
/**
 * Gets a possible sign of the definition of `operand` for which some guard
 * might restrict `operand` (see `hasGuard`).
 */
private Sign guardedOperandSign(Operand operand) {
  hasGuard(operand, result) and
  result = instructionSign(operand.getDef())
}
/**
 * Gets a sign that every relevant guard on `operand` permits: there is at
 * least one bound of the relevant kind for the sign, and all such bounds
 * allow it.
 */
private Sign guardedOperandSignOk(Operand operand) {
  forex(IRGuardCondition g, Operand b | zeroBound(g, b, operand) | zeroBoundOk(g, b, operand)) and
  result = TZero()
  or
  forex(IRGuardCondition g, Operand b | negBound(g, b, operand) | negBoundOk(g, b, operand)) and
  result = TNeg()
  or
  forex(IRGuardCondition g, Operand b | posBound(g, b, operand) | posBoundOk(g, b, operand)) and
  result = TPos()
}
/**
 * Holds if there is a bound that might restrict whether `op` has the sign `s`.
 */
private predicate hasGuard(Operand op, Sign s) {
  zeroBound(_, _, op) and s = TZero()
  or
  negBound(_, _, op) and s = TNeg()
  or
  posBound(_, _, op) and s = TPos()
}
/** The cached, mutually recursive core of the sign analysis. */
cached
module SignAnalysisCached {
/**
* Gets a sign that `operand` may have, taking guards into account.
*/
cached
Sign operandSign(Operand operand) {
result = unguardedOperandSign(operand)
or
result = guardedOperandSign(operand) and
result = guardedOperandSignOk(operand)
or
// `result` is unconstrained if the definition is inexact. Then any sign is possible.
operand.isDefinitionInexact()
}
/**
* Gets a sign that the result of `i` may have. Constants get exactly their
* own sign. For all other instructions, `TNeg()` is excluded when the result
* type is an unsigned integer type.
*/
cached
Sign instructionSign(Instruction i) {
result = certainInstructionSign(i)
or
not exists(certainInstructionSign(i)) and
not (
result = TNeg() and
i.getResultIRType().(IRIntegerType).isUnsigned()
) and
(
// `result` is left unconstrained here, so any sign is possible.
unknownSign(i)
or
// Conversions: anything that is not a signed integer type counts as
// "not signed" for `castSign`.
exists(ConvertInstruction ci, Instruction prior, boolean fromSigned, boolean toSigned |
i = ci and
prior = ci.getUnary() and
(
if ci.getResultIRType().(IRIntegerType).isSigned()
then toSigned = true
else toSigned = false
) and
(
if prior.getResultIRType().(IRIntegerType).isSigned()
then fromSigned = true
else fromSigned = false
) and
result = castSign(operandSign(ci.getAnOperand()), fromSigned, toSigned, getCastKind(ci))
)
or
result = operandSign(i.(CopyInstruction).getSourceValueOperand())
or
result = operandSign(i.(BitComplementInstruction).getAnOperand()).bitnot()
or
result = operandSign(i.(NegateInstruction).getAnOperand()).neg()
or
// Binary operations: apply the matching transfer function of `Sign`.
exists(Sign s1, Sign s2 | binaryOpSigns(i, s1, s2) |
i instanceof AddInstruction and result = s1.add(s2)
or
i instanceof SubInstruction and result = s1.add(s2.neg())
or
i instanceof MulInstruction and result = s1.mul(s2)
or
i instanceof DivInstruction and result = s1.div(s2)
or
i instanceof RemInstruction and result = s1.rem(s2)
or
i instanceof BitAndInstruction and result = s1.bitand(s2)
or
i instanceof BitOrInstruction and result = s1.bitor(s2)
or
i instanceof BitXorInstruction and result = s1.bitxor(s2)
or
i instanceof ShiftLeftInstruction and result = s1.lshift(s2)
or
// A right shift is modelled as signed or unsigned according to the result type.
i instanceof ShiftRightInstruction and
i.getResultIRType().(IRIntegerType).isSigned() and
result = s1.rshift(s2)
or
i instanceof ShiftRightInstruction and
not i.getResultIRType().(IRIntegerType).isSigned() and
result = s1.urshift(s2)
)
or
// A phi node may have the sign of any of its inputs.
// use hasGuard here?
result = operandSign(i.(PhiInstruction).getAnOperand())
)
}
}
/** Holds if the result of `i` can be positive and cannot be negative. */
predicate positiveInstruction(Instruction i) {
  not instructionSign(i) = TNeg() and
  TPos() = instructionSign(i)
}
/** Holds if `op` can be positive and cannot be negative. */
predicate positive(Operand op) {
  not operandSign(op) = TNeg() and
  TPos() = operandSign(op)
}
/** Holds if the result of `i` can be negative and cannot be positive. */
predicate negativeInstruction(Instruction i) {
  not instructionSign(i) = TPos() and
  TNeg() = instructionSign(i)
}
/** Holds if `op` can be negative and cannot be positive. */
predicate negative(Operand op) {
  not operandSign(op) = TPos() and
  TNeg() = operandSign(op)
}
/** Holds if the result of `i` is strictly positive: it can be positive, and can be neither zero nor negative. */
predicate strictlyPositiveInstruction(Instruction i) {
  not instructionSign(i) = TZero() and
  not instructionSign(i) = TNeg() and
  TPos() = instructionSign(i)
}
/** Holds if `op` is strictly positive: it can be positive, and can be neither zero nor negative. */
predicate strictlyPositive(Operand op) {
  not operandSign(op) = TZero() and
  not operandSign(op) = TNeg() and
  TPos() = operandSign(op)
}
/** Holds if the result of `i` is strictly negative: it can be negative, and can be neither zero nor positive. */
predicate strictlyNegativeInstruction(Instruction i) {
  not instructionSign(i) = TZero() and
  not instructionSign(i) = TPos() and
  TNeg() = instructionSign(i)
}
/** Holds if `op` is strictly negative: it can be negative, and can be neither zero nor positive. */
predicate strictlyNegative(Operand op) {
  not operandSign(op) = TZero() and
  not operandSign(op) = TPos() and
  TNeg() = operandSign(op)
}

View File

@@ -0,0 +1,90 @@
private import cpp
private import experimental.semmle.code.cpp.models.interfaces.SimpleRangeAnalysisExpr
private import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
/**
 * Gets the constant value of `e`, if `e` either has a compile-time value or
 * is a use of a stack variable whose SSA definition assigns a value that has
 * one.
 */
float evaluateConstantExpr(Expr e) {
  // A use of a variable whose defining value is constant.
  exists(SsaDefinition def, StackVariable v |
    e = def.getAUse(v) and
    result = def.getDefiningValue(v).getValue().toFloat()
  )
  or
  // A constant expression.
  result = e.getValue().toFloat()
}
/**
 * A `&` or `&=` expression where at least one operand is a non-negative
 * constant and no operand is a negative constant.
 *
 * The current implementation for `BitwiseAndExpr` only handles cases where both operands are
 * either unsigned or non-negative constants. This class not only covers these cases, but also
 * adds support for `&` expressions between a signed integer with a non-negative range and a
 * non-negative constant. It also adds support for `&=` for the same set of cases as `&`.
 */
private class ConstantBitwiseAndExprRange extends SimpleRangeAnalysisExpr {
  ConstantBitwiseAndExprRange() {
    exists(Expr lhs, Expr rhs |
      lhs = this.(AssignAndExpr).getLValue() and
      rhs = this.(AssignAndExpr).getRValue()
      or
      lhs = this.(BitwiseAndExpr).getLeftOperand() and
      rhs = this.(BitwiseAndExpr).getRightOperand()
    |
      // At least one operand must be a non-negative constant
      (evaluateConstantExpr(rhs) >= 0 or evaluateConstantExpr(lhs) >= 0) and
      // No operands can be negative constants
      not evaluateConstantExpr(lhs) < 0 and
      not evaluateConstantExpr(rhs) < 0
    )
  }

  /** Gets the left operand of this `&`, or the assigned-to operand of this `&=`. */
  Expr getLeftOperand() {
    result = this.(AssignAndExpr).getLValue()
    or
    result = this.(BitwiseAndExpr).getLeftOperand()
  }

  /** Gets the right operand of this `&`, or the right-hand side of this `&=`. */
  Expr getRightOperand() {
    result = this.(AssignAndExpr).getRValue()
    or
    result = this.(BitwiseAndExpr).getRightOperand()
  }

  /**
   * Gets a lower bound: zero, and additionally the minimum value of the
   * expression's type when an operand can be negative.
   */
  override float getLowerBounds() {
    exists(float leftLow, float rightLow |
      leftLow = getFullyConvertedLowerBounds(this.getLeftOperand()) and
      rightLow = getFullyConvertedLowerBounds(this.getRightOperand())
    |
      // This technically results in two lowerBounds when an operand range is negative, but
      // that's fine since `exprMinVal(x) <= 0`. We can't use an if statement here without
      // non-monotonic recursion issues
      result = 0
      or
      // If an operand can have negative values, the lower bound is unconstrained.
      result = exprMinVal(this) and
      (rightLow < 0 or leftLow < 0)
    )
  }

  /**
   * Gets an upper bound: the smaller of the operands' upper bounds, and
   * additionally the maximum value of the expression's type when an operand
   * can be negative.
   */
  override float getUpperBounds() {
    exists(float leftLow, float leftHigh, float rightLow, float rightHigh |
      leftLow = getFullyConvertedLowerBounds(this.getLeftOperand()) and
      leftHigh = getFullyConvertedUpperBounds(this.getLeftOperand()) and
      rightLow = getFullyConvertedLowerBounds(this.getRightOperand()) and
      rightHigh = getFullyConvertedUpperBounds(this.getRightOperand())
    |
      // This technically results in two upperBounds when an operand range is negative, but
      // that's fine since `exprMaxVal(b) >= result`. We can't use an if statement here without
      // non-monotonic recursion issues
      result = rightHigh.minimum(leftHigh)
      or
      // If an operand can have negative values, the upper bound is unconstrained.
      result = exprMaxVal(this) and
      (rightLow < 0 or leftLow < 0)
    )
  }

  /** Holds if `child` is one of the two operands, whose ranges the bounds above depend on. */
  override predicate dependsOnChild(Expr child) {
    child = this.getRightOperand() or child = this.getLeftOperand()
  }
}

View File

@@ -0,0 +1,15 @@
import experimental.semmle.code.cpp.models.interfaces.SimpleRangeAnalysisExpr
/**
 * A subtraction whose two operands, after explicit conversions, are accesses
 * to the same variable. Its range is modelled as exactly `[0, 0]`.
 */
private class SelfSub extends SimpleRangeAnalysisExpr, SubExpr {
  SelfSub() {
    // Match `x - x` but not `myInt - (unsigned char)myInt`.
    this.getRightOperand().getExplicitlyConverted().(VariableAccess).getTarget() =
      this.getLeftOperand().getExplicitlyConverted().(VariableAccess).getTarget()
  }

  /** Gets the lower bound, which is always zero. */
  override float getLowerBounds() { result = 0 }

  /** Gets the upper bound, which is always zero. */
  override float getUpperBounds() { result = 0 }

  /** Never holds: the bounds do not depend on the range of any child. */
  override predicate dependsOnChild(Expr child) { none() }
}

View File

@@ -0,0 +1,62 @@
/**
* Provides a taint-tracking configuration for reasoning about private information flowing unencrypted to an external location.
*/
import cpp
import semmle.code.cpp.dataflow.TaintTracking
import experimental.semmle.code.cpp.security.PrivateData
import semmle.code.cpp.security.FileWrite
import semmle.code.cpp.security.BufferWrite
/**
 * Sources, sinks, sanitizers and the taint-tracking configuration for private
 * information flowing unencrypted to an external location.
 */
module PrivateCleartextWrite {
  /**
   * A data flow source for private information flowing unencrypted to an external location.
   */
  abstract class Source extends DataFlow::ExprNode { }

  /**
   * A data flow sink for private information flowing unencrypted to an external location.
   */
  abstract class Sink extends DataFlow::ExprNode { }

  /**
   * A sanitizer for private information flowing unencrypted to an external location.
   */
  abstract class Sanitizer extends DataFlow::ExprNode { }

  /** A call to any method whose name suggests that it encodes or encrypts the parameter. */
  class ProtectSanitizer extends Sanitizer {
    ProtectSanitizer() {
      exists(Function m, string s |
        this.getExpr().(FunctionCall).getTarget() = m and
        m.getName().regexpMatch("(?i).*" + s + ".*")
      |
        s = "protect" or s = "encode" or s = "encrypt"
      )
    }
  }

  /** A taint-tracking configuration from any `Source` to any `Sink`, blocked by any `Sanitizer`. */
  class WriteConfig extends TaintTracking::Configuration {
    WriteConfig() { this = "Write configuration" }

    override predicate isSource(DataFlow::Node source) { source instanceof Source }

    override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }

    override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
  }

  /** A source that is an expression holding private data. */
  class PrivateDataSource extends Source {
    PrivateDataSource() { this.getExpr() instanceof PrivateDataExpr }
  }

  /** A sink that is a source expression of a file write or a child of a buffer write. */
  class WriteSink extends Sink {
    WriteSink() {
      // The two kinds of write are quantified independently. A single
      // `exists(FileWrite f, BufferWrite b | ... or ...)` would only hold when
      // the database contains at least one write of *each* kind, silently
      // dropping every sink otherwise.
      this.asExpr() = any(FileWrite f).getASource()
      or
      this.asExpr() = any(BufferWrite b).getAChild()
    }
  }
}

View File

@@ -0,0 +1,53 @@
/**
* Provides classes and predicates for identifying private data and functions for security.
*
* 'Private' data in general is anything that would compromise user privacy if exposed. This
* library tries to guess where private data may either be stored in a variable or produced by a
* function.
*
* This library is not concerned with credentials. See `SensitiveActions` for expressions related
* to credentials.
*/
import cpp
/**
 * Gets a pattern, in the `%`-wildcard syntax accepted by the string `matches`
 * predicate, for names that look like they represent private data.
 *
 * Entries that are commented out below are not part of the result.
 */
private string privateNames() {
  // Inspired by the list on https://cwe.mitre.org/data/definitions/359.html
  //
  // Government identifiers, such as Social Security Numbers
  result = "%social%security%number%"
  or
  // Contact information, such as home addresses and telephone numbers
  result = "%postcode%"
  or
  result = "%zipcode%"
  or
  // result = "%telephone%" or
  // Geographic location - where the user is (or was)
  result = "%latitude%"
  or
  result = "%longitude%"
  or
  // Financial data - such as credit card numbers, salary, bank accounts, and debts
  result = "%creditcard%"
  or
  result = "%salary%"
  or
  result = "%bankaccount%"
  or
  // Communications - e-mail addresses, private e-mail messages, SMS text messages, chat logs, etc.
  // result = "%email%" or
  // result = "%mobile%" or
  result = "%employer%"
  or
  // Health - medical conditions, insurance status, prescription records
  result = "%medical%"
}
/**
 * An expression that might contain private data.
 *
 * This class is abstract; the concrete subclasses defined alongside it are
 * `PrivateFunctionCall` and `PrivateVariableAccess`.
 */
abstract class PrivateDataExpr extends Expr { }
/**
 * A function call that might produce private data, identified by the lower-cased
 * name of the called function matching one of the `privateNames()` patterns.
 */
class PrivateFunctionCall extends PrivateDataExpr, FunctionCall {
  PrivateFunctionCall() {
    // The name is lower-cased before matching, so the comparison is case-insensitive.
    this.getTarget().getName().toLowerCase().matches(privateNames())
  }
}
/**
 * An access to a variable that might contain private data, identified by the
 * lower-cased name of the accessed variable matching one of the `privateNames()`
 * patterns.
 */
class PrivateVariableAccess extends PrivateDataExpr, VariableAccess {
  PrivateVariableAccess() {
    // The name is lower-cased before matching, so the comparison is case-insensitive.
    this.getTarget().getName().toLowerCase().matches(privateNames())
  }
}