Merge pull request #3368 from Cornelius-Riemenschneider/local-ala

C++: Add experimental Array Length Tracking library
This commit is contained in:
Robert Marsh
2020-05-05 13:05:52 -07:00
committed by GitHub
6 changed files with 455 additions and 32 deletions

View File

@@ -0,0 +1,282 @@
/**
* Provides precise tracking of how big the memory pointed to by pointers is.
* For each pointer, we start tracking (starting from the allocation or an array declaration)
* 1) how long is the chunk of memory allocated
* 2) where the current pointer is in this chunk of memory
* As computing this information is obviously not possible for all pointers,
* we do not guarantee the existence of length/offset information for all pointers.
* However, when it exists it is guaranteed to be accurate.
*
* The length and offset are tracked in a similar way to the range analysis library.
* Each length is a `ValueNumber + delta`, and each offset is an `Operand + delta`.
* We choose to track a `ValueNumber` for the length, because the range analysis library
* offers integer bounds on instructions and operands in terms of `ValueNumber`s,
* and an `Operand` for the offset, because integer bounds on `Operand`s are
* tighter than bounds on `Instruction`s.
*/
import cpp
import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.ir.internal.CppType
private import semmle.code.cpp.models.interfaces.Allocation
private import semmle.code.cpp.rangeanalysis.RangeUtils
/**
 * The symbolic part of an array length: either nothing at all (`TZeroLength`),
 * or a value number (`TVNLength`) that contains no constant instruction and
 * has an integer-typed member of one of the instruction shapes listed below.
 */
private newtype TLength =
  TZeroLength() or
  TVNLength(ValueNumber vn) {
    exists(Instruction instr, IRType resultType |
      instr = vn.getAnInstruction() and
      resultType = instr.getResultIRType() and
      (
        resultType instanceof IRUnsignedIntegerType or
        resultType instanceof IRSignedIntegerType
      ) and
      (
        // the value is passed to a call
        instr.getAUse() instanceof ArgumentOperand
        or
        // the value is read from a variable or from a field
        exists(Instruction address | address = instr.(LoadInstruction).getSourceAddress() |
          address instanceof FieldAddressInstruction or
          address instanceof VariableAddressInstruction
        )
        or
        instr instanceof CallInstruction
        or
        instr instanceof InitializeParameterInstruction
        or
        instr instanceof PhiInstruction
      )
    ) and
    // constants are represented by `TZeroLength` plus a delta instead
    not vn.getAnInstruction() instanceof ConstantInstruction
  }
/**
* The symbolic part of an array length.
* Array lengths are represented in a `ValueNumber | Zero + delta` format;
* this class keeps track of the `ValueNumber` or `Zero` part only.
* The integer delta is tracked separately by the predicate `knownArrayLength`.
*/
class Length extends TLength {
/** Gets a textual representation of this length. Has no result here; each subclass overrides it. */
string toString() { none() } // overridden in subclasses
}
/**
* A length without a symbolic part.
* This length class corresponds to an array having a constant length;
* the constant itself is not stored here but in the delta value that
* accompanies the length.
*/
class ZeroLength extends Length, TZeroLength {
override string toString() { result = "ZeroLength" }
}
/**
 * A length whose symbolic part is a value number, used for arrays of variable
 * length. For example, an array allocated with `count` elements, where `count`
 * is an integer variable of the program, has the value number of `count` as
 * its length.
 */
class VNLength extends Length, TVNLength {
  ValueNumber vn;

  VNLength() { this = TVNLength(vn) }

  /** Gets an instruction that belongs to the value number of this length. */
  Instruction getInstruction() { valueNumber(result) = vn }

  /** Gets the value number that this length is expressed in. */
  ValueNumber getValueNumber() { vn = result }

  override string toString() {
    exists(string example | example = vn.getExampleInstruction().toString() |
      result = "VNLength(" + example + ")"
    )
  }
}
/**
 * The symbolic part of a pointer offset: either nothing at all (`TZeroOffset`),
 * or an operand (`TOpOffset`) whose defining instruction has a signed or
 * unsigned integer result type.
 */
private newtype TOffset =
  TZeroOffset() or
  TOpOffset(Operand op) {
    exists(IRType definedType | definedType = op.getAnyDef().getResultIRType() |
      definedType instanceof IRUnsignedIntegerType or
      definedType instanceof IRSignedIntegerType
    )
  }
/**
* The symbolic part of the offset of a pointer within a chunk of memory.
* It is either an `Operand` or zero; an additional integer delta is added
* later by the predicates that use this class.
*/
class Offset extends TOffset {
/** Gets a textual representation of this offset. Has no result here; each subclass overrides it. */
string toString() { none() } // overridden in subclasses
}
/**
* An offset without a symbolic part. It represents a fixed offset that is
* specified only by the integer delta accompanying it.
*/
class ZeroOffset extends Offset, TZeroOffset {
override string toString() { result = "ZeroOffset" }
}
/**
 * An offset whose symbolic part is the value of an `Operand`. A constant
 * delta may be added on top of it by the predicates that use this class.
 */
class OpOffset extends Offset, TOpOffset {
  Operand op;

  OpOffset() { this = TOpOffset(op) }

  /** Gets the operand whose value is the symbolic part of this offset. */
  Operand getOperand() { result = op }

  /**
   * Gets a textual representation of this offset, naming the instruction that
   * defines the operand.
   */
  override string toString() {
    // `TOpOffset` admits an operand based on `getAnyDef()`, so the same accessor
    // is used here. This gives every `OpOffset` a string form even if `getDef()`
    // has no result for its operand (presumably the case when the definition
    // does not exactly overlap the operand; confirm against the IR library).
    result = "OpOffset(" + op.getAnyDef().toString() + ")"
  }
}
/** Gets the size, as reported by `Type.getSize()`, of the type that `type` points to. */
private int getBaseSizeForPointerType(PointerType type) {
  exists(Type pointee | pointee = type.getBaseType() | result = pointee.getSize())
}
/**
* Holds if pointer `prev` that points at offset `prevOffset + prevOffsetDelta`
* steps to `array` that points to `offset + offsetDelta` in one step.
* This predicate does not contain any recursive steps.
*
* `prevOffset` and `prevOffsetDelta` must be bound by the caller (see the
* `bindingset` annotation); `offset` and `offsetDelta` are derived from them.
* The predicate has no length parameters: `knownArrayLength` carries the
* length across a step unchanged.
*/
bindingset[prevOffset, prevOffsetDelta]
predicate simpleArrayLengthStep(
Instruction array, Offset offset, int offsetDelta, Instruction prev, Offset prevOffset,
int prevOffsetDelta
) {
// array assign: a copy keeps both parts of the offset
array.(CopyInstruction).getSourceValue() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta
or
// pointer add with constant: only the delta changes
array.(PointerAddInstruction).getLeft() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta + getConstantValue(array.(PointerAddInstruction).getRight())
or
// pointer add with variable: the added operand becomes the symbolic offset.
// This is only possible while the previous symbolic offset is still zero,
// because an `Offset` holds at most one operand.
array.(PointerAddInstruction).getLeft() = prev and
prevOffset instanceof ZeroOffset and
offset.(OpOffset).getOperand() = array.(PointerAddInstruction).getRightOperand() and
offsetDelta = prevOffsetDelta and
// constants are handled by the previous case
not exists(getConstantValue(array.(PointerAddInstruction).getRight()))
or
// pointer sub with constant: only the delta changes
// (there is no case for subtracting a non-constant value)
array.(PointerSubInstruction).getLeft() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta - getConstantValue(array.(PointerSubInstruction).getRight())
or
// array to pointer decay
array.(ConvertInstruction).getUnary() = prev and
array.getConvertedResultExpression() instanceof ArrayToPointerConversion and
offset = prevOffset and
offsetDelta = prevOffsetDelta
or
// cast of pointer to pointer with the same element size
exists(PointerType fromTyp, PointerType toTyp |
array.(PtrToPtrCastInstruction).getUnary() = prev and
prev.getResultLanguageType().hasType(fromTyp, false) and
array.getResultLanguageType().hasType(toTyp, false) and
offset = prevOffset and
offsetDelta = prevOffsetDelta and
// when one side is `void *`, the other side must point to elements of size 1;
// otherwise both element sizes must be equal
if fromTyp instanceof VoidPointerType
then getBaseSizeForPointerType(toTyp) = 1
else (
if toTyp instanceof VoidPointerType
then getBaseSizeForPointerType(fromTyp) = 1
else getBaseSizeForPointerType(toTyp) = getBaseSizeForPointerType(fromTyp)
)
)
}
/**
 * Splits the size expression `sizeExpr` of an allocation into a variable part
 * (`lengthExpr`) and a constant integer part (`delta`), such that `sizeExpr`
 * is `lengthExpr + delta`. Only additions and subtractions are recognized.
 */
private predicate deconstructMallocSizeExpr(Expr sizeExpr, Expr lengthExpr, int delta) {
  // `lengthExpr + c` or `c + lengthExpr`
  exists(AddExpr sum, Expr constantPart |
    sum = sizeExpr and
    constantPart = sum.getAnOperand() and
    lengthExpr = sum.getAnOperand() and
    constantPart != lengthExpr and
    delta = constantPart.getValue().toInt()
  )
  or
  // `lengthExpr - c`
  exists(SubExpr difference, Expr constantPart |
    difference = sizeExpr and
    constantPart = difference.getRightOperand() and
    lengthExpr = difference.getLeftOperand() and
    delta = -constantPart.getValue().toInt()
  )
}
/**
* Holds if the instruction `array` is a dynamic memory allocation of `length`+`delta` elements.
*
* Three shapes of size expression are recognized:
* a compile-time constant (`ZeroLength` plus the constant as `delta`),
* a sum or difference accepted by `deconstructMallocSizeExpr`,
* and any other expression that is itself tracked as a `VNLength` (with `delta = 0`).
*/
private predicate allocation(Instruction array, Length length, int delta) {
exists(AllocationExpr alloc, Type ptrTyp |
// `array` is the instruction for the allocation before any conversions are applied
array.getUnconvertedResultExpression() = alloc and
array.getResultLanguageType().hasType(ptrTyp, false) and
// ensure that we have the same type of the allocation and the pointer
ptrTyp.stripTopLevelSpecifiers().(PointerType).getBaseType().getUnspecifiedType() =
alloc.getAllocatedElementType().getUnspecifiedType() and
// ensure that the size multiplier of the allocation is the same as the
// size of the type we are allocating
alloc.getSizeMult() = getBaseSizeForPointerType(ptrTyp) and
(
// constant size: no symbolic part, the constant is the delta
length instanceof ZeroLength and
delta = alloc.getSizeExpr().getValue().toInt()
or
// non-constant size
not exists(alloc.getSizeExpr().getValue().toInt()) and
(
// size of the form `lengthExpr + c`, `c + lengthExpr` or `lengthExpr - c`
exists(Expr lengthExpr |
deconstructMallocSizeExpr(alloc.getSizeExpr(), lengthExpr, delta) and
length.(VNLength).getInstruction().getConvertedResultExpression() = lengthExpr
)
or
// any other size expression: it has to be tracked as a length itself
not exists(int d | deconstructMallocSizeExpr(alloc.getSizeExpr(), _, d)) and
length.(VNLength).getInstruction().getConvertedResultExpression() = alloc.getSizeExpr() and
delta = 0
)
)
)
}
/**
 * Holds if `array` is the address of a variable or field of array type, and
 * that array type has `length + lengthDelta` elements. The symbolic part
 * `length` is always a `ZeroLength`; the declared size is `lengthDelta`.
 */
private predicate arrayDeclaration(Instruction array, Length length, int lengthDelta) {
  length instanceof ZeroLength and
  exists(ArrayType declaredType |
    array.getResultLanguageType().hasType(declaredType, _) and
    lengthDelta = declaredType.getArraySize()
  ) and
  (
    array instanceof FieldAddressInstruction
    or
    array instanceof VariableAddressInstruction
  )
}
/**
 * Holds if `array` is a source of array length information with
 * `length + lengthDelta` elements: either a variable or field declared with
 * an array type, or a dynamic memory allocation.
 */
predicate arrayAllocationOrDeclaration(Instruction array, Length length, int lengthDelta) {
  // declaration of variable of array type
  arrayDeclaration(array, length, lengthDelta)
  or
  // dynamic memory allocation
  allocation(array, length, lengthDelta)
}
/**
* Holds if the instruction `array` represents a pointer to a chunk of memory that holds
* `length + lengthDelta` elements, using only local analysis.
* `array` points at `offset + offsetDelta` in the chunk of memory.
* The pointer is in-bounds if `offset + offsetDelta < length + lengthDelta` and
* `offset + offsetDelta >= 0` holds.
* The pointer is out-of-bounds if `offset + offsetDelta >= length + lengthDelta`
* or `offset + offsetDelta < 0` holds.
* All pointers in this predicate are guaranteed to be non-null,
* but are not guaranteed to be live.
*/
predicate knownArrayLength(
Instruction array, Length length, int lengthDelta, Offset offset, int offsetDelta
) {
// base case: an allocation or array declaration points at the start of its memory
arrayAllocationOrDeclaration(array, length, lengthDelta) and
offset instanceof ZeroOffset and
offsetDelta = 0
or
// simple step (no phi nodes)
// the length is passed through unchanged; only the offset is updated by the step
exists(Instruction prev, Offset prevOffset, int prevOffsetDelta |
knownArrayLength(prev, length, lengthDelta, prevOffset, prevOffsetDelta) and
simpleArrayLengthStep(array, offset, offsetDelta, prev, prevOffset, prevOffsetDelta)
)
or
// merge control flow after phi node - but only if all the bounds agree
// (`forex` also requires that the phi node has at least one input)
forex(Instruction input | array.(PhiInstruction).getAnInput() = input |
knownArrayLength(input, length, lengthDelta, offset, offsetDelta)
)
}

View File

@@ -241,38 +241,6 @@ class CondReason extends Reason, TCondReason {
override string toString() { result = getCond().toString() }
}
/**
* Holds if a cast from `fromtyp` to `totyp` can be ignored for the purpose of
* range analysis.
*/
pragma[inline]
private predicate safeCast(IntegralType fromtyp, IntegralType totyp) {
// strictly widening cast whose source is unsigned or whose target is signed
fromtyp.getSize() < totyp.getSize() and
(
fromtyp.isUnsigned()
or
totyp.isSigned()
)
or
// non-narrowing cast between two types of the same signedness
fromtyp.getSize() <= totyp.getSize() and
(
fromtyp.isSigned() and
totyp.isSigned()
or
fromtyp.isUnsigned() and
totyp.isUnsigned()
)
}
/**
* A `ConvertInstruction` that is either an integral cast accepted by `safeCast`
* or a conversion from one pointer type to another.
*/
private class SafeCastInstruction extends ConvertInstruction {
SafeCastInstruction() {
// integral cast that `safeCast` accepts
safeCast(getUnary().getResultType(), getResultType())
or
// pointer-to-pointer conversion
getResultType() instanceof PointerType and
getUnary().getResultType() instanceof PointerType
}
}
/**
* Holds if `typ` is a small integral type with the given lower and upper bounds.
*/

View File

@@ -80,3 +80,55 @@ predicate backEdge(PhiInstruction phi, PhiInputOperand op) {
phi.getAnOperand() = op and
phi.getBlock() = op.getPredecessorBlock().getBackEdgeSuccessor(_)
}
/**
 * Holds if converting a value of type `fromtyp` to type `totyp` does not need
 * to be taken into account by range analysis, i.e. the cast can be ignored.
 */
pragma[inline]
private predicate safeCast(IntegralType fromtyp, IntegralType totyp) {
  // both types have the same signedness and the target is not smaller
  fromtyp.getSize() <= totyp.getSize() and
  (
    fromtyp.isUnsigned() and
    totyp.isUnsigned()
    or
    fromtyp.isSigned() and
    totyp.isSigned()
  )
  or
  // the target is strictly larger, and the source is unsigned or the target is signed
  fromtyp.getSize() < totyp.getSize() and
  (
    totyp.isSigned()
    or
    fromtyp.isUnsigned()
  )
}
/**
 * A `ConvertInstruction` whose operand and result both have a pointer type,
 * i.e. a cast from one pointer type to another.
 */
class PtrToPtrCastInstruction extends ConvertInstruction {
  PtrToPtrCastInstruction() {
    this.getUnary().getResultType() instanceof PointerType and
    this.getResultType() instanceof PointerType
  }
}
/**
 * A `ConvertInstruction` between two integral types for which `safeCast`
 * holds, i.e. a cast that cannot overflow or underflow.
 */
class SafeIntCastInstruction extends ConvertInstruction {
  SafeIntCastInstruction() {
    exists(IntegralType sourceType, IntegralType targetType |
      sourceType = this.getUnary().getResultType() and
      targetType = this.getResultType() and
      safeCast(sourceType, targetType)
    )
  }
}
/**
 * A `ConvertInstruction` that keeps bounds determined by range analysis
 * valid: either a safe integer cast or a pointer-to-pointer cast.
 */
class SafeCastInstruction extends ConvertInstruction {
  SafeCastInstruction() {
    this instanceof SafeIntCastInstruction
    or
    this instanceof PtrToPtrCastInstruction
  }
}

View File

@@ -0,0 +1,23 @@
| test.cpp:15:8:15:11 | Load: aptr | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:19:8:19:8 | Load: a | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:21:8:21:8 | Load: a | VNLength(Chi: ptr) | -1 | ZeroOffset | 0 |
| test.cpp:23:8:23:8 | Load: a | VNLength(Chi: ptr) | 1 | ZeroOffset | 0 |
| test.cpp:27:8:27:8 | Load: c | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:28:8:28:24 | Convert: (unsigned char *)... | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:30:8:30:8 | Load: v | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:34:8:34:12 | Convert: array to pointer conversion | ZeroLength | 100 | ZeroOffset | 0 |
| test.cpp:37:10:37:10 | Load: b | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:44:8:44:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:53:10:53:10 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:56:10:56:10 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 3 |
| test.cpp:63:10:63:14 | CopyValue: & ... | VNLength(InitializeParameter: count) | 0 | OpOffset(Load: i) | 0 |
| test.cpp:66:8:66:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:68:8:68:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 3 |
| test.cpp:70:8:70:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:72:8:72:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:74:8:74:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | -10 |
| test.cpp:76:8:76:8 | Load: a | VNLength(InitializeParameter: count) | 1 | ZeroOffset | 0 |
| test.cpp:78:8:78:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Load: count) | 0 |
| test.cpp:80:8:80:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Load: count) | 1 |
| test.cpp:85:8:85:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Add: ... + ...) | 0 |
| test.cpp:87:8:87:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Add: ... + ...) | 1 |

View File

@@ -0,0 +1,8 @@
import cpp
import experimental.semmle.code.cpp.rangeanalysis.ArrayLengthAnalysis
// Selects every instruction with known array length information that is used as
// a call argument, together with its length (`length + delta`) and its
// position in the memory chunk (`offset + offsetDelta`).
from Instruction array, Length length, int delta, Offset offset, int offsetDelta
where
knownArrayLength(array, length, delta, offset, offsetDelta) and
array.getAUse() instanceof ArgumentOperand
select array, length, delta, offset, offsetDelta

View File

@@ -0,0 +1,90 @@
void *malloc(unsigned long);
void sink(...);
typedef struct A { // element type of the `sizeof(A) * count` allocation in test1
int a;
int b;
char * c;
} A;
void test1(unsigned int count) { // sources of length information; comments give (length, lengthDelta, offset, offsetDelta)
if (count < 1) {
return;
}
A* aptr = (A *) malloc(sizeof(A) * count);
sink(aptr); // (count, 0, Zero, 0)
unsigned int* ptr = &count;
sink(ptr); // (Zero, 1, Zero, 0) TODO none, as the feature is not implemented
int* a = (int *) malloc(sizeof(int) * count);
sink(a); // (count, 0, Zero, 0)
a = (int *) malloc(sizeof(int) * (count - 1));
sink(a); // (count, -1, Zero, 0)
a = (int *) malloc(sizeof(int) * (count + 1));
sink(a); // (count, 1, Zero, 0)
a = (int *) malloc(sizeof(int) * (2 * count));
sink(a); // none, as the size expression is too complicated
char* c = (char *)malloc(count);
sink(c); // (count, 0, Zero, 0)
sink((unsigned char*)c); // (count, 0, Zero, 0)
void* v = c;
sink(v); // (count, 0, Zero, 0)
v = malloc(count);
sink((char *)v); // none, as we don't track void* allocations
int stack[100];
sink(stack); // (Zero, 100, Zero, 0)
for(unsigned int i = 0; i < count; ++i) {
int* b = (int*) malloc(sizeof(int) * count);
sink(b); // (count, 0, Zero, 0)
}
}
void test2(unsigned int count, bool b) { // pointer arithmetic steps; comments give (length, lengthDelta, offset, offsetDelta)
int* a = (int *) malloc(sizeof(int) * count);
a = a + 2;
sink(a); // (count, 0, Zero, 2)
for(unsigned int i = 2; i < count; ++i) {
sink(a); // none
a++;
sink(a); // none
}
a = (int*) malloc(sizeof(int) * count);
if (b) {
a += 2;
sink(a); // (count, 0, Zero, 2)
} else {
a += 3;
sink(a); // (count, 0, Zero, 3)
}
sink(a); // none, as the two branches disagree on the offset
a -= 2;
sink(a); // none
a = (int*) malloc(sizeof(int) * count);
for(unsigned int i = 0; i < count; i++) {
sink(&a[i]); // (count, 0, i, 0)
}
a = (int*) malloc(sizeof(int) * count);
sink(a); // (count, 0, Zero, 0)
a += 3;
sink(a); // (count, 0, Zero, 3)
a -= 1;
sink(a); // (count, 0, Zero, 2)
a -= 2;
sink(a); // (count, 0, Zero, 0)
a -= 10;
sink(a); // (count, 0, Zero, -10)
a = (int*) malloc(sizeof(int) * (count + 1));
sink(a); // (count, 1, Zero, 0)
a += count;
sink(a); // (count, 1, count, 0);
a += 1;
sink(a); // (count, 1, count, 1);
a -= count;
sink(a); // none
a = (int*) malloc(sizeof(int) * (count + 1));
a += count + 1;
sink(a); // TODO, should be (count, 1, count, 1), but is (count, 1, count + 1, 0)
a += 1;
sink(a); // TODO, should be (count, 1, count, 2), but is (count, 1, count + 1, 1)
a = (int*) malloc(sizeof(int) * (1024 - count));
sink(a); // none, as the size expression is too complicated
}