Experimental library that tracks the length of memory.

For each pointer, we start tracking (starting from the allocation or an array declaration)
1) how long is the chunk of memory allocated
2) where the current pointer is in this chunk of memory.
This information might not always exist, but when it does, it is reliable.
Currently only works intraprocedurally.
This commit is contained in:
Cornelius Riemenschneider
2020-04-28 13:41:53 +02:00
parent 55cd0fac5c
commit e6d193294a
4 changed files with 394 additions and 0 deletions

View File

@@ -0,0 +1,274 @@
/**
* Provides precise tracking of how big the memory pointed to by pointers is.
* For each pointer, we start tracking (starting from the allocation or an array declaration)
* 1) how long is the chunk of memory allocated
* 2) where the current pointer is in this chunk of memory
* As computing this information is obviously not possible for all pointers,
* we do not guarantee the existence of length/offset information for all pointers.
* However, when it exists it is guaranteed to be accurate.
*
* The length and offset are tracked in a similar way to the Rangeanalysis.
* Each length is a `ValueNumber + delta`, and each Offset is an `Operand + delta`.
* We choose to track a `ValueNumber` for length, because the Rangeanalysis offers
* integer bounds on instructions and operands in terms of `ValueNumber`s,
* and `Operand` for offset because integer bounds on `Operand`s are
* tighter than bounds on `Instruction`s.
*/
import cpp
import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.ir.internal.CppType
private import semmle.code.cpp.models.interfaces.Allocation
private import semmle.code.cpp.rangeanalysis.RangeUtils
private newtype TLength =
TZeroLength() or
TVNLength(ValueNumber vn) {
exists(Instruction i |
vn.getAnInstruction() = i and
(
i.getResultIRType() instanceof IRSignedIntegerType or
i.getResultIRType() instanceof IRUnsignedIntegerType
)
) and
not vn.getAnInstruction() instanceof ConstantInstruction
}
/**
* Array lengths are represented in a ValueNumber | Zero + delta format.
* This class keeps track of the ValueNumber or Zero.
* The delta is tracked in the predicate `knownArrayLength`.
*/
abstract class Length extends TLength {
abstract string toString();
}
/**
* This length class corresponds to an array having a constant length
* that is tracked by the delta value.
*/
class ZeroLength extends Length, TZeroLength {
override string toString() { result = "ZeroLength" }
}
/**
* This length class corresponds to an array having variable length, i.e. the
* length is tracked by a value number. One example is an array having length
* `count` for an integer variable `count` in the program.
*/
class VNLength extends Length, TVNLength {
ValueNumber vn;
VNLength() { this = TVNLength(vn) }
/** Gets the SSA variable that equals value number bound. */
Instruction getInstruction() { this = TVNLength(valueNumber(result)) }
ValueNumber getValueNumber() { result = vn }
override string toString() { result = "VNLength(" + vn.getExampleInstruction().toString() + ")" }
}
private newtype TOffset =
TZeroOffset() or
TOpOffset(Operand op) {
op.getAnyDef().getResultIRType() instanceof IRSignedIntegerType or
op.getAnyDef().getResultIRType() instanceof IRUnsignedIntegerType
}
/**
* This class describes the offset of a pointer in a chunk of memory.
* It is either an `Operand` or zero, an additional integer delta is added later.
*/
abstract class Offset extends TOffset {
abstract string toString();
}
/**
* This class represents a fixed offset, only specified by a delta.
*/
class ZeroOffset extends Offset, TZeroOffset {
override string toString() { result = "ZeroOffset" }
}
/**
* This class represents an offset of an operand.
*/
class OpOffset extends Offset, TOpOffset {
Operand op;
OpOffset() { this = TOpOffset(op) }
Operand getOperand() { result = op }
override string toString() { result = "OpOffset(" + op.getDef().toString() + ")" }
}
private int getBaseSizeForPointerType(PointerType type) { result = type.getBaseType().getSize() }
/**
* Holds if pointer `prev` that points at offset `prevOffset + prevOffsetDelta`
* steps to `array` that points to `offset + offsetDelta` in one step.
* This predicate does not contain any recursive steps.
*/
bindingset[prevOffsetDelta]
predicate simpleArrayLengthStep(
Instruction array, Offset offset, int offsetDelta, Instruction prev, Offset prevOffset,
int prevOffsetDelta
) {
// array assign
array.(CopyInstruction).getSourceValue() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta
or
// pointer add with constant
array.(PointerAddInstruction).getLeft() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta + getConstantValue(array.(PointerAddInstruction).getRight())
or
// pointer add with variable
array.(PointerAddInstruction).getLeft() = prev and
prevOffset instanceof ZeroOffset and
offset.(OpOffset).getOperand() = array.(PointerAddInstruction).getRightOperand() and
offsetDelta = prevOffsetDelta and
not exists(getConstantValue(array.(PointerAddInstruction).getRight()))
or
// pointer sub with constant
array.(PointerSubInstruction).getLeft() = prev and
offset = prevOffset and
offsetDelta = prevOffsetDelta - getConstantValue(array.(PointerSubInstruction).getRight())
or
// array to pointer decay
array.(ConvertInstruction).getUnary() = prev and
array.getConvertedResultExpression() instanceof ArrayToPointerConversion and
offset = prevOffset and
offsetDelta = prevOffsetDelta
or
// cast of pointer to pointer with the same element size
exists(PointerType fromTyp, PointerType toTyp |
array.(PtrToPtrCastInstruction).getUnary() = prev and
prev.getResultLanguageType().hasType(fromTyp, false) and
array.getResultLanguageType().hasType(toTyp, false) and
offset = prevOffset and
offsetDelta = prevOffsetDelta and
if fromTyp instanceof VoidPointerType
then getBaseSizeForPointerType(toTyp) = 1
else (
if toTyp instanceof VoidPointerType
then getBaseSizeForPointerType(fromTyp) = 1
else getBaseSizeForPointerType(toTyp) = getBaseSizeForPointerType(fromTyp)
)
)
}
/**
* Parses a `sizeExpr` of malloc into a variable part (`lengthExpr`) and an integer offset (`delta`).
*/
private predicate deconstructMallocSizeExpr(Expr sizeExpr, Expr lengthExpr, int delta) {
sizeExpr instanceof AddExpr and
exists(Expr constantExpr |
lengthExpr = sizeExpr.(AddExpr).getAnOperand() and
constantExpr = sizeExpr.(AddExpr).getAnOperand() and
lengthExpr != constantExpr and
delta = constantExpr.getValue().toInt()
)
or
sizeExpr instanceof SubExpr and
exists(Expr constantExpr |
lengthExpr = sizeExpr.(SubExpr).getAnOperand() and
constantExpr = sizeExpr.(SubExpr).getAnOperand() and
lengthExpr != constantExpr and
delta = -constantExpr.getValue().toInt()
)
}
/**
* Holds if the instruction `array` is a dynamic memory allocation of `length`+`delta` elements.
*/
private predicate allocation(Instruction array, Length length, int delta) {
exists(AllocationExpr alloc, Type ptrTyp |
array.getUnconvertedResultExpression() = alloc and
array.getResultLanguageType().hasType(ptrTyp, false) and
// ensure that we have the same type of the allocation and the pointer
ptrTyp.stripTopLevelSpecifiers().(PointerType).getBaseType().getUnspecifiedType() =
alloc.getAllocatedElementType().getUnspecifiedType() and
// ensure that the size multiplier of the allocation is the same as the
// size of the type we are allocating
alloc.getSizeMult() = getBaseSizeForPointerType(ptrTyp) and
(
length instanceof ZeroLength and
delta = alloc.getSizeExpr().getValue().toInt()
or
not exists(alloc.getSizeExpr().getValue().toInt()) and
(
exists(Expr lengthExpr |
deconstructMallocSizeExpr(alloc.getSizeExpr(), lengthExpr, delta) and
// TODO converted or unconverted here?
length.(VNLength).getInstruction().getConvertedResultExpression() = lengthExpr
)
or
not exists(int d | deconstructMallocSizeExpr(alloc.getSizeExpr(), _, d)) and
// TODO converted or unconverted here?
length.(VNLength).getInstruction().getConvertedResultExpression() = alloc.getSizeExpr() and
delta = 0
)
)
)
}
/**
* Holds if `array` is declared as an array with length `length + lengthDelta`
*/
private predicate arrayDeclaration(Instruction array, Length length, int lengthDelta) {
(
array instanceof VariableAddressInstruction or
array instanceof FieldAddressInstruction
) and
exists(ArrayType type | array.getResultLanguageType().hasType(type, _) |
length instanceof ZeroLength and
lengthDelta = type.getArraySize()
)
}
/**
* Holds if `array` is declared as an array or allocated
* with length `length + lengthDelta`
*/
predicate arrayAllocationOrDeclaration(Instruction array, Length length, int lengthDelta) {
allocation(array, length, lengthDelta)
or
// declaration of variable of array type
arrayDeclaration(array, length, lengthDelta)
}
/**
* Holds if the instruction `array` represents a pointer to a chunk of memory that holds
* `length + lengthDelta` elements, using only local analysis.
* `array` points at `offset + offsetDelta` in the chunk of memory.
* The pointer is in-bounds if `offset < length + lengthDelta` and
* `offset + offsetDelta >= 0` holds.
* The pointer is out-of-bounds if `offset >= length + lengthDelta`
* or `offset + offsetDelta < 0` holds.
* All pointers in this predicate are guaranteed to be non-null,
* but are not guaranteed to be live.
*/
cached
predicate knownArrayLength(
Instruction array, Length length, int lengthDelta, Offset offset, int offsetDelta
) {
arrayAllocationOrDeclaration(array, length, lengthDelta) and
offset instanceof ZeroOffset and
offsetDelta = 0
or
// simple step (no phi nodes)
exists(Instruction prev, Offset prevOffset, int prevOffsetDelta |
knownArrayLength(prev, length, lengthDelta, prevOffset, prevOffsetDelta) and
simpleArrayLengthStep(array, offset, offsetDelta, prev, prevOffset, prevOffsetDelta)
)
or
// merge control flow after phi node - but only if all the bounds agree
forex(Instruction input | array.(PhiInstruction).getAnInput() = input |
knownArrayLength(input, length, lengthDelta, offset, offsetDelta)
)
}

View File

@@ -0,0 +1,24 @@
| test.cpp:15:8:15:11 | Load: aptr | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:19:8:19:8 | Load: a | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:21:8:21:8 | Load: a | VNLength(Chi: ptr) | -1 | ZeroOffset | 0 |
| test.cpp:23:8:23:8 | Load: a | VNLength(Chi: ptr) | 1 | ZeroOffset | 0 |
| test.cpp:25:8:25:8 | Load: a | VNLength(Mul: ... * ...) | 0 | ZeroOffset | 0 |
| test.cpp:27:8:27:8 | Load: c | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:28:8:28:24 | Convert: (unsigned char *)... | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:30:8:30:8 | Load: v | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:34:8:34:12 | Convert: array to pointer conversion | ZeroLength | 100 | ZeroOffset | 0 |
| test.cpp:37:10:37:10 | Load: b | VNLength(Chi: ptr) | 0 | ZeroOffset | 0 |
| test.cpp:44:8:44:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:53:10:53:10 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:56:10:56:10 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 3 |
| test.cpp:63:10:63:14 | CopyValue: & ... | VNLength(InitializeParameter: count) | 0 | OpOffset(Load: i) | 0 |
| test.cpp:66:8:66:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:68:8:68:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 3 |
| test.cpp:70:8:70:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 2 |
| test.cpp:72:8:72:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | 0 |
| test.cpp:74:8:74:8 | Load: a | VNLength(InitializeParameter: count) | 0 | ZeroOffset | -10 |
| test.cpp:76:8:76:8 | Load: a | VNLength(InitializeParameter: count) | 1 | ZeroOffset | 0 |
| test.cpp:78:8:78:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Load: count) | 0 |
| test.cpp:80:8:80:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Load: count) | 1 |
| test.cpp:85:8:85:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Add: ... + ...) | 0 |
| test.cpp:87:8:87:8 | Load: a | VNLength(InitializeParameter: count) | 1 | OpOffset(Add: ... + ...) | 1 |

View File

@@ -0,0 +1,8 @@
import cpp
import experimental.library.ArrayLengthAnalysis
from Instruction array, Length length, int delta, Offset offset, int offsetDelta
where
knownArrayLength(array, length, delta, offset, offsetDelta) and
array.getAUse() instanceof ArgumentOperand
select array, length, delta, offset, offsetDelta

View File

@@ -0,0 +1,88 @@
void *malloc(unsigned long);
void sink(...);
typedef struct A {
int a;
int b;
char * c;
} A;
void test1(unsigned int count) {
if (count < 1) {
return;
}
A* aptr = (A *) malloc(sizeof(A) * count);
sink(aptr); // (count, 0, Zero, 0)
unsigned int* ptr = &count;
sink(ptr); // (Zero, 1, Zero, 0) TODO none, as the feature is not implemented
int* a = (int *) malloc(sizeof(int) * count);
sink(a); // (count, 0, Zero, 0)
a = (int *) malloc(sizeof(int) * (count - 1));
sink(a); // (count, -1, Zero, 0)
a = (int *) malloc(sizeof(int) * (count + 1));
sink(a); // (count, 1, Zero, 0)
a = (int *) malloc(sizeof(int) * (2 * count));
sink(a); // (2*count, 0, Zero, 0)
char* c = (char *)malloc(count);
sink(c); // /count, 0, Zero, 0)
sink((unsigned char*)c); // (count, 0, Zero, 0)
void* v = c;
sink(v); // (count, 0, Zero, 0)
v = malloc(count);
sink((char *)v); // none, as we don't track void* allocations
int stack[100];
sink(stack); // (Zero, 100, Zero, 0)
for(unsigned int i = 0; i < count; ++i) {
int* b = (int*) malloc(sizeof(int) * count);
sink(b); // (count, 0, Zero, 0)
}
}
void test2(unsigned int count, bool b) {
int* a = (int *) malloc(sizeof(int) * count);
a = a + 2;
sink(a); // (count, 0, Zero, 2)
for(unsigned int i = 2; i < count; ++i) {
sink(a); // none
a++;
sink(a); // none
}
a = (int*) malloc(sizeof(int) * count);
if (b) {
a += 2;
sink(a); // (count, 0, Zero, 2)
} else {
a += 3;
sink(a); // (count, 0, Zero, 2)
}
sink(a); // none
a -= 2;
sink(a); // none
a = (int*) malloc(sizeof(int) * count);
for(unsigned int i = 0; i < count; i++) {
sink(&a[i]); // (count, 0, i, 0)
}
a = (int*) malloc(sizeof(int) * count);
sink(a); // (count, 0, Zero, 0)
a += 3;
sink(a); // (count, 0, Zero, 3)
a -= 1;
sink(a); // (count, 0, Zero, 2)
a -= 2;
sink(a); // (count, 0, Zero, 0)
a -= 10;
sink(a); // (count, 0, Zero, -10)
a = (int*) malloc(sizeof(int) * (count + 1));
sink(a); // (count, 1, Zero, 0)
a += count;
sink(a); // (count, 1, count, 0);
a += 1;
sink(a); // (count, 1, count, 1);
a -= count;
sink(a); // none
a = (int*) malloc(sizeof(int) * (count + 1));
a += count + 1;
sink(a); // TODO, should be (count, 1, count, 1), but is (count, 1, count + 1, 0)
a += 1;
sink(a); // TODO, should be (count, 1, count, 2), but is (count, 1, count + 1, 1)
}