This library proves that a subset of pointer dereferences in a program are safe, i.e. in-bounds.

It does so by first defining what a pointer dereference is (on the IR
`Instruction` level), and then using the array length analysis and the range
analysis together to prove that some of these pointer dereferences are safe.
This commit is contained in:
Cornelius Riemenschneider
2020-05-04 17:20:27 +02:00
parent b2f1008a00
commit 1c9fa4eb1d
5 changed files with 229 additions and 0 deletions

View File

@@ -0,0 +1,102 @@
/**
* This library proves that a subset of pointer dereferences in a program are
* safe, i.e. in-bounds.
* It does so by first defining what a pointer dereference is (on the IR
* `Instruction` level), and then using the array length analysis and the range
* analysis together to prove that some of these pointer dereferences are safe.
*
* The analysis is soundy, i.e. it is sound if no undefined behaviour is present
* in the program.
* Furthermore, it crucially depends on the soundiness of the range analysis and
* the array length analysis.
*/
import cpp
private import experimental.semmle.code.cpp.rangeanalysis.ArrayLengthAnalysis
private import semmle.code.cpp.rangeanalysis.RangeAnalysis
/**
* Gets the instruction that computes the address of memory that `i` accesses.
* Only holds if `i` dereferences a pointer, not when the computation of the
* memory address is constant, or if the address of a local variable is loaded/stored to.
*/
private Instruction getMemoryAddressInstruction(Instruction i) {
(
result = i.(FieldAddressInstruction).getObjectAddress() or
result = i.(LoadInstruction).getSourceAddress() or
result = i.(StoreInstruction).getDestinationAddress()
) and
not result instanceof FieldAddressInstruction and
not result instanceof VariableAddressInstruction and
not result instanceof ConstantValueInstruction
}
/**
* All instructions that dereference a pointer.
*/
class PointerDereferenceInstruction extends Instruction {
PointerDereferenceInstruction() { exists(getMemoryAddressInstruction(this)) }
Instruction getAddress() { result = getMemoryAddressInstruction(this) }
}
/**
* Holds if `ptrDeref` can be proven to always access allocated memory.
*/
predicate inBounds(PointerDereferenceInstruction ptrDeref) {
exists(Length length, int lengthDelta, Offset offset, int offsetDelta |
knownArrayLength(ptrDeref.getAddress(), length, lengthDelta, offset, offsetDelta) and
// lower bound - note that we treat a pointer that accesses an array of
// length 0 as on upper-bound violation, but not as a lower-bound violation
(
offset instanceof ZeroOffset and
offsetDelta >= 0
or
offset instanceof OpOffset and
exists(int lowerBoundDelta |
boundedOperand(offset.(OpOffset).getOperand(), any(ZeroBound b), lowerBoundDelta,
/*upper*/ false, _) and
lowerBoundDelta + offsetDelta >= 0
)
) and
// upper bound
(
// both offset and length are only integers
length instanceof ZeroLength and
offset instanceof ZeroOffset and
offsetDelta < lengthDelta
or
exists(int lengthBound |
// array length is variable+integer, and there's a fixed (integer-only)
// lower bound on the variable, so we can guarantee this access is always in-bounds
length instanceof VNLength and
offset instanceof ZeroOffset and
boundedInstruction(length.(VNLength).getInstruction(), any(ZeroBound b), lengthBound,
/* upper*/ false, _) and
offsetDelta < lengthBound + lengthDelta
)
or
exists(int offsetBoundDelta |
length instanceof ZeroLength and
offset instanceof OpOffset and
boundedOperand(offset.(OpOffset).getOperand(), any(ZeroBound b), offsetBoundDelta,
/* upper */ true, _) and
// offset <= offsetBoundDelta, so offset + offsetDelta <= offsetDelta + offsetBoundDelta
// Thus, in-bounds if offsetDelta + offsetBoundDelta < lengthDelta
// as we have length instanceof ZeroLength
offsetDelta + offsetBoundDelta < lengthDelta
)
or
exists(ValueNumberBound b, int offsetBoundDelta |
length instanceof VNLength and
offset instanceof OpOffset and
b.getValueNumber() = length.(VNLength).getValueNumber() and
boundedOperand(offset.(OpOffset).getOperand(), b, offsetBoundDelta, /*upper*/ true, _) and
// this ensures that offset <= length holds
offsetBoundDelta <= 0 and
// with that we get that offset + offsetDelta < length offsetBoundDelta + lengthDelta - offsetBoundDelta
offsetDelta < lengthDelta - offsetBoundDelta
)
)
)
}

View File

@@ -76,4 +76,6 @@ class ValueNumberBound extends Bound, TBoundValueNumber {
override string toString() { result = vn.getExampleInstruction().toString() }
override Location getLocation() { result = vn.getLocation() }
ValueNumber getValueNumber() { result = vn }
}

View File

@@ -0,0 +1,10 @@
| test.cpp:14:18:14:18 | FieldAddress: a |
| test.cpp:15:18:15:18 | FieldAddress: b |
| test.cpp:26:5:26:12 | Store: ... = ... |
| test.cpp:27:13:27:16 | Load: access to array |
| test.cpp:33:5:33:12 | Store: ... = ... |
| test.cpp:48:5:48:16 | Store: ... = ... |
| test.cpp:64:11:64:14 | Load: access to array |
| test.cpp:68:5:68:12 | Store: ... = ... |
| test.cpp:70:3:70:11 | Store: ... = ... |
| test.cpp:74:3:74:18 | Store: ... = ... |

View File

@@ -0,0 +1,6 @@
import cpp
import experimental.semmle.code.cpp.rangeanalysis.InBoundsPointerDeref
from PointerDereferenceInstruction ptrAccess
where inBounds(ptrAccess)
select ptrAccess

View File

@@ -0,0 +1,109 @@
void *malloc(unsigned long);
typedef struct A {
int a;
int b;
char * c;
} A;
void test1(unsigned int count) {
if (count < 1) {
return;
}
A* ptr = (A*) malloc(sizeof(A) * count);
ptr[count - 1].a = 1000; // in-bounds
ptr[count - 1].b = 1001; // in-bounds
ptr[1].c = 0; // unknown
ptr[count - 2].a = 1002; // dependant on call-site
ptr[count].b = 1003; // out-of-bounds
ptr[-1].c = 0; // out-of-bounds
}
void test2(unsigned int count) {
int* a = (int*) malloc(sizeof(int) * count);
for(unsigned int i = 0; i < count; ++i) {
a[i] = 0; // in-bounds
int l = a[i]; // in-bounds
}
a = (int*) malloc(sizeof(int) * count);
a = a + 2;
for(unsigned int i = 0; i < count - 2; ++i) {
a[i] = 0; // in-bounds
}
for(unsigned int i = 0; i < count; ++i) {
*a = 1; // in-bounds but not detected, array length tracking is not advanced enough for this
a++;
}
void* v = malloc(count);
for(unsigned int i = 0; i < count; ++i) {
((char *)v)[i] = 0; // in-bounds, but due to void-allocation not detected
}
int stack[100];
for(unsigned int i = 0; i < 100; ++i) {
stack[i] = 0; // in-bounds
}
for(unsigned int i = 0; i < count; ++i) {
a = (int*) malloc(sizeof(int) * count);
for (int j = 0; j < count; ++j) {
a[j] = 0; // in-bounds TODO not detected
}
}
}
void test3(unsigned long count) {
if (count < 1) {
return;
}
int* a = (int*) malloc(sizeof(int) * count);
int b = a[0] + 3; // in-bounds
a = a + 2;
unsigned int i = 0;
for(; i < count - 2; ++i) {
a[i] = 0; // in-bounds
}
a[-2] = 0; // in-bounds
a[-3] = 0; // out-of-bounds
a[i] = 0; // out-of-bounds
a[count - 2] = 0; // out-of-bounds
a[count - 3] = 0; // in-bounds
}
void test4(unsigned int count) {
int* a = (int*) malloc(sizeof(int) * count);
a[0] = 0; // unknown, call-site dependant
}
void test5(unsigned int count, bool b) {
if(count < 4) {
return;
}
int* a = (int*) malloc(sizeof(int) * count);
if (b) {
a += 2;
} else {
a += 3;
} // we lose all information about a after the phi-node here
a[-2] = 0; // unknown
a[-3] = 0; // unknown
a[-4] = 0; // unknown
a[0] = 0; // unknown
}
void test6(unsigned int object) {
unsigned int* ptr = &object;
*ptr = 0; // in-bounds, but needs ArrayLengthAnalysis improvements.
}
void test7() {
void (*foo)(unsigned int);
foo = &test6;
foo(4); // in-bounds, but needs ArrayLengthAnalysis improvements.
}