Merge pull request #2312 from jbj/pointer-wraparound-query

C++: New query: Pointer overflow check
This commit is contained in:
Geoffrey White
2019-11-14 16:13:04 +00:00
committed by GitHub
15 changed files with 219 additions and 10 deletions

View File

@@ -13,6 +13,7 @@
import cpp
import PointlessSelfComparison
import semmle.code.cpp.commons.Exclusions
from ComparisonOperation cmp
where
@@ -20,11 +21,5 @@ where
not nanTest(cmp) and
not overflowTest(cmp) and
not cmp.isFromTemplateInstantiation(_) and
not exists(MacroInvocation mi |
// cmp is in mi
mi.getAnExpandedElement() = cmp and
// and cmp was apparently not passed in as a macro parameter
cmp.getLocation().getStartLine() = mi.getLocation().getStartLine() and
cmp.getLocation().getStartColumn() = mi.getLocation().getStartColumn()
)
not isFromMacroDefinition(cmp)
select cmp, "Self comparison."

View File

@@ -1,5 +1,5 @@
/**
* @name Undefined result of signed test for overflow
* @name Signed overflow check
* @description Testing for overflow by adding a value to a variable
* to see if it "wraps around" works only for
* unsigned integer values.
@@ -7,7 +7,7 @@
* @problem.severity warning
* @precision high
* @id cpp/signed-overflow-check
* @tags reliability
* @tags correctness
* security
*/

View File

@@ -0,0 +1,3 @@
bool not_in_range(T *ptr, T *ptr_end, size_t a) {
return ptr + a >= ptr_end || ptr + a < ptr; // BAD
}

View File

@@ -0,0 +1,3 @@
bool not_in_range(T *ptr, T *ptr_end, size_t a) {
return a >= ptr_end - ptr; // GOOD
}

View File

@@ -0,0 +1,69 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
The expression <code>ptr + a &lt; ptr</code> is equivalent to <code>a &lt;
0</code>, and an optimizing compiler is likely to make that replacement,
thereby removing a range check that might have been necessary for security.
If <code>a</code> is known to be non-negative, the compiler can even replace <code>ptr +
a &lt; ptr</code> with <code>false</code>.
</p>
<p>
The reason is that pointer arithmetic overflow in C/C++ is undefined
behavior. The optimizing compiler can assume that the program has no
undefined behavior, which means that adding a positive number to <code>ptr</code> cannot
produce a pointer less than <code>ptr</code>.
</p>
</overview>
<recommendation>
<p>
To check whether an index <code>a</code> is less than the length of an array,
simply compare these two numbers as unsigned integers: <code>a &lt; ARRAY_LENGTH</code>.
If the length of the array is defined as the difference between two pointers
<code>ptr</code> and <code>p_end</code>, write <code>a &lt; p_end - ptr</code>.
If a is <code>signed</code>, cast it to <code>unsigned</code>
in order to guard against negative <code>a</code>. For example, write
<code>(size_t)a &lt; p_end - ptr</code>.
</p>
</recommendation>
<example>
<p>
An invalid check for pointer overflow is most often seen as part of checking
whether a number <code>a</code> is too large by checking first if adding the
number to <code>ptr</code> goes past the end of an allocation and then
checking if adding it to <code>ptr</code> creates a pointer so large that it
overflows and wraps around.
</p>
<sample src="PointerOverflow-bad.cpp" />
<p>
In both of these checks, the operations are performed in the wrong order.
First, an expression that may cause undefined behavior is evaluated
(<code>ptr + a</code>), and then the result is checked for being in range.
But once undefined behavior has happened in the pointer addition, it cannot
be recovered from: it's too late to perform the range check after a possible
pointer overflow.
</p>
<p>
While it's not the subject of this query, the expression <code>ptr + a &lt;
ptr_end</code> is also an invalid range check. It's undefined behavor in
C/C++ to create a pointer that points more than one past the end of an
allocation.
</p>
<p>
The next example shows how to portably check whether an unsigned number is outside the
range of an allocation between <code>ptr</code> and <code>ptr_end</code>.
</p>
<sample src="PointerOverflow-good.cpp" />
</example>
<references>
<li>Embedded in Academia: <a href="https://blog.regehr.org/archives/1395">Pointer Overflow Checking</a>.</li>
<li>LWN: <a href="https://lwn.net/Articles/278137/">GCC and pointer overflows</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,31 @@
/**
* @name Pointer overflow check
* @description Adding a value to a pointer to check if it overflows relies
* on undefined behavior and may lead to memory corruption.
* @kind problem
* @problem.severity error
* @precision high
* @id cpp/pointer-overflow-check
* @tags reliability
* security
*/
import cpp
private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
private import semmle.code.cpp.commons.Exclusions
from RelationalOperation ro, PointerAddExpr add, Expr expr1, Expr expr2
where
ro.getAnOperand() = add and
add.getAnOperand() = expr1 and
ro.getAnOperand() = expr2 and
globalValueNumber(expr1) = globalValueNumber(expr2) and
// Exclude macros but not their arguments
not isFromMacroDefinition(ro) and
// There must be a compilation of this file without a flag that makes pointer
// overflow well defined.
exists(Compilation c | c.getAFileCompiled() = ro.getFile() |
not c.getAnArgument() = "-fwrapv-pointer" and
not c.getAnArgument() = "-fno-strict-overflow"
)
select ro, "Range check relying on pointer overflow."

View File

@@ -79,3 +79,26 @@ predicate functionContainsPreprocCode(Function f) {
pbdStartLine >= fBlockStartLine
)
}
/**
* Holds if `e` is completely or partially from a macro definition, as opposed
* to being passed in as an argument.
*
* In the following example, the call to `f` is from a macro definition,
* while `y`, `+`, `1`, and `;` are not. This assumes that no identifier apart
* from `M` refers to a macro.
* ```
* #define M(x) f(x)
* ...
* M(y + 1);
* ```
*/
predicate isFromMacroDefinition(Element e) {
exists(MacroInvocation mi |
// e is in mi
mi.getAnExpandedElement() = e and
// and e was apparently not passed in as a macro parameter
e.getLocation().getStartLine() = mi.getLocation().getStartLine() and
e.getLocation().getStartColumn() = mi.getLocation().getStartColumn()
)
}

View File

@@ -0,0 +1,9 @@
// This is the example from the QLDoc of `isFromMacroDefinition`.
void f(int);
#define M(x) f(x)
void useM(int y) {
M(y + 1);
}

View File

@@ -0,0 +1 @@
| exclusions.cpp:8:3:8:10 | call to f |

View File

@@ -0,0 +1,5 @@
import semmle.code.cpp.commons.Exclusions
from Element e
where isFromMacroDefinition(e)
select e

View File

@@ -0,0 +1,4 @@
| test.cpp:6:12:6:33 | ... < ... | Range check relying on pointer overflow. |
| test.cpp:33:9:33:21 | ... < ... | Range check relying on pointer overflow. |
| test.cpp:49:36:49:48 | ... < ... | Range check relying on pointer overflow. |
| test.cpp:51:14:51:26 | ... < ... | Range check relying on pointer overflow. |

View File

@@ -0,0 +1 @@
Likely Bugs/Memory Management/PointerOverflow.ql

View File

@@ -0,0 +1,6 @@
// semmle-extractor-options: -fno-strict-overflow
int not_in_range_nostrict(int *ptr, int *ptr_end, unsigned int a) {
return ptr + a < ptr_end || // GOOD (for the purpose of this test)
ptr + a < ptr; // GOOD (due to compiler options)
}

View File

@@ -0,0 +1,58 @@
struct P { int a, b; };
bool check_pointer_overflow(P *ptr) {
// x86-64 gcc 9.2 -O2: deleted
// x86-64 clang 9.9.9 -O2: deleted
// x64 msvc v19.22 /O2: not deleted
return ptr + 0x12345678 < ptr; // BAD
}
bool check_pointer_overflow(P *ptr, P *ptr_end) {
// x86-64 gcc 9.2 -O2: not deleted
// x86-64 clang 9.0.0 -O2: not deleted
// x64 msvc v19.22 /O2: not deleted
return ptr_end - ptr > 4; // GOOD
}
struct Q {
#define Q_SIZE 32
char arr[Q_SIZE];
char *begin() { return &arr[0]; }
char *end() { return &arr[Q_SIZE]; }
};
void foo(int untrusted_int) {
Q q;
if (q.begin() + untrusted_int > q.end() || // GOOD (for the purpose of this test)
q.begin() + untrusted_int < q.begin()) // BAD [NOT DETECTED]
throw q;
}
typedef unsigned long size_t;
bool not_in_range_bad(Q *ptr, Q *ptr_end, size_t a) {
return ptr + a >= ptr_end || // GOOD (for the purpose of this test)
ptr + a < ptr; // BAD
}
bool not_in_range_good(Q *ptr, Q *ptr_end, size_t a) {
return a >= ptr_end - ptr; // GOOD
}
bool in_range(Q *ptr, Q *ptr_end, size_t a) {
return a < ptr_end - ptr; // GOOD
}
extern "C" void abort(void);
#define MYASSERT(cond) if (cond) abort()
void assert_not_in_range_bad(Q *ptr, Q *ptr_end, size_t a) {
MYASSERT(ptr + a >= ptr_end || ptr + a < ptr); // BAD
MYASSERT(ptr + a >= ptr_end); // GOOD (for the purpose of this test)
MYASSERT(ptr + a < ptr); // BAD
}
#define IS_LESS_THAN(lhs, rhs) ((lhs) < (rhs))
bool not_in_range_macro(Q *ptr, Q *ptr_end, size_t a) {
return ptr + a >= ptr_end || IS_LESS_THAN(ptr + a, ptr); // GOOD (meant to be excluded)
}