C++: complete initial implementation of cpp/missing-check-scanf

There are still some remaining FPs (haven't fully tested them)
that should be ironed out in a follow-up to increase the precision, e.g.:

  * if scanf(&i) != 1 return
    if maybe() && scanf(&i) != 1 return
    use(i) // should be OK on both counts

  * The minimum guard constant for the *_s variants may not be right.

  * int i[2]
    scanf(i, i+1) // second i is flagged as a use of the first

  * Maybe loosen the "unguarded or badly guarded use() = bad" policy to
    "unguarded but already-initialized = good" and "badly guarded = bad",
    since a lot of FPs in MRVA fall into the "unguarded but already-
    initialized" bucket.
This commit is contained in:
Nora Dimitrijević
2022-08-11 16:48:11 +02:00
parent 69911d4f36
commit ca162a4365
3 changed files with 355 additions and 60 deletions

View File

@@ -1,19 +1,122 @@
/**
* @name TODO
* @name Missing return-value check for a scanf-like function
* @description TODO
* @kind problem
* @problem.severity warning
* @security-severity TODO
* @precision TODO
* @id cpp/missing-check-scanf
* @tags TODO
* @tags security
*/
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
from ScanfFunction scanf, FunctionCall fc
/** An expression appearing as an output argument to a `scanf`-like call */
class ScanfOutput extends Expr {
ScanfFunctionCall call;
int varargIndex;
Instruction instr;
ValueNumber valNum;
ScanfOutput() {
this = call.getArgument(call.getTarget().getNumberOfParameters() + varargIndex) and
varargIndex >= 0 and
instr.getUnconvertedResultExpression() = this and
valueNumber(instr) = valNum and
// The following line is a kludge to prohibit more than one associated `instr` field,
// as would occur, for example, when `this` is an access to an array variable.
not instr instanceof ConvertInstruction
}
ScanfFunctionCall getCall() { result = call }
/**
* Any subsequent use of this argument should be surrounded by a
* check ensuring that the `scanf`-like function has returned a value
* equal to at least `getMinimumGuardConstant()`.
*/
int getMinimumGuardConstant() {
result =
varargIndex + 1 -
count(ScanfFormatLiteral f, int n |
n <= varargIndex and f.getUse() = call and f.parseConvSpec(n, _, _, _, "n")
)
}
predicate hasGuardedAccess(Access e, boolean isGuarded) {
e = this.getAnAccess() and
if
exists(int value, int minGuard | minGuard = this.getMinimumGuardConstant() |
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
)
then isGuarded = true
else isGuarded = false
}
/**
* Get a subsequent access of the same underlying storage,
* but before it gets reset or reused in another `scanf` call.
*/
Access getAnAccess() {
exists(Instruction dst |
this.bigStep(instr) = dst and
dst.getAst() = result and
valueNumber(dst) = valNum
)
}
private Instruction bigStep(Instruction i) {
result = this.smallStep(i)
or
exists(Instruction j | j = this.bigStep(i) | result = this.smallStep(j))
}
private Instruction smallStep(Instruction i) {
instr.getASuccessor*() = i and
i.getASuccessor() = result and
not this.isBarrier(result)
}
private predicate isBarrier(Instruction i) {
valueNumber(i) = valNum and
exists(Expr e | i.getAst() = e |
i = any(StoreInstruction s).getDestinationAddress()
or
[e, e.getParent().(AddressOfExpr)] instanceof ScanfOutput
)
}
}
/** Returns a block guarded by the assertion of $value $op $call */
BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
g.ensuresLt(left, right, 0, result, true) and op = "<"
or
g.ensuresLt(left, right, 1, result, true) and op = "<="
)
}
from ScanfOutput output, ScanfFunctionCall call, ScanfFunction fun, Access access
where
fc.getTarget() = scanf and
fc instanceof ExprInVoidContext
select fc, "This is a call to scanf."
call.getTarget() = fun and
output.getCall() = call and
output.hasGuardedAccess(access, false)
select access,
"$@ is read here, but may not have been written. " +
"It should be guarded by a check that $@() returns at least " + output.getMinimumGuardConstant()
+ ".", access, access.toString(), call, fun.getName()

View File

@@ -1,9 +1,19 @@
| test.cpp:23:3:23:7 | call to scanf | This is a call to scanf. |
| test.cpp:39:3:39:7 | call to scanf | This is a call to scanf. |
| test.cpp:48:3:48:8 | call to fscanf | This is a call to scanf. |
| test.cpp:55:3:55:8 | call to sscanf | This is a call to scanf. |
| test.cpp:135:3:135:7 | call to scanf | This is a call to scanf. |
| test.cpp:143:3:143:7 | call to scanf | This is a call to scanf. |
| test.cpp:151:3:151:7 | call to scanf | This is a call to scanf. |
| test.cpp:163:3:163:7 | call to scanf | This is a call to scanf. |
| test.cpp:173:3:173:7 | call to scanf | This is a call to scanf. |
| test.cpp:30:7:30:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:30:7:30:7 | i | i | test.cpp:29:3:29:7 | call to scanf | scanf |
| test.cpp:46:7:46:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:46:7:46:7 | i | i | test.cpp:45:3:45:7 | call to scanf | scanf |
| test.cpp:63:7:63:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:63:7:63:7 | i | i | test.cpp:62:3:62:7 | call to scanf | scanf |
| test.cpp:75:7:75:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:75:7:75:7 | i | i | test.cpp:74:3:74:7 | call to scanf | scanf |
| test.cpp:87:7:87:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:87:7:87:7 | i | i | test.cpp:86:3:86:8 | call to fscanf | fscanf |
| test.cpp:94:7:94:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:94:7:94:7 | i | i | test.cpp:93:3:93:8 | call to sscanf | sscanf |
| test.cpp:143:8:143:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:143:8:143:8 | i | i | test.cpp:141:7:141:11 | call to scanf | scanf |
| test.cpp:152:8:152:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:152:8:152:8 | i | i | test.cpp:150:7:150:11 | call to scanf | scanf |
| test.cpp:184:8:184:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:184:8:184:8 | i | i | test.cpp:183:7:183:11 | call to scanf | scanf |
| test.cpp:203:8:203:8 | j | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:203:8:203:8 | j | j | test.cpp:200:7:200:11 | call to scanf | scanf |
| test.cpp:227:9:227:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:227:9:227:9 | d | d | test.cpp:225:25:225:29 | call to scanf | scanf |
| test.cpp:231:9:231:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 2. | test.cpp:231:9:231:9 | d | d | test.cpp:229:14:229:18 | call to scanf | scanf |
| test.cpp:243:7:243:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:243:7:243:7 | i | i | test.cpp:242:3:242:7 | call to scanf | scanf |
| test.cpp:251:7:251:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:251:7:251:7 | i | i | test.cpp:250:3:250:7 | call to scanf | scanf |
| test.cpp:259:7:259:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:259:7:259:7 | i | i | test.cpp:258:3:258:7 | call to scanf | scanf |
| test.cpp:271:7:271:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:271:7:271:7 | i | i | test.cpp:270:3:270:7 | call to scanf | scanf |
| test.cpp:281:8:281:12 | ptr_i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:281:8:281:12 | ptr_i | ptr_i | test.cpp:280:3:280:7 | call to scanf | scanf |
| test.cpp:289:7:289:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:289:7:289:7 | i | i | test.cpp:288:3:288:7 | call to scanf | scanf |
| test.cpp:383:25:383:25 | u | $@ is read here, but may not have been written. It should be guarded by a check that $@() returns at least 1. | test.cpp:383:25:383:25 | u | u | test.cpp:382:6:382:11 | call to sscanf | sscanf |

View File

@@ -1,8 +1,13 @@
typedef struct {} FILE;
typedef struct
{
} FILE;
typedef void *locale_t;
int scanf(const char *format, ...);
int fscanf(FILE *stream, const char *format, ...);
int sscanf(const char *s, const char *format, ...);
int _scanf_l(const char *format, locale_t locale, ...);
void use(int i);
@@ -12,6 +17,7 @@ bool maybe();
FILE *get_a_stream();
const char *get_a_string();
extern locale_t get_a_locale();
int main()
{
@@ -20,24 +26,56 @@ int main()
{
int i;
scanf("%d", &i); // BAD: may not have written `i`
use(i);
scanf("%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
if (scanf("%d", &i) == 1) // GOOD: checks return value
if (scanf("%d", &i) == 1)
{
use(i);
use(i); // GOOD: checks return value
}
}
{
int i = 0;
scanf("%d", &i); // BAD
use(i);
scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
}
{
int i;
use(i); // GOOD: only care about uses after scanf call
if (scanf("%d", &i) == 1)
{
use(i); // GOOD
}
}
{
int i; // Reused variable
scanf("%d", &i);
use(i); // BAD
if (scanf("%d", &i) == 1)
{
use(i); // GOOD
}
}
{
int i; // Reset variable
scanf("%d", &i);
use(i); // BAD
i = 1;
use(i); // GOOD
}
// --- different scanf functions ---
@@ -45,15 +83,24 @@ int main()
{
int i;
fscanf(get_a_stream(), "%d", &i); // BAD: may not have written `i`
use(i);
fscanf(get_a_stream(), "%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
sscanf(get_a_string(), "%d", &i); // BAD: may not have written `i`
use(i);
sscanf(get_a_string(), "%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
if (_scanf_l("%d", get_a_locale(), &i) == 1)
{
use(i); // GOOD
}
}
// --- different ways of checking ---
@@ -61,36 +108,60 @@ int main()
{
int i;
if (scanf("%d", &i) >= 1) // GOOD
if (scanf("%d", &i) >= 1)
{
use(i);
use(i); // GOOD
}
}
{
int i;
if (scanf("%d", &i) == 1) // GOOD
if (scanf("%d", &i) == 1)
{
use(i);
use(i); // GOOD
}
}
{
int i;
if (scanf("%d", &i) != 0) // BAD: scanf can return -1 [NOT DETECTED]
if (0 < scanf("%d", &i))
{
use(i);
if (true)
{
use(i); // GOOD
}
}
}
{
int i;
if (scanf("%d", &i) == 0) // BAD: checks return value incorrectly [NOT DETECTED]
if (scanf("%d", &i) != 0)
{
use(i);
use(i); // BAD: scanf can return EOF
}
}
{
int i;
if (scanf("%d", &i) == 0)
{
use(i); // BAD: checks return value incorrectly
}
}
{
int r;
int i;
r = scanf("%d", &i);
if (r >= 1)
{
use(i); // GOOD
}
}
@@ -98,31 +169,67 @@ int main()
bool b;
int i;
b = scanf("%d", &i); // GOOD
b = scanf("%d", &i);
if (b >= 1)
{
use(i);
use(i); // BAD [NOT DETECTED]: scanf can return EOF (boolifies true)
}
}
{
int i;
if (scanf("%d", &i))
use(i); // BAD
}
{
int i, j;
if (scanf("%d %d", &i) >= 2)
{
use(i); // GOOD
use(j); // GOOD: `j` is not a scanf arg, so out of scope of MissingCheckScanf
}
}
{
int i, j;
if (scanf("%d %d", &i) >= 2) // GOOD
if (scanf("%d %d", &i, &j) >= 1)
{
use(i);
use(j);
use(i); // GOOD
use(j); // BAD: checks return value incorrectly
}
}
{
int i, j;
if (scanf("%d %d", &i, &j) >= 1) // BAD: checks return value incorrectly [NOT DETECTED]
if (scanf("%d %d", &i, &j) >= 2)
{
use(i);
use(j);
use(i); // GOOD
use(j); // GOOD
}
}
{
char c[5];
int d;
while(maybe()) {
if (maybe()) {
break;
}
else if (maybe() && (scanf("%5c %d", c, &d) == 1)) { // GOOD [FALSE POSITIVE]
use(*(int *)c); // GOOD
use(d); // BAD
}
else if ((scanf("%5c %d", c, &d) == 1) && maybe()) { // GOOD [FALSE POSITIVE]
use(*(int *)c); // GOOD
use(d); // BAD
}
}
}
@@ -132,24 +239,24 @@ int main()
int i;
i = 0;
scanf("%d", &i); // BAD
use(i);
scanf("%d", &i);
use(i); // BAD
}
{
int i;
set_by_ref(i);
scanf("%d", &i); // BAD
use(i);
scanf("%d", &i);
use(i); // BAD
}
{
int i;
set_by_ptr(&i);
scanf("%d", &i); // BAD
use(i);
scanf("%d", &i);
use(i); // BAD
}
{
@@ -160,46 +267,121 @@ int main()
i = 0;
}
scanf("%d", &i); // BAD: `i` may not have been initialized
use(i);
scanf("%d", &i);
use(i); // BAD: `i` may not have been initialized
}
// --- different use ---
{
int i;
int *ptr_i = &i;
scanf("%d", &i); // BAD: may not have written `i`
use(*ptr_i);
scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
}
{
int i;
int *ptr_i = &i;
scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
}
{
int i;
scanf("%d", &i);
i = 42;
use(i); // GOOD
}
// --- weird formatting strings ---
{
int i;
int i, j;
if (scanf("%n %d", &i) >= 1) // GOOD (`%n` does not consume input)
if (sscanf("123", "%n %*d %n", &i, &j) >= 0)
{
use(i);
use(i); // GOOD (`%n` does not consume input, but writes 0 to i)
use(j); // GOOD (`%n` does not consume input, but writes 3 to j)
}
}
{
int i;
if (scanf("%% %d", &i) >= 1) // GOOD (`%%` does not consume input)
if (scanf("%% %d", &i) >= 1)
{
use(i);
use(i); // GOOD (`%%` does not consume input)
}
}
{
int i;
if (scanf("%*d %d", &i) >= 1) // GOOD (`%*d` does not consume input)
if (scanf("%*d %d", &i) >= 1)
{
use(i);
use(i); // GOOD (`%*d` does not consume input)
}
}
{
int d, n;
if (scanf("%*d %d %n", &d, &n) == 1) {
use(d); // GOOD
use(n); // GOOD
}
}
{
char substr[32];
int n;
while (sscanf(get_a_string(), "%31[^:]: %d", substr, &n) == 2) { // GOOD: cycle from write to unguarded access
use(*(int *)substr); // GOOD
use(n); // GOOD
}
}
}
// --- Non-local cases ---
bool my_scan_int(int &i)
{
return scanf("%d", &i) == 1; // GOOD
}
void my_scan_int_test()
{
int i;
use(i); // GOOD: used before scanf
my_scan_int(i);
use(i); // BAD [NOT DETECTED]
if (my_scan_int(i))
{
use(i); // GOOD
}
}
// --- Can be OK'd given a sufficiently smart analysis ---
char *my_string_copy() {
static const char SRC_STRING[] = "48656C6C6F";
static char DST_STRING[] = ".....";
int len = sizeof(SRC_STRING) - 1;
const char *src = SRC_STRING;
char *ptr = DST_STRING;
for (int i = 0; i < len; i += 2) {
unsigned int u;
sscanf(src + i, "%2x", &u);
*ptr++ = (char) u; // GOOD [FALSE POSITIVE]? src+i+{0,1} are always valid %x digits, so this should be OK.
}
*ptr++ = 0;
return DST_STRING;
}