Merge pull request #10163 from d10c/missing-check-scanf-squashed

C++: New Query: missing return-value check for scanf-like functions
This commit is contained in:
Nora Dimitrijević
2022-09-01 23:43:21 +02:00
committed by GitHub
8 changed files with 623 additions and 0 deletions

View File

@@ -143,6 +143,28 @@ class ScanfFunctionCall extends FunctionCall {
* (rather than a `char*`). * (rather than a `char*`).
*/ */
predicate isWideCharDefault() { this.getScanfFunction().isWideCharDefault() } predicate isWideCharDefault() { this.getScanfFunction().isWideCharDefault() }
/**
* Gets the output argument at position `n` in the vararg list of this call.
*
* The range of `n` is from `0` to `this.getNumberOfOutputArguments() - 1`.
*/
Expr getOutputArgument(int n) {
result = this.getArgument(this.getTarget().getNumberOfParameters() + n) and
n >= 0
}
/**
* Gets an output argument given to this call in vararg position.
*/
Expr getAnOutputArgument() { result = this.getOutputArgument(_) }
/**
* Gets the number of output arguments present in this call.
*/
int getNumberOfOutputArguments() {
result = this.getNumberOfArguments() - this.getTarget().getNumberOfParameters()
}
} }
/** /**

View File

@@ -0,0 +1,17 @@
{
int i, j, r;
r = scanf("%d %d", &i, &j);
use(i); // BAD: i is not guarded
if (r >= 1) {
use(i); // GOOD: i is guarded correctly
use(j); // BAD: j is guarded incorrectly
}
if (r != 2)
return;
use(j); // GOOD: j is guarded correctly
}

View File

@@ -0,0 +1,51 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
This query finds calls of <tt>scanf</tt>-like functions with missing or
improper return-value checking.
</p>
<p>
Specifically, the query flags uses of variables that may have been modified by
<tt>scanf</tt> and subsequently are used without being guarded by a correct
return-value check. A proper check is one that ensures that the corresponding
<tt>scanf</tt> has returned (at least) a certain minimum constant.
</p>
<p>
Functions in the <tt>scanf</tt> family return either EOF (a negative value)
in case of IO failure, or the number of items successfully read from the
input. Consequently, a simple check that the return value is truthy (nonzero)
is not enough.
</p>
<warning>
This query has medium precision because, in the current implementation, it
takes a strict stance on unguarded uses of output variables, and flags them
as problematic even if they have already been initialized.
</warning>
</overview>
<recommendation>
<p>
Ensure that all subsequent uses of <tt>scanf</tt> output arguments occur in a
branch of an <tt>if</tt> statement (or similar), in which it is known that the
corresponding <tt>scanf</tt> call has in fact read all possible items from its
input. This can be done by comparing the return value to a numerical constant.
</p>
</recommendation>
<example>
<p>This example shows different ways of guarding a <tt>scanf</tt> output:
</p>
<sample src="MissingCheckScanf.cpp" />
</example>
<references>
<li>SEI CERT C++ Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/cplusplus/ERR62-CPP.+Detect+errors+when+converting+a+string+to+a+number">ERR62-CPP. Detect errors when converting a string to a number</a>.</li>
<li>SEI CERT C Coding Standard: <a href="https://wiki.sei.cmu.edu/confluence/display/c/ERR33-C.+Detect+and+handle+standard+library+errors">ERR33-C. Detect and handle standard library errors</a>.</li>
<li>cppreference.com: <a href="https://en.cppreference.com/w/c/io/fscanf">scanf, fscanf, sscanf, scanf_s, fscanf_s, sscanf_s</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,122 @@
/**
* @name Missing return-value check for a 'scanf'-like function
* @description Failing to check that a call to 'scanf' actually writes to an
* output variable can lead to unexpected behavior at reading time.
* @kind problem
* @problem.severity warning
* @security-severity 7.5
* @precision medium
* @id cpp/missing-check-scanf
* @tags security
* correctness
* external/cwe/cwe-252
* external/cwe/cwe-253
*/
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
/** An expression appearing as an output argument to a `scanf`-like call */
class ScanfOutput extends Expr {
ScanfFunctionCall call;
int varargIndex;
Instruction instr;
ValueNumber valNum;
ScanfOutput() {
this = call.getOutputArgument(varargIndex).getFullyConverted() and
instr.getConvertedResultExpression() = this and
valueNumber(instr) = valNum
}
ScanfFunctionCall getCall() { result = call }
/**
* Returns the smallest possible `scanf` return value that would indicate
* success in writing this output argument.
*/
int getMinimumGuardConstant() {
result =
varargIndex + 1 -
count(ScanfFormatLiteral f, int n |
// Special case: %n writes to an argument without reading any input.
// It does not increase the count returned by `scanf`.
n <= varargIndex and f.getUse() = call and f.getConversionChar(n) = "n"
)
}
predicate hasGuardedAccess(Access e, boolean isGuarded) {
e = this.getAnAccess() and
if
exists(int value, int minGuard | minGuard = this.getMinimumGuardConstant() |
e.getBasicBlock() = blockGuardedBy(value, "==", call) and minGuard <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<", call) and minGuard - 1 <= value
or
e.getBasicBlock() = blockGuardedBy(value, "<=", call) and minGuard <= value
)
then isGuarded = true
else isGuarded = false
}
/**
* Get a subsequent access of the same underlying storage,
* but before it gets reset or reused in another `scanf` call.
*/
Access getAnAccess() {
exists(Instruction dst |
this.bigStep() = dst and
dst.getAst() = result and
valueNumber(dst) = valNum
)
}
private Instruction bigStep() {
result = this.smallStep(instr)
or
exists(Instruction i | i = this.bigStep() | result = this.smallStep(i))
}
private Instruction smallStep(Instruction i) {
instr.getASuccessor*() = i and
i.getASuccessor() = result and
not this.isBarrier(result)
}
private predicate isBarrier(Instruction i) {
valueNumber(i) = valNum and
exists(Expr e | i.getAst() = e |
i = any(StoreInstruction s).getDestinationAddress()
or
[e, e.getParent().(AddressOfExpr)] instanceof ScanfOutput
)
}
}
/** Returns a block guarded by the assertion of `value op call` */
BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
g.ensuresLt(left, right, 0, result, true) and op = "<"
or
g.ensuresLt(left, right, 1, result, true) and op = "<="
)
}
from ScanfOutput output, ScanfFunctionCall call, Access access
where
output.getCall() = call and
output.hasGuardedAccess(access, false)
select access,
"$@ is read here, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " +
output.getMinimumGuardConstant() + ".", access, access.toString(), call, call.toString()

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new medium-precision query, `cpp/missing-check-scanf`, which detects `scanf` output variables that are used without a proper return-value check to see that they were actually written. A variation of this query was originally contributed as an [experimental query by @ihsinme](https://github.com/github/codeql/pull/8246).

View File

@@ -0,0 +1,19 @@
| test.cpp:30:7:30:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:30:7:30:7 | i | i | test.cpp:29:3:29:7 | call to scanf | call to scanf |
| test.cpp:46:7:46:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:46:7:46:7 | i | i | test.cpp:45:3:45:7 | call to scanf | call to scanf |
| test.cpp:63:7:63:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:63:7:63:7 | i | i | test.cpp:62:3:62:7 | call to scanf | call to scanf |
| test.cpp:75:7:75:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:75:7:75:7 | i | i | test.cpp:74:3:74:7 | call to scanf | call to scanf |
| test.cpp:87:7:87:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:87:7:87:7 | i | i | test.cpp:86:3:86:8 | call to fscanf | call to fscanf |
| test.cpp:94:7:94:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:94:7:94:7 | i | i | test.cpp:93:3:93:8 | call to sscanf | call to sscanf |
| test.cpp:143:8:143:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:143:8:143:8 | i | i | test.cpp:141:7:141:11 | call to scanf | call to scanf |
| test.cpp:152:8:152:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:152:8:152:8 | i | i | test.cpp:150:7:150:11 | call to scanf | call to scanf |
| test.cpp:184:8:184:8 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:184:8:184:8 | i | i | test.cpp:183:7:183:11 | call to scanf | call to scanf |
| test.cpp:203:8:203:8 | j | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:203:8:203:8 | j | j | test.cpp:200:7:200:11 | call to scanf | call to scanf |
| test.cpp:227:9:227:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:227:9:227:9 | d | d | test.cpp:225:25:225:29 | call to scanf | call to scanf |
| test.cpp:231:9:231:9 | d | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:231:9:231:9 | d | d | test.cpp:229:14:229:18 | call to scanf | call to scanf |
| test.cpp:243:7:243:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:243:7:243:7 | i | i | test.cpp:242:3:242:7 | call to scanf | call to scanf |
| test.cpp:251:7:251:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:251:7:251:7 | i | i | test.cpp:250:3:250:7 | call to scanf | call to scanf |
| test.cpp:259:7:259:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:259:7:259:7 | i | i | test.cpp:258:3:258:7 | call to scanf | call to scanf |
| test.cpp:271:7:271:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:7:271:7 | i | i | test.cpp:270:3:270:7 | call to scanf | call to scanf |
| test.cpp:281:8:281:12 | ptr_i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:281:8:281:12 | ptr_i | ptr_i | test.cpp:280:3:280:7 | call to scanf | call to scanf |
| test.cpp:289:7:289:7 | i | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:289:7:289:7 | i | i | test.cpp:288:3:288:7 | call to scanf | call to scanf |
| test.cpp:383:25:383:25 | u | $@ is read here, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:383:25:383:25 | u | u | test.cpp:382:6:382:11 | call to sscanf | call to sscanf |

View File

@@ -0,0 +1 @@
Critical/MissingCheckScanf.ql

View File

@@ -0,0 +1,387 @@
typedef struct
{
} FILE;
typedef void *locale_t;
int scanf(const char *format, ...);
int fscanf(FILE *stream, const char *format, ...);
int sscanf(const char *s, const char *format, ...);
int _scanf_l(const char *format, locale_t locale, ...);
void use(int i);
void set_by_ref(int &i);
void set_by_ptr(int *i);
bool maybe();
FILE *get_a_stream();
const char *get_a_string();
extern locale_t get_a_locale();
int main()
{
// --- simple cases ---
{
int i;
scanf("%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
if (scanf("%d", &i) == 1)
{
use(i); // GOOD: checks return value
}
}
{
int i = 0;
scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
}
{
int i;
use(i); // GOOD: only care about uses after scanf call
if (scanf("%d", &i) == 1)
{
use(i); // GOOD
}
}
{
int i; // Reused variable
scanf("%d", &i);
use(i); // BAD
if (scanf("%d", &i) == 1)
{
use(i); // GOOD
}
}
{
int i; // Reset variable
scanf("%d", &i);
use(i); // BAD
i = 1;
use(i); // GOOD
}
// --- different scanf functions ---
{
int i;
fscanf(get_a_stream(), "%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
sscanf(get_a_string(), "%d", &i);
use(i); // BAD: may not have written `i`
}
{
int i;
if (_scanf_l("%d", get_a_locale(), &i) == 1)
{
use(i); // GOOD
}
}
// --- different ways of checking ---
{
int i;
if (scanf("%d", &i) >= 1)
{
use(i); // GOOD
}
}
{
int i;
if (scanf("%d", &i) == 1)
{
use(i); // GOOD
}
}
{
int i;
if (0 < scanf("%d", &i))
{
if (true)
{
use(i); // GOOD
}
}
}
{
int i;
if (scanf("%d", &i) != 0)
{
use(i); // BAD: scanf can return EOF
}
}
{
int i;
if (scanf("%d", &i) == 0)
{
use(i); // BAD: checks return value incorrectly
}
}
{
int r;
int i;
r = scanf("%d", &i);
if (r >= 1)
{
use(i); // GOOD
}
}
{
bool b;
int i;
b = scanf("%d", &i);
if (b >= 1)
{
use(i); // BAD [NOT DETECTED]: scanf can return EOF (boolifies true)
}
}
{
int i;
if (scanf("%d", &i))
use(i); // BAD
}
{
int i, j;
if (scanf("%d %d", &i) >= 2)
{
use(i); // GOOD
use(j); // GOOD: `j` is not a scanf arg, so out of scope of MissingCheckScanf
}
}
{
int i, j;
if (scanf("%d %d", &i, &j) >= 1)
{
use(i); // GOOD
use(j); // BAD: checks return value incorrectly
}
}
{
int i, j;
if (scanf("%d %d", &i, &j) >= 2)
{
use(i); // GOOD
use(j); // GOOD
}
}
{
char c[5];
int d;
while(maybe()) {
if (maybe()) {
break;
}
else if (maybe() && (scanf("%5c %d", c, &d) == 1)) { // GOOD
use(*(int *)c); // GOOD
use(d); // BAD
}
else if ((scanf("%5c %d", c, &d) == 1) && maybe()) { // GOOD
use(*(int *)c); // GOOD
use(d); // BAD
}
}
}
// --- different initialization ---
{
int i;
i = 0;
scanf("%d", &i);
use(i); // BAD
}
{
int i;
set_by_ref(i);
scanf("%d", &i);
use(i); // BAD
}
{
int i;
set_by_ptr(&i);
scanf("%d", &i);
use(i); // BAD
}
{
int i;
if (maybe())
{
i = 0;
}
scanf("%d", &i);
use(i); // BAD: `i` may not have been initialized
}
// --- different use ---
{
int i;
int *ptr_i = &i;
scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
}
{
int i;
int *ptr_i = &i;
scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
}
{
int i;
scanf("%d", &i);
i = 42;
use(i); // GOOD
}
// --- weird formatting strings ---
{
int i, j;
if (sscanf("123", "%n %*d %n", &i, &j) >= 0)
{
use(i); // GOOD (`%n` does not consume input, but writes 0 to i)
use(j); // GOOD (`%n` does not consume input, but writes 3 to j)
}
}
{
int i;
if (scanf("%% %d", &i) >= 1)
{
use(i); // GOOD (`%%` does not consume input)
}
}
{
int i;
if (scanf("%*d %d", &i) >= 1)
{
use(i); // GOOD (`%*d` does not consume input)
}
}
{
int d, n;
if (scanf("%*d %d %n", &d, &n) == 1) {
use(d); // GOOD
use(n); // GOOD
}
}
{
char substr[32];
int n;
while (sscanf(get_a_string(), "%31[^:]: %d", substr, &n) == 2) { // GOOD: cycle from write to unguarded access
use(*(int *)substr); // GOOD
use(n); // GOOD
}
}
}
// --- Non-local cases ---
bool my_scan_int(int &i)
{
return scanf("%d", &i) == 1; // GOOD
}
void my_scan_int_test()
{
int i;
use(i); // GOOD: used before scanf
my_scan_int(i);
use(i); // BAD [NOT DETECTED]
if (my_scan_int(i))
{
use(i); // GOOD
}
}
// --- Can be OK'd given a sufficiently smart analysis ---
char *my_string_copy() {
static const char SRC_STRING[] = "48656C6C6F";
static char DST_STRING[] = ".....";
int len = sizeof(SRC_STRING) - 1;
const char *src = SRC_STRING;
char *ptr = DST_STRING;
for (int i = 0; i < len; i += 2) {
unsigned int u;
sscanf(src + i, "%2x", &u);
*ptr++ = (char) u; // GOOD [FALSE POSITIVE]? src+i+{0,1} are always valid %x digits, so this should be OK.
}
*ptr++ = 0;
return DST_STRING;
}