Merge pull request #12818 from MathiasVP/dataflow-for-missing-scanf-qery

C++: Use the new dataflow library in `cpp/missing-check-scanf`
This commit is contained in:
Mathias Vorreiter Pedersen
2023-04-17 14:34:11 +01:00
committed by GitHub
3 changed files with 113 additions and 145 deletions

View File

@@ -16,160 +16,133 @@
import cpp import cpp
import semmle.code.cpp.commons.Scanf import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.ir.dataflow.DataFlow import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering import semmle.code.cpp.ir.ValueNumbering
/** /** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`. pragma[nomagic]
* predicate revFlow0(Node n) {
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument isSink(_, _, n, _)
* of `call`, and `vn` is the value number of `instr.` or
*/ exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
output = call.getOutputArgument(index).getFullyConverted() and
instr.getConvertedResultExpression() = output and
vn.getAnInstruction() = instr
} }
/** /**
* Holds if `instr` is control-flow reachable in 0 or more steps from * Holds if `n` represents an uninitialized stack-allocated variable, or a
* a call to a `scanf`-like function. * newly (and presumed uninitialized) heap allocation.
*/ */
predicate isUninitialized(Node n) {
exists(n.asUninitialized()) or
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic] pragma[nomagic]
predicate fwdFlow0(Instruction instr) { predicate fwdFlow0(Node n) {
isSource(_, _, instr, _, _) revFlow0(n) and
or (
exists(Instruction prev | isUninitialized(n)
fwdFlow0(prev) and or
prev.getASuccessor() = instr exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
) )
} }
/** predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
* Holds if `instr` is part of the IR translation of `access` that input = call.getOutputArgument(index) and
* is not an expression being deallocated, and `instr` has value n.asIndirectExpr() = input
* number `vn`.
*/
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
instr.getAst() = access and
not any(DeallocationExpr dealloc).getFreedExpr() = access and
vn.getAnInstruction() = instr
} }
/** /**
* Holds if `instr` is part of a path from a call to a `scanf`-like function * Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
}
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at
* index `index` and `n` is the dataflow node that represents the data after
* it has been written to by `call`.
*/
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
isRelevantScanfCall(call, index, output) and
output = call.getOutputArgument(index) and
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
*/
predicate isSink(Node n, Expr e) {
n.asExpr() = e and
not any(DeallocationExpr dealloc).getFreedExpr() = e
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable. * to a use of the written variable.
*/ */
pragma[nomagic] pragma[nomagic]
predicate revFlow0(Instruction instr) { predicate revFlow(Node n) {
fwdFlow0(instr) and fwdFlow(n) and
( (
isSink(instr, _, _) isSink(n, _)
or or
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ) exists(Node succ |
) revFlow(succ) and
} localFlowStep(n, succ) and
not isSanitizerOut(n)
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*/
pragma[nomagic]
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
revFlow0(instr) and
(
isSource(_, _, instr, vn, _)
or
exists(Instruction prev |
fwdFlow(prev, vn) and
prev.getASuccessor() = instr and
not isBarrier(instr, vn)
) )
) )
} }
/** /** A local flow step, restricted to relevant dataflow nodes. */
* Holds if `instr` is part of a path from a call to a `scanf`-like function private predicate step(Node n1, Node n2) {
* that writes to a variable with value number `vn`, without passing through revFlow(n1) and
* redefinitions of the variable. revFlow(n2) and
* localFlowStep(n1, n2)
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
* control-flow reachable from an argument to a `scanf`-like function call.
*/
pragma[nomagic]
predicate revFlow(Instruction instr, ValueNumber vn) {
fwdFlow(instr, pragma[only_bind_out](vn)) and
(
isSink(instr, _, vn)
or
exists(Instruction succ | revFlow(succ, vn) |
instr.getASuccessor() = succ and
not isBarrier(succ, vn)
)
)
} }
/** predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
* A type that bundles together a reachable instruction with the appropriate
* value number (i.e., the value number that's transferred from the source
* to the sink).
*/
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
class Node extends MkNode {
ValueNumber vn;
Instruction instr;
Node() { this = MkNode(instr, vn) }
final string toString() { result = instr.toString() }
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
final Location getLocation() { result = instr.getLocation() }
}
/**
* Holds if `instr` is an instruction with value number `vn` that is
* used in a store operation, or is overwritten by another call to
* a `scanf`-like function.
*/
private predicate isBarrier(Instruction instr, ValueNumber vn) {
// We only need to compute barriers for instructions that we
// managed to hit during the initial flow stage.
revFlow0(pragma[only_bind_into](instr)) and
valueNumber(instr) = vn and
exists(Expr e | instr.getAst() = e |
instr = any(StoreInstruction s).getDestinationAddress()
or
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
)
}
/** Holds if `n1` steps to `n2` in a single step. */
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
/** /**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is * Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* an instruction that is part of the translation of `access` which is a transitive * a dataflow node that represents the expression `e`.
* control-flow successor of `call`.
*
* Furthermore, `source` and `sink` have identical global value numbers.
*/ */
predicate hasFlow( predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access isSource(call, index, source, _) and
) { hasFlow(source, sink) and
exists(ValueNumber vn | isSink(sink, e)
isSource(call, index, source, vn, _) and
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
isSink(sink, access, vn)
)
} }
/** /**
@@ -177,7 +150,7 @@ predicate hasFlow(
* success in writing the output argument at index `index`. * success in writing the output argument at index `index`.
*/ */
int getMinimumGuardConstant(ScanfFunctionCall call, int index) { int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
isSource(call, index, _, _, _) and isSource(call, index, _, _) and
result = result =
index + 1 - index + 1 -
count(ScanfFormatLiteral f, int n | count(ScanfFormatLiteral f, int n |
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned * Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`. * at least `minGuard`.
*/ */
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) { predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
exists(int index | exists(int index |
hasFlow(_, call, index, _, e) and hasFlow(_, call, index, _, e) and
minGuard = getMinimumGuardConstant(call, index) minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right | exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and right = g.getAChild() and
value = left.getValue().toInt() and value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right) localExprFlow(call, right)
| |
g.ensuresEq(left, right, 0, result, true) and op = "==" g.ensuresEq(left, right, 0, result, true) and op = "=="
or or
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
) )
} }
from ScanfFunctionCall call, Access access, int minGuard from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, access, minGuard) where hasNonGuardedAccess(call, e, minGuard)
select access, select e,
"This variable is read, but may not have been written. " + "This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call, "It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString() call.toString()

View File

@@ -1,9 +1,8 @@
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf | | test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf | | test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf | | test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf | | test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf | | test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf | | test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf | | test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf | | test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
@@ -12,13 +11,9 @@
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf | | test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf | | test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf | | test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf | | test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf | | test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf | | test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf | | test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf | | test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf | | test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

View File

@@ -48,7 +48,7 @@ int main()
int i = 0; int i = 0;
scanf("%d", &i); scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference. use(i); // GOOD. Design choice: already initialized variables are fine.
} }
{ {
@@ -261,7 +261,7 @@ int main()
i = 0; i = 0;
scanf("%d", &i); scanf("%d", &i);
use(i); // BAD use(i); // GOOD
} }
{ {
@@ -269,7 +269,7 @@ int main()
set_by_ref(i); set_by_ref(i);
scanf("%d", &i); scanf("%d", &i);
use(i); // BAD use(i); // GOOD [FALSE POSITIVE]
} }
{ {
@@ -277,7 +277,7 @@ int main()
set_by_ptr(&i); set_by_ptr(&i);
scanf("%d", &i); scanf("%d", &i);
use(i); // BAD use(i); // GOOD [FALSE POSITIVE]
} }
{ {
@@ -299,7 +299,7 @@ int main()
int *ptr_i = &i; int *ptr_i = &i;
scanf("%d", &i); scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i` use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
} }
{ {
@@ -307,7 +307,7 @@ int main()
int *ptr_i = &i; int *ptr_i = &i;
scanf("%d", ptr_i); scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i` use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
} }
{ {
@@ -427,5 +427,5 @@ void scan_and_write() {
void scan_and_static_variable() { void scan_and_static_variable() {
static int i; static int i;
scanf("%d", &i); scanf("%d", &i);
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized use(i); // GOOD: static variables are always 0-initialized
} }