Merge pull request #12818 from MathiasVP/dataflow-for-missing-scanf-qery

C++: Use the new dataflow library in `cpp/missing-check-scanf`
This commit is contained in:
Mathias Vorreiter Pedersen
2023-04-17 14:34:11 +01:00
committed by GitHub
3 changed files with 113 additions and 145 deletions

View File

@@ -16,160 +16,133 @@
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
*
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
* of `call`, and `vn` is the value number of `instr.`
*/
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
output = call.getOutputArgument(index).getFullyConverted() and
instr.getConvertedResultExpression() = output and
vn.getAnInstruction() = instr
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
pragma[nomagic]
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}
/**
* Holds if `instr` is control-flow reachable in 0 or more steps from
* a call to a `scanf`-like function.
* Holds if `n` represents an uninitialized stack-allocated variable, or a
* newly (and presumed uninitialized) heap allocation.
*/
predicate isUninitialized(Node n) {
exists(n.asUninitialized()) or
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic]
predicate fwdFlow0(Instruction instr) {
isSource(_, _, instr, _, _)
or
exists(Instruction prev |
fwdFlow0(prev) and
prev.getASuccessor() = instr
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isUninitialized(n)
or
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}
/**
* Holds if `instr` is part of the IR translation of `access` that
* is not an expression being deallocated, and `instr` has value
* number `vn`.
*/
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
instr.getAst() = access and
not any(DeallocationExpr dealloc).getFreedExpr() = access and
vn.getAnInstruction() = instr
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
}
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at
* index `index` and `n` is the dataflow node that represents the data after
* it has been written to by `call`.
*/
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
isRelevantScanfCall(call, index, output) and
output = call.getOutputArgument(index) and
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
*/
predicate isSink(Node n, Expr e) {
n.asExpr() = e and
not any(DeallocationExpr dealloc).getFreedExpr() = e
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
*/
pragma[nomagic]
predicate revFlow0(Instruction instr) {
fwdFlow0(instr) and
predicate revFlow(Node n) {
fwdFlow(n) and
(
isSink(instr, _, _)
isSink(n, _)
or
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*/
pragma[nomagic]
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
revFlow0(instr) and
(
isSource(_, _, instr, vn, _)
or
exists(Instruction prev |
fwdFlow(prev, vn) and
prev.getASuccessor() = instr and
not isBarrier(instr, vn)
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
* control-flow reachable from an argument to a `scanf`-like function call.
*/
pragma[nomagic]
predicate revFlow(Instruction instr, ValueNumber vn) {
fwdFlow(instr, pragma[only_bind_out](vn)) and
(
isSink(instr, _, vn)
or
exists(Instruction succ | revFlow(succ, vn) |
instr.getASuccessor() = succ and
not isBarrier(succ, vn)
)
)
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}
/**
* A type that bundles together a reachable instruction with the appropriate
* value number (i.e., the value number that's transferred from the source
* to the sink).
*/
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
class Node extends MkNode {
ValueNumber vn;
Instruction instr;
Node() { this = MkNode(instr, vn) }
final string toString() { result = instr.toString() }
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
final Location getLocation() { result = instr.getLocation() }
}
/**
* Holds if `instr` is an instruction with value number `vn` that is
* used in a store operation, or is overwritten by another call to
* a `scanf`-like function.
*/
private predicate isBarrier(Instruction instr, ValueNumber vn) {
// We only need to compute barriers for instructions that we
// managed to hit during the initial flow stage.
revFlow0(pragma[only_bind_into](instr)) and
valueNumber(instr) = vn and
exists(Expr e | instr.getAst() = e |
instr = any(StoreInstruction s).getDestinationAddress()
or
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
)
}
/** Holds if `n1` steps to `n2` in a single step. */
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* an instruction that is part of the translation of `access` which is a transitive
* control-flow successor of `call`.
*
* Furthermore, `source` and `sink` have identical global value numbers.
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
) {
exists(ValueNumber vn |
isSource(call, index, source, vn, _) and
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
isSink(sink, access, vn)
)
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
}
/**
@@ -177,7 +150,7 @@ predicate hasFlow(
* success in writing the output argument at index `index`.
*/
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
isSource(call, index, _, _, _) and
isSource(call, index, _, _) and
result =
index + 1 -
count(ScanfFormatLiteral f, int n |
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
exists(int index |
hasFlow(_, call, index, _, e) and
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
)
}
from ScanfFunctionCall call, Access access, int minGuard
where hasNonGuardedAccess(call, access, minGuard)
select access,
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()

View File

@@ -1,9 +1,8 @@
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
@@ -12,13 +11,9 @@
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

View File

@@ -48,7 +48,7 @@ int main()
int i = 0;
scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
use(i); // GOOD. Design choice: already initialized variables are fine.
}
{
@@ -261,7 +261,7 @@ int main()
i = 0;
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD
}
{
@@ -269,7 +269,7 @@ int main()
set_by_ref(i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -277,7 +277,7 @@ int main()
set_by_ptr(&i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -299,7 +299,7 @@ int main()
int *ptr_i = &i;
scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
}
{
@@ -307,7 +307,7 @@ int main()
int *ptr_i = &i;
scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
}
{
@@ -427,5 +427,5 @@ void scan_and_write() {
void scan_and_static_variable() {
static int i;
scanf("%d", &i);
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
use(i); // GOOD: static variables are always 0-initialized
}