mirror of
https://github.com/github/codeql.git
synced 2025-12-22 11:46:32 +01:00
Merge pull request #12818 from MathiasVP/dataflow-for-missing-scanf-qery
C++: Use the new dataflow library in `cpp/missing-check-scanf`
This commit is contained in:
@@ -16,160 +16,133 @@
|
||||
import cpp
|
||||
import semmle.code.cpp.commons.Scanf
|
||||
import semmle.code.cpp.controlflow.Guards
|
||||
import semmle.code.cpp.ir.dataflow.DataFlow
|
||||
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
|
||||
import semmle.code.cpp.ir.IR
|
||||
import semmle.code.cpp.ir.ValueNumbering
|
||||
|
||||
/**
|
||||
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
|
||||
*
|
||||
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
|
||||
* of `call`, and `vn` is the value number of `instr.`
|
||||
*/
|
||||
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
|
||||
output = call.getOutputArgument(index).getFullyConverted() and
|
||||
instr.getConvertedResultExpression() = output and
|
||||
vn.getAnInstruction() = instr
|
||||
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
|
||||
pragma[nomagic]
|
||||
predicate revFlow0(Node n) {
|
||||
isSink(_, _, n, _)
|
||||
or
|
||||
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is control-flow reachable in 0 or more steps from
|
||||
* a call to a `scanf`-like function.
|
||||
* Holds if `n` represents an uninitialized stack-allocated variable, or a
|
||||
* newly (and presumed uninitialized) heap allocation.
|
||||
*/
|
||||
predicate isUninitialized(Node n) {
|
||||
exists(n.asUninitialized()) or
|
||||
n.asIndirectExpr(1) instanceof AllocationExpr
|
||||
}
|
||||
|
||||
pragma[nomagic]
|
||||
predicate fwdFlow0(Instruction instr) {
|
||||
isSource(_, _, instr, _, _)
|
||||
or
|
||||
exists(Instruction prev |
|
||||
fwdFlow0(prev) and
|
||||
prev.getASuccessor() = instr
|
||||
predicate fwdFlow0(Node n) {
|
||||
revFlow0(n) and
|
||||
(
|
||||
isUninitialized(n)
|
||||
or
|
||||
exists(Node prev |
|
||||
fwdFlow0(prev) and
|
||||
localFlowStep(prev, n)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is part of the IR translation of `access` that
|
||||
* is not an expression being deallocated, and `instr` has value
|
||||
* number `vn`.
|
||||
*/
|
||||
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
|
||||
instr.getAst() = access and
|
||||
not any(DeallocationExpr dealloc).getFreedExpr() = access and
|
||||
vn.getAnInstruction() = instr
|
||||
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
|
||||
input = call.getOutputArgument(index) and
|
||||
n.asIndirectExpr() = input
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is part of a path from a call to a `scanf`-like function
|
||||
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
|
||||
* argument that has not been previously initialized.
|
||||
*/
|
||||
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
|
||||
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `call` is a `scanf`-like function that may write to `output` at
|
||||
* index `index` and `n` is the dataflow node that represents the data after
|
||||
* it has been written to by `call`.
|
||||
*/
|
||||
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
|
||||
isRelevantScanfCall(call, index, output) and
|
||||
output = call.getOutputArgument(index) and
|
||||
n.asDefiningArgument() = output
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is reachable from an output argument of a relevant call to
|
||||
* a `scanf`-like function.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate fwdFlow(Node n) {
|
||||
isSource(_, _, n, _)
|
||||
or
|
||||
exists(Node prev |
|
||||
fwdFlow(prev) and
|
||||
localFlowStep(prev, n) and
|
||||
not isSanitizerOut(prev)
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `n` should not have outgoing flow. */
|
||||
predicate isSanitizerOut(Node n) {
|
||||
// We disable flow out of sinks to reduce result duplication
|
||||
isSink(n, _)
|
||||
or
|
||||
// If the node is being passed to a function it may be
|
||||
// modified, and thus it's safe to later read the value.
|
||||
exists(n.asIndirectArgument())
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
|
||||
* argument of a deallocation expression.
|
||||
*/
|
||||
predicate isSink(Node n, Expr e) {
|
||||
n.asExpr() = e and
|
||||
not any(DeallocationExpr dealloc).getFreedExpr() = e
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `n` is part of a path from a call to a `scanf`-like function
|
||||
* to a use of the written variable.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate revFlow0(Instruction instr) {
|
||||
fwdFlow0(instr) and
|
||||
predicate revFlow(Node n) {
|
||||
fwdFlow(n) and
|
||||
(
|
||||
isSink(instr, _, _)
|
||||
isSink(n, _)
|
||||
or
|
||||
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is part of a path from a call to a `scanf`-like function
|
||||
* that writes to a variable with value number `vn`, without passing through
|
||||
* redefinitions of the variable.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
|
||||
revFlow0(instr) and
|
||||
(
|
||||
isSource(_, _, instr, vn, _)
|
||||
or
|
||||
exists(Instruction prev |
|
||||
fwdFlow(prev, vn) and
|
||||
prev.getASuccessor() = instr and
|
||||
not isBarrier(instr, vn)
|
||||
exists(Node succ |
|
||||
revFlow(succ) and
|
||||
localFlowStep(n, succ) and
|
||||
not isSanitizerOut(n)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is part of a path from a call to a `scanf`-like function
|
||||
* that writes to a variable with value number `vn`, without passing through
|
||||
* redefinitions of the variable.
|
||||
*
|
||||
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
|
||||
* control-flow reachable from an argument to a `scanf`-like function call.
|
||||
*/
|
||||
pragma[nomagic]
|
||||
predicate revFlow(Instruction instr, ValueNumber vn) {
|
||||
fwdFlow(instr, pragma[only_bind_out](vn)) and
|
||||
(
|
||||
isSink(instr, _, vn)
|
||||
or
|
||||
exists(Instruction succ | revFlow(succ, vn) |
|
||||
instr.getASuccessor() = succ and
|
||||
not isBarrier(succ, vn)
|
||||
)
|
||||
)
|
||||
/** A local flow step, restricted to relevant dataflow nodes. */
|
||||
private predicate step(Node n1, Node n2) {
|
||||
revFlow(n1) and
|
||||
revFlow(n2) and
|
||||
localFlowStep(n1, n2)
|
||||
}
|
||||
|
||||
/**
|
||||
* A type that bundles together a reachable instruction with the appropriate
|
||||
* value number (i.e., the value number that's transferred from the source
|
||||
* to the sink).
|
||||
*/
|
||||
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
|
||||
|
||||
class Node extends MkNode {
|
||||
ValueNumber vn;
|
||||
Instruction instr;
|
||||
|
||||
Node() { this = MkNode(instr, vn) }
|
||||
|
||||
final string toString() { result = instr.toString() }
|
||||
|
||||
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
|
||||
|
||||
final Location getLocation() { result = instr.getLocation() }
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `instr` is an instruction with value number `vn` that is
|
||||
* used in a store operation, or is overwritten by another call to
|
||||
* a `scanf`-like function.
|
||||
*/
|
||||
private predicate isBarrier(Instruction instr, ValueNumber vn) {
|
||||
// We only need to compute barriers for instructions that we
|
||||
// managed to hit during the initial flow stage.
|
||||
revFlow0(pragma[only_bind_into](instr)) and
|
||||
valueNumber(instr) = vn and
|
||||
exists(Expr e | instr.getAst() = e |
|
||||
instr = any(StoreInstruction s).getDestinationAddress()
|
||||
or
|
||||
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `n1` steps to `n2` in a single step. */
|
||||
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
|
||||
|
||||
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
|
||||
|
||||
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
|
||||
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
|
||||
|
||||
/**
|
||||
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
|
||||
* an instruction that is part of the translation of `access` which is a transitive
|
||||
* control-flow successor of `call`.
|
||||
*
|
||||
* Furthermore, `source` and `sink` have identical global value numbers.
|
||||
* a dataflow node that represents the expression `e`.
|
||||
*/
|
||||
predicate hasFlow(
|
||||
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
|
||||
) {
|
||||
exists(ValueNumber vn |
|
||||
isSource(call, index, source, vn, _) and
|
||||
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
|
||||
isSink(sink, access, vn)
|
||||
)
|
||||
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
|
||||
isSource(call, index, source, _) and
|
||||
hasFlow(source, sink) and
|
||||
isSink(sink, e)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -177,7 +150,7 @@ predicate hasFlow(
|
||||
* success in writing the output argument at index `index`.
|
||||
*/
|
||||
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
|
||||
isSource(call, index, _, _, _) and
|
||||
isSource(call, index, _, _) and
|
||||
result =
|
||||
index + 1 -
|
||||
count(ScanfFormatLiteral f, int n |
|
||||
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
|
||||
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
|
||||
* at least `minGuard`.
|
||||
*/
|
||||
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
|
||||
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
|
||||
exists(int index |
|
||||
hasFlow(_, call, index, _, e) and
|
||||
minGuard = getMinimumGuardConstant(call, index)
|
||||
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
|
||||
exists(GuardCondition g, Expr left, Expr right |
|
||||
right = g.getAChild() and
|
||||
value = left.getValue().toInt() and
|
||||
DataFlow::localExprFlow(call, right)
|
||||
localExprFlow(call, right)
|
||||
|
|
||||
g.ensuresEq(left, right, 0, result, true) and op = "=="
|
||||
or
|
||||
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
|
||||
)
|
||||
}
|
||||
|
||||
from ScanfFunctionCall call, Access access, int minGuard
|
||||
where hasNonGuardedAccess(call, access, minGuard)
|
||||
select access,
|
||||
from ScanfFunctionCall call, Expr e, int minGuard
|
||||
where hasNonGuardedAccess(call, e, minGuard)
|
||||
select e,
|
||||
"This variable is read, but may not have been written. " +
|
||||
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
|
||||
call.toString()
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
|
||||
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
|
||||
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
|
||||
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
|
||||
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
|
||||
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
|
||||
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
|
||||
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
|
||||
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
|
||||
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
|
||||
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
|
||||
@@ -12,13 +11,9 @@
|
||||
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
|
||||
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
|
||||
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
|
||||
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
|
||||
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
|
||||
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
|
||||
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
|
||||
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
|
||||
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
|
||||
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
|
||||
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
|
||||
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
|
||||
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |
|
||||
|
||||
@@ -48,7 +48,7 @@ int main()
|
||||
int i = 0;
|
||||
|
||||
scanf("%d", &i);
|
||||
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
|
||||
use(i); // GOOD. Design choice: already initialized variables are fine.
|
||||
}
|
||||
|
||||
{
|
||||
@@ -261,7 +261,7 @@ int main()
|
||||
i = 0;
|
||||
|
||||
scanf("%d", &i);
|
||||
use(i); // BAD
|
||||
use(i); // GOOD
|
||||
}
|
||||
|
||||
{
|
||||
@@ -269,7 +269,7 @@ int main()
|
||||
|
||||
set_by_ref(i);
|
||||
scanf("%d", &i);
|
||||
use(i); // BAD
|
||||
use(i); // GOOD [FALSE POSITIVE]
|
||||
}
|
||||
|
||||
{
|
||||
@@ -277,7 +277,7 @@ int main()
|
||||
|
||||
set_by_ptr(&i);
|
||||
scanf("%d", &i);
|
||||
use(i); // BAD
|
||||
use(i); // GOOD [FALSE POSITIVE]
|
||||
}
|
||||
|
||||
{
|
||||
@@ -299,7 +299,7 @@ int main()
|
||||
int *ptr_i = &i;
|
||||
|
||||
scanf("%d", &i);
|
||||
use(*ptr_i); // BAD: may not have written `i`
|
||||
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
|
||||
}
|
||||
|
||||
{
|
||||
@@ -307,7 +307,7 @@ int main()
|
||||
int *ptr_i = &i;
|
||||
|
||||
scanf("%d", ptr_i);
|
||||
use(i); // BAD: may not have written `*ptr_i`
|
||||
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
|
||||
}
|
||||
|
||||
{
|
||||
@@ -427,5 +427,5 @@ void scan_and_write() {
|
||||
void scan_and_static_variable() {
|
||||
static int i;
|
||||
scanf("%d", &i);
|
||||
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
|
||||
use(i); // GOOD: static variables are always 0-initialized
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user