Merge pull request #15504 from MathiasVP/block-summary-flow-out-of-strdup-and-friends

C++: Block summary flow through `strdup` and friends
This commit is contained in:
Jeroen Ketema
2024-02-02 14:47:05 +01:00
committed by GitHub
9 changed files with 234 additions and 17 deletions

View File

@@ -7,6 +7,9 @@ private import SsaInternals as Ssa
private import DataFlowImplCommon as DataFlowImplCommon
private import codeql.util.Unit
private import Node0ToString
private import ModelUtil
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO
private import semmle.code.cpp.models.interfaces.DataFlow as DF
cached
private module Cached {
@@ -1178,6 +1181,19 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
)
}
pragma[nomagic]
private predicate isInputOutput(
DF::DataFlowFunction target, Node node1, Node node2, IO::FunctionInput input,
IO::FunctionOutput output
) {
exists(CallInstruction call |
node1 = callInput(call, input) and
node2 = callOutput(call, output) and
call.getStaticCallTarget() = target and
target.hasDataFlow(input, output)
)
}
/**
* Holds if the data-flow step from `node1` to `node2` can be used to
* determine where side-effects may return from a callable.
@@ -1189,6 +1205,11 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
* int x = *p;
* ```
* does not preserve the identity of `*p`.
*
* Similarly, a function that copies the contents of a string into a new location
* does not also preserve the identity. For example, `strdup(p)` does not
* preserve the identity of `*p` (since it allocates new storage and copies
* the string into the new storage).
*/
bindingset[node1, node2]
pragma[inline_late]
@@ -1225,7 +1246,16 @@ predicate validParameterAliasStep(Node node1, Node node2) {
not exists(Operand operand |
node1.asOperand() = operand and
node2.asInstruction().(StoreInstruction).getSourceValueOperand() = operand
) and
(
// Either this is not a modeled flow.
not isInputOutput(_, node1, node2, _, _)
or
exists(DF::DataFlowFunction target, IO::FunctionInput input, IO::FunctionOutput output |
// Or it is a modeled flow and there's `*input` to `*output` flow
isInputOutput(target, node1, node2, input.getIndirectionInput(), output.getIndirectionOutput()) and
// and in that case there should also be `input` to `output` flow
target.hasDataFlow(input, output)
)
)
// TODO: Also block flow through models that don't preserve identity such
// as `strdup`.
}

View File

@@ -23,6 +23,14 @@ private newtype TFunctionInput =
class FunctionInput extends TFunctionInput {
abstract string toString();
/**
* INTERNAL: Do not use.
*
* Gets the `FunctionInput` that represents the indirection of this input,
* if any.
*/
FunctionInput getIndirectionInput() { none() }
/**
* Holds if this is the input value of the parameter with index `index`.
*
@@ -226,6 +234,8 @@ class InParameter extends FunctionInput, TInParameter {
ParameterIndex getIndex() { result = index }
override predicate isParameter(ParameterIndex i) { i = index }
override FunctionInput getIndirectionInput() { result = TInParameterDeref(index, 1) }
}
/**
@@ -257,6 +267,10 @@ class InParameterDeref extends FunctionInput, TInParameterDeref {
override predicate isParameterDeref(ParameterIndex i, int indirection) {
i = index and indirectionIndex = indirection
}
override FunctionInput getIndirectionInput() {
result = TInParameterDeref(index, indirectionIndex + 1)
}
}
/**
@@ -275,6 +289,8 @@ class InQualifierObject extends FunctionInput, TInQualifierObject {
override string toString() { result = "InQualifierObject" }
override predicate isQualifierObject() { any() }
override FunctionInput getIndirectionInput() { none() }
}
/**
@@ -293,6 +309,8 @@ class InQualifierAddress extends FunctionInput, TInQualifierAddress {
override string toString() { result = "InQualifierAddress" }
override predicate isQualifierAddress() { any() }
override FunctionInput getIndirectionInput() { result = TInQualifierObject() }
}
/**
@@ -321,6 +339,8 @@ class InReturnValueDeref extends FunctionInput, TInReturnValueDeref {
override string toString() { result = "InReturnValueDeref" }
override predicate isReturnValueDeref() { any() }
override FunctionInput getIndirectionInput() { none() }
}
private newtype TFunctionOutput =
@@ -340,6 +360,14 @@ private newtype TFunctionOutput =
class FunctionOutput extends TFunctionOutput {
abstract string toString();
/**
* INTERNAL: Do not use.
*
* Gets the `FunctionOutput` that represents the indirection of this output,
* if any.
*/
FunctionOutput getIndirectionOutput() { none() }
/**
* Holds if this is the output value pointed to by a pointer parameter to a function, or the
* output value referred to by a reference parameter to a function, where the parameter has
@@ -512,6 +540,10 @@ class OutParameterDeref extends FunctionOutput, TOutParameterDeref {
override predicate isParameterDeref(ParameterIndex i, int ind) {
i = index and ind = indirectionIndex
}
override FunctionOutput getIndirectionOutput() {
result = TOutParameterDeref(index, indirectionIndex + 1)
}
}
/**
@@ -530,6 +562,8 @@ class OutQualifierObject extends FunctionOutput, TOutQualifierObject {
override string toString() { result = "OutQualifierObject" }
override predicate isQualifierObject() { any() }
override FunctionOutput getIndirectionOutput() { none() }
}
/**
@@ -552,6 +586,8 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
override string toString() { result = "OutReturnValue" }
override predicate isReturnValue() { any() }
override FunctionOutput getIndirectionOutput() { result = TOutReturnValueDeref(1) }
}
/**
@@ -571,11 +607,19 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
* of `getInt()` is neither a pointer nor a reference.
*/
class OutReturnValueDeref extends FunctionOutput, TOutReturnValueDeref {
int indirectionIndex;
OutReturnValueDeref() { this = TOutReturnValueDeref(indirectionIndex) }
override string toString() { result = "OutReturnValueDeref" }
override predicate isReturnValueDeref() { any() }
override predicate isReturnValueDeref(int indirectionIndex) {
this = TOutReturnValueDeref(indirectionIndex)
override predicate isReturnValueDeref(int indirectionIndex_) {
indirectionIndex = indirectionIndex_
}
override FunctionOutput getIndirectionOutput() {
result = TOutReturnValueDeref(indirectionIndex + 1)
}
}

View File

@@ -49,6 +49,53 @@ module IRTest {
import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.models.interfaces.DataFlow
boolean isOne(string s) {
s = "1" and result = true
or
s = "0" and result = false
}
/**
* A model of a test function called `strdup_ptr_xyz` where `x, y, z in {0, 1}`.
* `x` is 1 if there's flow from the argument to the function return,
* `y` is 1 if there's flow from the first indirection of the argument to
* the first indirection of the function return, and
* `z` is 1 if there's flow from the second indirection of the argument to
* the second indirection of the function return.
*/
class StrDupPtr extends DataFlowFunction {
boolean argToReturnFlow;
boolean argIndToReturnInd;
boolean argIndInToReturnIndInd;
StrDupPtr() {
exists(string r |
r = "strdup_ptr_([01])([01])([01])" and
argToReturnFlow = isOne(this.getName().regexpCapture(r, 1)) and
argIndToReturnInd = isOne(this.getName().regexpCapture(r, 2)) and
argIndInToReturnIndInd = isOne(this.getName().regexpCapture(r, 3))
)
}
/**
* Flow from `**ptr` to `**return`
*/
override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
argToReturnFlow = true and
input.isParameter(0) and
output.isReturnValue()
or
argIndToReturnInd = true and
input.isParameterDeref(0, 1) and
output.isReturnValueDeref(1)
or
argIndInToReturnIndInd = true and
input.isParameterDeref(0, 2) and
output.isReturnValueDeref(2)
}
}
/**
* A `BarrierGuard` that stops flow to all occurrences of `x` within statement

View File

@@ -24,6 +24,7 @@ postIsInSameCallable
reverseRead
argHasPostUpdate
| flowOut.cpp:55:14:55:16 | * ... | ArgumentNode is missing PostUpdateNode. |
| flowOut.cpp:185:8:185:9 | * ... | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:18:7:18:7 | a | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:25:2:25:2 | b | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:32:2:32:2 | c | ArgumentNode is missing PostUpdateNode. |
@@ -64,6 +65,8 @@ postWithInFlow
| flowOut.cpp:90:3:90:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:90:4:90:4 | q [inner post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:101:14:101:14 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:168:3:168:10 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:168:4:168:10 | toTaint [inner post update] | PostUpdateNode should not be the target of local flow. |
| globals.cpp:13:5:13:19 | flowTestGlobal1 [post update] | PostUpdateNode should not be the target of local flow. |
| globals.cpp:23:5:23:19 | flowTestGlobal2 [post update] | PostUpdateNode should not be the target of local flow. |
| lambdas.cpp:23:3:23:14 | v [post update] | PostUpdateNode should not be the target of local flow. |

View File

@@ -20,6 +20,7 @@ reverseRead
argHasPostUpdate
postWithInFlow
| flowOut.cpp:84:3:84:14 | *access to array | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:111:28:111:31 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |

View File

@@ -70,7 +70,7 @@ void modify_copy_via_strdup(char* p) { // $ ast-def=p
void test_modify_copy_via_strdup(char* p) { // $ ast-def=p
modify_copy_via_strdup(p);
sink(*p); // $ SPURIOUS: ir
sink(*p); // clean
}
int* deref(int** p) { // $ ast-def=p
@@ -101,3 +101,103 @@ void test2() {
addtaint2(&p);
sink(*p); // $ ir MISSING: ast
}
using size_t = decltype(sizeof(int));
void* memcpy(void* dest, const void* src, size_t);
void modify_copy_via_memcpy(char* p) { // $ ast-def=p
char* dest;
char* p2 = (char*)memcpy(dest, p, 10);
source_ref(p2);
}
void test_modify_copy_via_memcpy(char* p) { // $ ast-def=p
modify_copy_via_memcpy(p);
sink(*p); // clean
}
// These functions from any real database. We add a dataflow model of
// them as part of dataflow library testing.
// `r = strdup_ptr_001`(p) has flow from **p to **r
// `r = strdup_ptr_011`(p) has flow from *p to *r, and **p to **r
// `r = strdup_ptr_111`(p) has flow from p to r, *p to *r, **p to **r
char** strdup_ptr_001(const char** p);
char** strdup_ptr_011(const char** p);
char** strdup_ptr_111(const char** p);
void source_ref_ref(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
// source -> **toTaint
**toTaint = source(true);
}
// This function copies the value of **p into a new location **p2 and then
// taints **p. Thus, **p does not contain tainted data after returning from
// this function.
void modify_copy_via_strdup_ptr_001(char** p) { // $ ast-def=p
// **p -> **p2
char** p2 = strdup_ptr_001(p);
// source -> **p2
source_ref_ref(p2);
}
void test_modify_copy_via_strdup_001(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_001(p);
sink(**p); // clean
}
// This function copies the value of *p into a new location *p2 and then
// taints **p2. Thus, **p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_011(char** p) { // $ ast-def=p
// **p -> **p2 and *p -> *p2
char** p2 = strdup_ptr_011(p);
// source -> **p2
source_ref_ref(p2);
}
void test_modify_copy_via_strdup_011(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_011(p);
sink(**p); // $ ir MISSING: ast
}
char* source(int);
void source_ref_2(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
// source -> *toTaint
*toTaint = source(42);
}
// This function copies the value of p into a new location p2 and then
// taints *p2. Thus, *p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_111_taint_ind(char** p) { // $ ast-def=p
// **p -> **p2, *p -> *p2, and p -> p2
char** p2 = strdup_ptr_111(p);
// source -> *p2
source_ref_2(p2);
}
void sink(char*);
void test_modify_copy_via_strdup_111_taint_ind(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_111_taint_ind(p);
sink(*p); // $ ir MISSING: ast
}
// This function copies the value of p into a new location p2 and then
// taints **p2. Thus, **p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_111_taint_ind_ind(char** p) { // $ ast-def=p
// **p -> **p2, *p -> *p2, and p -> p2
char** p2 = strdup_ptr_111(p);
// source -> **p2
source_ref_ref(p2);
}
void sink(char*);
void test_modify_copy_via_strdup_111_taint_ind_ind(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_111_taint_ind_ind(p);
sink(**p); // $ ir MISSING: ast
}

View File

@@ -174,9 +174,11 @@ irFlow
| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x |
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x |
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array |
| flowOut.cpp:8:16:8:23 | call to source | flowOut.cpp:73:8:73:9 | * ... |
| flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... |
| flowOut.cpp:90:8:90:13 | call to source | flowOut.cpp:102:8:102:9 | * ... |
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:161:8:161:10 | * ... |
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:202:8:202:10 | * ... |
| flowOut.cpp:168:14:168:19 | call to source | flowOut.cpp:185:8:185:9 | * ... |
| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local |
| globals.cpp:13:23:13:28 | call to source | globals.cpp:12:10:12:24 | flowTestGlobal1 |
| globals.cpp:23:23:23:28 | call to source | globals.cpp:19:10:19:24 | flowTestGlobal2 |

View File

@@ -2,6 +2,7 @@
| flowOut.cpp:44:7:44:7 | x | flowOut.cpp:46:8:46:8 | x |
| flowOut.cpp:59:7:59:7 | x | flowOut.cpp:60:18:60:18 | x |
| flowOut.cpp:59:7:59:7 | x | flowOut.cpp:61:8:61:8 | x |
| flowOut.cpp:110:9:110:12 | dest | flowOut.cpp:111:28:111:31 | dest |
| ref.cpp:53:9:53:10 | x1 | ref.cpp:55:19:55:20 | x1 |
| ref.cpp:53:9:53:10 | x1 | ref.cpp:56:10:56:11 | x1 |
| ref.cpp:53:13:53:14 | x2 | ref.cpp:58:15:58:16 | x2 |

View File

@@ -3,17 +3,11 @@ edges
| main.cpp:7:33:7:36 | **argv | overflowdestination.cpp:23:45:23:48 | **argv |
| overflowdestination.cpp:23:45:23:48 | **argv | overflowdestination.cpp:30:17:30:20 | *arg1 |
| overflowdestination.cpp:43:8:43:10 | fgets output argument | overflowdestination.cpp:46:15:46:17 | *src |
| overflowdestination.cpp:50:52:50:54 | *src | overflowdestination.cpp:53:9:53:12 | memcpy output argument |
| overflowdestination.cpp:50:52:50:54 | *src | overflowdestination.cpp:53:15:53:17 | *src |
| overflowdestination.cpp:50:52:50:54 | *src | overflowdestination.cpp:54:9:54:12 | memcpy output argument |
| overflowdestination.cpp:53:9:53:12 | memcpy output argument | overflowdestination.cpp:54:9:54:12 | memcpy output argument |
| overflowdestination.cpp:54:9:54:12 | memcpy output argument | overflowdestination.cpp:54:9:54:12 | memcpy output argument |
| overflowdestination.cpp:57:52:57:54 | *src | overflowdestination.cpp:64:16:64:19 | *src2 |
| overflowdestination.cpp:73:8:73:10 | fgets output argument | overflowdestination.cpp:75:30:75:32 | *src |
| overflowdestination.cpp:73:8:73:10 | fgets output argument | overflowdestination.cpp:76:30:76:32 | *src |
| overflowdestination.cpp:75:30:75:32 | *src | overflowdestination.cpp:50:52:50:54 | *src |
| overflowdestination.cpp:75:30:75:32 | *src | overflowdestination.cpp:75:30:75:32 | overflowdest_test2 output argument |
| overflowdestination.cpp:75:30:75:32 | overflowdest_test2 output argument | overflowdestination.cpp:76:30:76:32 | *src |
| overflowdestination.cpp:76:30:76:32 | *src | overflowdestination.cpp:57:52:57:54 | *src |
nodes
| main.cpp:6:27:6:30 | **argv | semmle.label | **argv |
@@ -23,18 +17,13 @@ nodes
| overflowdestination.cpp:43:8:43:10 | fgets output argument | semmle.label | fgets output argument |
| overflowdestination.cpp:46:15:46:17 | *src | semmle.label | *src |
| overflowdestination.cpp:50:52:50:54 | *src | semmle.label | *src |
| overflowdestination.cpp:53:9:53:12 | memcpy output argument | semmle.label | memcpy output argument |
| overflowdestination.cpp:53:15:53:17 | *src | semmle.label | *src |
| overflowdestination.cpp:54:9:54:12 | memcpy output argument | semmle.label | memcpy output argument |
| overflowdestination.cpp:57:52:57:54 | *src | semmle.label | *src |
| overflowdestination.cpp:64:16:64:19 | *src2 | semmle.label | *src2 |
| overflowdestination.cpp:73:8:73:10 | fgets output argument | semmle.label | fgets output argument |
| overflowdestination.cpp:75:30:75:32 | *src | semmle.label | *src |
| overflowdestination.cpp:75:30:75:32 | overflowdest_test2 output argument | semmle.label | overflowdest_test2 output argument |
| overflowdestination.cpp:76:30:76:32 | *src | semmle.label | *src |
subpaths
| overflowdestination.cpp:75:30:75:32 | *src | overflowdestination.cpp:50:52:50:54 | *src | overflowdestination.cpp:53:9:53:12 | memcpy output argument | overflowdestination.cpp:75:30:75:32 | overflowdest_test2 output argument |
| overflowdestination.cpp:75:30:75:32 | *src | overflowdestination.cpp:50:52:50:54 | *src | overflowdestination.cpp:54:9:54:12 | memcpy output argument | overflowdestination.cpp:75:30:75:32 | overflowdest_test2 output argument |
#select
| overflowdestination.cpp:30:2:30:8 | call to strncpy | main.cpp:6:27:6:30 | **argv | overflowdestination.cpp:30:17:30:20 | *arg1 | To avoid overflow, this operation should be bounded by destination-buffer size, not source-buffer size. |
| overflowdestination.cpp:46:2:46:7 | call to memcpy | overflowdestination.cpp:43:8:43:10 | fgets output argument | overflowdestination.cpp:46:15:46:17 | *src | To avoid overflow, this operation should be bounded by destination-buffer size, not source-buffer size. |