Merge pull request #14619 from MathiasVP/fix-strtol-model

C++: Fix `strtol` model
This commit is contained in:
Mathias Vorreiter Pedersen
2023-10-30 09:41:23 +00:00
committed by GitHub
10 changed files with 222 additions and 3 deletions

View File

@@ -19,6 +19,7 @@ private import implementations.Strtok
private import implementations.Strset
private import implementations.Strcrement
private import implementations.Strnextc
private import implementations.Strtol
private import implementations.StdContainer
private import implementations.StdPair
private import implementations.StdMap

View File

@@ -13,7 +13,7 @@ private class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunctio
PureStrFunction() {
this.hasGlobalOrStdOrBslName([
atoi(), "strcasestr", "strchnul", "strchr", "strchrnul", "strstr", "strpbrk", "strrchr",
"strspn", strtol(), strrev(), strcmp(), strlwr(), strupr()
"strspn", strrev(), strcmp(), strlwr(), strupr()
])
}
@@ -70,8 +70,6 @@ private class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunctio
private string atoi() { result = ["atof", "atoi", "atol", "atoll"] }
private string strtol() { result = ["strtod", "strtof", "strtol", "strtoll", "strtoq", "strtoul"] }
private string strlwr() {
result = ["_strlwr", "_wcslwr", "_mbslwr", "_strlwr_l", "_wcslwr_l", "_mbslwr_l"]
}

View File

@@ -0,0 +1,54 @@
import semmle.code.cpp.models.interfaces.ArrayFunction
import semmle.code.cpp.models.interfaces.Taint
import semmle.code.cpp.models.interfaces.Alias
import semmle.code.cpp.models.interfaces.SideEffect
/** Gets the name of `strtol` or of one of the related functions modeled by this file. */
private string strtol() {
  result in ["strtod", "strtof", "strtol", "strtoll", "strtoq", "strtoul"]
}
/**
 * A model of `strtol` and the related functions named by `strtol()`,
 * describing their taint flow, aliasing behavior and side effects.
 */
private class Strtol extends AliasFunction, ArrayFunction, TaintFunction, SideEffectFunction {
  Strtol() { this.hasGlobalOrStdOrBslName(strtol()) }

  // Every function named by `strtol()` receives its `const char*` input as parameter 0.
  override predicate hasArrayInput(int bufParam) { bufParam = 0 }

  override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }

  override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
    // The pointer passed as parameter 0 taints the return value as well as
    // the value written through parameter 1.
    input.isParameter(0) and
    (
      output.isReturnValue()
      or
      output.isParameterDeref(1)
    )
    or
    // The data pointed to by parameter 0 taints the return value.
    input.isParameterDeref(0) and
    output.isReturnValue()
  }

  override predicate parameterNeverEscapes(int i) {
    // Parameter 0 is left out on purpose: it escapes into parameter 1.
    i = 1
  }

  override predicate parameterEscapesOnlyViaReturn(int i) { none() }

  override predicate parameterIsAlwaysReturned(int i) { none() }

  override predicate hasOnlySpecificReadSideEffects() { any() }

  override predicate hasOnlySpecificWriteSideEffects() { any() }

  // The only read is of the buffer pointed to by parameter 0.
  override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) {
    buffer = true and
    i = 0
  }

  // The only write is a possible (`mustWrite = false`), non-buffer write through parameter 1.
  override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) {
    mustWrite = false and
    buffer = false and
    i = 1
  }
}

View File

@@ -6629,6 +6629,20 @@ WARNING: Module TaintTracking has been deprecated and may be removed in future (
| taint.cpp:720:27:720:32 | source | taint.cpp:720:20:720:25 | call to strtok | TAINT |
| taint.cpp:721:8:721:16 | tokenized | taint.cpp:721:7:721:16 | * ... | TAINT |
| taint.cpp:722:8:722:12 | delim | taint.cpp:722:7:722:12 | * ... | TAINT |
| taint.cpp:727:24:727:29 | source | taint.cpp:727:24:727:29 | source | |
| taint.cpp:727:24:727:29 | source | taint.cpp:729:18:729:23 | source | |
| taint.cpp:728:17:728:23 | 0 | taint.cpp:729:27:729:32 | endptr | |
| taint.cpp:728:17:728:23 | 0 | taint.cpp:731:7:731:12 | endptr | |
| taint.cpp:728:17:728:23 | 0 | taint.cpp:732:8:732:13 | endptr | |
| taint.cpp:729:11:729:16 | call to strtol | taint.cpp:730:7:730:7 | l | |
| taint.cpp:729:18:729:23 | source | taint.cpp:729:11:729:16 | call to strtol | TAINT |
| taint.cpp:729:18:729:23 | source | taint.cpp:729:26:729:32 | ref arg & ... | TAINT |
| taint.cpp:729:26:729:32 | ref arg & ... | taint.cpp:729:27:729:32 | endptr [inner post update] | |
| taint.cpp:729:26:729:32 | ref arg & ... | taint.cpp:731:7:731:12 | endptr | |
| taint.cpp:729:26:729:32 | ref arg & ... | taint.cpp:732:8:732:13 | endptr | |
| taint.cpp:729:27:729:32 | endptr | taint.cpp:729:26:729:32 | & ... | |
| taint.cpp:731:7:731:12 | ref arg endptr | taint.cpp:732:8:732:13 | endptr | |
| taint.cpp:732:8:732:13 | endptr | taint.cpp:732:7:732:13 | * ... | TAINT |
| vector.cpp:16:43:16:49 | source1 | vector.cpp:17:26:17:32 | source1 | |
| vector.cpp:16:43:16:49 | source1 | vector.cpp:31:38:31:44 | source1 | |
| vector.cpp:17:21:17:33 | call to vector | vector.cpp:19:14:19:14 | v | |

View File

@@ -720,4 +720,14 @@ void test_strtok_indirect() {
char* tokenized = strtok(source, delim);
sink(*tokenized); // $ ir MISSING: ast
sink(*delim);
}
long int strtol(const char*, char**, int); // prototype declared directly in this test file
void test_strtol(char *source) { // taint test: passes `source` through strtol() and sinks each result
char* endptr = nullptr;
long l = strtol(source, &endptr, 10); // both `l` and `endptr` are handed to sink() below
sink(l); // $ ast,ir
sink(endptr); // $ ast,ir
sink(*endptr); // $ ast,ir
}

View File

@@ -15938,6 +15938,44 @@ ir.cpp:
# 2104| Type = [CTypedefType,Size_t] size_t
# 2104| ValueCategory = prvalue(load)
# 2105| getStmt(6): [ReturnStmt] return ...
# 2107| [TopLevelFunction] double strtod(char const*, char**)
# 2107| <params>:
# 2107| getParameter(0): [Parameter] str
# 2107| Type = [PointerType] const char *
# 2107| getParameter(1): [Parameter] endptr
# 2107| Type = [PointerType] char **
# 2109| [TopLevelFunction] char* test_strtod(char*)
# 2109| <params>:
# 2109| getParameter(0): [Parameter] s
# 2109| Type = [CharPointerType] char *
# 2109| getEntryPoint(): [BlockStmt] { ... }
# 2110| getStmt(0): [DeclStmt] declaration
# 2110| getDeclarationEntry(0): [VariableDeclarationEntry] definition of end
# 2110| Type = [CharPointerType] char *
# 2111| getStmt(1): [DeclStmt] declaration
# 2111| getDeclarationEntry(0): [VariableDeclarationEntry] definition of d
# 2111| Type = [DoubleType] double
# 2111| getVariable().getInitializer(): [Initializer] initializer for d
# 2111| getExpr(): [FunctionCall] call to strtod
# 2111| Type = [DoubleType] double
# 2111| ValueCategory = prvalue
# 2111| getArgument(0): [VariableAccess] s
# 2111| Type = [CharPointerType] char *
# 2111| ValueCategory = prvalue(load)
# 2111| getArgument(1): [AddressOfExpr] & ...
# 2111| Type = [PointerType] char **
# 2111| ValueCategory = prvalue
# 2111| getOperand(): [VariableAccess] end
# 2111| Type = [CharPointerType] char *
# 2111| ValueCategory = lvalue
# 2111| getArgument(0).getFullyConverted(): [CStyleCast] (const char *)...
# 2111| Conversion = [PointerConversion] pointer conversion
# 2111| Type = [PointerType] const char *
# 2111| ValueCategory = prvalue
# 2112| getStmt(2): [ReturnStmt] return ...
# 2112| getExpr(): [VariableAccess] end
# 2112| Type = [CharPointerType] char *
# 2112| ValueCategory = prvalue(load)
perf-regression.cpp:
# 4| [CopyAssignmentOperator] Big& Big::operator=(Big const&)
# 4| <params>:

View File

@@ -12333,6 +12333,40 @@ ir.cpp:
# 2098| v2098_8(void) = AliasedUse : ~m2104_8
# 2098| v2098_9(void) = ExitFunction :
# 2109| char* test_strtod(char*)
# 2109| Block 0
# 2109| v2109_1(void) = EnterFunction :
# 2109| m2109_2(unknown) = AliasedDefinition :
# 2109| m2109_3(unknown) = InitializeNonLocal :
# 2109| m2109_4(unknown) = Chi : total:m2109_2, partial:m2109_3
# 2109| r2109_5(glval<char *>) = VariableAddress[s] :
# 2109| m2109_6(char *) = InitializeParameter[s] : &:r2109_5
# 2109| r2109_7(char *) = Load[s] : &:r2109_5, m2109_6
# 2109| m2109_8(unknown) = InitializeIndirection[s] : &:r2109_7
# 2110| r2110_1(glval<char *>) = VariableAddress[end] :
# 2110| m2110_2(char *) = Uninitialized[end] : &:r2110_1
# 2111| r2111_1(glval<double>) = VariableAddress[d] :
# 2111| r2111_2(glval<unknown>) = FunctionAddress[strtod] :
# 2111| r2111_3(glval<char *>) = VariableAddress[s] :
# 2111| r2111_4(char *) = Load[s] : &:r2111_3, m2109_6
# 2111| r2111_5(char *) = Convert : r2111_4
# 2111| r2111_6(glval<char *>) = VariableAddress[end] :
# 2111| r2111_7(char **) = CopyValue : r2111_6
# 2111| r2111_8(double) = Call[strtod] : func:r2111_2, 0:r2111_5, 1:r2111_7
# 2111| v2111_9(void) = ^BufferReadSideEffect[0] : &:r2111_5, ~m2109_8
# 2111| m2111_10(char *) = ^IndirectMayWriteSideEffect[1] : &:r2111_7
# 2111| m2111_11(char *) = Chi : total:m2110_2, partial:m2111_10
# 2111| m2111_12(double) = Store[d] : &:r2111_1, r2111_8
# 2112| r2112_1(glval<char *>) = VariableAddress[#return] :
# 2112| r2112_2(glval<char *>) = VariableAddress[end] :
# 2112| r2112_3(char *) = Load[end] : &:r2112_2, m2111_11
# 2112| m2112_4(char *) = Store[#return] : &:r2112_1, r2112_3
# 2109| v2109_9(void) = ReturnIndirection[s] : &:r2109_7, m2109_8
# 2109| r2109_10(glval<char *>) = VariableAddress[#return] :
# 2109| v2109_11(void) = ReturnValue : &:r2109_10, m2112_4
# 2109| v2109_12(void) = AliasedUse : m2109_3
# 2109| v2109_13(void) = ExitFunction :
perf-regression.cpp:
# 6| void Big::Big()
# 6| Block 0

View File

@@ -2104,4 +2104,12 @@ void newArrayCorrectType(size_t n) {
new int[n] { 0, 1, 2 };
}
double strtod (const char* str, char** endptr); // prototype declared directly in this test file
char* test_strtod(char *s) { // IR test: calls strtod() and returns the pointer passed as its second argument
char *end; // no initializer; the strtod() call below is the only visible write to `end`
double d = strtod(s, &end); // expected IR: buffer read of argument 0, indirect may-write of argument 1
return end; // expected IR loads `end` from the Chi that follows the call's may-write
}
// semmle-extractor-options: -std=c++17 --clang

View File

@@ -10004,6 +10004,36 @@
| ir.cpp:2104:11:2104:11 | Address | &:r2104_2 |
| ir.cpp:2104:11:2104:11 | Left | r2104_3 |
| ir.cpp:2104:11:2104:11 | Load | m2098_6 |
| ir.cpp:2109:7:2109:17 | Address | &:r2109_10 |
| ir.cpp:2109:7:2109:17 | ChiPartial | partial:m2109_3 |
| ir.cpp:2109:7:2109:17 | ChiTotal | total:m2109_2 |
| ir.cpp:2109:7:2109:17 | Load | m2112_4 |
| ir.cpp:2109:7:2109:17 | SideEffect | m2109_3 |
| ir.cpp:2109:25:2109:25 | Address | &:r2109_5 |
| ir.cpp:2109:25:2109:25 | Address | &:r2109_5 |
| ir.cpp:2109:25:2109:25 | Address | &:r2109_7 |
| ir.cpp:2109:25:2109:25 | Address | &:r2109_7 |
| ir.cpp:2109:25:2109:25 | Load | m2109_6 |
| ir.cpp:2109:25:2109:25 | SideEffect | m2109_8 |
| ir.cpp:2110:9:2110:11 | Address | &:r2110_1 |
| ir.cpp:2111:10:2111:10 | Address | &:r2111_1 |
| ir.cpp:2111:14:2111:19 | CallTarget | func:r2111_2 |
| ir.cpp:2111:14:2111:19 | StoreValue | r2111_8 |
| ir.cpp:2111:21:2111:21 | Address | &:r2111_3 |
| ir.cpp:2111:21:2111:21 | Address | &:r2111_5 |
| ir.cpp:2111:21:2111:21 | Arg(0) | 0:r2111_5 |
| ir.cpp:2111:21:2111:21 | Load | m2109_6 |
| ir.cpp:2111:21:2111:21 | SideEffect | ~m2109_8 |
| ir.cpp:2111:21:2111:21 | Unary | r2111_4 |
| ir.cpp:2111:24:2111:27 | Address | &:r2111_7 |
| ir.cpp:2111:24:2111:27 | Arg(1) | 1:r2111_7 |
| ir.cpp:2111:24:2111:27 | ChiPartial | partial:m2111_10 |
| ir.cpp:2111:24:2111:27 | ChiTotal | total:m2110_2 |
| ir.cpp:2111:25:2111:27 | Unary | r2111_6 |
| ir.cpp:2112:3:2112:13 | Address | &:r2112_1 |
| ir.cpp:2112:10:2112:12 | Address | &:r2112_2 |
| ir.cpp:2112:10:2112:12 | Load | m2111_11 |
| ir.cpp:2112:10:2112:12 | StoreValue | r2112_3 |
| perf-regression.cpp:6:3:6:5 | Address | &:r6_5 |
| perf-regression.cpp:6:3:6:5 | Address | &:r6_5 |
| perf-regression.cpp:6:3:6:5 | Address | &:r6_7 |

View File

@@ -11538,6 +11538,38 @@ ir.cpp:
# 2098| v2098_7(void) = AliasedUse : ~m?
# 2098| v2098_8(void) = ExitFunction :
# 2109| char* test_strtod(char*)
# 2109| Block 0
# 2109| v2109_1(void) = EnterFunction :
# 2109| mu2109_2(unknown) = AliasedDefinition :
# 2109| mu2109_3(unknown) = InitializeNonLocal :
# 2109| r2109_4(glval<char *>) = VariableAddress[s] :
# 2109| mu2109_5(char *) = InitializeParameter[s] : &:r2109_4
# 2109| r2109_6(char *) = Load[s] : &:r2109_4, ~m?
# 2109| mu2109_7(unknown) = InitializeIndirection[s] : &:r2109_6
# 2110| r2110_1(glval<char *>) = VariableAddress[end] :
# 2110| mu2110_2(char *) = Uninitialized[end] : &:r2110_1
# 2111| r2111_1(glval<double>) = VariableAddress[d] :
# 2111| r2111_2(glval<unknown>) = FunctionAddress[strtod] :
# 2111| r2111_3(glval<char *>) = VariableAddress[s] :
# 2111| r2111_4(char *) = Load[s] : &:r2111_3, ~m?
# 2111| r2111_5(char *) = Convert : r2111_4
# 2111| r2111_6(glval<char *>) = VariableAddress[end] :
# 2111| r2111_7(char **) = CopyValue : r2111_6
# 2111| r2111_8(double) = Call[strtod] : func:r2111_2, 0:r2111_5, 1:r2111_7
# 2111| v2111_9(void) = ^BufferReadSideEffect[0] : &:r2111_5, ~m?
# 2111| mu2111_10(char *) = ^IndirectMayWriteSideEffect[1] : &:r2111_7
# 2111| mu2111_11(double) = Store[d] : &:r2111_1, r2111_8
# 2112| r2112_1(glval<char *>) = VariableAddress[#return] :
# 2112| r2112_2(glval<char *>) = VariableAddress[end] :
# 2112| r2112_3(char *) = Load[end] : &:r2112_2, ~m?
# 2112| mu2112_4(char *) = Store[#return] : &:r2112_1, r2112_3
# 2109| v2109_8(void) = ReturnIndirection[s] : &:r2109_6, ~m?
# 2109| r2109_9(glval<char *>) = VariableAddress[#return] :
# 2109| v2109_10(void) = ReturnValue : &:r2109_9, ~m?
# 2109| v2109_11(void) = AliasedUse : ~m?
# 2109| v2109_12(void) = ExitFunction :
perf-regression.cpp:
# 6| void Big::Big()
# 6| Block 0