diff --git a/cpp/ql/lib/semmle/code/cpp/models/Models.qll b/cpp/ql/lib/semmle/code/cpp/models/Models.qll index 1318fd37b7f..6a99a7e342b 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/Models.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/Models.qll @@ -19,6 +19,7 @@ private import implementations.Strtok private import implementations.Strset private import implementations.Strcrement private import implementations.Strnextc +private import implementations.Strtol private import implementations.StdContainer private import implementations.StdPair private import implementations.StdMap diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll index 41bd9ae0db7..915bcbce655 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Pure.qll @@ -13,7 +13,7 @@ private class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunctio PureStrFunction() { this.hasGlobalOrStdOrBslName([ atoi(), "strcasestr", "strchnul", "strchr", "strchrnul", "strstr", "strpbrk", "strrchr", - "strspn", strtol(), strrev(), strcmp(), strlwr(), strupr() + "strspn", strrev(), strcmp(), strlwr(), strupr() ]) } @@ -70,8 +70,6 @@ private class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunctio private string atoi() { result = ["atof", "atoi", "atol", "atoll"] } -private string strtol() { result = ["strtod", "strtof", "strtol", "strtoll", "strtoq", "strtoul"] } - private string strlwr() { result = ["_strlwr", "_wcslwr", "_mbslwr", "_strlwr_l", "_wcslwr_l", "_mbslwr_l"] } diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtol.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtol.qll new file mode 100644 index 00000000000..30e95622924 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Strtol.qll @@ -0,0 +1,54 @@ +import semmle.code.cpp.models.interfaces.ArrayFunction +import 
semmle.code.cpp.models.interfaces.Taint +import semmle.code.cpp.models.interfaces.Alias +import semmle.code.cpp.models.interfaces.SideEffect + +private string strtol() { result = ["strtod", "strtof", "strtol", "strtoll", "strtoq", "strtoul"] } + +/** + * The standard function `strtol` and its variants (the names given by `strtol()`), modeled for array input, taint flow, aliasing, and side effects. + */ +private class Strtol extends AliasFunction, ArrayFunction, TaintFunction, SideEffectFunction { + Strtol() { this.hasGlobalOrStdOrBslName(strtol()) } + + override predicate hasArrayInput(int bufParam) { + // Every function named by `strtol()` takes a `const char*` input as its first parameter. + bufParam = 0 + } + + override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + ( + input.isParameter(0) + or + input.isParameterDeref(0) + ) and + output.isReturnValue() + or + input.isParameter(0) and + output.isParameterDeref(1) + } + + override predicate parameterNeverEscapes(int i) { + // Only parameter 1 is listed here, because parameter 0 does escape: into the pointee of parameter 1. 
+ i = 1 + } + + override predicate parameterEscapesOnlyViaReturn(int i) { none() } + + override predicate parameterIsAlwaysReturned(int i) { none() } + + override predicate hasOnlySpecificReadSideEffects() { any() } + + override predicate hasOnlySpecificWriteSideEffects() { any() } + + override predicate hasSpecificReadSideEffect(ParameterIndex i, boolean buffer) { + i = 0 and + buffer = true + } + + override predicate hasSpecificWriteSideEffect(ParameterIndex i, boolean buffer, boolean mustWrite) { + i = 1 and buffer = false and mustWrite = false + } +} diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected index 8080f4f3c7a..ceb3cde9a8e 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected @@ -6629,6 +6629,20 @@ WARNING: Module TaintTracking has been deprecated and may be removed in future ( | taint.cpp:720:27:720:32 | source | taint.cpp:720:20:720:25 | call to strtok | TAINT | | taint.cpp:721:8:721:16 | tokenized | taint.cpp:721:7:721:16 | * ... | TAINT | | taint.cpp:722:8:722:12 | delim | taint.cpp:722:7:722:12 | * ... | TAINT | +| taint.cpp:727:24:727:29 | source | taint.cpp:727:24:727:29 | source | | +| taint.cpp:727:24:727:29 | source | taint.cpp:729:18:729:23 | source | | +| taint.cpp:728:17:728:23 | 0 | taint.cpp:729:27:729:32 | endptr | | +| taint.cpp:728:17:728:23 | 0 | taint.cpp:731:7:731:12 | endptr | | +| taint.cpp:728:17:728:23 | 0 | taint.cpp:732:8:732:13 | endptr | | +| taint.cpp:729:11:729:16 | call to strtol | taint.cpp:730:7:730:7 | l | | +| taint.cpp:729:18:729:23 | source | taint.cpp:729:11:729:16 | call to strtol | TAINT | +| taint.cpp:729:18:729:23 | source | taint.cpp:729:26:729:32 | ref arg & ... | TAINT | +| taint.cpp:729:26:729:32 | ref arg & ... | taint.cpp:729:27:729:32 | endptr [inner post update] | | +| taint.cpp:729:26:729:32 | ref arg & ... 
| taint.cpp:731:7:731:12 | endptr | | +| taint.cpp:729:26:729:32 | ref arg & ... | taint.cpp:732:8:732:13 | endptr | | +| taint.cpp:729:27:729:32 | endptr | taint.cpp:729:26:729:32 | & ... | | +| taint.cpp:731:7:731:12 | ref arg endptr | taint.cpp:732:8:732:13 | endptr | | +| taint.cpp:732:8:732:13 | endptr | taint.cpp:732:7:732:13 | * ... | TAINT | | vector.cpp:16:43:16:49 | source1 | vector.cpp:17:26:17:32 | source1 | | | vector.cpp:16:43:16:49 | source1 | vector.cpp:31:38:31:44 | source1 | | | vector.cpp:17:21:17:33 | call to vector | vector.cpp:19:14:19:14 | v | | diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp index 6c13e8fa2bb..e479d7a11e0 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp @@ -720,4 +720,14 @@ void test_strtok_indirect() { char* tokenized = strtok(source, delim); sink(*tokenized); // $ ir MISSING: ast sink(*delim); +} + +long int strtol(const char*, char**, int); + +void test_strtol(char *source) { + char* endptr = nullptr; + long l = strtol(source, &endptr, 10); + sink(l); // $ ast,ir + sink(endptr); // $ ast,ir + sink(*endptr); // $ ast,ir } \ No newline at end of file diff --git a/cpp/ql/test/library-tests/ir/ir/PrintAST.expected b/cpp/ql/test/library-tests/ir/ir/PrintAST.expected index d714f52629b..10f1c32005b 100644 --- a/cpp/ql/test/library-tests/ir/ir/PrintAST.expected +++ b/cpp/ql/test/library-tests/ir/ir/PrintAST.expected @@ -15938,6 +15938,44 @@ ir.cpp: # 2104| Type = [CTypedefType,Size_t] size_t # 2104| ValueCategory = prvalue(load) # 2105| getStmt(6): [ReturnStmt] return ... 
+# 2107| [TopLevelFunction] double strtod(char const*, char**) +# 2107| : +# 2107| getParameter(0): [Parameter] str +# 2107| Type = [PointerType] const char * +# 2107| getParameter(1): [Parameter] endptr +# 2107| Type = [PointerType] char ** +# 2109| [TopLevelFunction] char* test_strtod(char*) +# 2109| : +# 2109| getParameter(0): [Parameter] s +# 2109| Type = [CharPointerType] char * +# 2109| getEntryPoint(): [BlockStmt] { ... } +# 2110| getStmt(0): [DeclStmt] declaration +# 2110| getDeclarationEntry(0): [VariableDeclarationEntry] definition of end +# 2110| Type = [CharPointerType] char * +# 2111| getStmt(1): [DeclStmt] declaration +# 2111| getDeclarationEntry(0): [VariableDeclarationEntry] definition of d +# 2111| Type = [DoubleType] double +# 2111| getVariable().getInitializer(): [Initializer] initializer for d +# 2111| getExpr(): [FunctionCall] call to strtod +# 2111| Type = [DoubleType] double +# 2111| ValueCategory = prvalue +# 2111| getArgument(0): [VariableAccess] s +# 2111| Type = [CharPointerType] char * +# 2111| ValueCategory = prvalue(load) +# 2111| getArgument(1): [AddressOfExpr] & ... +# 2111| Type = [PointerType] char ** +# 2111| ValueCategory = prvalue +# 2111| getOperand(): [VariableAccess] end +# 2111| Type = [CharPointerType] char * +# 2111| ValueCategory = lvalue +# 2111| getArgument(0).getFullyConverted(): [CStyleCast] (const char *)... +# 2111| Conversion = [PointerConversion] pointer conversion +# 2111| Type = [PointerType] const char * +# 2111| ValueCategory = prvalue +# 2112| getStmt(2): [ReturnStmt] return ... 
+# 2112| getExpr(): [VariableAccess] end +# 2112| Type = [CharPointerType] char * +# 2112| ValueCategory = prvalue(load) perf-regression.cpp: # 4| [CopyAssignmentOperator] Big& Big::operator=(Big const&) # 4| : diff --git a/cpp/ql/test/library-tests/ir/ir/aliased_ir.expected b/cpp/ql/test/library-tests/ir/ir/aliased_ir.expected index 150bed79ef4..d7e3c36d204 100644 --- a/cpp/ql/test/library-tests/ir/ir/aliased_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/aliased_ir.expected @@ -12333,6 +12333,40 @@ ir.cpp: # 2098| v2098_8(void) = AliasedUse : ~m2104_8 # 2098| v2098_9(void) = ExitFunction : +# 2109| char* test_strtod(char*) +# 2109| Block 0 +# 2109| v2109_1(void) = EnterFunction : +# 2109| m2109_2(unknown) = AliasedDefinition : +# 2109| m2109_3(unknown) = InitializeNonLocal : +# 2109| m2109_4(unknown) = Chi : total:m2109_2, partial:m2109_3 +# 2109| r2109_5(glval) = VariableAddress[s] : +# 2109| m2109_6(char *) = InitializeParameter[s] : &:r2109_5 +# 2109| r2109_7(char *) = Load[s] : &:r2109_5, m2109_6 +# 2109| m2109_8(unknown) = InitializeIndirection[s] : &:r2109_7 +# 2110| r2110_1(glval) = VariableAddress[end] : +# 2110| m2110_2(char *) = Uninitialized[end] : &:r2110_1 +# 2111| r2111_1(glval) = VariableAddress[d] : +# 2111| r2111_2(glval) = FunctionAddress[strtod] : +# 2111| r2111_3(glval) = VariableAddress[s] : +# 2111| r2111_4(char *) = Load[s] : &:r2111_3, m2109_6 +# 2111| r2111_5(char *) = Convert : r2111_4 +# 2111| r2111_6(glval) = VariableAddress[end] : +# 2111| r2111_7(char **) = CopyValue : r2111_6 +# 2111| r2111_8(double) = Call[strtod] : func:r2111_2, 0:r2111_5, 1:r2111_7 +# 2111| v2111_9(void) = ^BufferReadSideEffect[0] : &:r2111_5, ~m2109_8 +# 2111| m2111_10(char *) = ^IndirectMayWriteSideEffect[1] : &:r2111_7 +# 2111| m2111_11(char *) = Chi : total:m2110_2, partial:m2111_10 +# 2111| m2111_12(double) = Store[d] : &:r2111_1, r2111_8 +# 2112| r2112_1(glval) = VariableAddress[#return] : +# 2112| r2112_2(glval) = VariableAddress[end] : +# 2112| 
r2112_3(char *) = Load[end] : &:r2112_2, m2111_11 +# 2112| m2112_4(char *) = Store[#return] : &:r2112_1, r2112_3 +# 2109| v2109_9(void) = ReturnIndirection[s] : &:r2109_7, m2109_8 +# 2109| r2109_10(glval) = VariableAddress[#return] : +# 2109| v2109_11(void) = ReturnValue : &:r2109_10, m2112_4 +# 2109| v2109_12(void) = AliasedUse : m2109_3 +# 2109| v2109_13(void) = ExitFunction : + perf-regression.cpp: # 6| void Big::Big() # 6| Block 0 diff --git a/cpp/ql/test/library-tests/ir/ir/ir.cpp b/cpp/ql/test/library-tests/ir/ir/ir.cpp index 813f7c2448a..adaae6b7e59 100644 --- a/cpp/ql/test/library-tests/ir/ir/ir.cpp +++ b/cpp/ql/test/library-tests/ir/ir/ir.cpp @@ -2104,4 +2104,12 @@ void newArrayCorrectType(size_t n) { new int[n] { 0, 1, 2 }; } +double strtod (const char* str, char** endptr); + +char* test_strtod(char *s) { + char *end; + double d = strtod(s, &end); + return end; +} + // semmle-extractor-options: -std=c++17 --clang diff --git a/cpp/ql/test/library-tests/ir/ir/operand_locations.expected b/cpp/ql/test/library-tests/ir/ir/operand_locations.expected index af1939abd03..39d238c1fae 100644 --- a/cpp/ql/test/library-tests/ir/ir/operand_locations.expected +++ b/cpp/ql/test/library-tests/ir/ir/operand_locations.expected @@ -10004,6 +10004,36 @@ | ir.cpp:2104:11:2104:11 | Address | &:r2104_2 | | ir.cpp:2104:11:2104:11 | Left | r2104_3 | | ir.cpp:2104:11:2104:11 | Load | m2098_6 | +| ir.cpp:2109:7:2109:17 | Address | &:r2109_10 | +| ir.cpp:2109:7:2109:17 | ChiPartial | partial:m2109_3 | +| ir.cpp:2109:7:2109:17 | ChiTotal | total:m2109_2 | +| ir.cpp:2109:7:2109:17 | Load | m2112_4 | +| ir.cpp:2109:7:2109:17 | SideEffect | m2109_3 | +| ir.cpp:2109:25:2109:25 | Address | &:r2109_5 | +| ir.cpp:2109:25:2109:25 | Address | &:r2109_5 | +| ir.cpp:2109:25:2109:25 | Address | &:r2109_7 | +| ir.cpp:2109:25:2109:25 | Address | &:r2109_7 | +| ir.cpp:2109:25:2109:25 | Load | m2109_6 | +| ir.cpp:2109:25:2109:25 | SideEffect | m2109_8 | +| ir.cpp:2110:9:2110:11 | Address | &:r2110_1 
| +| ir.cpp:2111:10:2111:10 | Address | &:r2111_1 | +| ir.cpp:2111:14:2111:19 | CallTarget | func:r2111_2 | +| ir.cpp:2111:14:2111:19 | StoreValue | r2111_8 | +| ir.cpp:2111:21:2111:21 | Address | &:r2111_3 | +| ir.cpp:2111:21:2111:21 | Address | &:r2111_5 | +| ir.cpp:2111:21:2111:21 | Arg(0) | 0:r2111_5 | +| ir.cpp:2111:21:2111:21 | Load | m2109_6 | +| ir.cpp:2111:21:2111:21 | SideEffect | ~m2109_8 | +| ir.cpp:2111:21:2111:21 | Unary | r2111_4 | +| ir.cpp:2111:24:2111:27 | Address | &:r2111_7 | +| ir.cpp:2111:24:2111:27 | Arg(1) | 1:r2111_7 | +| ir.cpp:2111:24:2111:27 | ChiPartial | partial:m2111_10 | +| ir.cpp:2111:24:2111:27 | ChiTotal | total:m2110_2 | +| ir.cpp:2111:25:2111:27 | Unary | r2111_6 | +| ir.cpp:2112:3:2112:13 | Address | &:r2112_1 | +| ir.cpp:2112:10:2112:12 | Address | &:r2112_2 | +| ir.cpp:2112:10:2112:12 | Load | m2111_11 | +| ir.cpp:2112:10:2112:12 | StoreValue | r2112_3 | | perf-regression.cpp:6:3:6:5 | Address | &:r6_5 | | perf-regression.cpp:6:3:6:5 | Address | &:r6_5 | | perf-regression.cpp:6:3:6:5 | Address | &:r6_7 | diff --git a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected index c4774b10f89..f43a105f70a 100644 --- a/cpp/ql/test/library-tests/ir/ir/raw_ir.expected +++ b/cpp/ql/test/library-tests/ir/ir/raw_ir.expected @@ -11538,6 +11538,38 @@ ir.cpp: # 2098| v2098_7(void) = AliasedUse : ~m? # 2098| v2098_8(void) = ExitFunction : +# 2109| char* test_strtod(char*) +# 2109| Block 0 +# 2109| v2109_1(void) = EnterFunction : +# 2109| mu2109_2(unknown) = AliasedDefinition : +# 2109| mu2109_3(unknown) = InitializeNonLocal : +# 2109| r2109_4(glval) = VariableAddress[s] : +# 2109| mu2109_5(char *) = InitializeParameter[s] : &:r2109_4 +# 2109| r2109_6(char *) = Load[s] : &:r2109_4, ~m? 
+# 2109| mu2109_7(unknown) = InitializeIndirection[s] : &:r2109_6 +# 2110| r2110_1(glval) = VariableAddress[end] : +# 2110| mu2110_2(char *) = Uninitialized[end] : &:r2110_1 +# 2111| r2111_1(glval) = VariableAddress[d] : +# 2111| r2111_2(glval) = FunctionAddress[strtod] : +# 2111| r2111_3(glval) = VariableAddress[s] : +# 2111| r2111_4(char *) = Load[s] : &:r2111_3, ~m? +# 2111| r2111_5(char *) = Convert : r2111_4 +# 2111| r2111_6(glval) = VariableAddress[end] : +# 2111| r2111_7(char **) = CopyValue : r2111_6 +# 2111| r2111_8(double) = Call[strtod] : func:r2111_2, 0:r2111_5, 1:r2111_7 +# 2111| v2111_9(void) = ^BufferReadSideEffect[0] : &:r2111_5, ~m? +# 2111| mu2111_10(char *) = ^IndirectMayWriteSideEffect[1] : &:r2111_7 +# 2111| mu2111_11(double) = Store[d] : &:r2111_1, r2111_8 +# 2112| r2112_1(glval) = VariableAddress[#return] : +# 2112| r2112_2(glval) = VariableAddress[end] : +# 2112| r2112_3(char *) = Load[end] : &:r2112_2, ~m? +# 2112| mu2112_4(char *) = Store[#return] : &:r2112_1, r2112_3 +# 2109| v2109_8(void) = ReturnIndirection[s] : &:r2109_6, ~m? +# 2109| r2109_9(glval) = VariableAddress[#return] : +# 2109| v2109_10(void) = ReturnValue : &:r2109_9, ~m? +# 2109| v2109_11(void) = AliasedUse : ~m? +# 2109| v2109_12(void) = ExitFunction : + perf-regression.cpp: # 6| void Big::Big() # 6| Block 0