diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index fea47e1b5bd..aa5cfbe1fab 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -21,6 +21,40 @@ private predicate predictableInstruction(Instruction instr) { predictableInstruction(instr.(UnaryInstruction).getUnary()) } +/** + * Functions that we should only allow taint to flow through (to the return + * value) if all but the source argument are 'predictable'. This is done to + * emulate the old security library's implementation rather than due to any + * strong belief that this is the right approach. + * + * Note that the list itself is not very principled; it consists of all the + * functions listed in the old security library's [default] `isPureFunction` + * that have more than one argument, but are not in the old taint tracking + * library's `returnArgument` predicate. In addition, `strlen` is included + * because it's also a special case in flow to return values. + */ +predicate predictableOnlyFlow(string name) { + name = "strcasestr" or + name = "strchnul" or + name = "strchr" or + name = "strchrnul" or + name = "strcmp" or + name = "strcspn" or + name = "strlen" or // special case + name = "strncmp" or + name = "strndup" or + name = "strnlen" or + name = "strrchr" or + name = "strspn" or + name = "strstr" or + name = "strtod" or + name = "strtof" or + name = "strtol" or + name = "strtoll" or + name = "strtoq" or + name = "strtoul" +} + private DataFlow::Node getNodeForSource(Expr source) { isUserInput(source, _) and ( @@ -123,15 +157,16 @@ private predicate nodeIsBarrier(DataFlow::Node node) { private predicate instructionTaintStep(Instruction i1, Instruction i2) { // Expressions computed from tainted data are also tainted - i2 = - any(CallInstruction call | - isPureFunction(call.getStaticCallTarget().getName()) and - call.getAnArgument() = i1 and - forall(Instruction arg | arg = call.getAnArgument() | arg = i1 or predictableInstruction(arg)) and - // flow through `strlen` tends to cause dubious results, if the length is - // bounded. - not call.getStaticCallTarget().getName() = "strlen" - ) + exists(CallInstruction call, int argIndex | call = i2 | + isPureFunction(call.getStaticCallTarget().getName()) and + i1 = getACallArgumentOrIndirection(call, argIndex) and + forall(Instruction arg | arg = call.getAnArgument() | + arg = getACallArgumentOrIndirection(call, argIndex) or predictableInstruction(arg) + ) and + // flow through `strlen` tends to cause dubious results, if the length is + // bounded. + not call.getStaticCallTarget().getName() = "strlen" + ) or // Flow through pointer dereference i2.(LoadInstruction).getSourceAddress() = i1 @@ -172,7 +207,8 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) { any(CallInstruction call | exists(int indexIn | modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and - i1 = getACallArgumentOrIndirection(call, indexIn) + i1 = getACallArgumentOrIndirection(call, indexIn) and + not predictableOnlyFlow(call.getStaticCallTarget().getName()) ) ) or diff --git a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected index bd82e48f8c6..ed954b5444a 100644 --- a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected @@ -1,15 +1,8 @@ -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:6:40:33 | ! ... | IR only | -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:7:40:12 | call to strcmp | IR only | -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:7:40:33 | (bool)... | IR only | | test.cpp:49:23:49:28 | call to getenv | test.cpp:50:15:50:24 | envStr_ptr | AST only | | test.cpp:49:23:49:28 | call to getenv | test.cpp:50:28:50:40 | & ... | AST only | | test.cpp:49:23:49:28 | call to getenv | test.cpp:50:29:50:40 | envStrGlobal | AST only | | test.cpp:49:23:49:28 | call to getenv | test.cpp:52:2:52:12 | * ... | AST only | | test.cpp:49:23:49:28 | call to getenv | test.cpp:52:3:52:12 | envStr_ptr | AST only | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:10:64:14 | bytes | IR only | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:23 | call to strlen | IR only | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:37 | (int)... | IR only | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:37 | ... + ... | IR only | | test.cpp:68:28:68:33 | call to getenv | test.cpp:11:20:11:21 | s1 | AST only | | test.cpp:68:28:68:33 | call to getenv | test.cpp:67:7:67:13 | copying | AST only | | test.cpp:68:28:68:33 | call to getenv | test.cpp:69:10:69:13 | copy | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected index 216d583d925..114c213ff54 100644 --- a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected @@ -14,9 +14,6 @@ | test.cpp:38:23:38:28 | call to getenv | test.cpp:38:14:38:19 | envStr | | | test.cpp:38:23:38:28 | call to getenv | test.cpp:38:23:38:28 | call to getenv | | | test.cpp:38:23:38:28 | call to getenv | test.cpp:38:23:38:40 | (const char *)... | | -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:6:40:33 | ! ... | | -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:7:40:12 | call to strcmp | | -| test.cpp:38:23:38:28 | call to getenv | test.cpp:40:7:40:33 | (bool)... | | | test.cpp:38:23:38:28 | call to getenv | test.cpp:40:14:40:19 | envStr | | | test.cpp:49:23:49:28 | call to getenv | test.cpp:8:24:8:25 | s1 | | | test.cpp:49:23:49:28 | call to getenv | test.cpp:45:13:45:24 | envStrGlobal | | @@ -32,10 +29,6 @@ | test.cpp:60:29:60:34 | call to getenv | test.cpp:60:18:60:25 | userName | | | test.cpp:60:29:60:34 | call to getenv | test.cpp:60:29:60:34 | call to getenv | | | test.cpp:60:29:60:34 | call to getenv | test.cpp:60:29:60:47 | (const char *)... | | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:10:64:14 | bytes | | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:23 | call to strlen | | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:37 | (int)... | | -| test.cpp:60:29:60:34 | call to getenv | test.cpp:64:18:64:37 | ... + ... | | | test.cpp:60:29:60:34 | call to getenv | test.cpp:64:25:64:32 | userName | | | test.cpp:68:28:68:33 | call to getenv | test.cpp:11:36:11:37 | s2 | | | test.cpp:68:28:68:33 | call to getenv | test.cpp:68:17:68:24 | userName | | diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-807/semmle/TaintedCondition/test.cpp b/cpp/ql/test/query-tests/Security/CWE/CWE-807/semmle/TaintedCondition/test.cpp index fb6fffb1c44..0430d861095 100644 --- a/cpp/ql/test/query-tests/Security/CWE/CWE-807/semmle/TaintedCondition/test.cpp +++ b/cpp/ql/test/query-tests/Security/CWE/CWE-807/semmle/TaintedCondition/test.cpp @@ -35,7 +35,7 @@ void processRequest() adminPrivileges = 0; // OK, since it's a 0 and not a 1 } - // BAD, but it requires pointer analysis to catch + // BAD (requires pointer analysis to catch) const char** userp = ¤tUser; *userp = userName; if (!strcmp(currentUser, "admin")) {