Merge pull request #15875 from MathiasVP/bring-back-type-barriers-in-non-constant-format

C++: Clean up `cpp/non-constant-format`
This commit is contained in:
Mathias Vorreiter Pedersen
2024-03-14 10:51:23 +00:00
committed by GitHub
4 changed files with 76 additions and 58 deletions

View File

@@ -37,6 +37,37 @@ class UncalledFunction extends Function {
}
}
/** The `unsigned short` type. */
class UnsignedShort extends ShortType {
UnsignedShort() { this.isUnsigned() }
}
/**
* Holds if `t` cannot refer to a string. That is, it's a built-in
* or arithmetic type that is not a "`char` like" type.
*/
predicate cannotContainString(Type t) {
exists(Type unspecified |
unspecified = t.getUnspecifiedType() and
not unspecified instanceof UnknownType and
not unspecified instanceof CharType and
not unspecified instanceof WideCharType and
not unspecified instanceof Char8Type and
not unspecified instanceof Char16Type and
not unspecified instanceof Char32Type and
// C often defines `wchar_t` as `unsigned short`
not unspecified instanceof UnsignedShort
|
unspecified instanceof ArithmeticType or
unspecified instanceof BuiltInType
)
}
predicate dataFlowOrTaintFlowFunction(Function func, FunctionOutput output) {
func.(DataFlowFunction).hasDataFlow(_, output) or
func.(TaintFunction).hasTaintFlow(_, output)
}
/**
* Holds if `node` is a non-constant source of data flow for non-const format string detection.
* This is defined as either:
@@ -69,7 +100,9 @@ predicate isNonConst(DataFlow::Node node) {
// Parameters of uncalled functions that aren't const
exists(UncalledFunction f, Parameter p |
f.getAParameter() = p and
p = node.asParameter() and
// We pick the indirection of the parameter since this query is focused
// on strings.
p = node.asParameter(1) and
// Ignore main's argv parameter as it is already considered a `FlowSource`
// not ignoring it will result in path redundancies
(f.getName() = "main" implies p != f.getParameter(1))
@@ -82,30 +115,27 @@ predicate isNonConst(DataFlow::Node node) {
// are considered as possible non-const sources
// The function's output must also not be const to be considered a non-const source
exists(Function func, CallInstruction call |
// NOTE: could use `Call` getAnArgument() instead of `CallInstruction` but requires two
// variables representing the same call in ordoer to use `callOutput` below.
exists(Expr arg |
call.getPositionalArgumentOperand(_).getDef().getUnconvertedResultExpression() = arg and
arg = node.asDefiningArgument()
)
or
call.getUnconvertedResultExpression() = node.asIndirectExpr()
not func.hasDefinition() and
func = call.getStaticCallTarget()
|
func = call.getStaticCallTarget() and
// Case 1: It's a known dataflow or taintflow function with flow to the return value
call.getUnconvertedResultExpression() = node.asIndirectExpr() and
not exists(FunctionOutput output |
// NOTE: we must include dataflow and taintflow. e.g., including only dataflow we will find sprintf
// variant function's output are now possible non-const sources
pragma[only_bind_out](func).(DataFlowFunction).hasDataFlow(_, output) or
pragma[only_bind_out](func).(TaintFunction).hasTaintFlow(_, output)
|
dataFlowOrTaintFlowFunction(func, output) and
output.isReturnValueDeref(_) and
node = callOutput(call, output)
)
) and
not exists(Call c |
c.getTarget().hasDefinition() and
if node instanceof DataFlow::DefinitionByReferenceNode
then c.getAnArgument() = node.asDefiningArgument()
else c = [node.asExpr(), node.asIndirectExpr()]
or
// Case 2: It's a known dataflow or taintflow function with flow to an output parameter
exists(int i |
call.getPositionalArgumentOperand(i).getDef().getUnconvertedResultExpression() =
node.asDefiningArgument() and
not exists(FunctionOutput output |
dataFlowOrTaintFlowFunction(func, output) and
output.isParameterDeref(i, _) and
node = callOutput(call, output)
)
)
)
}
@@ -114,18 +144,29 @@ predicate isNonConst(DataFlow::Node node) {
* `FormattingFunctionCall`.
*/
predicate isSinkImpl(DataFlow::Node sink, Expr formatString) {
[sink.asExpr(), sink.asIndirectExpr()] = formatString and
sink.asIndirectExpr() = formatString and
exists(FormattingFunctionCall fc | formatString = fc.getArgument(fc.getFormatParameterIndex()))
}
module NonConstFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isNonConst(source) }
predicate isSource(DataFlow::Node source) {
exists(Type t |
isNonConst(source) and
t = source.getType() and
not cannotContainString(t)
)
}
predicate isSink(DataFlow::Node sink) { isSinkImpl(sink, _) }
predicate isBarrier(DataFlow::Node node) {
// Ignore tracing non-const through array indices
exists(ArrayExpr a | a.getArrayOffset() = node.asExpr())
exists(ArrayExpr a | a.getArrayOffset() = node.asIndirectExpr())
or
exists(Type t |
t = node.getType() and
cannotContainString(t)
)
}
}

View File

@@ -8,15 +8,10 @@ edges
| nested.cpp:35:19:35:21 | *fmt | nested.cpp:27:32:27:34 | *fmt | provenance | |
| nested.cpp:42:24:42:34 | *call to ext_fmt_str | nested.cpp:34:37:34:39 | *fmt | provenance | |
| nested.cpp:86:19:86:46 | *call to __builtin_alloca | nested.cpp:87:18:87:20 | *fmt | provenance | |
| test.cpp:27:39:27:39 | n | test.cpp:27:13:27:24 | **make_message | provenance | |
| test.cpp:46:14:46:17 | argc | test.cpp:51:23:51:30 | ... - ... | provenance | |
| test.cpp:46:27:46:30 | **argv | test.cpp:130:20:130:26 | *access to array | provenance | |
| test.cpp:51:23:51:30 | ... - ... | test.cpp:27:39:27:39 | n | provenance | |
| test.cpp:51:23:51:30 | ... - ... | test.cpp:51:10:51:21 | *call to make_message | provenance | |
| test.cpp:155:27:155:30 | data | test.cpp:157:12:157:15 | data | provenance | |
| test.cpp:167:31:167:34 | data | test.cpp:170:12:170:14 | *res | provenance | |
| test.cpp:193:32:193:34 | str | test.cpp:195:31:195:33 | str | provenance | |
| test.cpp:193:32:193:34 | str | test.cpp:197:11:197:14 | *wstr | provenance | |
| test.cpp:167:31:167:34 | *data | test.cpp:170:12:170:14 | *res | provenance | |
| test.cpp:193:32:193:34 | *str | test.cpp:195:31:195:33 | *str | provenance | |
| test.cpp:193:32:193:34 | *str | test.cpp:197:11:197:14 | *wstr | provenance | |
| test.cpp:204:25:204:36 | *call to get_string | test.cpp:205:12:205:20 | *... + ... | provenance | |
| test.cpp:204:25:204:36 | *call to get_string | test.cpp:206:12:206:16 | *hello | provenance | |
| test.cpp:209:25:209:36 | *call to get_string | test.cpp:211:12:211:16 | *hello | provenance | |
@@ -42,19 +37,12 @@ nodes
| nested.cpp:79:32:79:38 | *call to get_fmt | semmle.label | *call to get_fmt |
| nested.cpp:86:19:86:46 | *call to __builtin_alloca | semmle.label | *call to __builtin_alloca |
| nested.cpp:87:18:87:20 | *fmt | semmle.label | *fmt |
| test.cpp:27:13:27:24 | **make_message | semmle.label | **make_message |
| test.cpp:27:39:27:39 | n | semmle.label | n |
| test.cpp:46:14:46:17 | argc | semmle.label | argc |
| test.cpp:46:27:46:30 | **argv | semmle.label | **argv |
| test.cpp:51:10:51:21 | *call to make_message | semmle.label | *call to make_message |
| test.cpp:51:23:51:30 | ... - ... | semmle.label | ... - ... |
| test.cpp:130:20:130:26 | *access to array | semmle.label | *access to array |
| test.cpp:155:27:155:30 | data | semmle.label | data |
| test.cpp:157:12:157:15 | data | semmle.label | data |
| test.cpp:167:31:167:34 | data | semmle.label | data |
| test.cpp:167:31:167:34 | *data | semmle.label | *data |
| test.cpp:170:12:170:14 | *res | semmle.label | *res |
| test.cpp:193:32:193:34 | str | semmle.label | str |
| test.cpp:195:31:195:33 | str | semmle.label | str |
| test.cpp:193:32:193:34 | *str | semmle.label | *str |
| test.cpp:195:31:195:33 | *str | semmle.label | *str |
| test.cpp:197:11:197:14 | *wstr | semmle.label | *wstr |
| test.cpp:204:25:204:36 | *call to get_string | semmle.label | *call to get_string |
| test.cpp:205:12:205:20 | *... + ... | semmle.label | *... + ... |
@@ -74,7 +62,6 @@ nodes
| test.cpp:245:25:245:36 | *call to get_string | semmle.label | *call to get_string |
| test.cpp:247:12:247:16 | *hello | semmle.label | *hello |
subpaths
| test.cpp:51:23:51:30 | ... - ... | test.cpp:27:39:27:39 | n | test.cpp:27:13:27:24 | **make_message | test.cpp:51:10:51:21 | *call to make_message |
#select
| NonConstantFormat.c:30:10:30:16 | *access to array | NonConstantFormat.c:28:27:28:30 | **argv | NonConstantFormat.c:30:10:30:16 | *access to array | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | NonConstantFormat.c:30:3:30:8 | call to printf | printf |
| NonConstantFormat.c:41:9:41:45 | *call to any_random_function | NonConstantFormat.c:41:9:41:45 | *call to any_random_function | NonConstantFormat.c:41:9:41:45 | *call to any_random_function | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | NonConstantFormat.c:41:2:41:7 | call to printf | printf |
@@ -82,12 +69,10 @@ subpaths
| nested.cpp:21:23:21:26 | *fmt0 | nested.cpp:42:24:42:34 | *call to ext_fmt_str | nested.cpp:21:23:21:26 | *fmt0 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:21:5:21:12 | call to snprintf | snprintf |
| nested.cpp:79:32:79:38 | *call to get_fmt | nested.cpp:79:32:79:38 | *call to get_fmt | nested.cpp:79:32:79:38 | *call to get_fmt | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:79:5:79:14 | call to diagnostic | diagnostic |
| nested.cpp:87:18:87:20 | *fmt | nested.cpp:86:19:86:46 | *call to __builtin_alloca | nested.cpp:87:18:87:20 | *fmt | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | nested.cpp:87:7:87:16 | call to diagnostic | diagnostic |
| test.cpp:51:10:51:21 | *call to make_message | test.cpp:46:14:46:17 | argc | test.cpp:51:10:51:21 | *call to make_message | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:51:3:51:8 | call to printf | printf |
| test.cpp:130:20:130:26 | *access to array | test.cpp:46:27:46:30 | **argv | test.cpp:130:20:130:26 | *access to array | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:130:2:130:10 | call to sprintf | sprintf |
| test.cpp:157:12:157:15 | data | test.cpp:155:27:155:30 | data | test.cpp:157:12:157:15 | data | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:157:5:157:10 | call to printf | printf |
| test.cpp:170:12:170:14 | *res | test.cpp:167:31:167:34 | data | test.cpp:170:12:170:14 | *res | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:170:5:170:10 | call to printf | printf |
| test.cpp:195:31:195:33 | str | test.cpp:193:32:193:34 | str | test.cpp:195:31:195:33 | str | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:195:3:195:18 | call to StringCchPrintfW | StringCchPrintfW |
| test.cpp:197:11:197:14 | *wstr | test.cpp:193:32:193:34 | str | test.cpp:197:11:197:14 | *wstr | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:197:3:197:9 | call to wprintf | wprintf |
| test.cpp:170:12:170:14 | *res | test.cpp:167:31:167:34 | *data | test.cpp:170:12:170:14 | *res | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:170:5:170:10 | call to printf | printf |
| test.cpp:195:31:195:33 | *str | test.cpp:193:32:193:34 | *str | test.cpp:195:31:195:33 | *str | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:195:3:195:18 | call to StringCchPrintfW | StringCchPrintfW |
| test.cpp:197:11:197:14 | *wstr | test.cpp:193:32:193:34 | *str | test.cpp:197:11:197:14 | *wstr | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:197:3:197:9 | call to wprintf | wprintf |
| test.cpp:205:12:205:20 | *... + ... | test.cpp:204:25:204:36 | *call to get_string | test.cpp:205:12:205:20 | *... + ... | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:205:5:205:10 | call to printf | printf |
| test.cpp:206:12:206:16 | *hello | test.cpp:204:25:204:36 | *call to get_string | test.cpp:206:12:206:16 | *hello | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:206:5:206:10 | call to printf | printf |
| test.cpp:211:12:211:16 | *hello | test.cpp:209:25:209:36 | *call to get_string | test.cpp:211:12:211:16 | *hello | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | test.cpp:211:5:211:10 | call to printf | printf |

View File

@@ -48,7 +48,7 @@ int main(int argc, char **argv) {
printf(choose_message(argc - 1), argc - 1); // GOOD
printf(messages[1]); // GOOD
printf(message); // GOOD
printf(make_message(argc - 1)); // BAD
printf(make_message(argc - 1)); // BAD [NOT DETECTED]
printf("Hello, World\n"); // GOOD
printf(_("Hello, World\n")); // GOOD
{
@@ -154,7 +154,7 @@ void print_ith_message() {
void fmt_via_strcpy(char *data) {
strcpy(data, "some string");
printf(data); // GOOD [FALSE POSITIVE: Due to inaccurate dataflow killers]
printf(data); // GOOD
}
void fmt_with_assignment() {

View File

@@ -23,9 +23,7 @@ edges
| consts.cpp:106:13:106:19 | *call to varFunc | consts.cpp:107:9:107:10 | *v5 | provenance | |
| consts.cpp:111:7:111:13 | *call to varFunc | consts.cpp:112:9:112:10 | *v6 | provenance | |
| consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | *v11 | provenance | |
| consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | v11 | provenance | |
| consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | *v12 | provenance | |
| consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | v12 | provenance | |
nodes
| consts.cpp:24:7:24:9 | **gv1 | semmle.label | **gv1 |
| consts.cpp:29:7:29:25 | **nonConstFuncToArray | semmle.label | **nonConstFuncToArray |
@@ -47,13 +45,9 @@ nodes
| consts.cpp:130:9:130:10 | *v9 | semmle.label | *v9 |
| consts.cpp:135:9:135:11 | *v10 | semmle.label | *v10 |
| consts.cpp:139:13:139:16 | readString output argument | semmle.label | readString output argument |
| consts.cpp:139:13:139:16 | readString output argument | semmle.label | readString output argument |
| consts.cpp:140:9:140:11 | *v11 | semmle.label | *v11 |
| consts.cpp:140:9:140:11 | v11 | semmle.label | v11 |
| consts.cpp:144:16:144:18 | readStringRef output argument | semmle.label | readStringRef output argument |
| consts.cpp:144:16:144:18 | readStringRef output argument | semmle.label | readStringRef output argument |
| consts.cpp:145:9:145:11 | *v12 | semmle.label | *v12 |
| consts.cpp:145:9:145:11 | v12 | semmle.label | v12 |
subpaths
#select
| consts.cpp:86:9:86:10 | *v1 | consts.cpp:85:7:85:8 | gets output argument | consts.cpp:86:9:86:10 | *v1 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:86:2:86:7 | call to printf | printf |
@@ -78,6 +72,4 @@ subpaths
| consts.cpp:135:9:135:11 | *v10 | consts.cpp:85:7:85:8 | gets output argument | consts.cpp:135:9:135:11 | *v10 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:135:2:135:7 | call to printf | printf |
| consts.cpp:135:9:135:11 | *v10 | consts.cpp:90:12:90:13 | gets output argument | consts.cpp:135:9:135:11 | *v10 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:135:2:135:7 | call to printf | printf |
| consts.cpp:140:9:140:11 | *v11 | consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | *v11 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:140:2:140:7 | call to printf | printf |
| consts.cpp:140:9:140:11 | v11 | consts.cpp:139:13:139:16 | readString output argument | consts.cpp:140:9:140:11 | v11 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:140:2:140:7 | call to printf | printf |
| consts.cpp:145:9:145:11 | *v12 | consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | *v12 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:145:2:145:7 | call to printf | printf |
| consts.cpp:145:9:145:11 | v12 | consts.cpp:144:16:144:18 | readStringRef output argument | consts.cpp:145:9:145:11 | v12 | The format string argument to $@ has a source which cannot be verified to originate from a string literal. | consts.cpp:145:2:145:7 | call to printf | printf |