From dec5fc0f48a964429dfdedbd08f84bc8903bd4f7 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 25 Mar 2024 10:52:24 +0000 Subject: [PATCH] C++: Switch MAD syntax from *Argument[0] style to Argument[*0] style. --- .../semmle/code/cpp/dataflow/ExternalFlow.qll | 49 +++++++++++-------- .../cpp/dataflow/internal/FlowSummaryImpl.qll | 49 ++++++++++++------- .../models-as-data/FlowSummaryNode.expected | 10 ++-- .../dataflow/models-as-data/testModels.qll | 34 ++++++------- .../dataflow/models-as-data/tests.cpp | 14 +++--- 5 files changed, 88 insertions(+), 68 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index f52735a2d64..09f4103d872 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -32,35 +32,42 @@ * or method, or a parameter. * 7. The `input` column specifies how data enters the element selected by the * first 6 columns, and the `output` column specifies how data leaves the - * element selected by the first 6 columns. An `input` can be either "", - * "Argument[n]", "Argument[n1..n2]", "ReturnValue": + * element selected by the first 6 columns. An `input` can be either: * - "": Selects a write to the selected element in case this is a field. * - "Argument[n]": Selects an argument in a call to the selected element. * The arguments are zero-indexed, and `-1` specifies the qualifier object, * that is, `*this`. - * - "Argument[n1..n2]": Similar to "Argument[n]" but select any argument in - * the given range. The range is inclusive at both ends. + * - one or more "*" can be added in front of the argument index to indicate + * indirection, for example, `Argument[*0]` indicates the first indirection + * of the 0th argument. + * - `n1..n2` syntax can be used to indicate a range of arguments, inclusive + * at both ends. One or more "*" can be added in front of the range to + * indicate indirection on all arguments in the range, for example `*n1..n2`. * - "ReturnValue": Selects a value being returned by the selected element. - * This requires that the selected element is a method with a body. + * One or more "*" can be added as an argument to indicate indirection, for + * example, "ReturnValue[*]" indicates the first indirection of the return + * value. * - * An `output` can be either "", "Argument[n]", "Argument[n1..n2]", "Parameter", - * "Parameter[n]", "Parameter[n1..n2]", or "ReturnValue": + * An `output` can be either: * - "": Selects a read of a selected field, or a selected parameter. - * - "Argument[n]": Selects the post-update value of an argument in a call to the - * selected element. That is, the value of the argument after the call returns. - * The arguments are zero-indexed, and `-1` specifies the qualifier object, - * that is, `*this`. - * - "Argument[n1..n2]": Similar to "Argument[n]" but select any argument in - * the given range. The range is inclusive at both ends. + * - "Argument[n]": Selects the post-update value of an argument in a call to + * the selected element. That is, the value of the argument after the call + * returns. The arguments are zero-indexed, and `-1` specifies the qualifier + * object, that is, `*this`. + * - one or more "*" can be added in front of the argument index to indicate + * indirection, for example, `Argument[*0]` indicates the first indirection + * of the 0th argument. + * - `n1..n2` syntax can be used to indicate a range of arguments, inclusive + * at both ends. One or more "*" can be added in front of the range to + * indicate indirection on all arguments in the range, for example `*n1..n2`. * - "Parameter": Selects the value of a parameter of the selected element. - * "Parameter" is also allowed in case the selected element is already a - * parameter itself. - * - "Parameter[n]": Similar to "Parameter" but restricted to a specific - * numbered parameter. The parameters are zero-indexed, and `-1` specifies - * the qualifier object, that is, `*this`. - * - "Parameter[n1..n2]": Similar to "Parameter[n]" but selects any parameter - * in the given range. The range is inclusive at both ends. - * - "ReturnValue": Selects the return value of a call to the selected element. + * The syntax is the same as for "Argument", for example "Parameter[0]", + * "Parameter[*0]", "Parameter[0..2]" etc. "Parameter" is also allowed in + * case the selected element is already a parameter itself. + * - "ReturnValue": Selects a value being returned by the selected element. + * One or more "*" can be added as an argument to indicate indirection, for + * example, "ReturnValue[*]" indicates the first indirection of the return + * value. * 8. The `kind` column is a tag that can be referenced from QL to determine to * which classes the interpreted elements should be added. For example, for * sources "remote" indicates a default remote flow source, and for summaries diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll index b34a1d4786e..86c031bf845 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll @@ -32,16 +32,16 @@ module Input implements InputSig { string encodeReturn(ReturnKind rk, string arg) { rk != getStandardReturnValueKind() and - result = indirectionString(rk.(NormalReturnKind).getIndirectionIndex()) + "ReturnValue" and - arg = "" + result = "ReturnValue" and + arg = indirectionString(rk.(NormalReturnKind).getIndirectionIndex()) } string encodeContent(ContentSet cs, string arg) { exists(FieldContent c | cs.isSingleton(c) and // FieldContent indices have 0 for the address, 1 for content, so we need to subtract one. - result = indirectionString(c.getIndirectionIndex() - 1) + "Field" and - arg = c.getField().getName() + result = "Field" and + arg = indirectionString(c.getIndirectionIndex() - 1) + c.getField().getName() ) } @@ -54,15 +54,17 @@ module Input implements InputSig { bindingset[token] ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { // needed to support `Argument[x..y]` ranges, `Argument[-1]`, and indirections `*Argument[0]`. - exists(int indirection | - token.getName() = indirectionString(indirection) + "Argument" and - exists(int pos | pos = AccessPath::parseInt(token.getAnArgument()) | - pos >= 0 and indirection = 0 and result = TDirectPosition(pos) + exists(int indirection, string argPosString, int argPos | + token.getName() = "Argument" and + token.getAnArgument() = indirectionString(indirection) + argPosString and + argPos = AccessPath::parseInt(argPosString) and + ( + argPos >= 0 and indirection = 0 and result = TDirectPosition(argPos) or - pos >= 0 and indirection > 0 and result = TIndirectionPosition(pos, indirection) + argPos >= 0 and indirection > 0 and result = TIndirectionPosition(argPos, indirection) or // `Argument[-1]` is the qualifier object `*this`, not the `this` pointer itself - pos = -1 and result = TIndirectionPosition(pos, indirection + 1) + argPos = -1 and result = TIndirectionPosition(argPos, indirection + 1) ) ) } @@ -70,26 +72,37 @@ module Input implements InputSig { bindingset[token] ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { // needed to support `Argument[x..y]` ranges, `Argument[-1]`, and indirections `*Argument[0]`. - exists(int indirection | - token.getName() = indirectionString(indirection) + "Parameter" and - exists(int pos | pos = AccessPath::parseInt(token.getAnArgument()) | - pos >= 0 and indirection = 0 and result = TDirectPosition(pos) + exists(int indirection, string paramPosString, int paramPos | + token.getName() = "Parameter" and + token.getAnArgument() = indirectionString(indirection) + paramPosString and + paramPos = AccessPath::parseInt(paramPosString) and + ( + paramPos >= 0 and indirection = 0 and result = TDirectPosition(paramPos) or - pos >= 0 and indirection > 0 and result = TIndirectionPosition(pos, indirection) + paramPos >= 0 and indirection > 0 and result = TIndirectionPosition(paramPos, indirection) or // `Argument[-1]` is the qualifier object `*this`, not the `this` pointer itself - pos = -1 and result = TIndirectionPosition(pos, indirection + 1) + paramPos = -1 and result = TIndirectionPosition(paramPos, indirection + 1) ) ) } bindingset[token] ContentSet decodeUnknownContent(AccessPath::AccessPathTokenBase token) { - // field content (with indirection support). + // field content (no indirection support) exists(FieldContent c | result.isSingleton(c) and + token.getName() = c.getField().getName() and + not exists(token.getArgumentList()) and + c.getIndirectionIndex() = 1 + ) + or + // field content (with indirection support) + exists(FieldContent c | + result.isSingleton(c) and + token.getName() = c.getField().getName() and // FieldContent indices have 0 for the address, 1 for content, so we need to subtract one. - token = indirectionString(c.getIndirectionIndex() - 1) + c.getField().getName() + token.getAnArgument() = indirectionString(c.getIndirectionIndex() - 1) ) } } diff --git a/cpp/ql/test/library-tests/dataflow/models-as-data/FlowSummaryNode.expected b/cpp/ql/test/library-tests/dataflow/models-as-data/FlowSummaryNode.expected index ab7f52d68df..f835211f9c0 100644 --- a/cpp/ql/test/library-tests/dataflow/models-as-data/FlowSummaryNode.expected +++ b/cpp/ql/test/library-tests/dataflow/models-as-data/FlowSummaryNode.expected @@ -1,7 +1,7 @@ | tests.cpp:128:5:128:19 | [summary param] 0 in madArg0ToReturn | ParameterNode | madArg0ToReturn | madArg0ToReturn | | tests.cpp:128:5:128:19 | [summary] to write: ReturnValue in madArg0ToReturn | ReturnNode | madArg0ToReturn | madArg0ToReturn | | tests.cpp:129:6:129:28 | [summary param] 0 in madArg0ToReturnIndirect | ParameterNode | madArg0ToReturnIndirect | madArg0ToReturnIndirect | -| tests.cpp:129:6:129:28 | [summary] to write: *ReturnValue in madArg0ToReturnIndirect | ReturnNode | madArg0ToReturnIndirect | madArg0ToReturnIndirect | +| tests.cpp:129:6:129:28 | [summary] to write: ReturnValue[*] in madArg0ToReturnIndirect | ReturnNode | madArg0ToReturnIndirect | madArg0ToReturnIndirect | | tests.cpp:131:5:131:28 | [summary param] 0 in madArg0ToReturnValueFlow | ParameterNode | madArg0ToReturnValueFlow | madArg0ToReturnValueFlow | | tests.cpp:131:5:131:28 | [summary] to write: ReturnValue in madArg0ToReturnValueFlow | ReturnNode | madArg0ToReturnValueFlow | madArg0ToReturnValueFlow | | tests.cpp:132:5:132:27 | [summary param] 0 indirection in madArg0IndirectToReturn | ParameterNode | madArg0IndirectToReturn | madArg0IndirectToReturn | @@ -23,17 +23,17 @@ | tests.cpp:139:5:139:32 | [summary] read: Argument[0 indirection].Field[value] in madArg0IndirectFieldToReturn | | madArg0IndirectFieldToReturn | madArg0IndirectFieldToReturn | | tests.cpp:139:5:139:32 | [summary] to write: ReturnValue in madArg0IndirectFieldToReturn | ReturnNode | madArg0IndirectFieldToReturn | madArg0IndirectFieldToReturn | | tests.cpp:140:5:140:32 | [summary param] 0 in madArg0FieldIndirectToReturn | ParameterNode | madArg0FieldIndirectToReturn | madArg0FieldIndirectToReturn | -| tests.cpp:140:5:140:32 | [summary] read: Argument[0].*Field[ptr] in madArg0FieldIndirectToReturn | | madArg0FieldIndirectToReturn | madArg0FieldIndirectToReturn | +| tests.cpp:140:5:140:32 | [summary] read: Argument[0].Field[*ptr] in madArg0FieldIndirectToReturn | | madArg0FieldIndirectToReturn | madArg0FieldIndirectToReturn | | tests.cpp:140:5:140:32 | [summary] to write: ReturnValue in madArg0FieldIndirectToReturn | ReturnNode | madArg0FieldIndirectToReturn | madArg0FieldIndirectToReturn | | tests.cpp:141:13:141:32 | [summary param] 0 in madArg0ToReturnField | ParameterNode | madArg0ToReturnField | madArg0ToReturnField | | tests.cpp:141:13:141:32 | [summary] to write: ReturnValue in madArg0ToReturnField | ReturnNode | madArg0ToReturnField | madArg0ToReturnField | | tests.cpp:141:13:141:32 | [summary] to write: ReturnValue.Field[value] in madArg0ToReturnField | | madArg0ToReturnField | madArg0ToReturnField | | tests.cpp:142:14:142:41 | [summary param] 0 in madArg0ToReturnIndirectField | ParameterNode | madArg0ToReturnIndirectField | madArg0ToReturnIndirectField | -| tests.cpp:142:14:142:41 | [summary] to write: *ReturnValue in madArg0ToReturnIndirectField | ReturnNode | madArg0ToReturnIndirectField | madArg0ToReturnIndirectField | -| tests.cpp:142:14:142:41 | [summary] to write: *ReturnValue.Field[value] in madArg0ToReturnIndirectField | | madArg0ToReturnIndirectField | madArg0ToReturnIndirectField | +| tests.cpp:142:14:142:41 | [summary] to write: ReturnValue[*] in madArg0ToReturnIndirectField | ReturnNode | madArg0ToReturnIndirectField | madArg0ToReturnIndirectField | +| tests.cpp:142:14:142:41 | [summary] to write: ReturnValue[*].Field[value] in madArg0ToReturnIndirectField | | madArg0ToReturnIndirectField | madArg0ToReturnIndirectField | | tests.cpp:143:13:143:40 | [summary param] 0 in madArg0ToReturnFieldIndirect | ParameterNode | madArg0ToReturnFieldIndirect | madArg0ToReturnFieldIndirect | | tests.cpp:143:13:143:40 | [summary] to write: ReturnValue in madArg0ToReturnFieldIndirect | ReturnNode | madArg0ToReturnFieldIndirect | madArg0ToReturnFieldIndirect | -| tests.cpp:143:13:143:40 | [summary] to write: ReturnValue.*Field[ptr] in madArg0ToReturnFieldIndirect | | madArg0ToReturnFieldIndirect | madArg0ToReturnFieldIndirect | +| tests.cpp:143:13:143:40 | [summary] to write: ReturnValue.Field[*ptr] in madArg0ToReturnFieldIndirect | | madArg0ToReturnFieldIndirect | madArg0ToReturnFieldIndirect | | tests.cpp:227:7:227:19 | [summary param] 0 in madArg0ToSelf | ParameterNode | madArg0ToSelf | madArg0ToSelf | | tests.cpp:227:7:227:19 | [summary param] this indirection in madArg0ToSelf | ParameterNode | madArg0ToSelf | madArg0ToSelf | | tests.cpp:227:7:227:19 | [summary] to write: Argument[this indirection] in madArg0ToSelf | PostUpdateNode | madArg0ToSelf | madArg0ToSelf | diff --git a/cpp/ql/test/library-tests/dataflow/models-as-data/testModels.qll b/cpp/ql/test/library-tests/dataflow/models-as-data/testModels.qll index 58fb2cb6019..e8aa30cb8ab 100644 --- a/cpp/ql/test/library-tests/dataflow/models-as-data/testModels.qll +++ b/cpp/ql/test/library-tests/dataflow/models-as-data/testModels.qll @@ -11,10 +11,10 @@ private class TestSources extends SourceModelCsv { ";;false;remoteMadSource;;;ReturnValue;remote", ";;false;localMadSourceVoid;;;ReturnValue;local", ";;false;localMadSourceHasBody;;;ReturnValue;local", - ";;false;remoteMadSourceIndirect;;;*ReturnValue;remote", - ";;false;remoteMadSourceDoubleIndirect;;;**ReturnValue;remote", - ";;false;remoteMadSourceIndirectArg0;;;*Argument[0];remote", - ";;false;remoteMadSourceIndirectArg1;;;*Argument[1];remote", + ";;false;remoteMadSourceIndirect;;;ReturnValue[*];remote", + ";;false;remoteMadSourceDoubleIndirect;;;ReturnValue[**];remote", + ";;false;remoteMadSourceIndirectArg0;;;Argument[*0];remote", + ";;false;remoteMadSourceIndirectArg1;;;Argument[*1];remote", ";;false;remoteMadSourceVar;;;;remote", ";;false;remoteMadSourceVarIndirect;;;*;remote", ";;false;remoteMadSourceParam0;;;Parameter[0];remote", @@ -22,7 +22,7 @@ private class TestSources extends SourceModelCsv { "MyNamespace;;false;namespaceLocalMadSourceVar;;;;local", "MyNamespace::MyNamespace2;;false;namespace2LocalMadSource;;;ReturnValue;local", ";MyClass;true;memberRemoteMadSource;;;ReturnValue;remote", - ";MyClass;true;memberRemoteMadSourceIndirectArg0;;;*Argument[0];remote", + ";MyClass;true;memberRemoteMadSourceIndirectArg0;;;Argument[*0];remote", ";MyClass;true;memberRemoteMadSourceVar;;;;remote", ";MyClass;true;subtypeRemoteMadSource1;;;ReturnValue;remote", ";MyClass;false;subtypeNonSource;;;ReturnValue;remote", // the tests define this in MyDerivedClass, so it should *not* be recongized as a source @@ -44,8 +44,8 @@ private class TestSinks extends SinkModelCsv { ";;false;madSinkArg1;;;Argument[1];test-sink", ";;false;madSinkArg01;;;Argument[0..1];test-sink", ";;false;madSinkArg02;;;Argument[0,2];test-sink", - ";;false;madSinkIndirectArg0;;;*Argument[0];test-sink", - ";;false;madSinkDoubleIndirectArg0;;;**Argument[0];test-sink", + ";;false;madSinkIndirectArg0;;;Argument[*0];test-sink", + ";;false;madSinkDoubleIndirectArg0;;;Argument[**0];test-sink", ";;false;madSinkVar;;;;test-sink", ";;false;madSinkVarIndirect;;;*;test-sink", ";;false;madSinkParam0;;;Parameter[0];test-sink", @@ -70,20 +70,20 @@ private class TestSummaries extends SummaryModelCsv { row = [ ";;false;madArg0ToReturn;;;Argument[0];ReturnValue;taint", - ";;false;madArg0ToReturnIndirect;;;Argument[0];*ReturnValue;taint", + ";;false;madArg0ToReturnIndirect;;;Argument[0];ReturnValue[*];taint", ";;false;madArg0ToReturnValueFlow;;;Argument[0];ReturnValue;value", - ";;false;madArg0IndirectToReturn;;;*Argument[0];ReturnValue;taint", - ";;false;madArg0DoubleIndirectToReturn;;;**Argument[0];ReturnValue;taint", + ";;false;madArg0IndirectToReturn;;;Argument[*0];ReturnValue;taint", + ";;false;madArg0DoubleIndirectToReturn;;;Argument[**0];ReturnValue;taint", ";;false;madArg0NotIndirectToReturn;;;Argument[0];ReturnValue;taint", - ";;false;madArg0ToArg1Indirect;;;Argument[0];*Argument[1];taint", - ";;false;madArg0IndirectToArg1Indirect;;;*Argument[0];*Argument[1];taint", + ";;false;madArg0ToArg1Indirect;;;Argument[0];Argument[*1];taint", + ";;false;madArg0IndirectToArg1Indirect;;;Argument[*0];Argument[*1];taint", ";;false;madArg0FieldToReturn;;;Argument[0].value;ReturnValue;taint", - ";;false;madArg0IndirectFieldToReturn;;;*Argument[0].value;ReturnValue;taint", - ";;false;madArg0FieldIndirectToReturn;;;Argument[0].*ptr;ReturnValue;taint", + ";;false;madArg0IndirectFieldToReturn;;;Argument[*0].value;ReturnValue;taint", + ";;false;madArg0FieldIndirectToReturn;;;Argument[0].ptr[*];ReturnValue;taint", ";;false;madArg0ToReturnField;;;Argument[0];ReturnValue.value;taint", - ";;false;madArg0ToReturnIndirectField;;;Argument[0];*ReturnValue.value;taint", - ";;false;madArg0ToReturnFieldIndirect;;;Argument[0];ReturnValue.*ptr;taint", - ";;false;madArg0ToReturnFieldNotIndirect;;;Argument[0];ReturnValue.*ptr;taint", + ";;false;madArg0ToReturnIndirectField;;;Argument[0];ReturnValue[*].value;taint", + ";;false;madArg0ToReturnFieldIndirect;;;Argument[0];ReturnValue.ptr[*];taint", + ";;false;madArg0ToReturnFieldNotIndirect;;;Argument[0];ReturnValue.ptr[*];taint", ";MyClass;true;madArg0ToSelf;;;Argument[0];Argument[-1];taint", ";MyClass;true;madSelfToReturn;;;Argument[-1];ReturnValue;taint", ";MyClass;true;madArg0ToField;;;Argument[0];Argument[-1].val;taint", diff --git a/cpp/ql/test/library-tests/dataflow/models-as-data/tests.cpp b/cpp/ql/test/library-tests/dataflow/models-as-data/tests.cpp index bc49d8a6d47..46b2737a408 100644 --- a/cpp/ql/test/library-tests/dataflow/models-as-data/tests.cpp +++ b/cpp/ql/test/library-tests/dataflow/models-as-data/tests.cpp @@ -48,11 +48,11 @@ void test_sources() { int a, b, c, d; remoteMadSourceIndirectArg0(&a, &b); - sink(a); // $ MISSING: ir - sink(a); + sink(a); // $ ir + sink(a); // $ SPURIOUS: ir remoteMadSourceIndirectArg1(c, d); sink(c); - sink(d); // $ MISSING: ir + sink(d); // $ ir sink(remoteMadSourceVar); // $ ir sink(*remoteMadSourceVarIndirect); // $ MISSING: ir @@ -100,9 +100,9 @@ void test_sinks() { int a = source(); int *a_ptr = &a; - madSinkIndirectArg0(&a); // $ MISSING: ir - madSinkIndirectArg0(a_ptr); // $ MISSING: ir - madSinkDoubleIndirectArg0(&a_ptr); // $ MISSING: ir + madSinkIndirectArg0(&a); // $ ir + madSinkIndirectArg0(a_ptr); // $ ir + madSinkDoubleIndirectArg0(&a_ptr); // $ ir madSinkVar = source(); // $ ir @@ -272,7 +272,7 @@ void test_class_members() { int a; mc.memberRemoteMadSourceIndirectArg0(&a); - sink(a); // $ MISSING: ir + sink(a); // $ ir sink(mc.memberRemoteMadSourceVar); // $ ir