From 697b2dcde8faafcc585c41a08dd1036f2ab53e09 Mon Sep 17 00:00:00 2001
From: Dave Bartolomeo
Date: Mon, 12 Apr 2021 18:11:10 -0400
Subject: [PATCH] C++: Add missing store step for single-field struct use

We have special code to handle field flow for single-field structs, but
that special case was too specific. Some `Store`s to single-field structs
have no `Chi` instruction, which is the case that we handled already.
However, it is possible for the `Store` to have a `Chi` instruction (e.g.
for `{AllAliased}`), but still have a use of the result of the `Store`
directly. We now add a `PostUpdateNode` for the result of the `Store`
itself in those cases, just like we already did if the `Store` had no
`Chi`.
---
 .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 15 +++++--
 .../ir/dataflow/internal/PrintIRLocalFlow.qll | 29 +-------------
 .../dataflow/internal/PrintIRStoreSteps.qll   | 33 ++++++++++++++++
 .../ir/dataflow/internal/PrintIRUtilities.qll | 39 +++++++++++++++++++
 .../dataflow-ir-consistency.expected          |  2 +
 .../fields/dataflow-ir-consistency.expected   |  4 ++
 .../dataflow/fields/ir-path-flow.expected     |  6 +--
 7 files changed, 93 insertions(+), 35 deletions(-)
 create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll
 create mode 100644 cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll

diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
index 76ca7b215dc..81a07ad9d04 100644
--- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
+++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
@@ -362,15 +362,22 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {

 /**
  * Not every store instruction generates a chi instruction that we can attach a PostUpdateNode to.
- * For instance, an update to a field of a struct containing only one field. For these cases we
- * attach the PostUpdateNode to the store instruction. There's no obvious pre update node for this case
- * (as the entire memory is updated), so `getPreUpdateNode` is implemented as `none()`.
+ * For instance, an update to a field of a struct containing only one field. Even if the store does
+ * have a chi instruction, a subsequent use of the result of the store may be linked directly to the
+ * result of the store as an inexact definition if the store totally overlaps the use. For these
+ * cases we attach the PostUpdateNode to the store instruction. There's no obvious pre update node
+ * for this case (as the entire memory is updated), so `getPreUpdateNode` is implemented as
+ * `none()`.
  */
 private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode {
   override StoreInstruction instr;

   ExplicitSingleFieldStoreQualifierNode() {
-    not exists(ChiInstruction chi | chi.getPartial() = instr) and
+    (
+      instr.getAUse().isDefinitionInexact()
+      or
+      not exists(ChiInstruction chi | chi.getPartial() = instr)
+    ) and
     // Without this condition any store would create a `PostUpdateNode`.
     instr.getDestinationAddress() instanceof FieldAddressInstruction
   }
diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll
index 337dc71a3ca..16182296e40 100644
--- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll
+++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll
@@ -6,34 +6,7 @@ private import semmle.code.cpp.ir.ValueNumbering
 private import semmle.code.cpp.ir.IR
 private import semmle.code.cpp.ir.dataflow.DataFlow
 private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
-
-/**
- * Gets a short ID for an IR dataflow node.
- * - For `Instruction`s, this is just the result ID of the instruction (e.g. `m128`).
- * - For `Operand`s, this is the label of the operand, prefixed with the result ID of the
- *   instruction and a dot (e.g. `m128.left`).
- * - For `Variable`s, this is the qualified name of the variable.
- */
-private string nodeId(DataFlow::Node node, int order1, int order2) {
-  exists(Instruction instruction | instruction = node.asInstruction() |
-    result = instruction.getResultId() and
-    order1 = instruction.getBlock().getDisplayIndex() and
-    order2 = instruction.getDisplayIndexInBlock()
-  )
-  or
-  exists(Operand operand, Instruction instruction |
-    operand = node.asOperand() and
-    instruction = operand.getUse()
-  |
-    result = instruction.getResultId() + "." + operand.getDumpId() and
-    order1 = instruction.getBlock().getDisplayIndex() and
-    order2 = instruction.getDisplayIndexInBlock()
-  )
-  or
-  result = "var(" + node.asVariable().getQualifiedName() + ")" and
-  order1 = 1000000 and
-  order2 = 0
-}
+private import PrintIRUtilities

 /**
  * Gets the local dataflow from other nodes in the same function to this node.
diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll
new file mode 100644
index 00000000000..8c318216217
--- /dev/null
+++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll
@@ -0,0 +1,33 @@
+/**
+ * Print the dataflow local store steps in IR dumps.
+ */
+
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
+private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
+private import PrintIRUtilities
+
+/**
+ * Property provider for local IR dataflow store steps.
+ */
+class LocalFlowPropertyProvider extends IRPropertyProvider {
+  override string getInstructionProperty(Instruction instruction, string key) {
+    exists(DataFlow::Node objectNode, Content content |
+      key = "content[" + content.toString() + "]" and
+      instruction = objectNode.asInstruction() and
+      result =
+        strictconcat(string element, DataFlow::Node fieldNode |
+          storeStep(fieldNode, content, objectNode) and
+          element = nodeId(fieldNode, _, _)
+        |
+          element, ", "
+        )
+    )
+  }
+}
diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll
new file mode 100644
index 00000000000..5fc15cf986c
--- /dev/null
+++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll
@@ -0,0 +1,39 @@
+/**
+ * Shared utilities used when printing dataflow annotations in IR dumps.
+ */
+
+private import cpp
+// The `ValueNumbering` library has to be imported right after `cpp` to ensure
+// that the cached IR gets the same checksum here as it does in queries that use
+// `ValueNumbering` without `DataFlow`.
+private import semmle.code.cpp.ir.ValueNumbering
+private import semmle.code.cpp.ir.IR
+private import semmle.code.cpp.ir.dataflow.DataFlow
+
+/**
+ * Gets a short ID for an IR dataflow node.
+ * - For `Instruction`s, this is just the result ID of the instruction (e.g. `m128`).
+ * - For `Operand`s, this is the label of the operand, prefixed with the result ID of the
+ *   instruction and a dot (e.g. `m128.left`).
+ * - For `Variable`s, this is the qualified name of the variable.
+ */
+string nodeId(DataFlow::Node node, int order1, int order2) {
+  exists(Instruction instruction | instruction = node.asInstruction() |
+    result = instruction.getResultId() and
+    order1 = instruction.getBlock().getDisplayIndex() and
+    order2 = instruction.getDisplayIndexInBlock()
+  )
+  or
+  exists(Operand operand, Instruction instruction |
+    operand = node.asOperand() and
+    instruction = operand.getUse()
+  |
+    result = instruction.getResultId() + "." + operand.getDumpId() and
+    order1 = instruction.getBlock().getDisplayIndex() and
+    order2 = instruction.getDisplayIndexInBlock()
+  )
+  or
+  result = "var(" + node.asVariable().getQualifiedName() + ")" and
+  order1 = 1000000 and
+  order2 = 0
+}
diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected
index fc6c97aa2a6..db9a86fbb57 100644
--- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected
+++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected
@@ -26,6 +26,7 @@ unreachableNodeCCtx
 localCallNodes
 postIsNotPre
 postHasUniquePre
+| test.cpp:373:5:373:20 | Store | PostUpdateNode should have one pre-update node but has 0. |
 uniquePostUpdate
 postIsInSameCallable
 reverseRead
@@ -82,4 +83,5 @@ postWithInFlow
 | test.cpp:125:3:125:11 | Chi | PostUpdateNode should not be the target of local flow. |
 | test.cpp:359:5:359:20 | Chi | PostUpdateNode should not be the target of local flow. |
 | test.cpp:373:5:373:20 | Chi | PostUpdateNode should not be the target of local flow. |
+| test.cpp:373:5:373:20 | Store | PostUpdateNode should not be the target of local flow. |
 | test.cpp:465:3:465:15 | Chi | PostUpdateNode should not be the target of local flow. |
diff --git a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected
index 63d3b2c0f48..fe7d8360403 100644
--- a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected
+++ b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected
@@ -20,7 +20,9 @@ unreachableNodeCCtx
 localCallNodes
 postIsNotPre
 postHasUniquePre
+| D.cpp:57:5:57:42 | Store | PostUpdateNode should have one pre-update node but has 0. |
 | simple.cpp:65:5:65:22 | Store | PostUpdateNode should have one pre-update node but has 0. |
+| simple.cpp:83:9:83:28 | Store | PostUpdateNode should have one pre-update node but has 0. |
 | simple.cpp:92:5:92:22 | Store | PostUpdateNode should have one pre-update node but has 0. |
 uniquePostUpdate
 postIsInSameCallable
@@ -54,6 +56,7 @@ postWithInFlow
 | D.cpp:49:15:49:24 | Chi | PostUpdateNode should not be the target of local flow. |
 | D.cpp:56:15:56:24 | Chi | PostUpdateNode should not be the target of local flow. |
 | D.cpp:57:5:57:42 | Chi | PostUpdateNode should not be the target of local flow. |
+| D.cpp:57:5:57:42 | Store | PostUpdateNode should not be the target of local flow. |
 | aliasing.cpp:9:3:9:22 | Chi | PostUpdateNode should not be the target of local flow. |
 | aliasing.cpp:13:3:13:21 | Chi | PostUpdateNode should not be the target of local flow. |
 | aliasing.cpp:17:3:17:21 | Chi | PostUpdateNode should not be the target of local flow. |
@@ -150,6 +153,7 @@ postWithInFlow
 | simple.cpp:23:35:23:35 | Chi | PostUpdateNode should not be the target of local flow. |
 | simple.cpp:65:5:65:22 | Store | PostUpdateNode should not be the target of local flow. |
 | simple.cpp:83:9:83:28 | Chi | PostUpdateNode should not be the target of local flow. |
+| simple.cpp:83:9:83:28 | Store | PostUpdateNode should not be the target of local flow. |
 | simple.cpp:92:5:92:22 | Store | PostUpdateNode should not be the target of local flow. |
 | struct_init.c:20:20:20:29 | Chi | PostUpdateNode should not be the target of local flow. |
 | struct_init.c:20:34:20:34 | Chi | PostUpdateNode should not be the target of local flow. |
diff --git a/cpp/ql/test/library-tests/dataflow/fields/ir-path-flow.expected b/cpp/ql/test/library-tests/dataflow/fields/ir-path-flow.expected
index 37a0dc3832a..e6234ca17f7 100644
--- a/cpp/ql/test/library-tests/dataflow/fields/ir-path-flow.expected
+++ b/cpp/ql/test/library-tests/dataflow/fields/ir-path-flow.expected
@@ -228,8 +228,8 @@ edges
 | simple.cpp:65:5:65:22 | Store [i] | simple.cpp:66:12:66:12 | Store [i] |
 | simple.cpp:65:11:65:20 | call to user_input | simple.cpp:65:5:65:22 | Store [i] |
 | simple.cpp:66:12:66:12 | Store [i] | simple.cpp:67:13:67:13 | i |
-| simple.cpp:83:9:83:28 | Chi [f1] | simple.cpp:84:14:84:20 | this indirection [f1] |
-| simple.cpp:83:17:83:26 | call to user_input | simple.cpp:83:9:83:28 | Chi [f1] |
+| simple.cpp:83:9:83:28 | Store [f1] | simple.cpp:84:14:84:20 | this indirection [f1] |
+| simple.cpp:83:17:83:26 | call to user_input | simple.cpp:83:9:83:28 | Store [f1] |
 | simple.cpp:84:14:84:20 | this indirection [f1] | simple.cpp:84:14:84:20 | call to getf2f1 |
 | simple.cpp:92:5:92:22 | Store [i] | simple.cpp:93:20:93:20 | Store [i] |
 | simple.cpp:92:11:92:20 | call to user_input | simple.cpp:92:5:92:22 | Store [i] |
@@ -494,7 +494,7 @@ nodes
 | simple.cpp:65:11:65:20 | call to user_input | semmle.label | call to user_input |
 | simple.cpp:66:12:66:12 | Store [i] | semmle.label | Store [i] |
 | simple.cpp:67:13:67:13 | i | semmle.label | i |
-| simple.cpp:83:9:83:28 | Chi [f1] | semmle.label | Chi [f1] |
+| simple.cpp:83:9:83:28 | Store [f1] | semmle.label | Store [f1] |
 | simple.cpp:83:17:83:26 | call to user_input | semmle.label | call to user_input |
 | simple.cpp:84:14:84:20 | call to getf2f1 | semmle.label | call to getf2f1 |
 | simple.cpp:84:14:84:20 | this indirection [f1] | semmle.label | this indirection [f1] |