Merge pull request #14867 from MathiasVP/reduce-duplication-from-operators

C++: Reduce duplication from crement operations
This commit is contained in:
Mathias Vorreiter Pedersen
2023-12-05 11:57:48 +00:00
committed by GitHub
9 changed files with 212 additions and 34 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added a new predicate `Node.asDefinition` on `DataFlow::Node`s for selecting the dataflow node corresponding to a particular definition.

View File

@@ -260,6 +260,71 @@ class Node extends TIRDataFlowNode {
*/
Expr asDefiningArgument() { result = this.asDefiningArgument(_) }
/**
* Gets the definition associated with this node, if any.
*
* For example, consider the following example
* ```cpp
* int x = 42; // 1
* x = 34; // 2
* ++x; // 3
* x++; // 4
* x += 1; // 5
* int y = x += 2; // 6
* ```
* - For (1) the result is `42`.
* - For (2) the result is `x = 34`.
* - For (3) the result is `++x`.
* - For (4) the result is `x++`.
* - For (5) the result is `x += 1`.
* - For (6) there are two results:
* - For the definition generated by `x += 2` the result is `x += 2`
* - For the definition generated by `int y = ...` the result is
* also `x += 2`.
*
* For assignments, `node.asDefinition()` and `node.asExpr()` will both exist
* for the same dataflow node. However, for expression such as `x++` that
* both write to `x` and read the current value of `x`, `node.asDefinition()`
* will give the node corresponding to the value after the increment, and
* `node.asExpr()` will give the node corresponding to the value before the
* increment. For an example of this, consider the following:
*
* ```cpp
* sink(x++);
* ```
* in the above program, there will not be flow from a node `n` such that
* `n.asDefinition() instanceof IncrementOperation` to the argument of `sink`
* since the value passed to `sink` is the value before to the increment.
* However, there will be dataflow from a node `n` such that
* `n.asExpr() instanceof IncrementOperation` since the result of evaluating
* the expression `x++` is passed to `sink`.
*/
Expr asDefinition() {
exists(StoreInstruction store |
store = this.asInstruction() and
result = asDefinitionImpl(store)
)
}
/**
* Gets the indirect definition at a given indirection corresponding to this
* node, if any.
*
* See the comments on `Node.asDefinition` for examples.
*/
Expr asIndirectDefinition(int indirectionIndex) {
exists(StoreInstruction store |
this.(IndirectInstruction).hasInstructionAndIndirectionIndex(store, indirectionIndex) and
result = asDefinitionImpl(store)
)
}
/**
* Gets the indirect definition at some indirection corresponding to this
* node, if any.
*/
Expr asIndirectDefinition() { result = this.asIndirectDefinition(_) }
/**
* Gets the argument that defines this `DefinitionByReferenceNode`, if any.
*
@@ -1142,22 +1207,6 @@ private module GetConvertedResultExpression {
}
private Expr getConvertedResultExpressionImpl0(Instruction instr) {
// For an expression such as `i += 2` we pretend that the generated
// `StoreInstruction` contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedAssignOperation tao |
result = tao.getExpr() and
instr = tao.getInstruction(any(AssignmentStoreTag tag))
)
or
// Similarly for `i++` and `++i` we pretend that the generated
// `StoreInstruction` is contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedCrementOperation tco |
result = tco.getExpr() and
instr = tco.getInstruction(any(CrementStoreTag tag))
)
or
// IR construction inserts an additional cast to a `size_t` on the extent
// of a `new[]` expression. The resulting `ConvertInstruction` doesn't have
// a result for `getConvertedResultExpression`. We remap this here so that
@@ -1165,7 +1214,7 @@ private module GetConvertedResultExpression {
// represents the extent.
exists(TranslatedNonConstantAllocationSize tas |
result = tas.getExtent().getExpr() and
instr = tas.getInstruction(any(AllocationExtentConvertTag tag))
instr = tas.getInstruction(AllocationExtentConvertTag())
)
or
// There's no instruction that returns `ParenthesisExpr`, but some queries
@@ -1174,6 +1223,39 @@ private module GetConvertedResultExpression {
result = ttc.getExpr().(ParenthesisExpr) and
instr = ttc.getResult()
)
or
// Certain expressions generate `CopyValueInstruction`s only when they
// are needed. Examples of this include crement operations and compound
// assignment operations. For example:
// ```cpp
// int x = ...
// int y = x++;
// ```
// this generate IR like:
// ```
// r1(glval<int>) = VariableAddress[x] :
// r2(int) = Constant[0] :
// m3(int) = Store[x] : &:r1, r2
// r4(glval<int>) = VariableAddress[y] :
// r5(glval<int>) = VariableAddress[x] :
// r6(int) = Load[x] : &:r5, m3
// r7(int) = Constant[1] :
// r8(int) = Add : r6, r7
// m9(int) = Store[x] : &:r5, r8
// r11(int) = CopyValue : r6
// m12(int) = Store[y] : &:r4, r11
// ```
// When the `CopyValueInstruction` is not generated there is no instruction
// whose `getConvertedResultExpression` maps back to the expression. When
// such an instruction doesn't exist it means that the old value is not
// needed, and in that case the only value that will propagate forward in
// the program is the value that's been updated. So in those cases we just
// use the result of `node.asDefinition()` as the result of `node.asExpr()`.
exists(TranslatedCoreExpr tco |
tco.getInstruction(_) = instr and
tco.producesExprResult() and
result = asDefinitionImpl0(instr)
)
}
private Expr getConvertedResultExpressionImpl(Instruction instr) {
@@ -1182,6 +1264,75 @@ private module GetConvertedResultExpression {
not exists(getConvertedResultExpressionImpl0(instr)) and
result = instr.getConvertedResultExpression()
}
/**
* Gets the result for `node.asDefinition()` (when `node` is the instruction
* node that wraps `store`) in the cases where `store.getAst()` should not be
* used to define the result of `node.asDefinition()`.
*/
private Expr asDefinitionImpl0(StoreInstruction store) {
// For an expression such as `i += 2` we pretend that the generated
// `StoreInstruction` contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedAssignOperation tao |
store = tao.getInstruction(AssignmentStoreTag()) and
result = tao.getExpr()
)
or
// Similarly for `i++` and `++i` we pretend that the generated
// `StoreInstruction` is contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedCrementOperation tco |
store = tco.getInstruction(CrementStoreTag()) and
result = tco.getExpr()
)
}
/**
* Holds if the expression returned by `store.getAst()` should not be
* returned as the result of `node.asDefinition()` when `node` is the
* instruction node that wraps `store`.
*/
private predicate excludeAsDefinitionResult(StoreInstruction store) {
// Exclude the store to the temporary generated by a ternary expression.
exists(TranslatedConditionalExpr tce |
store = tce.getInstruction(ConditionValueFalseStoreTag())
or
store = tce.getInstruction(ConditionValueTrueStoreTag())
)
}
/**
* Gets the expression that represents the result of `StoreInstruction` for
* dataflow purposes.
*
* For example, consider the following example
* ```cpp
* int x = 42; // 1
* x = 34; // 2
* ++x; // 3
* x++; // 4
* x += 1; // 5
* int y = x += 2; // 6
* ```
* For (1) the result is `42`.
* For (2) the result is `x = 34`.
* For (3) the result is `++x`.
* For (4) the result is `x++`.
* For (5) the result is `x += 1`.
* For (6) there are two results:
* - For the `StoreInstruction` generated by `x += 2` the result
* is `x += 2`
* - For the `StoreInstruction` generated by `int y = ...` the result
* is also `x += 2`
*/
Expr asDefinitionImpl(StoreInstruction store) {
not exists(asDefinitionImpl0(store)) and
not excludeAsDefinitionResult(store) and
result = store.getAst().(Expr).getUnconverted()
or
result = asDefinitionImpl0(store)
}
}
private import GetConvertedResultExpression

View File

@@ -345,6 +345,8 @@ private module PossibleYearArithmeticOperationCheckConfig implements DataFlow::C
)
}
predicate isBarrierIn(DataFlow::Node node) { isSource(node) }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
// flow from anything on the RHS of an assignment to a time/date structure to that
// assignment.

View File

@@ -35,10 +35,10 @@ predicate isSource(FS::FlowSource source, string sourceType) { sourceType = sour
predicate isSink(DataFlow::Node sink, string kind) {
exists(Expr use |
use = sink.asExpr() and
not use.getUnspecifiedType() instanceof PointerType and
outOfBoundsExpr(use, kind) and
not inSystemMacroExpansion(use)
not inSystemMacroExpansion(use) and
use = sink.asExpr()
)
}

View File

@@ -1,9 +1,5 @@
| test.cpp:173:29:173:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:2:170:47 | ... += ... | ... += ... |
| test.cpp:173:29:173:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:16:170:47 | ... * ... | ... * ... |
| test.cpp:174:30:174:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:2:170:47 | ... += ... | ... += ... |
| test.cpp:174:30:174:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:170:16:170:47 | ... * ... | ... * ... |
| test.cpp:193:15:193:24 | ... / ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:193:15:193:24 | ... / ... | ... / ... |
| test.cpp:217:29:217:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:2:214:47 | ... += ... | ... += ... |
| test.cpp:217:29:217:51 | ... & ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:16:214:47 | ... * ... | ... * ... |
| test.cpp:218:30:218:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:2:214:47 | ... += ... | ... += ... |
| test.cpp:218:30:218:45 | ... >> ... | An arithmetic operation $@ that uses a constant value of 365 ends up modifying this date/time, without considering leap year scenarios. | test.cpp:214:16:214:47 | ... * ... | ... * ... |

View File

@@ -47,6 +47,8 @@
| tests.cpp:546:6:546:10 | call to fread | This 'fread' operation may access 400 bytes but the $@ is only 100 bytes. | tests.cpp:532:7:532:16 | charBuffer | destination buffer |
| tests.cpp:569:6:569:15 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:565:7:565:12 | buffer | array |
| tests.cpp:577:7:577:13 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:565:7:565:12 | buffer | array |
| tests.cpp:637:6:637:15 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:633:7:633:12 | buffer | array |
| tests.cpp:645:7:645:13 | access to array | This array indexing operation accesses a negative index -1 on the $@. | tests.cpp:633:7:633:12 | buffer | array |
| tests_restrict.c:12:2:12:7 | call to memcpy | This 'memcpy' operation accesses 2 bytes but the $@ is only 1 byte. | tests_restrict.c:7:6:7:13 | smallbuf | source buffer |
| unions.cpp:26:2:26:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:21:10:21:11 | mu | destination buffer |
| unions.cpp:30:2:30:7 | call to memset | This 'memset' operation accesses 200 bytes but the $@ is only 100 bytes. | unions.cpp:15:7:15:11 | small | destination buffer |

View File

@@ -1,6 +1,6 @@
edges
| main.cpp:6:27:6:30 | argv indirection | main.cpp:10:20:10:23 | argv indirection |
| main.cpp:10:20:10:23 | argv indirection | tests.cpp:631:32:631:35 | argv indirection |
| main.cpp:10:20:10:23 | argv indirection | tests.cpp:657:32:657:35 | argv indirection |
| tests.cpp:613:19:613:24 | source indirection | tests.cpp:615:17:615:22 | source indirection |
| tests.cpp:622:19:622:24 | source indirection | tests.cpp:625:2:625:16 | ... = ... indirection |
| tests.cpp:625:2:625:16 | ... = ... indirection | tests.cpp:625:4:625:7 | s indirection [post update] [home indirection] |
@@ -8,10 +8,10 @@ edges
| tests.cpp:628:14:628:14 | s indirection [home indirection] | tests.cpp:628:14:628:19 | home indirection |
| tests.cpp:628:14:628:14 | s indirection [home indirection] | tests.cpp:628:16:628:19 | home indirection |
| tests.cpp:628:16:628:19 | home indirection | tests.cpp:628:14:628:19 | home indirection |
| tests.cpp:631:32:631:35 | argv indirection | tests.cpp:656:9:656:15 | access to array indirection |
| tests.cpp:631:32:631:35 | argv indirection | tests.cpp:657:9:657:15 | access to array indirection |
| tests.cpp:656:9:656:15 | access to array indirection | tests.cpp:613:19:613:24 | source indirection |
| tests.cpp:657:9:657:15 | access to array indirection | tests.cpp:622:19:622:24 | source indirection |
| tests.cpp:657:32:657:35 | argv indirection | tests.cpp:682:9:682:15 | access to array indirection |
| tests.cpp:657:32:657:35 | argv indirection | tests.cpp:683:9:683:15 | access to array indirection |
| tests.cpp:682:9:682:15 | access to array indirection | tests.cpp:613:19:613:24 | source indirection |
| tests.cpp:683:9:683:15 | access to array indirection | tests.cpp:622:19:622:24 | source indirection |
nodes
| main.cpp:6:27:6:30 | argv indirection | semmle.label | argv indirection |
| main.cpp:10:20:10:23 | argv indirection | semmle.label | argv indirection |
@@ -23,9 +23,9 @@ nodes
| tests.cpp:628:14:628:14 | s indirection [home indirection] | semmle.label | s indirection [home indirection] |
| tests.cpp:628:14:628:19 | home indirection | semmle.label | home indirection |
| tests.cpp:628:16:628:19 | home indirection | semmle.label | home indirection |
| tests.cpp:631:32:631:35 | argv indirection | semmle.label | argv indirection |
| tests.cpp:656:9:656:15 | access to array indirection | semmle.label | access to array indirection |
| tests.cpp:657:9:657:15 | access to array indirection | semmle.label | access to array indirection |
| tests.cpp:657:32:657:35 | argv indirection | semmle.label | argv indirection |
| tests.cpp:682:9:682:15 | access to array indirection | semmle.label | access to array indirection |
| tests.cpp:683:9:683:15 | access to array indirection | semmle.label | access to array indirection |
subpaths
#select
| tests.cpp:615:2:615:7 | call to strcpy | main.cpp:6:27:6:30 | argv indirection | tests.cpp:615:17:615:22 | source indirection | This 'call to strcpy' with input from $@ may overflow the destination. | main.cpp:6:27:6:30 | argv indirection | a command-line argument |

View File

@@ -628,6 +628,32 @@ void test25(char* source) {
strcpy(buf, s.home); // BAD
}
void test26(bool cond)
{
char buffer[100];
char *ptr;
int i;
if (buffer[-1] == 0) { return; } // BAD: accesses buffer[-1]
ptr = buffer;
if (cond)
{
ptr += 1;
if (ptr[-1] == 0) { return; } // GOOD: accesses buffer[0]
} else {
if (ptr[-1] == 0) { return; } // BAD: accesses buffer[-1]
}
if (ptr[-1] == 0) { return; } // BAD: accesses buffer[-1] or buffer[0] [NOT DETECTED]
ptr = buffer;
for (i = 0; i < 2; i++)
{
ptr += 1;
}
if (ptr[-1] == 0) { return; } // GOOD: accesses buffer[1]
}
int tests_main(int argc, char *argv[])
{
long long arr17[19];

View File

@@ -16,7 +16,6 @@ edges
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:131:9:131:14 | ... + ... indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:132:15:132:20 | ... + ... indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:135:9:135:12 | ... ++ indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:135:9:135:12 | ... ++ indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:136:15:136:18 | -- ... indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:139:9:139:26 | ... ? ... : ... indirection |
| argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:140:15:140:32 | ... ? ... : ... indirection |
@@ -43,7 +42,6 @@ nodes
| argvLocal.c:131:9:131:14 | ... + ... indirection | semmle.label | ... + ... indirection |
| argvLocal.c:132:15:132:20 | ... + ... indirection | semmle.label | ... + ... indirection |
| argvLocal.c:135:9:135:12 | ... ++ indirection | semmle.label | ... ++ indirection |
| argvLocal.c:135:9:135:12 | ... ++ indirection | semmle.label | ... ++ indirection |
| argvLocal.c:136:15:136:18 | -- ... indirection | semmle.label | -- ... indirection |
| argvLocal.c:139:9:139:26 | ... ? ... : ... indirection | semmle.label | ... ? ... : ... indirection |
| argvLocal.c:140:15:140:32 | ... ? ... : ... indirection | semmle.label | ... ? ... : ... indirection |
@@ -70,7 +68,6 @@ subpaths
| argvLocal.c:131:9:131:14 | ... + ... indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:131:9:131:14 | ... + ... indirection | The value of this argument may come from $@ and is being used as a formatting argument to printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:132:15:132:20 | ... + ... indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:132:15:132:20 | ... + ... indirection | The value of this argument may come from $@ and is being used as a formatting argument to printWrapper(correct), which calls printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:135:9:135:12 | ... ++ indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:135:9:135:12 | ... ++ indirection | The value of this argument may come from $@ and is being used as a formatting argument to printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:135:9:135:12 | ... ++ indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:135:9:135:12 | ... ++ indirection | The value of this argument may come from $@ and is being used as a formatting argument to printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:136:15:136:18 | -- ... indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:136:15:136:18 | -- ... indirection | The value of this argument may come from $@ and is being used as a formatting argument to printWrapper(correct), which calls printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:139:9:139:26 | ... ? ... : ... indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:139:9:139:26 | ... ? ... : ... indirection | The value of this argument may come from $@ and is being used as a formatting argument to printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |
| argvLocal.c:140:15:140:32 | ... ? ... : ... indirection | argvLocal.c:13:27:13:30 | argv indirection | argvLocal.c:140:15:140:32 | ... ? ... : ... indirection | The value of this argument may come from $@ and is being used as a formatting argument to printWrapper(correct), which calls printf(format). | argvLocal.c:13:27:13:30 | argv indirection | a command-line argument |