C++: Data flow through address-of operator (&)

The data flow library conflates pointers and their objects in some
places but not others. For example, a member function call `x.f()` will
cause flow from `x` of type `T` to `this` of type `T*` inside `f`. It
might be ideal to avoid that conflation, but that's not realistic
without using the intermediate representation (IR).

We've had good experience in the taint tracking library with conflating
pointers and objects, and it improves results for field flow, so perhaps
it's time to try it out for all data flow.
This commit is contained in:
Jonas Jensen
2019-09-06 11:26:23 +02:00
parent 396a72db5f
commit fd6d06fe6f
8 changed files with 62 additions and 7 deletions

View File

@@ -28,6 +28,7 @@ The following changes in version 1.23 affect C/C++ analysis in all applications.
picture of the partial flow paths from a given source. The feature is
disabled by default and can be enabled for individual configurations by
overriding `int explorationLimit()`.
* The data-flow library now allows flow through the address-of operator (`&`).
* The `DataFlow::DefinitionByReferenceNode` class now considers `f(x)` to be a
definition of `x` when `x` is a variable of pointer type. It no longer
considers deep paths such as `f(&x.myField)` to be definitions of `x`. These

View File

@@ -553,6 +553,10 @@ private predicate exprToExprStep_nocfg(Expr fromExpr, Expr toExpr) {
or
toExpr = any(StmtExpr stmtExpr | fromExpr = stmtExpr.getResultExpr())
or
toExpr.(AddressOfExpr).getOperand() = fromExpr
or
toExpr.(BuiltInOperationBuiltInAddressOf).getOperand() = fromExpr
or
// The following case is needed to track the qualifier object for flow
// through fields. It gives flow from `T(x)` to `new T(x)`. That's not
// strictly _data_ flow but _taint_ flow because the type of `fromExpr` is

View File

@@ -19,7 +19,7 @@ void following_pointers(
sink(sourceArray1[0]); // no flow
sink(*sourceArray1); // no flow
sink(&sourceArray1); // no flow (since sourceArray1 is really a pointer)
sink(&sourceArray1); // flow (should probably be taint only)
sink(sourceStruct1.m1); // no flow
sink(sourceStruct1_ptr->m1); // no flow

View File

@@ -14,6 +14,8 @@
| example.c:24:24:24:30 | ... + ... | example.c:24:13:24:30 | ... = ... |
| example.c:26:13:26:16 | call to getX | example.c:26:2:26:25 | ... = ... |
| example.c:26:18:26:24 | ref arg & ... | example.c:26:2:26:7 | coords |
| example.c:26:19:26:24 | coords | example.c:26:18:26:24 | & ... |
| example.c:28:23:28:25 | pos | example.c:28:22:28:25 | & ... |
| test.cpp:6:12:6:17 | call to source | test.cpp:7:8:7:9 | t1 |
| test.cpp:6:12:6:17 | call to source | test.cpp:8:8:8:9 | t1 |
| test.cpp:6:12:6:17 | call to source | test.cpp:9:8:9:9 | t1 |
@@ -45,7 +47,9 @@
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:384:3:384:8 | call to memcpy |
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:384:33:384:35 | tmp |
| test.cpp:384:10:384:13 | ref arg & ... | test.cpp:385:8:385:10 | tmp |
| test.cpp:384:11:384:13 | tmp | test.cpp:384:10:384:13 | & ... |
| test.cpp:384:17:384:23 | source1 | test.cpp:384:10:384:13 | ref arg & ... |
| test.cpp:384:17:384:23 | source1 | test.cpp:384:16:384:23 | & ... |
| test.cpp:388:53:388:59 | source1 | test.cpp:391:17:391:23 | source1 |
| test.cpp:388:66:388:66 | b | test.cpp:393:7:393:7 | b |
| test.cpp:389:12:389:13 | 0 | test.cpp:390:19:390:21 | tmp |
@@ -53,9 +57,12 @@
| test.cpp:389:12:389:13 | 0 | test.cpp:391:33:391:35 | tmp |
| test.cpp:389:12:389:13 | 0 | test.cpp:392:8:392:10 | tmp |
| test.cpp:389:12:389:13 | 0 | test.cpp:394:10:394:12 | tmp |
| test.cpp:390:19:390:21 | tmp | test.cpp:390:18:390:21 | & ... |
| test.cpp:391:10:391:13 | & ... | test.cpp:391:3:391:8 | call to memcpy |
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:391:3:391:8 | call to memcpy |
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:391:33:391:35 | tmp |
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:392:8:392:10 | tmp |
| test.cpp:391:10:391:13 | ref arg & ... | test.cpp:394:10:394:12 | tmp |
| test.cpp:391:11:391:13 | tmp | test.cpp:391:10:391:13 | & ... |
| test.cpp:391:17:391:23 | source1 | test.cpp:391:10:391:13 | ref arg & ... |
| test.cpp:391:17:391:23 | source1 | test.cpp:391:16:391:23 | & ... |

View File

@@ -1,5 +1,6 @@
| acrossLinkTargets.cpp:12:8:12:8 | x | acrossLinkTargets.cpp:19:27:19:32 | call to source |
| clang.cpp:18:8:18:19 | sourceArray1 | clang.cpp:12:9:12:20 | sourceArray1 |
| clang.cpp:22:8:22:20 | & ... | clang.cpp:12:9:12:20 | sourceArray1 |
| clang.cpp:29:27:29:28 | m1 | clang.cpp:28:27:28:32 | call to source |
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |

View File

@@ -1,3 +1,4 @@
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
| clang.cpp:28:27:28:32 | clang.cpp:29:27:29:28 | AST only |
| clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only |
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |

View File

@@ -151,6 +151,9 @@ edges
| by_reference.cpp:62:3:62:3 | s [post update] [a] | by_reference.cpp:63:8:63:8 | s [a] |
| by_reference.cpp:62:25:62:34 | call to user_input | by_reference.cpp:62:3:62:3 | s [post update] [a] |
| by_reference.cpp:63:8:63:8 | s [a] | by_reference.cpp:63:10:63:28 | call to getThroughNonMember |
| by_reference.cpp:68:17:68:18 | ref arg & ... [a] | by_reference.cpp:69:22:69:23 | & ... [a] |
| by_reference.cpp:68:21:68:30 | call to user_input | by_reference.cpp:68:17:68:18 | ref arg & ... [a] |
| by_reference.cpp:69:22:69:23 | & ... [a] | by_reference.cpp:69:8:69:20 | call to nonMemberGetA |
| complex.cpp:34:15:34:15 | b [f, a_] | complex.cpp:44:8:44:8 | b [f, a_] |
| complex.cpp:34:15:34:15 | b [f, b_] | complex.cpp:45:8:45:8 | b [f, b_] |
| complex.cpp:44:8:44:8 | b [f, a_] | complex.cpp:44:10:44:10 | f [a_] |
@@ -205,14 +208,30 @@ edges
| simple.cpp:48:9:48:9 | g [b_] | simple.cpp:26:15:26:15 | f [b_] |
| simple.cpp:51:9:51:9 | h [a_] | simple.cpp:26:15:26:15 | f [a_] |
| simple.cpp:51:9:51:9 | h [b_] | simple.cpp:26:15:26:15 | f [b_] |
| struct_init.c:14:24:14:25 | ab [a] | struct_init.c:15:8:15:9 | ab [a] |
| struct_init.c:15:8:15:9 | ab [a] | struct_init.c:15:12:15:12 | a |
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:22:8:22:9 | ab [a] |
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:24:10:24:12 | & ... [a] |
| struct_init.c:20:17:20:36 | {...} [a] | struct_init.c:28:5:28:7 | & ... [a] |
| struct_init.c:20:20:20:29 | call to user_input | struct_init.c:20:17:20:36 | {...} [a] |
| struct_init.c:22:8:22:9 | ab [a] | struct_init.c:22:11:22:11 | a |
| struct_init.c:24:10:24:12 | & ... [a] | struct_init.c:14:24:14:25 | ab [a] |
| struct_init.c:26:23:29:3 | {...} [nestedAB, a] | struct_init.c:31:8:31:12 | outer [nestedAB, a] |
| struct_init.c:26:23:29:3 | {...} [nestedAB, a] | struct_init.c:36:11:36:15 | outer [nestedAB, a] |
| struct_init.c:26:23:29:3 | {...} [pointerAB, a] | struct_init.c:33:8:33:12 | outer [pointerAB, a] |
| struct_init.c:26:23:29:3 | {...} [pointerAB, a] | struct_init.c:37:10:37:14 | outer [pointerAB, a] |
| struct_init.c:27:5:27:23 | {...} [a] | struct_init.c:26:23:29:3 | {...} [nestedAB, a] |
| struct_init.c:27:7:27:16 | call to user_input | struct_init.c:27:5:27:23 | {...} [a] |
| struct_init.c:28:5:28:7 | & ... [a] | struct_init.c:26:23:29:3 | {...} [pointerAB, a] |
| struct_init.c:31:8:31:12 | outer [nestedAB, a] | struct_init.c:31:14:31:21 | nestedAB [a] |
| struct_init.c:31:14:31:21 | nestedAB [a] | struct_init.c:31:23:31:23 | a |
| struct_init.c:33:8:33:12 | outer [pointerAB, a] | struct_init.c:33:14:33:22 | pointerAB [a] |
| struct_init.c:33:14:33:22 | pointerAB [a] | struct_init.c:33:25:33:25 | a |
| struct_init.c:36:10:36:24 | & ... [a] | struct_init.c:14:24:14:25 | ab [a] |
| struct_init.c:36:11:36:15 | outer [nestedAB, a] | struct_init.c:36:17:36:24 | nestedAB [a] |
| struct_init.c:36:17:36:24 | nestedAB [a] | struct_init.c:36:10:36:24 | & ... [a] |
| struct_init.c:37:10:37:14 | outer [pointerAB, a] | struct_init.c:37:16:37:24 | pointerAB [a] |
| struct_init.c:37:16:37:24 | pointerAB [a] | struct_init.c:14:24:14:25 | ab [a] |
nodes
| A.cpp:41:15:41:21 | new | semmle.label | new |
| A.cpp:43:10:43:12 | & ... | semmle.label | & ... |
@@ -390,6 +409,10 @@ nodes
| by_reference.cpp:62:25:62:34 | call to user_input | semmle.label | call to user_input |
| by_reference.cpp:63:8:63:8 | s [a] | semmle.label | s [a] |
| by_reference.cpp:63:10:63:28 | call to getThroughNonMember | semmle.label | call to getThroughNonMember |
| by_reference.cpp:68:17:68:18 | ref arg & ... [a] | semmle.label | ref arg & ... [a] |
| by_reference.cpp:68:21:68:30 | call to user_input | semmle.label | call to user_input |
| by_reference.cpp:69:8:69:20 | call to nonMemberGetA | semmle.label | call to nonMemberGetA |
| by_reference.cpp:69:22:69:23 | & ... [a] | semmle.label | & ... [a] |
| complex.cpp:34:15:34:15 | b [f, a_] | semmle.label | b [f, a_] |
| complex.cpp:34:15:34:15 | b [f, b_] | semmle.label | b [f, b_] |
| complex.cpp:44:8:44:8 | b [f, a_] | semmle.label | b [f, a_] |
@@ -450,16 +473,30 @@ nodes
| simple.cpp:48:9:48:9 | g [b_] | semmle.label | g [b_] |
| simple.cpp:51:9:51:9 | h [a_] | semmle.label | h [a_] |
| simple.cpp:51:9:51:9 | h [b_] | semmle.label | h [b_] |
| struct_init.c:14:24:14:25 | ab [a] | semmle.label | ab [a] |
| struct_init.c:15:8:15:9 | ab [a] | semmle.label | ab [a] |
| struct_init.c:15:12:15:12 | a | semmle.label | a |
| struct_init.c:20:17:20:36 | {...} [a] | semmle.label | {...} [a] |
| struct_init.c:20:20:20:29 | call to user_input | semmle.label | call to user_input |
| struct_init.c:22:8:22:9 | ab [a] | semmle.label | ab [a] |
| struct_init.c:22:11:22:11 | a | semmle.label | a |
| struct_init.c:24:10:24:12 | & ... [a] | semmle.label | & ... [a] |
| struct_init.c:26:23:29:3 | {...} [nestedAB, a] | semmle.label | {...} [nestedAB, a] |
| struct_init.c:26:23:29:3 | {...} [pointerAB, a] | semmle.label | {...} [pointerAB, a] |
| struct_init.c:27:5:27:23 | {...} [a] | semmle.label | {...} [a] |
| struct_init.c:27:7:27:16 | call to user_input | semmle.label | call to user_input |
| struct_init.c:28:5:28:7 | & ... [a] | semmle.label | & ... [a] |
| struct_init.c:31:8:31:12 | outer [nestedAB, a] | semmle.label | outer [nestedAB, a] |
| struct_init.c:31:14:31:21 | nestedAB [a] | semmle.label | nestedAB [a] |
| struct_init.c:31:23:31:23 | a | semmle.label | a |
| struct_init.c:33:8:33:12 | outer [pointerAB, a] | semmle.label | outer [pointerAB, a] |
| struct_init.c:33:14:33:22 | pointerAB [a] | semmle.label | pointerAB [a] |
| struct_init.c:33:25:33:25 | a | semmle.label | a |
| struct_init.c:36:10:36:24 | & ... [a] | semmle.label | & ... [a] |
| struct_init.c:36:11:36:15 | outer [nestedAB, a] | semmle.label | outer [nestedAB, a] |
| struct_init.c:36:17:36:24 | nestedAB [a] | semmle.label | nestedAB [a] |
| struct_init.c:37:10:37:14 | outer [pointerAB, a] | semmle.label | outer [pointerAB, a] |
| struct_init.c:37:16:37:24 | pointerAB [a] | semmle.label | pointerAB [a] |
#select
| A.cpp:43:10:43:12 | & ... | A.cpp:41:15:41:21 | new | A.cpp:43:10:43:12 | & ... | & ... flows from $@ | A.cpp:41:15:41:21 | new | new |
| A.cpp:49:13:49:13 | c | A.cpp:47:12:47:18 | new | A.cpp:49:13:49:13 | c | c flows from $@ | A.cpp:47:12:47:18 | new | new |
@@ -492,6 +529,7 @@ nodes
| by_reference.cpp:51:10:51:20 | call to getDirectly | by_reference.cpp:50:17:50:26 | call to user_input | by_reference.cpp:51:10:51:20 | call to getDirectly | call to getDirectly flows from $@ | by_reference.cpp:50:17:50:26 | call to user_input | call to user_input |
| by_reference.cpp:57:10:57:22 | call to getIndirectly | by_reference.cpp:56:19:56:28 | call to user_input | by_reference.cpp:57:10:57:22 | call to getIndirectly | call to getIndirectly flows from $@ | by_reference.cpp:56:19:56:28 | call to user_input | call to user_input |
| by_reference.cpp:63:10:63:28 | call to getThroughNonMember | by_reference.cpp:62:25:62:34 | call to user_input | by_reference.cpp:63:10:63:28 | call to getThroughNonMember | call to getThroughNonMember flows from $@ | by_reference.cpp:62:25:62:34 | call to user_input | call to user_input |
| by_reference.cpp:69:8:69:20 | call to nonMemberGetA | by_reference.cpp:68:21:68:30 | call to user_input | by_reference.cpp:69:8:69:20 | call to nonMemberGetA | call to nonMemberGetA flows from $@ | by_reference.cpp:68:21:68:30 | call to user_input | call to user_input |
| complex.cpp:44:12:44:12 | call to a | complex.cpp:55:13:55:22 | call to user_input | complex.cpp:44:12:44:12 | call to a | call to a flows from $@ | complex.cpp:55:13:55:22 | call to user_input | call to user_input |
| complex.cpp:44:12:44:12 | call to a | complex.cpp:57:13:57:22 | call to user_input | complex.cpp:44:12:44:12 | call to a | call to a flows from $@ | complex.cpp:57:13:57:22 | call to user_input | call to user_input |
| complex.cpp:45:12:45:12 | call to b | complex.cpp:56:13:56:22 | call to user_input | complex.cpp:45:12:45:12 | call to b | call to b flows from $@ | complex.cpp:56:13:56:22 | call to user_input | call to user_input |
@@ -504,5 +542,8 @@ nodes
| simple.cpp:28:12:28:12 | call to a | simple.cpp:41:12:41:21 | call to user_input | simple.cpp:28:12:28:12 | call to a | call to a flows from $@ | simple.cpp:41:12:41:21 | call to user_input | call to user_input |
| simple.cpp:29:12:29:12 | call to b | simple.cpp:40:12:40:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:40:12:40:21 | call to user_input | call to user_input |
| simple.cpp:29:12:29:12 | call to b | simple.cpp:42:12:42:21 | call to user_input | simple.cpp:29:12:29:12 | call to b | call to b flows from $@ | simple.cpp:42:12:42:21 | call to user_input | call to user_input |
| struct_init.c:15:12:15:12 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |
| struct_init.c:15:12:15:12 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:15:12:15:12 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input |
| struct_init.c:22:11:22:11 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:22:11:22:11 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |
| struct_init.c:31:23:31:23 | a | struct_init.c:27:7:27:16 | call to user_input | struct_init.c:31:23:31:23 | a | a flows from $@ | struct_init.c:27:7:27:16 | call to user_input | call to user_input |
| struct_init.c:33:25:33:25 | a | struct_init.c:20:20:20:29 | call to user_input | struct_init.c:33:25:33:25 | a | a flows from $@ | struct_init.c:20:20:20:29 | call to user_input | call to user_input |

View File

@@ -102,12 +102,12 @@
| taint.cpp:121:10:121:11 | 1 | taint.cpp:124:13:124:14 | t2 | |
| taint.cpp:122:10:122:11 | 1 | taint.cpp:125:13:125:14 | t3 | |
| taint.cpp:123:12:123:14 | & ... | taint.cpp:129:8:129:9 | p1 | |
| taint.cpp:123:13:123:14 | t1 | taint.cpp:123:12:123:14 | & ... | TAINT |
| taint.cpp:123:13:123:14 | t1 | taint.cpp:123:12:123:14 | & ... | |
| taint.cpp:124:12:124:14 | & ... | taint.cpp:127:3:127:4 | p2 | |
| taint.cpp:124:12:124:14 | & ... | taint.cpp:130:8:130:9 | p2 | |
| taint.cpp:124:13:124:14 | t2 | taint.cpp:124:12:124:14 | & ... | TAINT |
| taint.cpp:124:13:124:14 | t2 | taint.cpp:124:12:124:14 | & ... | |
| taint.cpp:125:12:125:14 | & ... | taint.cpp:131:8:131:9 | p3 | |
| taint.cpp:125:13:125:14 | t3 | taint.cpp:125:12:125:14 | & ... | TAINT |
| taint.cpp:125:13:125:14 | t3 | taint.cpp:125:12:125:14 | & ... | |
| taint.cpp:127:3:127:4 | p2 | taint.cpp:127:2:127:4 | * ... | TAINT |
| taint.cpp:127:8:127:13 | call to source | taint.cpp:127:2:127:15 | ... = ... | |
| taint.cpp:129:8:129:9 | p1 | taint.cpp:129:7:129:9 | * ... | TAINT |
@@ -117,7 +117,7 @@
| taint.cpp:133:7:133:9 | & ... | taint.cpp:134:8:134:9 | p3 | |
| taint.cpp:133:7:133:9 | & ... | taint.cpp:136:3:136:4 | p3 | |
| taint.cpp:133:7:133:9 | & ... | taint.cpp:137:8:137:9 | p3 | |
| taint.cpp:133:8:133:9 | t1 | taint.cpp:133:7:133:9 | & ... | TAINT |
| taint.cpp:133:8:133:9 | t1 | taint.cpp:133:7:133:9 | & ... | |
| taint.cpp:134:8:134:9 | p3 | taint.cpp:134:7:134:9 | * ... | TAINT |
| taint.cpp:136:3:136:4 | p3 | taint.cpp:136:2:136:4 | * ... | TAINT |
| taint.cpp:136:8:136:8 | 0 | taint.cpp:136:2:136:8 | ... = ... | |
@@ -150,14 +150,14 @@
| taint.cpp:180:19:180:19 | p | taint.cpp:181:9:181:9 | p | |
| taint.cpp:181:9:181:9 | p | taint.cpp:181:8:181:9 | * ... | TAINT |
| taint.cpp:185:11:185:16 | call to source | taint.cpp:186:11:186:11 | x | |
| taint.cpp:186:11:186:11 | x | taint.cpp:186:10:186:11 | & ... | TAINT |
| taint.cpp:186:11:186:11 | x | taint.cpp:186:10:186:11 | & ... | |
| taint.cpp:192:23:192:28 | source | taint.cpp:194:13:194:18 | source | |
| taint.cpp:193:6:193:6 | x | taint.cpp:194:10:194:10 | x | |
| taint.cpp:193:6:193:6 | x | taint.cpp:195:7:195:7 | x | |
| taint.cpp:194:9:194:10 | & ... | taint.cpp:194:2:194:7 | call to memcpy | |
| taint.cpp:194:9:194:10 | ref arg & ... | taint.cpp:194:2:194:7 | call to memcpy | |
| taint.cpp:194:9:194:10 | ref arg & ... | taint.cpp:195:7:195:7 | x | |
| taint.cpp:194:10:194:10 | x | taint.cpp:194:9:194:10 | & ... | TAINT |
| taint.cpp:194:10:194:10 | x | taint.cpp:194:9:194:10 | & ... | |
| taint.cpp:194:13:194:18 | source | taint.cpp:194:9:194:10 | ref arg & ... | TAINT |
| taint.cpp:194:21:194:31 | sizeof(int) | taint.cpp:194:9:194:10 | ref arg & ... | TAINT |
| taint.cpp:207:6:207:11 | call to source | taint.cpp:207:2:207:13 | ... = ... | |