From 7376daf16e037bc4212dbb991fd5edf39efa88ec Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Wed, 22 Jan 2020 17:06:21 +0100 Subject: [PATCH] C++: Some data flow through partial chi operands --- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 16 +++++++++++----- .../defaulttainttracking.cpp | 4 ++-- .../DefaultTaintTracking/tainted.expected | 1 + .../library-tests/dataflow/taint-tests/taint.cpp | 6 +++--- .../dataflow/taint-tests/test_diff.expected | 3 +++ .../dataflow/taint-tests/test_ir.expected | 3 +++ 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 81d4ab83fde..7d6fe1ba2b6 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -278,12 +278,18 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction // By allowing flow through the total operand, we ensure that flow is not lost // due to shortcomings of the alias analysis. We may get false flow in cases // where the data is indeed overwritten. - // - // Allowing flow through the partial operand would be more noisy, especially - // for variables that have escaped: for soundness, the IR has to assume that - // every write to an unknown address can affect every escaped variable, and - // this assumption shows up as data flowing through partial chi operands. iTo.getAnOperand().(ChiTotalOperand).getDef() = iFrom + or + // Flow through the partial operand must be restricted a bit more. For + // soundness, the IR has to assume that every write to an unknown address can + // affect every escaped variable, and this assumption shows up as data flowing + // through partial chi operands. The chi instructions for all escaped data can + // be recognized by having unknown types. For all other chi instructions, flow + // through partial operands is more likely to be real. + exists(ChiInstruction chi | iTo = chi | + iFrom = chi.getPartial() and + not chi.getResultIRType() instanceof IRUnknownType + ) } /** diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp index 87fd3a9fec8..0562b79a7e3 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp @@ -21,8 +21,8 @@ int main(int argc, char *argv[]) { char buf[100] = "VAR = "; sink(strcat(buf, getenv("VAR"))); - sink(buf); // BUG: no taint - sink(untainted_buf); // the two buffers would be conflated if we added flow through partial chi inputs + sink(buf); + sink(untainted_buf); // the two buffers would be conflated if we added flow through all partial chi inputs return 0; } diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected index 3de4c3d2fc7..b0f173bd1e5 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected @@ -25,6 +25,7 @@ | defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:33 | (const char *)... | | defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:25 | call to getenv | | defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:32 | (const char *)... | +| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:24:8:24:10 | buf | | defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:32:11:32:26 | p#0 | | defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:11:38:21 | env_pointer | | defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:25:38:30 | call to getenv | diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp index a162f6d49d6..49b2fd06e9c 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.cpp @@ -107,9 +107,9 @@ void array_test(int i) { arr3[5] = 0; sink(arr1[5]); // tainted - sink(arr1[i]); // tainted [NOT DETECTED] - sink(arr2[5]); // tainted [NOT DETECTED] - sink(arr2[i]); // tainted [NOT DETECTED] + sink(arr1[i]); // tainted [NOT DETECTED with AST] + sink(arr2[5]); // tainted [NOT DETECTED with AST] + sink(arr2[i]); // tainted [NOT DETECTED with AST] sink(arr3[5]); sink(arr3[i]); } diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/test_diff.expected index 60ef72073f4..6477371af38 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/test_diff.expected @@ -7,6 +7,9 @@ | taint.cpp:93:11:93:11 | taint.cpp:71:22:71:27 | AST only | | taint.cpp:94:11:94:11 | taint.cpp:72:7:72:12 | AST only | | taint.cpp:109:7:109:13 | taint.cpp:105:12:105:17 | IR only | +| taint.cpp:110:7:110:13 | taint.cpp:105:12:105:17 | IR only | +| taint.cpp:111:7:111:13 | taint.cpp:106:12:106:17 | IR only | +| taint.cpp:112:7:112:13 | taint.cpp:106:12:106:17 | IR only | | taint.cpp:130:7:130:9 | taint.cpp:127:8:127:13 | IR only | | taint.cpp:137:7:137:9 | taint.cpp:120:11:120:16 | AST only | | taint.cpp:173:8:173:13 | taint.cpp:164:19:164:24 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/test_ir.expected index 56fd106e503..f5bd94474c8 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/test_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/test_ir.expected @@ -2,6 +2,9 @@ | taint.cpp:16:8:16:14 | source1 | taint.cpp:12:22:12:27 | call to source | | taint.cpp:17:8:17:16 | ++ ... | taint.cpp:12:22:12:27 | call to source | | taint.cpp:109:7:109:13 | access to array | taint.cpp:105:12:105:17 | call to source | +| taint.cpp:110:7:110:13 | access to array | taint.cpp:105:12:105:17 | call to source | +| taint.cpp:111:7:111:13 | access to array | taint.cpp:106:12:106:17 | call to source | +| taint.cpp:112:7:112:13 | access to array | taint.cpp:106:12:106:17 | call to source | | taint.cpp:129:7:129:9 | * ... | taint.cpp:120:11:120:16 | call to source | | taint.cpp:130:7:130:9 | * ... | taint.cpp:127:8:127:13 | call to source | | taint.cpp:134:7:134:9 | * ... | taint.cpp:120:11:120:16 | call to source |