From c0b04eac7c7958a5a098d84b8e020072d9678bb7 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 23 Oct 2023 19:29:28 +0100 Subject: [PATCH 1/4] C++: Add failing test. --- .../dataflow/taint-tests/localTaint.expected | 13 +++++++++++++ .../test/library-tests/dataflow/taint-tests/map.cpp | 13 +++++++++++++ .../library-tests/dataflow/taint-tests/taint.ql | 2 ++ 3 files changed, 28 insertions(+) diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected index f134b647a14..22b44fd27d5 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected @@ -2199,6 +2199,19 @@ WARNING: Module TaintTracking has been deprecated and may be removed in future ( | map.cpp:436:55:436:59 | def | map.cpp:436:19:436:60 | call to pair | TAINT | | map.cpp:436:63:436:67 | first | map.cpp:436:7:436:67 | call to iterator | | | map.cpp:437:7:437:9 | m35 | map.cpp:437:7:437:9 | call to unordered_map | | +| map.cpp:446:23:446:23 | call to map | map.cpp:448:3:448:3 | m | | +| map.cpp:446:23:446:23 | call to map | map.cpp:449:12:449:12 | m | | +| map.cpp:446:23:446:23 | call to map | map.cpp:451:1:451:1 | m | | +| map.cpp:447:12:447:26 | call to indirect_source | map.cpp:448:10:448:10 | p | | +| map.cpp:448:3:448:3 | m | map.cpp:448:4:448:4 | call to operator[] | TAINT | +| map.cpp:448:3:448:3 | ref arg m | map.cpp:449:12:449:12 | m | | +| map.cpp:448:3:448:3 | ref arg m | map.cpp:451:1:451:1 | m | | +| map.cpp:448:3:448:10 | ... = ... | map.cpp:448:4:448:4 | call to operator[] [post update] | | +| map.cpp:448:4:448:4 | call to operator[] [post update] | map.cpp:448:3:448:3 | ref arg m | TAINT | +| map.cpp:448:10:448:10 | p | map.cpp:448:3:448:10 | ... = ... | | +| map.cpp:449:12:449:12 | m | map.cpp:449:13:449:13 | call to operator[] | TAINT | +| map.cpp:449:12:449:12 | ref arg m | map.cpp:451:1:451:1 | m | | +| map.cpp:449:13:449:13 | call to operator[] | map.cpp:450:8:450:8 | q | | | movableclass.cpp:8:2:8:15 | this | movableclass.cpp:8:27:8:31 | constructor init of field v [pre-this] | | | movableclass.cpp:8:21:8:22 | _v | movableclass.cpp:8:29:8:30 | _v | | | movableclass.cpp:8:29:8:30 | _v | movableclass.cpp:8:27:8:31 | constructor init of field v | TAINT | diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp index 404d6627b27..b645cdda078 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp @@ -436,3 +436,16 @@ void test_unordered_map() sink(m35.emplace(std::pair(source(), "def")).first); // $ MISSING: ast,ir sink(m35); // $ MISSING: ast,ir } + +namespace { + int* indirect_source(); + void indirect_sink(int*); +} + +void test_indirect_taint() { + std::map m; + int* p = indirect_source(); + m[1] = p; + int* q = m[1]; + sink(q); // $ MISSING: ast ir +} \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.ql b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.ql index ef79f065921..56c8cd8ba68 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/taint.ql +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/taint.ql @@ -84,6 +84,8 @@ module IRTest { or source.asIndirectExpr().(FunctionCall).getTarget().getName() = "source" or + source.asIndirectExpr().(FunctionCall).getTarget().getName() = "indirect_source" + or source.asParameter().getName().matches("source%") or exists(FunctionCall fc | From b107c4c641a4474f646c479ac53d16722be46bcd Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 23 Oct 2023 19:34:25 +0100 Subject: [PATCH 2/4] C++: Fix missing result in 'ModelUtil'. The problem was that 'n.asInstruction()' on line 81 wasn't necessarily a 'CallInstruction' (it could be a conversion). --- .../cpp/ir/dataflow/internal/ModelUtil.qll | 56 ++++++++++--------- .../dataflow/taint-tests/map.cpp | 2 +- .../dataflow/taint-tests/smart_pointer.cpp | 8 +-- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll index cc2a0b01c4d..e71f456af4b 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll @@ -31,26 +31,35 @@ DataFlow::Node callInput(CallInstruction call, FunctionInput input) { ) } +/** + * Gets the node that represents the output of `call` with kind `output` at + * indirection index `indirectionIndex`. + */ +private Node callOutputWithIndirectionIndex( + CallInstruction call, FunctionOutput output, int indirectionIndex +) { + // The return value + simpleOutNode(result, call) and + output.isReturnValue() and + indirectionIndex = 0 + or + // The side effect of a call on the value pointed to by an argument or qualifier + exists(int index | + result.(IndirectArgumentOutNode).getArgumentIndex() = index and + result.(IndirectArgumentOutNode).getIndirectionIndex() = indirectionIndex - 1 and + result.(IndirectArgumentOutNode).getCallInstruction() = call and + output.isParameterDerefOrQualifierObject(index, indirectionIndex - 1) + ) + or + result = getIndirectReturnOutNode(call, indirectionIndex) and + output.isReturnValueDeref(indirectionIndex) +} + /** * Gets the instruction that holds the `output` for `call`. */ Node callOutput(CallInstruction call, FunctionOutput output) { - // The return value - simpleOutNode(result, call) and - output.isReturnValue() - or - // The side effect of a call on the value pointed to by an argument or qualifier - exists(int index, int indirectionIndex | - result.(IndirectArgumentOutNode).getArgumentIndex() = index and - result.(IndirectArgumentOutNode).getIndirectionIndex() = indirectionIndex and - result.(IndirectArgumentOutNode).getCallInstruction() = call and - output.isParameterDerefOrQualifierObject(index, indirectionIndex) - ) - or - exists(int ind | - result = getIndirectReturnOutNode(call, ind) and - output.isReturnValueDeref(ind) - ) + result = callOutputWithIndirectionIndex(call, output, _) } DataFlow::Node callInput(CallInstruction call, FunctionInput input, int d) { @@ -76,19 +85,16 @@ private IndirectReturnOutNode getIndirectReturnOutNode(CallInstruction call, int */ bindingset[d] Node callOutput(CallInstruction call, FunctionOutput output, int d) { - exists(DataFlow::Node n | n = callOutput(call, output) and d > 0 | + exists(DataFlow::Node n, int indirectionIndex | + n = callOutputWithIndirectionIndex(call, output, indirectionIndex) and d > 0 + | // The return value - result = getIndirectReturnOutNode(n.asInstruction(), d) + result = callOutputWithIndirectionIndex(call, output, indirectionIndex + d) or + n = callOutputWithIndirectionIndex(call, output, indirectionIndex) and // If there isn't an indirect out node for the call with indirection `d` then // we conflate this with the underlying `CallInstruction`. - not exists(getIndirectReturnOutNode(call, d)) and + not exists(getIndirectReturnOutNode(call, indirectionIndex + d)) and n = result - or - // The side effect of a call on the value pointed to by an argument or qualifier - exists(Operand operand, int indirectionIndex | - Ssa::outNodeHasAddressAndIndex(n, operand, indirectionIndex) and - Ssa::outNodeHasAddressAndIndex(result, operand, indirectionIndex + d) - ) ) } diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp index b645cdda078..8eeb80a0f83 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/map.cpp @@ -447,5 +447,5 @@ void test_indirect_taint() { int* p = indirect_source(); m[1] = p; int* q = m[1]; - sink(q); // $ MISSING: ast ir + sink(q); // $ ir MISSING: ast } \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp index 560a02fa4e6..8fa3791f43f 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp @@ -85,11 +85,11 @@ struct B { void test_operator_arrow(std::unique_ptr p, std::unique_ptr q) { p->x = source(); - sink(p->x); // $ ast,ir + sink(p->x); // $ ast MISSING: ir sink(p->y); q->a1.x = source(); - sink(q->a1.x); // $ ast,ir + sink(q->a1.x); // $ ast MISSING: ir sink(q->a1.y); sink(q->a2.x); } @@ -101,7 +101,7 @@ void taint_x(A* pa) { void reverse_taint_smart_pointer() { std::unique_ptr p = std::unique_ptr(new A); taint_x(p.get()); - sink(p->x); // $ ast,ir + sink(p->x); // $ ast MISSING: ir } struct C { @@ -131,7 +131,7 @@ int nested_shared_ptr_taint(std::shared_ptr p1, std::unique_ptr p1, std::unique_ptr> p2) { taint_x_shared_cref(p1->q); - sink(p1->q->x); // $ ast,ir + sink(p1->q->x); // $ ast MISSING: ir getNumberCRef(*p2); sink(**p2); // $ ast,ir From 67ed12c9160571b934a13e83db0b30adc7988afe Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 23 Oct 2023 19:36:33 +0100 Subject: [PATCH 3/4] C++: Correctly model that 'operator->', and 'get' on smart pointers perform a load. --- .../code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll | 2 +- .../dataflow-tests/dataflow-ir-consistency.expected | 4 ---- .../dataflow/fields/dataflow-ir-consistency.expected | 2 -- .../library-tests/dataflow/taint-tests/smart_pointer.cpp | 8 ++++---- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll index 8d915650266..fc718693dbd 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -228,7 +228,7 @@ private class PointerWrapperTypeIndirection extends Indirection instanceof Point override predicate isAdditionalDereference(Instruction deref, Operand address) { exists(CallInstruction call | operandForFullyConvertedCall(getAUse(deref), call) and - this = call.getStaticCallTarget().getClassAndName("operator*") and + this = call.getStaticCallTarget().getClassAndName(["operator*", "operator->", "get"]) and address = call.getThisArgumentOperand() ) } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected index b7b2a8eab90..bacd714e614 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected @@ -20,12 +20,8 @@ reverseRead argHasPostUpdate postWithInFlow | test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | | test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | | test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| test.cpp:407:10:407:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | | test.cpp:407:10:407:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition diff --git a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected index 72818427b84..4b74de5a825 100644 --- a/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/fields/dataflow-ir-consistency.expected @@ -44,8 +44,6 @@ reverseRead argHasPostUpdate postWithInFlow | realistic.cpp:54:16:54:47 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| realistic.cpp:54:16:54:47 | memcpy output argument | PostUpdateNode should not be the target of local flow. | -| realistic.cpp:60:16:60:18 | memcpy output argument | PostUpdateNode should not be the target of local flow. | | realistic.cpp:60:16:60:18 | memcpy output argument | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp b/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp index 8fa3791f43f..560a02fa4e6 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/smart_pointer.cpp @@ -85,11 +85,11 @@ struct B { void test_operator_arrow(std::unique_ptr p, std::unique_ptr q) { p->x = source(); - sink(p->x); // $ ast MISSING: ir + sink(p->x); // $ ast,ir sink(p->y); q->a1.x = source(); - sink(q->a1.x); // $ ast MISSING: ir + sink(q->a1.x); // $ ast,ir sink(q->a1.y); sink(q->a2.x); } @@ -101,7 +101,7 @@ void taint_x(A* pa) { void reverse_taint_smart_pointer() { std::unique_ptr p = std::unique_ptr(new A); taint_x(p.get()); - sink(p->x); // $ ast MISSING: ir + sink(p->x); // $ ast,ir } struct C { @@ -131,7 +131,7 @@ int nested_shared_ptr_taint(std::shared_ptr p1, std::unique_ptr p1, std::unique_ptr> p2) { taint_x_shared_cref(p1->q); - sink(p1->q->x); // $ ast MISSING: ir + sink(p1->q->x); // $ ast,ir getNumberCRef(*p2); sink(**p2); // $ ast,ir From 1fce26534f7c92ca349038889f7b8c9fd96c6bf6 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 24 Oct 2023 09:25:32 +0100 Subject: [PATCH 4/4] C++: Remove implied conjunct. --- cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll index e71f456af4b..055f48c80ec 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll @@ -91,7 +91,6 @@ Node callOutput(CallInstruction call, FunctionOutput output, int d) { // The return value result = callOutputWithIndirectionIndex(call, output, indirectionIndex + d) or - n = callOutputWithIndirectionIndex(call, output, indirectionIndex) and // If there isn't an indirect out node for the call with indirection `d` then // we conflate this with the underlying `CallInstruction`. not exists(getIndirectReturnOutNode(call, indirectionIndex + d)) and