diff --git a/change-notes/1.23/analysis-cpp.md b/change-notes/1.23/analysis-cpp.md index 64e9b98fcfb..f22436f55cb 100644 --- a/change-notes/1.23/analysis-cpp.md +++ b/change-notes/1.23/analysis-cpp.md @@ -39,6 +39,10 @@ The following changes in version 1.23 affect C/C++ analysis in all applications. definition of `x` when `x` is a variable of pointer type. It no longer considers deep paths such as `f(&x.myField)` to be definitions of `x`. These changes are in line with the user expectations we've observed. +* The data-flow library now makes it easier to specify barriers/sanitizers + arising from guards by overriding the predicate + `isBarrierGuard`/`isSanitizerGuard` on data-flow and taint-tracking + configurations respectively. * There is now a `DataFlow::localExprFlow` predicate and a `TaintTracking::localExprTaint` predicate to make it easy to use the most common case of local data flow and taint: from one `Expr` to another. diff --git a/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll index 0caa2ab05df..cd1fbd487e0 100644 --- a/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll @@ -5,6 +5,8 @@ private import cpp private import semmle.code.cpp.dataflow.internal.FlowVar private import semmle.code.cpp.models.interfaces.DataFlow +private import semmle.code.cpp.controlflow.Guards +private import semmle.code.cpp.valuenumbering.GlobalValueNumbering cached private newtype TNode = @@ -680,12 +682,16 @@ VariableAccess getAnAccessToAssignedVariable(Expr assign) { * * It is important that all extending classes in scope are disjoint. */ -class BarrierGuard extends Expr { - /** NOT YET SUPPORTED. Holds if this guard validates `e` upon evaluating to `branch`. */ - abstract deprecated predicate checks(Expr e, boolean branch); +class BarrierGuard extends GuardCondition { + /** Override this predicate to hold if this guard validates `e` upon evaluating to `b`. */ + abstract predicate checks(Expr e, boolean branch); /** Gets a node guarded by this guard. */ - final Node getAGuardedNode() { - none() // stub + final ExprNode getAGuardedNode() { + exists(GVN value, boolean branch | + result.getExpr() = value.getAnExpr() and + this.checks(value.getAnExpr(), branch) and + this.controls(result.getExpr().getBasicBlock(), branch) + ) } } diff --git a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 0476ea3c30a..f824e0b6bf2 100644 --- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -5,6 +5,7 @@ private import cpp private import semmle.code.cpp.ir.IR private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.ir.ValueNumbering /** * A newtype wrapper to prevent accidental casts between `Node` and @@ -220,7 +221,7 @@ predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) } /** - * A guard that validates some expression. + * A guard that validates some instruction. * * To use this in a configuration, extend the class and provide a * characteristic predicate precisely specifying the guard, and override @@ -229,11 +230,15 @@ predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2) * It is important that all extending classes in scope are disjoint. */ class BarrierGuard extends IRGuardCondition { - /** NOT YET SUPPORTED. Holds if this guard validates `e` upon evaluating to `b`. */ - abstract deprecated predicate checks(Instruction e, boolean b); + /** Override this predicate to hold if this guard validates `instr` upon evaluating to `b`. */ + abstract predicate checks(Instruction instr, boolean b); /** Gets a node guarded by this guard. */ final Node getAGuardedNode() { - none() // stub + exists(ValueNumber value, boolean edge | + result.asInstruction() = value.getAnInstruction() and + this.checks(value.getAnInstruction(), edge) and + this.controls(result.asInstruction().getBlock(), edge) + ) } } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/BarrierGuard.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/BarrierGuard.cpp new file mode 100644 index 00000000000..9c3c8bc4569 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/BarrierGuard.cpp @@ -0,0 +1,68 @@ +int source(); +void sink(int); +bool guarded(int); + +void bg_basic(int source) { + if (guarded(source)) { + sink(source); // no flow + } else { + sink(source); // flow + } +} + +void bg_not(int source) { + if (!guarded(source)) { + sink(source); // flow + } else { + sink(source); // no flow + } +} + +void bg_and(int source, bool arbitrary) { + if (guarded(source) && arbitrary) { + sink(source); // no flow + } else { + sink(source); // flow + } +} + +void bg_or(int source, bool arbitrary) { + if (guarded(source) || arbitrary) { + sink(source); // flow + } else { + sink(source); // flow + } +} + +void bg_return(int source) { + if (!guarded(source)) { + return; + } + sink(source); // no flow +} + +struct XY { + int x, y; +}; + +void bg_stackstruct(XY s1, XY s2) { + s1.x = source(); + if (guarded(s1.x)) { + sink(s1.x); // no flow + } else if (guarded(s1.y)) { + sink(s1.x); // flow + } else if (guarded(s2.y)) { + sink(s1.x); // flow + } +} + +void bg_structptr(XY *p1, XY *p2) { + p1->x = source(); + if (guarded(p1->x)) { + sink(p1->x); // no flow [FALSE POSITIVE in AST] + } else if (guarded(p1->y)) { + sink(p1->x); // flow [NOT DETECTED in IR] + } else if (guarded(p2->x)) { + sink(p1->x); // flow [NOT DETECTED in IR] + } +} diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/DataflowTestCommon.qll b/cpp/ql/test/library-tests/dataflow/dataflow-tests/DataflowTestCommon.qll index 02ee4d45380..a81394640ee 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/DataflowTestCommon.qll +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/DataflowTestCommon.qll @@ -1,6 +1,20 @@ import cpp import semmle.code.cpp.dataflow.DataFlow +/** + * A `BarrierGuard` that stops flow to all occurrences of `x` within statement + * S in `if (guarded(x)) S`. + */ +// This is tested in `BarrierGuard.cpp`. +class TestBarrierGuard extends DataFlow::BarrierGuard { + TestBarrierGuard() { this.(FunctionCall).getTarget().getName() = "guarded" } + + override predicate checks(Expr checked, boolean isTrue) { + checked = this.(FunctionCall).getArgument(0) and + isTrue = true + } +} + /** Common data flow configuration to be used by tests. */ class TestAllocationConfig extends DataFlow::Configuration { TestAllocationConfig() { this = "TestAllocationConfig" } @@ -26,4 +40,6 @@ class TestAllocationConfig extends DataFlow::Configuration { override predicate isBarrier(DataFlow::Node barrier) { barrier.asExpr().(VariableAccess).getTarget().hasName("barrier") } + + override predicate isBarrierGuard(DataFlow::BarrierGuard bg) { bg instanceof TestBarrierGuard } } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll index eb5fa14e2e0..490f7e4290a 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/IRDataflowTestCommon.qll @@ -1,5 +1,20 @@ import cpp import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.ir.IR + +/** + * A `BarrierGuard` that stops flow to all occurrences of `x` within statement + * S in `if (guarded(x)) S`. + */ +// This is tested in `BarrierGuard.cpp`. +class TestBarrierGuard extends DataFlow::BarrierGuard { + TestBarrierGuard() { this.(CallInstruction).getStaticCallTarget().getName() = "guarded" } + + override predicate checks(Instruction checked, boolean isTrue) { + checked = this.(CallInstruction).getPositionalArgument(0) and + isTrue = true + } +} /** Common data flow configuration to be used by tests. */ class TestAllocationConfig extends DataFlow::Configuration { @@ -24,4 +39,6 @@ class TestAllocationConfig extends DataFlow::Configuration { override predicate isBarrier(DataFlow::Node barrier) { barrier.asExpr().(VariableAccess).getTarget().hasName("barrier") } + + override predicate isBarrierGuard(DataFlow::BarrierGuard bg) { bg instanceof TestBarrierGuard } } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected index b9fca1f678f..6527db4b77e 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.expected @@ -1,3 +1,13 @@ +| BarrierGuard.cpp:9:10:9:15 | source | BarrierGuard.cpp:5:19:5:24 | source | +| BarrierGuard.cpp:15:10:15:15 | source | BarrierGuard.cpp:13:17:13:22 | source | +| BarrierGuard.cpp:25:10:25:15 | source | BarrierGuard.cpp:21:17:21:22 | source | +| BarrierGuard.cpp:31:10:31:15 | source | BarrierGuard.cpp:29:16:29:21 | source | +| BarrierGuard.cpp:33:10:33:15 | source | BarrierGuard.cpp:29:16:29:21 | source | +| BarrierGuard.cpp:53:13:53:13 | x | BarrierGuard.cpp:49:10:49:15 | call to source | +| BarrierGuard.cpp:55:13:55:13 | x | BarrierGuard.cpp:49:10:49:15 | call to source | +| BarrierGuard.cpp:62:14:62:14 | x | BarrierGuard.cpp:60:11:60:16 | call to source | +| BarrierGuard.cpp:64:14:64:14 | x | BarrierGuard.cpp:60:11:60:16 | call to source | +| BarrierGuard.cpp:66:14:66:14 | x | BarrierGuard.cpp:60:11:60:16 | call to source | | acrossLinkTargets.cpp:12:8:12:8 | x | acrossLinkTargets.cpp:19:27:19:32 | call to source | | clang.cpp:18:8:18:19 | sourceArray1 | clang.cpp:12:9:12:20 | sourceArray1 | | clang.cpp:22:8:22:20 | & ... | clang.cpp:12:9:12:20 | sourceArray1 | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected index cd7e3dac785..7d0d4e7d72e 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected @@ -1,3 +1,6 @@ +| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:62:14:62:14 | AST only | +| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:64:14:64:14 | AST only | +| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:66:14:66:14 | AST only | | clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only | | clang.cpp:28:27:28:32 | clang.cpp:29:27:29:28 | AST only | | clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected index 83ba546480f..9b67a013a58 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected @@ -1,3 +1,10 @@ +| BarrierGuard.cpp:9:10:9:15 | Load: source | BarrierGuard.cpp:5:19:5:24 | InitializeParameter: source | +| BarrierGuard.cpp:15:10:15:15 | Load: source | BarrierGuard.cpp:13:17:13:22 | InitializeParameter: source | +| BarrierGuard.cpp:25:10:25:15 | Load: source | BarrierGuard.cpp:21:17:21:22 | InitializeParameter: source | +| BarrierGuard.cpp:31:10:31:15 | Load: source | BarrierGuard.cpp:29:16:29:21 | InitializeParameter: source | +| BarrierGuard.cpp:33:10:33:15 | Load: source | BarrierGuard.cpp:29:16:29:21 | InitializeParameter: source | +| BarrierGuard.cpp:53:13:53:13 | Load: x | BarrierGuard.cpp:49:10:49:15 | Call: call to source | +| BarrierGuard.cpp:55:13:55:13 | Load: x | BarrierGuard.cpp:49:10:49:15 | Call: call to source | | acrossLinkTargets.cpp:12:8:12:8 | Convert: (int)... | acrossLinkTargets.cpp:19:27:19:32 | Call: call to source | | acrossLinkTargets.cpp:12:8:12:8 | Load: x | acrossLinkTargets.cpp:19:27:19:32 | Call: call to source | | clang.cpp:18:8:18:19 | Convert: (const int *)... | clang.cpp:12:9:12:20 | InitializeParameter: sourceArray1 | diff --git a/docs/language/learn-ql/cpp/dataflow.rst b/docs/language/learn-ql/cpp/dataflow.rst index 4867f3daf47..50fccdc5cc4 100644 --- a/docs/language/learn-ql/cpp/dataflow.rst +++ b/docs/language/learn-ql/cpp/dataflow.rst @@ -166,6 +166,7 @@ The following predicates are defined in the configuration: - ``isSource``—defines where data may flow from - ``isSink``—defines where data may flow to - ``isBarrier``—optional, restricts the data flow +- ``isBarrierGuard``—optional, restricts the data flow - ``isAdditionalFlowStep``—optional, adds additional flow steps The characteristic predicate ``MyDataFlowConfiguration()`` defines the name of the configuration, so ``"MyDataFlowConfiguration"`` should be replaced by the name of your class. @@ -204,6 +205,7 @@ The following predicates are defined in the configuration: - ``isSource``—defines where taint may flow from - ``isSink``—defines where taint may flow to - ``isSanitizer``—optional, restricts the taint flow +- ``isSanitizerGuard``—optional, restricts the taint flow - ``isAdditionalTaintStep``—optional, adds additional taint steps Similar to global data flow, the characteristic predicate ``MyTaintTrackingConfiguration()`` defines the unique name of the configuration, so ``"MyTaintTrackingConfiguration"`` should be replaced by the name of your class.