Merge pull request #14669 from MathiasVP/no-dtt-in-unbounded-write

C++: Rewrite `cpp/unbounded-write` away from `DefaultTaintTracking`
2026-04-24 16:25:15 +02:00 · 2023-11-10 15:08:42 +00:00
parent 2ceb4cffbc 4f6b6b4a6f
commit 18c0bcec0b
2 changed files with 67 additions and 67 deletions
--- a/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql
+++ b/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql
@@ -15,9 +15,10 @@
 */

 import semmle.code.cpp.security.BufferWrite
-import semmle.code.cpp.security.Security
-import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl
-import TaintedWithPath
+import semmle.code.cpp.security.FlowSources as FS
+import semmle.code.cpp.dataflow.new.TaintTracking
+import semmle.code.cpp.controlflow.IRGuards
+import Flow::PathGraph

 /*
 * --- Summary of CWE-120 alerts ---
@@ -47,15 +48,6 @@ predicate isUnboundedWrite(BufferWrite bw) {
  not exists(bw.getMaxData(_)) // and we can't deduce an upper bound to the amount copied
 }

-/*
- * predicate isMaybeUnboundedWrite(BufferWrite bw)
- * {
- *   not bw.hasExplicitLimit()                           // has no explicit size limit
- *   and exists(bw.getMaxData())                         // and we can deduce an upper bound to the amount copied
- *   and (not exists(getBufferSize(bw.getDest(), _)))    // but we can't work out the size of the destination to be sure
- * }
- */
-
 /**
 * Holds if `e` is a source buffer going into an unbounded write `bw` or a
 * qualifier of (a qualifier of ...) such a source.
@@ -66,19 +58,43 @@ predicate unboundedWriteSource(Expr e, BufferWrite bw) {
  exists(FieldAccess fa | unboundedWriteSource(fa, bw) and e = fa.getQualifier())
 }

-/*
- * --- user input reach ---
- */
+predicate isSource(FS::FlowSource source, string sourceType) { source.getSourceType() = sourceType }

-class Configuration extends TaintTrackingConfiguration {
-  override predicate isSink(Element tainted) { unboundedWriteSource(tainted, _) }
-
-  override predicate taintThroughGlobals() { any() }
+predicate isSink(DataFlow::Node sink, BufferWrite bw) {
+  unboundedWriteSource(sink.asIndirectExpr(), bw)
+  or
+  // `gets` and `scanf` reads from stdin so there's no real input.
+  // The `BufferWrite` library models this as the call itself being
+  // the source. In this case we mark the output argument as being
+  // the sink so that we report a path where source = sink (because
+  // the same output argument is also included in `isSource`).
+  bw.getASource() = bw and
+  unboundedWriteSource(sink.asDefiningArgument(), bw)
 }

-/*
- * --- put it together ---
- */
+predicate lessThanOrEqual(IRGuardCondition g, Expr e, boolean branch) {
+  exists(Operand left |
+    g.comparesLt(left, _, _, true, branch) or
+    g.comparesEq(left, _, _, true, branch)
+  |
+    left.getDef().getUnconvertedResultExpression() = e
+  )
+}
+
+module Config implements DataFlow::ConfigSig {
+  predicate isSource(DataFlow::Node source) { isSource(source, _) }
+
+  predicate isSink(DataFlow::Node sink) { isSink(sink, _) }
+
+  predicate isBarrierOut(DataFlow::Node node) { isSink(node) }
+
+  predicate isBarrier(DataFlow::Node node) {
+    // Block flow if the node is guarded by any <, <= or = operations.
+    node = DataFlow::BarrierGuard<lessThanOrEqual/3>::getABarrierNode()
+  }
+}
+
+module Flow = TaintTracking::Global<Config>;

 /*
 * An unbounded write is, for example `strcpy(..., tainted)`. We're looking
@@ -87,17 +103,20 @@ class Configuration extends TaintTrackingConfiguration {
 *
 * In the case of `gets` and `scanf`, where the source buffer is implicit, the
 * `BufferWrite` library reports the source buffer to be the same as the
- * destination buffer. Since those destination-buffer arguments are also
- * modeled in the taint-tracking library as being _sources_ of taint, they are
- * in practice reported as being tainted because the `security.TaintTracking`
- * library does not distinguish between taint going into an argument and out of
- * an argument. Thus, we get the desired alerts.
+ * destination buffer. So to report an alert on a pattern like:
+ * ```
+ * char s[32];
+ * gets(s);
+ * ```
+ * we define the sink as the node corresponding to the output argument of `gets`.
+ * This gives us a path where the source is equal to the sink.
 */

-from BufferWrite bw, Expr inputSource, Expr tainted, PathNode sourceNode, PathNode sinkNode
+from BufferWrite bw, Flow::PathNode source, Flow::PathNode sink, string sourceType
 where
-  taintedWithPath(inputSource, tainted, sourceNode, sinkNode) and
-  unboundedWriteSource(tainted, bw)
-select bw, sourceNode, sinkNode,
-  "This '" + bw.getBWDesc() + "' with input from $@ may overflow the destination.", inputSource,
-  inputSource.toString()
+  Flow::flowPath(source, sink) and
+  isSource(source.getNode(), sourceType) and
+  isSink(sink.getNode(), bw)
+select bw, source, sink,
+  "This '" + bw.getBWDesc() + "' with input from $@ may overflow the destination.",
+  source.getNode(), sourceType
--- a/cpp/ql/test/query-tests/Security/CWE/CWE-120/semmle/tests/UnboundedWrite.expected
+++ b/cpp/ql/test/query-tests/Security/CWE/CWE-120/semmle/tests/UnboundedWrite.expected
@@ -1,37 +1,18 @@
 edges
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-| tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array |
-subpaths
+| tests.c:16:26:16:29 | argv indirection | tests.c:28:22:28:28 | access to array indirection |
+| tests.c:16:26:16:29 | argv indirection | tests.c:29:28:29:34 | access to array indirection |
+| tests.c:16:26:16:29 | argv indirection | tests.c:34:10:34:16 | access to array indirection |
 nodes
-| tests.c:28:22:28:25 | argv | semmle.label | argv |
-| tests.c:28:22:28:25 | argv | semmle.label | argv |
-| tests.c:28:22:28:28 | access to array | semmle.label | access to array |
-| tests.c:28:22:28:28 | access to array | semmle.label | access to array |
-| tests.c:29:28:29:31 | argv | semmle.label | argv |
-| tests.c:29:28:29:31 | argv | semmle.label | argv |
-| tests.c:29:28:29:34 | access to array | semmle.label | access to array |
-| tests.c:29:28:29:34 | access to array | semmle.label | access to array |
-| tests.c:31:15:31:23 | buffer100 | semmle.label | buffer100 |
-| tests.c:31:15:31:23 | buffer100 | semmle.label | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | semmle.label | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | semmle.label | buffer100 |
-| tests.c:34:10:34:13 | argv | semmle.label | argv |
-| tests.c:34:10:34:13 | argv | semmle.label | argv |
-| tests.c:34:10:34:16 | access to array | semmle.label | access to array |
-| tests.c:34:10:34:16 | access to array | semmle.label | access to array |
+| tests.c:16:26:16:29 | argv indirection | semmle.label | argv indirection |
+| tests.c:28:22:28:28 | access to array indirection | semmle.label | access to array indirection |
+| tests.c:29:28:29:34 | access to array indirection | semmle.label | access to array indirection |
+| tests.c:31:15:31:23 | scanf output argument | semmle.label | scanf output argument |
+| tests.c:33:21:33:29 | scanf output argument | semmle.label | scanf output argument |
+| tests.c:34:10:34:16 | access to array indirection | semmle.label | access to array indirection |
+subpaths
 #select
-| tests.c:28:3:28:9 | call to sprintf | tests.c:28:22:28:25 | argv | tests.c:28:22:28:28 | access to array | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:28:22:28:25 | argv | argv |
-| tests.c:29:3:29:9 | call to sprintf | tests.c:29:28:29:31 | argv | tests.c:29:28:29:34 | access to array | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:29:28:29:31 | argv | argv |
-| tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | buffer100 | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:31:15:31:23 | buffer100 | buffer100 |
-| tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | buffer100 | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:33:21:33:29 | buffer100 | buffer100 |
-| tests.c:34:25:34:33 | buffer100 | tests.c:34:10:34:13 | argv | tests.c:34:10:34:16 | access to array | This 'sscanf string argument' with input from $@ may overflow the destination. | tests.c:34:10:34:13 | argv | argv |
+| tests.c:28:3:28:9 | call to sprintf | tests.c:16:26:16:29 | argv indirection | tests.c:28:22:28:28 | access to array indirection | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |
+| tests.c:29:3:29:9 | call to sprintf | tests.c:16:26:16:29 | argv indirection | tests.c:29:28:29:34 | access to array indirection | This 'call to sprintf' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |
+| tests.c:31:15:31:23 | buffer100 | tests.c:31:15:31:23 | scanf output argument | tests.c:31:15:31:23 | scanf output argument | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:31:15:31:23 | scanf output argument | value read by scanf |
+| tests.c:33:21:33:29 | buffer100 | tests.c:33:21:33:29 | scanf output argument | tests.c:33:21:33:29 | scanf output argument | This 'scanf string argument' with input from $@ may overflow the destination. | tests.c:33:21:33:29 | scanf output argument | value read by scanf |
+| tests.c:34:25:34:33 | buffer100 | tests.c:16:26:16:29 | argv indirection | tests.c:34:10:34:16 | access to array indirection | This 'sscanf string argument' with input from $@ may overflow the destination. | tests.c:16:26:16:29 | argv indirection | a command-line argument |