Merge pull request #374 from smowton/smowton/feature/more-accurate-allocation-overflow

Improve accuracy of allocation-size-overflow by excluding len(...) calls that never see a large operand
2026-01-29 14:23:03 +01:00 · 2020-10-14 13:25:45 +01:00
parent 59f8717ca2 83a7411a05
commit 0afa0e75c4
11 changed files with 3209 additions and 7 deletions
--- a/2
+++ b/2
@@ -131,7 +131,7 @@ build/testdb/go.dbscheme: upgrades/initial/go.dbscheme

 .PHONY: sync-dataflow-libraries
 sync-dataflow-libraries:
-	for f in DataFlowImpl.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll;\
+	for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll;\
 	do\
 		curl -s -o ./ql/src/semmle/go/dataflow/internal/$$f https://raw.githubusercontent.com/github/codeql/$(DATAFLOW_BRANCH)/java/ql/src/semmle/code/java/dataflow/internal/$$f;\
 	done
--- a/change-notes/2020-10-14-allocation-overflow-accuracy.md
+++ b/change-notes/2020-10-14-allocation-overflow-accuracy.md
@@ -0,0 +1,2 @@
+lgtm,codescanning
+* The accuracy of the `go/allocation-size-overflow` query was removed, excluding more false-positives in which a small array could be mistaken for one of unbounded size.
--- a/ql/src/go.qll
+++ b/ql/src/go.qll
@@ -23,9 +23,11 @@ import semmle.go.controlflow.BasicBlocks
 import semmle.go.controlflow.ControlFlowGraph
 import semmle.go.controlflow.IR
 import semmle.go.dataflow.DataFlow
+import semmle.go.dataflow.DataFlow2
 import semmle.go.dataflow.GlobalValueNumbering
 import semmle.go.dataflow.SSA
 import semmle.go.dataflow.TaintTracking
+import semmle.go.dataflow.TaintTracking2
 import semmle.go.frameworks.Chi
 import semmle.go.frameworks.Echo
 import semmle.go.frameworks.Email
--- a/ql/src/semmle/go/dataflow/DataFlow2.qll
+++ b/ql/src/semmle/go/dataflow/DataFlow2.qll
@@ -0,0 +1,27 @@
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis: deciding whether data can flow from a _source_ to a
+ * _sink_.
+ *
+ * Unless configured otherwise, _flow_ means that the exact value of
+ * the source may reach the sink. We do not track flow across pointer
+ * dereferences or array indexing. To track these types of flow, where the
+ * exact value may not be preserved, import
+ * `semmle.code.go.dataflow.TaintTracking`.
+ *
+ * To use global (interprocedural) data flow, extend the class
+ * `DataFlow::Configuration` as documented on that class. To use local
+ * (intraprocedural) data flow, invoke `DataFlow::localFlow` or
+ * `DataFlow::LocalFlowStep` with arguments of type `DataFlow::Node`.
+ */
+
+import go
+
+/**
+ * Provides a library for local (intra-procedural) and global (inter-procedural)
+ * data flow analysis.
+ */
+module DataFlow2 {
+  import semmle.go.dataflow.internal.DataFlowImpl2
+  import Properties
+}
--- a/ql/src/semmle/go/dataflow/TaintTracking2.qll
+++ b/ql/src/semmle/go/dataflow/TaintTracking2.qll
@@ -0,0 +1,7 @@
+/**
+ * Provides classes for performing local (intra-procedural) and
+ * global (inter-procedural) taint-tracking analyses.
+ */
+module TaintTracking2 {
+  import semmle.go.dataflow.internal.tainttracking2.TaintTrackingImpl
+}
--- a/ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll
+++ b/ql/src/semmle/go/dataflow/internal/DataFlowImpl2.qll
--- a/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
+++ b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll
@@ -0,0 +1,115 @@
+/**
+ * Provides an implementation of global (interprocedural) taint tracking.
+ * This file re-exports the local (intraprocedural) taint-tracking analysis
+ * from `TaintTrackingParameter::Public` and adds a global analysis, mainly
+ * exposed through the `Configuration` class. For some languages, this file
+ * exists in several identical copies, allowing queries to use multiple
+ * `Configuration` classes that depend on each other without introducing
+ * mutual recursion among those configurations.
+ */
+
+import TaintTrackingParameter::Public
+private import TaintTrackingParameter::Private
+
+/**
+ * A configuration of interprocedural taint tracking analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the taint tracking library must define its own unique extension of
+ * this abstract class.
+ *
+ * A taint-tracking configuration is a special data flow configuration
+ * (`DataFlow::Configuration`) that allows for flow through nodes that do not
+ * necessarily preserve values but are still relevant from a taint tracking
+ * perspective. (For example, string concatenation, where one of the operands
+ * is tainted.)
+ *
+ * To create a configuration, extend this class with a subclass whose
+ * characteristic predicate is a unique singleton string. For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends TaintTracking::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isSanitizer`.
+ *   // Optionally override `isSanitizerIn`.
+ *   // Optionally override `isSanitizerOut`.
+ *   // Optionally override `isSanitizerGuard`.
+ *   // Optionally override `isAdditionalTaintStep`.
+ * }
+ * ```
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but it is unsupported to depend on
+ * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
+ * overridden predicates that define sources, sinks, or additional steps.
+ * Instead, the dependency should go to a `TaintTracking2::Configuration` or a
+ * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
+ */
+abstract class Configuration extends DataFlow::Configuration {
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant taint source.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSource(DataFlow::Node source);
+
+  /**
+   * Holds if `sink` is a relevant taint sink.
+   *
+   * The smaller this predicate is, the faster `hasFlow()` will converge.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  abstract override predicate isSink(DataFlow::Node sink);
+
+  /** Holds if the node `node` is a taint sanitizer. */
+  predicate isSanitizer(DataFlow::Node node) { none() }
+
+  final override predicate isBarrier(DataFlow::Node node) {
+    isSanitizer(node) or
+    defaultTaintSanitizer(node)
+  }
+
+  /** Holds if taint propagation into `node` is prohibited. */
+  predicate isSanitizerIn(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
+
+  /** Holds if taint propagation out of `node` is prohibited. */
+  predicate isSanitizerOut(DataFlow::Node node) { none() }
+
+  final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
+
+  /** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
+  predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
+
+  final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
+
+  /**
+   * Holds if the additional taint propagation step from `node1` to `node2`
+   * must be taken into account in the analysis.
+   */
+  predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
+
+  final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
+    isAdditionalTaintStep(node1, node2) or
+    defaultAdditionalTaintStep(node1, node2)
+  }
+
+  /**
+   * Holds if taint may flow from `source` to `sink` for this configuration.
+   */
+  // overridden to provide taint-tracking specific qldoc
+  override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
+    super.hasFlow(source, sink)
+  }
+}
--- a/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
+++ b/ql/src/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll
@@ -0,0 +1,5 @@
+import semmle.go.dataflow.internal.TaintTrackingUtil as Public
+
+module Private {
+  import semmle.go.dataflow.DataFlow2::DataFlow2 as DataFlow
+}
--- a/ql/src/semmle/go/security/AllocationSizeOverflow.qll
+++ b/ql/src/semmle/go/security/AllocationSizeOverflow.qll
@@ -13,6 +13,29 @@ import go
 module AllocationSizeOverflow {
  import AllocationSizeOverflowCustomizations::AllocationSizeOverflow

+  /**
+   * A taint-tracking configuration for identifying `len(...)` calls whose argument may be large.
+   */
+  class FindLargeLensConfiguration extends TaintTracking2::Configuration {
+    FindLargeLensConfiguration() { this = "AllocationSizeOverflow::FindLargeLens" }
+
+    override predicate isSource(DataFlow::Node nd) { nd instanceof Source }
+
+    override predicate isSink(DataFlow::Node nd) { nd = Builtin::len().getACall().getArgument(0) }
+
+    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+      guard instanceof SanitizerGuard
+    }
+
+    override predicate isSanitizer(DataFlow::Node nd) { nd instanceof Sanitizer }
+  }
+
+  private DataFlow::CallNode getALargeLenCall() {
+    exists(FindLargeLensConfiguration config, DataFlow::Node lenArg | config.hasFlow(_, lenArg) |
+      result.getArgument(0) = lenArg
+    )
+  }
+
  /**
   * A taint-tracking configuration for identifying allocation-size overflows.
   */
@@ -33,6 +56,12 @@ module AllocationSizeOverflow {

    override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
      additionalStep(pred, succ)
+      or
+      exists(DataFlow::CallNode c |
+        c = getALargeLenCall() and
+        pred = c.getArgument(0) and
+        succ = c
+      )
    }

    override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
--- a/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll
+++ b/ql/src/semmle/go/security/AllocationSizeOverflowCustomizations.qll
@@ -178,12 +178,6 @@ module AllocationSizeOverflow {
   * or through an arithmetic operation (other than remainder).
   */
  predicate additionalStep(DataFlow::Node pred, DataFlow::Node succ) {
-    exists(DataFlow::CallNode c |
-      c = Builtin::len().getACall() and
-      pred = c.getArgument(0) and
-      succ = c
-    )
-    or
    succ.asExpr().(ArithmeticExpr).getAnOperand() = pred.asExpr() and
    not succ.asExpr() instanceof RemExpr
  }
--- a/ql/test/query-tests/Security/CWE-190/array_vs_contents.go
+++ b/ql/test/query-tests/Security/CWE-190/array_vs_contents.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+	"io/ioutil"
+	"net/http"
+)
+
+// Two cases exposing the difference between tainted array contents
+// and the array itself being tainted. One uses an explicitly allocated
+// array, the other the implicit array allocated to hold varargs; in both
+// cases the array has fixed small size but has a tainted integer written
+// to one of its cells.
+
+func f(request *http.Request) []byte {
+
+	f, _ := ioutil.ReadAll(request.Body)
+	array := make([]int, 1)
+	array[0] = len(f)
+	alloc := make([]byte, len(array)+3) // GOOD: len(array) == 1
+	return alloc
+
+}
+
+func takesBoundedVarargs(args ...interface{}) []byte {
+
+	alloc := make([]byte, len(args)+1) // GOOD (when called from `callsVarargs`): len(args) == 1
+	return alloc
+
+}
+
+func callsVarargs(request *http.Request) []byte {
+
+	f, _ := ioutil.ReadAll(request.Body)
+	return takesBoundedVarargs(len(f))
+
+}