Merge pull request #374 from smowton/smowton/feature/more-accurate-allocation-overflow

Improve accuracy of allocation-size-overflow by excluding len(...) calls that never see a large operand
This commit is contained in:
Chris Smowton
2020-10-14 13:25:45 +01:00
committed by GitHub
11 changed files with 3209 additions and 7 deletions

View File

@@ -131,7 +131,7 @@ build/testdb/go.dbscheme: upgrades/initial/go.dbscheme
.PHONY: sync-dataflow-libraries
sync-dataflow-libraries:
for f in DataFlowImpl.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll;\
for f in DataFlowImpl.qll DataFlowImpl2.qll DataFlowImplCommon.qll tainttracking1/TaintTrackingImpl.qll tainttracking2/TaintTrackingImpl.qll;\
do\
curl -s -o ./ql/src/semmle/go/dataflow/internal/$$f https://raw.githubusercontent.com/github/codeql/$(DATAFLOW_BRANCH)/java/ql/src/semmle/code/java/dataflow/internal/$$f;\
done

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* The accuracy of the `go/allocation-size-overflow` query was removed, excluding more false-positives in which a small array could be mistaken for one of unbounded size.

View File

@@ -23,9 +23,11 @@ import semmle.go.controlflow.BasicBlocks
import semmle.go.controlflow.ControlFlowGraph
import semmle.go.controlflow.IR
import semmle.go.dataflow.DataFlow
import semmle.go.dataflow.DataFlow2
import semmle.go.dataflow.GlobalValueNumbering
import semmle.go.dataflow.SSA
import semmle.go.dataflow.TaintTracking
import semmle.go.dataflow.TaintTracking2
import semmle.go.frameworks.Chi
import semmle.go.frameworks.Echo
import semmle.go.frameworks.Email

View File

@@ -0,0 +1,27 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `semmle.code.go.dataflow.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
* (intraprocedural) data flow, invoke `DataFlow::localFlow` or
* `DataFlow::LocalFlowStep` with arguments of type `DataFlow::Node`.
*/
import go
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis.
*/
module DataFlow2 {
import semmle.go.dataflow.internal.DataFlowImpl2
import Properties
}

View File

@@ -0,0 +1,7 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*/
module TaintTracking2 {
import semmle.go.dataflow.internal.tainttracking2.TaintTrackingImpl
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,115 @@
/**
* Provides an implementation of global (interprocedural) taint tracking.
* This file re-exports the local (intraprocedural) taint-tracking analysis
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
* exposed through the `Configuration` class. For some languages, this file
* exists in several identical copies, allowing queries to use multiple
* `Configuration` classes that depend on each other without introducing
* mutual recursion among those configurations.
*/
import TaintTrackingParameter::Public
private import TaintTrackingParameter::Private
/**
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
* this abstract class.
*
* A taint-tracking configuration is a special data flow configuration
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
* necessarily preserve values but are still relevant from a taint tracking
* perspective. (For example, string concatenation, where one of the operands
* is tainted.)
*
* To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string. For example, write
*
* ```ql
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isSanitizer`.
* // Optionally override `isSanitizerIn`.
* // Optionally override `isSanitizerOut`.
* // Optionally override `isSanitizerGuard`.
* // Optionally override `isAdditionalTaintStep`.
* }
* ```
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```ql
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but it is unsupported to depend on
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
* overridden predicates that define sources, sinks, or additional steps.
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
*/
abstract class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
/**
* Holds if `source` is a relevant taint source.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
/**
* Holds if `sink` is a relevant taint sink.
*
* The smaller this predicate is, the faster `hasFlow()` will converge.
*/
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
/** Holds if the node `node` is a taint sanitizer. */
predicate isSanitizer(DataFlow::Node node) { none() }
final override predicate isBarrier(DataFlow::Node node) {
isSanitizer(node) or
defaultTaintSanitizer(node)
}
/** Holds if taint propagation into `node` is prohibited. */
predicate isSanitizerIn(DataFlow::Node node) { none() }
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
/** Holds if taint propagation out of `node` is prohibited. */
predicate isSanitizerOut(DataFlow::Node node) { none() }
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
/**
* Holds if the additional taint propagation step from `node1` to `node2`
* must be taken into account in the analysis.
*/
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isAdditionalTaintStep(node1, node2) or
defaultAdditionalTaintStep(node1, node2)
}
/**
* Holds if taint may flow from `source` to `sink` for this configuration.
*/
// overridden to provide taint-tracking specific qldoc
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
super.hasFlow(source, sink)
}
}

View File

@@ -0,0 +1,5 @@
import semmle.go.dataflow.internal.TaintTrackingUtil as Public
module Private {
import semmle.go.dataflow.DataFlow2::DataFlow2 as DataFlow
}

View File

@@ -13,6 +13,29 @@ import go
module AllocationSizeOverflow {
import AllocationSizeOverflowCustomizations::AllocationSizeOverflow
/**
* A taint-tracking configuration for identifying `len(...)` calls whose argument may be large.
*/
class FindLargeLensConfiguration extends TaintTracking2::Configuration {
FindLargeLensConfiguration() { this = "AllocationSizeOverflow::FindLargeLens" }
override predicate isSource(DataFlow::Node nd) { nd instanceof Source }
override predicate isSink(DataFlow::Node nd) { nd = Builtin::len().getACall().getArgument(0) }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
override predicate isSanitizer(DataFlow::Node nd) { nd instanceof Sanitizer }
}
private DataFlow::CallNode getALargeLenCall() {
exists(FindLargeLensConfiguration config, DataFlow::Node lenArg | config.hasFlow(_, lenArg) |
result.getArgument(0) = lenArg
)
}
/**
* A taint-tracking configuration for identifying allocation-size overflows.
*/
@@ -33,6 +56,12 @@ module AllocationSizeOverflow {
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
additionalStep(pred, succ)
or
exists(DataFlow::CallNode c |
c = getALargeLenCall() and
pred = c.getArgument(0) and
succ = c
)
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {

View File

@@ -178,12 +178,6 @@ module AllocationSizeOverflow {
* or through an arithmetic operation (other than remainder).
*/
predicate additionalStep(DataFlow::Node pred, DataFlow::Node succ) {
exists(DataFlow::CallNode c |
c = Builtin::len().getACall() and
pred = c.getArgument(0) and
succ = c
)
or
succ.asExpr().(ArithmeticExpr).getAnOperand() = pred.asExpr() and
not succ.asExpr() instanceof RemExpr
}

View File

@@ -0,0 +1,36 @@
package main
import (
"io/ioutil"
"net/http"
)
// Two cases exposing the difference between tainted array contents
// and the array itself being tainted. One uses an explicitly allocated
// array, the other the implicit array allocated to hold varargs; in both
// cases the array has fixed small size but has a tainted integer written
// to one of its cells.
func f(request *http.Request) []byte {
f, _ := ioutil.ReadAll(request.Body)
array := make([]int, 1)
array[0] = len(f)
alloc := make([]byte, len(array)+3) // GOOD: len(array) == 1
return alloc
}
func takesBoundedVarargs(args ...interface{}) []byte {
alloc := make([]byte, len(args)+1) // GOOD (when called from `callsVarargs`): len(args) == 1
return alloc
}
func callsVarargs(request *http.Request) []byte {
f, _ := ioutil.ReadAll(request.Body)
return takesBoundedVarargs(len(f))
}