C++: Fix IR Dataflow PR feedback

This commit is contained in:
Dave Bartolomeo
2018-12-04 07:31:13 -08:00
parent 2822d14588
commit e11b4b6c40
5 changed files with 58 additions and 196 deletions

View File

@@ -1,13 +1,15 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
* _sink_. This library differs from the one in `semmle.code.cpp.dataflow` in that
* this library uses the IR (Intermediate Representation) library, which provides
* a more precise semantic representation of the program, whereas the other dataflow
* library uses the more syntax-oriented ASTs. This library should provide more accurate
* results than the AST-based library in most scenarios.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `semmle.code.cpp.dataflow.TaintTracking`.
* dereferences or array indexing.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local

View File

@@ -1,189 +0,0 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*
* We define _taint propagation_ informally to mean that a substantial part of
* the information from the source is preserved at the sink. For example, taint
* propagates from `x` to `x + 100`, but it does not propagate from `x` to `x >
* 100` since we consider a single bit of information to be too little.
*/
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.ir.dataflow.DataFlow2
private import semmle.code.cpp.ir.IR
module TaintTracking {
/**
* A configuration of interprocedural taint tracking analysis. This defines
* sources, sinks, and any other configurable aspect of the analysis. Each
* use of the taint tracking library must define its own unique extension of
* this abstract class.
*
* A taint-tracking configuration is a special data flow configuration
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
* necessarily preserve values but are still relevant from a taint-tracking
* perspective. (For example, string concatenation, where one of the operands
* is tainted.)
*
* To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string. For example, write
*
* ```
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Optionally override `isSanitizer`.
* // Optionally override `isAdditionalTaintStep`.
* }
* ```
*
* Then, to query whether there is flow between some `source` and `sink`,
* write
*
* ```
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
* ```
*
* Multiple configurations can coexist, but it is unsupported to depend on a
* `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
* overridden predicates that define sources, sinks, or additional steps.
* Instead, the dependency should go to a `TaintTracking::Configuration2` or
* a `DataFlow{2,3,4}::Configuration`.
*/
abstract class Configuration extends DataFlow::Configuration {
bindingset[this]
Configuration() { any() }
/** Holds if `source` is a taint source. */
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
/** Holds if `sink` is a taint sink. */
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
/**
* Holds if taint should not flow into `node`.
*/
predicate isSanitizer(DataFlow::Node node) { none() }
/**
* Holds if the additional taint propagation step
* from `source` to `target` must be taken into account in the analysis.
* This step will only be followed if `target` is not in the `isSanitizer`
* predicate.
*/
predicate isAdditionalTaintStep(DataFlow::Node source,
DataFlow::Node target)
{ none() }
final override
predicate isBarrier(DataFlow::Node node) { isSanitizer(node) }
final override
predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
this.isAdditionalTaintStep(source, target)
or
localTaintStep(source, target)
}
}
/**
* A taint-tracking configuration that is backed by the `DataFlow2` library
* instead of `DataFlow`. Use this class when taint-tracking configurations
* or data-flow configurations must depend on each other.
*
* See `TaintTracking::Configuration` for the full documentation.
*/
abstract class Configuration2 extends DataFlow2::Configuration {
bindingset[this]
Configuration2() { any() }
/** Holds if `source` is a taint source. */
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSource(DataFlow::Node source);
/** Holds if `sink` is a taint sink. */
// overridden to provide taint-tracking specific qldoc
abstract override predicate isSink(DataFlow::Node sink);
/**
* Holds if taint should not flow into `node`.
*/
predicate isSanitizer(DataFlow::Node node) { none() }
/**
* Holds if the additional taint propagation step
* from `source` to `target` must be taken into account in the analysis.
* This step will only be followed if `target` is not in the `isSanitizer`
* predicate.
*/
predicate isAdditionalTaintStep(DataFlow::Node source,
DataFlow::Node target)
{ none() }
final override
predicate isBarrier(DataFlow::Node node) { isSanitizer(node) }
final override
predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
this.isAdditionalTaintStep(source, target)
or
localTaintStep(source, target)
}
}
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Taint can flow into using ordinary data flow.
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
// Taint can flow through expressions that alter the value but preserve
// more than one bit of it _or_ expressions that follow data through
// pointer indirections.
not nodeTo instanceof CompareInstruction and
not nodeTo instanceof InvokeInstruction and
nodeTo.getAnOperand() = nodeFrom
}
/**
* Holds if taint may propagate from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*/
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) {
localTaintStep*(source, sink)
}
/**
* Holds if we do not propagate taint from `fromExpr` to `toExpr`
* even though `toExpr` is the AST parent of `fromExpr`.
*/
private predicate noParentExprFlow(Expr fromExpr, Expr toExpr) {
fromExpr = toExpr.(ConditionalExpr).getCondition()
or
fromExpr = toExpr.(CommaExpr).getLeftOperand()
or
fromExpr = toExpr.(AssignExpr).getLValue() // LHS of `=`
}
/**
* Holds if we do not propagate taint from a child of `e` to `e` itself.
*/
private predicate noFlowFromChildExpr(Expr e) {
e instanceof ComparisonOperation
or
e instanceof LogicalAndExpr
or
e instanceof LogicalOrExpr
or
e instanceof Call
or
e instanceof SizeofOperator
or
e instanceof AlignofOperator
}
}

View File

@@ -3,9 +3,7 @@ private import DataFlowUtil
/**
* A data flow node that occurs as the argument of a call and is passed as-is
* to the callable. Arguments that are wrapped in an implicit varargs array
* creation are not included, but the implicitly created array is.
* Instance arguments are also included.
* to the callable. Instance arguments (`this` pointer) are also included.
*/
class ArgumentNode extends Node {
ArgumentNode() {

View File

@@ -0,0 +1,14 @@
| test.cpp:6:12:6:17 | test.cpp:21:8:21:9 | IR only |
| test.cpp:66:30:66:36 | test.cpp:71:8:71:9 | AST only |
| test.cpp:89:28:89:34 | test.cpp:92:8:92:14 | IR only |
| test.cpp:100:13:100:18 | test.cpp:103:10:103:12 | AST only |
| test.cpp:120:9:120:20 | test.cpp:126:8:126:19 | AST only |
| test.cpp:122:18:122:30 | test.cpp:132:22:132:23 | IR only |
| test.cpp:122:18:122:30 | test.cpp:140:22:140:23 | IR only |
| test.cpp:136:27:136:32 | test.cpp:137:27:137:28 | AST only |
| test.cpp:136:27:136:32 | test.cpp:140:22:140:23 | AST only |
| test.cpp:395:17:395:22 | test.cpp:397:10:397:18 | AST only |
| test.cpp:421:13:421:18 | test.cpp:423:10:423:14 | AST only |
| true_upon_entry.cpp:9:11:9:16 | true_upon_entry.cpp:13:8:13:8 | IR only |
| true_upon_entry.cpp:62:11:62:16 | true_upon_entry.cpp:66:8:66:8 | IR only |
| true_upon_entry.cpp:98:11:98:16 | true_upon_entry.cpp:105:8:105:8 | IR only |

View File

@@ -0,0 +1,37 @@
import cpp
import DataflowTestCommon as ASTCommon
import IRDataflowTestCommon as IRCommon
import semmle.code.cpp.dataflow.DataFlow as ASTDataFlow
import semmle.code.cpp.ir.dataflow.DataFlow as IRDataFlow
predicate astFlow(Location sourceLocation, Location sinkLocation) {
exists(ASTDataFlow::DataFlow::Node source, ASTDataFlow::DataFlow::Node sink,
ASTCommon::TestAllocationConfig cfg |
cfg.hasFlow(source, sink) and
sourceLocation = source.getLocation() and
sinkLocation = sink.getLocation()
)
}
predicate irFlow(Location sourceLocation, Location sinkLocation) {
exists(IRDataFlow::DataFlow::Node source, IRDataFlow::DataFlow::Node sink,
IRCommon::TestAllocationConfig cfg |
cfg.hasFlow(source, sink) and
sourceLocation = source.getLocation() and
sinkLocation = sink.getLocation()
)
}
from Location sourceLocation, Location sinkLocation, string note
where
(
astFlow(sourceLocation, sinkLocation) and
not irFlow(sourceLocation, sinkLocation) and
note = "AST only"
) or
(
irFlow(sourceLocation, sinkLocation) and
not astFlow(sourceLocation, sinkLocation) and
note = "IR only"
)
select sourceLocation.toString(), sinkLocation.toString(), note