C++: IR-based dataflow

This commit is contained in:
Dave Bartolomeo
2018-11-29 10:45:29 -08:00
parent d933152a54
commit 58f7596519
23 changed files with 7716 additions and 53 deletions

View File

@@ -1,55 +1,76 @@
{
"C++ IR Instruction": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll"
],
"C++ IR IRBlock": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll"
],
"C++ IR IRVariable": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll"
],
"C++ IR FunctionIR": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll"
],
"C++ IR Operand": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll"
],
"C++ IR IRImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll"
],
"C++ IR IRSanityImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll"
],
"C++ IR PrintIRImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll"
],
"C++ SSA AliasAnalysis": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll"
],
"C++ SSA SSAConstruction": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll"
],
"C++ IR ValueNumber": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll"
]
"DataFlow Java/C++": [
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll",
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll",
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll",
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl4.qll",
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl5.qll",
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplDepr.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll"
],
"DataFlow Java/C++ Common": [
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll",
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll",
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll"
],
"C++ IR Instruction": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll"
],
"C++ IR IRBlock": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll"
],
"C++ IR IRVariable": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll"
],
"C++ IR FunctionIR": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll"
],
"C++ IR Operand": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll"
],
"C++ IR IRImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll"
],
"C++ IR IRSanityImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll"
],
"C++ IR PrintIRImpl": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll"
],
"C++ SSA AliasAnalysis": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll"
],
"C++ SSA SSAConstruction": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll"
],
"C++ IR ValueNumber": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll"
]
}

View File

@@ -0,0 +1,21 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `semmle.code.cpp.ir.dataflow.TaintTracking`.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
* (intraprocedural) data flow, invoke `DataFlow::localFlow` or
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
*/
import cpp
/**
* The namespace of the IR-based data flow library. Everything defined in
* `semmle.code.cpp.ir.dataflow.internal.DataFlowImpl` is made available as
* `DataFlow::<name>` through the import below.
*/
module DataFlow {
import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl
}

View File

@@ -0,0 +1,38 @@
/**
* Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use
* this module when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow2 {
  import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl2

  /**
   * Guards against mutual recursion between the member predicates of
   * `Configuration` that users override and the rest of the data-flow library.
   * Good performance cannot be guaranteed in the presence of such recursion, so
   * it should be replaced by using more than one copy of the data flow library.
   * Four copies are available: `DataFlow` through `DataFlow4`.
   *
   * A `strictcount` is never negative, so the three `strictcount(...) < 0`
   * disjuncts of `hasFlow` contribute no results; `hasFlow` yields exactly
   * what `super.hasFlow` yields. Their only effect is that `hasFlow` refers to
   * the overridable predicates from inside an aggregate.
   */
  abstract private class ConfigurationRecursionPrevention extends Configuration {
    bindingset[this]
    ConfigurationRecursionPrevention() { any() }

    override predicate hasFlow(Node source, Node sink) {
      strictcount(Node src | this.isSource(src)) < 0
      or
      strictcount(Node snk | this.isSink(snk)) < 0
      or
      strictcount(Node stepFrom, Node stepTo | this.isAdditionalFlowStep(stepFrom, stepTo)) < 0
      or
      super.hasFlow(source, sink)
    }
  }
}

View File

@@ -0,0 +1,38 @@
/**
* Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use
* this module when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow3 {
  import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl3

  /**
   * Guards against mutual recursion between the member predicates of
   * `Configuration` that users override and the rest of the data-flow library.
   * Good performance cannot be guaranteed in the presence of such recursion, so
   * it should be replaced by using more than one copy of the data flow library.
   * Four copies are available: `DataFlow` through `DataFlow4`.
   *
   * A `strictcount` is never negative, so the three `strictcount(...) < 0`
   * disjuncts of `hasFlow` contribute no results; `hasFlow` yields exactly
   * what `super.hasFlow` yields. Their only effect is that `hasFlow` refers to
   * the overridable predicates from inside an aggregate.
   */
  abstract private class ConfigurationRecursionPrevention extends Configuration {
    bindingset[this]
    ConfigurationRecursionPrevention() { any() }

    override predicate hasFlow(Node source, Node sink) {
      strictcount(Node src | this.isSource(src)) < 0
      or
      strictcount(Node snk | this.isSink(snk)) < 0
      or
      strictcount(Node stepFrom, Node stepTo | this.isAdditionalFlowStep(stepFrom, stepTo)) < 0
      or
      super.hasFlow(source, sink)
    }
  }
}

View File

@@ -0,0 +1,38 @@
/**
* Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use
* this module when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow4 {
  import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl4

  /**
   * Guards against mutual recursion between the member predicates of
   * `Configuration` that users override and the rest of the data-flow library.
   * Good performance cannot be guaranteed in the presence of such recursion, so
   * it should be replaced by using more than one copy of the data flow library.
   * Four copies are available: `DataFlow` through `DataFlow4`.
   *
   * A `strictcount` is never negative, so the three `strictcount(...) < 0`
   * disjuncts of `hasFlow` contribute no results; `hasFlow` yields exactly
   * what `super.hasFlow` yields. Their only effect is that `hasFlow` refers to
   * the overridable predicates from inside an aggregate.
   */
  abstract private class ConfigurationRecursionPrevention extends Configuration {
    bindingset[this]
    ConfigurationRecursionPrevention() { any() }

    override predicate hasFlow(Node source, Node sink) {
      strictcount(Node src | this.isSource(src)) < 0
      or
      strictcount(Node snk | this.isSink(snk)) < 0
      or
      strictcount(Node stepFrom, Node stepTo | this.isAdditionalFlowStep(stepFrom, stepTo)) < 0
      or
      super.hasFlow(source, sink)
    }
  }
}

View File

@@ -0,0 +1,189 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*
* We define _taint propagation_ informally to mean that a substantial part of
* the information from the source is preserved at the sink. For example, taint
* propagates from `x` to `x + 100`, but it does not propagate from `x` to `x >
* 100` since we consider a single bit of information to be too little.
*/
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.ir.dataflow.DataFlow2
private import semmle.code.cpp.ir.IR
module TaintTracking {
/**
 * A configuration of interprocedural taint tracking analysis. It specifies
 * the sources, the sinks, and every other configurable aspect of the
 * analysis. Each use of the taint tracking library must define its own unique
 * extension of this abstract class.
 *
 * A taint-tracking configuration is a special data flow configuration
 * (`DataFlow::Configuration`) that additionally allows flow through nodes
 * that do not necessarily preserve values but still matter for taint
 * tracking, for example a string concatenation where one of the operands is
 * tainted.
 *
 * To create a configuration, extend this class with a subclass whose
 * characteristic predicate is a unique singleton string. For example, write
 *
 * ```
 * class MyAnalysisConfiguration extends TaintTracking::Configuration {
 *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
 *   // Override `isSource` and `isSink`.
 *   // Optionally override `isSanitizer`.
 *   // Optionally override `isAdditionalTaintStep`.
 * }
 * ```
 *
 * Then, to query whether there is flow between some `source` and `sink`,
 * write
 *
 * ```
 * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
 * ```
 *
 * Multiple configurations can coexist, but it is unsupported to depend on a
 * `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
 * overridden predicates that define sources, sinks, or additional steps.
 * Instead, the dependency should go to a `TaintTracking::Configuration2` or
 * a `DataFlow{2,3,4}::Configuration`.
 */
abstract class Configuration extends DataFlow::Configuration {
  bindingset[this]
  Configuration() { any() }

  /** Holds if `source` is a taint source. */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSource(DataFlow::Node source);

  /** Holds if `sink` is a taint sink. */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSink(DataFlow::Node sink);

  /**
   * Holds if taint should not flow into `node`.
   */
  predicate isSanitizer(DataFlow::Node node) { none() }

  /**
   * Holds if the additional taint propagation step
   * from `source` to `target` must be taken into account in the analysis.
   * This step will only be followed if `target` is not in the `isSanitizer`
   * predicate.
   */
  predicate isAdditionalTaintStep(DataFlow::Node source, DataFlow::Node target) { none() }

  /** Maps the data-flow barrier onto `isSanitizer`; not overridable. */
  final override predicate isBarrier(DataFlow::Node node) { this.isSanitizer(node) }

  /**
   * Supplies the underlying data-flow library with every local taint step
   * plus the user-provided `isAdditionalTaintStep` steps; not overridable.
   */
  final override predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
    localTaintStep(source, target)
    or
    this.isAdditionalTaintStep(source, target)
  }
}
/**
 * A taint-tracking configuration that is backed by the `DataFlow2` library
 * instead of `DataFlow`. Use this class when taint-tracking configurations
 * or data-flow configurations must depend on each other.
 *
 * See `TaintTracking::Configuration` for the full documentation.
 */
abstract class Configuration2 extends DataFlow2::Configuration {
  bindingset[this]
  Configuration2() { any() }

  /** Holds if `source` is a taint source. */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSource(DataFlow::Node source);

  /** Holds if `sink` is a taint sink. */
  // overridden to provide taint-tracking specific qldoc
  abstract override predicate isSink(DataFlow::Node sink);

  /**
   * Holds if taint should not flow into `node`.
   */
  predicate isSanitizer(DataFlow::Node node) { none() }

  /**
   * Holds if the additional taint propagation step
   * from `source` to `target` must be taken into account in the analysis.
   * This step will only be followed if `target` is not in the `isSanitizer`
   * predicate.
   */
  predicate isAdditionalTaintStep(DataFlow::Node source, DataFlow::Node target) { none() }

  /** Maps the data-flow barrier onto `isSanitizer`; not overridable. */
  final override predicate isBarrier(DataFlow::Node node) { this.isSanitizer(node) }

  /**
   * Supplies the underlying data-flow library with every local taint step
   * plus the user-provided `isAdditionalTaintStep` steps; not overridable.
   */
  final override predicate isAdditionalFlowStep(DataFlow::Node source, DataFlow::Node target) {
    localTaintStep(source, target)
    or
    this.isAdditionalTaintStep(source, target)
  }
}
/**
 * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
 * (intra-procedural) step.
 */
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  // Every ordinary local data flow step also carries taint.
  DataFlow::localFlowStep(nodeFrom, nodeTo)
  or
  // Taint also flows from an operand to the instruction consuming it, which
  // covers expressions that alter the value but preserve more than one bit of
  // it _or_ expressions that follow data through pointer indirections.
  // Comparison and invoke instructions are excluded as targets.
  (
    nodeFrom = nodeTo.getAnOperand() and
    not nodeTo instanceof InvokeInstruction and
    not nodeTo instanceof CompareInstruction
  )
}
/**
* Holds if taint may propagate from `source` to `sink` in zero or more local
* (intra-procedural) steps.
*/
predicate localTaint(DataFlow::Node source, DataFlow::Node sink) {
// Reflexive, transitive closure of `localTaintStep`.
localTaintStep*(source, sink)
}
/**
 * Holds if we do not propagate taint from `fromExpr` to `toExpr`
 * even though `toExpr` is the AST parent of `fromExpr`.
 *
 * This covers the condition of a conditional expression, the left operand of
 * a comma expression, and the left-hand side of an assignment.
 *
 * NOTE(review): nothing in this module refers to this private predicate, so
 * it looks like dead code - confirm before relying on it.
 */
private predicate noParentExprFlow(Expr fromExpr, Expr toExpr) {
  toExpr.(AssignExpr).getLValue() = fromExpr // LHS of `=`
  or
  toExpr.(CommaExpr).getLeftOperand() = fromExpr
  or
  toExpr.(ConditionalExpr).getCondition() = fromExpr
}
/**
 * Holds if we do not propagate taint from a child of `e` to `e` itself.
 *
 * This is the case for comparisons, logical `&&` and `||`, calls, and the
 * `sizeof` and `alignof` operators.
 *
 * NOTE(review): nothing in this module refers to this private predicate, so
 * it looks like dead code - confirm before relying on it.
 */
private predicate noFlowFromChildExpr(Expr e) {
  e instanceof Call
  or
  e instanceof ComparisonOperation
  or
  e instanceof LogicalOrExpr
  or
  e instanceof LogicalAndExpr
  or
  e instanceof AlignofOperator
  or
  e instanceof SizeofOperator
}
}

View File

@@ -0,0 +1,73 @@
private import cpp
private import DataFlowPrivate
/**
 * Gets a viable implementation of the target of the call `ma`. This
 * implementation simply yields `ma.getTarget()`.
 */
Function viableImpl(MethodAccess ma) { ma.getTarget() = result }
/**
 * Gets a viable target of the call `call`. This implementation simply yields
 * `call.getTarget()`.
 */
Function viableCallable(Call call) { call.getTarget() = result }
/**
* Holds if the call context `ctx` reduces the set of viable dispatch
* targets of `ma` in `c`.
*
* In this implementation the predicate never holds: its body is `none()`.
*/
predicate reducedViableImplInCallContext(MethodAccess ma, Callable c, Call ctx) {
none()
}
/**
 * Gets a viable dispatch target of `ma` in the context `ctx`. This is
 * restricted to those `ma`s for which a context might make a difference.
 *
 * As written, the result is any `viableImpl(ma)`, provided that `ctx` is a
 * call whose viable callee is the function enclosing `ma`.
 */
private Method viableImplInCallContext(MethodAccess ma, Call ctx) {
  // stub implementation
  ma.getEnclosingFunction() = viableCallable(ctx) and
  viableImpl(ma) = result
}
/**
 * Gets a viable dispatch target of `ma` in the context `ctx`. This is
 * restricted to those `ma`s for which the context makes a difference, that
 * is, those for which `reducedViableImplInCallContext` holds with `ctx`.
 */
Method prunedViableImplInCallContext(MethodAccess ma, Call ctx) {
  reducedViableImplInCallContext(ma, _, ctx) and
  viableImplInCallContext(ma, ctx) = result
}
/**
 * Holds if data might flow from `ma` to a return statement in some
 * configuration.
 *
 * Concretely: `ma` is not the direct child of an expression statement, and
 * the function enclosing `ma` contains a `return` statement that returns an
 * expression.
 */
private predicate maybeChainedReturn(MethodAccess ma) {
  not ma.getParent() instanceof ExprStmt and
  exists(ReturnStmt valueReturn |
    valueReturn.getEnclosingFunction() = ma.getEnclosingFunction() and
    exists(valueReturn.getExpr())
  )
}
/**
 * Holds if flow returning from `m` to `ma` might return further and if
 * this path restricts the set of call sites that can be returned to.
 *
 * The restriction is detected by counting: fewer calling contexts select `m`
 * as the target of `ma` than there are calls to the function enclosing `ma`.
 */
predicate reducedViableImplInReturn(Method m, MethodAccess ma) {
  maybeChainedReturn(ma) and
  exists(int allCallers, int callersSelectingM |
    m = viableImpl(ma) and
    // `strictcount`: there must be at least one call to the enclosing function
    allCallers = strictcount(Call ctx | viableCallable(ctx) = ma.getEnclosingFunction()) and
    callersSelectingM = count(Call ctx | m = viableImplInCallContext(ma, ctx)) and
    callersSelectingM < allCallers
  )
}
/**
 * Gets a viable dispatch target of `ma` in the context `ctx`. This is
 * restricted to those `ma`s and results for which the return flow from the
 * result to `ma` restricts the possible context `ctx`
 * (see `reducedViableImplInReturn`).
 */
Method prunedViableImplInCallContextReverse(MethodAccess ma, Call ctx) {
  viableImplInCallContext(ma, ctx) = result and
  reducedViableImplInReturn(result, ma)
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,284 @@
import DataFlowUtil
private import DataFlowPrivate
private import DataFlowDispatch
cached
private module ImplCommon {
/**
* Holds if `p` is the `i`th parameter of a viable dispatch target of `call`.
* The instance parameter is considered to have index `-1`.
*
* The dispatch targets considered are those given by `viableCallable(call)`.
*/
pragma[nomagic]
private predicate viableParam(Call call, int i, ParameterNode p) {
exists(Callable callable |
callable = viableCallable(call) and
p.isParameterOf(callable, i)
)
}
/**
* Holds if `arg` is a possible argument to `p` taking virtual dispatch into account.
*
* That is, some call has `arg` at position `i`, and `p` is the `i`th parameter
* of one of that call's viable targets (see `viableParam`).
*/
cached
predicate viableParamArg(ParameterNode p, ArgumentNode arg) {
exists(int i, Call call |
viableParam(call, i, p) and
arg.argumentOf(call, i)
)
}
/**
* Holds if `p` can flow to `node` in the same callable using only
* value-preserving steps.
*
* The relation is recursive: it starts at `p` itself and is extended either
* by a local flow step or by flow through a call.
*/
private predicate parameterValueFlow(ParameterNode p, Node node) {
// base case: a parameter flows to itself
p = node
or
// extend an existing flow by one local flow step, as long as the type of
// `node` is compatible with the type of `p`
exists(Node mid |
parameterValueFlow(p, mid) and
localFlowStep(mid, node) and
compatibleTypes(p.getType(), node.getType())
)
or
// flow through a callable
exists(Node arg |
parameterValueFlow(p, arg) and
argumentValueFlowsThrough(arg, node) and
compatibleTypes(p.getType(), node.getType())
)
}
/**
* Holds if `p` can flow to a `ReturnNode` in the same callable using only
* value-preserving steps.
*
* This is `parameterValueFlow` restricted to targets that are `ReturnNode`s,
* with the target itself projected away.
*/
cached
predicate parameterValueFlowsThrough(ParameterNode p) {
exists(ReturnNode ret | parameterValueFlow(p, ret))
}
/**
* Holds if `arg` flows through `call` using only value-preserving steps.
*
* This requires that `arg` is an argument (at any position) of the call
* expression underlying `call`, that a parameter `arg` may be passed to flows
* to a return node (`parameterValueFlowsThrough`), and that the types of
* `arg` and `call` are compatible.
*/
cached
predicate argumentValueFlowsThrough(ArgumentNode arg, ExprNode call) {
exists(ParameterNode param |
viableParamArg(param, arg) and
parameterValueFlowsThrough(param) and
arg.argumentOf(call.getExpr(), _) and
compatibleTypes(arg.getType(), call.getType())
)
}
/**
* Holds if `p` can flow to the pre-update node of `n` in the same callable
* using only value-preserving steps.
*
* The flow itself is computed by `parameterValueFlow`.
*/
cached
predicate parameterValueFlowsToUpdate(ParameterNode p, PostUpdateNode n) {
parameterValueFlow(p, n.getPreUpdateNode())
}
/**
* Holds if data can flow from `node1` to `node2` in one local step or a step
* through a value-preserving method.
*
* The two alternatives are `localFlowStep` and `argumentValueFlowsThrough`.
*/
private predicate localValueStep(Node node1, Node node2) {
localFlowStep(node1, node2) or
argumentValueFlowsThrough(node1, node2)
}
/*
* Calculation of `predicate store(Node node1, Content f, Node node2)`:
* There are three cases:
* - The base case: A direct local assignment given by `storeStep`.
* - A call to a method or constructor with two arguments, `arg1` and `arg2`,
* such the call has the side-effect `arg2.f = arg1`.
* - A call to a method that returns an object in which an argument has been
* stored.
* `storeViaSideEffect` covers the first two cases, and `storeReturn` covers
* the third case.
*/
/**
* Holds if data can flow from `node1` to `node2` via a direct assignment to
* `f` or via a call that acts as a setter.
*
* `storeViaSideEffect` handles stores whose target is a post-update node;
* `storeReturn` handles calls that return the object that was stored into.
*/
cached
predicate store(Node node1, Content f, Node node2) {
storeViaSideEffect(node1, f, node2) or
storeReturn(node1, f, node2)
}
/**
* Holds if `node1` is stored into content `f` of the object whose post-update
* node is `node2`, either by a direct `storeStep` or as the side effect of a
* call for which `setterCall` holds.
*/
private predicate storeViaSideEffect(Node node1, Content f, PostUpdateNode node2) {
// direct store; only considered when `f` is also read somewhere
storeStep(node1, f, node2) and readStep(_, f, _)
or
// `call` stores its argument at position `i1` into `f` of its argument at
// position `i2`; the type bounds of both nodes must fit the content
exists(Call call, int i1, int i2 |
setterCall(call, i1, i2, f) and
node1.(ArgumentNode).argumentOf(call, i1) and
node2.getPreUpdateNode().(ArgumentNode).argumentOf(call, i2) and
compatibleTypes(node1.getTypeBound(), f.getType()) and
compatibleTypes(node2.getTypeBound(), f.getContainerType())
)
}
/**
* Holds if the value of parameter `p1` is stored (via `storeViaSideEffect`)
* into content `f` of an object that a different parameter `p2` flows to.
*/
pragma[nomagic]
private predicate setterInParam(ParameterNode p1, Content f, ParameterNode p2) {
exists(Node n1, PostUpdateNode n2 |
parameterValueFlow(p1, n1) and
storeViaSideEffect(n1, f, n2) and
parameterValueFlow(p2, n2.getPreUpdateNode()) and
p1 != p2
)
}
/**
* Holds if a viable target of `call` has parameters at positions `i1` and
* `i2` such that the former is stored into content `f` of the latter
* (see `setterInParam`).
*/
pragma[nomagic]
private predicate setterCall(Call call, int i1, int i2, Content f) {
exists(Callable callable, ParameterNode p1, ParameterNode p2 |
setterInParam(p1, f, p2) and
callable = viableCallable(call) and
p1.isParameterOf(callable, i1) and
p2.isParameterOf(callable, i2)
)
}
/**
* Holds if `node1` is an argument of the call expression `node2`, and a
* parameter that `node1` may be passed to is stored into content `f` of an
* object that is then returned (see `setterReturn`). The type bounds of both
* nodes must be compatible with the content.
*/
private predicate storeReturn(Node node1, Content f, Node node2) {
exists(ParameterNode p, ArgumentNode arg |
arg = node1 and
viableParamArg(p, arg) and
setterReturn(p, f) and
arg.argumentOf(node2.asExpr(), _) and
compatibleTypes(node1.getTypeBound(), f.getType()) and
compatibleTypes(node2.getTypeBound(), f.getContainerType())
)
}
/**
* Holds if parameter `p` flows to a node that is stored into content `f` of
* some node, and that node reaches a `ReturnNode` in zero or more
* `localValueStep`s.
*/
private predicate setterReturn(ParameterNode p, Content f) {
exists(Node n1, Node n2, ReturnNode ret |
parameterValueFlow(p, n1) and
store(n1, f, n2) and
localValueStep*(n2, ret)
)
}
/**
* Holds if data can flow from `node1` to `node2` via a direct read of `f` or
* via a getter.
*/
cached
predicate read(Node node1, Content f, Node node2) {
// direct read; only considered when `f` is also stored into somewhere
readStep(node1, f, node2) and storeStep(_, f, _)
or
// `node1` is an argument of the call expression `node2`, and the matching
// parameter has content `f` read from it and returned (see `getter`)
exists(ParameterNode p, ArgumentNode arg |
arg = node1 and
viableParamArg(p, arg) and
getter(p, f) and
arg.argumentOf(node2.asExpr(), _) and
compatibleTypes(node1.getTypeBound(), f.getContainerType()) and
compatibleTypes(node2.getTypeBound(), f.getType())
)
}
/**
* Holds if parameter `p` flows to a node from which content `f` is read, and
* the result of that read reaches a `ReturnNode` in zero or more
* `localValueStep`s.
*/
private predicate getter(ParameterNode p, Content f) {
exists(Node n1, Node n2, ReturnNode ret |
parameterValueFlow(p, n1) and
read(n1, f, n2) and
localValueStep*(n2, ret)
)
}
/**
* Holds if `node1` is stored into some content `f`, the node stored into
* reaches another node in zero or more `localValueStep`s, and `f` is read
* from that node into `node2`.
*/
cached
predicate localStoreReadStep(Node node1, Node node2) {
exists(Node mid1, Node mid2, Content f |
store(node1, f, mid1) and
localValueStep*(mid1, mid2) and
read(mid2, f, node2)
)
}
/**
* Holds if `call` passes an implicit or explicit instance argument, i.e., an
* expression that reaches a `this` parameter.
*
* This is the case when some `ArgumentNode` is the argument of `call` at
* position `-1`.
*/
private predicate callHasInstanceArgument(Call call) {
exists(ArgumentNode arg | arg.argumentOf(call, -1))
}
/**
* The algebraic type underlying `CallContext`; the meaning of each branch is
* described on `CallContext`.
*/
cached
newtype TCallContext =
TAnyCallContext() or
TSpecificCall(Call call, int i, boolean emptyAp) {
// only calls that can reduce the set of dispatch targets are recorded
reducedViableImplInCallContext(_, _, call) and
(emptyAp = true or emptyAp = false) and
(
// `i` is either the index of an explicit argument of `call` ...
exists(call.getArgument(i))
or
// ... or `-1` for the instance argument, when `call` has one
i = -1 and callHasInstanceArgument(call)
)
} or
TSomeCall(ParameterNode p, boolean emptyAp) { emptyAp = true or emptyAp = false } or
TReturn(Method m, MethodAccess ma) { reducedViableImplInReturn(m, ma) }
}
import ImplCommon
/**
* A call context to restrict the targets of virtual dispatch and match the
* call sites of flow into a method with flow out of a method.
*
* There are four cases:
* - `TAnyCallContext()` : No restrictions on method flow.
* - `TSpecificCall(Call call, int i, boolean emptyAp)` : Flow entered through
* the `i`th parameter at the given `call`. This call improves the set of
* viable dispatch targets for at least one method call in the current
* callable.
* - `TSomeCall(ParameterNode p, boolean emptyAp)` : Flow entered through
* parameter `p`. The originating call does not improve the set of dispatch
* targets for any method call in the current callable and was therefore not
* recorded.
* - `TReturn(Method m, MethodAccess ma)` : Flow reached `ma` from `m` and
* this dispatch target of `ma` implies a reduced set of dispatch origins
* to which data may flow if it should reach a `return` statement.
*
* NOTE(review): the meaning of the `emptyAp` flag is not visible in this
* file; presumably it records whether the access path was empty when flow
* entered - confirm against the users of `TSpecificCall` and `TSomeCall`.
*/
abstract class CallContext extends TCallContext { abstract string toString(); }
/** The unrestricted call context, `TAnyCallContext()`; printed as `CcAny`. */
class CallContextAny extends CallContext, TAnyCallContext {
override string toString() { result = "CcAny" }
}
/**
* A call context in which flow entered the current callable through a call;
* the common supertype of `CallContextSpecificCall` and `CallContextSomeCall`.
*/
abstract class CallContextCall extends CallContext { }
/** A call context that records the specific call (`TSpecificCall`); printed as `CcCall`. */
class CallContextSpecificCall extends CallContextCall, TSpecificCall {
override string toString() { result = "CcCall" }
}
/**
* A call context that records only the parameter through which flow entered
* (`TSomeCall`), not the call itself; printed as `CcSomeCall`.
*/
class CallContextSomeCall extends CallContextCall, TSomeCall {
override string toString() { result = "CcSomeCall" }
}
/** A call context for flow that returned out of a method (`TReturn`); printed as `CcReturn`. */
class CallContextReturn extends CallContext, TReturn {
override string toString() { result = "CcReturn" }
}
/**
* Holds if flow returning out of `callable` in call context `cc` may return
* to the call site `call`.
*/
bindingset[cc, callable]
predicate resolveReturn(CallContext cc, Callable callable, Call call) {
// no restriction: any call that may target `callable`
cc instanceof CallContextAny and callable = viableCallable(call)
or
// `cc` is `TReturn(m0, ma0)` for a call `ma0` inside `callable`: only those
// `call`s for which `m0` remains a target of `ma0` are possible
exists(Method m0, MethodAccess ma0 |
ma0.getEnclosingCallable() = callable and
cc = TReturn(m0, ma0) and
m0 = prunedViableImplInCallContextReverse(ma0, call)
)
}
/**
* Gets a viable target of `call` in the call context `cc`.
*
* Only a `TSpecificCall` context can narrow the result; for every other kind
* of context the result is `viableCallable(call)`.
*/
bindingset[call, cc]
Callable resolveCall(Call call, CallContext cc) {
// specific call context: use the pruned targets when the context reduces
// the targets of `call`, and all viable targets otherwise
exists(Call ctx | cc = TSpecificCall(ctx, _, _) |
if reducedViableImplInCallContext(call, _, ctx)
then result = prunedViableImplInCallContext(call, ctx)
else result = viableCallable(call)
)
or
result = viableCallable(call) and cc instanceof CallContextSomeCall
or
result = viableCallable(call) and cc instanceof CallContextAny
or
result = viableCallable(call) and cc instanceof CallContextReturn
}

View File

@@ -0,0 +1,189 @@
private import cpp
private import DataFlowUtil
/**
 * A data flow node that occurs as the argument of a call and is passed as-is
 * to the callable. Arguments that are wrapped in an implicit varargs array
 * creation are not included, but the implicitly created array is.
 * Instance arguments are also included.
 *
 * In terms of the IR, this is any instruction that is an argument of some
 * `CallInstruction`.
 */
class ArgumentNode extends Node {
  ArgumentNode() { this = any(CallInstruction call).getAnArgument() }

  /**
   * Holds if this argument occurs at the given position in the given call.
   * The instance argument is considered to have index `-1`.
   */
  predicate argumentOf(Call call, int pos) {
    exists(CallInstruction callInstr | callInstr.getAST() = call |
      pos = -1 and this = callInstr.getThisArgument()
      or
      this = callInstr.getPositionalArgument(pos)
    )
  }
}
/**
 * A data flow node that occurs as the result of a `ReturnStmt`, that is, the
 * return value of some `ReturnValueInstruction`.
 */
class ReturnNode extends Node {
  ReturnNode() { this = any(ReturnValueInstruction ret).getReturnValue() }
}
/**
* Holds if data can flow from `n1` to `n2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*
* In this implementation the predicate never holds: its body is `none()`.
*/
predicate jumpStep(Node n1, Node n2) {
none()
}
/**
 * Holds if `call` passes an implicit or explicit qualifier, i.e., a
 * `this` argument.
 *
 * This is the case when `call.hasQualifier()` holds or when the target of
 * `call` is a destructor.
 */
predicate callHasQualifier(Call call) {
  call.getTarget() instanceof Destructor
  or
  call.hasQualifier()
}
/**
* The algebraic type underlying `Content`: one value per `Field`, plus a
* single value each for collection contents and array contents.
*/
private newtype TContent = TFieldContent(Field f) or TCollectionContent() or TArrayContent()
/**
 * A reference contained in an object. Examples include instance fields, the
 * contents of a collection object, or the contents of an array.
 */
class Content extends TContent {
  /** Gets a textual representation of this element. */
  abstract string toString();

  /**
   * Holds if this content is at the given location. By default the location
   * is empty: `path` is the empty string and every position is `0`.
   */
  predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    sl = 0 and
    sc = 0 and
    el = 0 and
    ec = 0 and
    path = ""
  }

  /** Gets the type of the object containing this content. */
  abstract RefType getContainerType();

  /** Gets the type of this content. */
  abstract Type getType();
}
/** The content of an object that is held in the field `f`. */
private class FieldContent extends Content, TFieldContent {
  Field f;

  FieldContent() { TFieldContent(f) = this }

  /** Gets the field that this content stands for. */
  Field getField() { f = result }

  override string toString() { result = f.toString() }

  /** Holds if the field is declared at the given location. */
  override predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    f.getLocation().hasLocationInfo(path, sl, sc, el, ec)
  }

  /** Gets the type that declares the field. */
  override RefType getContainerType() { f.getDeclaringType() = result }

  /** Gets the type of the field. */
  override Type getType() { f.getType() = result }
}
/**
 * The contents of a collection object. Neither a container type nor a
 * content type is provided: both predicates are `none()`.
 */
private class CollectionContent extends Content, TCollectionContent {
  override string toString() { result = "collection" }

  override RefType getContainerType() { none() }

  override Type getType() { none() }
}
/**
 * The contents of an array. Neither a container type nor a content type is
 * provided: both predicates are `none()`.
 */
private class ArrayContent extends Content, TArrayContent {
  override string toString() { result = "array" }

  override RefType getContainerType() { none() }

  override Type getType() { none() }
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*
* This is currently a stub that never holds.
*/
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
none() // stub implementation
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*
* This is currently a stub that never holds.
*/
predicate readStep(Node node1, Content f, Node node2) {
none() // stub implementation
}
/**
 * Gets a representative (boxed) type for `t` for the purpose of pruning
 * possible flow. A single type is used for all numeric types to account for
 * numeric conversions, and otherwise the erasure is used.
 *
 * The current stub ignores `t` and yields a `VoidType` for every type.
 */
RefType getErasedRepr(Type t) {
  // stub implementation
  result instanceof VoidType and
  // mentions `t` so that the parameter is not reported as unused
  suppressUnusedType(t)
}
/**
 * Gets a string representation of a type returned by `getErasedRepr`; this is
 * simply `t.toString()`.
 */
string ppReprType(Type t) { t.toString() = result }
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*
* This is currently a stub that holds for every pair of types, so it does not
* prune any flow.
*/
pragma[inline]
predicate compatibleTypes(Type t1, Type t2) {
any() // stub implementation
}
/** Holds for every type `t`; lets stub predicates mention an otherwise unused parameter. */
private predicate suppressUnusedType(Type t) {
  any()
}
//////////////////////////////////////////////////////////////////////////////
// Java QL library compatibility wrappers
//////////////////////////////////////////////////////////////////////////////
/** A compatibility alias: every `Type` is a `RefType`. */
class RefType extends Type { }
/** A compatibility placeholder for cast expressions; currently contains no values. */
class CastExpr extends Expr {
  CastExpr() {
    // stub implementation
    none()
  }
}
/** An expression that appears as an argument of some `Call`. */
class Argument extends Expr {
  Call call;
  int pos;

  Argument() { this = call.getArgument(pos) }

  /** Gets the call of which this expression is an argument. */
  Call getCall() { call = result }

  /** Gets the index of this argument within its call. */
  int getPosition() { pos = result }
}
/** A compatibility alias: every `Function` is a `Callable`. */
class Callable extends Function {
}
/**
 * A compatibility alias for `Function` in the C++ library. (In the Java
 * library, a `Method` is any callable other than a constructor.)
 */
class Method extends Function {
}
/**
 * A compatibility alias for `FunctionCall` in the C++ library. (In the Java
 * library, a `MethodAccess` is any `Call` that does not target a
 * constructor.)
 */
class MethodAccess extends FunctionCall {
  /**
   * INTERNAL: Do not use. Alternative name for `getEnclosingFunction`.
   */
  Callable getEnclosingCallable() { this.getEnclosingFunction() = result }
}

View File

@@ -0,0 +1,143 @@
/**
* Provides C++-specific definitions for use in the data flow library.
*/
import cpp
import semmle.code.cpp.ir.IR
/**
 * A node in a data flow graph. Every IR `Instruction` is a node.
 *
 * Nodes for expressions, parameters and uninitialized local variables can be
 * obtained with `DataFlow::exprNode`, `DataFlow::parameterNode`, and
 * `DataFlow::uninitializedNode` respectively.
 */
class Node extends Instruction {
  /**
   * INTERNAL: Do not use. Alternative name for `getFunction`.
   */
  Function getEnclosingCallable() { this.getFunction() = result }

  /**
   * Gets the type of this node: the type of its expression, or the type of
   * the variable that is its AST element.
   */
  Type getType() {
    exists(Variable v | v = this.getAST() | result = v.getType())
    or
    exists(Expr e | e = this.asExpr() | result = e.getType())
  }

  /** Gets the expression corresponding to this node, if any. */
  Expr asExpr() { this.getUnconvertedResultExpression() = result }

  /** Gets the parameter corresponding to this node, if any. */
  Parameter asParameter() {
    exists(ParameterNode paramNode | paramNode = this | result = paramNode.getParameter())
  }

  /**
   * Gets the uninitialized local variable corresponding to this node, if
   * any.
   */
  LocalVariable asUninitialized() {
    exists(UninitializedNode uninit | uninit = this | result = uninit.getLocalVariable())
  }

  /**
   * Gets an upper bound on the type of this node. This is the same as
   * `getType()`.
   */
  Type getTypeBound() { result = this.getType() }
}
/**
 * A data flow node whose AST element is an `Expr`.
 */
class ExprNode extends Node {
  ExprNode() { exists(Expr e | e = this.getAST()) }

  /** Gets the expression that is the AST element of this node. */
  Expr getExpr() { this.getAST() = result }
}
/**
 * The value of a parameter at function entry, viewed as a node in a data
 * flow graph.
 */
class ParameterNode extends Node, InitializeParameterInstruction {
  /**
   * Holds if this node is the parameter of `f` at the specified (zero-based)
   * position `i`.
   *
   * NOTE(review): only parameters returned by `f.getParameter(i)` are matched
   * here. Nothing in this predicate assigns index `-1` to an implicit `this`
   * parameter, so confirm whether callers expect that case to be handled.
   */
  predicate isParameterOf(Function f, int i) { f.getParameter(i) = this.getParameter() }
}
/**
 * An `UninitializedInstruction`, viewed as a node in a data flow graph. It
 * represents the value of a local variable that has not been initialized.
 */
class UninitializedNode extends Node, UninitializedInstruction {
  /**
   * Gets the local variable declared by the `VariableDeclarationEntry` that
   * is the AST element of this node.
   */
  LocalVariable getLocalVariable() {
    exists(VariableDeclarationEntry entry |
      entry = this.getAST() and
      result = entry.getDeclaration()
    )
  }
}
/**
 * A node that stands for an object after an operation that might have
 * changed its state.
 *
 * Such a node is either an argument to a callable, observed after the
 * callable returns (the callable might have mutated it), or the qualifier of
 * a field, observed after that field has been updated.
 *
 * Nodes that correspond to AST elements, such as `ExprNode`, usually denote
 * the value before the update. The stated exception is `ClassInstanceExpr`,
 * which denotes the value after the constructor has run.
 * NOTE(review): `ClassInstanceExpr` looks like a Java library name; confirm
 * the intended C++ counterpart.
 */
abstract class PostUpdateNode extends Node {
  /**
   * Gets the node that represents the same object before the state update.
   */
  abstract Node getPreUpdateNode();
}
/**
 * The destination address of a `StoreInstruction`, treated as a
 * `PostUpdateNode`.
 */
class StoreDestinationAsPostUpdateNode extends PostUpdateNode {
  StoreInstruction si;

  StoreDestinationAsPostUpdateNode() { si.getDestinationAddress() = this }

  /**
   * Gets the destination address of the store. This is the same instruction
   * as this node, so the pre-update and post-update nodes coincide.
   */
  override Node getPreUpdateNode() { si.getDestinationAddress() = result }
}
/**
 * Gets an `ExprNode` whose expression is `e`.
 */
ExprNode exprNode(Expr e) {
  e = result.getExpr()
}
/**
 * Gets the `ParameterNode` for `p`, representing the value of `p` at
 * function entry.
 */
ParameterNode parameterNode(Parameter p) {
  p = result.getParameter()
}
/**
 * Gets the `UninitializedNode` that represents the value of the
 * uninitialized local variable `v`.
 */
UninitializedNode uninitializedNode(LocalVariable v) { v = result.getLocalVariable() }
/**
 * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
 * (intra-procedural) step. A step is either:
 * - `nodeTo` is a `PhiInstruction` and `nodeFrom` defines one of its
 *   operands, or
 * - `nodeTo` is a `CopyInstruction` and `nodeFrom` is its source value.
 */
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
  exists(PhiInstruction phi | phi = nodeTo |
    nodeFrom = phi.getAnOperand().getDefinitionInstruction()
  )
  or
  exists(CopyInstruction copy | copy = nodeTo | nodeFrom = copy.getSourceValue())
}
/**
 * Holds if data flows from `source` to `sink` in zero or more local
 * (intra-procedural) steps, that is, if `sink` is `source` itself or is
 * reachable from it through a chain of `localFlowStep`s.
 */
predicate localFlow(Node source, Node sink) {
  // zero steps
  source = sink
  or
  // one or more steps
  localFlowStep+(source, sink)
}

View File

@@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction {
opcode instanceof Opcode::Call
}
/**
 * Gets the `Instruction` that defines the call target operand of this call.
 * This is usually a `FunctionAddress` instruction, but it can also be any
 * other instruction that produces a function pointer.
 */
final Instruction getCallTarget() {
  exists(CallTargetOperand target |
    target = this.getAnOperand() and
    result = target.getDefinitionInstruction()
  )
}
/**
 * Gets an argument of the call, that is, the defining instruction of any
 * `ArgumentOperand`. This includes the `this` pointer, if any.
 */
final Instruction getAnArgument() {
  exists(ArgumentOperand arg |
    arg = this.getAnOperand() and
    result = arg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the `this` pointer argument of the
 * call, if any.
 */
final Instruction getThisArgument() {
  exists(ThisArgumentOperand thisArg |
    thisArg = this.getAnOperand() and
    result = thisArg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the positional argument whose index is
 * `index`.
 */
final Instruction getPositionalArgument(int index) {
  exists(PositionalArgumentOperand positional | positional = this.getAnOperand() |
    index = positional.getIndex() and
    result = positional.getDefinitionInstruction()
  )
}
}
/**

View File

@@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand {
// Renders this operand as `Arg(<index>)`.
override string toString() { result = "Arg(" + argIndex.toString() + ")" }
/**
 * Gets the index of this argument (zero-based).
 */
final int getIndex() { argIndex = result }
}
class SideEffectOperand extends NonPhiOperand {

View File

@@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction {
opcode instanceof Opcode::Call
}
/**
 * Gets the `Instruction` that defines the call target operand of this call.
 * This is usually a `FunctionAddress` instruction, but it can also be any
 * other instruction that produces a function pointer.
 */
final Instruction getCallTarget() {
  exists(CallTargetOperand target |
    target = this.getAnOperand() and
    result = target.getDefinitionInstruction()
  )
}
/**
 * Gets an argument of the call, that is, the defining instruction of any
 * `ArgumentOperand`. This includes the `this` pointer, if any.
 */
final Instruction getAnArgument() {
  exists(ArgumentOperand arg |
    arg = this.getAnOperand() and
    result = arg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the `this` pointer argument of the
 * call, if any.
 */
final Instruction getThisArgument() {
  exists(ThisArgumentOperand thisArg |
    thisArg = this.getAnOperand() and
    result = thisArg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the positional argument whose index is
 * `index`.
 */
final Instruction getPositionalArgument(int index) {
  exists(PositionalArgumentOperand positional | positional = this.getAnOperand() |
    index = positional.getIndex() and
    result = positional.getDefinitionInstruction()
  )
}
}
/**

View File

@@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand {
// Renders this operand as `Arg(<index>)`.
override string toString() { result = "Arg(" + argIndex.toString() + ")" }
/**
 * Gets the index of this argument (zero-based).
 */
final int getIndex() { argIndex = result }
}
class SideEffectOperand extends NonPhiOperand {

View File

@@ -1106,9 +1106,39 @@ class CallInstruction extends Instruction {
opcode instanceof Opcode::Call
}
/**
 * Gets the `Instruction` that defines the call target operand of this call.
 * This is usually a `FunctionAddress` instruction, but it can also be any
 * other instruction that produces a function pointer.
 */
final Instruction getCallTarget() {
  exists(CallTargetOperand target |
    target = this.getAnOperand() and
    result = target.getDefinitionInstruction()
  )
}
/**
 * Gets an argument of the call, that is, the defining instruction of any
 * `ArgumentOperand`. This includes the `this` pointer, if any.
 */
final Instruction getAnArgument() {
  exists(ArgumentOperand arg |
    arg = this.getAnOperand() and
    result = arg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the `this` pointer argument of the
 * call, if any.
 */
final Instruction getThisArgument() {
  exists(ThisArgumentOperand thisArg |
    thisArg = this.getAnOperand() and
    result = thisArg.getDefinitionInstruction()
  )
}
/**
 * Gets the instruction that defines the positional argument whose index is
 * `index`.
 */
final Instruction getPositionalArgument(int index) {
  exists(PositionalArgumentOperand positional | positional = this.getAnOperand() |
    index = positional.getIndex() and
    result = positional.getDefinitionInstruction()
  )
}
}
/**

View File

@@ -304,6 +304,13 @@ class PositionalArgumentOperand extends ArgumentOperand {
// Renders this operand as `Arg(<index>)`.
override string toString() { result = "Arg(" + argIndex.toString() + ")" }
/**
 * Gets the index of this argument (zero-based).
 */
final int getIndex() { argIndex = result }
}
class SideEffectOperand extends NonPhiOperand {

View File

@@ -0,0 +1,29 @@
import cpp
import semmle.code.cpp.ir.dataflow.DataFlow
/**
 * The data flow configuration shared by the tests.
 *
 * - Sources: calls to a function named `source`, parameters whose name
 *   starts with `source`, and uninitialized local variables.
 * - Sinks: arguments of calls to a function named `sink`.
 * - Barriers: accesses to a variable named `barrier`.
 */
class TestAllocationConfig extends DataFlow::Configuration {
  TestAllocationConfig() { this = "TestAllocationConfig" }

  override predicate isSource(DataFlow::Node source) {
    // Track uninitialized variables
    exists(source.asUninitialized())
    or
    exists(Parameter p | p = source.asParameter() | p.getName().matches("source%"))
    or
    exists(FunctionCall sourceCall | sourceCall = source.asExpr() |
      sourceCall.getTarget().getName() = "source"
    )
  }

  override predicate isSink(DataFlow::Node sink) {
    exists(FunctionCall sinkCall |
      sink.asExpr() = sinkCall.getAnArgument() and
      sinkCall.getTarget().getName() = "sink"
    )
  }

  override predicate isBarrier(DataFlow::Node barrier) {
    exists(VariableAccess access | access = barrier.asExpr() |
      access.getTarget().hasName("barrier")
    )
  }
}

View File

@@ -0,0 +1,28 @@
| test.cpp:7:8:7:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:9:8:9:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:10:8:10:9 | Load: t2 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:15:8:15:9 | Load: t2 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:21:8:21:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:26:8:26:9 | Load: t1 | test.cpp:6:12:6:17 | Call: call to source |
| test.cpp:30:8:30:8 | Load: t | test.cpp:35:10:35:15 | Call: call to source |
| test.cpp:31:8:31:8 | Load: c | test.cpp:36:13:36:18 | Call: call to source |
| test.cpp:58:10:58:10 | Load: t | test.cpp:50:14:50:19 | Call: call to source |
| test.cpp:90:8:90:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 |
| test.cpp:92:8:92:14 | Load: source1 | test.cpp:89:28:89:34 | InitializeParameter: source1 |
| test.cpp:132:22:132:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 |
| test.cpp:140:22:140:23 | Load: m1 | test.cpp:122:18:122:30 | InitializeParameter: sourceStruct1 |
| test.cpp:192:8:192:8 | Load: s | test.cpp:199:33:199:38 | Call: call to source |
| test.cpp:205:8:205:8 | Load: x | test.cpp:212:34:212:39 | Call: call to source |
| test.cpp:226:8:226:8 | Load: y | test.cpp:219:11:219:16 | Call: call to source |
| test.cpp:308:12:308:12 | Load: x | test.cpp:293:14:293:19 | Call: call to source |
| test.cpp:337:14:337:14 | Load: x | test.cpp:353:17:353:22 | Call: call to source |
| true_upon_entry.cpp:13:8:13:8 | Load: x | true_upon_entry.cpp:9:11:9:16 | Call: call to source |
| true_upon_entry.cpp:21:8:21:8 | Load: x | true_upon_entry.cpp:17:11:17:16 | Call: call to source |
| true_upon_entry.cpp:29:8:29:8 | Load: x | true_upon_entry.cpp:27:9:27:14 | Call: call to source |
| true_upon_entry.cpp:39:8:39:8 | Load: x | true_upon_entry.cpp:33:11:33:16 | Call: call to source |
| true_upon_entry.cpp:49:8:49:8 | Load: x | true_upon_entry.cpp:43:11:43:16 | Call: call to source |
| true_upon_entry.cpp:57:8:57:8 | Load: x | true_upon_entry.cpp:54:11:54:16 | Call: call to source |
| true_upon_entry.cpp:66:8:66:8 | Load: x | true_upon_entry.cpp:62:11:62:16 | Call: call to source |
| true_upon_entry.cpp:78:8:78:8 | Load: x | true_upon_entry.cpp:70:11:70:16 | Call: call to source |
| true_upon_entry.cpp:86:8:86:8 | Load: x | true_upon_entry.cpp:83:11:83:16 | Call: call to source |
| true_upon_entry.cpp:105:8:105:8 | Load: x | true_upon_entry.cpp:98:11:98:16 | Call: call to source |

View File

@@ -0,0 +1,5 @@
import IRDataflowTestCommon
// Reports each (sink, source) pair connected by flow under `TestAllocationConfig`.
from DataFlow::Node source, DataFlow::Node sink
where any(TestAllocationConfig cfg).hasFlow(source, sink)
select sink, source