C++: Replace AST dataflow with IR dataflow.

This commit is contained in:
Mathias Vorreiter Pedersen
2022-10-13 11:18:12 +02:00
parent d79a7e863a
commit 41cbef81ec
50 changed files with 2471 additions and 1408 deletions

View File

@@ -4,10 +4,7 @@
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink. We do not track flow across pointer
* dereferences or array indexing. To track these types of flow, where the
* exact value may not be preserved, import
* `semmle.code.cpp.dataflow.TaintTracking`.
* the source may reach the sink.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
@@ -17,8 +14,4 @@
* `DataFlow::Node`.
*/
import cpp
module DataFlow {
import semmle.code.cpp.dataflow.internal.DataFlowImpl
}
import semmle.code.cpp.ir.dataflow.DataFlow

View File

@@ -9,8 +9,4 @@
* See `semmle.code.cpp.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow2 {
import semmle.code.cpp.dataflow.internal.DataFlowImpl2
}
import semmle.code.cpp.ir.dataflow.DataFlow2

View File

@@ -9,8 +9,4 @@
* See `semmle.code.cpp.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow3 {
import semmle.code.cpp.dataflow.internal.DataFlowImpl3
}
import semmle.code.cpp.ir.dataflow.DataFlow3

View File

@@ -9,8 +9,4 @@
* See `semmle.code.cpp.dataflow.DataFlow` for the full documentation.
*/
import cpp
module DataFlow4 {
import semmle.code.cpp.dataflow.internal.DataFlowImpl4
}
import semmle.code.cpp.ir.dataflow.DataFlow4

View File

@@ -95,6 +95,11 @@ predicate stackPointerFlowsToUse(Expr use, Type useType, Expr source, boolean is
cached
private PointerType getExprPtrType(Expr use) { result = use.getUnspecifiedType() }
/**
* Holds if `use` has type `useType` and `source` is an access to a stack variable
* that flows to `use`. `isLocal` is `true` if `use` is accessed via a parameter, and
* `false` otherwise.
*/
predicate stackReferenceFlowsToUse(Expr use, Type useType, Expr source, boolean isLocal) {
// Stack variables
exists(StackVariable var |

View File

@@ -15,9 +15,4 @@
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.dataflow.DataFlow2
module TaintTracking {
import semmle.code.cpp.dataflow.internal.tainttracking1.TaintTrackingImpl
}
import semmle.code.cpp.ir.dataflow.TaintTracking

View File

@@ -10,6 +10,5 @@
*
* See `semmle.code.cpp.dataflow.TaintTracking` for the full documentation.
*/
module TaintTracking2 {
import semmle.code.cpp.dataflow.internal.tainttracking2.TaintTrackingImpl
}
import semmle.code.cpp.ir.dataflow.TaintTracking2

View File

@@ -0,0 +1,14 @@
/**
* Provides a `TaintTracking3` module, which is a copy of the `TaintTracking`
* module. Use this class when data-flow configurations or taint-tracking
* configurations must depend on each other. Two classes extending
* `DataFlow::Configuration` should never depend on each other, but one of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
* `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
* `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
*
* See `semmle.code.cpp.dataflow.TaintTracking` for the full documentation.
*/
import semmle.code.cpp.ir.dataflow.TaintTracking3

View File

@@ -1,5 +0,0 @@
import semmle.code.cpp.dataflow.internal.TaintTrackingUtil as Public
module Private {
import semmle.code.cpp.dataflow.DataFlow::DataFlow as DataFlow
}

View File

@@ -1,5 +0,0 @@
import semmle.code.cpp.dataflow.internal.TaintTrackingUtil as Public
module Private {
import semmle.code.cpp.dataflow.DataFlow2::DataFlow2 as DataFlow
}

View File

@@ -0,0 +1,25 @@
/**
* Provides a library for local (intra-procedural) and global (inter-procedural)
* data flow analysis: deciding whether data can flow from a _source_ to a
* _sink_.
*
* Unless configured otherwise, _flow_ means that the exact value of
* the source may reach the sink.
*
* To use global (interprocedural) data flow, extend the class
* `DataFlow::Configuration` as documented on that class. To use local
* (intraprocedural) data flow between expressions, call
* `DataFlow::localExprFlow`. For more general cases of local data flow, call
* `DataFlow::localFlow` or `DataFlow::localFlowStep` with arguments of type
* `DataFlow::Node`.
*/
import cpp
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import semmle.code.cpp.dataflow.old.internal.DataFlowImpl
}

View File

@@ -0,0 +1,20 @@
/**
* Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use
* this class when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import semmle.code.cpp.dataflow.old.internal.DataFlowImpl2
}

View File

@@ -0,0 +1,20 @@
/**
* Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use
* this class when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import semmle.code.cpp.dataflow.old.internal.DataFlowImpl3
}

View File

@@ -0,0 +1,20 @@
/**
* Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use
* this class when data-flow configurations must depend on each other. Two
* classes extending `DataFlow::Configuration` should never depend on each
* other, but one of them should instead depend on a
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
* `DataFlow4::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
*/
import cpp
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) data flow analyses.
*/
module DataFlow {
import semmle.code.cpp.dataflow.old.internal.DataFlowImpl4
}

View File

@@ -0,0 +1,39 @@
/**
* DEPRECATED: Recursion through `DataFlow::Configuration` is impossible in
* any supported tooling. There is no need for this module because it's
* impossible to accidentally depend on recursion through
* `DataFlow::Configuration` in current releases.
*
* When this module is imported, recursive use of `DataFlow::Configuration` is
* disallowed. Importing this module will guarantee the absence of such
* recursion, which is unsupported and will be unconditionally disallowed in a
* future release.
*
* Recursive use of `DataFlow{2..4}::Configuration` is always disallowed, so no
* import is needed for those.
*/
import cpp
private import semmle.code.cpp.dataflow.DataFlow
/**
* This class exists to prevent mutual recursion between the user-overridden
* member predicates of `Configuration` and the rest of the data-flow library.
* Good performance cannot be guaranteed in the presence of such recursion, so
* it should be replaced by using more than one copy of the data flow library.
* Four copies are available: `DataFlow` through `DataFlow4`.
*/
abstract private class ConfigurationRecursionPrevention extends DataFlow::Configuration {
bindingset[this]
ConfigurationRecursionPrevention() { any() }
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
strictcount(DataFlow::Node n | this.isSource(n)) < 0
or
strictcount(DataFlow::Node n | this.isSink(n)) < 0
or
strictcount(DataFlow::Node n1, DataFlow::Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
or
super.hasFlow(source, sink)
}
}

View File

@@ -0,0 +1,18 @@
/**
* Provides classes for performing local (intra-procedural) and
* global (inter-procedural) taint-tracking analyses.
*
* We define _taint propagation_ informally to mean that a substantial part of
* the information from the source is preserved at the sink. For example, taint
* propagates from `x` to `x + 100`, but it does not propagate from `x` to `x >
* 100` since we consider a single bit of information to be too little.
*
* To use global (interprocedural) taint tracking, extend the class
* `TaintTracking::Configuration` as documented on that class. To use local
* (intraprocedural) taint tracking between expressions, call
* `TaintTracking::localExprTaint`. For more general cases of local taint
* tracking, call `TaintTracking::localTaint` or
* `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`.
*/
import semmle.code.cpp.ir.dataflow.TaintTracking

View File

@@ -0,0 +1,14 @@
/**
* Provides a `TaintTracking2` module, which is a copy of the `TaintTracking`
* module. Use this class when data-flow configurations or taint-tracking
* configurations must depend on each other. Two classes extending
* `DataFlow::Configuration` should never depend on each other, but one of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
* `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
* `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation.
*/
import semmle.code.cpp.ir.dataflow.TaintTracking2

View File

@@ -0,0 +1,14 @@
/**
* Provides a `TaintTracking3` module, which is a copy of the `TaintTracking`
* module. Use this class when data-flow configurations or taint-tracking
* configurations must depend on each other. Two classes extending
* `DataFlow::Configuration` should never depend on each other, but one of them
* should instead depend on a `DataFlow2::Configuration`, a
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
* `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
* `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
*
* See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation.
*/
import semmle.code.cpp.ir.dataflow.TaintTracking3

View File

@@ -1,6 +1,6 @@
private import cpp
private import semmle.code.cpp.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.dataflow.internal.DataFlowUtil
private import DataFlowPrivate
private import DataFlowUtil
/**
* Gets a function that might be called by `call`.

View File

@@ -3,10 +3,10 @@
*/
private import cpp
private import semmle.code.cpp.dataflow.internal.FlowVar
private import FlowVar
private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.controlflow.Guards
private import semmle.code.cpp.dataflow.internal.AddressFlow
private import AddressFlow
cached
private newtype TNode =

View File

@@ -4,8 +4,8 @@
import cpp
private import semmle.code.cpp.controlflow.SSA
private import semmle.code.cpp.dataflow.internal.SubBasicBlocks
private import semmle.code.cpp.dataflow.internal.AddressFlow
private import SubBasicBlocks
private import AddressFlow
private import semmle.code.cpp.models.implementations.Iterator
private import semmle.code.cpp.models.interfaces.PointerWrapper

View File

@@ -14,7 +14,7 @@ private import semmle.code.cpp.models.interfaces.Iterator
private import semmle.code.cpp.models.interfaces.PointerWrapper
private module DataFlow {
import semmle.code.cpp.dataflow.internal.DataFlowUtil
import DataFlowUtil
}
/**

View File

@@ -0,0 +1,5 @@
import semmle.code.cpp.dataflow.old.internal.TaintTrackingUtil as Public
module Private {
import semmle.code.cpp.dataflow.old.DataFlow::DataFlow as DataFlow
}

View File

@@ -0,0 +1,5 @@
import semmle.code.cpp.dataflow.old.internal.TaintTrackingUtil as Public
module Private {
import semmle.code.cpp.dataflow.old.DataFlow2::DataFlow as DataFlow
}

View File

@@ -282,7 +282,7 @@ private module Cached {
cached
predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
n1.asOperand() = callInput(call, modelIn) and
n1 = callInput(call, modelIn) and
(
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
or

View File

@@ -1,8 +1,8 @@
private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import DataFlowPrivate
private import DataFlowUtil
private import DataFlowImplCommon as DataFlowImplCommon
/**

View File

@@ -1,8 +1,10 @@
private import cpp
private import cpp as Cpp
private import DataFlowUtil
private import semmle.code.cpp.ir.IR
private import DataFlowDispatch
private import DataFlowImplConsistency
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import SsaInternals as Ssa
/** Gets the callable in which this node occurs. */
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -22,7 +24,7 @@ predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos)
* to the callable. Instance arguments (`this` pointer) and read side effects
* on parameters are also included.
*/
abstract class ArgumentNode extends OperandNode {
abstract class ArgumentNode extends Node {
/**
* Holds if this argument occurs at the given position in the given call.
* The instance argument is considered to have index `-1`.
@@ -37,7 +39,7 @@ abstract class ArgumentNode extends OperandNode {
* A data flow node that occurs as the argument to a call, or an
* implicit `this` pointer argument.
*/
private class PrimaryArgumentNode extends ArgumentNode {
private class PrimaryArgumentNode extends ArgumentNode, OperandNode {
override ArgumentOperand op;
PrimaryArgumentNode() { exists(CallInstruction call | op = call.getAnArgumentOperand()) }
@@ -46,49 +48,34 @@ private class PrimaryArgumentNode extends ArgumentNode {
op = call.getArgumentOperand(pos.(DirectPosition).getIndex())
}
override string toString() {
exists(Expr unconverted |
unconverted = op.getDef().getUnconvertedResultExpression() and
result = unconverted.toString()
)
or
// Certain instructions don't map to an unconverted result expression. For these cases
// we fall back to a simpler naming scheme. This can happen in IR-generated constructors.
not exists(op.getDef().getUnconvertedResultExpression()) and
(
result = "Argument " + op.(PositionalArgumentOperand).getIndex()
or
op instanceof ThisArgumentOperand and result = "Argument this"
)
}
override string toStringImpl() { result = argumentOperandToString(op) }
}
/**
* A data flow node representing the read side effect of a call on a
* specific parameter.
*/
private class SideEffectArgumentNode extends ArgumentNode {
override SideEffectOperand op;
ReadSideEffectInstruction read;
private string argumentOperandToString(ArgumentOperand op) {
exists(Expr unconverted |
unconverted = op.getDef().getUnconvertedResultExpression() and
result = unconverted.toString()
)
or
// Certain instructions don't map to an unconverted result expression. For these cases
// we fall back to a simpler naming scheme. This can happen in IR-generated constructors.
not exists(op.getDef().getUnconvertedResultExpression()) and
(
result = "Argument " + op.(PositionalArgumentOperand).getIndex()
or
op instanceof ThisArgumentOperand and result = "Argument this"
)
}
SideEffectArgumentNode() { op = read.getSideEffectOperand() }
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
read.getPrimaryInstruction() = call and
pos.(IndirectionPosition).getIndex() = read.getIndex()
private class SideEffectArgumentNode extends ArgumentNode, SideEffectOperandNode {
override predicate argumentOf(DataFlowCall dfCall, ArgumentPosition pos) {
this.getCallInstruction() = dfCall and
pos.(IndirectionPosition).getArgumentIndex() = this.getArgumentIndex() and
pos.(IndirectionPosition).getIndirectionIndex() = super.getIndirectionIndex()
}
override string toString() {
result = read.getArgumentDef().getUnconvertedResultExpression().toString() + " indirection"
or
// Some instructions don't map to an unconverted result expression. For these cases
// we fall back to a simpler naming scheme. This can happen in IR-generated constructors.
not exists(read.getArgumentDef().getUnconvertedResultExpression()) and
(
if read.getIndex() = -1
then result = "Argument this indirection"
else result = "Argument " + read.getIndex() + " indirection"
)
override string toStringImpl() {
result = argumentOperandToString(this.getAddressOperand()) + " indirection"
}
}
@@ -102,47 +89,57 @@ class Position extends TPosition {
abstract string toString();
}
class DirectPosition extends TDirectPosition {
class DirectPosition extends Position, TDirectPosition {
int index;
DirectPosition() { this = TDirectPosition(index) }
string toString() {
index = -1 and
result = "this"
or
index != -1 and
result = index.toString()
}
override string toString() { if index = -1 then result = "this" else result = index.toString() }
int getIndex() { result = index }
}
class IndirectionPosition extends TIndirectionPosition {
int index;
class IndirectionPosition extends Position, TIndirectionPosition {
int argumentIndex;
int indirectionIndex;
IndirectionPosition() { this = TIndirectionPosition(index) }
IndirectionPosition() { this = TIndirectionPosition(argumentIndex, indirectionIndex) }
string toString() {
index = -1 and
result = "this"
or
index != -1 and
result = index.toString()
override string toString() {
if argumentIndex = -1
then if indirectionIndex > 0 then result = "this indirection" else result = "this"
else
if indirectionIndex > 0
then result = argumentIndex.toString() + " indirection"
else result = argumentIndex.toString()
}
int getIndex() { result = index }
int getArgumentIndex() { result = argumentIndex }
int getIndirectionIndex() { result = indirectionIndex }
}
newtype TPosition =
TDirectPosition(int index) { exists(any(CallInstruction c).getArgument(index)) } or
TIndirectionPosition(int index) {
exists(ReadSideEffectInstruction instr | instr.getIndex() = index)
TIndirectionPosition(int argumentIndex, int indirectionIndex) {
hasOperandAndIndex(_, any(CallInstruction call).getArgumentOperand(argumentIndex),
indirectionIndex)
}
private newtype TReturnKind =
TNormalReturnKind() or
TIndirectReturnKind(ParameterIndex index)
TNormalReturnKind(int index) {
exists(IndirectReturnNode return |
return.getAddressOperand() = any(ReturnValueInstruction r).getReturnAddressOperand() and
index = return.getIndirectionIndex() - 1 // We subtract one because the return loads the value.
)
} or
TIndirectReturnKind(int argumentIndex, int indirectionIndex) {
exists(IndirectReturnNode return, ReturnIndirectionInstruction returnInd |
returnInd.hasIndex(argumentIndex) and
return.getAddressOperand() = returnInd.getSourceAddressOperand() and
indirectionIndex = return.getIndirectionIndex()
)
}
/**
* A return kind. A return kind describes how a value can be returned
@@ -154,53 +151,146 @@ class ReturnKind extends TReturnKind {
}
private class NormalReturnKind extends ReturnKind, TNormalReturnKind {
override string toString() { result = "return" }
int index;
NormalReturnKind() { this = TNormalReturnKind(index) }
override string toString() { result = "indirect return" }
}
private class IndirectReturnKind extends ReturnKind, TIndirectReturnKind {
ParameterIndex index;
int argumentIndex;
int indirectionIndex;
IndirectReturnKind() { this = TIndirectReturnKind(index) }
IndirectReturnKind() { this = TIndirectReturnKind(argumentIndex, indirectionIndex) }
override string toString() { result = "outparam[" + index.toString() + "]" }
override string toString() { result = "indirect outparam[" + argumentIndex.toString() + "]" }
}
/** A data flow node that occurs as the result of a `ReturnStmt`. */
class ReturnNode extends InstructionNode {
Instruction primary;
ReturnNode() {
exists(ReturnValueInstruction ret | instr = ret and primary = ret)
or
exists(ReturnIndirectionInstruction rii | instr = rii and primary = rii)
}
class ReturnNode extends Node instanceof IndirectReturnNode {
/** Gets the kind of this returned value. */
abstract ReturnKind getKind();
}
class ReturnValueNode extends ReturnNode {
override ReturnValueInstruction primary;
override ReturnKind getKind() { result = TNormalReturnKind() }
/**
* This predicate represents an annoying hack that we have to do. We use the
* `ReturnIndirectionInstruction` to determine which variables need flow back
* out of a function. However, the IR will unconditionally create those for a
* variable passed to a function even though the variable was never updated by
* the function. And if a function has too many `ReturnNode`s the dataflow
* library lowers its precision for that function by disabling field flow.
*
* So we those eliminate `ReturnNode`s that would have otherwise been created
* by this unconditional `ReturnIndirectionInstruction` by requiring that there
* must exist an SSA definition of the IR variable in the function.
*/
private predicate hasNonInitializeParameterDef(IRVariable v) {
exists(Ssa::Def def |
not def.getDefiningInstruction() instanceof InitializeParameterInstruction and
v = def.getSourceVariable().getBaseVariable().(Ssa::BaseIRVariable).getIRVariable()
)
}
class ReturnIndirectionNode extends ReturnNode {
override ReturnIndirectionInstruction primary;
class ReturnIndirectionNode extends IndirectReturnNode, ReturnNode {
override ReturnKind getKind() {
exists(int index |
primary.hasIndex(index) and
result = TIndirectReturnKind(index)
exists(int argumentIndex, ReturnIndirectionInstruction returnInd |
returnInd.hasIndex(argumentIndex) and
this.getAddressOperand() = returnInd.getSourceAddressOperand() and
result = TIndirectReturnKind(argumentIndex, this.getIndirectionIndex()) and
hasNonInitializeParameterDef(returnInd.getIRVariable())
)
or
this.getAddressOperand() = any(ReturnValueInstruction r).getReturnAddressOperand() and
result = TNormalReturnKind(this.getIndirectionIndex() - 1)
}
}
private Operand fullyConvertedCallStep(Operand op) {
not exists(getANonConversionUse(op)) and
exists(Instruction instr |
conversionFlow(op, instr, _) and
result = getAUse(instr)
)
}
/**
* Gets the instruction that uses this operand, if the instruction is not
* ignored for dataflow purposes.
*/
private Instruction getUse(Operand op) {
result = op.getUse() and
not Ssa::ignoreOperand(op)
}
/** Gets a use of the instruction `instr` that is not ignored for dataflow purposes. */
Operand getAUse(Instruction instr) {
result = instr.getAUse() and
not Ssa::ignoreOperand(result)
}
/**
* Gets a use of `operand` that is:
* - not ignored for dataflow purposes, and
* - not a conversion-like instruction.
*/
private Instruction getANonConversionUse(Operand operand) {
result = getUse(operand) and
not conversionFlow(_, result, _)
}
/**
* Gets the operand that represents the first use of the value of `call` following
* a sequnce of conversion-like instructions.
*/
predicate operandForfullyConvertedCall(Operand operand, CallInstruction call) {
exists(getANonConversionUse(operand)) and
(
operand = getAUse(call)
or
operand = fullyConvertedCallStep*(getAUse(call))
)
}
/**
* Gets the instruction that represents the first use of the value of `call` following
* a sequnce of conversion-like instructions.
*
* This predicate only holds if there is no suitable operand (i.e., no operand of a non-
* conversion instruction) to use to represent the value of `call` after conversions.
*/
predicate instructionForfullyConvertedCall(Instruction instr, CallInstruction call) {
not operandForfullyConvertedCall(_, call) and
(
// If there is no use of the call then we pick the call instruction
not exists(getAUse(call)) and
instr = call
or
// Otherwise, flow to the first non-conversion use.
exists(Operand operand | operand = fullyConvertedCallStep*(getAUse(call)) |
instr = getANonConversionUse(operand)
)
)
}
/** Holds if `node` represents the output node for `call`. */
private predicate simpleOutNode(Node node, CallInstruction call) {
operandForfullyConvertedCall(node.asOperand(), call)
or
instructionForfullyConvertedCall(node.asInstruction(), call)
}
/** A data flow node that represents the output of a call. */
class OutNode extends InstructionNode {
class OutNode extends Node {
OutNode() {
instr instanceof CallInstruction or
instr instanceof WriteSideEffectInstruction
// Return values not hidden behind indirections
simpleOutNode(this, _)
or
// Return values hidden behind indirections
this instanceof IndirectReturnOutNode
or
// Modified arguments hidden behind indirections
this instanceof IndirectArgumentOutNode
}
/** Gets the underlying call. */
@@ -209,20 +299,28 @@ class OutNode extends InstructionNode {
abstract ReturnKind getReturnKind();
}
private class CallOutNode extends OutNode {
override CallInstruction instr;
private class DirectCallOutNode extends OutNode {
CallInstruction call;
override DataFlowCall getCall() { result = instr }
DirectCallOutNode() { simpleOutNode(this, call) }
override ReturnKind getReturnKind() { result instanceof NormalReturnKind }
override DataFlowCall getCall() { result = call }
override ReturnKind getReturnKind() { result = TNormalReturnKind(0) }
}
private class SideEffectOutNode extends OutNode {
override WriteSideEffectInstruction instr;
private class IndirectCallOutNode extends OutNode, IndirectReturnOutNode {
override DataFlowCall getCall() { result = this.getCallInstruction() }
override DataFlowCall getCall() { result = instr.getPrimaryInstruction() }
override ReturnKind getReturnKind() { result = TNormalReturnKind(this.getIndirectionIndex()) }
}
override ReturnKind getReturnKind() { result = TIndirectReturnKind(instr.getIndex()) }
private class SideEffectOutNode extends OutNode, IndirectArgumentOutNode {
override DataFlowCall getCall() { result = this.getCallInstruction() }
override ReturnKind getReturnKind() {
result = TIndirectReturnKind(this.getArgumentIndex(), this.getIndirectionIndex())
}
}
/**
@@ -230,13 +328,8 @@ private class SideEffectOutNode extends OutNode {
* `kind`.
*/
OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
// There should be only one `OutNode` for a given `(call, kind)` pair. Showing the optimizer that
// this is true helps it make better decisions downstream, especially in virtual dispatch.
result =
unique(OutNode outNode |
outNode.getCall() = call and
outNode.getReturnKind() = kind
)
result.getCall() = call and
result.getReturnKind() = kind
}
/**
@@ -245,7 +338,7 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
* global or static variable.
*/
predicate jumpStep(Node n1, Node n2) {
exists(GlobalOrNamespaceVariable v |
exists(Cpp::GlobalOrNamespaceVariable v |
v =
n1.asInstruction()
.(StoreInstruction)
@@ -269,24 +362,92 @@ predicate jumpStep(Node n1, Node n2) {
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(StoreNodeInstr node1, FieldContent f, StoreNodeInstr node2) {
exists(FieldAddressInstruction fai |
node1.getInstruction() = fai and
node2.getInstruction() = fai.getObjectAddress() and
f.getField() = fai.getField()
predicate storeStep(Node node1, Content c, PostFieldUpdateNode node2) {
exists(int indirectionIndex1, int numberOfLoads, StoreInstruction store |
nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and
node2.getIndirectionIndex() = 1 and
numberOfLoadsFromOperand(node2.getFieldAddress(), store.getDestinationAddressOperand(),
numberOfLoads)
|
exists(FieldContent fc | fc = c |
fc.getField() = node2.getUpdatedField() and
fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads
)
or
exists(UnionContent uc | uc = c |
uc.getAField() = node2.getUpdatedField() and
uc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads
)
)
}
/**
* Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like
* operations and exactly `n` `LoadInstruction` operations.
*/
private predicate numberOfLoadsFromOperandRec(Operand operandFrom, Operand operandTo, int ind) {
exists(LoadInstruction load | load.getSourceAddressOperand() = operandFrom |
operandTo = operandFrom and ind = 0
or
numberOfLoadsFromOperand(load.getAUse(), operandTo, ind - 1)
)
or
exists(Operand op, Instruction instr |
instr = op.getDef() and
conversionFlow(operandFrom, instr, _) and
numberOfLoadsFromOperand(op, operandTo, ind)
)
}
/**
* Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like
* operations and exactly `n` `LoadInstruction` operations.
*/
private predicate numberOfLoadsFromOperand(Operand operandFrom, Operand operandTo, int n) {
numberOfLoadsFromOperandRec(operandFrom, operandTo, n)
or
not any(LoadInstruction load).getSourceAddressOperand() = operandFrom and
not conversionFlow(operandFrom, _, _) and
operandFrom = operandTo and
n = 0
}
// Needed to join on both an operand and an index at the same time.
pragma[noinline]
predicate nodeHasOperand(Node node, Operand operand, int indirectionIndex) {
node.asOperand() = operand and indirectionIndex = 0
or
hasOperandAndIndex(node, operand, indirectionIndex)
}
// Needed to join on both an instruction and an index at the same time.
pragma[noinline]
predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex) {
node.asInstruction() = instr and indirectionIndex = 0
or
hasInstructionAndIndex(node, instr, indirectionIndex)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate readStep(ReadNode node1, FieldContent f, ReadNode node2) {
exists(FieldAddressInstruction fai |
node1.getInstruction() = fai.getObjectAddress() and
node2.getInstruction() = fai and
f.getField() = fai.getField()
predicate readStep(Node node1, Content c, Node node2) {
exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 |
nodeHasOperand(node2, operand, indirectionIndex2) and
nodeHasOperand(node1, fa1.getObjectAddressOperand(), _) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads)
|
exists(FieldContent fc | fc = c |
fc.getField() = fa1.getField() and
fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads
)
or
exists(UnionContent uc | uc = c |
uc.getAField() = fa1.getField() and
uc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads
)
)
}
@@ -304,20 +465,20 @@ predicate clearsContent(Node n, Content c) {
predicate expectsContent(Node n, ContentSet c) { none() }
/** Gets the type of `n` used for type pruning. */
IRType getNodeType(Node n) {
DataFlowType getNodeType(Node n) {
suppressUnusedNode(n) and
result instanceof IRVoidType // stub implementation
result instanceof VoidType // stub implementation
}
/** Gets a string representation of a type returned by `getNodeType`. */
string ppReprType(IRType t) { none() } // stub implementation
string ppReprType(DataFlowType t) { none() } // stub implementation
/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*/
pragma[inline]
predicate compatibleTypes(IRType t1, IRType t2) {
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) {
any() // stub implementation
}
@@ -337,11 +498,11 @@ class CastNode extends Node {
* data-flow library discards call contexts and inserts a node in the big-step
* relation used for human-readable path explanations.
*/
class DataFlowCallable = Declaration;
class DataFlowCallable = Cpp::Declaration;
class DataFlowExpr = Expr;
class DataFlowType = IRType;
class DataFlowType = Type;
/** A function call relevant for data flow. */
class DataFlowCall extends CallInstruction {
@@ -368,17 +529,7 @@ class Unit extends TUnit {
}
/** Holds if `n` should be hidden from path explanations. */
predicate nodeIsHidden(Node n) {
n instanceof OperandNode and not n instanceof ArgumentNode
or
StoreNodeFlow::flowThrough(n, _) and
not StoreNodeFlow::flowOutOf(n, _) and
not StoreNodeFlow::flowInto(_, n)
or
ReadNodeFlow::flowThrough(n, _) and
not ReadNodeFlow::flowOutOf(n, _) and
not ReadNodeFlow::flowInto(_, n)
}
predicate nodeIsHidden(Node n) { n instanceof OperandNode and not n instanceof ArgumentNode }
class LambdaCallKind = Unit;

View File

@@ -5,41 +5,89 @@
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import DataFlowUtil
private import SsaInternals as Ssa
/**
* Gets the instruction that goes into `input` for `call`.
*/
Operand callInput(CallInstruction call, FunctionInput input) {
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
// An argument or qualifier
exists(int index |
result = call.getArgumentOperand(index) and
result.asOperand() = call.getArgumentOperand(index) and
input.isParameterOrQualifierAddress(index)
)
or
// A value pointed to by an argument or qualifier
exists(ReadSideEffectInstruction read |
result = read.getSideEffectOperand() and
read.getPrimaryInstruction() = call and
input.isParameterDerefOrQualifierObject(read.getIndex())
exists(int index, int indirectionIndex |
hasOperandAndIndex(result, call.getArgumentOperand(index), indirectionIndex) and
input.isParameterDerefOrQualifierObject(index, indirectionIndex)
)
or
exists(int ind |
result = getIndirectReturnOutNode(call, ind) and
input.isReturnValueDeref(ind)
)
}
/**
* Gets the instruction that holds the `output` for `call`.
*/
Instruction callOutput(CallInstruction call, FunctionOutput output) {
Node callOutput(CallInstruction call, FunctionOutput output) {
// The return value
result = call and
result.asInstruction() = call and
output.isReturnValue()
or
// The side effect of a call on the value pointed to by an argument or qualifier
exists(WriteSideEffectInstruction effect |
result = effect and
effect.getPrimaryInstruction() = call and
output.isParameterDerefOrQualifierObject(effect.getIndex())
exists(int index, int indirectionIndex |
result.(IndirectArgumentOutNode).getArgumentIndex() = index and
result.(IndirectArgumentOutNode).getIndirectionIndex() = indirectionIndex and
result.(IndirectArgumentOutNode).getCallInstruction() = call and
output.isParameterDerefOrQualifierObject(index, indirectionIndex)
)
or
// TODO: modify this when we get return value dereferences
result = call and
output.isReturnValueDeref()
exists(int ind |
result = getIndirectReturnOutNode(call, ind) and
output.isReturnValueDeref(ind)
)
}
DataFlow::Node callInput(CallInstruction call, FunctionInput input, int d) {
exists(DataFlow::Node n | n = callInput(call, input) and d > 0 |
// An argument or qualifier
hasOperandAndIndex(result, n.asOperand(), d)
or
exists(Operand operand, int indirectionIndex |
// A value pointed to by an argument or qualifier
hasOperandAndIndex(n, operand, indirectionIndex) and
hasOperandAndIndex(result, operand, indirectionIndex + d)
)
)
}
private IndirectReturnOutNode getIndirectReturnOutNode(CallInstruction call, int d) {
result.getCallInstruction() = call and
result.getIndirectionIndex() = d
}
/**
* Gets the instruction that holds the `output` for `call`.
*/
bindingset[d]
Node callOutput(CallInstruction call, FunctionOutput output, int d) {
exists(DataFlow::Node n | n = callOutput(call, output) and d > 0 |
// The return value
result = getIndirectReturnOutNode(n.asInstruction(), d)
or
// If there isn't an indirect out node for the call with indirection `d` then
// we conflate this with the underlying `CallInstruction`.
not exists(getIndirectReturnOutNode(call, d)) and
n.asInstruction() = result.asInstruction()
or
// The side effect of a call on the value pointed to by an argument or qualifier
exists(Operand operand, int indirectionIndex |
Ssa::outNodeHasAddressAndIndex(n, operand, indirectionIndex) and
Ssa::outNodeHasAddressAndIndex(result, operand, indirectionIndex + d)
)
)
}

View File

@@ -0,0 +1,270 @@
import cpp as Cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects
private import DataFlowImplCommon as DataFlowImplCommon
private import DataFlowUtil
/**
* Holds if `operand` is an operand that is not used by the dataflow library.
* Ignored operands are not recognizd as uses by SSA, and they don't have a
* corresponding `(Indirect)OperandNode`.
*/
predicate ignoreOperand(Operand operand) {
operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand() or
operand = any(Instruction instr | ignoreInstruction(instr)).getAUse() or
operand instanceof MemoryOperand
}
/**
* Holds if `instr` is an instruction that is not used by the dataflow library.
* Ignored instructions are not recognized as reads/writes by SSA, and they
* don't have a corresponding `(Indirect)InstructionNode`.
*/
predicate ignoreInstruction(Instruction instr) {
DataFlowImplCommon::forceCachingInSameStage() and
(
instr instanceof WriteSideEffectInstruction or
instr instanceof PhiInstruction or
instr instanceof ReadSideEffectInstruction or
instr instanceof ChiInstruction or
instr instanceof InitializeIndirectionInstruction
)
}
/**
* Gets the C++ type of `this` in the member function `f`.
* The result is a glvalue if `isGLValue` is true, and
* a prvalue if `isGLValue` is false.
*/
bindingset[isGLValue]
private CppType getThisType(Cpp::MemberFunction f, boolean isGLValue) {
result.hasType(f.getTypeOfThis(), isGLValue)
}
/**
* Gets the C++ type of the instruction `i`.
*
* This is equivalent to `i.getResultLanguageType()` with the exception
* of instructions that directly references a `this` IRVariable. In this
* case, `i.getResultLanguageType()` gives an unknown type, whereas the
* predicate gives the expected type (i.e., a potentially cv-qualified
* type `A*` where `A` is the declaring type of the member function that
* contains `i`).
*/
cached
CppType getResultLanguageType(Instruction i) {
if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable
then
if i.isGLValue()
then result = getThisType(i.getEnclosingFunction(), true)
else result = getThisType(i.getEnclosingFunction(), false)
else result = i.getResultLanguageType()
}
/**
* Gets the C++ type of the operand `operand`.
* This is equivalent to the type of the operand's defining instruction.
*
* See `getResultLanguageType` for a description of this behavior.
*/
CppType getLanguageType(Operand operand) { result = getResultLanguageType(operand.getDef()) }
/**
* Gets the maximum number of indirections a glvalue of type `type` can have.
* For example:
* - If `type = int`, the result is 1
* - If `type = MyStruct`, the result is 1
* - If `type = char*`, the result is 2
*/
int getMaxIndirectionsForType(Type type) {
result = countIndirectionsForCppType(getTypeForGLValue(type))
}
/**
* Gets the maximum number of indirections a value of type `type` can have.
*
* Note that this predicate is intended to be called on unspecified types
* (i.e., `countIndirections(e.getUnspecifiedType())`).
*/
private int countIndirections(Type t) {
result =
1 +
countIndirections([t.(Cpp::PointerType).getBaseType(), t.(Cpp::ReferenceType).getBaseType()])
or
not t instanceof Cpp::PointerType and
not t instanceof Cpp::ReferenceType and
result = 0
}
/**
* Gets the maximum number of indirections a value of C++
* type `langType` can have.
*/
int countIndirectionsForCppType(LanguageType langType) {
exists(Type type | langType.hasType(type, true) |
result = 1 + countIndirections(type.getUnspecifiedType())
)
or
exists(Type type | langType.hasType(type, false) |
result = countIndirections(type.getUnspecifiedType())
)
}
/**
* A `CallInstruction` that calls an allocation function such
* as `malloc` or `operator new`.
*/
class AllocationInstruction extends CallInstruction {
AllocationInstruction() { this.getStaticCallTarget() instanceof Cpp::AllocationFunction }
}
/**
* Holds if `i` is a base instruction that starts a sequence of uses
* of some variable that SSA can handle.
*
* This is either when `i` is a `VariableAddressInstruction` or when
* `i` is a fresh allocation produced by an `AllocationInstruction`.
*/
private predicate isSourceVariableBase(Instruction i) {
i instanceof VariableAddressInstruction or i instanceof AllocationInstruction
}
/**
* Holds if the value pointed to by `operand` can potentially be
* modified be the caller.
*/
predicate isModifiableByCall(ArgumentOperand operand) {
exists(CallInstruction call, int index, CppType type |
type = getLanguageType(operand) and
call.getArgumentOperand(index) = operand and
if index = -1
then not call.getStaticCallTarget() instanceof Cpp::ConstMemberFunction
else not SideEffects::isConstPointerLike(any(Type t | type.hasType(t, _)))
)
}
cached
private module Cached {
/**
* Holds if `op` is a use of an SSA variable rooted at `base` with `ind` number
* of indirections.
*
* `certain` is `true` if the operand is guaranteed to read the variable, and
* `indirectionIndex` specifies the number of loads required to read the variable.
*/
cached
predicate isUse(boolean certain, Operand op, Instruction base, int ind, int indirectionIndex) {
not ignoreOperand(op) and
certain = true and
exists(LanguageType type, int m, int ind0 |
type = getLanguageType(op) and
m = countIndirectionsForCppType(type) and
isUseImpl(op, base, ind0) and
ind = ind0 + [0 .. m] and
indirectionIndex = ind - ind0
)
}
/**
* Holds if `operand` is a use of an SSA variable rooted at `base`, and the
* path from `base` to `operand` passes through `ind` load-like instructions.
*/
private predicate isUseImpl(Operand operand, Instruction base, int ind) {
DataFlowImplCommon::forceCachingInSameStage() and
ind = 0 and
operand.getDef() = base and
isSourceVariableBase(base)
or
exists(Operand mid, Instruction instr |
isUseImpl(mid, base, ind) and
instr = operand.getDef() and
conversionFlow(mid, instr, false)
)
or
exists(int ind0 |
isUseImpl(operand.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0)
or
isUseImpl(operand.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0)
|
ind0 = ind - 1
)
}
/**
* Holds if `address` is an address of an SSA variable rooted at `base`,
* and `instr` is a definition of the SSA variable with `ind` number of indirections.
*
* `certain` is `true` if `instr` is guaranteed to write to the variable, and
* `indirectionIndex` specifies the number of loads required to read the variable
* after the write operation.
*/
cached
predicate isDef(
boolean certain, Instruction instr, Operand address, Instruction base, int ind,
int indirectionIndex
) {
certain = true and
exists(int ind0, CppType type, int m |
address =
[
instr.(StoreInstruction).getDestinationAddressOperand(),
instr.(InitializeParameterInstruction).getAnOperand(),
instr.(InitializeDynamicAllocationInstruction).getAllocationAddressOperand(),
instr.(UninitializedInstruction).getAnOperand()
]
|
isDefImpl(address, base, ind0) and
type = getLanguageType(address) and
m = countIndirectionsForCppType(type) and
ind = ind0 + [1 .. m] and
indirectionIndex = ind - (ind0 + 1)
)
}
/**
* Holds if `address` is a use of an SSA variable rooted at `base`, and the
* path from `base` to `address` passes through `ind` load-like instructions.
*
* Note: Unlike `isUseImpl`, this predicate recurses through pointer-arithmetic
* instructions.
*/
private predicate isDefImpl(Operand address, Instruction base, int ind) {
DataFlowImplCommon::forceCachingInSameStage() and
ind = 0 and
address.getDef() = base and
isSourceVariableBase(base)
or
exists(Operand mid, Instruction instr |
isDefImpl(mid, base, ind) and
instr = address.getDef() and
conversionFlow(mid, instr, _)
)
or
exists(int ind0 |
isDefImpl(address.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0)
or
isDefImpl(address.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0)
|
ind0 = ind - 1
)
}
}
import Cached
/**
* Inputs to the shared SSA library's parameterized module that is shared
* between the SSA pruning stage, and the final SSA stage.
*/
module InputSigCommon {
class BasicBlock = IRBlock;
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result.immediatelyDominates(bb) }
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
class ExitBasicBlock extends IRBlock {
ExitBasicBlock() { this.getLastInstruction() instanceof ExitFunctionInstruction }
}
}

View File

@@ -3,6 +3,9 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
private import ModelUtil
private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.models.interfaces.SideEffect
private import DataFlowUtil
private import DataFlowPrivate
private import semmle.code.cpp.models.Models
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
@@ -23,26 +26,26 @@ cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
or
instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())
}
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
// We only do this in certain cases:
// 1. The instruction's result must not be conflated, and
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
// this is array types and union types. This matches the other two cases of element-to-object flow in
// `DefaultTaintTracking`.
toOperand.getAnyDef() = fromInstr and
not fromInstr.isResultConflated() and
(
fromInstr.getResultType() instanceof ArrayType or
fromInstr.getResultType() instanceof Union
modeledTaintStep(nodeFrom, nodeTo)
or
// Flow from `op` to `*op`.
exists(Operand operand, int indirectionIndex |
nodeHasOperand(nodeFrom, operand, indirectionIndex) and
nodeHasOperand(nodeTo, operand, indirectionIndex - 1)
)
or
exists(ReadSideEffectInstruction readInstr |
fromInstr = readInstr.getArgumentDef() and
toOperand = readInstr.getSideEffectOperand()
// Flow from `instr` to `*instr`.
exists(Instruction instr, int indirectionIndex |
nodeHasInstruction(nodeFrom, instr, indirectionIndex) and
nodeHasInstruction(nodeTo, instr, indirectionIndex - 1)
)
or
// Flow from (the indirection of) an operand of a pointer arithmetic instruction to the
// indirection of the pointer arithmetic instruction. This provides flow from `source`
// in `x[source]` to the result of the associated load instruction.
exists(PointerArithmeticInstruction pai, int indirectionIndex |
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
)
}
@@ -61,13 +64,13 @@ private predicate operandToInstructionTaintStep(Operand opFrom, Instruction inst
instrTo instanceof BitwiseInstruction
or
instrTo instanceof PointerArithmeticInstruction
or
// The `CopyInstruction` case is also present in non-taint data flow, but
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
// from a definition of `myStruct` to a `myStruct.myField` expression.
instrTo instanceof CopyInstruction
)
or
// The `CopyInstruction` case is also present in non-taint data flow, but
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
// from a definition of `myStruct` to a `myStruct.myField` expression.
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
or
// Unary instructions tend to preserve enough information in practice that we
// want taint to flow through.
// The exception is `FieldAddressInstruction`. Together with the rules below for
@@ -81,40 +84,6 @@ private predicate operandToInstructionTaintStep(Operand opFrom, Instruction inst
or
instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
)
or
// Flow from an element to an array or union that contains it.
instrTo.(ChiInstruction).getPartialOperand() = opFrom and
not instrTo.isResultConflated() and
exists(Type t | instrTo.getResultLanguageType().hasType(t, false) |
t instanceof Union
or
t instanceof ArrayType
)
or
// Until we have flow through indirections across calls, we'll take flow out
// of the indirection and into the argument.
// When we get proper flow through indirections across calls, this code can be
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
exists(ReadSideEffectInstruction read |
read.getSideEffectOperand() = opFrom and
read.getArgumentDef() = instrTo
)
or
// Until we have from through indirections across calls, we'll take flow out
// of the parameter and into its indirection.
// `InitializeIndirectionInstruction` only has a single operand: the address of the
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
// the IR looks like this:
// ```
// m1 = InitializeParameter[p] : &r1
// r2 = Load[p] : r2, m1
// m3 = InitializeIndirection[p] : &r2
// ```
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
// `LoadOperand`'s overlap being exact.
instrTo.(InitializeIndirectionInstruction).getAnOperand() = opFrom
or
modeledTaintStep(opFrom, instrTo)
}
/**
@@ -167,21 +136,39 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* Holds if taint can flow from `instrIn` to `instrOut` through a call to a
* modeled function.
*/
predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
// Normal taint steps
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
call.getStaticCallTarget() = func and
func.hasTaintFlow(modelIn, modelOut)
|
(
nodeIn = callInput(call, modelIn)
or
exists(int n |
modelIn.isParameterDerefOrQualifierObject(n) and
if n = -1
then nodeIn = callInput(call, any(InQualifierObject inQualifier))
then nodeIn = callInput(call, any(InQualifierAddress inQualifier))
else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
)
) and
nodeOut = callOutput(call, modelOut) and
call.getStaticCallTarget() = func and
func.hasTaintFlow(modelIn, modelOut)
nodeOut = callOutput(call, modelOut)
or
exists(int d |
nodeIn = callInput(call, modelIn, d)
or
exists(int n |
d = 1 and
modelIn.isParameterDerefOrQualifierObject(n) and
if n = -1
then nodeIn = callInput(call, any(InQualifierAddress inQualifier))
else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
)
|
call.getStaticCallTarget() = func and
func.hasTaintFlow(modelIn, modelOut) and
nodeOut = callOutput(call, modelOut, d)
)
)
or
// Taint flow from one argument to another and data flow from an argument to a
@@ -205,12 +192,11 @@ predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
// Taint flow from a pointer argument to an output, when the model specifies flow from the deref
// to that output, but the deref is not modeled in the IR for the caller.
exists(
CallInstruction call, ReadSideEffectInstruction read, Function func, FunctionInput modelIn,
FunctionOutput modelOut
CallInstruction call, DataFlow::SideEffectOperandNode indirectArgument, Function func,
FunctionInput modelIn, FunctionOutput modelOut
|
read.getSideEffectOperand() = callInput(call, modelIn) and
read.getArgumentDef() = nodeIn.getDef() and
not read.getSideEffect().isResultModeled() and
indirectArgument = callInput(call, modelIn) and
indirectArgument.getAddressOperand() = nodeIn.asOperand() and
call.getStaticCallTarget() = func and
(
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)

View File

@@ -0,0 +1,314 @@
/**
* This module defines an initial SSA pruning stage that doesn't take
* indirections into account.
*/
private import codeql.ssa.Ssa as SsaImplCommon
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.cpp.models.interfaces.Allocation as Alloc
private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow
private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.SsaInternalsCommon
private module SourceVariables {
newtype TBaseSourceVariable =
// Each IR variable gets its own source variable
TBaseIRVariable(IRVariable var) or
// Each allocation gets its own source variable
TBaseCallVariable(AllocationInstruction call)
abstract class BaseSourceVariable extends TBaseSourceVariable {
abstract string toString();
abstract DataFlowType getType();
}
class BaseIRVariable extends BaseSourceVariable, TBaseIRVariable {
IRVariable var;
IRVariable getIRVariable() { result = var }
BaseIRVariable() { this = TBaseIRVariable(var) }
override string toString() { result = var.toString() }
override DataFlowType getType() { result = var.getType() }
}
class BaseCallVariable extends BaseSourceVariable, TBaseCallVariable {
AllocationInstruction call;
BaseCallVariable() { this = TBaseCallVariable(call) }
AllocationInstruction getCallInstruction() { result = call }
override string toString() { result = call.toString() }
override DataFlowType getType() { result = call.getResultType() }
}
private newtype TSourceVariable =
TSourceIRVariable(BaseIRVariable baseVar) or
TCallVariable(AllocationInstruction call)
abstract class SourceVariable extends TSourceVariable {
abstract string toString();
abstract BaseSourceVariable getBaseVariable();
}
class SourceIRVariable extends SourceVariable, TSourceIRVariable {
BaseIRVariable var;
SourceIRVariable() { this = TSourceIRVariable(var) }
IRVariable getIRVariable() { result = var.getIRVariable() }
override BaseIRVariable getBaseVariable() { result.getIRVariable() = this.getIRVariable() }
override string toString() { result = this.getIRVariable().toString() }
}
class CallVariable extends SourceVariable, TCallVariable {
AllocationInstruction call;
CallVariable() { this = TCallVariable(call) }
AllocationInstruction getCall() { result = call }
override BaseCallVariable getBaseVariable() { result.getCallInstruction() = call }
override string toString() { result = "Call" }
}
}
import SourceVariables
private newtype TDefOrUseImpl =
TDefImpl(Operand address) { isDef(_, _, address, _, _, _) } or
TUseImpl(Operand operand) {
isUse(_, operand, _, _, _) and
not isDef(_, _, operand, _, _, _)
}
abstract private class DefOrUseImpl extends TDefOrUseImpl {
/** Gets a textual representation of this element. */
abstract string toString();
/** Gets the block of this definition or use. */
abstract IRBlock getBlock();
/** Holds if this definition or use has index `index` in block `block`. */
abstract predicate hasIndexInBlock(IRBlock block, int index);
final predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) {
this.hasIndexInBlock(block, index) and
sv = this.getSourceVariable()
}
/** Gets the location of this element. */
abstract Cpp::Location getLocation();
abstract Instruction getBase();
final BaseSourceVariable getBaseSourceVariable() {
exists(IRVariable var |
result.(BaseIRVariable).getIRVariable() = var and
instructionHasIRVariable(this.getBase(), var)
)
or
result.(BaseCallVariable).getCallInstruction() = this.getBase()
}
/** Gets the variable that is defined or used. */
final SourceVariable getSourceVariable() {
exists(BaseSourceVariable v |
sourceVariableHasBaseAndIndex(result, v) and
defOrUseHasSourceVariable(this, v)
)
}
}
pragma[noinline]
private predicate instructionHasIRVariable(VariableAddressInstruction vai, IRVariable var) {
vai.getIRVariable() = var
}
private predicate defOrUseHasSourceVariable(DefOrUseImpl defOrUse, BaseSourceVariable bv) {
defHasSourceVariable(defOrUse, bv)
or
useHasSourceVariable(defOrUse, bv)
}
pragma[noinline]
private predicate defHasSourceVariable(DefImpl def, BaseSourceVariable bv) {
bv = def.getBaseSourceVariable()
}
pragma[noinline]
private predicate useHasSourceVariable(UseImpl use, BaseSourceVariable bv) {
bv = use.getBaseSourceVariable()
}
pragma[noinline]
private predicate sourceVariableHasBaseAndIndex(SourceVariable v, BaseSourceVariable bv) {
v.getBaseVariable() = bv
}
class DefImpl extends DefOrUseImpl, TDefImpl {
Operand address;
DefImpl() { this = TDefImpl(address) }
override Instruction getBase() { isDef(_, _, address, result, _, _) }
Operand getAddressOperand() { result = address }
Instruction getDefiningInstruction() { isDef(_, result, address, _, _, _) }
override string toString() { result = address.toString() }
override IRBlock getBlock() { result = this.getDefiningInstruction().getBlock() }
override Cpp::Location getLocation() { result = this.getDefiningInstruction().getLocation() }
final override predicate hasIndexInBlock(IRBlock block, int index) {
this.getDefiningInstruction() = block.getInstruction(index)
}
predicate isCertain() { isDef(true, _, address, _, _, _) }
}
class UseImpl extends DefOrUseImpl, TUseImpl {
Operand operand;
UseImpl() { this = TUseImpl(operand) }
Operand getOperand() { result = operand }
override string toString() { result = operand.toString() }
final override predicate hasIndexInBlock(IRBlock block, int index) {
operand.getUse() = block.getInstruction(index)
}
final override IRBlock getBlock() { result = operand.getUse().getBlock() }
final override Cpp::Location getLocation() { result = operand.getLocation() }
override Instruction getBase() { isUse(_, operand, result, _, _) }
predicate isCertain() { isUse(true, operand, _, _, _) }
}
private module SsaInput implements SsaImplCommon::InputSig {
import InputSigCommon
import SourceVariables
/**
* Holds if the `i`'th write in block `bb` writes to the variable `v`.
* `certain` is `true` if the write is guaranteed to overwrite the entire variable.
*/
predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) {
DataFlowImplCommon::forceCachingInSameStage() and
exists(DefImpl def | def.hasIndexInBlock(bb, i, v) |
if def.isCertain() then certain = true else certain = false
)
}
/**
* Holds if the `i`'th read in block `bb` reads to the variable `v`.
* `certain` is `true` if the read is guaranteed.
*/
predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) {
exists(UseImpl use | use.hasIndexInBlock(bb, i, v) |
if use.isCertain() then certain = true else certain = false
)
}
}
private newtype TSsaDefOrUse =
TDefOrUse(DefOrUseImpl defOrUse) {
defOrUse instanceof UseImpl
or
// If `defOrUse` is a definition we only include it if the
// SSA library concludes that it's live after the write.
exists(Definition def, SourceVariable sv, IRBlock bb, int i |
def.definesAt(sv, bb, i) and
defOrUse.(DefImpl).hasIndexInBlock(bb, i, sv)
)
} or
TPhi(PhiNode phi)
abstract private class SsaDefOrUse extends TSsaDefOrUse {
string toString() { result = "SsaDefOrUse" }
DefOrUseImpl asDefOrUse() { none() }
PhiNode asPhi() { none() }
abstract Location getLocation();
}
class DefOrUse extends TDefOrUse, SsaDefOrUse {
DefOrUseImpl defOrUse;
DefOrUse() { this = TDefOrUse(defOrUse) }
final override DefOrUseImpl asDefOrUse() { result = defOrUse }
final override Location getLocation() { result = defOrUse.getLocation() }
final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() }
}
class Phi extends TPhi, SsaDefOrUse {
PhiNode phi;
Phi() { this = TPhi(phi) }
final override PhiNode asPhi() { result = phi }
final override Location getLocation() { result = phi.getBasicBlock().getLocation() }
}
class UseOrPhi extends SsaDefOrUse {
UseOrPhi() {
this.asDefOrUse() instanceof UseImpl
or
this instanceof Phi
}
final override Location getLocation() {
result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation()
}
override string toString() {
result = this.asDefOrUse().toString()
or
this instanceof Phi and
result = "Phi"
}
}
class Def extends DefOrUse {
override DefImpl defOrUse;
Operand getAddressOperand() { result = defOrUse.getAddressOperand() }
Instruction getAddress() { result = this.getAddressOperand().getDef() }
Instruction getDefiningInstruction() { result = defOrUse.getDefiningInstruction() }
override string toString() { result = this.asDefOrUse().toString() + " (def)" }
}
private module SsaImpl = SsaImplCommon::Make<SsaInput>;
class PhiNode = SsaImpl::PhiNode;
class Definition = SsaImpl::Definition;

View File

@@ -13,7 +13,6 @@
import cpp
private import semmle.code.cpp.ir.dataflow.DataFlow::DataFlow as IRDataFlow
private import semmle.code.cpp.dataflow.DataFlow::DataFlow as AstDataFlow
import TestUtilities.InlineExpectationsTest
class IRFlowTest extends InlineExpectationsTest {
@@ -41,34 +40,3 @@ class IRFlowTest extends InlineExpectationsTest {
)
}
}
class AstFlowTest extends InlineExpectationsTest {
AstFlowTest() { this = "ASTFlowTest" }
override string getARelevantTag() { result = "ast" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
exists(
AstDataFlow::Node source, AstDataFlow::Node sink, AstDataFlow::Configuration conf, int n
|
tag = "ast" and
conf.hasFlow(source, sink) and
n = strictcount(AstDataFlow::Node otherSource | conf.hasFlow(otherSource, sink)) and
(
n = 1 and value = ""
or
// If there is more than one source for this sink
// we specify the source location explicitly.
n > 1 and
value =
source.getLocation().getStartLine().toString() + ":" +
source.getLocation().getStartColumn()
) and
location = sink.getLocation() and
element = sink.toString()
)
}
}
/** DEPRECATED: Alias for AstFlowTest */
deprecated class ASTFlowTest = AstFlowTest;

View File

@@ -1,4 +1,4 @@
private import semmle.code.cpp.dataflow.DataFlow
private import semmle.code.cpp.dataflow.old.DataFlow2
private import DataFlow
class AstConf extends Configuration {

View File

@@ -1,5 +1,5 @@
private import semmle.code.cpp.ir.dataflow.DataFlow as IR
private import semmle.code.cpp.dataflow.DataFlow as AST
private import semmle.code.cpp.dataflow.old.DataFlow as AST
private import cpp
private newtype TNode =

View File

@@ -1 +1 @@
import semmle.code.cpp.dataflow.internal.DataFlowImplConsistency::Consistency
import semmle.code.cpp.dataflow.old.internal.DataFlowImplConsistency::Consistency