C++: Throw away the old way of doing store steps using memory edges. Instead, we introduce a StoreNode IPA branch that does store steps and instead use the shared SSA library to transfer flow into these nodes before a store step, and out of them following a sequence of store steps.

This commit is contained in:
Mathias Vorreiter Pedersen
2021-10-20 15:14:03 +01:00
parent 1842fed7a2
commit 5ebefe2d30
3 changed files with 268 additions and 129 deletions

View File

@@ -184,108 +184,17 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
*/
predicate jumpStep(Node n1, Node n2) { none() }
private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(StoreInstruction store, Class c |
store = node2.asInstruction() and
store.getSourceValueOperand() = node1.asOperand() and
getWrittenField(store, f.(FieldContent).getAField(), c) and
f.hasOffset(c, _, _)
)
}
private FieldAddressInstruction getFieldInstruction(Instruction instr) {
result = instr or
result = instr.(CopyValueInstruction).getUnary()
}
pragma[noinline]
private predicate getWrittenField(Instruction instr, Field f, Class c) {
exists(FieldAddressInstruction fa |
fa =
getFieldInstruction([
instr.(StoreInstruction).getDestinationAddress(),
instr.(WriteSideEffectInstruction).getDestinationAddress()
]) and
f = fa.getField() and
c = f.getDeclaringType()
)
}
private predicate fieldStoreStepChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(ChiPartialOperand operand, ChiInstruction chi |
chi.getPartialOperand() = operand and
node1.asOperand() = operand and
node2.asInstruction() = chi and
exists(Class c |
c = chi.getResultType() and
exists(int startBit, int endBit |
chi.getUpdatedInterval(startBit, endBit) and
f.hasOffset(c, startBit, endBit)
)
or
getWrittenField(operand.getDef(), f.getAField(), c) and
f.hasOffset(c, _, _)
)
)
}
private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode node2) {
exists(a) and
exists(ChiPartialOperand operand, ChiInstruction chi, StoreInstruction store |
chi.getPartialOperand() = operand and
store = operand.getDef() and
node1.asOperand() = operand and
// This `ChiInstruction` will always have a non-conflated result because both `ArrayStoreNode`
// and `PointerStoreNode` require it in their characteristic predicates.
node2.asInstruction() = chi and
(
// `x[i] = taint()`
// This matches the characteristic predicate in `ArrayStoreNode`.
store.getDestinationAddress() instanceof PointerAddInstruction
or
// `*p = taint()`
// This matches the characteristic predicate in `PointerStoreNode`.
store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction
)
)
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
fieldStoreStepNoChi(node1, f, node2) or
fieldStoreStepChi(node1, f, node2) or
arrayStoreStepChi(node1, f, node2) or
fieldStoreStepAfterArraySuppression(node1, f, node2)
}
// This predicate pushes the correct `FieldContent` onto the access path when the
// `suppressArrayRead` predicate has popped off an `ArrayContent`.
private predicate fieldStoreStepAfterArraySuppression(
Node node1, FieldContent f, PostUpdateNode node2
) {
exists(WriteSideEffectInstruction write, ChiInstruction chi, Class c |
not chi.isResultConflated() and
node1.asInstruction() = chi and
node2.asInstruction() = chi and
chi.getPartial() = write and
getWrittenField(write, f.getAField(), c) and
f.hasOffset(c, _, _)
)
}
bindingset[result, i]
private int unbindInt(int i) { i <= result and i >= result }
pragma[noinline]
private predicate getLoadedField(LoadInstruction load, Field f, Class c) {
exists(FieldAddressInstruction fa |
fa = load.getSourceAddress() and
f = fa.getField() and
c = f.getDeclaringType()
predicate storeStep(StoreNode node1, FieldContent f, StoreNode node2) {
exists(FieldAddressInstruction fai |
not fai.getObjectAddress().getResultType().stripType() instanceof Union and
node1.getInstruction() = fai and
node2.getInstruction() = fai.getObjectAddress() and
f.getField() = fai.getField()
)
}

View File

@@ -10,6 +10,8 @@ private import semmle.code.cpp.ir.ValueNumbering
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.models.interfaces.DataFlow
private import DataFlowPrivate
private import Ssa as Ssa
cached
private module Cached {
@@ -17,12 +19,28 @@ private module Cached {
newtype TIRDataFlowNode =
TInstructionNode(Instruction i) or
TOperandNode(Operand op) or
TVariableNode(Variable var)
TVariableNode(Variable var) or
TStoreNodeInstr(Instruction i) { Ssa::explicitWrite(_, _, i) } or
TStoreNodeOperand(ArgumentOperand op) { Ssa::explicitWrite(_, _, op.getDef()) }
cached
predicate localFlowStepCached(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
}
private predicate needsPostReadNode(Instruction iFrom) {
// If the instruction generates an address that flows to a load.
Ssa::addressFlowTC(iFrom, Ssa::getSourceAddress(_)) and
(
// And it is either a field address
iFrom instanceof FieldAddressInstruction
or
// Or it is instruction that either uses or is used for an address that needs a post read node.
exists(Instruction mid | needsPostReadNode(mid) |
Ssa::addressFlow(mid, iFrom) or Ssa::addressFlow(iFrom, mid)
)
)
}
}
private import Cached
@@ -180,6 +198,99 @@ class OperandNode extends Node, TOperandNode {
override string toString() { result = this.getOperand().toString() }
}
/**
* INTERNAL: do not use.
*
* A `StoreNode` is a node that has been (or is about to be) the
* source or target of a `storeStep`.
*/
abstract class StoreNode extends Node {
/** Gets the underlying instruction, if any. */
Instruction getInstruction() { none() }
/** Gets the underlying operand, if any. */
Operand getOperand() { none() }
/** Holds if this node should receive flow from `addr`. */
abstract predicate flowInto(Instruction addr);
override Declaration getEnclosingCallable() { result = this.getFunction() }
/** Holds if this `StoreNode` is the root of the address computation used by a store operation. */
predicate isTerminal() {
not exists(this.getAPredecessor()) and
not storeStep(this, _, _)
}
/** Gets the store operation that uses the address computed by this `StoreNode`. */
abstract Instruction getStoreInstruction();
/** Holds if the store operation associated with this `StoreNode` overwrites the entire variable. */
final predicate isCertain() { Ssa::explicitWrite(true, this.getStoreInstruction(), _) }
/**
* Gets the `StoreNode` that computes the address used by this `StoreNode`.
* The boolean `readEffect` is `true` if the predecessor is accessed through the
* address of a `ReadSideEffectInstruction`.
*/
abstract StoreNode getAPredecessor();
/** The inverse of `StoreNode.getAPredecessor`. */
final StoreNode getASuccessor() { result.getAPredecessor() = this }
}
private class StoreNodeInstr extends StoreNode, TStoreNodeInstr {
Instruction instr;
StoreNodeInstr() { this = TStoreNodeInstr(instr) }
override predicate flowInto(Instruction addr) { this.getInstruction() = addr }
override Instruction getInstruction() { result = instr }
override Function getFunction() { result = this.getInstruction().getEnclosingFunction() }
override IRType getType() { result = this.getInstruction().getResultIRType() }
override Location getLocation() { result = this.getInstruction().getLocation() }
override string toString() {
result = instructionNode(this.getInstruction()).toString() + " [store]"
}
override Instruction getStoreInstruction() {
Ssa::explicitWrite(_, result, this.getInstruction())
}
override StoreNode getAPredecessor() {
Ssa::addressFlow(result.getInstruction(), this.getInstruction())
}
}
private class StoreNodeOperand extends StoreNode, TStoreNodeOperand {
ArgumentOperand operand;
StoreNodeOperand() { this = TStoreNodeOperand(operand) }
override predicate flowInto(Instruction addr) { this.getOperand().getDef() = addr }
override Operand getOperand() { result = operand }
override Function getFunction() { result = operand.getDef().getEnclosingFunction() }
override IRType getType() { result = operand.getIRType() }
override Location getLocation() { result = operand.getLocation() }
override string toString() { result = operandNode(this.getOperand()).toString() + " [store]" }
override WriteSideEffectInstruction getStoreInstruction() {
Ssa::explicitWrite(_, result, operand.getDef())
}
override StoreNode getAPredecessor() { operand.getDef() = result.getInstruction() }
}
/**
* An expression, viewed as a node in a data flow graph.
*/
@@ -313,15 +424,14 @@ deprecated class UninitializedNode extends Node {
* Nodes corresponding to AST elements, for example `ExprNode`, usually refer
* to the value before the update with the exception of `ClassInstanceExpr`,
* which represents the value after the constructor has run.
*
* This class exists to match the interface used by Java. There are currently no non-abstract
* classes that extend it. When we implement field flow, we can revisit this.
*/
abstract class PostUpdateNode extends InstructionNode {
abstract class PostUpdateNode extends Node {
/**
* Gets the node before the state update.
*/
abstract Node getPreUpdateNode();
override string toString() { result = this.getPreUpdateNode() + " [post update]" }
}
/**
@@ -614,6 +724,13 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
or
// Instruction -> Operand flow
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
or
// Flow into, through, and out of store nodes
StoreNodeFlow::flowInto(nodeFrom, nodeTo)
or
StoreNodeFlow::flowThrough(nodeFrom, nodeTo)
or
StoreNodeFlow::flowOutOf(nodeFrom, nodeTo)
}
pragma[noinline]
@@ -631,6 +748,28 @@ private predicate isSingleFieldClass(Type type, Operand op) {
c.getSize() = size and
getFieldSizeOfClass(c, type, size)
)
private module StoreNodeFlow {
/** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */
predicate flowInto(Node nodeFrom, StoreNode nodeTo) {
nodeTo.flowInto(Ssa::getDestinationAddress(nodeFrom.asInstruction()))
}
/** Holds if the store node `nodeTo` should receive flow from `nodeFom`. */
predicate flowThrough(StoreNode nFrom, StoreNode nodeTo) {
// Flow through a post update node that doesn't need a store step.
not storeStep(nFrom, _, _) and
nodeTo.getASuccessor() = nFrom
}
/**
* Holds if flow should leave the store node `nodeFrom` and enter the node `nodeTo`.
* This happens because we have traversed an entire chain of field dereferences
* after a store operation.
*/
predicate flowOutOf(StoreNode nFrom, Node nodeTo) {
nFrom.isTerminal() and
Ssa::ssaFlow(nFrom, nodeTo)
}
}
private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
@@ -788,25 +927,10 @@ predicate localInstructionFlow(Instruction e1, Instruction e2) {
*/
predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) }
/**
* Gets a field corresponding to the bit range `[startBit..endBit)` of class `c`, if any.
*/
private Field getAField(Class c, int startBit, int endBit) {
result.getDeclaringType() = c and
startBit = 8 * result.getByteOffset() and
endBit = 8 * result.getType().getSize() + startBit
or
exists(Field f, Class cInner |
f = c.getAField() and
cInner = f.getUnderlyingType() and
result = getAField(cInner, startBit - 8 * f.getByteOffset(), endBit - 8 * f.getByteOffset())
)
}
private newtype TContent =
TFieldContent(Class c, int startBit, int endBit) { exists(getAField(c, startBit, endBit)) } or
TCollectionContent() or
TArrayContent()
TFieldContent(Field f) or
TCollectionContent() or // Not used in C/C++
TArrayContent() // Not used in C/C++.
/**
* A description of the way data may be stored inside an object. Examples
@@ -824,18 +948,13 @@ class Content extends TContent {
/** A reference through an instance field. */
class FieldContent extends Content, TFieldContent {
Class c;
int startBit;
int endBit;
Field f;
FieldContent() { this = TFieldContent(c, startBit, endBit) }
FieldContent() { this = TFieldContent(f) }
// Ensure that there's just 1 result for `toString`.
override string toString() { result = min(Field f | f = getAField() | f.toString()) }
override string toString() { result = f.toString() }
predicate hasOffset(Class cl, int start, int end) { cl = c and start = startBit and end = endBit }
Field getAField() { result = getAField(c, startBit, endBit) }
Field getField() { result = f }
}
/** A reference through an array. */

View File

@@ -301,3 +301,114 @@ predicate explicitWrite(boolean certain, Instruction instr, Instruction address)
addressFlowTC(address, instr.(WriteSideEffectInstruction).getDestinationAddress()) and
certain = false
}
cached
private module Cached {
private predicate defUseFlow(Node nodeFrom, Node nodeTo) {
exists(IRBlock bb1, int i1, IRBlock bb2, int i2, DefOrUse defOrUse, Use use |
defOrUse.hasRankInBlock(bb1, i1) and
use.hasRankInBlock(bb2, i2) and
adjacentDefRead(_, bb1, i1, bb2, i2) and
nodeFrom.asInstruction() = toInstruction(defOrUse) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
)
}
private predicate fromStoreNode(StoreNode nodeFrom, Node nodeTo) {
// Def-use flow from a `StoreNode`.
exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Def def, Use use |
nodeFrom.isTerminal() and
def.getInstruction() = nodeFrom.getStoreInstruction() and
def.hasRankInBlock(bb1, i1) and
adjacentDefRead(_, bb1, i1, bb2, i2) and
use.hasRankInBlock(bb2, i2) and
flowOutOfAddressStep(use.getOperand(), nodeTo)
)
}
/**
* Holds if `nodeFrom` is a read or write, and `nTo` is the next subsequent read of the variable
* written (or read) by `storeOrRead`.
*/
cached
predicate ssaFlow(Node nodeFrom, Node nodeTo) {
// Def-use/use-use flow from an `InstructionNode` to an `OperandNode`.
defUseFlow(nodeFrom, nodeTo)
or
// Def-use flow from a `StoreNode` to an `OperandNode`.
fromStoreNode(nodeFrom, nodeTo)
}
private predicate flowOutOfAddressStep(Operand operand, Node nTo) {
exists(StoreNode storeNode, Instruction def |
storeNode = nTo and
def = operand.getDef()
|
storeNode.isTerminal() and
not addressFlow(def, _) and
// Only transfer flow to a store node if it doesn't immediately overwrite the address
// we've just written to.
explicitWrite(false, storeNode.getStoreInstruction(), def)
)
or
operand = getSourceAddressOperand(nTo.asInstruction())
or
exists(ReturnIndirectionInstruction ret |
ret.getSourceAddressOperand() = operand and
ret = nTo.asInstruction()
)
or
exists(ReturnValueInstruction ret |
ret.getReturnAddressOperand() = operand and
nTo.asInstruction() = ret
)
or
exists(CallInstruction call, int index, ReadSideEffectInstruction read |
call.getArgumentOperand(index) = operand and
read = getSideEffectFor(call, index) and
nTo.asOperand() = read.getSideEffectOperand()
)
or
exists(CopyInstruction copy |
not exists(getSourceAddressOperand(copy)) and
copy.getSourceValueOperand() = operand and
flowOutOfAddressStep(copy.getAUse(), nTo)
)
or
exists(ConvertInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nTo)
)
or
exists(CheckedConvertOrNullInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nTo)
)
or
exists(InheritanceConversionInstruction convert |
convert.getUnaryOperand() = operand and
flowOutOfAddressStep(convert.getAUse(), nTo)
)
or
exists(PointerArithmeticInstruction arith |
arith.getLeftOperand() = operand and
flowOutOfAddressStep(arith.getAUse(), nTo)
)
or
// Flow through a modelled function that has parameter -> return value flow.
exists(
CallInstruction call, DataFlow::DataFlowFunction func, int index,
DataFlow::FunctionInput input, DataFlow::FunctionOutput output
|
call.getStaticCallTarget() = func and
call.getArgumentOperand(index) = operand and
not getSideEffectFor(call, index) instanceof ReadSideEffectInstruction and
func.hasDataFlow(input, output) and
input.isParameter(index) and
output.isReturnValue() and
flowOutOfAddressStep(call.getAUse(), nTo)
)
}
}
import Cached