diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 73bf72a3643..4d4271941b4 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -184,108 +184,17 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { */ predicate jumpStep(Node n1, Node n2) { none() } -private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode node2) { - exists(StoreInstruction store, Class c | - store = node2.asInstruction() and - store.getSourceValueOperand() = node1.asOperand() and - getWrittenField(store, f.(FieldContent).getAField(), c) and - f.hasOffset(c, _, _) - ) -} - -private FieldAddressInstruction getFieldInstruction(Instruction instr) { - result = instr or - result = instr.(CopyValueInstruction).getUnary() -} - -pragma[noinline] -private predicate getWrittenField(Instruction instr, Field f, Class c) { - exists(FieldAddressInstruction fa | - fa = - getFieldInstruction([ - instr.(StoreInstruction).getDestinationAddress(), - instr.(WriteSideEffectInstruction).getDestinationAddress() - ]) and - f = fa.getField() and - c = f.getDeclaringType() - ) -} - -private predicate fieldStoreStepChi(Node node1, FieldContent f, PostUpdateNode node2) { - exists(ChiPartialOperand operand, ChiInstruction chi | - chi.getPartialOperand() = operand and - node1.asOperand() = operand and - node2.asInstruction() = chi and - exists(Class c | - c = chi.getResultType() and - exists(int startBit, int endBit | - chi.getUpdatedInterval(startBit, endBit) and - f.hasOffset(c, startBit, endBit) - ) - or - getWrittenField(operand.getDef(), f.getAField(), c) and - f.hasOffset(c, _, _) - ) - ) -} - -private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode node2) { - exists(a) and - exists(ChiPartialOperand operand, ChiInstruction chi, StoreInstruction store | - chi.getPartialOperand() = operand and - store = operand.getDef() and - node1.asOperand() = operand and - // This `ChiInstruction` will always have a non-conflated result because both `ArrayStoreNode` - // and `PointerStoreNode` require it in their characteristic predicates. - node2.asInstruction() = chi and - ( - // `x[i] = taint()` - // This matches the characteristic predicate in `ArrayStoreNode`. - store.getDestinationAddress() instanceof PointerAddInstruction - or - // `*p = taint()` - // This matches the characteristic predicate in `PointerStoreNode`. - store.getDestinationAddress().(CopyValueInstruction).getUnary() instanceof LoadInstruction - ) - ) -} - /** * Holds if data can flow from `node1` to `node2` via an assignment to `f`. * Thus, `node2` references an object with a field `f` that contains the * value of `node1`. */ -predicate storeStep(Node node1, Content f, PostUpdateNode node2) { - fieldStoreStepNoChi(node1, f, node2) or - fieldStoreStepChi(node1, f, node2) or - arrayStoreStepChi(node1, f, node2) or - fieldStoreStepAfterArraySuppression(node1, f, node2) -} - -// This predicate pushes the correct `FieldContent` onto the access path when the -// `suppressArrayRead` predicate has popped off an `ArrayContent`. -private predicate fieldStoreStepAfterArraySuppression( - Node node1, FieldContent f, PostUpdateNode node2 -) { - exists(WriteSideEffectInstruction write, ChiInstruction chi, Class c | - not chi.isResultConflated() and - node1.asInstruction() = chi and - node2.asInstruction() = chi and - chi.getPartial() = write and - getWrittenField(write, f.getAField(), c) and - f.hasOffset(c, _, _) - ) -} - -bindingset[result, i] -private int unbindInt(int i) { i <= result and i >= result } - -pragma[noinline] -private predicate getLoadedField(LoadInstruction load, Field f, Class c) { - exists(FieldAddressInstruction fa | - fa = load.getSourceAddress() and - f = fa.getField() and - c = f.getDeclaringType() +predicate storeStep(StoreNode node1, FieldContent f, StoreNode node2) { + exists(FieldAddressInstruction fai | + not fai.getObjectAddress().getResultType().stripType() instanceof Union and + node1.getInstruction() = fai and + node2.getInstruction() = fai.getObjectAddress() and + f.getField() = fai.getField() ) } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 9e7a95e010d..449e680b850 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -10,6 +10,8 @@ private import semmle.code.cpp.ir.ValueNumbering private import semmle.code.cpp.ir.IR private import semmle.code.cpp.controlflow.IRGuards private import semmle.code.cpp.models.interfaces.DataFlow +private import DataFlowPrivate +private import Ssa as Ssa cached private module Cached { @@ -17,12 +19,28 @@ private module Cached { newtype TIRDataFlowNode = TInstructionNode(Instruction i) or TOperandNode(Operand op) or - TVariableNode(Variable var) + TVariableNode(Variable var) or + TStoreNodeInstr(Instruction i) { Ssa::explicitWrite(_, _, i) } or + TStoreNodeOperand(ArgumentOperand op) { Ssa::explicitWrite(_, _, op.getDef()) } cached predicate localFlowStepCached(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) } + + private predicate needsPostReadNode(Instruction iFrom) { + // If the instruction generates an address that flows to a load. + Ssa::addressFlowTC(iFrom, Ssa::getSourceAddress(_)) and + ( + // And it is either a field address + iFrom instanceof FieldAddressInstruction + or + // Or it is instruction that either uses or is used for an address that needs a post read node. + exists(Instruction mid | needsPostReadNode(mid) | + Ssa::addressFlow(mid, iFrom) or Ssa::addressFlow(iFrom, mid) + ) + ) + } } private import Cached @@ -180,6 +198,99 @@ class OperandNode extends Node, TOperandNode { override string toString() { result = this.getOperand().toString() } } +/** + * INTERNAL: do not use. + * + * A `StoreNode` is a node that has been (or is about to be) the + * source or target of a `storeStep`. + */ +abstract class StoreNode extends Node { + /** Gets the underlying instruction, if any. */ + Instruction getInstruction() { none() } + + /** Gets the underlying operand, if any. */ + Operand getOperand() { none() } + + /** Holds if this node should receive flow from `addr`. */ + abstract predicate flowInto(Instruction addr); + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + /** Holds if this `StoreNode` is the root of the address computation used by a store operation. */ + predicate isTerminal() { + not exists(this.getAPredecessor()) and + not storeStep(this, _, _) + } + + /** Gets the store operation that uses the address computed by this `StoreNode`. */ + abstract Instruction getStoreInstruction(); + + /** Holds if the store operation associated with this `StoreNode` overwrites the entire variable. */ + final predicate isCertain() { Ssa::explicitWrite(true, this.getStoreInstruction(), _) } + + /** + * Gets the `StoreNode` that computes the address used by this `StoreNode`. + * The boolean `readEffect` is `true` if the predecessor is accessed through the + * address of a `ReadSideEffectInstruction`. + */ + abstract StoreNode getAPredecessor(); + + /** The inverse of `StoreNode.getAPredecessor`. */ + final StoreNode getASuccessor() { result.getAPredecessor() = this } +} + +private class StoreNodeInstr extends StoreNode, TStoreNodeInstr { + Instruction instr; + + StoreNodeInstr() { this = TStoreNodeInstr(instr) } + + override predicate flowInto(Instruction addr) { this.getInstruction() = addr } + + override Instruction getInstruction() { result = instr } + + override Function getFunction() { result = this.getInstruction().getEnclosingFunction() } + + override IRType getType() { result = this.getInstruction().getResultIRType() } + + override Location getLocation() { result = this.getInstruction().getLocation() } + + override string toString() { + result = instructionNode(this.getInstruction()).toString() + " [store]" + } + + override Instruction getStoreInstruction() { + Ssa::explicitWrite(_, result, this.getInstruction()) + } + + override StoreNode getAPredecessor() { + Ssa::addressFlow(result.getInstruction(), this.getInstruction()) + } +} + +private class StoreNodeOperand extends StoreNode, TStoreNodeOperand { + ArgumentOperand operand; + + StoreNodeOperand() { this = TStoreNodeOperand(operand) } + + override predicate flowInto(Instruction addr) { this.getOperand().getDef() = addr } + + override Operand getOperand() { result = operand } + + override Function getFunction() { result = operand.getDef().getEnclosingFunction() } + + override IRType getType() { result = operand.getIRType() } + + override Location getLocation() { result = operand.getLocation() } + + override string toString() { result = operandNode(this.getOperand()).toString() + " [store]" } + + override WriteSideEffectInstruction getStoreInstruction() { + Ssa::explicitWrite(_, result, operand.getDef()) + } + + override StoreNode getAPredecessor() { operand.getDef() = result.getInstruction() } +} + /** * An expression, viewed as a node in a data flow graph. */ @@ -313,15 +424,14 @@ deprecated class UninitializedNode extends Node { * Nodes corresponding to AST elements, for example `ExprNode`, usually refer * to the value before the update with the exception of `ClassInstanceExpr`, * which represents the value after the constructor has run. - * - * This class exists to match the interface used by Java. There are currently no non-abstract - * classes that extend it. When we implement field flow, we can revisit this. */ -abstract class PostUpdateNode extends InstructionNode { +abstract class PostUpdateNode extends Node { /** * Gets the node before the state update. */ abstract Node getPreUpdateNode(); + + override string toString() { result = this.getPreUpdateNode() + " [post update]" } } /** @@ -614,6 +724,13 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { or // Instruction -> Operand flow simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand()) + or + // Flow into, through, and out of store nodes + StoreNodeFlow::flowInto(nodeFrom, nodeTo) + or + StoreNodeFlow::flowThrough(nodeFrom, nodeTo) + or + StoreNodeFlow::flowOutOf(nodeFrom, nodeTo) } pragma[noinline] @@ -631,6 +748,28 @@ private predicate isSingleFieldClass(Type type, Operand op) { c.getSize() = size and getFieldSizeOfClass(c, type, size) ) +private module StoreNodeFlow { + /** Holds if the store node `nodeTo` should receive flow from `nodeFrom`. */ + predicate flowInto(Node nodeFrom, StoreNode nodeTo) { + nodeTo.flowInto(Ssa::getDestinationAddress(nodeFrom.asInstruction())) + } + + /** Holds if the store node `nodeTo` should receive flow from `nodeFom`. */ + predicate flowThrough(StoreNode nFrom, StoreNode nodeTo) { + // Flow through a post update node that doesn't need a store step. + not storeStep(nFrom, _, _) and + nodeTo.getASuccessor() = nFrom + } + + /** + * Holds if flow should leave the store node `nodeFrom` and enter the node `nodeTo`. + * This happens because we have traversed an entire chain of field dereferences + * after a store operation. + */ + predicate flowOutOf(StoreNode nFrom, Node nodeTo) { + nFrom.isTerminal() and + Ssa::ssaFlow(nFrom, nodeTo) + } } private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) { @@ -788,25 +927,10 @@ predicate localInstructionFlow(Instruction e1, Instruction e2) { */ predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) } -/** - * Gets a field corresponding to the bit range `[startBit..endBit)` of class `c`, if any. - */ -private Field getAField(Class c, int startBit, int endBit) { - result.getDeclaringType() = c and - startBit = 8 * result.getByteOffset() and - endBit = 8 * result.getType().getSize() + startBit - or - exists(Field f, Class cInner | - f = c.getAField() and - cInner = f.getUnderlyingType() and - result = getAField(cInner, startBit - 8 * f.getByteOffset(), endBit - 8 * f.getByteOffset()) - ) -} - private newtype TContent = - TFieldContent(Class c, int startBit, int endBit) { exists(getAField(c, startBit, endBit)) } or - TCollectionContent() or - TArrayContent() + TFieldContent(Field f) or + TCollectionContent() or // Not used in C/C++ + TArrayContent() // Not used in C/C++. /** * A description of the way data may be stored inside an object. Examples @@ -824,18 +948,13 @@ class Content extends TContent { /** A reference through an instance field. */ class FieldContent extends Content, TFieldContent { - Class c; - int startBit; - int endBit; + Field f; - FieldContent() { this = TFieldContent(c, startBit, endBit) } + FieldContent() { this = TFieldContent(f) } - // Ensure that there's just 1 result for `toString`. - override string toString() { result = min(Field f | f = getAField() | f.toString()) } + override string toString() { result = f.toString() } - predicate hasOffset(Class cl, int start, int end) { cl = c and start = startBit and end = endBit } - - Field getAField() { result = getAField(c, startBit, endBit) } + Field getField() { result = f } } /** A reference through an array. */ diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Ssa.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Ssa.qll index 57ba19f71f0..b4cfa89c22f 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Ssa.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/Ssa.qll @@ -301,3 +301,114 @@ predicate explicitWrite(boolean certain, Instruction instr, Instruction address) addressFlowTC(address, instr.(WriteSideEffectInstruction).getDestinationAddress()) and certain = false } + +cached +private module Cached { + private predicate defUseFlow(Node nodeFrom, Node nodeTo) { + exists(IRBlock bb1, int i1, IRBlock bb2, int i2, DefOrUse defOrUse, Use use | + defOrUse.hasRankInBlock(bb1, i1) and + use.hasRankInBlock(bb2, i2) and + adjacentDefRead(_, bb1, i1, bb2, i2) and + nodeFrom.asInstruction() = toInstruction(defOrUse) and + flowOutOfAddressStep(use.getOperand(), nodeTo) + ) + } + + private predicate fromStoreNode(StoreNode nodeFrom, Node nodeTo) { + // Def-use flow from a `StoreNode`. + exists(IRBlock bb1, int i1, IRBlock bb2, int i2, Def def, Use use | + nodeFrom.isTerminal() and + def.getInstruction() = nodeFrom.getStoreInstruction() and + def.hasRankInBlock(bb1, i1) and + adjacentDefRead(_, bb1, i1, bb2, i2) and + use.hasRankInBlock(bb2, i2) and + flowOutOfAddressStep(use.getOperand(), nodeTo) + ) + } + + /** + * Holds if `nodeFrom` is a read or write, and `nTo` is the next subsequent read of the variable + * written (or read) by `storeOrRead`. + */ + cached + predicate ssaFlow(Node nodeFrom, Node nodeTo) { + // Def-use/use-use flow from an `InstructionNode` to an `OperandNode`. + defUseFlow(nodeFrom, nodeTo) + or + // Def-use flow from a `StoreNode` to an `OperandNode`. + fromStoreNode(nodeFrom, nodeTo) + } + + private predicate flowOutOfAddressStep(Operand operand, Node nTo) { + exists(StoreNode storeNode, Instruction def | + storeNode = nTo and + def = operand.getDef() + | + storeNode.isTerminal() and + not addressFlow(def, _) and + // Only transfer flow to a store node if it doesn't immediately overwrite the address + // we've just written to. + explicitWrite(false, storeNode.getStoreInstruction(), def) + ) + or + operand = getSourceAddressOperand(nTo.asInstruction()) + or + exists(ReturnIndirectionInstruction ret | + ret.getSourceAddressOperand() = operand and + ret = nTo.asInstruction() + ) + or + exists(ReturnValueInstruction ret | + ret.getReturnAddressOperand() = operand and + nTo.asInstruction() = ret + ) + or + exists(CallInstruction call, int index, ReadSideEffectInstruction read | + call.getArgumentOperand(index) = operand and + read = getSideEffectFor(call, index) and + nTo.asOperand() = read.getSideEffectOperand() + ) + or + exists(CopyInstruction copy | + not exists(getSourceAddressOperand(copy)) and + copy.getSourceValueOperand() = operand and + flowOutOfAddressStep(copy.getAUse(), nTo) + ) + or + exists(ConvertInstruction convert | + convert.getUnaryOperand() = operand and + flowOutOfAddressStep(convert.getAUse(), nTo) + ) + or + exists(CheckedConvertOrNullInstruction convert | + convert.getUnaryOperand() = operand and + flowOutOfAddressStep(convert.getAUse(), nTo) + ) + or + exists(InheritanceConversionInstruction convert | + convert.getUnaryOperand() = operand and + flowOutOfAddressStep(convert.getAUse(), nTo) + ) + or + exists(PointerArithmeticInstruction arith | + arith.getLeftOperand() = operand and + flowOutOfAddressStep(arith.getAUse(), nTo) + ) + or + // Flow through a modelled function that has parameter -> return value flow. + exists( + CallInstruction call, DataFlow::DataFlowFunction func, int index, + DataFlow::FunctionInput input, DataFlow::FunctionOutput output + | + call.getStaticCallTarget() = func and + call.getArgumentOperand(index) = operand and + not getSideEffectFor(call, index) instanceof ReadSideEffectInstruction and + func.hasDataFlow(input, output) and + input.isParameter(index) and + output.isReturnValue() and + flowOutOfAddressStep(call.getAUse(), nTo) + ) + } +} + +import Cached