diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 88d7dd9faf1..31dba3a245b 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -16,27 +16,30 @@ private import semmle.code.cpp.dataflow.ExternalFlow as External cached private module Cached { cached - module Nodes0 { - cached - newtype TIRDataFlowNode0 = - TInstructionNode0(Instruction i) { - not Ssa::ignoreInstruction(i) and - not exists(Operand op | - not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op) - ) and - // We exclude `void`-typed instructions because they cannot contain data. - // However, if the instruction is a glvalue, and their type is `void`, then the result - // type of the instruction is really `void*`, and thus we still want to have a dataflow - // node for it. - (not i.getResultType() instanceof VoidType or i.isGLValue()) - } or - TMultipleUseOperandNode0(Operand op) { - not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op)) - } or - TSingleUseOperandNode0(Operand op) { - not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op)) - } - } + newtype TIRDataFlowNode0 = + TInstructionNode0(Instruction i) { + not Ssa::ignoreInstruction(i) and + not exists(Operand op | + not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op) + ) and + // We exclude `void`-typed instructions because they cannot contain data. + // However, if the instruction is a glvalue, and their type is `void`, then the result + // type of the instruction is really `void*`, and thus we still want to have a dataflow + // node for it. + (not i.getResultType() instanceof VoidType or i.isGLValue()) + } or + TMultipleUseOperandNode0(Operand op) { + not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op)) + } or + TSingleUseOperandNode0(Operand op) { + not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op)) + } + + cached + newtype TContentApprox = + TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or + TUnionApproxContent(string s) { unionHasApproxName(_, s) } or + TElementApproxContent() /** * Gets an additional term that is added to the `join` and `branch` computations to reflect @@ -59,16 +62,155 @@ private module Cached { result = countNumberOfBranchesUsingParameter(switch, p) ) } + + cached + newtype TDataFlowCallable = + TSourceCallable(Cpp::Declaration decl) or + TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c) + + cached + newtype TDataFlowCall = + TNormalCall(CallInstruction call) or + TSummaryCall( + FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver + ) { + FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) + } + + /** + * Holds if data can flow from `node1` to `node2` in a way that loses the + * calling context. For example, this would happen with flow through a + * global or static variable. + */ + cached + predicate jumpStep(Node n1, Node n2) { + exists(GlobalLikeVariable v | + exists(Ssa::GlobalUse globalUse | + v = globalUse.getVariable() and + n1.(FinalGlobalValue).getGlobalUse() = globalUse + | + globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and + v = n2.asVariable() + or + v = n2.asIndirectVariable(globalUse.getIndirection()) + ) + or + exists(Ssa::GlobalDef globalDef | + v = globalDef.getVariable() and + n2.(InitialGlobalValue).getGlobalDef() = globalDef + | + globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and + v = n1.asVariable() + or + v = n1.asIndirectVariable(globalDef.getIndirection()) + ) + ) + or + // models-as-data summarized flow + FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(), + n2.(FlowSummaryNode).getSummaryNode()) + } + + /** + * Holds if data can flow from `node1` to `node2` via an assignment to `f`. + * Thus, `node2` references an object with a field `f` that contains the + * value of `node1`. + * + * The boolean `certain` is true if the destination address does not involve + * any pointer arithmetic, and false otherwise. + */ + cached + predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) { + exists( + PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads, + StoreInstruction store, FieldContent fc + | + postFieldUpdate = node2 and + fc = c and + nodeHasInstruction(node1, pragma[only_bind_into](store), + pragma[only_bind_into](indirectionIndex1)) and + postFieldUpdate.getIndirectionIndex() = 1 and + numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(), + store.getDestinationAddressOperand(), numberOfLoads, certain) and + fc.getAField() = postFieldUpdate.getUpdatedField() and + getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads + ) + or + // models-as-data summarized flow + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, + node2.(FlowSummaryNode).getSummaryNode()) and + certain = true + } + + /** + * Holds if data can flow from `node1` to `node2` via an assignment to `f`. + * Thus, `node2` references an object with a field `f` that contains the + * value of `node1`. + */ + cached + predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) } + + /** + * Holds if data can flow from `node1` to `node2` via a read of `f`. + * Thus, `node1` references an object with a field `f` whose value ends up in + * `node2`. + */ + cached + predicate readStep(Node node1, ContentSet c, Node node2) { + exists( + FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc + | + fc = c and + nodeHasOperand(node2, operand, indirectionIndex2) and + // The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct + // in `storeStep`. + nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and + numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and + fc.getAField() = fa1.getField() and + getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads + ) + or + // models-as-data summarized flow + FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, + node2.(FlowSummaryNode).getSummaryNode()) + } + + /** + * Holds if values stored inside content `c` are cleared at node `n`. + */ + cached + predicate clearsContent(Node n, ContentSet c) { + n = + any(PostUpdateNode pun, Content d | + d.impliesClearOf(c) and storeStepImpl(_, d, pun, true) + | + pun + ).getPreUpdateNode() and + ( + not exists(Operand op, Cpp::Operation p | + n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and + ( + p instanceof Cpp::AssignPointerAddExpr or + p instanceof Cpp::AssignPointerSubExpr or + p instanceof Cpp::CrementOperation + ) + | + p.getAnOperand() = op.getUse().getAst() + ) + or + forex(PostUpdateNode pun, Content d | + pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and + storeStepImpl(_, d, pun, true) and + pun.getPreUpdateNode() = n + | + c.(Content).getIndirectionIndex() = d.getIndirectionIndex() + ) + ) + } } import Cached -private import Nodes0 -/** - * A module for calculating the number of stars (i.e., `*`s) needed for various - * dataflow node `toString` predicates. - */ -module NodeStars { private int getNumberOfIndirections(Node n) { result = n.(RawIndirectOperand).getIndirectionIndex() or @@ -828,85 +970,10 @@ private int getMinIndirectionForGlobalDef(Ssa::GlobalDef def) { result = getMinIndirectionsForType(def.getUnspecifiedType()) } -/** - * Holds if data can flow from `node1` to `node2` in a way that loses the - * calling context. For example, this would happen with flow through a - * global or static variable. - */ -predicate jumpStep(Node n1, Node n2) { - exists(GlobalLikeVariable v | - exists(Ssa::GlobalUse globalUse | - v = globalUse.getVariable() and - n1.(FinalGlobalValue).getGlobalUse() = globalUse - | - globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and - v = n2.asVariable() - or - v = n2.asIndirectVariable(globalUse.getIndirection()) - ) - or - exists(Ssa::GlobalDef globalDef | - v = globalDef.getVariable() and - n2.(InitialGlobalValue).getGlobalDef() = globalDef - | - globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and - v = n1.asVariable() - or - v = n1.asIndirectVariable(globalDef.getIndirection()) - ) - ) - or - // models-as-data summarized flow - FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(), - n2.(FlowSummaryNode).getSummaryNode()) -} - bindingset[c] pragma[inline_late] private int getIndirectionIndexLate(Content c) { result = c.getIndirectionIndex() } -/** - * Holds if data can flow from `node1` to `node2` via an assignment to `f`. - * Thus, `node2` references an object with a field `f` that contains the - * value of `node1`. - * - * The boolean `certain` is true if the destination address does not involve - * any pointer arithmetic, and false otherwise. This has to do with whether a - * store step can be used to clear a field (see `clearsContent`). - */ -predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) { - exists( - PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads, - StoreInstruction store, FieldContent fc - | - postFieldUpdate = node2 and - fc = c and - nodeHasInstruction(node1, pragma[only_bind_into](store), - pragma[only_bind_into](indirectionIndex1)) and - postFieldUpdate.getIndirectionIndex() = 1 and - numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(), - store.getDestinationAddressOperand(), numberOfLoads, certain) and - fc.getAField() = postFieldUpdate.getUpdatedField() and - getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads - ) - or - // models-as-data summarized flow - FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, - node2.(FlowSummaryNode).getSummaryNode()) and - certain = true -} - -/** - * Holds if data can flow from `node1` to `node2` via an assignment to `f`. - * Thus, `node2` references an object with a field `f` that contains the - * value of `node1`. - */ -predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) } - -/** - * Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like - * operations and exactly `n` `LoadInstruction` operations. - */ private predicate numberOfLoadsFromOperandRec( Operand operandFrom, Operand operandTo, int ind, boolean certain ) { @@ -957,63 +1024,6 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex) hasInstructionAndIndex(node, instr, indirectionIndex) } -/** - * Holds if data can flow from `node1` to `node2` via a read of `f`. - * Thus, `node1` references an object with a field `f` whose value ends up in - * `node2`. - */ -predicate readStep(Node node1, ContentSet c, Node node2) { - exists( - FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc - | - fc = c and - nodeHasOperand(node2, operand, indirectionIndex2) and - // The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct - // in `storeStep`. - nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and - numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and - fc.getAField() = fa1.getField() and - getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads - ) - or - // models-as-data summarized flow - FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, - node2.(FlowSummaryNode).getSummaryNode()) -} - -/** - * Holds if values stored inside content `c` are cleared at node `n`. - */ -predicate clearsContent(Node n, ContentSet c) { - n = - any(PostUpdateNode pun, Content d | d.impliesClearOf(c) and storeStepImpl(_, d, pun, true) | pun) - .getPreUpdateNode() and - ( - // The crement operations and pointer addition and subtraction self-assign. We do not - // want to clear the contents if it is indirectly pointed at by any of these operations, - // as part of the contents might still be accessible afterwards. If there is no such - // indirection clearing the contents is safe. - not exists(Operand op, Cpp::Operation p | - n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and - ( - p instanceof Cpp::AssignPointerAddExpr or - p instanceof Cpp::AssignPointerSubExpr or - p instanceof Cpp::CrementOperation - ) - | - p.getAnOperand() = op.getUse().getAst() - ) - or - forex(PostUpdateNode pun, Content d | - pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and - storeStepImpl(_, d, pun, true) and - pun.getPreUpdateNode() = n - | - c.(Content).getIndirectionIndex() = d.getIndirectionIndex() - ) - ) -} - /** * Holds if the value that is being tracked is expected to be stored inside content `c` * at node `n`. @@ -1046,11 +1056,6 @@ class CastNode extends Node { CastNode() { none() } // stub implementation } -cached -private newtype TDataFlowCallable = - TSourceCallable(Cpp::Declaration decl) or - TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c) - /** * A callable, which may be: * - a function (that may contain code) @@ -1134,15 +1139,6 @@ class DataFlowType extends TypeFinal { string toString() { result = "" } } -cached -private newtype TDataFlowCall = - TNormalCall(CallInstruction call) or - TSummaryCall( - FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver - ) { - FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) - } - private predicate summarizedCallableIsManual(SummarizedCallable sc) { sc.asSummarizedCallable().hasManualModel() } @@ -1523,12 +1519,6 @@ private predicate fieldHasApproxName(Field f, string s) { private predicate unionHasApproxName(Cpp::Union u, string s) { s = u.getName().charAt(0) } -cached -private newtype TContentApprox = - TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or - TUnionApproxContent(string s) { unionHasApproxName(_, s) } or - TElementApproxContent() - /** An approximated `Content`. */ class ContentApprox extends TContentApprox { string toString() { none() } // overridden in subclasses diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll index 10ebfdb5be0..8a38c84bfdb 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImplCommon.qll @@ -55,26 +55,6 @@ private CppType getThisType(Cpp::MemberFunction f, boolean isGLValue) { result.hasType(f.getTypeOfThis(), isGLValue) } -/** - * Gets the C++ type of the instruction `i`. - * - * This is equivalent to `i.getResultLanguageType()` with the exception - * of instructions that directly references a `this` IRVariable. In this - * case, `i.getResultLanguageType()` gives an unknown type, whereas the - * predicate gives the expected type (i.e., a potentially cv-qualified - * type `A*` where `A` is the declaring type of the member function that - * contains `i`). - */ -cached -CppType getResultLanguageType(Instruction i) { - if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable - then - if i.isGLValue() - then result = getThisType(i.getEnclosingFunction(), true) - else result = getThisType(i.getEnclosingFunction(), false) - else result = i.getResultLanguageType() -} - /** * Gets the C++ type of the operand `operand`. * This is equivalent to the type of the operand's defining instruction. @@ -572,6 +552,26 @@ private class BaseCallInstruction extends BaseSourceVariableInstruction, CallIns cached private module Cached { + /** + * Gets the C++ type of the instruction `i`. + * + * This is equivalent to `i.getResultLanguageType()` with the exception + * of instructions that directly references a `this` IRVariable. In this + * case, `i.getResultLanguageType()` gives an unknown type, whereas the + * predicate gives the expected type (i.e., a potentially cv-qualified + * type `A*` where `A` is the declaring type of the member function that + * contains `i`). + */ + cached + CppType getResultLanguageType(Instruction i) { + if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable + then + if i.isGLValue() + then result = getThisType(i.getEnclosingFunction(), true) + else result = getThisType(i.getEnclosingFunction(), false) + else result = i.getResultLanguageType() + } + /** Holds if `op` is the only use of its defining instruction, and that op is used in a conversation */ private predicate isConversion(Operand op) { exists(Instruction def, Operand use |