From ae998583be9a9e5c721ffb9dd5413e30e7db2aea Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Thu, 12 Jan 2023 17:01:31 +0000 Subject: [PATCH] C++: Refactor parameter out nodes to not depend on 'ReturnIndirectionInstruction's. --- .../ir/dataflow/internal/DataFlowPrivate.qll | 48 +++---- .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 76 ++++++++++- .../cpp/ir/dataflow/internal/SsaInternals.qll | 126 ++++++++++++++---- .../dataflow/internal/SsaInternalsCommon.qll | 14 +- .../dataflow/internal/ssa0/SsaInternals.qll | 54 ++++++-- 5 files changed, 243 insertions(+), 75 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 39c3b974f56..92dd561c35b 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -298,14 +298,13 @@ newtype TPosition = private newtype TReturnKind = TNormalReturnKind(int index) { exists(IndirectReturnNode return | - return.getAddressOperand() = any(ReturnValueInstruction r).getReturnAddressOperand() and + return.isNormalReturn() and index = return.getIndirectionIndex() - 1 // We subtract one because the return loads the value. ) } or TIndirectReturnKind(int argumentIndex, int indirectionIndex) { - exists(IndirectReturnNode return, ReturnIndirectionInstruction returnInd | - returnInd.hasIndex(argumentIndex) and - return.getAddressOperand() = returnInd.getSourceAddressOperand() and + exists(IndirectReturnNode return | + return.isParameterReturn(argumentIndex) and indirectionIndex = return.getIndirectionIndex() ) } @@ -342,42 +341,29 @@ class ReturnNode extends Node instanceof IndirectReturnNode { abstract ReturnKind getKind(); } -/** - * This predicate represents an annoying hack that we have to do. We use the - * `ReturnIndirectionInstruction` to determine which variables need flow back - * out of a function. However, the IR will unconditionally create those for a - * variable passed to a function even though the variable was never updated by - * the function. And if a function has too many `ReturnNode`s the dataflow - * library lowers its precision for that function by disabling field flow. - * - * So we those eliminate `ReturnNode`s that would have otherwise been created - * by this unconditional `ReturnIndirectionInstruction` by requiring that there - * must exist an SSA definition of the IR variable in the function. - */ -private predicate hasNonInitializeParameterDef(IRVariable v) { - exists(Ssa::Def def | - not def.getValue().asInstruction() instanceof InitializeParameterInstruction and - v = def.getSourceVariable().getBaseVariable().(Ssa::BaseIRVariable).getIRVariable() - ) +pragma[nomagic] +private predicate finalParameterNodeHasArgumentAndIndex( + FinalParameterNode node, int argumentIndex, int indirectionIndex +) { + node.getArgumentIndex() = argumentIndex and + node.getIndirectionIndex() = indirectionIndex } class ReturnIndirectionNode extends IndirectReturnNode, ReturnNode { override ReturnKind getKind() { - exists(Operand op, int i | - hasOperandAndIndex(this, pragma[only_bind_into](op), pragma[only_bind_into](i)) + exists(Operand op, int indirectionIndex | + hasOperandAndIndex(this, pragma[only_bind_into](op), pragma[only_bind_into](indirectionIndex)) | - exists(int argumentIndex, ReturnIndirectionInstruction returnInd | - op = returnInd.getSourceAddressOperand() and - returnInd.hasIndex(argumentIndex) and - hasNonInitializeParameterDef(returnInd.getIRVariable()) and - result = TIndirectReturnKind(argumentIndex, pragma[only_bind_into](i)) - ) - or exists(ReturnValueInstruction return | op = return.getReturnAddressOperand() and - result = TNormalReturnKind(i - 1) + result = TNormalReturnKind(indirectionIndex - 1) ) ) + or + exists(int argumentIndex, int indirectionIndex | + finalParameterNodeHasArgumentAndIndex(this, argumentIndex, indirectionIndex) and + result = TIndirectReturnKind(argumentIndex, indirectionIndex) + ) } } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index eebb794e472..a956db330ca 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -46,6 +46,12 @@ private newtype TIRDataFlowNode = } or TRawIndirectInstruction(Instruction instr, int indirectionIndex) { Ssa::hasRawIndirectInstruction(instr, indirectionIndex) + } or + TFinalParameterNode(Parameter p, int indirectionIndex) { + exists(Ssa::FinalParameterUse use | + use.getParameter() = p and + use.getIndirectionIndex() = indirectionIndex + ) } /** @@ -522,16 +528,35 @@ class IndirectParameterNode extends Node, IndirectInstruction { * A node representing the indirection of a value that is * about to be returned from a function. */ -class IndirectReturnNode extends IndirectOperand { +class IndirectReturnNode extends Node { IndirectReturnNode() { - this.getOperand() = any(ReturnIndirectionInstruction ret).getSourceAddressOperand() + this instanceof FinalParameterNode or - this.getOperand() = any(ReturnValueInstruction ret).getReturnAddressOperand() + this.(IndirectOperand).getOperand() = any(ReturnValueInstruction ret).getReturnAddressOperand() } - Operand getAddressOperand() { result = operand } - override Declaration getEnclosingCallable() { result = this.getFunction() } + + /** + * Holds if this node represents the value that is returned to the caller + * through a `return` statement. + */ + predicate isNormalReturn() { this instanceof IndirectOperand } + + /** + * Holds if this node represents the value that is returned to the caller + * by writing to the `argumentIndex`'th argument of the call. + */ + predicate isParameterReturn(int argumentIndex) { + this.(FinalParameterNode).getArgumentIndex() = argumentIndex + } + + /** Gets the indirection index of this indirect return node. */ + int getIndirectionIndex() { + result = this.(FinalParameterNode).getIndirectionIndex() + or + result = this.(IndirectOperand).getIndirectionIndex() + } } /** @@ -722,6 +747,47 @@ class RawIndirectOperand extends Node, TRawIndirectOperand { } } +/** + * INTERNAL: do not use. + * + * A node representing the value of an update parameter + * just before reaching the end of a function. + */ +class FinalParameterNode extends Node, TFinalParameterNode { + Parameter p; + int indirectionIndex; + + FinalParameterNode() { this = TFinalParameterNode(p, indirectionIndex) } + + /** Gets the parameter associated with this final use. */ + Parameter getParameter() { result = p } + + /** Gets the underlying indirection index. */ + int getIndirectionIndex() { result = indirectionIndex } + + /** Gets the argument index associated with this final use. */ + final int getArgumentIndex() { result = p.getIndex() } + + override Function getFunction() { result = p.getFunction() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override DataFlowType getType() { result = getTypeImpl(p.getUnspecifiedType(), indirectionIndex) } + + final override Location getLocationImpl() { + // Parameters can have multiple locations. When there's a unique location we use + // that one, but if multiple locations exist we default to an unknown location. + result = unique( | | p.getLocation()) + or + not exists(unique( | | p.getLocation())) and + result instanceof UnknownDefaultLocation + } + + override string toStringImpl() { + if indirectionIndex > 1 then result = p.toString() + " indirection" else result = p.toString() + } +} + /** * The value of an uninitialized local variable, viewed as a node in a data * flow graph. diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index cef8fdc9274..683173b6f3a 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -130,6 +130,31 @@ private newtype TDefOrUseImpl = Operand iteratorAddress, BaseSourceVariableInstruction container, int indirectionIndex ) { isIteratorUse(container, iteratorAddress, _, indirectionIndex) + } or + TFinalParameterUse(Parameter p, int indirectionIndex) { + // Avoid creating parameter nodes if there is no definitions of the variable other than the initializaion. + exists(SsaInternals0::Def def | + def.getSourceVariable().getBaseVariable().(BaseIRVariable).getIRVariable().getAst() = p and + not def.getValue().asInstruction() instanceof InitializeParameterInstruction + ) and + // If the type is modifiable + exists(CppType cppType | + cppType.hasUnspecifiedType(p.getUnspecifiedType(), _) and + isModifiableAt(cppType, indirectionIndex + 1) + ) and + ( + exists(Indirection indirection | + indirection.getType() = p.getUnspecifiedType() and + indirectionIndex = [1 .. indirection.getNumberOfIndirections()] + ) + or + // Array types don't have indirections. So we need to special case them here. + exists(Cpp::ArrayType arrayType, CppType cppType | + arrayType = p.getUnspecifiedType() and + cppType.hasUnspecifiedType(arrayType, _) and + indirectionIndex = [1 .. countIndirectionsForCppType(cppType)] + ) + ) } abstract private class DefOrUseImpl extends TDefOrUseImpl { @@ -137,7 +162,7 @@ abstract private class DefOrUseImpl extends TDefOrUseImpl { abstract string toString(); /** Gets the block of this definition or use. */ - abstract IRBlock getBlock(); + final IRBlock getBlock() { this.hasIndexInBlock(result, _) } /** Holds if this definition or use has index `index` in block `block`. */ abstract predicate hasIndexInBlock(IRBlock block, int index); @@ -222,8 +247,6 @@ abstract class DefImpl extends DefOrUseImpl { override string toString() { result = "DefImpl" } - override IRBlock getBlock() { result = this.getAddressOperand().getUse().getBlock() } - override Cpp::Location getLocation() { result = this.getAddressOperand().getUse().getLocation() } final override predicate hasIndexInBlock(IRBlock block, int index) { @@ -258,32 +281,43 @@ private class IteratorDef extends DefImpl, TIteratorDef { } abstract class UseImpl extends DefOrUseImpl { - Operand operand; int ind; bindingset[ind] UseImpl() { any() } - Operand getOperand() { result = operand } + /** Gets the node associated with this use. */ + abstract Node getNode(); override string toString() { result = "UseImpl" } + /** Gets the indirection index of this use. */ + final override int getIndirectionIndex() { result = ind } + + /** Gets the number of loads that precedence this use. */ + abstract int getIndirection(); + + /** + * Holds if this use is guaranteed to read the + * associated variable. + */ + abstract predicate isCertain(); +} + +abstract private class OperandBasedUse extends UseImpl { + Operand operand; + + bindingset[ind] + OperandBasedUse() { any() } + final override predicate hasIndexInBlock(IRBlock block, int index) { operand.getUse() = block.getInstruction(index) } - final override IRBlock getBlock() { result = operand.getUse().getBlock() } - final override Cpp::Location getLocation() { result = operand.getLocation() } - - override int getIndirectionIndex() { result = ind } - - abstract int getIndirection(); - - abstract predicate isCertain(); } -private class DirectUse extends UseImpl, TUseImpl { +private class DirectUse extends OperandBasedUse, TUseImpl { DirectUse() { this = TUseImpl(operand, ind) } override int getIndirection() { isUse(_, operand, _, result, ind) } @@ -291,9 +325,11 @@ private class DirectUse extends UseImpl, TUseImpl { override BaseSourceVariableInstruction getBase() { isUse(_, operand, result, _, ind) } override predicate isCertain() { isUse(true, operand, _, _, ind) } + + override Node getNode() { nodeHasOperand(result, operand, ind) } } -private class IteratorUse extends UseImpl, TIteratorUse { +private class IteratorUse extends OperandBasedUse, TIteratorUse { BaseSourceVariableInstruction container; IteratorUse() { this = TIteratorUse(operand, container, ind) } @@ -303,6 +339,49 @@ private class IteratorUse extends UseImpl, TIteratorUse { override BaseSourceVariableInstruction getBase() { result = container } override predicate isCertain() { none() } + + override Node getNode() { nodeHasOperand(result, operand, ind) } +} + +class FinalParameterUse extends UseImpl, TFinalParameterUse { + Parameter p; + + FinalParameterUse() { this = TFinalParameterUse(p, ind) } + + Parameter getParameter() { result = p } + + override Node getNode() { + result.(FinalParameterNode).getParameter() = p and + result.(FinalParameterNode).getIndirectionIndex() = ind + } + + override int getIndirection() { result = ind + 1 } + + override predicate isCertain() { any() } + + override predicate hasIndexInBlock(IRBlock block, int index) { + exists(ReturnInstruction return | + block.getInstruction(index) = return and + return.getEnclosingFunction() = p.getFunction() + ) + } + + override Cpp::Location getLocation() { + // Parameters can have multiple locations. When there's a unique location we use + // that one, but if multiple locations exist we default to an unknown location. + result = unique( | | p.getLocation()) + or + not exists(unique( | | p.getLocation())) and + result instanceof UnknownDefaultLocation + } + + override BaseSourceVariableInstruction getBase() { + exists(InitializeParameterInstruction init | + init.getParameter() = p and + // This is always a `VariableAddressInstruction` + result = init.getAnOperand().getDef() + ) + } } /** @@ -331,14 +410,7 @@ predicate adjacentDefRead(DefOrUse defOrUse1, UseOrPhi use) { ) } -private predicate useToNode(UseOrPhi use, Node nodeTo) { - exists(UseImpl useImpl | - useImpl = use.asDefOrUse() and - nodeHasOperand(nodeTo, useImpl.getOperand(), useImpl.getIndirectionIndex()) - ) - or - nodeTo.(SsaPhiNode).getPhiNode() = use.asPhi() -} +private predicate useToNode(UseOrPhi use, Node nodeTo) { nodeTo = use.getNode() } pragma[noinline] predicate outNodeHasAddressAndIndex( @@ -609,6 +681,8 @@ class Phi extends TPhi, SsaDefOrUse { final override Location getLocation() { result = phi.getBasicBlock().getLocation() } override string toString() { result = "Phi" } + + SsaPhiNode getNode() { result.getPhiNode() = phi } } class UseOrPhi extends SsaDefOrUse { @@ -621,6 +695,12 @@ class UseOrPhi extends SsaDefOrUse { final override Location getLocation() { result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation() } + + final Node getNode() { + result = this.(Phi).getNode() + or + result = this.asDefOrUse().(UseImpl).getNode() + } } class Def extends DefOrUse { diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll index 486b1ad716e..ad5d8a167b1 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -32,7 +32,8 @@ predicate ignoreInstruction(Instruction instr) { instr instanceof ChiInstruction or instr instanceof InitializeIndirectionInstruction or instr instanceof AliasedDefinitionInstruction or - instr instanceof InitializeNonLocalInstruction + instr instanceof InitializeNonLocalInstruction or + instr instanceof ReturnIndirectionInstruction ) } @@ -403,9 +404,10 @@ predicate isModifiableByCall(ArgumentOperand operand, int indirectionIndex) { private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) { indirectionIndex = [1 .. countIndirectionsForCppType(cppType)] and ( - exists(PointerOrReferenceType pointerType, Type base, Type t | - cppType.hasType(t, _) and + exists(Type pointerType, Type base, Type t | pointerType = t.getUnderlyingType() and + (pointerType instanceof PointerOrReferenceType or pointerType instanceof Cpp::ArrayType) and + cppType.hasType(t, _) and base = getTypeImpl(pointerType, indirectionIndex) | // The value cannot be modified if it has a const specifier, @@ -428,7 +430,7 @@ private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) { * type `t` to a function function. */ bindingset[indirectionIndex] -private predicate isModifiableAt(CppType cppType, int indirectionIndex) { +predicate isModifiableAt(CppType cppType, int indirectionIndex) { isModifiableAtImpl(cppType, indirectionIndex) or exists(PointerWrapper pw, Type t | @@ -556,7 +558,7 @@ private module Cached { isChiAfterIteratorDef(memory, iteratorDerefAddress, value, numberOfLoads) and memorySucc*(begin, memory) and isChiAfterBegin(container, begin) and - upper = countIndirectionsForCppType(container.getResultLanguageType()) and + upper = countIndirectionsForCppType(getResultLanguageType(container)) and ind = numberOfLoads + [1 .. upper] and indirectionIndex = ind - (numberOfLoads + 1) ) @@ -577,7 +579,7 @@ private module Cached { isChiBeforeIteratorUse(iteratorDerefAddress, memory, numberOfLoads) and memorySucc*(begin, memory) and isChiAfterBegin(container, begin) and - upper = countIndirectionsForCppType(container.getResultLanguageType()) and + upper = countIndirectionsForCppType(getResultLanguageType(container)) and ind = numberOfLoads + [1 .. upper] and indirectionIndex = ind - (numberOfLoads + 1) ) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll index abf42575e47..159068230a9 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll @@ -39,6 +39,10 @@ private newtype TDefOrUseImpl = } or TIteratorUse(BaseSourceVariableInstruction container, Operand iteratorAddress) { isIteratorUse(container, iteratorAddress, _, _) + } or + TFinalParameterUse(Parameter p) { + any(Indirection indirection).getType() = p.getUnspecifiedType() or + p.getUnspecifiedType() instanceof Cpp::ArrayType } abstract private class DefOrUseImpl extends TDefOrUseImpl { @@ -46,7 +50,7 @@ abstract private class DefOrUseImpl extends TDefOrUseImpl { abstract string toString(); /** Gets the block of this definition or use. */ - abstract IRBlock getBlock(); + final IRBlock getBlock() { this.hasIndexInBlock(result, _) } /** Holds if this definition or use has index `index` in block `block`. */ abstract predicate hasIndexInBlock(IRBlock block, int index); @@ -82,8 +86,6 @@ abstract class DefImpl extends DefOrUseImpl { override string toString() { result = address.toString() } - override IRBlock getBlock() { result = this.getAddressOperand().getDef().getBlock() } - override Cpp::Location getLocation() { result = this.getAddressOperand().getLocation() } final override predicate hasIndexInBlock(IRBlock block, int index) { @@ -113,10 +115,10 @@ private class IteratorDef extends DefImpl, TIteratorDef { override predicate isCertain() { none() } } -abstract class UseImpl extends DefOrUseImpl { - Operand operand; +abstract class UseImpl extends DefOrUseImpl { } - Operand getOperand() { result = operand } +abstract private class OperandBasedUse extends UseImpl { + Operand operand; override string toString() { result = operand.toString() } @@ -124,12 +126,10 @@ abstract class UseImpl extends DefOrUseImpl { operand.getUse() = block.getInstruction(index) } - final override IRBlock getBlock() { result = operand.getUse().getBlock() } - final override Cpp::Location getLocation() { result = operand.getLocation() } } -private class DirectUse extends UseImpl, TUseImpl { +private class DirectUse extends OperandBasedUse, TUseImpl { DirectUse() { this = TUseImpl(operand) } override BaseSourceVariableInstruction getBase() { isUse(_, operand, result, _, _) } @@ -137,7 +137,7 @@ private class DirectUse extends UseImpl, TUseImpl { override predicate isCertain() { isUse(true, operand, _, _, _) } } -private class IteratorUse extends UseImpl, TIteratorUse { +private class IteratorUse extends OperandBasedUse, TIteratorUse { BaseSourceVariableInstruction container; IteratorUse() { this = TIteratorUse(container, operand) } @@ -147,6 +147,40 @@ private class IteratorUse extends UseImpl, TIteratorUse { override predicate isCertain() { none() } } +private class FinalParameterUse extends UseImpl, TFinalParameterUse { + Parameter p; + + FinalParameterUse() { this = TFinalParameterUse(p) } + + override string toString() { result = p.toString() } + + final override predicate hasIndexInBlock(IRBlock block, int index) { + exists(ReturnInstruction return | + block.getInstruction(index + 1) = return and + return.getEnclosingFunction() = p.getFunction() + ) + } + + final override Cpp::Location getLocation() { + // Parameters can have multiple locations. When there's a unique location we use + // that one, but if multiple locations exist we default to an unknown location. + result = unique( | | p.getLocation()) + or + not exists(unique( | | p.getLocation())) and + result instanceof UnknownDefaultLocation + } + + override BaseSourceVariableInstruction getBase() { + exists(InitializeParameterInstruction init | + init.getParameter() = p and + // This is always a `VariableAddressInstruction` + result = init.getAnOperand().getDef() + ) + } + + override predicate isCertain() { any() } +} + private module SsaInput implements SsaImplCommon::InputSig { import InputSigCommon import SourceVariables