From 044ee9b08a04bc6c1696b56d8e28e1aef6d2eb49 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 26 Apr 2024 17:16:26 +0100 Subject: [PATCH] C++: Delete old iterator flow using memory edges. --- .../cpp/ir/dataflow/internal/SsaInternals.qll | 34 --- .../dataflow/internal/SsaInternalsCommon.qll | 242 ------------------ 2 files changed, 276 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index 7eb84aefe3f..db8fafa13b0 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -121,16 +121,6 @@ private newtype TDefOrUseImpl = // a function body. isGlobalDefImpl(v, f, _, indirectionIndex) } or - TIteratorDef( - Operand iteratorDerefAddress, BaseSourceVariableInstruction container, int indirectionIndex - ) { - isIteratorDef(container, iteratorDerefAddress, _, _, indirectionIndex) - } or - TIteratorUse( - Operand iteratorAddress, BaseSourceVariableInstruction container, int indirectionIndex - ) { - isIteratorUse(container, iteratorAddress, _, indirectionIndex) - } or TFinalParameterUse(Parameter p, int indirectionIndex) { underlyingTypeIsModifiableAt(p.getUnderlyingType(), indirectionIndex) and // Only create an SSA read for the final use of a parameter if there's @@ -336,20 +326,6 @@ private class DirectDef extends OperandBasedDef, TDefImpl { override predicate isCertain() { isDef(true, _, address, base, _, ind) } } -private class IteratorDef extends OperandBasedDef, TIteratorDef { - BaseSourceVariableInstruction container; - - IteratorDef() { this = TIteratorDef(address, container, ind) } - - override BaseSourceVariableInstruction getBase() { result = container } - - override int getIndirection() { isIteratorDef(container, address, _, result, ind) } - - override Node0Impl getValue() { isIteratorDef(container, address, result, _, _) } - - override predicate isCertain() { none() } -} - abstract class UseImpl extends DefOrUseImpl { int ind; @@ -418,16 +394,6 @@ private class DirectUse extends OperandBasedUse, TUseImpl { override Node getNode() { nodeHasOperand(result, operand, ind) } } -private class IteratorUse extends OperandBasedUse, TIteratorUse { - IteratorUse() { this = TIteratorUse(operand, base, ind) } - - override int getIndirection() { isIteratorUse(base, operand, result, ind) } - - override predicate isCertain() { none() } - - override Node getNode() { nodeHasOperand(result, operand, ind) } -} - pragma[nomagic] private predicate finalParameterNodeHasParameterAndIndex( FinalParameterNode n, Parameter p, int indirectionIndex diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll index 7e5b4de8122..0920e5a3865 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -246,14 +246,6 @@ private module IteratorIndirections { baseType = super.getValueType() } - override predicate isAdditionalDereference(Instruction deref, Operand address) { - exists(CallInstruction call | - operandForFullyConvertedCall(getAUse(deref), call) and - this = call.getStaticCallTarget().getClassAndName("operator*") and - address = call.getThisArgumentOperand() - ) - } - override predicate isAdditionalWrite(Node0Impl value, Operand address, boolean certain) { exists(CallInstruction call | call.getArgumentOperand(0) = value.asOperand() | this = call.getStaticCallTarget().getClassAndName("operator=") and @@ -262,16 +254,6 @@ private module IteratorIndirections { ) } - override predicate isAdditionalTaintStep(Node node1, Node node2) { - exists(CallInstruction call | - // Taint through `operator+=` and `operator-=` on iterators. - call.getStaticCallTarget() instanceof Iterator::IteratorAssignArithmeticOperator and - node2.(IndirectArgumentOutNode).getPreUpdateNode() = node1 and - node1.(IndirectOperand).hasOperandAndIndirectionIndex(call.getArgumentOperand(0), _) and - node1.getType().getUnspecifiedType() = this - ) - } - override predicate isAdditionalConversionFlow(Operand opFrom, Instruction instrTo) { // This is a bit annoying: Consider the following snippet: // ``` @@ -589,230 +571,6 @@ private class BaseCallInstruction extends BaseSourceVariableInstruction, CallIns cached private module Cached { - private import semmle.code.cpp.models.interfaces.Iterator as Interfaces - private import semmle.code.cpp.models.implementations.Iterator as Iterator - private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO - - /** - * Holds if `next` is a instruction with a memory result that potentially - * updates the memory produced by `prev`. - */ - private predicate memorySucc(Instruction prev, Instruction next) { - prev = next.(ChiInstruction).getTotal() - or - // Phi inputs can be inexact. - prev = next.(PhiInstruction).getAnInputOperand().getAnyDef() - or - prev = next.(CopyInstruction).getSourceValue() - or - exists(ReadSideEffectInstruction read | - next = read.getPrimaryInstruction() and - isAdditionalConversionFlow(_, next) and - prev = read.getSideEffectOperand().getAnyDef() - ) - } - - /** - * Holds if `iteratorDerefAddress` is an address of an iterator dereference (i.e., `*it`) - * that is used for a write operation that writes the value `value`. The `memory` instruction - * represents the memory that the IR's SSA analysis determined was read by the call to `operator*`. - * - * The `numberOfLoads` integer represents the number of dereferences this write corresponds to - * on the underlying container that produced the iterator. - */ - private predicate isChiAfterIteratorDef( - Instruction memory, Operand iteratorDerefAddress, Node0Impl value, int numberOfLoads - ) { - exists( - BaseSourceVariableInstruction iteratorBase, ReadSideEffectInstruction read, - Operand iteratorAddress - | - numberOfLoads >= 0 and - isDef(_, value, iteratorDerefAddress, iteratorBase, numberOfLoads + 2, 0) and - isUse(_, iteratorAddress, iteratorBase, numberOfLoads + 1, 0) and - iteratorBase.getResultType() instanceof Interfaces::Iterator and - isDereference(iteratorAddress.getDef(), read.getArgumentDef().getAUse(), _) and - memory = read.getSideEffectOperand().getAnyDef() - ) - } - - private predicate isSource(Instruction instr, Operand iteratorAddress, int numberOfLoads) { - getAUse(instr) = iteratorAddress and - exists(BaseSourceVariableInstruction iteratorBase | - iteratorBase.getResultType() instanceof Interfaces::Iterator and - not iteratorBase.getResultType() instanceof Cpp::PointerType and - isUse(_, iteratorAddress, iteratorBase, numberOfLoads - 1, 0) - ) - } - - private predicate isSink(Instruction instr, CallInstruction call) { - getAUse(instr).(ArgumentOperand).getCall() = call and - // Only include operations that may modify the object that the iterator points to. - // The following is a non-exhaustive list of things that may modify the value of the - // iterator, but never the value of what the iterator points to. - // The more things we can exclude here, the faster the small dataflow-like analysis - // done by `convertsIntoArgument` will converge. - not exists(Function f | f = call.getStaticCallTarget() | - f instanceof Iterator::IteratorCrementOperator or - f instanceof Iterator::IteratorBinaryArithmeticOperator or - f instanceof Iterator::IteratorAssignArithmeticOperator or - f instanceof Iterator::IteratorCrementMemberOperator or - f instanceof Iterator::IteratorBinaryArithmeticMemberOperator or - f instanceof Iterator::IteratorAssignArithmeticMemberOperator or - f instanceof Iterator::IteratorAssignmentMemberOperator - ) - } - - private predicate convertsIntoArgumentFwd(Instruction instr) { - isSource(instr, _, _) - or - exists(Instruction prev | convertsIntoArgumentFwd(prev) | - conversionFlow(unique( | | getAUse(prev)), instr, false, _) - ) - } - - private predicate convertsIntoArgumentRev(Instruction instr) { - convertsIntoArgumentFwd(instr) and - ( - isSink(instr, _) - or - exists(Instruction next | convertsIntoArgumentRev(next) | - conversionFlow(unique( | | getAUse(instr)), next, false, _) - ) - ) - } - - private predicate convertsIntoArgument( - Operand iteratorAddress, CallInstruction call, int numberOfLoads - ) { - exists(Instruction iteratorAddressDef | - isSource(iteratorAddressDef, iteratorAddress, numberOfLoads) and - isSink(iteratorAddressDef, call) and - convertsIntoArgumentRev(pragma[only_bind_into](iteratorAddressDef)) - ) - } - - private predicate isChiAfterIteratorArgument( - Instruction memory, Operand iteratorAddress, int numberOfLoads - ) { - // Ideally, `iteratorAddress` would be an `ArgumentOperand`, but there might be - // various conversions applied to it before it becomes an argument. - // So we do a small amount of flow to find the call that the iterator is passed to. - exists(CallInstruction call | convertsIntoArgument(iteratorAddress, call, numberOfLoads) | - exists(ReadSideEffectInstruction read | - read.getPrimaryInstruction() = call and - read.getSideEffectOperand().getAnyDef() = memory - ) - or - exists(LoadInstruction load | - iteratorAddress.getDef() = load and - memory = load.getSourceValueOperand().getAnyDef() - ) - ) - } - - /** - * Holds if `iterator` is a `StoreInstruction` that stores the result of some function - * returning an iterator into an address computed started at `containerBase`. - * - * For example, given a declaration like `std::vector::iterator it = v.begin()`, - * the `iterator` will be the `StoreInstruction` generated by the write to `it`, and - * `containerBase` will be the address of `v`. - */ - private predicate isChiAfterBegin( - BaseSourceVariableInstruction containerBase, StoreInstruction iterator - ) { - exists( - CallInstruction getIterator, Iterator::GetIteratorFunction getIteratorFunction, - IO::FunctionInput input, int i - | - getIterator = iterator.getSourceValue() and - getIteratorFunction = getIterator.getStaticCallTarget() and - getIteratorFunction.getsIterator(input, _) and - isDef(_, any(Node0Impl n | n.asInstruction() = iterator), _, _, 1, 0) and - input.isParameterDerefOrQualifierObject(i) and - isUse(_, getIterator.getArgumentOperand(i), containerBase, 0, 0) - ) - } - - /** - * Holds if `iteratorAddress` is an address of an iterator that is used for - * a read operation. The `memory` instruction represents the memory that - * the IR's SSA analysis determined was read by the call to `operator*`. - * - * Finally, the `numberOfLoads` integer represents the number of dereferences - * this read corresponds to on the underlying container that produced the iterator. - */ - private predicate isChiBeforeIteratorUse( - Operand iteratorAddress, Instruction memory, int numberOfLoads - ) { - exists( - BaseSourceVariableInstruction iteratorBase, LoadInstruction load, - ReadSideEffectInstruction read, Operand iteratorDerefAddress - | - numberOfLoads >= 0 and - isUse(_, iteratorAddress, iteratorBase, numberOfLoads + 1, 0) and - isUse(_, iteratorDerefAddress, iteratorBase, numberOfLoads + 2, 0) and - iteratorBase.getResultType() instanceof Interfaces::Iterator and - load.getSourceAddressOperand() = iteratorDerefAddress and - read.getPrimaryInstruction() = load.getSourceAddress() and - memory = read.getSideEffectOperand().getAnyDef() - ) - } - - /** - * Holds if `iteratorDerefAddress` is an address of an iterator dereference (i.e., `*it`) - * that is used for a write operation that writes the value `value` to a container that - * created the iterator. `container` represents the base of the address of the container - * that was used to create the iterator. - */ - cached - predicate isIteratorDef( - BaseSourceVariableInstruction container, Operand iteratorDerefAddress, Node0Impl value, - int numberOfLoads, int indirectionIndex - ) { - exists(Instruction memory, Instruction begin, int upper, int ind | - isChiAfterIteratorDef(memory, iteratorDerefAddress, value, numberOfLoads) and - memorySucc*(begin, memory) and - isChiAfterBegin(container, begin) and - upper = countIndirectionsForCppType(getResultLanguageType(container)) and - ind = numberOfLoads + [1 .. upper] and - indirectionIndex = ind - (numberOfLoads + 1) - ) - } - - /** - * Holds if `iteratorAddress` is an address of an iterator that is used for a - * read operation to read a value from a container that created the iterator. - * `container` represents the base of the address of the container that was used - * to create the iterator. - */ - cached - predicate isIteratorUse( - BaseSourceVariableInstruction container, Operand iteratorAddress, int numberOfLoads, - int indirectionIndex - ) { - // Direct use - exists(Instruction begin, Instruction memory, int upper, int ind | - isChiBeforeIteratorUse(iteratorAddress, memory, numberOfLoads) and - memorySucc*(begin, memory) and - isChiAfterBegin(container, begin) and - upper = countIndirectionsForCppType(getResultLanguageType(container)) and - ind = numberOfLoads + [1 .. upper] and - indirectionIndex = ind - (numberOfLoads + 1) - ) - or - // Use through function output - exists(Instruction memory, Instruction begin, int upper, int ind | - isChiAfterIteratorArgument(memory, iteratorAddress, numberOfLoads) and - memorySucc*(begin, memory) and - isChiAfterBegin(container, begin) and - upper = countIndirectionsForCppType(getResultLanguageType(container)) and - ind = numberOfLoads + [1 .. upper] and - indirectionIndex = ind - (numberOfLoads - 1) - ) - } - /** Holds if `op` is the only use of its defining instruction, and that op is used in a conversation */ private predicate isConversion(Operand op) { exists(Instruction def, Operand use |