Merge pull request #5522 from github/rdmarsh2/cpp/ssa-reuse

C++: reuse unaliased SSA results when computing aliased SSA
2026-04-29 10:45:15 +02:00 · 2021-06-01 10:17:54 -04:00
parent 534e771309 db85a215ab
commit da14647e5a
41 changed files with 2171 additions and 201 deletions
--- a/csharp/ql/src/experimental/ir/implementation/internal/TInstruction.qll
+++ b/csharp/ql/src/experimental/ir/implementation/internal/TInstruction.qll
@@ -55,6 +55,8 @@ module UnaliasedSSAInstructions {
    result = TUnaliasedSSAPhiInstruction(blockStartInstr, memoryLocation)
  }

+  TRawInstruction reusedPhiInstruction(TRawInstruction blockStartInstr) { none() }
+
  class TChiInstruction = TUnaliasedSSAChiInstruction;

  TChiInstruction chiInstruction(TRawInstruction primaryInstruction) {
@@ -75,7 +77,7 @@ module UnaliasedSSAInstructions {
 * a class alias.
 */
 module AliasedSSAInstructions {
-  class TPhiInstruction = TAliasedSSAPhiInstruction;
+  class TPhiInstruction = TAliasedSSAPhiInstruction or TUnaliasedSSAPhiInstruction;

  TPhiInstruction phiInstruction(
    TRawInstruction blockStartInstr, AliasedSSA::SSA::MemoryLocation memoryLocation
@@ -83,6 +85,10 @@ module AliasedSSAInstructions {
    result = TAliasedSSAPhiInstruction(blockStartInstr, memoryLocation)
  }

+  TPhiInstruction reusedPhiInstruction(TRawInstruction blockStartInstr) {
+    result = TUnaliasedSSAPhiInstruction(blockStartInstr, _)
+  }
+
  class TChiInstruction = TAliasedSSAChiInstruction;

  TChiInstruction chiInstruction(TRawInstruction primaryInstruction) {
--- a/csharp/ql/src/experimental/ir/implementation/internal/TOperand.qll
+++ b/csharp/ql/src/experimental/ir/implementation/internal/TOperand.qll
@@ -92,6 +92,16 @@ module RawOperands {
    none()
  }

+  /**
+   * Returns the Phi operand with the specified parameters.
+   */
+  TPhiOperand reusedPhiOperand(
+    Raw::PhiInstruction useInstr, Raw::Instruction defInstr, Raw::IRBlock predecessorBlock,
+    Overlap overlap
+  ) {
+    none()
+  }
+
  /**
   * Returns the Chi operand with the specified parameters.
   */
@@ -123,6 +133,16 @@ module UnaliasedSSAOperands {
    result = Internal::TUnaliasedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
  }

+  /**
+   * Returns the Phi operand with the specified parameters.
+   */
+  TPhiOperand reusedPhiOperand(
+    Unaliased::PhiInstruction useInstr, Unaliased::Instruction defInstr,
+    Unaliased::IRBlock predecessorBlock, Overlap overlap
+  ) {
+    none()
+  }
+
  /**
   * Returns the Chi operand with the specified parameters.
   */
--- a/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll
+++ b/csharp/ql/src/experimental/ir/implementation/raw/Instruction.qll
@@ -1994,6 +1994,14 @@ class PhiInstruction extends Instruction {
   */
  pragma[noinline]
  final Instruction getAnInput() { result = this.getAnInputOperand().getDef() }
+
+  /**
+   * Gets the input operand representing the value that flows from the specified predecessor block.
+   */
+  final PhiInputOperand getInputOperand(IRBlock predecessorBlock) {
+    result = this.getAnOperand() and
+    result.getPredecessorBlock() = predecessorBlock
+  }
 }

 /**
--- a/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll
+++ b/csharp/ql/src/experimental/ir/implementation/raw/Operand.qll
@@ -28,11 +28,15 @@ class Operand extends TStageOperand {
  cached
  Operand() {
    // Ensure that the operand does not refer to instructions from earlier stages that are unreachable here
-    exists(Instruction use, Instruction def | this = registerOperand(use, _, def)) or
-    exists(Instruction use | this = nonSSAMemoryOperand(use, _)) or
+    exists(Instruction use, Instruction def | this = registerOperand(use, _, def))
+    or
+    exists(Instruction use | this = nonSSAMemoryOperand(use, _))
+    or
    exists(Instruction use, Instruction def, IRBlock predecessorBlock |
-      this = phiOperand(use, def, predecessorBlock, _)
-    ) or
+      this = phiOperand(use, def, predecessorBlock, _) or
+      this = reusedPhiOperand(use, def, predecessorBlock, _)
+    )
+    or
    exists(Instruction use | this = chiOperand(use, _))
  }

@@ -431,7 +435,11 @@ class PhiInputOperand extends MemoryOperand, TPhiOperand {
  Overlap overlap;

  cached
-  PhiInputOperand() { this = phiOperand(useInstr, defInstr, predecessorBlock, overlap) }
+  PhiInputOperand() {
+    this = phiOperand(useInstr, defInstr, predecessorBlock, overlap)
+    or
+    this = reusedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
+  }

  override string toString() { result = "Phi" }

--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Instruction.qll
@@ -1994,6 +1994,14 @@ class PhiInstruction extends Instruction {
   */
  pragma[noinline]
  final Instruction getAnInput() { result = this.getAnInputOperand().getDef() }
+
+  /**
+   * Gets the input operand representing the value that flows from the specified predecessor block.
+   */
+  final PhiInputOperand getInputOperand(IRBlock predecessorBlock) {
+    result = this.getAnOperand() and
+    result.getPredecessorBlock() = predecessorBlock
+  }
 }

 /**
--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/Operand.qll
@@ -28,11 +28,15 @@ class Operand extends TStageOperand {
  cached
  Operand() {
    // Ensure that the operand does not refer to instructions from earlier stages that are unreachable here
-    exists(Instruction use, Instruction def | this = registerOperand(use, _, def)) or
-    exists(Instruction use | this = nonSSAMemoryOperand(use, _)) or
+    exists(Instruction use, Instruction def | this = registerOperand(use, _, def))
+    or
+    exists(Instruction use | this = nonSSAMemoryOperand(use, _))
+    or
    exists(Instruction use, Instruction def, IRBlock predecessorBlock |
-      this = phiOperand(use, def, predecessorBlock, _)
-    ) or
+      this = phiOperand(use, def, predecessorBlock, _) or
+      this = reusedPhiOperand(use, def, predecessorBlock, _)
+    )
+    or
    exists(Instruction use | this = chiOperand(use, _))
  }

@@ -431,7 +435,11 @@ class PhiInputOperand extends MemoryOperand, TPhiOperand {
  Overlap overlap;

  cached
-  PhiInputOperand() { this = phiOperand(useInstr, defInstr, predecessorBlock, overlap) }
+  PhiInputOperand() {
+    this = phiOperand(useInstr, defInstr, predecessorBlock, overlap)
+    or
+    this = reusedPhiOperand(useInstr, defInstr, predecessorBlock, overlap)
+  }

  override string toString() { result = "Phi" }

--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll
@@ -90,6 +90,9 @@ private predicate operandIsConsumedWithoutEscaping(Operand operand) {
      or
      // Converting an address to a `bool` does not escape the address.
      instr.(ConvertInstruction).getResultIRType() instanceof IRBooleanType
+      or
+      instr instanceof CallInstruction and
+      not exists(IREscapeAnalysisConfiguration config | config.useSoundEscapeAnalysis())
    )
  )
  or
@@ -284,14 +287,24 @@ private predicate isArgumentForParameter(
 private predicate isOnlyEscapesViaReturnArgument(Operand operand) {
  exists(AliasModels::AliasFunction f |
    f = operand.getUse().(CallInstruction).getStaticCallTarget() and
-    f.parameterEscapesOnlyViaReturn(operand.(PositionalArgumentOperand).getIndex())
+    (
+      f.parameterEscapesOnlyViaReturn(operand.(PositionalArgumentOperand).getIndex())
+      or
+      f.parameterEscapesOnlyViaReturn(-1) and
+      operand instanceof ThisArgumentOperand
+    )
  )
 }

 private predicate isNeverEscapesArgument(Operand operand) {
  exists(AliasModels::AliasFunction f |
    f = operand.getUse().(CallInstruction).getStaticCallTarget() and
-    f.parameterNeverEscapes(operand.(PositionalArgumentOperand).getIndex())
+    (
+      f.parameterNeverEscapes(operand.(PositionalArgumentOperand).getIndex())
+      or
+      f.parameterNeverEscapes(-1) and
+      operand instanceof ThisArgumentOperand
+    )
  )
 }

@@ -325,6 +338,9 @@ predicate allocationEscapes(Configuration::Allocation allocation) {
  exists(IREscapeAnalysisConfiguration config |
    config.useSoundEscapeAnalysis() and resultEscapesNonReturn(allocation.getABaseInstruction())
  )
+  or
+  Configuration::phaseNeedsSoundEscapeAnalysis() and
+  resultEscapesNonReturn(allocation.getABaseInstruction())
 }

 /**
--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/AliasConfiguration.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/AliasConfiguration.qll
@@ -14,3 +14,5 @@ class Allocation extends IRAutomaticVariable {
    none()
  }
 }
+
+predicate phaseNeedsSoundEscapeAnalysis() { any() }
--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll
@@ -43,24 +43,81 @@ private module Cached {
  class TStageInstruction =
    TRawInstruction or TPhiInstruction or TChiInstruction or TUnreachedInstruction;

+  /**
+   * If `oldInstruction` is a `Phi` instruction that has exactly one reachable predecessor block,
+   * this predicate returns the `PhiInputOperand` corresponding to that predecessor block.
+   * Otherwise, this predicate does not hold.
+   */
+  private OldIR::PhiInputOperand getDegeneratePhiOperand(OldInstruction oldInstruction) {
+    result =
+      unique(OldIR::PhiInputOperand operand |
+        operand = oldInstruction.(OldIR::PhiInstruction).getAnInputOperand() and
+        operand.getPredecessorBlock() instanceof OldBlock
+      )
+  }
+
  cached
  predicate hasInstruction(TStageInstruction instr) {
    instr instanceof TRawInstruction and instr instanceof OldInstruction
    or
-    instr instanceof TPhiInstruction
+    instr = phiInstruction(_, _)
+    or
+    instr = reusedPhiInstruction(_) and
+    // Check that the phi instruction is *not* degenerate, but we can't use
+    // getDegeneratePhiOperand in the first stage with phi instructions
+    not exists(
+      unique(OldIR::PhiInputOperand operand |
+        operand = instr.(OldIR::PhiInstruction).getAnInputOperand() and
+        operand.getPredecessorBlock() instanceof OldBlock
+      )
+    )
    or
    instr instanceof TChiInstruction
    or
    instr instanceof TUnreachedInstruction
  }

-  private IRBlock getNewBlock(OldBlock oldBlock) {
-    result.getFirstInstruction() = getNewInstruction(oldBlock.getFirstInstruction())
+  cached
+  IRBlock getNewBlock(OldBlock oldBlock) {
+    exists(Instruction newEnd, OldIR::Instruction oldEnd |
+      (
+        result.getLastInstruction() = newEnd and
+        not newEnd instanceof ChiInstruction
+        or
+        newEnd = result.getLastInstruction().(ChiInstruction).getAPredecessor() // does this work?
+      ) and
+      (
+        oldBlock.getLastInstruction() = oldEnd and
+        not oldEnd instanceof OldIR::ChiInstruction
+        or
+        oldEnd = oldBlock.getLastInstruction().(OldIR::ChiInstruction).getAPredecessor() // does this work?
+      ) and
+      oldEnd = getNewInstruction(newEnd)
+    )
+  }
+
+  /**
+   * Gets the block from the old IR that corresponds to `newBlock`.
+   */
+  private OldBlock getOldBlock(IRBlock newBlock) { getNewBlock(result) = newBlock }
+
+  /**
+   * Holds if this iteration of SSA can model the def/use information for the result of
+   * `oldInstruction`, either because alias analysis has determined a memory location for that
+   * result, or because a previous iteration of the IR already computed that def/use information
+   * completely.
+   */
+  private predicate canModelResultForOldInstruction(OldInstruction oldInstruction) {
+    // We're modeling the result's memory location ourselves.
+    exists(Alias::getResultMemoryLocation(oldInstruction))
+    or
+    // This result was already modeled by a previous iteration of SSA.
+    Alias::canReuseSSAForOldResult(oldInstruction)
  }

  cached
  predicate hasModeledMemoryResult(Instruction instruction) {
-    exists(Alias::getResultMemoryLocation(getOldInstruction(instruction))) or
+    canModelResultForOldInstruction(getOldInstruction(instruction)) or
    instruction instanceof PhiInstruction or // Phis always have modeled results
    instruction instanceof ChiInstruction // Chis always have modeled results
  }
@@ -117,6 +174,32 @@ private module Cached {
    )
  }

+  /**
+   * Gets the new definition instruction for `oldOperand` based on `oldOperand`'s definition in the
+   * old IR. Usually, this will just get the old definition of `oldOperand` and map it to the
+   * corresponding new instruction. However, if the old definition of `oldOperand` is a `Phi`
+   * instruction that is now degenerate due to all but one of its predecessor branches being
+   * unreachable, this predicate will recurse through any degenerate `Phi` instructions to find the
+   * true definition.
+   */
+  private Instruction getNewDefinitionFromOldSSA(OldIR::MemoryOperand oldOperand, Overlap overlap) {
+    exists(Overlap originalOverlap |
+      originalOverlap = oldOperand.getDefinitionOverlap() and
+      (
+        result = getNewInstruction(oldOperand.getAnyDef()) and
+        overlap = originalOverlap
+        or
+        exists(OldIR::PhiInputOperand phiOperand, Overlap phiOperandOverlap |
+          phiOperand = getDegeneratePhiOperand(oldOperand.getAnyDef()) and
+          result = getNewDefinitionFromOldSSA(phiOperand, phiOperandOverlap) and
+          overlap =
+            combineOverlap(pragma[only_bind_out](phiOperandOverlap),
+              pragma[only_bind_out](originalOverlap))
+        )
+      )
+    )
+  }
+
  cached
  private Instruction getMemoryOperandDefinition0(
    Instruction instruction, MemoryOperandTag tag, Overlap overlap
@@ -148,6 +231,12 @@ private module Cached {
      overlap instanceof MustExactlyOverlap and
      exists(MustTotallyOverlap o | exists(getMemoryOperandDefinition0(instruction, tag, o)))
    )
+    or
+    exists(OldIR::NonPhiMemoryOperand oldOperand |
+      result = getNewDefinitionFromOldSSA(oldOperand, overlap) and
+      oldOperand.getUse() = instruction and
+      tag = oldOperand.getOperandTag()
+    )
  }

  /**
@@ -214,10 +303,24 @@ private module Cached {
    )
  }

+  /**
+   * Gets the new definition instruction for the operand of `instr` that flows from the block
+   * `newPredecessorBlock`, based on that operand's definition in the old IR.
+   */
+  private Instruction getNewPhiOperandDefinitionFromOldSSA(
+    Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
+  ) {
+    exists(OldIR::PhiInstruction oldPhi, OldIR::PhiInputOperand oldOperand |
+      oldPhi = getOldInstruction(instr) and
+      oldOperand = oldPhi.getInputOperand(getOldBlock(newPredecessorBlock)) and
+      result = getNewDefinitionFromOldSSA(oldOperand, overlap)
+    )
+  }
+
  pragma[noopt]
  cached
  Instruction getPhiOperandDefinition(
-    PhiInstruction instr, IRBlock newPredecessorBlock, Overlap overlap
+    Instruction instr, IRBlock newPredecessorBlock, Overlap overlap
  ) {
    exists(
      Alias::MemoryLocation defLocation, Alias::MemoryLocation useLocation, OldBlock phiBlock,
@@ -229,6 +332,8 @@ private module Cached {
      result = getDefinitionOrChiInstruction(defBlock, defOffset, defLocation, actualDefLocation) and
      overlap = Alias::getOverlap(actualDefLocation, useLocation)
    )
+    or
+    result = getNewPhiOperandDefinitionFromOldSSA(instr, newPredecessorBlock, overlap)
  }

  cached
@@ -249,7 +354,12 @@ private module Cached {
  cached
  Instruction getPhiInstructionBlockStart(PhiInstruction instr) {
    exists(OldBlock oldBlock |
-      instr = getPhi(oldBlock, _) and
+      (
+        instr = getPhi(oldBlock, _)
+        or
+        // Any `Phi` that we propagated from the previous iteration stays in the same block.
+        getOldInstruction(instr).getBlock() = oldBlock
+      ) and
      result = getNewInstruction(oldBlock.getFirstInstruction())
    )
  }
@@ -335,6 +445,9 @@ private module Cached {
      result = vvar.getType()
    )
    or
+    instr = reusedPhiInstruction(_) and
+    result = instr.(OldInstruction).getResultLanguageType()
+    or
    instr = unreachedInstruction(_) and result = Language::getVoidType()
  }

@@ -862,6 +975,26 @@ module DefUse {
  }
 }

+predicate canReuseSSAForMemoryResult(Instruction instruction) {
+  exists(OldInstruction oldInstruction |
+    oldInstruction = getOldInstruction(instruction) and
+    (
+      // The previous iteration said it was reusable, so we should mark it as reusable as well.
+      Alias::canReuseSSAForOldResult(oldInstruction)
+      or
+      // The current alias analysis says it is reusable.
+      Alias::getResultMemoryLocation(oldInstruction).canReuseSSA()
+    )
+  )
+  or
+  exists(Alias::MemoryLocation defLocation |
+    // This is a `Phi` for a reusable location, so the result of the `Phi` is reusable as well.
+    instruction = phiInstruction(_, defLocation) and
+    defLocation.canReuseSSA()
+  )
+  // We don't support reusing SSA for any location that could create a `Chi` instruction.
+}
+
 /**
 * Expose some of the internal predicates to PrintSSA.qll. We do this by publically importing those modules in the
 * `DebugSSA` module, which is then imported by PrintSSA.
--- a/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll
+++ b/csharp/ql/src/experimental/ir/implementation/unaliased_ssa/internal/SimpleSSA.qll
@@ -17,7 +17,7 @@ private predicate isTotalAccess(Allocation var, AddressOperand addrOperand, IRTy
 * variable if its address never escapes and all reads and writes of that variable access the entire
 * variable using the original type of the variable.
 */
-private predicate isVariableModeled(Allocation var) {
+predicate isVariableModeled(Allocation var) {
  not allocationEscapes(var) and
  forall(Instruction instr, AddressOperand addrOperand, IRType type |
    addrOperand = instr.getResultAddressOperand() and
@@ -35,6 +35,17 @@ private predicate isVariableModeled(Allocation var) {
  )
 }

+/**
+ * Holds if the SSA use/def chain for the specified variable can be safely reused by later
+ * iterations of SSA construction. This will hold only if we modeled the variable soundly, so that
+ * subsequent iterations will recompute SSA for any variable that we assumed did not escape, but
+ * actually would have escaped if we had used a sound escape analysis.
+ */
+predicate canReuseSSAForVariable(IRAutomaticVariable var) {
+  isVariableModeled(var) and
+  not allocationEscapes(var)
+}
+
 private newtype TMemoryLocation = MkMemoryLocation(Allocation var) { isVariableModeled(var) }

 private MemoryLocation getMemoryLocation(Allocation var) { result.getAllocation() = var }
@@ -57,8 +68,12 @@ class MemoryLocation extends TMemoryLocation {
  final Language::LanguageType getType() { result = var.getLanguageType() }

  final string getUniqueId() { result = var.getUniqueId() }
+
+  final predicate canReuseSSA() { canReuseSSAForVariable(var) }
 }

+predicate canReuseSSAForOldResult(Instruction instr) { none() }
+
 /**
 * Represents a set of `MemoryLocation`s that cannot overlap with
 * `MemoryLocation`s outside of the set. The `VirtualVariable` will be
--- a/csharp/ql/src/experimental/ir/internal/Overlap.qll
+++ b/csharp/ql/src/experimental/ir/internal/Overlap.qll
@@ -8,6 +8,16 @@ private newtype TOverlap =
 */
 abstract class Overlap extends TOverlap {
  abstract string toString();
+
+  /**
+   * Gets a value representing how precise this overlap is. The higher the value, the more precise
+   * the overlap. The precision values are ordered as
+   * follows, from most to least precise:
+   * `MustExactlyOverlap`
+   * `MustTotallyOverlap`
+   * `MayPartiallyOverlap`
+   */
+  abstract int getPrecision();
 }

 /**
@@ -16,6 +26,8 @@ abstract class Overlap extends TOverlap {
 */
 class MayPartiallyOverlap extends Overlap, TMayPartiallyOverlap {
  final override string toString() { result = "MayPartiallyOverlap" }
+
+  final override int getPrecision() { result = 0 }
 }

 /**
@@ -24,6 +36,8 @@ class MayPartiallyOverlap extends Overlap, TMayPartiallyOverlap {
 */
 class MustTotallyOverlap extends Overlap, TMustTotallyOverlap {
  final override string toString() { result = "MustTotallyOverlap" }
+
+  final override int getPrecision() { result = 1 }
 }

 /**
@@ -32,4 +46,25 @@ class MustTotallyOverlap extends Overlap, TMustTotallyOverlap {
 */
 class MustExactlyOverlap extends Overlap, TMustExactlyOverlap {
  final override string toString() { result = "MustExactlyOverlap" }
+
+  final override int getPrecision() { result = 2 }
+}
+
+/**
+ * Gets the `Overlap` that best represents the relationship between two memory locations `a` and
+ * `c`, where `getOverlap(a, b) = previousOverlap` and `getOverlap(b, c) = newOverlap`, for some
+ * intermediate memory location `b`.
+ */
+Overlap combineOverlap(Overlap previousOverlap, Overlap newOverlap) {
+  // Note that it's possible that two less precise overlaps could combine to result in a more
+  // precise overlap. For example, both `previousOverlap` and `newOverlap` could be
+  // `MustTotallyOverlap` even though the actual relationship between `a` and `c` is
+  // `MustExactlyOverlap`. We will still return `MustTotallyOverlap` as the best conservative
+  // approximation we can make without additional input information.
+  result =
+    min(Overlap overlap |
+      overlap = [previousOverlap, newOverlap]
+    |
+      overlap order by overlap.getPrecision()
+    )
 }