upgrading to 2.17.5

2025-12-22 03:36:30 +01:00 · 2024-06-12 12:35:56 -07:00
parent cccbdf25c7 a160b891c8
commit c01daaa40e
1057 changed files with 21594 additions and 15634 deletions
--- a/cpp/ql/lib/semmle/code/cpp/Variable.qll
+++ b/cpp/ql/lib/semmle/code/cpp/Variable.qll
@@ -410,6 +410,10 @@ class LocalVariable extends LocalScopeVariable, @localvariable {
    or
    orphaned_variables(underlyingElement(this), unresolveElement(result))
  }
+
+  override predicate isStatic() {
+    super.isStatic() or orphaned_variables(underlyingElement(this), _)
+  }
 }

 /**
--- a/cpp/ql/lib/semmle/code/cpp/controlflow/IRGuards.qll
+++ b/cpp/ql/lib/semmle/code/cpp/controlflow/IRGuards.qll
@@ -375,6 +375,33 @@ cached
 class IRGuardCondition extends Instruction {
  Instruction branch;

+  /*
+   * An `IRGuardCondition` supports reasoning about four different kinds of
+   * relations:
+   * 1. A unary equality relation of the form `e == k`
+   * 2. A binary equality relation of the form `e1 == e2 + k`
+   * 3. A unary inequality relation of the form `e < k`
+   * 4. A binary inequality relation of the form `e1 < e2 + k`
+   *
+   * where `k` is a constant.
+   *
+   * Furthermore, the unary relations (i.e., case 1 and case 3) are also
+   * inferred from `switch` statement guards: equality relations are inferred
+   * from the unique `case` statement, if any, and inequality relations are
+   * inferred from the [case range](https://gcc.gnu.org/onlinedocs/gcc/Case-Ranges.html)
+   * gcc extension.
+   *
+   * The implementation of all four follows the same structure: Each relation
+   * has a cached user-facing predicate that calls a helper predicate. For example,
+   * `GuardCondition::comparesEq` calls `compares_eq`. This predicate has
+   * several cases that recursively decompose the relation to bring it to a
+   * canonical form (i.e., a relation of the form `e1 == e2 + k`). The base
+   * case for this relation (i.e., `simple_comparison_eq`) handles
+   * `CompareEQInstruction`s and `CompareNEInstruction`s, and recursive
+   * predicates (e.g., `complex_eq`) rewrite larger expressions such as
+   * `e1 + k1 == e2 + k2` into the canonical form `e1 == e2 + (k2 - k1)`.
+   */
+
  cached
  IRGuardCondition() { branch = getBranchForCondition(this) }

@@ -776,7 +803,9 @@ private predicate unary_compares_eq(
  Instruction test, Operand op, int k, boolean areEqual, boolean inNonZeroCase, AbstractValue value
 ) {
  /* The simple case where the test *is* the comparison so areEqual = testIsTrue xor eq. */
-  exists(AbstractValue v | unary_simple_comparison_eq(test, op, k, inNonZeroCase, v) |
+  exists(AbstractValue v |
+    unary_simple_comparison_eq(test, k, inNonZeroCase, v) and op.getDef() = test
+  |
    areEqual = true and value = v
    or
    areEqual = false and value = v.getDualValue()
@@ -821,45 +850,55 @@ private predicate simple_comparison_eq(
  value.(BooleanValue).getValue() = false
 }

-/**
- * Holds if `test` is an instruction that is part of test that eventually is
- * used in a conditional branch.
- */
-private predicate relevantUnaryComparison(Instruction test) {
-  not test instanceof CompareInstruction and
-  exists(IRType type, ConditionalBranchInstruction branch |
-    type instanceof IRAddressType or type instanceof IRIntegerType
-  |
-    type = test.getResultIRType() and
-    branch.getCondition() = test
-  )
-  or
-  exists(LogicalNotInstruction logicalNot |
-    relevantUnaryComparison(logicalNot) and
-    test = logicalNot.getUnary()
-  )
-}
-
 /**
 * Rearrange various simple comparisons into `op == k` form.
 */
 private predicate unary_simple_comparison_eq(
-  Instruction test, Operand op, int k, boolean inNonZeroCase, AbstractValue value
+  Instruction test, int k, boolean inNonZeroCase, AbstractValue value
 ) {
  exists(SwitchInstruction switch, CaseEdge case |
    test = switch.getExpression() and
-    op.getDef() = test and
    case = value.(MatchValue).getCase() and
    exists(switch.getSuccessor(case)) and
    case.getValue().toInt() = k and
    inNonZeroCase = false
  )
  or
-  // There's no implicit CompareInstruction in files compiled as C since C
-  // doesn't have implicit boolean conversions. So instead we check whether
-  // there's a branch on a value of pointer or integer type.
-  relevantUnaryComparison(test) and
-  op.getDef() = test and
+  // Any instruction with an integral type could potentially be part of a
+  // check for nullness when used in a guard. So we include all integral
+  // typed instructions here. However, since some of these instructions are
+  // already included as guards in other cases, we exclude those here.
+  // These are instructions that compute a binary equality or inequality
+  // relation. For example, the following:
+  // ```cpp
+  // if(a == b + 42) { ... }
+  // ```
+  // generates the following IR:
+  // ```
+  // r1(glval<int>) = VariableAddress[a]     :
+  // r2(int)        = Load[a]                : &:r1, m1
+  // r3(glval<int>) = VariableAddress[b]     :
+  // r4(int)        = Load[b]                : &:r3, m2
+  // r5(int)        = Constant[42]           :
+  // r6(int)        = Add                    : r4, r5
+  // r7(bool)       = CompareEQ              : r2, r6
+  // v1(void)       = ConditionalBranch      : r7
+  // ```
+  // and since `r7` is an integral typed instruction this predicate could
+  // include a case for when `r7` evaluates to true (in which case we would
+  // infer that `r6` was non-zero), and a case for when `r7` evaluates to false
+  // (in which case we would infer that `r6` was zero).
+  // However, since `a == b + 42` is already supported when reasoning about
+  // binary equalities we exclude those cases here.
+  not test.isGLValue() and
+  not simple_comparison_eq(test, _, _, _, _) and
+  not simple_comparison_lt(test, _, _, _) and
+  not test = any(SwitchInstruction switch).getExpression() and
+  (
+    test.getResultIRType() instanceof IRAddressType or
+    test.getResultIRType() instanceof IRIntegerType or
+    test.getResultIRType() instanceof IRBooleanType
+  ) and
  (
    k = 1 and
    value.(BooleanValue).getValue() = true and
@@ -913,7 +952,8 @@ private predicate compares_lt(

 /** Holds if `op < k` evaluates to `isLt` given that `test` evaluates to `value`. */
 private predicate compares_lt(Instruction test, Operand op, int k, boolean isLt, AbstractValue value) {
-  simple_comparison_lt(test, op, k, isLt, value)
+  unary_simple_comparison_lt(test, k, isLt, value) and
+  op.getDef() = test
  or
  complex_lt(test, op, k, isLt, value)
  or
@@ -960,12 +1000,11 @@ private predicate simple_comparison_lt(CompareInstruction cmp, Operand left, Ope
 }

 /** Rearrange various simple comparisons into `op < k` form. */
-private predicate simple_comparison_lt(
-  Instruction test, Operand op, int k, boolean isLt, AbstractValue value
+private predicate unary_simple_comparison_lt(
+  Instruction test, int k, boolean isLt, AbstractValue value
 ) {
  exists(SwitchInstruction switch, CaseEdge case |
    test = switch.getExpression() and
-    op.getDef() = test and
    case = value.(MatchValue).getCase() and
    exists(switch.getSuccessor(case)) and
    case.getMaxValue() > case.getMinValue()
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll
@@ -78,6 +78,7 @@ private import internal.FlowSummaryImpl
 private import internal.FlowSummaryImpl::Public
 private import internal.FlowSummaryImpl::Private
 private import internal.FlowSummaryImpl::Private::External
+private import internal.ExternalFlowExtensions as Extensions
 private import codeql.mad.ModelValidation as SharedModelVal
 private import codeql.util.Unit

@@ -138,6 +139,9 @@ predicate sourceModel(
    row.splitAt(";", 7) = kind
  ) and
  provenance = "manual"
+  or
+  Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance,
+    _)
 }

 /** Holds if a sink model exists for the given parameters. */
@@ -158,6 +162,8 @@ predicate sinkModel(
    row.splitAt(";", 7) = kind
  ) and
  provenance = "manual"
+  or
+  Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, _)
 }

 /** Holds if a summary model exists for the given parameters. */
@@ -179,6 +185,9 @@ predicate summaryModel(
    row.splitAt(";", 8) = kind
  ) and
  provenance = "manual"
+  or
+  Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind,
+    provenance, _)
 }

 private predicate relevantNamespace(string namespace) {
@@ -203,8 +212,10 @@ private predicate canonicalNamespaceLink(string namespace, string subns) {
 }

 /**
- * Holds if CSV framework coverage of `namespace` is `n` api endpoints of the
- * kind `(kind, part)`.
+ * Holds if MaD framework coverage of `namespace` is `n` api endpoints of the
+ * kind `(kind, part)`, and `namespaces` is the number of subnamespaces of
+ * `namespace` which have MaD framework coverage (including `namespace`
+ * itself).
 */
 predicate modelCoverage(string namespace, int namespaces, string kind, string part, int n) {
  namespaces = strictcount(string subns | canonicalNamespaceLink(namespace, subns)) and
@@ -321,10 +332,10 @@ module CsvValidation {
      or
      summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary"
    |
-      not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
+      not namespace.regexpMatch("[a-zA-Z0-9_\\.:]*") and
      result = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
      or
-      not type.regexpMatch("[a-zA-Z0-9_<>,\\+]+") and
+      not type.regexpMatch("[a-zA-Z0-9_<>,\\+]*") and
      result = "Dubious type \"" + type + "\" in " + pred + " model."
      or
      not name.regexpMatch("[a-zA-Z0-9_<>,]*") and
--- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll
+++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll
@@ -0,0 +1,27 @@
+/**
+ * This module provides extensible predicates for defining MaD models.
+ */
+
+/**
+ * Holds if an external source model exists for the given parameters.
+ */
+extensible predicate sourceModel(
+  string namespace, string type, boolean subtypes, string name, string signature, string ext,
+  string output, string kind, string provenance, QlBuiltins::ExtensionId madId
+);
+
+/**
+ * Holds if an external sink model exists for the given parameters.
+ */
+extensible predicate sinkModel(
+  string namespace, string type, boolean subtypes, string name, string signature, string ext,
+  string input, string kind, string provenance, QlBuiltins::ExtensionId madId
+);
+
+/**
+ * Holds if an external summary model exists for the given parameters.
+ */
+extensible predicate summaryModel(
+  string namespace, string type, boolean subtypes, string name, string signature, string ext,
+  string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId
+);
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll
@@ -1343,6 +1343,8 @@ predicate nodeIsHidden(Node n) {
  n instanceof FinalGlobalValue
  or
  n instanceof InitialGlobalValue
+  or
+  n instanceof SsaPhiInputNode
 }

 predicate neverSkipInPathGraph(Node n) {
@@ -1641,6 +1643,8 @@ private Instruction getAnInstruction(Node n) {
  or
  result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction()
  or
+  result = n.(SsaPhiInputNode).getBasicBlock().getFirstInstruction()
+  or
  n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _)
  or
  not n instanceof IndirectInstruction and
@@ -1770,7 +1774,7 @@ module IteratorFlow {
        crementCall = def.getValue().asInstruction().(StoreInstruction).getSourceValue() and
        sv = def.getSourceVariable() and
        bb.getInstruction(i) = crementCall and
-        Ssa::ssaDefReachesRead(sv, result.asDef(), bb, i)
+        Ssa::ssaDefReachesReadExt(sv, result.asDef(), bb, i)
      )
    }

@@ -1804,7 +1808,7 @@ module IteratorFlow {
        isIteratorWrite(writeToDeref, address) and
        operandForFullyConvertedCall(address, starCall) and
        bbStar.getInstruction(iStar) = starCall and
-        Ssa::ssaDefReachesRead(_, def.asDef(), bbStar, iStar) and
+        Ssa::ssaDefReachesReadExt(_, def.asDef(), bbStar, iStar) and
        ultimate = getAnUltimateDefinition*(def) and
        beginStore = ultimate.getValue().asInstruction() and
        operandForFullyConvertedCall(beginStore.getSourceValueOperand(), beginCall)
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
@@ -45,6 +45,7 @@ private newtype TIRDataFlowNode =
    or
    Ssa::isModifiableByCall(operand, indirectionIndex)
  } or
+  TSsaPhiInputNode(Ssa::PhiNode phi, IRBlock input) { phi.hasInputFromBlock(_, _, _, _, input) } or
  TSsaPhiNode(Ssa::PhiNode phi) or
  TSsaIteratorNode(IteratorFlow::IteratorFlowNode n) or
  TRawIndirectOperand0(Node0Impl node, int indirectionIndex) {
@@ -114,6 +115,13 @@ predicate conversionFlow(
      instrTo.(CheckedConvertOrNullInstruction).getUnaryOperand() = opFrom
      or
      instrTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom
+      or
+      exists(BuiltInInstruction builtIn |
+        builtIn = instrTo and
+        // __builtin_bit_cast
+        builtIn.getBuiltInOperation() instanceof BuiltInBitCast and
+        opFrom = builtIn.getAnOperand()
+      )
    )
    or
    additional = true and
@@ -158,6 +166,12 @@ class Node extends TIRDataFlowNode {
  /** Gets the operands corresponding to this node, if any. */
  Operand asOperand() { result = this.(OperandNode).getOperand() }

+  /**
+   * Gets the operand that is indirectly tracked by this node behind `index`
+   * number of indirections.
+   */
+  Operand asIndirectOperand(int index) { hasOperandAndIndex(this, result, index) }
+
  /**
   * Holds if this node is at index `i` in basic block `block`.
   *
@@ -170,6 +184,9 @@ class Node extends TIRDataFlowNode {
    or
    this.(SsaPhiNode).getPhiNode().getBasicBlock() = block and i = -1
    or
+    this.(SsaPhiInputNode).getBlock() = block and
+    i = block.getInstructionCount()
+    or
    this.(RawIndirectOperand).getOperand().getUse() = block.getInstruction(i)
    or
    this.(RawIndirectInstruction).getInstruction() = block.getInstruction(i)
@@ -622,7 +639,7 @@ class SsaPhiNode extends Node, TSsaPhiNode {

  final override Location getLocationImpl() { result = phi.getBasicBlock().getLocation() }

-  override string toStringImpl() { result = "Phi" }
+  override string toStringImpl() { result = phi.toString() }

  /**
   * Gets a node that is used as input to this phi node.
@@ -631,7 +648,7 @@ class SsaPhiNode extends Node, TSsaPhiNode {
   */
  cached
  final Node getAnInput(boolean fromBackEdge) {
-    localFlowStep(result, this) and
+    result.(SsaPhiInputNode).getPhiNode() = phi and
    exists(IRBlock bPhi, IRBlock bResult |
      bPhi = phi.getBasicBlock() and bResult = result.getBasicBlock()
    |
@@ -654,6 +671,58 @@ class SsaPhiNode extends Node, TSsaPhiNode {
  predicate isPhiRead() { phi.isPhiRead() }
 }

+/**
+ * INTERNAL: Do not use.
+ *
+ * A node that is used as an input to a phi node.
+ *
+ * This class exists to allow more powerful barrier guards. Consider this
+ * example:
+ *
+ * ```cpp
+ * int x = source();
+ * if(!safe(x)) {
+ *   x = clear();
+ * }
+ * // phi node for x here
+ * sink(x);
+ * ```
+ *
+ * At the phi node for `x` it is neither the case that `x` is dominated by
+ * `safe(x)`, nor is it the case that the phi is dominated by a clearing of `x`.
+ *
+ * By inserting a "phi input" node as the last entry in the basic block that
+ * defines the inputs to the phi we can conclude that each of those inputs is
+ * safe to pass to `sink`.
+ */
+class SsaPhiInputNode extends Node, TSsaPhiInputNode {
+  Ssa::PhiNode phi;
+  IRBlock block;
+
+  SsaPhiInputNode() { this = TSsaPhiInputNode(phi, block) }
+
+  /** Gets the phi node associated with this node. */
+  Ssa::PhiNode getPhiNode() { result = phi }
+
+  /** Gets the basic block in which this input originates. */
+  IRBlock getBlock() { result = block }
+
+  override Declaration getEnclosingCallable() { result = this.getFunction() }
+
+  override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() }
+
+  override DataFlowType getType() { result = this.getSourceVariable().getType() }
+
+  override predicate isGLValue() { phi.getSourceVariable().isGLValue() }
+
+  final override Location getLocationImpl() { result = block.getLastInstruction().getLocation() }
+
+  override string toStringImpl() { result = "Phi input" }
+
+  /** Gets the source variable underlying this phi node. */
+  Ssa::SourceVariable getSourceVariable() { result = phi.getSourceVariable() }
+}
+
 /**
 * INTERNAL: do not use.
 *
@@ -2176,6 +2245,9 @@ private module Cached {
      // Def-use/Use-use flow
      Ssa::ssaFlow(nodeFrom, nodeTo)
      or
+      // Phi input -> Phi
+      nodeFrom.(SsaPhiInputNode).getPhiNode() = nodeTo.(SsaPhiNode).getPhiNode()
+      or
      IteratorFlow::localFlowStep(nodeFrom, nodeTo)
      or
      // Operand -> Instruction flow
@@ -2614,6 +2686,22 @@ class ContentSet instanceof Content {
  }
 }

+pragma[nomagic]
+private predicate guardControlsPhiInput(
+  IRGuardCondition g, boolean branch, Ssa::Definition def, IRBlock input, Ssa::PhiNode phi
+) {
+  phi.hasInputFromBlock(def, _, _, _, input) and
+  (
+    g.controls(input, branch)
+    or
+    exists(EdgeKind kind |
+      g.getBlock() = input and
+      kind = getConditionalEdge(branch) and
+      input.getSuccessor(kind) = phi.getBasicBlock()
+    )
+  )
+}
+
 /**
 * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`.
 *
@@ -2662,13 +2750,21 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
   *
   * NOTE: If an indirect expression is tracked, use `getAnIndirectBarrierNode` instead.
   */
-  ExprNode getABarrierNode() {
+  Node getABarrierNode() {
    exists(IRGuardCondition g, Expr e, ValueNumber value, boolean edge |
      e = value.getAnInstruction().getConvertedResultExpression() and
-      result.getConvertedExpr() = e and
+      result.asConvertedExpr() = e and
      guardChecks(g, value.getAnInstruction().getConvertedResultExpression(), edge) and
      g.controls(result.getBasicBlock(), edge)
    )
+    or
+    exists(
+      IRGuardCondition g, boolean branch, Ssa::DefinitionExt def, IRBlock input, Ssa::PhiNode phi
+    |
+      guardChecks(g, def.getARead().asOperand().getDef().getConvertedResultExpression(), branch) and
+      guardControlsPhiInput(g, branch, def, input, phi) and
+      result = TSsaPhiInputNode(phi, input)
+    )
  }

  /**
@@ -2704,7 +2800,7 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
   *
   * NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead.
   */
-  IndirectExprNode getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) }
+  Node getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) }

  /**
   * Gets an indirect expression node with indirection index `indirectionIndex` that is
@@ -2740,13 +2836,23 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
   *
   * NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead.
   */
-  IndirectExprNode getAnIndirectBarrierNode(int indirectionIndex) {
+  Node getAnIndirectBarrierNode(int indirectionIndex) {
    exists(IRGuardCondition g, Expr e, ValueNumber value, boolean edge |
      e = value.getAnInstruction().getConvertedResultExpression() and
-      result.getConvertedExpr(indirectionIndex) = e and
+      result.asIndirectConvertedExpr(indirectionIndex) = e and
      guardChecks(g, value.getAnInstruction().getConvertedResultExpression(), edge) and
      g.controls(result.getBasicBlock(), edge)
    )
+    or
+    exists(
+      IRGuardCondition g, boolean branch, Ssa::DefinitionExt def, IRBlock input, Ssa::PhiNode phi
+    |
+      guardChecks(g,
+        def.getARead().asIndirectOperand(indirectionIndex).getDef().getConvertedResultExpression(),
+        branch) and
+      guardControlsPhiInput(g, branch, def, input, phi) and
+      result = TSsaPhiInputNode(phi, input)
+    )
  }
 }

@@ -2755,6 +2861,14 @@ module BarrierGuard<guardChecksSig/3 guardChecks> {
 */
 signature predicate instructionGuardChecksSig(IRGuardCondition g, Instruction instr, boolean branch);

+private EdgeKind getConditionalEdge(boolean branch) {
+  branch = true and
+  result instanceof TrueEdge
+  or
+  branch = false and
+  result instanceof FalseEdge
+}
+
 /**
 * Provides a set of barrier nodes for a guard that validates an instruction.
 *
@@ -2763,12 +2877,20 @@ signature predicate instructionGuardChecksSig(IRGuardCondition g, Instruction in
 */
 module InstructionBarrierGuard<instructionGuardChecksSig/3 instructionGuardChecks> {
  /** Gets a node that is safely guarded by the given guard check. */
-  ExprNode getABarrierNode() {
+  Node getABarrierNode() {
    exists(IRGuardCondition g, ValueNumber value, boolean edge, Operand use |
      instructionGuardChecks(g, value.getAnInstruction(), edge) and
      use = value.getAnInstruction().getAUse() and
      result.asOperand() = use and
-      g.controls(use.getDef().getBlock(), edge)
+      g.controls(result.getBasicBlock(), edge)
+    )
+    or
+    exists(
+      IRGuardCondition g, boolean branch, Ssa::DefinitionExt def, IRBlock input, Ssa::PhiNode phi
+    |
+      instructionGuardChecks(g, def.getARead().asOperand().getDef(), branch) and
+      guardControlsPhiInput(g, branch, def, input, phi) and
+      result = TSsaPhiInputNode(phi, input)
    )
  }
 }
--- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
@@ -657,19 +657,9 @@ class GlobalDefImpl extends DefImpl, TGlobalDefImpl {
 */
 predicate adjacentDefRead(IRBlock bb1, int i1, SourceVariable sv, IRBlock bb2, int i2) {
  adjacentDefReadExt(_, sv, bb1, i1, bb2, i2)
-  or
-  exists(PhiNode phi |
-    lastRefRedefExt(_, sv, bb1, i1, phi) and
-    phi.definesAt(sv, bb2, i2, _)
-  )
 }

 predicate useToNode(IRBlock bb, int i, SourceVariable sv, Node nodeTo) {
-  exists(Phi phi |
-    phi.asPhi().definesAt(sv, bb, i, _) and
-    nodeTo = phi.getNode()
-  )
-  or
  exists(UseImpl use |
    use.hasIndexInBlock(bb, i, sv) and
    nodeTo = use.getNode()
@@ -723,46 +713,26 @@ predicate nodeToDefOrUse(Node node, SourceVariable sv, IRBlock bb, int i, boolea
 */
 private predicate indirectConversionFlowStep(Node nFrom, Node nTo) {
  not exists(SourceVariable sv, IRBlock bb2, int i2 |
-    nodeToDefOrUse(nTo, sv, bb2, i2, _) and
+    useToNode(bb2, i2, sv, nTo) and
    adjacentDefRead(bb2, i2, sv, _, _)
  ) and
-  (
-    exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr |
-      hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and
-      hasOperandAndIndex(nTo, op2, pragma[only_bind_into](indirectionIndex)) and
-      instr = op2.getDef() and
-      conversionFlow(op1, instr, _, _)
-    )
-    or
-    exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr |
-      hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and
-      hasOperandAndIndex(nTo, op2, indirectionIndex - 1) and
-      instr = op2.getDef() and
-      isDereference(instr, op1, _)
-    )
+  exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr |
+    hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and
+    hasOperandAndIndex(nTo, op2, pragma[only_bind_into](indirectionIndex)) and
+    instr = op2.getDef() and
+    conversionFlow(op1, instr, _, _)
  )
 }

 /**
- * The reason for this predicate is a bit annoying:
- * We cannot mark a `PointerArithmeticInstruction` that computes an offset based on some SSA
- * variable `x` as a use of `x` since this creates taint-flow in the following example:
- * ```c
- * int x = array[source]
- * sink(*array)
- * ```
- * This is because `source` would flow from the operand of `PointerArithmeticInstruction` to the
- * result of the instruction, and into the `IndirectOperand` that represents the value of `*array`.
- * Then, via use-use flow, flow will arrive at `*array` in `sink(*array)`.
- *
- * So this predicate recurses back along conversions and `PointerArithmeticInstruction`s to find the
- * first use that has provides use-use flow, and uses that target as the target of the `nodeFrom`.
+ * Holds if `node` is a phi input node that should receive flow from the
+ * definition to (or use of) `sv` at `(bb1, i1)`.
 */
-private predicate adjustForPointerArith(PostUpdateNode pun, SourceVariable sv, IRBlock bb2, int i2) {
-  exists(IRBlock bb1, int i1, Node adjusted |
-    indirectConversionFlowStep*(adjusted, pun.getPreUpdateNode()) and
-    nodeToDefOrUse(adjusted, sv, bb1, i1, _) and
-    adjacentDefRead(bb1, i1, sv, bb2, i2)
+private predicate phiToNode(SsaPhiInputNode node, SourceVariable sv, IRBlock bb1, int i1) {
+  exists(PhiNode phi, IRBlock input |
+    phi.hasInputFromBlock(_, sv, bb1, i1, input) and
+    node.getPhiNode() = phi and
+    node.getBlock() = input
  )
 }

@@ -777,10 +747,14 @@ private predicate adjustForPointerArith(PostUpdateNode pun, SourceVariable sv, I
 private predicate ssaFlowImpl(
  IRBlock bb1, int i1, SourceVariable sv, Node nodeFrom, Node nodeTo, boolean uncertain
 ) {
-  exists(IRBlock bb2, int i2 |
-    nodeToDefOrUse(nodeFrom, sv, bb1, i1, uncertain) and
-    adjacentDefRead(bb1, i1, sv, bb2, i2) and
-    useToNode(bb2, i2, sv, nodeTo)
+  nodeToDefOrUse(nodeFrom, sv, bb1, i1, uncertain) and
+  (
+    exists(IRBlock bb2, int i2 |
+      adjacentDefRead(bb1, i1, sv, bb2, i2) and
+      useToNode(bb2, i2, sv, nodeTo)
+    )
+    or
+    phiToNode(nodeTo, sv, bb1, i1)
  ) and
  nodeFrom != nodeTo
 }
@@ -789,7 +763,7 @@ private predicate ssaFlowImpl(
 private Node getAPriorDefinition(DefinitionExt next) {
  exists(IRBlock bb, int i, SourceVariable sv |
    lastRefRedefExt(_, pragma[only_bind_into](sv), pragma[only_bind_into](bb),
-      pragma[only_bind_into](i), next) and
+      pragma[only_bind_into](i), _, next) and
    nodeToDefOrUse(result, sv, bb, i, _)
  )
 }
@@ -896,9 +870,31 @@ private predicate isArgumentOfCallable(DataFlowCall call, Node n) {
 * Holds if there is use-use flow from `pun`'s pre-update node to `n`.
 */
 private predicate postUpdateNodeToFirstUse(PostUpdateNode pun, Node n) {
-  exists(SourceVariable sv, IRBlock bb2, int i2 |
-    adjustForPointerArith(pun, sv, bb2, i2) and
-    useToNode(bb2, i2, sv, n)
+  // We cannot mark a `PointerArithmeticInstruction` that computes an offset
+  // based on some SSA
+  // variable `x` as a use of `x` since this creates taint-flow in the
+  // following example:
+  // ```c
+  // int x = array[source]
+  // sink(*array)
+  // ```
+  // This is because `source` would flow from the operand of `PointerArithmetic`
+  // instruction to the result of the instruction, and into the `IndirectOperand`
+  // that represents the value of `*array`. Then, via use-use flow, flow will
+  // arrive at `*array` in `sink(*array)`.
+  // So this predicate recurses back along conversions and `PointerArithmetic`
+  // instructions to find the first use that provides use-use flow, and
+  // uses that target as the target of the `nodeFrom`.
+  exists(Node adjusted, IRBlock bb1, int i1, SourceVariable sv |
+    indirectConversionFlowStep*(adjusted, pun.getPreUpdateNode()) and
+    useToNode(bb1, i1, sv, adjusted)
+  |
+    exists(IRBlock bb2, int i2 |
+      adjacentDefRead(bb1, i1, sv, bb2, i2) and
+      useToNode(bb2, i2, sv, n)
+    )
+    or
+    phiToNode(n, sv, bb1, i1)
  )
 }

@@ -953,11 +949,16 @@ predicate postUpdateFlow(PostUpdateNode pun, Node nodeTo) {

 /** Holds if `nodeTo` receives flow from the phi node `nodeFrom`. */
 predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) {
-  exists(PhiNode phi, SourceVariable sv, IRBlock bb1, int i1, IRBlock bb2, int i2 |
+  exists(PhiNode phi, SourceVariable sv, IRBlock bb1, int i1 |
    phi = nodeFrom.getPhiNode() and
-    phi.definesAt(sv, bb1, i1, _) and
-    adjacentDefRead(bb1, i1, sv, bb2, i2) and
-    useToNode(bb2, i2, sv, nodeTo)
+    phi.definesAt(sv, bb1, i1, _)
+  |
+    exists(IRBlock bb2, int i2 |
+      adjacentDefRead(bb1, i1, sv, bb2, i2) and
+      useToNode(bb2, i2, sv, nodeTo)
+    )
+    or
+    phiToNode(nodeTo, sv, bb1, i1)
  )
 }

@@ -1031,22 +1032,26 @@ module SsaCached {
   * Holds if the node at index `i` in `bb` is a last reference to SSA definition
   * `def`. The reference is last because it can reach another write `next`,
   * without passing through another read or write.
+   *
+   * The path from node `i` in `bb` to `next` goes via basic block `input`,
+   * which is either a predecessor of the basic block of `next`, or `input` =
+   * `bb` in case `next` occurs in basic block `bb`.
   */
  cached
  predicate lastRefRedefExt(
-    DefinitionExt def, SourceVariable sv, IRBlock bb, int i, DefinitionExt next
+    DefinitionExt def, SourceVariable sv, IRBlock bb, int i, IRBlock input, DefinitionExt next
  ) {
-    SsaImpl::lastRefRedefExt(def, sv, bb, i, next)
+    SsaImpl::lastRefRedefExt(def, sv, bb, i, input, next)
  }

  cached
-  Definition phiHasInputFromBlock(PhiNode phi, IRBlock bb) {
-    SsaImpl::phiHasInputFromBlock(phi, result, bb)
+  Definition phiHasInputFromBlockExt(PhiNode phi, IRBlock bb) {
+    SsaImpl::phiHasInputFromBlockExt(phi, result, bb)
  }

  cached
-  predicate ssaDefReachesRead(SourceVariable v, Definition def, IRBlock bb, int i) {
-    SsaImpl::ssaDefReachesRead(v, def, bb, i)
+  predicate ssaDefReachesReadExt(SourceVariable v, DefinitionExt def, IRBlock bb, int i) {
+    SsaImpl::ssaDefReachesReadExt(v, def, bb, i)
  }

  predicate variableRead = SsaInput::variableRead/4;
@@ -1198,11 +1203,11 @@ class Phi extends TPhi, SsaDef {

  final override Location getLocation() { result = phi.getBasicBlock().getLocation() }

-  override string toString() { result = "Phi" }
+  override string toString() { result = phi.toString() }

-  SsaPhiNode getNode() { result.getPhiNode() = phi }
+  SsaPhiInputNode getNode(IRBlock block) { result.getPhiNode() = phi and result.getBlock() = block }

-  predicate hasInputFromBlock(Definition inp, IRBlock bb) { inp = phiHasInputFromBlock(phi, bb) }
+  predicate hasInputFromBlock(Definition inp, IRBlock bb) { inp = phiHasInputFromBlockExt(phi, bb) }

  final Definition getAnInput() { this.hasInputFromBlock(result, _) }
 }
@@ -1228,13 +1233,21 @@ class PhiNode extends SsaImpl::DefinitionExt {
   */
  predicate isPhiRead() { this instanceof SsaImpl::PhiReadNode }

-  /** Holds if `inp` is an input to this phi node along the edge originating in `bb`. */
-  predicate hasInputFromBlock(Definition inp, IRBlock bb) {
-    inp = SsaCached::phiHasInputFromBlock(this, bb)
+  /**
+   * Holds if the node at index `i` in `bb` is a last reference to SSA
+   * definition `def` of `sv`. The reference is last because it can reach
+   * this phi node, without passing through another read or write.
+   *
+   * The path from node `i` in `bb` to this phi node goes via basic block
+   * `input`, which is either a predecessor of the basic block of this phi
+   * node, or `input` = `bb` in case this phi node occurs in basic block `bb`.
+   */
+  predicate hasInputFromBlock(DefinitionExt def, SourceVariable sv, IRBlock bb, int i, IRBlock input) {
+    SsaCached::lastRefRedefExt(def, sv, bb, i, input, this)
  }

  /** Gets a definition that is an input to this phi node. */
-  final Definition getAnInput() { this.hasInputFromBlock(result, _) }
+  final Definition getAnInput() { this.hasInputFromBlock(result, _, _, _, _) }
 }

 /** An static single assignment (SSA) definition. */
@@ -1249,6 +1262,15 @@ class DefinitionExt extends SsaImpl::DefinitionExt {
    result = this.getAPhiInputOrPriorDefinition*() and
    not result instanceof PhiNode
  }
+
+  /** Gets a node that represents a read of this SSA definition. */
+  Node getARead() {
+    exists(SourceVariable sv, IRBlock bb, int i | SsaCached::ssaDefReachesReadExt(sv, this, bb, i) |
+      useToNode(bb, i, sv, result)
+      or
+      phiToNode(result, sv, bb, i)
+    )
+  }
 }

 class Definition = SsaImpl::Definition;
--- a/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll
+++ b/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/TranslatedExpr.qll
@@ -3208,9 +3208,20 @@ class TranslatedBuiltInOperation extends TranslatedNonConstantExpr {

  final override Instruction getResult() { result = this.getInstruction(OnlyInstructionTag()) }

+  /**
+   * Gets the rnk'th (0-indexed) child for which a `TranslatedElement` exists.
+   *
+   * We use this predicate to filter out `TypeName` expressions that sometimes
+   * occur in builtin operations since the IR doesn't have an instruction to
+   * represent a reference to a type.
+   */
+  private TranslatedElement getRankedChild(int rnk) {
+    result = rank[rnk + 1](int id, TranslatedElement te | te = this.getChild(id) | te order by id)
+  }
+
  final override Instruction getFirstInstruction(EdgeKind kind) {
-    if exists(this.getChild(0))
-    then result = this.getChild(0).getFirstInstruction(kind)
+    if exists(this.getRankedChild(0))
+    then result = this.getRankedChild(0).getFirstInstruction(kind)
    else (
      kind instanceof GotoEdge and result = this.getInstruction(OnlyInstructionTag())
    )
@@ -3230,11 +3241,11 @@ class TranslatedBuiltInOperation extends TranslatedNonConstantExpr {
  }

  final override Instruction getChildSuccessorInternal(TranslatedElement child, EdgeKind kind) {
-    exists(int id | child = this.getChild(id) |
-      result = this.getChild(id + 1).getFirstInstruction(kind)
+    exists(int id | child = this.getRankedChild(id) |
+      result = this.getRankedChild(id + 1).getFirstInstruction(kind)
      or
      kind instanceof GotoEdge and
-      not exists(this.getChild(id + 1)) and
+      not exists(this.getRankedChild(id + 1)) and
      result = this.getInstruction(OnlyInstructionTag())
    )
  }
@@ -3249,7 +3260,7 @@ class TranslatedBuiltInOperation extends TranslatedNonConstantExpr {
    tag = OnlyInstructionTag() and
    exists(int index |
      operandTag = positionalArgumentOperand(index) and
-      result = this.getChild(index).(TranslatedExpr).getResult()
+      result = this.getRankedChild(index).(TranslatedExpr).getResult()
    )
  }