diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index ea7bf69cc10..714e5bd3b49 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -1142,7 +1142,8 @@ private class IndirectOperandIndirectExprNode extends IndirectExprNodeBase, RawI } private class IndirectInstructionIndirectExprNode extends IndirectExprNodeBase, - RawIndirectInstruction { + RawIndirectInstruction +{ IndirectInstructionIndirectExprNode() { indirectExprNodeShouldBeIndirectInstruction(this, _) } final override Expr getConvertedExpr(int index) { @@ -1612,39 +1613,96 @@ predicate localInstructionFlow(Instruction e1, Instruction e2) { cached private module ExprFlowCached { /** - * Holds if `n1.asExpr()` doesn't have a result and `n1` flows to `n2` in a single + * Holds if `n` is an indirect operand of a `PointerArithmeticInstruction`, and + * `e` is the result of loading from the `PointerArithmeticInstruction`. + */ + private predicate isIndirectBaseOfArrayAccess(IndirectOperand n, Expr e) { + exists(LoadInstruction load, PointerArithmeticInstruction pai | + pai = load.getSourceAddress() and + pai.getLeftOperand() = n.getOperand() and + n.getIndirectionIndex() = 1 and + e = load.getConvertedResultExpression() + ) + } + + /** + * Gets the expression associated with node `n`, if any. + * + * Unlike `n.asExpr()`, this predicate will also get the + * expression `*(x + i)` when `n` is the indirect node + * for `x`. This ensures that an assignment in a long chain + * of assignments in a macro expansion is properly mapped + * to the previous assignment. For example, in: + * ```cpp + * *x = source(); + * use(x[0]); + * use(x[1]); + * ... + * use(x[i]); + * use(x[i+1]); + * ... + * use(x[N]); + * ``` + * To see what the problem would be if `asExpr(n)` was replaced + * with `n.asExpr()`, consider the transitive closure over + * `localStepFromNonExpr` in `localStepsToExpr`. We start at `n2` + * for which `n.asExpr()` exists. For example, `n2` in the above + * example could be a `x[i]` in any of the `use(x[i])` above. + * + * We then step to a dataflow predecessor of `n2`. In the above + * code fragment, thats the indirect node corresponding to `x` in + * `x[i-1]`. Since this doesn't have a result for `Node::asExpr()` + * we continue with the recursion until we reach `*x = source()` + * which does have a result for `Node::asExpr()`. + * + * If `N` is very large this blows up. + * + * To fix this, we map the indirect node corresponding to `x` to + * in `x[i - 1]` to the `x[i - 1]` expression. This ensures that + * `x[i]` steps to the expression `x[i - 1]` without traversing the + * entire chain. + */ + private Expr asExpr(Node n) { + isIndirectBaseOfArrayAccess(n, result) + or + not isIndirectBaseOfArrayAccess(n, _) and + result = n.asExpr() + } + + /** + * Holds if `asExpr(n1)` doesn't have a result and `n1` flows to `n2` in a single * dataflow step. */ private predicate localStepFromNonExpr(Node n1, Node n2) { - not exists(n1.asExpr()) and + not exists(asExpr(n1)) and localFlowStep(n1, n2) } /** - * Holds if `n1.asExpr()` doesn't have a result, `n2.asExpr() = e2` and - * `n2` is the first node reachable from `n1` such that `n2.asExpr()` exists. + * Holds if `asExpr(n1)` doesn't have a result, `asExpr(n2) = e2` and + * `n2` is the first node reachable from `n1` such that `asExpr(n2)` exists. */ pragma[nomagic] private predicate localStepsToExpr(Node n1, Node n2, Expr e2) { localStepFromNonExpr*(n1, n2) and - e2 = n2.asExpr() + e2 = asExpr(n2) } /** - * Holds if `n1.asExpr() = e1` and `n2.asExpr() = e2` and `n2` is the first node - * reachable from `n1` such that `n2.asExpr()` exists. + * Holds if `asExpr(n1) = e1` and `asExpr(n2) = e2` and `n2` is the first node + * reachable from `n1` such that `asExpr(n2)` exists. */ private predicate localExprFlowSingleExprStep(Node n1, Expr e1, Node n2, Expr e2) { exists(Node mid | localFlowStep(n1, mid) and localStepsToExpr(mid, n2, e2) and - e1 = n1.asExpr() + e1 = asExpr(n1) ) } /** - * Holds if `n1.asExpr() = e1` and `e1 != e2` and `n2` is the first reachable node from - * `n1` such that `n2.asExpr() = e2`. + * Holds if `asExpr(n1) = e1` and `e1 != e2` and `n2` is the first reachable node from + * `n1` such that `asExpr(n2) = e2`. */ private predicate localExprFlowStepImpl(Node n1, Expr e1, Node n2, Expr e2) { exists(Node n, Expr e | localExprFlowSingleExprStep(n1, e1, n, e) |