diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index 8d15b6d142a..27529f55a29 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -9,6 +9,7 @@ dependencies: codeql/dataflow: ${workspace} codeql/rangeanalysis: ${workspace} codeql/ssa: ${workspace} + codeql/typeflow: ${workspace} codeql/tutorial: ${workspace} codeql/util: ${workspace} codeql/xml: ${workspace} diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll index 862070820f8..7e5b4de8122 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -6,6 +6,7 @@ private import DataFlowImplCommon as DataFlowImplCommon private import DataFlowUtil private import semmle.code.cpp.models.interfaces.PointerWrapper private import DataFlowPrivate +private import TypeFlow private import semmle.code.cpp.ir.ValueNumbering /** @@ -955,11 +956,7 @@ private module Cached { * Holds if the address computed by `operand` is guaranteed to write * to a specific address. */ - private predicate isCertainAddress(Operand operand) { - valueNumberOfOperand(operand).getAnInstruction() instanceof VariableAddressInstruction - or - operand.getType() instanceof Cpp::ReferenceType - } + private predicate isCertainAddress(Operand operand) { isPointerToSingleObject(operand.getDef()) } /** * Holds if `address` is a use of an SSA variable rooted at `base`, and the diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TypeFlow.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TypeFlow.qll new file mode 100644 index 00000000000..44f2e60daac --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TypeFlow.qll @@ -0,0 +1,259 @@ +private import cpp +private import semmle.code.cpp.ir.IR +private import codeql.typeflow.TypeFlow + +private module Input implements TypeFlowInput { + private predicate hasExactSingleType(Instruction i) { + // The address of a variable is always a single object + i instanceof VariableAddressInstruction + or + // A reference always points to a always a single object + i.getResultLanguageType().hasUnspecifiedType(any(ReferenceType rt), false) + or + // `this` is never an array + i instanceof InitializeThisInstruction + or + // An allocation of a non-array object + exists(AllocationExpr alloc | alloc = i.getUnconvertedResultExpression() | + // i.e., `new int`; + alloc instanceof NewExpr + or + // i.e., `malloc(sizeof(int))` + exists(SizeofTypeOperator sizeOf | sizeOf = alloc.getSizeExpr() | + not sizeOf.getTypeOperand().getUnspecifiedType() instanceof ArrayType + ) + ) + } + + private predicate hasExactBufferType(Instruction i) { + // Anything with an array type is a buffer + i.getResultLanguageType().hasUnspecifiedType(any(ArrayType at), false) + or + not hasExactSingleType(i) and + i.getUnconvertedResultExpression() instanceof AllocationExpr + } + + private newtype TTypeFlowNode = + TInstructionNode(Instruction i) or + TFunctionNode(IRFunction func) + + abstract class TypeFlowNode extends TTypeFlowNode { + /** Gets a textual representation of this node. */ + abstract string toString(); + + /** + * Gets the type of this node. This type may not be the most precise + * possible type, but will be used as a starting point of the analysis. + */ + abstract Type getType(); + + /** Gets the location of this node. */ + abstract Location getLocation(); + + /** Gets the underlying `Instruction` of this node, if any. */ + Instruction asInstruction() { none() } + + /** Gets the underlying `IRFunction` of this node, if any. */ + IRFunction asFunction() { none() } + + /** Holds if the value of this node is always null. */ + abstract predicate isNullValue(); + } + + private class InstructionNode extends TypeFlowNode, TInstructionNode { + Instruction instr; + + InstructionNode() { this = TInstructionNode(instr) } + + override string toString() { result = instr.toString() } + + override Type getType() { + if hasExactSingleType(instr) then result.isSingle() else result.isBuffer() + } + + override Location getLocation() { result = instr.getLocation() } + + override Instruction asInstruction() { result = instr } + + override predicate isNullValue() { + instr.(ConstantInstruction).getValue() = "0" and + instr.getResultIRType() instanceof IRAddressType + } + } + + /** Gets the `TypeFlowNode` corresponding to `i`. */ + additional InstructionNode instructionNode(Instruction i) { result.asInstruction() = i } + + private class FunctionNode extends TypeFlowNode, TFunctionNode { + IRFunction func; + + FunctionNode() { this = TFunctionNode(func) } + + override string toString() { result = func.toString() } + + Instruction getReturnValueInstruction() { + result = func.getReturnInstruction().(ReturnValueInstruction).getReturnValue() + } + + override Type getType() { result = instructionNode(this.getReturnValueInstruction()).getType() } + + override Location getLocation() { result = func.getLocation() } + + override IRFunction asFunction() { result = func } + + override predicate isNullValue() { + instructionNode(this.getReturnValueInstruction()).isNullValue() + } + } + + /** + * Gets an ultimiate definition of `phi`. That is, an input to `phi` that is + * not itself a `PhiInstruction`. + */ + private Instruction getAnUltimateLocalDefinition(PhiInstruction phi) { + result = phi.getAnInput*() and not result instanceof PhiInstruction + } + + /** + * Holds if this function is private (i.e., cannot be accessed outside its + * compilation unit). This means we can use a closed-world assumption about + * calls to this function. + */ + private predicate isPrivate(Function func) { + func.isStatic() + or + func.getNamespace().getParentNamespace*().isInline() + or + func.(MemberFunction).isPrivate() + } + + /** + * Holds if `arg` is an argument for the parameter `p` in a private callable. + */ + pragma[nomagic] + private predicate privateParamArg(InitializeParameterInstruction p, Instruction arg) { + exists(CallInstruction call, int i, Function func | + call.getArgument(pragma[only_bind_into](i)) = arg and + func = call.getStaticCallTarget() and + func.getParameter(pragma[only_bind_into](i)) = p.getParameter() and + isPrivate(func) + ) + } + + predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) { + // instruction -> phi + getAnUltimateLocalDefinition(n2.asInstruction()) = n1.asInstruction() + or + // return value -> function + n2.(FunctionNode).getReturnValueInstruction() = n1.asInstruction() + or + // function -> call + exists(Function func | func = n1.asFunction().getFunction() | + not func.isVirtual() and + n2.asInstruction().(CallInstruction).getStaticCallTarget() = func + ) + or + // Argument -> parameter where the parameter's enclosing function + // is "private". + exists(Instruction arg, Instruction p | + privateParamArg(p, arg) and + n1.asInstruction() = arg and + n2.asInstruction() = p + ) + } + + /** + * Holds if knowing whether `i1` points to a single object or buffer implies + * knowing whether `i2` points to a single object or buffer. + */ + private predicate instructionStep(Instruction i1, Instruction i2) { + i2.(CopyInstruction).getSourceValue() = i1 + or + i2.(CopyValueInstruction).getSourceValue() = i1 + or + i2.(ConvertInstruction).getUnary() = i1 + or + i2.(CheckedConvertOrNullInstruction).getUnary() = i1 + or + i2.(InheritanceConversionInstruction).getUnary() = i1 + or + i2.(PointerArithmeticInstruction).getLeft() = i1 + } + + predicate step(TypeFlowNode n1, TypeFlowNode n2) { + instructionStep(n1.asInstruction(), n2.asInstruction()) + } + + predicate isNullValue(TypeFlowNode n) { n.isNullValue() } + + private newtype TType = + TSingle() or + TBuffer() + + class Type extends TType { + string toString() { + this.isSingle() and + result = "Single" + or + this.isBuffer() and + result = "Buffer" + } + + /** Holds if this type is the type that represents a single object. */ + predicate isSingle() { this = TSingle() } + + /** Holds if this type is the type that represents a buffer. */ + predicate isBuffer() { this = TBuffer() } + + /** + * Gets a super type of this type, if any. + * + * The type relation is `Single <: Buffer`. + */ + Type getASupertype() { + this.isSingle() and + result.isBuffer() + } + } + + predicate exactTypeBase(TypeFlowNode n, Type t) { + exists(Instruction instr | instr = n.asInstruction() | + hasExactSingleType(instr) and t.isSingle() + or + hasExactBufferType(instr) and t.isBuffer() + ) + } + + pragma[nomagic] + private predicate upcastCand(TypeFlowNode n, Type t1, Type t2) { + exists(TypeFlowNode next | + step(n, next) + or + joinStep(n, next) + | + n.getType() = t1 and + next.getType() = t2 and + t1 != t2 + ) + } + + private predicate upcast(TypeFlowNode n, Type t1) { + exists(Type t2 | upcastCand(n, t1, t2) | + // No need for transitive closure since the subtyping relation is just `Single <: Buffer` + t1.getASupertype() = t2 + ) + } + + predicate typeFlowBaseCand(TypeFlowNode n, Type t) { upcast(n, t) } +} + +private module TypeFlow = Make; + +/** + * Holds if `i` is an instruction that computes an address that points to a + * single object (as opposed to pointing into a buffer). + */ +pragma[nomagic] +predicate isPointerToSingleObject(Instruction i) { + TypeFlow::bestTypeFlow(Input::instructionNode(i), any(Input::Type t | t.isSingle()), _) +}