C++: Generalize loads and stores.

This commit is contained in:
Mathias Vorreiter Pedersen
2022-12-02 11:24:20 +00:00
parent 589f7bdbcc
commit 68f0997d87
4 changed files with 103 additions and 20 deletions

View File

@@ -561,7 +561,7 @@ predicate storeStep(Node node1, Content c, PostFieldUpdateNode node2) {
* operations and exactly `n` `LoadInstruction` operations.
*/
private predicate numberOfLoadsFromOperandRec(Operand operandFrom, Operand operandTo, int ind) {
exists(LoadInstruction load | load.getSourceAddressOperand() = operandFrom |
exists(Instruction load | Ssa::isDereference(load, operandFrom) |
operandTo = operandFrom and ind = 0
or
numberOfLoadsFromOperand(load.getAUse(), operandTo, ind - 1)
@@ -581,7 +581,7 @@ private predicate numberOfLoadsFromOperandRec(Operand operandFrom, Operand opera
private predicate numberOfLoadsFromOperand(Operand operandFrom, Operand operandTo, int n) {
numberOfLoadsFromOperandRec(operandFrom, operandTo, n)
or
not any(LoadInstruction load).getSourceAddressOperand() = operandFrom and
not Ssa::isDereference(_, operandFrom) and
not conversionFlow(operandFrom, _, _) and
operandFrom = operandTo and
n = 0

View File

@@ -86,6 +86,8 @@ predicate conversionFlow(Operand opFrom, Instruction instrTo, boolean isPointerA
instrTo.(CheckedConvertOrNullInstruction).getUnaryOperand() = opFrom
or
instrTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom
or
Ssa::isAdditionalConversionFlow(opFrom, instrTo)
)
or
isPointerArith = true and
@@ -382,12 +384,12 @@ class OperandNode extends Node, Node0 {
* For example, `stripPointer(int*&)` is `int*` and `stripPointer(int*)` is `int`.
*/
private Type stripPointer(Type t) {
result = t.(PointerType).getBaseType()
result = any(Ssa::Indirection ind | ind.getType() = t).getBaseType()
or
// These types have a sensible base type, but don't receive additional
// dataflow nodes representing their indirections. So for now we special case them.
result = t.(ArrayType).getBaseType()
or
result = t.(ReferenceType).getBaseType()
or
result = t.(PointerToMemberType).getBaseType()
or
result = t.(FunctionPointerIshType).getBaseType()
@@ -1140,8 +1142,9 @@ predicate localFlowStep = simpleLocalFlowStep/2;
private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) {
// Reduce the indirection count by 1 if we're passing through a dereference (for example, a `LoadInstruction`).
exists(int ind, LoadInstruction load |
hasOperandAndIndex(nodeFrom, load.getSourceAddressOperand(), ind) and
exists(int ind, Instruction load, Operand address |
Ssa::isDereference(load, address) and
hasOperandAndIndex(nodeFrom, address, ind) and
nodeHasInstruction(nodeTo, load, ind - 1)
)
or

View File

@@ -99,9 +99,9 @@ private class PointerOrReferenceType extends Cpp::DerivedType {
* (i.e., `countIndirections(e.getUnspecifiedType())`).
*/
private int countIndirections(Type t) {
result = 1 + countIndirections(t.(PointerOrReferenceType).getBaseType())
result = any(Indirection ind | ind.getType() = t).getNumberOfIndirections()
or
not t instanceof PointerOrReferenceType and
not exists(Indirection ind | ind.getType() = t) and
result = 0
}
@@ -127,7 +127,79 @@ class AllocationInstruction extends CallInstruction {
AllocationInstruction() { this.getStaticCallTarget() instanceof Cpp::AllocationFunction }
}
/**
* An abstract class for handling indirections.
*
* Extend this class to make a type behave as a pointer for the
* purposes of dataflow.
*/
abstract class Indirection extends Type {
/** Gets the type of this indirection. */
final Type getType() { result = super.getUnspecifiedType() }
/**
* Gets the number of indirections supported by this type.
*
* For example, the number of indirections of a variable `p` of type
* `int**` is `3` (i.e., `p`, `*p` and `**p`).
*
* NOTE(review): the pointer/reference implementation in this file computes
* `1 + countIndirections(baseType)`, which appears to yield `2` for `int**`.
* Confirm whether this example is meant to count `p` itself.
*/
abstract int getNumberOfIndirections();
/**
* Holds if `deref` is an instruction that behaves as a `LoadInstruction`
* that loads the value computed by `address`.
*
* The default implementation contributes no additional dereferences.
*/
predicate isAdditionalDereference(Instruction deref, Operand address) { none() }
/**
* Holds if `value` is written to the address computed by `address`.
*
* `certain` is `true` if this write is guaranteed to write to the address.
*
* The default implementation contributes no additional writes.
*/
predicate isAdditionalWrite(Node0Impl value, Operand address, boolean certain) { none() }
/**
* Gets the base type of this indirection.
*
* For example, the base type of `int*&` is `int*`, and the base type of `int*` is `int`.
*/
abstract Type getBaseType();
/**
* Holds if there should be an additional taint step from `node1` to `node2`.
*
* The default implementation contributes no additional taint steps.
*/
predicate isAdditionalTaintStep(Node node1, Node node2) { none() }
/**
* Holds if the step from `opFrom` to `instrTo` should be considered a conversion
* from `opFrom` to `instrTo`.
*
* The default implementation contributes no additional conversion steps.
*/
predicate isAdditionalConversionFlow(Operand opFrom, Instruction instrTo) { none() }
/**
* Holds if writing the value `value` to an address with base `base` should
* be ignored.
*
* The default implementation ignores nothing.
*/
predicate ignoreSourceVariableBase(BaseSourceVariableInstruction base, Node0Impl value) { none() }
}
/**
* The built-in `Indirection` for types that are a `PointerOrReferenceType`.
*
* It adds no extra dereferences or writes of its own; ordinary loads are
* handled directly by `isDereference`.
*/
private class PointerOrReferenceTypeIndirection extends Indirection, PointerOrReferenceType {
// One level for this pointer/reference, plus however many its base type supports.
override int getNumberOfIndirections() { result = 1 + countIndirections(this.getBaseType()) }
// No additional dereference instructions for plain pointers and references.
override predicate isAdditionalDereference(Instruction deref, Operand address) { none() }
// No additional writes for plain pointers and references.
override predicate isAdditionalWrite(Node0Impl value, Operand address, boolean certain) { none() }
// Both supertypes declare `getBaseType`; explicitly pick the one from `PointerOrReferenceType`.
override Type getBaseType() { result = PointerOrReferenceType.super.getBaseType() }
}
/**
 * Holds if `deref` is an instruction that dereferences the address computed
 * by `address`.
 *
 * This is the case when `deref` is a `LoadInstruction` whose source address
 * operand is `address`, or when some `Indirection` reports the pair through
 * `isAdditionalDereference`.
 */
predicate isDereference(Instruction deref, Operand address) {
  // The ordinary case: a load from `address`.
  deref.(LoadInstruction).getSourceAddressOperand() = address
  or
  // Dereferences contributed by user-defined indirections.
  exists(Indirection indirection | indirection.isAdditionalDereference(deref, address))
}
predicate isWrite(Node0Impl value, Operand address, boolean certain) {
any(Indirection ind).isAdditionalWrite(value, address, certain)
or
certain = true and
(
exists(StoreInstruction store |
@@ -289,8 +361,8 @@ private module Cached {
*/
cached
Operand getIRRepresentationOfIndirectOperand(Operand operand, int indirectionIndex) {
exists(LoadInstruction load |
operand = load.getSourceAddressOperand() and
exists(Instruction load |
isDereference(load, operand) and
result = unique( | | load.getAUse()) and
isUseImpl(operand, _, indirectionIndex - 1)
)
@@ -305,9 +377,10 @@ private module Cached {
*/
cached
Instruction getIRRepresentationOfIndirectInstruction(Instruction instr, int indirectionIndex) {
exists(LoadInstruction load |
load.getSourceAddress() = instr and
isUseImpl(load.getSourceAddressOperand(), _, indirectionIndex - 1) and
exists(Instruction load, Operand address |
address.getDef() = instr and
isDereference(load, address) and
isUseImpl(address, _, indirectionIndex - 1) and
result = instr
)
}
@@ -328,7 +401,10 @@ private module Cached {
)
or
exists(int ind0 |
isUseImpl(operand.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0)
exists(Operand address |
isDereference(operand.getDef(), address) and
isUseImpl(address, base, ind0)
)
or
isUseImpl(operand.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0)
|
@@ -379,7 +455,10 @@ private module Cached {
)
or
exists(int ind0 |
isDefImpl(address.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0)
exists(Operand operand |
isDereference(address.getDef(), operand) and
isDefImpl(operand, base, ind - 1)
)
or
isDefImpl(address.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0)
|

View File

@@ -5,6 +5,7 @@ private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.models.interfaces.SideEffect
private import DataFlowUtil
private import DataFlowPrivate
private import SsaInternals as Ssa
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
@@ -46,6 +47,8 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
)
or
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo)
}
/**
@@ -65,10 +68,8 @@ private predicate operandToInstructionTaintStep(Operand opFrom, Instruction inst
instrTo instanceof PointerArithmeticInstruction
)
or
// The `CopyInstruction` case is also present in non-taint data flow, but
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
// from a definition of `myStruct` to a `myStruct.myField` expression.
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
// Taint flow from an address to its dereference.
Ssa::isDereference(instrTo, opFrom)
or
// Unary instructions tend to preserve enough information in practice that we
// want taint to flow through.