C++: Restore some of the lost test results by doing operand -> instruction taint steps in IR TaintTracking.

This commit is contained in:
Mathias Vorreiter Pedersen
2021-03-02 15:45:40 +01:00
parent 23d3109071
commit eb4f1e1ba0
5 changed files with 62 additions and 64 deletions

View File

@@ -613,7 +613,7 @@ module TaintedWithPath {
// Step to return value of a modeled function when an input taints the
// dereference of the return value
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
n1 = callInput(call, modelIn) and
n1.asOperand() = callInput(call, modelIn) and
(
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
or

View File

@@ -9,30 +9,18 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
/**
* Gets the operand that goes into `input` for `call`.
*/
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
// A positional argument
Operand callInput(CallInstruction call, FunctionInput input) {
// An argument or qualifier
exists(int index |
result.asInstruction() = call.getPositionalArgument(index) and
input.isParameter(index)
result = call.getArgumentOperand(index) and
input.isParameterOrQualifierAddress(index)
)
or
// A value pointed to by a positional argument
// A value pointed to by an argument or qualifier
exists(ReadSideEffectInstruction read |
result.asOperand() = read.getSideEffectOperand() and
result = read.getSideEffectOperand() and
read.getPrimaryInstruction() = call and
input.isParameterDeref(read.getIndex())
)
or
// The qualifier pointer
result.asInstruction() = call.getThisArgument() and
input.isQualifierAddress()
or
// The qualifier object
exists(ReadSideEffectInstruction read |
result.asOperand() = read.getSideEffectOperand() and
read.getPrimaryInstruction() = call and
read.getIndex() = -1 and
input.isQualifierObject()
input.isParameterDerefOrQualifierObject(read.getIndex())
)
}
@@ -44,19 +32,11 @@ Instruction callOutput(CallInstruction call, FunctionOutput output) {
result = call and
output.isReturnValue()
or
// The side effect of a call on the value pointed to by a positional argument
// The side effect of a call on the value pointed to by an argument or qualifier
exists(WriteSideEffectInstruction effect |
result = effect and
effect.getPrimaryInstruction() = call and
output.isParameterDeref(effect.getIndex())
)
or
// The side effect of a call on the qualifier object
exists(WriteSideEffectInstruction effect |
result = effect and
effect.getPrimaryInstruction() = call and
effect.getIndex() = -1 and
output.isQualifierObject()
output.isParameterDerefOrQualifierObject(effect.getIndex())
)
// TODO: return value dereference
}

View File

@@ -21,53 +21,69 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
localInstructionTaintStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
or
modeledTaintStep(nodeFrom, nodeTo)
instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())
}
/**
* Holds if taint propagates from the result of `fromInstr` to the operand
* `toOperand` in one step. Used by `localAdditionalTaintStep` for steps that
* go from an instruction node to an operand node.
*/
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
// We only do this in certain cases:
// 1. The instruction's result must not be conflated, and
// 2. The instruction's result type is one of the types where we expect element-to-object flow. Currently
// this is array types and union types. This matches the other two cases of element-to-object flow in
// `DefaultTaintTracking`.
toOperand.getAnyDef() = fromInstr and
not fromInstr.isResultConflated() and
(
fromInstr.getResultType() instanceof ArrayType or
fromInstr.getResultType() instanceof Union
)
or
// Flow from the definition of a read side effect's argument to that read's
// side-effect operand.
// NOTE(review): presumably this carries taint from a pointer argument to the
// value it points to - confirm against the IR documentation.
exists(ReadSideEffectInstruction readInstr |
fromInstr = readInstr.getArgumentDef() and
toOperand = readInstr.getSideEffectOperand()
)
or
// Flow from any definition of a load operand to that operand. Unlike the
// first case, this places no restriction on the result type or on conflation.
toOperand.(LoadOperand).getAnyDef() = fromInstr
}
/**
* Holds if taint propagates from `opFrom` to `instrTo` in exactly one local
* (intra-procedural) step.
*/
private predicate localInstructionTaintStep(Instruction nodeFrom, Instruction nodeTo) {
private predicate operandToInstructionTaintStep(Operand opFrom, Instruction instrTo) {
// Taint can flow through expressions that alter the value but preserve
// more than one bit of it _or_ expressions that follow data through
// pointer indirections.
nodeTo.getAnOperand().getAnyDef() = nodeFrom and
instrTo.getAnOperand() = opFrom and
(
nodeTo instanceof ArithmeticInstruction
instrTo instanceof ArithmeticInstruction
or
nodeTo instanceof BitwiseInstruction
instrTo instanceof BitwiseInstruction
or
nodeTo instanceof PointerArithmeticInstruction
instrTo instanceof PointerArithmeticInstruction
or
nodeTo instanceof FieldAddressInstruction
instrTo instanceof FieldAddressInstruction
or
// The `CopyInstruction` case is also present in non-taint data flow, but
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
// from a definition of `myStruct` to a `myStruct.myField` expression.
nodeTo instanceof CopyInstruction
instrTo instanceof CopyInstruction
)
or
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
or
// Flow through partial reads of arrays and unions
nodeTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = nodeFrom and
not nodeFrom.isResultConflated() and
(
nodeFrom.getResultType() instanceof ArrayType or
nodeFrom.getResultType() instanceof Union
)
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
or
// Flow from an element to an array or union that contains it.
nodeTo.(ChiInstruction).getPartial() = nodeFrom and
not nodeTo.isResultConflated() and
exists(Type t | nodeTo.getResultLanguageType().hasType(t, false) |
instrTo.(ChiInstruction).getPartialOperand() = opFrom and
not instrTo.isResultConflated() and
exists(Type t | instrTo.getResultLanguageType().hasType(t, false) |
t instanceof Union
or
t instanceof ArrayType
)
or
modeledTaintStep(opFrom, instrTo)
}
/**
@@ -110,17 +126,19 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
* modeled function.
*/
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
(
nodeIn = callInput(call, modelIn)
or
exists(int n |
modelIn.isParameterDeref(n) and
nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
modelIn.isParameterDerefOrQualifierObject(n) and
if n = -1
then nodeIn = callInput(call, any(InQualifierObject inQualifier))
else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
)
) and
nodeOut.asInstruction() = callOutput(call, modelOut) and
nodeOut = callOutput(call, modelOut) and
call.getStaticCallTarget() = func and
func.hasTaintFlow(modelIn, modelOut)
)
@@ -135,7 +153,7 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
int indexMid, InParameter modelMidIn, OutReturnValue modelOut
|
nodeIn = callInput(call, modelIn) and
nodeOut.asInstruction() = callOutput(call, modelOut) and
nodeOut = callOutput(call, modelOut) and
call.getStaticCallTarget() = func and
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
@@ -149,8 +167,8 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
CallInstruction call, ReadSideEffectInstruction read, Function func, FunctionInput modelIn,
FunctionOutput modelOut
|
read.getSideEffectOperand() = callInput(call, modelIn).asOperand() and
read.getArgumentDef() = nodeIn.asInstruction() and
read.getSideEffectOperand() = callInput(call, modelIn) and
read.getArgumentDef() = nodeIn.getDef() and
not read.getSideEffect().isResultModeled() and
call.getStaticCallTarget() = func and
(
@@ -158,6 +176,6 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
or
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
) and
nodeOut.asInstruction() = callOutput(call, modelOut)
nodeOut = callOutput(call, modelOut)
)
}

View File

@@ -13,8 +13,8 @@ int main() {
sink(_strdup(getenv("VAR"))); // $ MISSING: ast,ir
sink(strdup(getenv("VAR"))); // $ ast MISSING: ir
sink(_strdup(getenv("VAR"))); // $ ir MISSING: ast
sink(strdup(getenv("VAR"))); // $ ast,ir
sink(unmodeled_function(getenv("VAR"))); // clean by assumption
char untainted_buf[100] = "";

View File

@@ -369,9 +369,9 @@ void test_strdup(char *source)
a = strdup(source);
b = strdup("hello, world");
c = strndup(source, 100);
sink(a); // $ ast MISSING: ir
sink(a); // $ ast,ir
sink(b);
sink(c); // $ ast MISSING: ir
sink(c); // $ ast,ir
}
void test_strndup(int source)
@@ -388,7 +388,7 @@ void test_wcsdup(wchar_t *source)
a = wcsdup(source);
b = wcsdup(L"hello, world");
sink(a); // $ ast MISSING: ir
sink(a); // $ ast,ir
sink(b);
}