C++: Remove commonTaintStep from DefaultTaintTracking.

This commit is contained in:
Mathias Vorreiter Pedersen
2021-03-17 11:56:59 +01:00
parent 43fbcc1c8a
commit 3914a93504

View File

@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
commonTaintStep(n1, n2)
}
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
}
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
commonTaintStep(n1, n2)
or
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
or
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
commonTaintStep(n1, n2)
or
// Additional step for flow out of variables. There is no flow _into_
// variables in this configuration, so this step only serves to take flow
// out of a variable that's a source.
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
)
}
cached
private predicate commonTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
operandToInstructionTaintStep(fromNode.asOperand(), toNode.asInstruction())
or
instructionToOperandTaintStep(fromNode.asInstruction(), toNode.asOperand())
}
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
// We only do this in certain cases:
// 1. The instruction's result must not be conflated, and
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
// this is array types and union types. This matches the other two cases of element-to-object flow in
// `DefaultTaintTracking`.
toOperand.getAnyDef() = fromInstr and
not fromInstr.isResultConflated() and
(
fromInstr.getResultType() instanceof ArrayType or
fromInstr.getResultType() instanceof Union
)
or
exists(ReadSideEffectInstruction readInstr |
fromInstr = readInstr.getArgumentDef() and
toOperand = readInstr.getSideEffectOperand()
)
}
private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction toInstr) {
// Expressions computed from tainted data are also tainted
exists(CallInstruction call, int argIndex | call = toInstr |
isPureFunction(call.getStaticCallTarget().getName()) and
fromOperand = getACallArgumentOrIndirection(call, argIndex) and
forall(Operand argOperand | argOperand = call.getAnArgumentOperand() |
argOperand = getACallArgumentOrIndirection(call, argIndex) or
predictableInstruction(argOperand.getAnyDef())
) and
// flow through `strlen` tends to cause dubious results, if the length is
// bounded.
not call.getStaticCallTarget().getName() = "strlen"
)
or
// Flow from argument to return value
toInstr =
any(CallInstruction call |
exists(int indexIn |
modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
not predictableOnlyFlow(call.getStaticCallTarget().getName())
)
)
or
// Flow from input argument to output argument
// TODO: This won't work in practice as long as all aliased memory is tracked
// together in a single virtual variable.
// TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
// is a pointer addition expression?
toInstr =
any(WriteSideEffectInstruction outInstr |
exists(CallInstruction call, int indexIn, int indexOut |
modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
outInstr.getIndex() = indexOut and
outInstr.getPrimaryInstruction() = call
)
)
or
// Flow through pointer dereference
toInstr.(LoadInstruction).getSourceAddressOperand() = fromOperand
or
// Flow through partial reads of arrays and unions
toInstr.(LoadInstruction).getSourceValueOperand() = fromOperand and
exists(Instruction fromInstr | fromInstr = fromOperand.getAnyDef() |
not fromInstr.isResultConflated() and
(
fromInstr.getResultType() instanceof ArrayType or
fromInstr.getResultType() instanceof Union
)
)
or
// Unary instructions tend to preserve enough information in practice that we
// want taint to flow through.
// The exception is `FieldAddressInstruction`. Together with the rule for
// `LoadInstruction` above and for `ChiInstruction` below, flow through
// `FieldAddressInstruction` could cause flow into one field to come out an
// unrelated field. This would happen across function boundaries, where the IR
// would not be able to match loads to stores.
toInstr.(UnaryInstruction).getUnaryOperand() = fromOperand and
(
not toInstr instanceof FieldAddressInstruction
or
toInstr.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
)
or
// Flow from an element to an array or union that contains it.
toInstr.(ChiInstruction).getPartialOperand() = fromOperand and
not toInstr.isResultConflated() and
exists(Type t | toInstr.getResultLanguageType().hasType(t, false) |
t instanceof Union
or
t instanceof ArrayType
)
or
exists(BinaryInstruction bin |
bin = toInstr and
predictableInstruction(toInstr.getAnOperand().getDef()) and
fromOperand = toInstr.getAnOperand()
)
or
// This is part of the translation of `a[i]`, where we want taint to flow
// from `a`.
toInstr.(PointerAddInstruction).getLeftOperand() = fromOperand
or
// Until we have flow through indirections across calls, we'll take flow out
// of the indirection and into the argument.
// When we get proper flow through indirections across calls, this code can be
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
exists(ReadSideEffectInstruction read |
read.getSideEffectOperand() = fromOperand and
read.getArgumentDef() = toInstr
)
or
// Until we have from through indirections across calls, we'll take flow out
// of the parameter and into its indirection.
// `InitializeIndirectionInstruction` only has a single operand: the address of the
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
// the IR looks like this:
// ```
// m1 = InitializeParameter[p] : &r1
// r2 = Load[p] : r2, m1
// m3 = InitializeIndirection[p] : &r2
// ```
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
// `LoadOperand`'s overlap being exact.
toInstr.(InitializeIndirectionInstruction).getAnOperand() = fromOperand
}
/**
* Returns the index of the side effect instruction corresponding to the specified function output,
* if one exists.
*/
private int getWriteSideEffectIndex(FunctionOutput output) {
output.isParameterDeref(result)
or
output.isQualifierObject() and result = -1
}
/**
* Get an operand that goes into argument `argumentIndex` of `call`. This
* can be either directly or through one pointer indirection.
*/
private Operand getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
result = call.getPositionalArgumentOperand(argumentIndex)
or
exists(ReadSideEffectInstruction readSE |
// TODO: why are read side effect operands imprecise?
result = readSE.getSideEffectOperand() and
readSE.getPrimaryInstruction() = call and
readSE.getIndex() = argumentIndex
)
}
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
exists(FunctionInput modelIn, FunctionOutput modelOut |
(
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
or
f.(TaintFunction).hasTaintFlow(modelIn, modelOut)
) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
parameterOut = getWriteSideEffectIndex(modelOut)
)
}
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
// Taint flow from parameter to return value
exists(FunctionInput modelIn, FunctionOutput modelOut |
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
(modelOut.isReturnValue() or modelOut.isReturnValueDeref())
)
or
// Data flow (not taint flow) to where the return value points. For the time
// being we will conflate pointers and objects in taint tracking.
exists(FunctionInput modelIn, FunctionOutput modelOut |
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
modelOut.isReturnValueDeref()
)
or
// Taint flow from one argument to another and data flow from an argument to a
// return value. This happens in functions like `strcat` and `memcpy`. We
// could model this flow in two separate steps, but that would add reverse
// flow from the write side-effect to the call instruction, which may not be
// desirable.
exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
modelTaintToParameter(f, parameterIn, parameterMid) and
modelMid.isParameter(parameterMid) and
f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
)
}
private Element adjustedSink(DataFlow::Node sink) {
// TODO: is it more appropriate to use asConvertedExpr here and avoid
// `getConversion*`? Or will that cause us to miss some cases where there's