mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
C++: Remove commonTaintStep from DefaultTaintTracking.
This commit is contained in:
@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
commonTaintStep(n1, n2)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
commonTaintStep(n1, n2)
|
||||
or
|
||||
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
|
||||
or
|
||||
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
|
||||
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
commonTaintStep(n1, n2)
|
||||
or
|
||||
// Additional step for flow out of variables. There is no flow _into_
|
||||
// variables in this configuration, so this step only serves to take flow
|
||||
// out of a variable that's a source.
|
||||
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
private predicate commonTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
|
||||
operandToInstructionTaintStep(fromNode.asOperand(), toNode.asInstruction())
|
||||
or
|
||||
instructionToOperandTaintStep(fromNode.asInstruction(), toNode.asOperand())
|
||||
}
|
||||
|
||||
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
|
||||
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
|
||||
// We only do this in certain cases:
|
||||
// 1. The instruction's result must not be conflated, and
|
||||
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
|
||||
// this is array types and union types. This matches the other two cases of element-to-object flow in
|
||||
// `DefaultTaintTracking`.
|
||||
toOperand.getAnyDef() = fromInstr and
|
||||
not fromInstr.isResultConflated() and
|
||||
(
|
||||
fromInstr.getResultType() instanceof ArrayType or
|
||||
fromInstr.getResultType() instanceof Union
|
||||
)
|
||||
or
|
||||
exists(ReadSideEffectInstruction readInstr |
|
||||
fromInstr = readInstr.getArgumentDef() and
|
||||
toOperand = readInstr.getSideEffectOperand()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction toInstr) {
|
||||
// Expressions computed from tainted data are also tainted
|
||||
exists(CallInstruction call, int argIndex | call = toInstr |
|
||||
isPureFunction(call.getStaticCallTarget().getName()) and
|
||||
fromOperand = getACallArgumentOrIndirection(call, argIndex) and
|
||||
forall(Operand argOperand | argOperand = call.getAnArgumentOperand() |
|
||||
argOperand = getACallArgumentOrIndirection(call, argIndex) or
|
||||
predictableInstruction(argOperand.getAnyDef())
|
||||
) and
|
||||
// flow through `strlen` tends to cause dubious results, if the length is
|
||||
// bounded.
|
||||
not call.getStaticCallTarget().getName() = "strlen"
|
||||
)
|
||||
or
|
||||
// Flow from argument to return value
|
||||
toInstr =
|
||||
any(CallInstruction call |
|
||||
exists(int indexIn |
|
||||
modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
|
||||
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
|
||||
not predictableOnlyFlow(call.getStaticCallTarget().getName())
|
||||
)
|
||||
)
|
||||
or
|
||||
// Flow from input argument to output argument
|
||||
// TODO: This won't work in practice as long as all aliased memory is tracked
|
||||
// together in a single virtual variable.
|
||||
// TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
|
||||
// is a pointer addition expression?
|
||||
toInstr =
|
||||
any(WriteSideEffectInstruction outInstr |
|
||||
exists(CallInstruction call, int indexIn, int indexOut |
|
||||
modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
|
||||
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
|
||||
outInstr.getIndex() = indexOut and
|
||||
outInstr.getPrimaryInstruction() = call
|
||||
)
|
||||
)
|
||||
or
|
||||
// Flow through pointer dereference
|
||||
toInstr.(LoadInstruction).getSourceAddressOperand() = fromOperand
|
||||
or
|
||||
// Flow through partial reads of arrays and unions
|
||||
toInstr.(LoadInstruction).getSourceValueOperand() = fromOperand and
|
||||
exists(Instruction fromInstr | fromInstr = fromOperand.getAnyDef() |
|
||||
not fromInstr.isResultConflated() and
|
||||
(
|
||||
fromInstr.getResultType() instanceof ArrayType or
|
||||
fromInstr.getResultType() instanceof Union
|
||||
)
|
||||
)
|
||||
or
|
||||
// Unary instructions tend to preserve enough information in practice that we
|
||||
// want taint to flow through.
|
||||
// The exception is `FieldAddressInstruction`. Together with the rule for
|
||||
// `LoadInstruction` above and for `ChiInstruction` below, flow through
|
||||
// `FieldAddressInstruction` could cause flow into one field to come out an
|
||||
// unrelated field. This would happen across function boundaries, where the IR
|
||||
// would not be able to match loads to stores.
|
||||
toInstr.(UnaryInstruction).getUnaryOperand() = fromOperand and
|
||||
(
|
||||
not toInstr instanceof FieldAddressInstruction
|
||||
or
|
||||
toInstr.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
|
||||
)
|
||||
or
|
||||
// Flow from an element to an array or union that contains it.
|
||||
toInstr.(ChiInstruction).getPartialOperand() = fromOperand and
|
||||
not toInstr.isResultConflated() and
|
||||
exists(Type t | toInstr.getResultLanguageType().hasType(t, false) |
|
||||
t instanceof Union
|
||||
or
|
||||
t instanceof ArrayType
|
||||
)
|
||||
or
|
||||
exists(BinaryInstruction bin |
|
||||
bin = toInstr and
|
||||
predictableInstruction(toInstr.getAnOperand().getDef()) and
|
||||
fromOperand = toInstr.getAnOperand()
|
||||
)
|
||||
or
|
||||
// This is part of the translation of `a[i]`, where we want taint to flow
|
||||
// from `a`.
|
||||
toInstr.(PointerAddInstruction).getLeftOperand() = fromOperand
|
||||
or
|
||||
// Until we have flow through indirections across calls, we'll take flow out
|
||||
// of the indirection and into the argument.
|
||||
// When we get proper flow through indirections across calls, this code can be
|
||||
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
|
||||
exists(ReadSideEffectInstruction read |
|
||||
read.getSideEffectOperand() = fromOperand and
|
||||
read.getArgumentDef() = toInstr
|
||||
)
|
||||
or
|
||||
// Until we have from through indirections across calls, we'll take flow out
|
||||
// of the parameter and into its indirection.
|
||||
// `InitializeIndirectionInstruction` only has a single operand: the address of the
|
||||
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
|
||||
// the IR looks like this:
|
||||
// ```
|
||||
// m1 = InitializeParameter[p] : &r1
|
||||
// r2 = Load[p] : r2, m1
|
||||
// m3 = InitializeIndirection[p] : &r2
|
||||
// ```
|
||||
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
|
||||
// `LoadOperand`'s overlap being exact.
|
||||
toInstr.(InitializeIndirectionInstruction).getAnOperand() = fromOperand
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the side effect instruction corresponding to the specified function output,
|
||||
* if one exists.
|
||||
*/
|
||||
private int getWriteSideEffectIndex(FunctionOutput output) {
|
||||
output.isParameterDeref(result)
|
||||
or
|
||||
output.isQualifierObject() and result = -1
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an operand that goes into argument `argumentIndex` of `call`. This
|
||||
* can be either directly or through one pointer indirection.
|
||||
*/
|
||||
private Operand getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
|
||||
result = call.getPositionalArgumentOperand(argumentIndex)
|
||||
or
|
||||
exists(ReadSideEffectInstruction readSE |
|
||||
// TODO: why are read side effect operands imprecise?
|
||||
result = readSE.getSideEffectOperand() and
|
||||
readSE.getPrimaryInstruction() = call and
|
||||
readSE.getIndex() = argumentIndex
|
||||
)
|
||||
}
|
||||
|
||||
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
|
||||
exists(FunctionInput modelIn, FunctionOutput modelOut |
|
||||
(
|
||||
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
|
||||
or
|
||||
f.(TaintFunction).hasTaintFlow(modelIn, modelOut)
|
||||
) and
|
||||
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
|
||||
parameterOut = getWriteSideEffectIndex(modelOut)
|
||||
)
|
||||
}
|
||||
|
||||
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
|
||||
// Taint flow from parameter to return value
|
||||
exists(FunctionInput modelIn, FunctionOutput modelOut |
|
||||
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
|
||||
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
|
||||
(modelOut.isReturnValue() or modelOut.isReturnValueDeref())
|
||||
)
|
||||
or
|
||||
// Data flow (not taint flow) to where the return value points. For the time
|
||||
// being we will conflate pointers and objects in taint tracking.
|
||||
exists(FunctionInput modelIn, FunctionOutput modelOut |
|
||||
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
|
||||
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
|
||||
modelOut.isReturnValueDeref()
|
||||
)
|
||||
or
|
||||
// Taint flow from one argument to another and data flow from an argument to a
|
||||
// return value. This happens in functions like `strcat` and `memcpy`. We
|
||||
// could model this flow in two separate steps, but that would add reverse
|
||||
// flow from the write side-effect to the call instruction, which may not be
|
||||
// desirable.
|
||||
exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
|
||||
modelTaintToParameter(f, parameterIn, parameterMid) and
|
||||
modelMid.isParameter(parameterMid) and
|
||||
f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
|
||||
)
|
||||
}
|
||||
|
||||
private Element adjustedSink(DataFlow::Node sink) {
|
||||
// TODO: is it more appropriate to use asConvertedExpr here and avoid
|
||||
// `getConversion*`? Or will that cause us to miss some cases where there's
|
||||
|
||||
Reference in New Issue
Block a user