Merge pull request #2657 from jbj/DefaultTaintTracking-models

C++: wire up models library to DefaultTaintTracking
This commit is contained in:
Robert Marsh
2020-01-27 17:41:54 -05:00
committed by GitHub
10 changed files with 302 additions and 103 deletions

View File

@@ -4,6 +4,8 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.dataflow.DataFlow2
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch as Dispatch
private import semmle.code.cpp.models.interfaces.Taint
private import semmle.code.cpp.models.interfaces.DataFlow
/**
* A predictable instruction is one where an external user can predict
@@ -156,18 +158,79 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) {
// This is part of the translation of `a[i]`, where we want taint to flow
// from `a`.
i2.(PointerAddInstruction).getLeft() = i1
// TODO: robust Chi handling
//
// TODO: Flow from argument to return of known functions: Port missing parts
// of `returnArgument` to the `interfaces.Taint` and `interfaces.DataFlow`
// libraries.
//
// TODO: Flow from input argument to output argument of known functions: Port
// missing parts of `copyValueBetweenArguments` to the `interfaces.Taint` and
// `interfaces.DataFlow` libraries and implement call side-effect nodes. This
// will help with the test for `ExecTainted.ql`. The test for
// `TaintedPath.ql` is more tricky because the output arg is a pointer
// addition expression.
or
// Flow from argument to return value
i2 = any(CallInstruction call |
exists(int indexIn |
modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
i1 = getACallArgumentOrIndirection(call, indexIn)
)
)
or
// Flow from input argument to output argument
// TODO: This won't work in practice as long as all aliased memory is tracked
// together in a single virtual variable.
// TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
// is a pointer addition expression?
i2 = any(WriteSideEffectInstruction outNode |
exists(CallInstruction call, int indexIn, int indexOut |
modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
i1 = getACallArgumentOrIndirection(call, indexIn) and
outNode.getIndex() = indexOut and
outNode.getPrimaryInstruction() = call
)
)
}
/**
* Get an instruction that goes into argument `argumentIndex` of `call`. This
* can be either directly or through one pointer indirection.
*/
private Instruction getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
result = call.getPositionalArgument(argumentIndex)
or
exists(ReadSideEffectInstruction readSE |
// TODO: why are read side effect operands imprecise?
result = readSE.getSideEffectOperand().getAnyDef() and
readSE.getPrimaryInstruction() = call and
readSE.getIndex() = argumentIndex
)
}
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
exists(FunctionInput modelIn, FunctionOutput modelOut |
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
modelOut.isParameterDeref(parameterOut)
)
}
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
// Taint flow from parameter to return value
exists(FunctionInput modelIn, FunctionOutput modelOut |
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
(modelOut.isReturnValue() or modelOut.isReturnValueDeref())
)
or
// Data flow (not taint flow) to where the return value points. For the time
// being we will conflate pointers and objects in taint tracking.
exists(FunctionInput modelIn, FunctionOutput modelOut |
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
modelOut.isReturnValueDeref()
)
or
// Taint flow from one argument to another and data flow from an argument to a
// return value. This happens in functions like `strcat` and `memcpy`. We
// could model this flow in two separate steps, but that would add reverse
// flow from the write side-effect to the call instruction, which may not be
// desirable.
exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
modelTaintToParameter(f, parameterIn, parameterMid) and
modelMid.isParameter(parameterMid) and
f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
)
}
private Element adjustedSink(DataFlow::Node sink) {

View File

@@ -1198,52 +1198,63 @@ class CallSideEffectInstruction extends SideEffectInstruction {
}
/**
* An instruction representing the side effect of a function call on any memory that might be read
* by that call.
* An instruction representing the side effect of a function call on any memory
* that might be read by that call. This instruction is emitted instead of
* `CallSideEffectInstruction` when it's certain that the call target cannot
* write to escaped memory.
*/
class CallReadSideEffectInstruction extends SideEffectInstruction {
CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect }
}
/**
* An instruction representing a read side effect of a function call on a
* specific parameter.
*/
class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
ReadSideEffectInstruction() { getOpcode() instanceof ReadSideEffectOpcode }
/** Gets the operand for the value that will be read from this instruction, if known. */
final SideEffectOperand getSideEffectOperand() { result = getAnOperand() }
/** Gets the value that will be read from this instruction, if known. */
final Instruction getSideEffect() { result = getSideEffectOperand().getDef() }
/** Gets the operand for the address from which this instruction may read. */
final AddressOperand getArgumentOperand() { result = getAnOperand() }
/** Gets the address from which this instruction may read. */
final Instruction getArgumentDef() { result = getArgumentOperand().getDef() }
}
/**
* An instruction representing the read of an indirect parameter within a function call.
*/
class IndirectReadSideEffectInstruction extends SideEffectInstruction {
class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
IndirectReadSideEffectInstruction() { getOpcode() instanceof Opcode::IndirectReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class BufferReadSideEffectInstruction extends SideEffectInstruction {
class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
BufferReadSideEffectInstruction() { getOpcode() instanceof Opcode::BufferReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class SizedBufferReadSideEffectInstruction extends SideEffectInstruction {
class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
SizedBufferReadSideEffectInstruction() {
getOpcode() instanceof Opcode::SizedBufferReadSideEffect
}
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing a side effect of a function call.
* An instruction representing a write side effect of a function call on a
* specific parameter.
*/
class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode }

View File

@@ -1198,52 +1198,63 @@ class CallSideEffectInstruction extends SideEffectInstruction {
}
/**
* An instruction representing the side effect of a function call on any memory that might be read
* by that call.
* An instruction representing the side effect of a function call on any memory
* that might be read by that call. This instruction is emitted instead of
* `CallSideEffectInstruction` when it's certain that the call target cannot
* write to escaped memory.
*/
class CallReadSideEffectInstruction extends SideEffectInstruction {
CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect }
}
/**
* An instruction representing a read side effect of a function call on a
* specific parameter.
*/
class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
ReadSideEffectInstruction() { getOpcode() instanceof ReadSideEffectOpcode }
/** Gets the operand for the value that will be read from this instruction, if known. */
final SideEffectOperand getSideEffectOperand() { result = getAnOperand() }
/** Gets the value that will be read from this instruction, if known. */
final Instruction getSideEffect() { result = getSideEffectOperand().getDef() }
/** Gets the operand for the address from which this instruction may read. */
final AddressOperand getArgumentOperand() { result = getAnOperand() }
/** Gets the address from which this instruction may read. */
final Instruction getArgumentDef() { result = getArgumentOperand().getDef() }
}
/**
* An instruction representing the read of an indirect parameter within a function call.
*/
class IndirectReadSideEffectInstruction extends SideEffectInstruction {
class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
IndirectReadSideEffectInstruction() { getOpcode() instanceof Opcode::IndirectReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class BufferReadSideEffectInstruction extends SideEffectInstruction {
class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
BufferReadSideEffectInstruction() { getOpcode() instanceof Opcode::BufferReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class SizedBufferReadSideEffectInstruction extends SideEffectInstruction {
class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
SizedBufferReadSideEffectInstruction() {
getOpcode() instanceof Opcode::SizedBufferReadSideEffect
}
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing a side effect of a function call.
* An instruction representing a write side effect of a function call on a
* specific parameter.
*/
class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode }

View File

@@ -1198,52 +1198,63 @@ class CallSideEffectInstruction extends SideEffectInstruction {
}
/**
* An instruction representing the side effect of a function call on any memory that might be read
* by that call.
* An instruction representing the side effect of a function call on any memory
* that might be read by that call. This instruction is emitted instead of
* `CallSideEffectInstruction` when it's certain that the call target cannot
* write to escaped memory.
*/
class CallReadSideEffectInstruction extends SideEffectInstruction {
CallReadSideEffectInstruction() { getOpcode() instanceof Opcode::CallReadSideEffect }
}
/**
* An instruction representing a read side effect of a function call on a
* specific parameter.
*/
class ReadSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
ReadSideEffectInstruction() { getOpcode() instanceof ReadSideEffectOpcode }
/** Gets the operand for the value that will be read from this instruction, if known. */
final SideEffectOperand getSideEffectOperand() { result = getAnOperand() }
/** Gets the value that will be read from this instruction, if known. */
final Instruction getSideEffect() { result = getSideEffectOperand().getDef() }
/** Gets the operand for the address from which this instruction may read. */
final AddressOperand getArgumentOperand() { result = getAnOperand() }
/** Gets the address from which this instruction may read. */
final Instruction getArgumentDef() { result = getArgumentOperand().getDef() }
}
/**
* An instruction representing the read of an indirect parameter within a function call.
*/
class IndirectReadSideEffectInstruction extends SideEffectInstruction {
class IndirectReadSideEffectInstruction extends ReadSideEffectInstruction {
IndirectReadSideEffectInstruction() { getOpcode() instanceof Opcode::IndirectReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class BufferReadSideEffectInstruction extends SideEffectInstruction {
class BufferReadSideEffectInstruction extends ReadSideEffectInstruction {
BufferReadSideEffectInstruction() { getOpcode() instanceof Opcode::BufferReadSideEffect }
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing the read of an indirect buffer parameter within a function call.
*/
class SizedBufferReadSideEffectInstruction extends SideEffectInstruction {
class SizedBufferReadSideEffectInstruction extends ReadSideEffectInstruction {
SizedBufferReadSideEffectInstruction() {
getOpcode() instanceof Opcode::SizedBufferReadSideEffect
}
Instruction getArgumentDef() { result = getAnOperand().(AddressOperand).getDef() }
Instruction getSizeDef() { result = getAnOperand().(BufferSizeOperand).getDef() }
Instruction getSideEffect() { result = getAnOperand().(SideEffectOperand).getDef() }
}
/**
* An instruction representing a side effect of a function call.
* An instruction representing a write side effect of a function call on a
* specific parameter.
*/
class WriteSideEffectInstruction extends SideEffectInstruction, IndexedInstruction {
WriteSideEffectInstruction() { getOpcode() instanceof WriteSideEffectOpcode }

View File

@@ -30,7 +30,7 @@ class InetAton extends TaintFunction, ArrayFunction {
}
}
class InetAddr extends TaintFunction, ArrayFunction {
class InetAddr extends TaintFunction, ArrayFunction, AliasFunction {
InetAddr() { hasGlobalName("inet_addr") }
override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
@@ -41,6 +41,12 @@ class InetAddr extends TaintFunction, ArrayFunction {
override predicate hasArrayInput(int bufParam) { bufParam = 0 }
override predicate hasArrayWithNullTerminator(int bufParam) { bufParam = 0 }
override predicate parameterNeverEscapes(int index) { index = 0 }
override predicate parameterEscapesOnlyViaReturn(int index) { none() }
override predicate parameterIsAlwaysReturned(int index) { none() }
}
class InetNetwork extends TaintFunction, ArrayFunction {

View File

@@ -0,0 +1,41 @@
int atoi(const char *nptr);
char *getenv(const char *name);
char *strcat(char * s1, const char * s2);
char *strdup(const char *);
char *_strdup(const char *);
char *unmodeled_function(const char *);
void sink(const char *);
void sink(int);
int main(int argc, char *argv[]) {
sink(_strdup(getenv("VAR")));
sink(strdup(getenv("VAR")));
sink(unmodeled_function(getenv("VAR")));
char untainted_buf[100] = "";
char buf[100] = "VAR = ";
sink(strcat(buf, getenv("VAR")));
sink(buf); // BUG: no taint
sink(untainted_buf); // the two buffers would be conflated if we added flow through partial chi inputs
return 0;
}
typedef unsigned int inet_addr_retval;
inet_addr_retval inet_addr(const char *dotted_address);
void sink(inet_addr_retval);
void test_indirect_arg_to_model() {
// This test is non-sensical but carefully arranged so we get data flow into
// inet_addr not through the function argument but through its associated
// read side effect.
void *env_pointer = getenv("VAR"); // env_pointer is tainted, not its data.
inet_addr_retval a = inet_addr((const char *)&env_pointer);
sink(a);
}

View File

@@ -0,0 +1,29 @@
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:6:15:6:24 | p#0 |
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:8:16:14 | call to _strdup |
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:8:16:29 | (const char *)... |
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:16:16:21 | call to getenv |
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:16:16:28 | (const char *)... |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:5:14:5:23 | p#0 |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:17:8:17:13 | call to strdup |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:17:8:17:28 | (const char *)... |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:17:15:17:20 | call to getenv |
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:17:15:17:27 | (const char *)... |
| defaulttainttracking.cpp:18:27:18:32 | call to getenv | defaulttainttracking.cpp:7:26:7:35 | p#0 |
| defaulttainttracking.cpp:18:27:18:32 | call to getenv | defaulttainttracking.cpp:18:27:18:32 | call to getenv |
| defaulttainttracking.cpp:18:27:18:32 | call to getenv | defaulttainttracking.cpp:18:27:18:39 | (const char *)... |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:3:38:3:39 | s2 |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:13 | call to strcat |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:33 | (const char *)... |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:25 | call to getenv |
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:32 | (const char *)... |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:32:11:32:26 | p#0 |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:11:38:21 | env_pointer |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:25:38:30 | call to getenv |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:38:25:38:37 | (void *)... |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:39:22:39:22 | a |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:39:26:39:34 | call to inet_addr |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:39:50:39:61 | & ... |
| defaulttainttracking.cpp:38:25:38:30 | call to getenv | defaulttainttracking.cpp:40:10:40:10 | a |

View File

@@ -0,0 +1,5 @@
import semmle.code.cpp.ir.dataflow.DefaultTaintTracking
from Expr source, Element tainted
where tainted(source, tainted)
select source, tainted