C++: Stub replacement for security.TaintTracking

This commit adds a `semmle.code.cpp.ir.dataflow.DefaultTaintTracking`
library that's API-compatible with the
`semmle.code.cpp.security.TaintTracking` library. The new library is
implemented on top of the IR data flow library.

The idea is to evolve this library until it can replace
`semmle.code.cpp.security.TaintTracking` without decreasing our SAMATE
score. Then we'll have the IR in production use, and we will have one
less taint-tracking library in production.
This commit is contained in:
Jonas Jensen
2019-09-09 15:36:07 +02:00
parent 2806a52ec5
commit de4e2a259e

View File

@@ -0,0 +1,158 @@
import cpp
import semmle.code.cpp.security.Security
private import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.IR
/**
* A predictable expression is one where an external user can predict
* the value. For example, a literal in the source code is considered
* predictable.
*/
// TODO: Change to use Instruction instead of Expr. Naive attempt breaks
// TaintedAllocationSize qltest.
private predicate predictable(Expr expr) {
expr instanceof Literal
or
exists(BinaryOperation binop | binop = expr |
predictable(binop.getLeftOperand()) and predictable(binop.getRightOperand())
)
or
exists(UnaryOperation unop | unop = expr | predictable(unop.getOperand()))
}
// TODO: remove when `predictable` has an `Instruction` parameter instead of `Expr`.
private predicate predictableInstruction(Instruction instr) {
exists(DataFlow::Node node |
node.asInstruction() = instr and
predictable(node.asExpr())
)
}
private class DefaultTaintTrackingCfg extends DataFlow::Configuration {
DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
override predicate isSource(DataFlow::Node source) { isUserInput(source.asExpr(), _) }
override predicate isSink(DataFlow::Node sink) { any() }
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
}
override predicate isBarrier(DataFlow::Node node) {
exists(Variable checkedVar |
accessesVariable(node.asInstruction(), checkedVar) and
hasUpperBoundsCheck(checkedVar)
)
}
}
private predicate accessesVariable(CopyInstruction copy, Variable var) {
exists(VariableAddressInstruction va |
va.getVariable().getAST() = var
|
copy.(StoreInstruction).getDestinationAddress() = va
or
copy.(LoadInstruction).getSourceAddress() = va
)
}
/**
* A variable that has any kind of upper-bound check anywhere in the program
*/
private predicate hasUpperBoundsCheck(Variable var) {
exists(BinaryOperation oper, VariableAccess access |
(
oper.getOperator() = "<" or
oper.getOperator() = "<=" or
oper.getOperator() = ">" or
oper.getOperator() = ">="
) and
oper.getLeftOperand() = access and
access.getTarget() = var and
// Comparing to 0 is not an upper bound check
not oper.getRightOperand().getValue() = "0"
)
}
private predicate instructionTaintStep(Instruction i1, Instruction i2) {
// Expressions computed from tainted data are also tainted
i2 = any(CallInstruction call |
isPureFunction(call.getStaticCallTarget().getName()) and
call.getAnArgument() = i1 and
forall(Instruction arg | arg = call.getAnArgument() | arg = i1 or predictableInstruction(arg)) and
// flow through `strlen` tends to cause dubious results, if the length is
// bounded.
not call.getStaticCallTarget().getName() = "strlen"
)
or
// Flow through pointer dereference
i2.(LoadInstruction).getSourceAddress() = i1
or
i2.(UnaryInstruction).getUnary() = i1
or
exists(BinaryInstruction bin |
bin = i2 and
predictableInstruction(i2.getAnOperand().getDef()) and
i1 = i2.getAnOperand().getDef()
)
// TODO: Check that we have flow from `a` to `a[i]`. It may work for constant
// `i` because there is flow through `predictable` `BinaryInstruction` and
// through `LoadInstruction`.
//
// TODO: Flow from argument to return of known functions: Port missing parts
// of `returnArgument` to the `interfaces.Taint` library.
//
// TODO: Flow from input argument to output argument of known functions: Port
// missing parts of `copyValueBetweenArguments` to the `interfaces.Taint`
// library and implement call side-effect nodes. This will help with the test
// for `ExecTainted.ql`. The test for `TaintedPath.ql` is more tricky because
// the output arg is a pointer addition expression.
}
predicate tainted(Expr source, Element tainted) {
exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
cfg.hasFlow(DataFlow::exprNode(source), sink)
|
sink.asExpr().getConversion*() = tainted
or
// For compatibility, send flow from arguments to parameters, even for
// functions with no body.
exists(FunctionCall call, int i |
sink.asExpr() = call.getArgument(i) and
tainted = resolveCall(call).getParameter(i)
)
or
// For compatibility, send flow into a `Variable` if there is flow to any
// Load or Store of that variable.
exists(CopyInstruction copy |
copy.getSourceValue() = sink.asInstruction() and
accessesVariable(copy, tainted) and
not hasUpperBoundsCheck(tainted)
)
or
// For compatibility, send flow into a `NotExpr` even if it's part of a
// short-circuiting condition and thus might get skipped.
tainted.(NotExpr).getOperand() = sink.asExpr()
)
}
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
tainted(source, tainted) and
// TODO: Find a way to emulate how `security.TaintTracking` reports the last
// global variable that taint has passed through. Also make sure we emulate
// its behavior for interprocedural flow through globals.
globalVar = ""
}
GlobalOrNamespaceVariable globalVarFromId(string id) {
// TODO: Implement this when `taintedIncludingGlobalVars` has support for
// global variables.
none()
}
Function resolveCall(Call call) {
// TODO: improve virtual dispatch. This will help in the test for
// `UncontrolledProcessOperation.ql`.
result = call.getTarget()
}