C++: Stub replacement for security.TaintTracking

This commit adds a `semmle.code.cpp.ir.dataflow.DefaultTaintTracking` library that's API-compatible with the `semmle.code.cpp.security.TaintTracking` library. The new library is implemented on top of the IR data flow library. The idea is to evolve this library until it can replace `semmle.code.cpp.security.TaintTracking` without decreasing our SAMATE score. Then we'll have the IR in production use, and we will have one less taint-tracking library in production.
2026-04-28 18:25:24 +02:00 · 2019-09-09 15:36:07 +02:00
parent 2806a52ec5
commit de4e2a259e
1 changed files with 158 additions and 0 deletions
--- a/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
+++ b/cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll
@@ -0,0 +1,158 @@
+import cpp
+import semmle.code.cpp.security.Security
+private import semmle.code.cpp.ir.dataflow.DataFlow
+private import semmle.code.cpp.ir.IR
+
+/**
+ * A predictable expression is one where an external user can predict
+ * the value. For example, a literal in the source code is considered
+ * predictable.
+ */
+// TODO: Change to use Instruction instead of Expr. Naive attempt breaks
+// TaintedAllocationSize qltest.
+private predicate predictable(Expr expr) {
+  expr instanceof Literal
+  or
+  exists(BinaryOperation binop | binop = expr |
+    predictable(binop.getLeftOperand()) and predictable(binop.getRightOperand())
+  )
+  or
+  exists(UnaryOperation unop | unop = expr | predictable(unop.getOperand()))
+}
+
+// TODO: remove when `predictable` has an `Instruction` parameter instead of `Expr`.
+private predicate predictableInstruction(Instruction instr) {
+  exists(DataFlow::Node node |
+    node.asInstruction() = instr and
+    predictable(node.asExpr())
+  )
+}
+
+private class DefaultTaintTrackingCfg extends DataFlow::Configuration {
+  DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
+
+  override predicate isSource(DataFlow::Node source) { isUserInput(source.asExpr(), _) }
+
+  override predicate isSink(DataFlow::Node sink) { any() }
+
+  override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
+    instructionTaintStep(n1.asInstruction(), n2.asInstruction())
+  }
+
+  override predicate isBarrier(DataFlow::Node node) {
+    exists(Variable checkedVar |
+      accessesVariable(node.asInstruction(), checkedVar) and
+      hasUpperBoundsCheck(checkedVar)
+    )
+  }
+}
+
+private predicate accessesVariable(CopyInstruction copy, Variable var) {
+  exists(VariableAddressInstruction va |
+    va.getVariable().getAST() = var
+  |
+    copy.(StoreInstruction).getDestinationAddress() = va
+    or
+    copy.(LoadInstruction).getSourceAddress() = va
+  )
+}
+
+/**
+ * A variable that has any kind of upper-bound check anywhere in the program
+ */
+private predicate hasUpperBoundsCheck(Variable var) {
+  exists(BinaryOperation oper, VariableAccess access |
+    (
+      oper.getOperator() = "<" or
+      oper.getOperator() = "<=" or
+      oper.getOperator() = ">" or
+      oper.getOperator() = ">="
+    ) and
+    oper.getLeftOperand() = access and
+    access.getTarget() = var and
+    // Comparing to 0 is not an upper bound check
+    not oper.getRightOperand().getValue() = "0"
+  )
+}
+
+private predicate instructionTaintStep(Instruction i1, Instruction i2) {
+  // Expressions computed from tainted data are also tainted
+  i2 = any(CallInstruction call |
+      isPureFunction(call.getStaticCallTarget().getName()) and
+      call.getAnArgument() = i1 and
+      forall(Instruction arg | arg = call.getAnArgument() | arg = i1 or predictableInstruction(arg)) and
+      // flow through `strlen` tends to cause dubious results, if the length is
+      // bounded.
+      not call.getStaticCallTarget().getName() = "strlen"
+    )
+  or
+  // Flow through pointer dereference
+  i2.(LoadInstruction).getSourceAddress() = i1
+  or
+  i2.(UnaryInstruction).getUnary() = i1
+  or
+  exists(BinaryInstruction bin |
+    bin = i2 and
+    predictableInstruction(i2.getAnOperand().getDef()) and
+    i1 = i2.getAnOperand().getDef()
+  )
+  // TODO: Check that we have flow from `a` to `a[i]`. It may work for constant
+  // `i` because there is flow through `predictable` `BinaryInstruction` and
+  // through `LoadInstruction`.
+  //
+  // TODO: Flow from argument to return of known functions: Port missing parts
+  // of `returnArgument` to the `interfaces.Taint` library.
+  //
+  // TODO: Flow from input argument to output argument of known functions: Port
+  // missing parts of `copyValueBetweenArguments` to the `interfaces.Taint`
+  // library and implement call side-effect nodes. This will help with the test
+  // for `ExecTainted.ql`. The test for `TaintedPath.ql` is more tricky because
+  // the output arg is a pointer addition expression.
+}
+
+predicate tainted(Expr source, Element tainted) {
+  exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
+    cfg.hasFlow(DataFlow::exprNode(source), sink)
+  |
+    sink.asExpr().getConversion*() = tainted
+    or
+    // For compatibility, send flow from arguments to parameters, even for
+    // functions with no body.
+    exists(FunctionCall call, int i |
+      sink.asExpr() = call.getArgument(i) and
+      tainted = resolveCall(call).getParameter(i)
+    )
+    or
+    // For compatibility, send flow into a `Variable` if there is flow to any
+    // Load or Store of that variable.
+    exists(CopyInstruction copy |
+      copy.getSourceValue() = sink.asInstruction() and
+      accessesVariable(copy, tainted) and
+      not hasUpperBoundsCheck(tainted)
+    )
+    or
+    // For compatibility, send flow into a `NotExpr` even if it's part of a
+    // short-circuiting condition and thus might get skipped.
+    tainted.(NotExpr).getOperand() = sink.asExpr()
+  )
+}
+
+predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
+  tainted(source, tainted) and
+  // TODO: Find a way to emulate how `security.TaintTracking` reports the last
+  // global variable that taint has passed through. Also make sure we emulate
+  // its behavior for interprocedural flow through globals.
+  globalVar = ""
+}
+
+GlobalOrNamespaceVariable globalVarFromId(string id) {
+  // TODO: Implement this when `taintedIncludingGlobalVars` has support for
+  // global variables.
+  none()
+}
+
+Function resolveCall(Call call) {
+  // TODO: improve virtual dispatch. This will help in the test for
+  // `UncontrolledProcessOperation.ql`.
+  result = call.getTarget()
+}