diff --git a/cpp/ql/lib/change-notes/2022-11-25-deprecate-default-taint-tracking.md b/cpp/ql/lib/change-notes/2022-11-25-deprecate-default-taint-tracking.md new file mode 100644 index 00000000000..54d10d592d6 --- /dev/null +++ b/cpp/ql/lib/change-notes/2022-11-25-deprecate-default-taint-tracking.md @@ -0,0 +1,6 @@ +--- +category: deprecated +--- + +* Deprecated `semmle.code.cpp.ir.dataflow.DefaultTaintTracking`. Use `semmle.code.cpp.ir.dataflow.TaintTracking`. +* Deprecated `semmle.code.cpp.security.TaintTrackingImpl`. Use `semmle.code.cpp.ir.dataflow.TaintTracking`. diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll index be5403734ce..8b559dbe932 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll @@ -1,642 +1,21 @@ /** + * DEPRECATED: Use `semmle.code.cpp.ir.dataflow.TaintTracking` as a replacement. + * * An IR taint tracking library that uses an IR DataFlow configuration to track * taint from user inputs as defined by `semmle.code.cpp.security.Security`. */ import cpp import semmle.code.cpp.security.Security -private import semmle.code.cpp.ir.dataflow.DataFlow -private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil -private import semmle.code.cpp.ir.dataflow.DataFlow3 -private import semmle.code.cpp.ir.IR -private import semmle.code.cpp.ir.dataflow.ResolveCall -private import semmle.code.cpp.controlflow.IRGuards -private import semmle.code.cpp.models.interfaces.Taint -private import semmle.code.cpp.models.interfaces.DataFlow -private import semmle.code.cpp.ir.dataflow.TaintTracking -private import semmle.code.cpp.ir.dataflow.TaintTracking2 -private import semmle.code.cpp.ir.dataflow.TaintTracking3 -private import semmle.code.cpp.ir.dataflow.internal.ModelUtil +private import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl as DefaultTaintTrackingImpl -/** - * A predictable instruction is one where an external user can predict - * the value. For example, a literal in the source code is considered - * predictable. - */ -private predicate predictableInstruction(Instruction instr) { - instr instanceof ConstantInstruction - or - instr instanceof StringConstantInstruction - or - // This could be a conversion on a string literal - predictableInstruction(instr.(UnaryInstruction).getUnary()) -} +deprecated predicate predictableOnlyFlow = DefaultTaintTrackingImpl::predictableOnlyFlow/1; -/** - * Functions that we should only allow taint to flow through (to the return - * value) if all but the source argument are 'predictable'. This is done to - * emulate the old security library's implementation rather than due to any - * strong belief that this is the right approach. - * - * Note that the list itself is not very principled; it consists of all the - * functions listed in the old security library's [default] `isPureFunction` - * that have more than one argument, but are not in the old taint tracking - * library's `returnArgument` predicate. - */ -predicate predictableOnlyFlow(string name) { - name = - [ - "strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup", - "strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq", - "strtoul" - ] -} +deprecated predicate tainted = DefaultTaintTrackingImpl::tainted/2; -private DataFlow::Node getNodeForSource(Expr source) { - isUserInput(source, _) and - result = getNodeForExpr(source) -} +deprecated predicate taintedIncludingGlobalVars = + DefaultTaintTrackingImpl::taintedIncludingGlobalVars/3; -private DataFlow::Node getNodeForExpr(Expr node) { - result = DataFlow::exprNode(node) - or - // Some of the sources in `isUserInput` are intended to match the value of - // an expression, while others (those modeled below) are intended to match - // the taint that propagates out of an argument, like the `char *` argument - // to `gets`. It's impossible here to tell which is which, but the "access - // to argv" source is definitely not intended to match an output argument, - // and it causes false positives if we let it. - // - // This case goes together with the similar (but not identical) rule in - // `nodeIsBarrierIn`. - result = DataFlow::definitionByReferenceNodeFromArgument(node) and - not argv(node.(VariableAccess).getTarget()) -} +deprecated predicate globalVarFromId = DefaultTaintTrackingImpl::globalVarFromId/1; -private class DefaultTaintTrackingCfg extends TaintTracking::Configuration { - DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" } - - override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) } - - override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) } - - override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } - - override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } -} - -private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration { - ToGlobalVarTaintTrackingCfg() { this = "GlobalVarTaintTrackingCfg" } - - override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) } - - override predicate isSink(DataFlow::Node sink) { - sink.asVariable() instanceof GlobalOrNamespaceVariable - } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable)) - or - readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable)) - } - - override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } - - override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } -} - -private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuration { - FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" } - - override predicate isSource(DataFlow::Node source) { - // This set of sources should be reasonably small, which is good for - // performance since the set of sinks is very large. - exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source)) - } - - override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - // Additional step for flow out of variables. There is no flow _into_ - // variables in this configuration, so this step only serves to take flow - // out of a variable that's a source. - readsVariable(n2.asInstruction(), n1.asVariable()) - } - - override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } - - override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } -} - -private predicate readsVariable(LoadInstruction load, Variable var) { - load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var -} - -private predicate writesVariable(StoreInstruction store, Variable var) { - store.getDestinationAddress().(VariableAddressInstruction).getAstVariable() = var -} - -/** - * A variable that has any kind of upper-bound check anywhere in the program. This is - * biased towards being inclusive because there are a lot of valid ways of doing an - * upper bounds checks if we don't consider where it occurs, for example: - * ``` - * if (x < 10) { sink(x); } - * - * if (10 > y) { sink(y); } - * - * if (z > 10) { z = 10; } - * sink(z); - * ``` - */ -// TODO: This coarse overapproximation, ported from the old taint tracking -// library, could be replaced with an actual semantic check that a particular -// variable _access_ is guarded by an upper-bound check. We probably don't want -// to do this right away since it could expose a lot of FPs that were -// previously suppressed by this predicate by coincidence. -private predicate hasUpperBoundsCheck(Variable var) { - exists(RelationalOperation oper, VariableAccess access | - oper.getAnOperand() = access and - access.getTarget() = var and - // Comparing to 0 is not an upper bound check - not oper.getAnOperand().getValue() = "0" - ) -} - -private predicate nodeIsBarrierEqualityCandidate( - DataFlow::Node node, Operand access, Variable checkedVar -) { - readsVariable(node.asInstruction(), checkedVar) and - any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true) -} - -cached -private module Cached { - cached - predicate nodeIsBarrier(DataFlow::Node node) { - exists(Variable checkedVar | - readsVariable(node.asInstruction(), checkedVar) and - hasUpperBoundsCheck(checkedVar) - ) - or - exists(Variable checkedVar, Operand access | - /* - * This node is guarded by a condition that forces the accessed variable - * to equal something else. For example: - * ``` - * x = taintsource() - * if (x == 10) { - * taintsink(x); // not considered tainted - * } - * ``` - */ - - nodeIsBarrierEqualityCandidate(node, access, checkedVar) and - readsVariable(access.getDef(), checkedVar) - ) - } - - cached - predicate nodeIsBarrierIn(DataFlow::Node node) { - // don't use dataflow into taint sources, as this leads to duplicate results. - exists(Expr source | isUserInput(source, _) | - node = DataFlow::exprNode(source) - or - // This case goes together with the similar (but not identical) rule in - // `getNodeForSource`. - node = DataFlow::definitionByReferenceNodeFromArgument(source) - ) - or - // don't use dataflow into binary instructions if both operands are unpredictable - exists(BinaryInstruction iTo | - iTo = node.asInstruction() and - not predictableInstruction(iTo.getLeft()) and - not predictableInstruction(iTo.getRight()) and - // propagate taint from either the pointer or the offset, regardless of predictability - not iTo instanceof PointerArithmeticInstruction - ) - or - // don't use dataflow through calls to pure functions if two or more operands - // are unpredictable - exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo | - iTo = node.asInstruction() and - isPureFunction(iTo.getStaticCallTarget().getName()) and - iFrom1 = iTo.getAnArgument() and - iFrom2 = iTo.getAnArgument() and - not predictableInstruction(iFrom1) and - not predictableInstruction(iFrom2) and - iFrom1 != iFrom2 - ) - } - - cached - Element adjustedSink(DataFlow::Node sink) { - // TODO: is it more appropriate to use asConvertedExpr here and avoid - // `getConversion*`? Or will that cause us to miss some cases where there's - // flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to - // pretend there was flow to the converted `Expr` for the sake of - // compatibility. - sink.asExpr().getConversion*() = result - or - // For compatibility, send flow from arguments to parameters, even for - // functions with no body. - exists(FunctionCall call, int i | - sink.asExpr() = call.getArgument(pragma[only_bind_into](i)) and - result = resolveCall(call).getParameter(pragma[only_bind_into](i)) - ) - or - // For compatibility, send flow into a `Variable` if there is flow to any - // Load or Store of that variable. - exists(CopyInstruction copy | - copy.getSourceValue() = sink.asInstruction() and - ( - readsVariable(copy, result) or - writesVariable(copy, result) - ) and - not hasUpperBoundsCheck(result) - ) - or - // For compatibility, send flow into a `NotExpr` even if it's part of a - // short-circuiting condition and thus might get skipped. - result.(NotExpr).getOperand() = sink.asExpr() - or - // Taint postfix and prefix crement operations when their operand is tainted. - result.(CrementOperation).getAnOperand() = sink.asExpr() - or - // Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted. - result.(AssignOperation).getAnOperand() = sink.asExpr() - or - result = - sink.asOperand() - .(SideEffectOperand) - .getUse() - .(ReadSideEffectInstruction) - .getArgumentDef() - .getUnconvertedResultExpression() - } - - /** - * Step to return value of a modeled function when an input taints the - * dereference of the return value. - */ - cached - predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut | - n1.asOperand() = callInput(call, modelIn) and - ( - func.(TaintFunction).hasTaintFlow(modelIn, modelOut) - or - func.(DataFlowFunction).hasDataFlow(modelIn, modelOut) - ) and - call.getStaticCallTarget() = func and - modelOut.isReturnValueDeref() and - call = n2.asInstruction() - ) - } -} - -private import Cached - -/** - * Holds if `tainted` may contain taint from `source`. - * - * A tainted expression is either directly user input, or is - * computed from user input in a way that users can probably - * control the exact output of the computation. - * - * This doesn't include data flow through global variables. - * If you need that you must call `taintedIncludingGlobalVars`. - */ -cached -predicate tainted(Expr source, Element tainted) { - exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink | - cfg.hasFlow(getNodeForSource(source), sink) and - tainted = adjustedSink(sink) - ) -} - -/** - * Holds if `tainted` may contain taint from `source`, where the taint passed - * through a global variable named `globalVar`. - * - * A tainted expression is either directly user input, or is - * computed from user input in a way that users can probably - * control the exact output of the computation. - * - * This version gives the same results as tainted but also includes - * data flow through global variables. - * - * The parameter `globalVar` is the qualified name of the last global variable - * used to move the value from source to tainted. If the taint did not pass - * through a global variable, then `globalVar = ""`. - */ -cached -predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) { - tainted(source, tainted) and - globalVar = "" - or - exists( - ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg, - DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink - | - global = variableNode.getVariable() and - toCfg.hasFlow(getNodeForSource(source), variableNode) and - fromCfg.hasFlow(variableNode, sink) and - tainted = adjustedSink(sink) and - global = globalVarFromId(globalVar) - ) -} - -/** - * Gets the global variable whose qualified name is `id`. Use this predicate - * together with `taintedIncludingGlobalVars`. Example: - * - * ``` - * exists(string varName | - * taintedIncludingGlobalVars(source, tainted, varName) and - * var = globalVarFromId(varName) - * ) - * ``` - */ -GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() } - -/** - * Provides definitions for augmenting source/sink pairs with data-flow paths - * between them. From a `@kind path-problem` query, import this module in the - * global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath` - * in place of `tainted`. - * - * Importing this module will also import the query predicates that contain the - * taint paths. - */ -module TaintedWithPath { - private newtype TSingleton = MkSingleton() - - /** - * A taint-tracking configuration that matches sources and sinks in the same - * way as the `tainted` predicate. - * - * Override `isSink` and `taintThroughGlobals` as needed, but do not provide - * a characteristic predicate. - */ - class TaintTrackingConfiguration extends TSingleton { - /** Override this to specify which elements are sources in this configuration. */ - predicate isSource(Expr source) { exists(getNodeForSource(source)) } - - /** Override this to specify which elements are sinks in this configuration. */ - abstract predicate isSink(Element e); - - /** Override this to specify which expressions are barriers in this configuration. */ - predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) } - - /** - * Override this predicate to `any()` to allow taint to flow through global - * variables. - */ - predicate taintThroughGlobals() { none() } - - /** Gets a textual representation of this element. */ - string toString() { result = "TaintTrackingConfiguration" } - } - - private class AdjustedConfiguration extends TaintTracking3::Configuration { - AdjustedConfiguration() { this = "AdjustedConfiguration" } - - override predicate isSource(DataFlow::Node source) { - exists(TaintTrackingConfiguration cfg, Expr e | - cfg.isSource(e) and source = getNodeForExpr(e) - ) - } - - override predicate isSink(DataFlow::Node sink) { - exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink))) - } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - // Steps into and out of global variables - exists(TaintTrackingConfiguration cfg | cfg.taintThroughGlobals() | - writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable)) - or - readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable)) - ) - or - additionalTaintStep(n1, n2) - } - - override predicate isSanitizer(DataFlow::Node node) { - exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e)) - } - - override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } - } - - /* - * A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the - * inverse of) `adjustedSink`. For example, an `Expr` maps to all its - * conversions, and a `Variable` maps to all loads and stores from it. Because - * the path node is part of the tuple that constitutes the alert, this leads - * to duplicate alerts. - * - * To avoid showing duplicates, we edit the graph to replace the final node - * coming from the data-flow library with a node that matches exactly the - * `Element` sink that's requested. - * - * The same is done for sources. - */ - - private newtype TPathNode = - TWrapPathNode(DataFlow3::PathNode n) or - // There's a single newtype constructor for both sources and sinks since - // that makes it easiest to deal with the case where source = sink. - TEndpointPathNode(Element e) { - exists(AdjustedConfiguration cfg, DataFlow3::Node sourceNode, DataFlow3::Node sinkNode | - cfg.hasFlow(sourceNode, sinkNode) - | - sourceNode = getNodeForExpr(e) and - exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e)) - or - e = adjustedSink(sinkNode) and - exists(TaintTrackingConfiguration ttCfg | ttCfg.isSink(e)) - ) - } - - /** An opaque type used for the nodes of a data-flow path. */ - class PathNode extends TPathNode { - /** Gets a textual representation of this element. */ - string toString() { none() } - - /** - * Holds if this element is at the specified location. - * The location spans column `startcolumn` of line `startline` to - * column `endcolumn` of line `endline` in file `filepath`. - * For more information, see - * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). - */ - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - none() - } - } - - /** - * INTERNAL: Do not use. - */ - module Private { - /** Gets a predecessor `PathNode` of `pathNode`, if any. */ - PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) } - - /** Gets the element that `pathNode` wraps, if any. */ - Element getElementFromPathNode(PathNode pathNode) { - exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() | - result = node.asInstruction().getAst() - or - result = node.asOperand().getDef().getAst() - ) - or - result = pathNode.(EndpointPathNode).inner() - } - } - - private class WrapPathNode extends PathNode, TWrapPathNode { - DataFlow3::PathNode inner() { this = TWrapPathNode(result) } - - override string toString() { result = this.inner().toString() } - - override predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - } - } - - private class EndpointPathNode extends PathNode, TEndpointPathNode { - Expr inner() { this = TEndpointPathNode(result) } - - override string toString() { result = this.inner().toString() } - - override predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - this.inner() - .getLocation() - .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - } - } - - /** A PathNode whose `Element` is a source. It may also be a sink. */ - private class InitialPathNode extends EndpointPathNode { - InitialPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSource(this.inner())) } - } - - /** A PathNode whose `Element` is a sink. It may also be a source. */ - private class FinalPathNode extends EndpointPathNode { - FinalPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSink(this.inner())) } - } - - /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ - query predicate edges(PathNode a, PathNode b) { - DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner()) - or - // To avoid showing trivial-looking steps, we _replace_ the last node instead - // of adding an edge out of it. - exists(WrapPathNode sinkNode | - DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), sinkNode.inner()) and - b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) - ) - or - // Same for the first node - exists(WrapPathNode sourceNode | - DataFlow3::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and - sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) - ) - or - // Finally, handle the case where the path goes directly from a source to a - // sink, meaning that they both need to be translated. - exists(WrapPathNode sinkNode, WrapPathNode sourceNode | - DataFlow3::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and - sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and - b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) - ) - } - - /** - * Holds if there is flow from `arg` to `out` across a call that can by summarized by the flow - * from `par` to `ret` within it, in the graph of data flow path explanations. - */ - query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { - DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(), - ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) - or - // To avoid showing trivial-looking steps, we _replace_ the last node instead - // of adding an edge out of it. - exists(WrapPathNode sinkNode | - DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(), - ret.(WrapPathNode).inner(), sinkNode.inner()) and - out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) - ) - or - // Same for the first node - exists(WrapPathNode sourceNode | - DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(), - ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and - sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) - ) - or - // Finally, handle the case where the path goes directly from a source to a - // sink, meaning that they both need to be translated. - exists(WrapPathNode sinkNode, WrapPathNode sourceNode | - DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(), - ret.(WrapPathNode).inner(), sinkNode.inner()) and - sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and - out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) - ) - } - - /** Holds if `n` is a node in the graph of data flow path explanations. */ - query predicate nodes(PathNode n, string key, string val) { - key = "semmle.label" and val = n.toString() - } - - /** - * Holds if `tainted` may contain taint from `source`, where `sourceNode` and - * `sinkNode` are the corresponding `PathNode`s that can be used in a query - * to provide path explanations. Extend `TaintTrackingConfiguration` to use - * this predicate. - * - * A tainted expression is either directly user input, or is computed from - * user input in a way that users can probably control the exact output of - * the computation. - */ - predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) { - exists(AdjustedConfiguration cfg, DataFlow3::Node flowSource, DataFlow3::Node flowSink | - source = sourceNode.(InitialPathNode).inner() and - flowSource = getNodeForExpr(source) and - cfg.hasFlow(flowSource, flowSink) and - tainted = adjustedSink(flowSink) and - tainted = sinkNode.(FinalPathNode).inner() - ) - } - - private predicate isGlobalVariablePathNode(WrapPathNode n) { - n.inner().getNode().asVariable() instanceof GlobalOrNamespaceVariable - } - - private predicate edgesWithoutGlobals(PathNode a, PathNode b) { - edges(a, b) and - not isGlobalVariablePathNode(a) and - not isGlobalVariablePathNode(b) - } - - /** - * Holds if `tainted` can be reached from a taint source without passing - * through a global variable. - */ - predicate taintedWithoutGlobals(Element tainted) { - exists(AdjustedConfiguration cfg, PathNode sourceNode, FinalPathNode sinkNode | - cfg.isSource(sourceNode.(WrapPathNode).inner().getNode()) and - edgesWithoutGlobals+(sourceNode, sinkNode) and - tainted = sinkNode.inner() - ) - } -} +deprecated module TaintedWithPath = DefaultTaintTrackingImpl::TaintedWithPath; diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DefaultTaintTrackingImpl.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DefaultTaintTrackingImpl.qll new file mode 100644 index 00000000000..d6a43ed2975 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DefaultTaintTrackingImpl.qll @@ -0,0 +1,644 @@ +/** + * INTERNAL: Do not use. + * + * An IR taint tracking library that uses an IR DataFlow configuration to track + * taint from user inputs as defined by `semmle.code.cpp.security.Security`. + */ + +import cpp +import semmle.code.cpp.security.Security +private import semmle.code.cpp.ir.dataflow.DataFlow +private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import semmle.code.cpp.ir.dataflow.DataFlow3 +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.ResolveCall +private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.models.interfaces.Taint +private import semmle.code.cpp.models.interfaces.DataFlow +private import semmle.code.cpp.ir.dataflow.TaintTracking +private import semmle.code.cpp.ir.dataflow.TaintTracking2 +private import semmle.code.cpp.ir.dataflow.TaintTracking3 +private import semmle.code.cpp.ir.dataflow.internal.ModelUtil + +/** + * A predictable instruction is one where an external user can predict + * the value. For example, a literal in the source code is considered + * predictable. + */ +private predicate predictableInstruction(Instruction instr) { + instr instanceof ConstantInstruction + or + instr instanceof StringConstantInstruction + or + // This could be a conversion on a string literal + predictableInstruction(instr.(UnaryInstruction).getUnary()) +} + +/** + * Functions that we should only allow taint to flow through (to the return + * value) if all but the source argument are 'predictable'. This is done to + * emulate the old security library's implementation rather than due to any + * strong belief that this is the right approach. + * + * Note that the list itself is not very principled; it consists of all the + * functions listed in the old security library's [default] `isPureFunction` + * that have more than one argument, but are not in the old taint tracking + * library's `returnArgument` predicate. + */ +predicate predictableOnlyFlow(string name) { + name = + [ + "strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup", + "strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq", + "strtoul" + ] +} + +private DataFlow::Node getNodeForSource(Expr source) { + isUserInput(source, _) and + result = getNodeForExpr(source) +} + +private DataFlow::Node getNodeForExpr(Expr node) { + result = DataFlow::exprNode(node) + or + // Some of the sources in `isUserInput` are intended to match the value of + // an expression, while others (those modeled below) are intended to match + // the taint that propagates out of an argument, like the `char *` argument + // to `gets`. It's impossible here to tell which is which, but the "access + // to argv" source is definitely not intended to match an output argument, + // and it causes false positives if we let it. + // + // This case goes together with the similar (but not identical) rule in + // `nodeIsBarrierIn`. + result = DataFlow::definitionByReferenceNodeFromArgument(node) and + not argv(node.(VariableAccess).getTarget()) +} + +private class DefaultTaintTrackingCfg extends TaintTracking::Configuration { + DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" } + + override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) } + + override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) } + + override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } + + override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } +} + +private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration { + ToGlobalVarTaintTrackingCfg() { this = "GlobalVarTaintTrackingCfg" } + + override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) } + + override predicate isSink(DataFlow::Node sink) { + sink.asVariable() instanceof GlobalOrNamespaceVariable + } + + override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { + writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable)) + or + readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable)) + } + + override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } + + override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } +} + +private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuration { + FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" } + + override predicate isSource(DataFlow::Node source) { + // This set of sources should be reasonably small, which is good for + // performance since the set of sinks is very large. + exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source)) + } + + override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) } + + override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { + // Additional step for flow out of variables. There is no flow _into_ + // variables in this configuration, so this step only serves to take flow + // out of a variable that's a source. + readsVariable(n2.asInstruction(), n1.asVariable()) + } + + override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) } + + override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } +} + +private predicate readsVariable(LoadInstruction load, Variable var) { + load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var +} + +private predicate writesVariable(StoreInstruction store, Variable var) { + store.getDestinationAddress().(VariableAddressInstruction).getAstVariable() = var +} + +/** + * A variable that has any kind of upper-bound check anywhere in the program. This is + * biased towards being inclusive because there are a lot of valid ways of doing an + * upper bounds checks if we don't consider where it occurs, for example: + * ``` + * if (x < 10) { sink(x); } + * + * if (10 > y) { sink(y); } + * + * if (z > 10) { z = 10; } + * sink(z); + * ``` + */ +// TODO: This coarse overapproximation, ported from the old taint tracking +// library, could be replaced with an actual semantic check that a particular +// variable _access_ is guarded by an upper-bound check. We probably don't want +// to do this right away since it could expose a lot of FPs that were +// previously suppressed by this predicate by coincidence. +private predicate hasUpperBoundsCheck(Variable var) { + exists(RelationalOperation oper, VariableAccess access | + oper.getAnOperand() = access and + access.getTarget() = var and + // Comparing to 0 is not an upper bound check + not oper.getAnOperand().getValue() = "0" + ) +} + +private predicate nodeIsBarrierEqualityCandidate( + DataFlow::Node node, Operand access, Variable checkedVar +) { + readsVariable(node.asInstruction(), checkedVar) and + any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true) +} + +cached +private module Cached { + cached + predicate nodeIsBarrier(DataFlow::Node node) { + exists(Variable checkedVar | + readsVariable(node.asInstruction(), checkedVar) and + hasUpperBoundsCheck(checkedVar) + ) + or + exists(Variable checkedVar, Operand access | + /* + * This node is guarded by a condition that forces the accessed variable + * to equal something else. For example: + * ``` + * x = taintsource() + * if (x == 10) { + * taintsink(x); // not considered tainted + * } + * ``` + */ + + nodeIsBarrierEqualityCandidate(node, access, checkedVar) and + readsVariable(access.getDef(), checkedVar) + ) + } + + cached + predicate nodeIsBarrierIn(DataFlow::Node node) { + // don't use dataflow into taint sources, as this leads to duplicate results. + exists(Expr source | isUserInput(source, _) | + node = DataFlow::exprNode(source) + or + // This case goes together with the similar (but not identical) rule in + // `getNodeForSource`. + node = DataFlow::definitionByReferenceNodeFromArgument(source) + ) + or + // don't use dataflow into binary instructions if both operands are unpredictable + exists(BinaryInstruction iTo | + iTo = node.asInstruction() and + not predictableInstruction(iTo.getLeft()) and + not predictableInstruction(iTo.getRight()) and + // propagate taint from either the pointer or the offset, regardless of predictability + not iTo instanceof PointerArithmeticInstruction + ) + or + // don't use dataflow through calls to pure functions if two or more operands + // are unpredictable + exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo | + iTo = node.asInstruction() and + isPureFunction(iTo.getStaticCallTarget().getName()) and + iFrom1 = iTo.getAnArgument() and + iFrom2 = iTo.getAnArgument() and + not predictableInstruction(iFrom1) and + not predictableInstruction(iFrom2) and + iFrom1 != iFrom2 + ) + } + + cached + Element adjustedSink(DataFlow::Node sink) { + // TODO: is it more appropriate to use asConvertedExpr here and avoid + // `getConversion*`? Or will that cause us to miss some cases where there's + // flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to + // pretend there was flow to the converted `Expr` for the sake of + // compatibility. + sink.asExpr().getConversion*() = result + or + // For compatibility, send flow from arguments to parameters, even for + // functions with no body. + exists(FunctionCall call, int i | + sink.asExpr() = call.getArgument(pragma[only_bind_into](i)) and + result = resolveCall(call).getParameter(pragma[only_bind_into](i)) + ) + or + // For compatibility, send flow into a `Variable` if there is flow to any + // Load or Store of that variable. + exists(CopyInstruction copy | + copy.getSourceValue() = sink.asInstruction() and + ( + readsVariable(copy, result) or + writesVariable(copy, result) + ) and + not hasUpperBoundsCheck(result) + ) + or + // For compatibility, send flow into a `NotExpr` even if it's part of a + // short-circuiting condition and thus might get skipped. + result.(NotExpr).getOperand() = sink.asExpr() + or + // Taint postfix and prefix crement operations when their operand is tainted. + result.(CrementOperation).getAnOperand() = sink.asExpr() + or + // Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted. + result.(AssignOperation).getAnOperand() = sink.asExpr() + or + result = + sink.asOperand() + .(SideEffectOperand) + .getUse() + .(ReadSideEffectInstruction) + .getArgumentDef() + .getUnconvertedResultExpression() + } + + /** + * Step to return value of a modeled function when an input taints the + * dereference of the return value. + */ + cached + predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { + exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut | + n1.asOperand() = callInput(call, modelIn) and + ( + func.(TaintFunction).hasTaintFlow(modelIn, modelOut) + or + func.(DataFlowFunction).hasDataFlow(modelIn, modelOut) + ) and + call.getStaticCallTarget() = func and + modelOut.isReturnValueDeref() and + call = n2.asInstruction() + ) + } +} + +private import Cached + +/** + * Holds if `tainted` may contain taint from `source`. + * + * A tainted expression is either directly user input, or is + * computed from user input in a way that users can probably + * control the exact output of the computation. + * + * This doesn't include data flow through global variables. + * If you need that you must call `taintedIncludingGlobalVars`. + */ +cached +predicate tainted(Expr source, Element tainted) { + exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink | + cfg.hasFlow(getNodeForSource(source), sink) and + tainted = adjustedSink(sink) + ) +} + +/** + * Holds if `tainted` may contain taint from `source`, where the taint passed + * through a global variable named `globalVar`. + * + * A tainted expression is either directly user input, or is + * computed from user input in a way that users can probably + * control the exact output of the computation. + * + * This version gives the same results as tainted but also includes + * data flow through global variables. + * + * The parameter `globalVar` is the qualified name of the last global variable + * used to move the value from source to tainted. If the taint did not pass + * through a global variable, then `globalVar = ""`. + */ +cached +predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) { + tainted(source, tainted) and + globalVar = "" + or + exists( + ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg, + DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink + | + global = variableNode.getVariable() and + toCfg.hasFlow(getNodeForSource(source), variableNode) and + fromCfg.hasFlow(variableNode, sink) and + tainted = adjustedSink(sink) and + global = globalVarFromId(globalVar) + ) +} + +/** + * Gets the global variable whose qualified name is `id`. Use this predicate + * together with `taintedIncludingGlobalVars`. Example: + * + * ``` + * exists(string varName | + * taintedIncludingGlobalVars(source, tainted, varName) and + * var = globalVarFromId(varName) + * ) + * ``` + */ +GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() } + +/** + * Provides definitions for augmenting source/sink pairs with data-flow paths + * between them. From a `@kind path-problem` query, import this module in the + * global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath` + * in place of `tainted`. + * + * Importing this module will also import the query predicates that contain the + * taint paths. + */ +module TaintedWithPath { + private newtype TSingleton = MkSingleton() + + /** + * A taint-tracking configuration that matches sources and sinks in the same + * way as the `tainted` predicate. + * + * Override `isSink` and `taintThroughGlobals` as needed, but do not provide + * a characteristic predicate. + */ + class TaintTrackingConfiguration extends TSingleton { + /** Override this to specify which elements are sources in this configuration. */ + predicate isSource(Expr source) { exists(getNodeForSource(source)) } + + /** Override this to specify which elements are sinks in this configuration. */ + abstract predicate isSink(Element e); + + /** Override this to specify which expressions are barriers in this configuration. */ + predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) } + + /** + * Override this predicate to `any()` to allow taint to flow through global + * variables. + */ + predicate taintThroughGlobals() { none() } + + /** Gets a textual representation of this element. */ + string toString() { result = "TaintTrackingConfiguration" } + } + + private class AdjustedConfiguration extends TaintTracking3::Configuration { + AdjustedConfiguration() { this = "AdjustedConfiguration" } + + override predicate isSource(DataFlow::Node source) { + exists(TaintTrackingConfiguration cfg, Expr e | + cfg.isSource(e) and source = getNodeForExpr(e) + ) + } + + override predicate isSink(DataFlow::Node sink) { + exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink))) + } + + override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { + // Steps into and out of global variables + exists(TaintTrackingConfiguration cfg | cfg.taintThroughGlobals() | + writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable)) + or + readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable)) + ) + or + additionalTaintStep(n1, n2) + } + + override predicate isSanitizer(DataFlow::Node node) { + exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e)) + } + + override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) } + } + + /* + * A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the + * inverse of) `adjustedSink`. For example, an `Expr` maps to all its + * conversions, and a `Variable` maps to all loads and stores from it. Because + * the path node is part of the tuple that constitutes the alert, this leads + * to duplicate alerts. + * + * To avoid showing duplicates, we edit the graph to replace the final node + * coming from the data-flow library with a node that matches exactly the + * `Element` sink that's requested. + * + * The same is done for sources. + */ + + private newtype TPathNode = + TWrapPathNode(DataFlow3::PathNode n) or + // There's a single newtype constructor for both sources and sinks since + // that makes it easiest to deal with the case where source = sink. + TEndpointPathNode(Element e) { + exists(AdjustedConfiguration cfg, DataFlow3::Node sourceNode, DataFlow3::Node sinkNode | + cfg.hasFlow(sourceNode, sinkNode) + | + sourceNode = getNodeForExpr(e) and + exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e)) + or + e = adjustedSink(sinkNode) and + exists(TaintTrackingConfiguration ttCfg | ttCfg.isSink(e)) + ) + } + + /** An opaque type used for the nodes of a data-flow path. */ + class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + } + + /** + * INTERNAL: Do not use. + */ + module Private { + /** Gets a predecessor `PathNode` of `pathNode`, if any. */ + PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) } + + /** Gets the element that `pathNode` wraps, if any. */ + Element getElementFromPathNode(PathNode pathNode) { + exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() | + result = node.asInstruction().getAst() + or + result = node.asOperand().getDef().getAst() + ) + or + result = pathNode.(EndpointPathNode).inner() + } + } + + private class WrapPathNode extends PathNode, TWrapPathNode { + DataFlow3::PathNode inner() { this = TWrapPathNode(result) } + + override string toString() { result = this.inner().toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + private class EndpointPathNode extends PathNode, TEndpointPathNode { + Expr inner() { this = TEndpointPathNode(result) } + + override string toString() { result = this.inner().toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.inner() + .getLocation() + .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** A PathNode whose `Element` is a source. It may also be a sink. */ + private class InitialPathNode extends EndpointPathNode { + InitialPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSource(this.inner())) } + } + + /** A PathNode whose `Element` is a sink. It may also be a source. */ + private class FinalPathNode extends EndpointPathNode { + FinalPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSink(this.inner())) } + } + + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { + DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner()) + or + // To avoid showing trivial-looking steps, we _replace_ the last node instead + // of adding an edge out of it. + exists(WrapPathNode sinkNode | + DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), sinkNode.inner()) and + b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) + ) + or + // Same for the first node + exists(WrapPathNode sourceNode | + DataFlow3::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and + sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) + ) + or + // Finally, handle the case where the path goes directly from a source to a + // sink, meaning that they both need to be translated. + exists(WrapPathNode sinkNode, WrapPathNode sourceNode | + DataFlow3::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and + sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and + b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) + ) + } + + /** + * Holds if there is flow from `arg` to `out` across a call that can by summarized by the flow + * from `par` to `ret` within it, in the graph of data flow path explanations. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(), + ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) + or + // To avoid showing trivial-looking steps, we _replace_ the last node instead + // of adding an edge out of it. + exists(WrapPathNode sinkNode | + DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(), + ret.(WrapPathNode).inner(), sinkNode.inner()) and + out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) + ) + or + // Same for the first node + exists(WrapPathNode sourceNode | + DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(), + ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and + sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) + ) + or + // Finally, handle the case where the path goes directly from a source to a + // sink, meaning that they both need to be translated. + exists(WrapPathNode sinkNode, WrapPathNode sourceNode | + DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(), + ret.(WrapPathNode).inner(), sinkNode.inner()) and + sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and + out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode()) + ) + } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `tainted` may contain taint from `source`, where `sourceNode` and + * `sinkNode` are the corresponding `PathNode`s that can be used in a query + * to provide path explanations. Extend `TaintTrackingConfiguration` to use + * this predicate. + * + * A tainted expression is either directly user input, or is computed from + * user input in a way that users can probably control the exact output of + * the computation. + */ + predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) { + exists(AdjustedConfiguration cfg, DataFlow3::Node flowSource, DataFlow3::Node flowSink | + source = sourceNode.(InitialPathNode).inner() and + flowSource = getNodeForExpr(source) and + cfg.hasFlow(flowSource, flowSink) and + tainted = adjustedSink(flowSink) and + tainted = sinkNode.(FinalPathNode).inner() + ) + } + + private predicate isGlobalVariablePathNode(WrapPathNode n) { + n.inner().getNode().asVariable() instanceof GlobalOrNamespaceVariable + } + + private predicate edgesWithoutGlobals(PathNode a, PathNode b) { + edges(a, b) and + not isGlobalVariablePathNode(a) and + not isGlobalVariablePathNode(b) + } + + /** + * Holds if `tainted` can be reached from a taint source without passing + * through a global variable. + */ + predicate taintedWithoutGlobals(Element tainted) { + exists(AdjustedConfiguration cfg, PathNode sourceNode, FinalPathNode sinkNode | + cfg.isSource(sourceNode.(WrapPathNode).inner().getNode()) and + edgesWithoutGlobals+(sourceNode, sinkNode) and + tainted = sinkNode.inner() + ) + } +} diff --git a/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll b/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll index 532cf53e2d4..4d64edc2670 100644 --- a/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll +++ b/cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll @@ -121,7 +121,9 @@ private predicate moveToDependingOnSide(Expr src, Expr dest) { * (this is done to avoid false positives). Because of this we need to track if the tainted element came from an argument * or not, and for that we use destFromArg */ -private predicate betweenFunctionsValueMoveTo(Element src, Element dest, boolean destFromArg) { +deprecated private predicate betweenFunctionsValueMoveTo( + Element src, Element dest, boolean destFromArg +) { not unreachable(src) and not unreachable(dest) and ( @@ -162,13 +164,13 @@ private predicate betweenFunctionsValueMoveTo(Element src, Element dest, boolean // predicate folding for proper join-order // bad magic: pushes down predicate that ruins join-order pragma[nomagic] -private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) { +deprecated private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) { called = resolveCall(call) and p = called.getParameter(i) } /** A variable for which flow through is allowed. */ -library class FlowVariable extends Variable { +deprecated library class FlowVariable extends Variable { FlowVariable() { ( this instanceof LocalScopeVariable or @@ -179,11 +181,11 @@ library class FlowVariable extends Variable { } /** A local scope variable for which flow through is allowed. */ -library class FlowLocalScopeVariable extends Variable { +deprecated library class FlowLocalScopeVariable extends Variable { FlowLocalScopeVariable() { this instanceof LocalScopeVariable } } -private predicate insideFunctionValueMoveTo(Element src, Element dest) { +deprecated private predicate insideFunctionValueMoveTo(Element src, Element dest) { not unreachable(src) and not unreachable(dest) and ( @@ -324,7 +326,7 @@ private predicate unionAccess(Variable v, Field f, FieldAccess a) { a.getQualifier() = v.getAnAccess() } -GlobalOrNamespaceVariable globalVarFromId(string id) { +deprecated GlobalOrNamespaceVariable globalVarFromId(string id) { if result instanceof NamespaceVariable then id = result.getNamespace() + "::" + result.getName() else id = result.getName() @@ -353,7 +355,7 @@ private predicate hasUpperBoundsCheck(Variable var) { } cached -private predicate taintedWithArgsAndGlobalVars( +deprecated private predicate taintedWithArgsAndGlobalVars( Element src, Element dest, boolean destFromArg, string globalVar ) { isUserInput(src, _) and @@ -395,7 +397,7 @@ private predicate taintedWithArgsAndGlobalVars( * This doesn't include data flow through global variables. * If you need that you must call taintedIncludingGlobalVars. */ -predicate tainted(Expr source, Element tainted) { +deprecated predicate tainted(Expr source, Element tainted) { taintedWithArgsAndGlobalVars(source, tainted, _, "") } @@ -410,7 +412,7 @@ predicate tainted(Expr source, Element tainted) { * The parameter `globalVar` is the name of the last global variable used to move the * value from source to tainted. */ -predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) { +deprecated predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) { taintedWithArgsAndGlobalVars(source, tainted, _, globalVar) } @@ -541,14 +543,14 @@ private predicate returnArgument(Function f, int sourceArg) { * targets a virtual method, simple data flow analysis is performed * in order to identify target(s). */ -Function resolveCall(Call call) { +deprecated Function resolveCall(Call call) { result = call.getTarget() or result = call.(DataSensitiveCallExpr).resolve() } /** A data sensitive call expression. */ -abstract library class DataSensitiveCallExpr extends Expr { +abstract deprecated library class DataSensitiveCallExpr extends Expr { DataSensitiveCallExpr() { not unreachable(this) } abstract Expr getSrc(); @@ -579,7 +581,7 @@ abstract library class DataSensitiveCallExpr extends Expr { } /** Call through a function pointer. */ -library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall { +deprecated library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall { override Expr getSrc() { result = getExpr() } override Function resolve() { @@ -588,7 +590,8 @@ library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall { } /** Call to a virtual function. */ -library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr, FunctionCall { +deprecated library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr, + FunctionCall { DataSensitiveOverriddenFunctionCall() { exists(getTarget().(VirtualFunction).getAnOverridingFunction()) } diff --git a/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql b/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql index bb38609927e..ffadb381a76 100644 --- a/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql +++ b/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql @@ -13,7 +13,7 @@ import cpp import semmle.code.cpp.commons.Environment -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath /** A call that prints its arguments to `stdout`. */ diff --git a/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql b/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql index b46314d8454..f3ff16e8f7b 100644 --- a/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-089/SqlTainted.ql @@ -15,7 +15,7 @@ import cpp import semmle.code.cpp.security.Security import semmle.code.cpp.security.FunctionWithWrappers -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath class SqlLikeFunction extends FunctionWithWrappers { diff --git a/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql b/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql index 7cbb0d81d49..883a6b07423 100644 --- a/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql +++ b/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql @@ -14,7 +14,7 @@ import cpp import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath predicate isProcessOperationExplanation(Expr arg, string processOperation) { diff --git a/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql b/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql index a2d74e7557a..8f8d98900f2 100644 --- a/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql +++ b/cpp/ql/src/Security/CWE/CWE-120/UnboundedWrite.ql @@ -16,7 +16,7 @@ import semmle.code.cpp.security.BufferWrite import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath /* diff --git a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql index 11cbfe16a0d..3a5cb7931b2 100644 --- a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql +++ b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql @@ -16,7 +16,7 @@ import cpp import semmle.code.cpp.security.Security import semmle.code.cpp.security.FunctionWithWrappers -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath class Configuration extends TaintTrackingConfiguration { diff --git a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql index a4fd672f518..b37e34c296c 100644 --- a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql +++ b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatStringThroughGlobalVar.ql @@ -16,7 +16,7 @@ import cpp import semmle.code.cpp.security.FunctionWithWrappers import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath class Configuration extends TaintTrackingConfiguration { diff --git a/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql b/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql index 762fcf573d1..ff2e7e924df 100644 --- a/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql @@ -12,7 +12,7 @@ import cpp import semmle.code.cpp.commons.NullTermination -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl /** A user-controlled expression that may not be null terminated. */ class TaintSource extends VariableAccess { diff --git a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql index 350005194ac..07fbdc1ff44 100644 --- a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql @@ -15,7 +15,7 @@ import cpp import semmle.code.cpp.security.Overflow import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath import Bounded diff --git a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql index e54951b33e7..6034b4f5ffd 100644 --- a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql +++ b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticWithExtremeValues.ql @@ -17,7 +17,7 @@ import cpp import semmle.code.cpp.security.Overflow import semmle.code.cpp.security.Security -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl predicate isMaxValue(Expr mie) { exists(MacroInvocation mi | diff --git a/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql b/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql index 7cbd53ebf8a..19fe7df4c44 100644 --- a/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-190/IntegerOverflowTainted.ql @@ -15,7 +15,7 @@ import cpp import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl /** Holds if `expr` might overflow. */ predicate outOfBoundsExpr(Expr expr, string kind) { diff --git a/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql b/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql index 814c6aff21b..06c4ee5efe5 100644 --- a/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql +++ b/cpp/ql/src/Security/CWE/CWE-290/AuthenticationBypass.ql @@ -12,7 +12,7 @@ * external/cwe/cwe-290 */ -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath predicate hardCodedAddressOrIP(StringLiteral txt) { diff --git a/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql b/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql index 14cd2a2188a..c5d7f2cbb61 100644 --- a/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql +++ b/cpp/ql/src/Security/CWE/CWE-807/TaintedCondition.ql @@ -12,7 +12,7 @@ * external/cwe/cwe-807 */ -import semmle.code.cpp.security.TaintTracking +import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl import TaintedWithPath predicate sensitiveCondition(Expr condition, Expr raise) { diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/tainted.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/tainted.expected index e69de29bb2d..6f00f28e455 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/tainted.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_path_to_sink/tainted.expected @@ -0,0 +1,2 @@ +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted.ql:9,8-47) +WARNING: Predicate tainted has been deprecated and may be removed in future (tainted.ql:20,49-74) diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/tainted.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/tainted.expected index e69de29bb2d..5c235d0802d 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/tainted.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/annotate_sinks_only/tainted.expected @@ -0,0 +1,2 @@ +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted.ql:10,8-47) +WARNING: Predicate tainted has been deprecated and may be removed in future (tainted.ql:21,3-28) diff --git a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/globals/global.expected b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/globals/global.expected index e69de29bb2d..b1d79c1079e 100644 --- a/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/globals/global.expected +++ b/cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/globals/global.expected @@ -0,0 +1,2 @@ +WARNING: Predicate taintedIncludingGlobalVars has been deprecated and may be removed in future (global.ql:8,3-47) +WARNING: Predicate taintedIncludingGlobalVars has been deprecated and may be removed in future (global.ql:12,3-53) diff --git a/cpp/ql/test/library-tests/dataflow/security-taint/tainted.expected b/cpp/ql/test/library-tests/dataflow/security-taint/tainted.expected index cdea135e301..b1e0d3635a3 100644 --- a/cpp/ql/test/library-tests/dataflow/security-taint/tainted.expected +++ b/cpp/ql/test/library-tests/dataflow/security-taint/tainted.expected @@ -1,3 +1,4 @@ +WARNING: Predicate taintedIncludingGlobalVars has been deprecated and may be removed in future (tainted.ql:5,3-29) | test.cpp:23:23:23:28 | call to getenv | test.cpp:8:24:8:25 | s1 | | | test.cpp:23:23:23:28 | call to getenv | test.cpp:23:14:23:19 | envStr | | | test.cpp:23:23:23:28 | call to getenv | test.cpp:23:23:23:28 | call to getenv | | diff --git a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected index ac95d5659a3..5d732c45866 100644 --- a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected +++ b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_diff.expected @@ -1,3 +1,8 @@ +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted_diff.ql:5,35-54) +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted_diff.ql:12,7-26) +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted_diff.ql:16,3-22) +WARNING: Predicate taintedIncludingGlobalVars has been deprecated and may be removed in future (tainted_diff.ql:11,3-34) +WARNING: Predicate taintedIncludingGlobalVars has been deprecated and may be removed in future (tainted_diff.ql:17,7-38) | test.cpp:23:23:23:28 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only | | test.cpp:23:23:23:28 | call to getenv | test.cpp:23:14:23:19 | envStr | AST only | | test.cpp:38:23:38:28 | call to getenv | test.cpp:8:24:8:25 | s1 | AST only | diff --git a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected index 01d8f5092ed..4b439122fa3 100644 --- a/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected +++ b/cpp/ql/test/library-tests/dataflow/security-taint/tainted_ir.expected @@ -1,3 +1,5 @@ +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted_ir.ql:3,35-50) +WARNING: Module TaintedWithPath has been deprecated and may be removed in future (tainted_ir.ql:9,3-18) | test.cpp:23:23:23:28 | call to getenv | test.cpp:23:23:23:28 | call to getenv | | test.cpp:23:23:23:28 | call to getenv | test.cpp:23:23:23:40 | (const char *)... | | test.cpp:23:23:23:28 | call to getenv | test.cpp:25:6:25:29 | ! ... |