mirror of
https://github.com/github/codeql.git
synced 2025-12-20 18:56:32 +01:00
C++: Deprecate DefaultTaintTracking and TaintTrackingImpl
This commit is contained in:
@@ -1,642 +1,21 @@
|
||||
/**
|
||||
* DEPRECATED: Use `semmle.code.cpp.ir.dataflow.TaintTracking` as a replacement.
|
||||
*
|
||||
* An IR taint tracking library that uses an IR DataFlow configuration to track
|
||||
* taint from user inputs as defined by `semmle.code.cpp.security.Security`.
|
||||
*/
|
||||
|
||||
import cpp
|
||||
import semmle.code.cpp.security.Security
|
||||
private import semmle.code.cpp.ir.dataflow.DataFlow
|
||||
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
|
||||
private import semmle.code.cpp.ir.dataflow.DataFlow3
|
||||
private import semmle.code.cpp.ir.IR
|
||||
private import semmle.code.cpp.ir.dataflow.ResolveCall
|
||||
private import semmle.code.cpp.controlflow.IRGuards
|
||||
private import semmle.code.cpp.models.interfaces.Taint
|
||||
private import semmle.code.cpp.models.interfaces.DataFlow
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking2
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking3
|
||||
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
|
||||
private import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl as DefaultTaintTrackingImpl
|
||||
|
||||
/**
|
||||
* A predictable instruction is one where an external user can predict
|
||||
* the value. For example, a literal in the source code is considered
|
||||
* predictable.
|
||||
*/
|
||||
private predicate predictableInstruction(Instruction instr) {
|
||||
instr instanceof ConstantInstruction
|
||||
or
|
||||
instr instanceof StringConstantInstruction
|
||||
or
|
||||
// This could be a conversion on a string literal
|
||||
predictableInstruction(instr.(UnaryInstruction).getUnary())
|
||||
}
|
||||
deprecated predicate predictableOnlyFlow = DefaultTaintTrackingImpl::predictableOnlyFlow/1;
|
||||
|
||||
/**
|
||||
* Functions that we should only allow taint to flow through (to the return
|
||||
* value) if all but the source argument are 'predictable'. This is done to
|
||||
* emulate the old security library's implementation rather than due to any
|
||||
* strong belief that this is the right approach.
|
||||
*
|
||||
* Note that the list itself is not very principled; it consists of all the
|
||||
* functions listed in the old security library's [default] `isPureFunction`
|
||||
* that have more than one argument, but are not in the old taint tracking
|
||||
* library's `returnArgument` predicate.
|
||||
*/
|
||||
predicate predictableOnlyFlow(string name) {
|
||||
name =
|
||||
[
|
||||
"strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup",
|
||||
"strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq",
|
||||
"strtoul"
|
||||
]
|
||||
}
|
||||
deprecated predicate tainted = DefaultTaintTrackingImpl::tainted/2;
|
||||
|
||||
private DataFlow::Node getNodeForSource(Expr source) {
|
||||
isUserInput(source, _) and
|
||||
result = getNodeForExpr(source)
|
||||
}
|
||||
deprecated predicate taintedIncludingGlobalVars =
|
||||
DefaultTaintTrackingImpl::taintedIncludingGlobalVars/3;
|
||||
|
||||
private DataFlow::Node getNodeForExpr(Expr node) {
|
||||
result = DataFlow::exprNode(node)
|
||||
or
|
||||
// Some of the sources in `isUserInput` are intended to match the value of
|
||||
// an expression, while others (those modeled below) are intended to match
|
||||
// the taint that propagates out of an argument, like the `char *` argument
|
||||
// to `gets`. It's impossible here to tell which is which, but the "access
|
||||
// to argv" source is definitely not intended to match an output argument,
|
||||
// and it causes false positives if we let it.
|
||||
//
|
||||
// This case goes together with the similar (but not identical) rule in
|
||||
// `nodeIsBarrierIn`.
|
||||
result = DataFlow::definitionByReferenceNodeFromArgument(node) and
|
||||
not argv(node.(VariableAccess).getTarget())
|
||||
}
|
||||
deprecated predicate globalVarFromId = DefaultTaintTrackingImpl::globalVarFromId/1;
|
||||
|
||||
private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
ToGlobalVarTaintTrackingCfg() { this = "GlobalVarTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink.asVariable() instanceof GlobalOrNamespaceVariable
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
|
||||
or
|
||||
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuration {
|
||||
FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
// This set of sources should be reasonably small, which is good for
|
||||
// performance since the set of sinks is very large.
|
||||
exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source))
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
// Additional step for flow out of variables. There is no flow _into_
|
||||
// variables in this configuration, so this step only serves to take flow
|
||||
// out of a variable that's a source.
|
||||
readsVariable(n2.asInstruction(), n1.asVariable())
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private predicate readsVariable(LoadInstruction load, Variable var) {
|
||||
load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var
|
||||
}
|
||||
|
||||
private predicate writesVariable(StoreInstruction store, Variable var) {
|
||||
store.getDestinationAddress().(VariableAddressInstruction).getAstVariable() = var
|
||||
}
|
||||
|
||||
/**
|
||||
* A variable that has any kind of upper-bound check anywhere in the program. This is
|
||||
* biased towards being inclusive because there are a lot of valid ways of doing an
|
||||
* upper bounds checks if we don't consider where it occurs, for example:
|
||||
* ```
|
||||
* if (x < 10) { sink(x); }
|
||||
*
|
||||
* if (10 > y) { sink(y); }
|
||||
*
|
||||
* if (z > 10) { z = 10; }
|
||||
* sink(z);
|
||||
* ```
|
||||
*/
|
||||
// TODO: This coarse overapproximation, ported from the old taint tracking
|
||||
// library, could be replaced with an actual semantic check that a particular
|
||||
// variable _access_ is guarded by an upper-bound check. We probably don't want
|
||||
// to do this right away since it could expose a lot of FPs that were
|
||||
// previously suppressed by this predicate by coincidence.
|
||||
private predicate hasUpperBoundsCheck(Variable var) {
|
||||
exists(RelationalOperation oper, VariableAccess access |
|
||||
oper.getAnOperand() = access and
|
||||
access.getTarget() = var and
|
||||
// Comparing to 0 is not an upper bound check
|
||||
not oper.getAnOperand().getValue() = "0"
|
||||
)
|
||||
}
|
||||
|
||||
private predicate nodeIsBarrierEqualityCandidate(
|
||||
DataFlow::Node node, Operand access, Variable checkedVar
|
||||
) {
|
||||
readsVariable(node.asInstruction(), checkedVar) and
|
||||
any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
cached
|
||||
predicate nodeIsBarrier(DataFlow::Node node) {
|
||||
exists(Variable checkedVar |
|
||||
readsVariable(node.asInstruction(), checkedVar) and
|
||||
hasUpperBoundsCheck(checkedVar)
|
||||
)
|
||||
or
|
||||
exists(Variable checkedVar, Operand access |
|
||||
/*
|
||||
* This node is guarded by a condition that forces the accessed variable
|
||||
* to equal something else. For example:
|
||||
* ```
|
||||
* x = taintsource()
|
||||
* if (x == 10) {
|
||||
* taintsink(x); // not considered tainted
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
|
||||
readsVariable(access.getDef(), checkedVar)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate nodeIsBarrierIn(DataFlow::Node node) {
|
||||
// don't use dataflow into taint sources, as this leads to duplicate results.
|
||||
exists(Expr source | isUserInput(source, _) |
|
||||
node = DataFlow::exprNode(source)
|
||||
or
|
||||
// This case goes together with the similar (but not identical) rule in
|
||||
// `getNodeForSource`.
|
||||
node = DataFlow::definitionByReferenceNodeFromArgument(source)
|
||||
)
|
||||
or
|
||||
// don't use dataflow into binary instructions if both operands are unpredictable
|
||||
exists(BinaryInstruction iTo |
|
||||
iTo = node.asInstruction() and
|
||||
not predictableInstruction(iTo.getLeft()) and
|
||||
not predictableInstruction(iTo.getRight()) and
|
||||
// propagate taint from either the pointer or the offset, regardless of predictability
|
||||
not iTo instanceof PointerArithmeticInstruction
|
||||
)
|
||||
or
|
||||
// don't use dataflow through calls to pure functions if two or more operands
|
||||
// are unpredictable
|
||||
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
|
||||
iTo = node.asInstruction() and
|
||||
isPureFunction(iTo.getStaticCallTarget().getName()) and
|
||||
iFrom1 = iTo.getAnArgument() and
|
||||
iFrom2 = iTo.getAnArgument() and
|
||||
not predictableInstruction(iFrom1) and
|
||||
not predictableInstruction(iFrom2) and
|
||||
iFrom1 != iFrom2
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
Element adjustedSink(DataFlow::Node sink) {
|
||||
// TODO: is it more appropriate to use asConvertedExpr here and avoid
|
||||
// `getConversion*`? Or will that cause us to miss some cases where there's
|
||||
// flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
|
||||
// pretend there was flow to the converted `Expr` for the sake of
|
||||
// compatibility.
|
||||
sink.asExpr().getConversion*() = result
|
||||
or
|
||||
// For compatibility, send flow from arguments to parameters, even for
|
||||
// functions with no body.
|
||||
exists(FunctionCall call, int i |
|
||||
sink.asExpr() = call.getArgument(pragma[only_bind_into](i)) and
|
||||
result = resolveCall(call).getParameter(pragma[only_bind_into](i))
|
||||
)
|
||||
or
|
||||
// For compatibility, send flow into a `Variable` if there is flow to any
|
||||
// Load or Store of that variable.
|
||||
exists(CopyInstruction copy |
|
||||
copy.getSourceValue() = sink.asInstruction() and
|
||||
(
|
||||
readsVariable(copy, result) or
|
||||
writesVariable(copy, result)
|
||||
) and
|
||||
not hasUpperBoundsCheck(result)
|
||||
)
|
||||
or
|
||||
// For compatibility, send flow into a `NotExpr` even if it's part of a
|
||||
// short-circuiting condition and thus might get skipped.
|
||||
result.(NotExpr).getOperand() = sink.asExpr()
|
||||
or
|
||||
// Taint postfix and prefix crement operations when their operand is tainted.
|
||||
result.(CrementOperation).getAnOperand() = sink.asExpr()
|
||||
or
|
||||
// Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted.
|
||||
result.(AssignOperation).getAnOperand() = sink.asExpr()
|
||||
or
|
||||
result =
|
||||
sink.asOperand()
|
||||
.(SideEffectOperand)
|
||||
.getUse()
|
||||
.(ReadSideEffectInstruction)
|
||||
.getArgumentDef()
|
||||
.getUnconvertedResultExpression()
|
||||
}
|
||||
|
||||
/**
|
||||
* Step to return value of a modeled function when an input taints the
|
||||
* dereference of the return value.
|
||||
*/
|
||||
cached
|
||||
predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
|
||||
n1.asOperand() = callInput(call, modelIn) and
|
||||
(
|
||||
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
|
||||
or
|
||||
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
|
||||
) and
|
||||
call.getStaticCallTarget() = func and
|
||||
modelOut.isReturnValueDeref() and
|
||||
call = n2.asInstruction()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private import Cached
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is
|
||||
* computed from user input in a way that users can probably
|
||||
* control the exact output of the computation.
|
||||
*
|
||||
* This doesn't include data flow through global variables.
|
||||
* If you need that you must call `taintedIncludingGlobalVars`.
|
||||
*/
|
||||
cached
|
||||
predicate tainted(Expr source, Element tainted) {
|
||||
exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
|
||||
cfg.hasFlow(getNodeForSource(source), sink) and
|
||||
tainted = adjustedSink(sink)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`, where the taint passed
|
||||
* through a global variable named `globalVar`.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is
|
||||
* computed from user input in a way that users can probably
|
||||
* control the exact output of the computation.
|
||||
*
|
||||
* This version gives the same results as tainted but also includes
|
||||
* data flow through global variables.
|
||||
*
|
||||
* The parameter `globalVar` is the qualified name of the last global variable
|
||||
* used to move the value from source to tainted. If the taint did not pass
|
||||
* through a global variable, then `globalVar = ""`.
|
||||
*/
|
||||
cached
|
||||
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
|
||||
tainted(source, tainted) and
|
||||
globalVar = ""
|
||||
or
|
||||
exists(
|
||||
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg,
|
||||
DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
|
||||
|
|
||||
global = variableNode.getVariable() and
|
||||
toCfg.hasFlow(getNodeForSource(source), variableNode) and
|
||||
fromCfg.hasFlow(variableNode, sink) and
|
||||
tainted = adjustedSink(sink) and
|
||||
global = globalVarFromId(globalVar)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the global variable whose qualified name is `id`. Use this predicate
|
||||
* together with `taintedIncludingGlobalVars`. Example:
|
||||
*
|
||||
* ```
|
||||
* exists(string varName |
|
||||
* taintedIncludingGlobalVars(source, tainted, varName) and
|
||||
* var = globalVarFromId(varName)
|
||||
* )
|
||||
* ```
|
||||
*/
|
||||
GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() }
|
||||
|
||||
/**
|
||||
* Provides definitions for augmenting source/sink pairs with data-flow paths
|
||||
* between them. From a `@kind path-problem` query, import this module in the
|
||||
* global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath`
|
||||
* in place of `tainted`.
|
||||
*
|
||||
* Importing this module will also import the query predicates that contain the
|
||||
* taint paths.
|
||||
*/
|
||||
module TaintedWithPath {
|
||||
private newtype TSingleton = MkSingleton()
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration that matches sources and sinks in the same
|
||||
* way as the `tainted` predicate.
|
||||
*
|
||||
* Override `isSink` and `taintThroughGlobals` as needed, but do not provide
|
||||
* a characteristic predicate.
|
||||
*/
|
||||
class TaintTrackingConfiguration extends TSingleton {
|
||||
/** Override this to specify which elements are sources in this configuration. */
|
||||
predicate isSource(Expr source) { exists(getNodeForSource(source)) }
|
||||
|
||||
/** Override this to specify which elements are sinks in this configuration. */
|
||||
abstract predicate isSink(Element e);
|
||||
|
||||
/** Override this to specify which expressions are barriers in this configuration. */
|
||||
predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) }
|
||||
|
||||
/**
|
||||
* Override this predicate to `any()` to allow taint to flow through global
|
||||
* variables.
|
||||
*/
|
||||
predicate taintThroughGlobals() { none() }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "TaintTrackingConfiguration" }
|
||||
}
|
||||
|
||||
private class AdjustedConfiguration extends TaintTracking3::Configuration {
|
||||
AdjustedConfiguration() { this = "AdjustedConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
exists(TaintTrackingConfiguration cfg, Expr e |
|
||||
cfg.isSource(e) and source = getNodeForExpr(e)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink)))
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
// Steps into and out of global variables
|
||||
exists(TaintTrackingConfiguration cfg | cfg.taintThroughGlobals() |
|
||||
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
|
||||
or
|
||||
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
|
||||
)
|
||||
or
|
||||
additionalTaintStep(n1, n2)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e))
|
||||
}
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
/*
|
||||
* A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the
|
||||
* inverse of) `adjustedSink`. For example, an `Expr` maps to all its
|
||||
* conversions, and a `Variable` maps to all loads and stores from it. Because
|
||||
* the path node is part of the tuple that constitutes the alert, this leads
|
||||
* to duplicate alerts.
|
||||
*
|
||||
* To avoid showing duplicates, we edit the graph to replace the final node
|
||||
* coming from the data-flow library with a node that matches exactly the
|
||||
* `Element` sink that's requested.
|
||||
*
|
||||
* The same is done for sources.
|
||||
*/
|
||||
|
||||
private newtype TPathNode =
|
||||
TWrapPathNode(DataFlow3::PathNode n) or
|
||||
// There's a single newtype constructor for both sources and sinks since
|
||||
// that makes it easiest to deal with the case where source = sink.
|
||||
TEndpointPathNode(Element e) {
|
||||
exists(AdjustedConfiguration cfg, DataFlow3::Node sourceNode, DataFlow3::Node sinkNode |
|
||||
cfg.hasFlow(sourceNode, sinkNode)
|
||||
|
|
||||
sourceNode = getNodeForExpr(e) and
|
||||
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e))
|
||||
or
|
||||
e = adjustedSink(sinkNode) and
|
||||
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSink(e))
|
||||
)
|
||||
}
|
||||
|
||||
/** An opaque type used for the nodes of a data-flow path. */
|
||||
class PathNode extends TPathNode {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { none() }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
none()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*/
|
||||
module Private {
|
||||
/** Gets a predecessor `PathNode` of `pathNode`, if any. */
|
||||
PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) }
|
||||
|
||||
/** Gets the element that `pathNode` wraps, if any. */
|
||||
Element getElementFromPathNode(PathNode pathNode) {
|
||||
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
|
||||
result = node.asInstruction().getAst()
|
||||
or
|
||||
result = node.asOperand().getDef().getAst()
|
||||
)
|
||||
or
|
||||
result = pathNode.(EndpointPathNode).inner()
|
||||
}
|
||||
}
|
||||
|
||||
private class WrapPathNode extends PathNode, TWrapPathNode {
|
||||
DataFlow3::PathNode inner() { this = TWrapPathNode(result) }
|
||||
|
||||
override string toString() { result = this.inner().toString() }
|
||||
|
||||
override predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
private class EndpointPathNode extends PathNode, TEndpointPathNode {
|
||||
Expr inner() { this = TEndpointPathNode(result) }
|
||||
|
||||
override string toString() { result = this.inner().toString() }
|
||||
|
||||
override predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.inner()
|
||||
.getLocation()
|
||||
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
/** A PathNode whose `Element` is a source. It may also be a sink. */
|
||||
private class InitialPathNode extends EndpointPathNode {
|
||||
InitialPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSource(this.inner())) }
|
||||
}
|
||||
|
||||
/** A PathNode whose `Element` is a sink. It may also be a source. */
|
||||
private class FinalPathNode extends EndpointPathNode {
|
||||
FinalPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSink(this.inner())) }
|
||||
}
|
||||
|
||||
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
|
||||
query predicate edges(PathNode a, PathNode b) {
|
||||
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner())
|
||||
or
|
||||
// To avoid showing trivial-looking steps, we _replace_ the last node instead
|
||||
// of adding an edge out of it.
|
||||
exists(WrapPathNode sinkNode |
|
||||
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
or
|
||||
// Same for the first node
|
||||
exists(WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner())
|
||||
)
|
||||
or
|
||||
// Finally, handle the case where the path goes directly from a source to a
|
||||
// sink, meaning that they both need to be translated.
|
||||
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
|
||||
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is flow from `arg` to `out` across a call that can by summarized by the flow
|
||||
* from `par` to `ret` within it, in the graph of data flow path explanations.
|
||||
*/
|
||||
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
|
||||
DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner())
|
||||
or
|
||||
// To avoid showing trivial-looking steps, we _replace_ the last node instead
|
||||
// of adding an edge out of it.
|
||||
exists(WrapPathNode sinkNode |
|
||||
DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
or
|
||||
// Same for the first node
|
||||
exists(WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner())
|
||||
)
|
||||
or
|
||||
// Finally, handle the case where the path goes directly from a source to a
|
||||
// sink, meaning that they both need to be translated.
|
||||
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
|
||||
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `n` is a node in the graph of data flow path explanations. */
|
||||
query predicate nodes(PathNode n, string key, string val) {
|
||||
key = "semmle.label" and val = n.toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`, where `sourceNode` and
|
||||
* `sinkNode` are the corresponding `PathNode`s that can be used in a query
|
||||
* to provide path explanations. Extend `TaintTrackingConfiguration` to use
|
||||
* this predicate.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is computed from
|
||||
* user input in a way that users can probably control the exact output of
|
||||
* the computation.
|
||||
*/
|
||||
predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
|
||||
exists(AdjustedConfiguration cfg, DataFlow3::Node flowSource, DataFlow3::Node flowSink |
|
||||
source = sourceNode.(InitialPathNode).inner() and
|
||||
flowSource = getNodeForExpr(source) and
|
||||
cfg.hasFlow(flowSource, flowSink) and
|
||||
tainted = adjustedSink(flowSink) and
|
||||
tainted = sinkNode.(FinalPathNode).inner()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isGlobalVariablePathNode(WrapPathNode n) {
|
||||
n.inner().getNode().asVariable() instanceof GlobalOrNamespaceVariable
|
||||
}
|
||||
|
||||
private predicate edgesWithoutGlobals(PathNode a, PathNode b) {
|
||||
edges(a, b) and
|
||||
not isGlobalVariablePathNode(a) and
|
||||
not isGlobalVariablePathNode(b)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` can be reached from a taint source without passing
|
||||
* through a global variable.
|
||||
*/
|
||||
predicate taintedWithoutGlobals(Element tainted) {
|
||||
exists(AdjustedConfiguration cfg, PathNode sourceNode, FinalPathNode sinkNode |
|
||||
cfg.isSource(sourceNode.(WrapPathNode).inner().getNode()) and
|
||||
edgesWithoutGlobals+(sourceNode, sinkNode) and
|
||||
tainted = sinkNode.inner()
|
||||
)
|
||||
}
|
||||
}
|
||||
deprecated module TaintedWithPath = DefaultTaintTrackingImpl::TaintedWithPath;
|
||||
|
||||
@@ -0,0 +1,644 @@
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*
|
||||
* An IR taint tracking library that uses an IR DataFlow configuration to track
|
||||
* taint from user inputs as defined by `semmle.code.cpp.security.Security`.
|
||||
*/
|
||||
|
||||
import cpp
|
||||
import semmle.code.cpp.security.Security
|
||||
private import semmle.code.cpp.ir.dataflow.DataFlow
|
||||
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
|
||||
private import semmle.code.cpp.ir.dataflow.DataFlow3
|
||||
private import semmle.code.cpp.ir.IR
|
||||
private import semmle.code.cpp.ir.dataflow.ResolveCall
|
||||
private import semmle.code.cpp.controlflow.IRGuards
|
||||
private import semmle.code.cpp.models.interfaces.Taint
|
||||
private import semmle.code.cpp.models.interfaces.DataFlow
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking2
|
||||
private import semmle.code.cpp.ir.dataflow.TaintTracking3
|
||||
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
|
||||
|
||||
/**
|
||||
* A predictable instruction is one where an external user can predict
|
||||
* the value. For example, a literal in the source code is considered
|
||||
* predictable.
|
||||
*/
|
||||
private predicate predictableInstruction(Instruction instr) {
|
||||
instr instanceof ConstantInstruction
|
||||
or
|
||||
instr instanceof StringConstantInstruction
|
||||
or
|
||||
// This could be a conversion on a string literal
|
||||
predictableInstruction(instr.(UnaryInstruction).getUnary())
|
||||
}
|
||||
|
||||
/**
|
||||
* Functions that we should only allow taint to flow through (to the return
|
||||
* value) if all but the source argument are 'predictable'. This is done to
|
||||
* emulate the old security library's implementation rather than due to any
|
||||
* strong belief that this is the right approach.
|
||||
*
|
||||
* Note that the list itself is not very principled; it consists of all the
|
||||
* functions listed in the old security library's [default] `isPureFunction`
|
||||
* that have more than one argument, but are not in the old taint tracking
|
||||
* library's `returnArgument` predicate.
|
||||
*/
|
||||
predicate predictableOnlyFlow(string name) {
|
||||
name =
|
||||
[
|
||||
"strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup",
|
||||
"strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq",
|
||||
"strtoul"
|
||||
]
|
||||
}
|
||||
|
||||
private DataFlow::Node getNodeForSource(Expr source) {
|
||||
isUserInput(source, _) and
|
||||
result = getNodeForExpr(source)
|
||||
}
|
||||
|
||||
private DataFlow::Node getNodeForExpr(Expr node) {
|
||||
result = DataFlow::exprNode(node)
|
||||
or
|
||||
// Some of the sources in `isUserInput` are intended to match the value of
|
||||
// an expression, while others (those modeled below) are intended to match
|
||||
// the taint that propagates out of an argument, like the `char *` argument
|
||||
// to `gets`. It's impossible here to tell which is which, but the "access
|
||||
// to argv" source is definitely not intended to match an output argument,
|
||||
// and it causes false positives if we let it.
|
||||
//
|
||||
// This case goes together with the similar (but not identical) rule in
|
||||
// `nodeIsBarrierIn`.
|
||||
result = DataFlow::definitionByReferenceNodeFromArgument(node) and
|
||||
not argv(node.(VariableAccess).getTarget())
|
||||
}
|
||||
|
||||
private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
DefaultTaintTrackingCfg() { this = "DefaultTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
|
||||
ToGlobalVarTaintTrackingCfg() { this = "GlobalVarTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
sink.asVariable() instanceof GlobalOrNamespaceVariable
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
|
||||
or
|
||||
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuration {
|
||||
FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
// This set of sources should be reasonably small, which is good for
|
||||
// performance since the set of sinks is very large.
|
||||
exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source))
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
// Additional step for flow out of variables. There is no flow _into_
|
||||
// variables in this configuration, so this step only serves to take flow
|
||||
// out of a variable that's a source.
|
||||
readsVariable(n2.asInstruction(), n1.asVariable())
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
private predicate readsVariable(LoadInstruction load, Variable var) {
|
||||
load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var
|
||||
}
|
||||
|
||||
private predicate writesVariable(StoreInstruction store, Variable var) {
|
||||
store.getDestinationAddress().(VariableAddressInstruction).getAstVariable() = var
|
||||
}
|
||||
|
||||
/**
|
||||
* A variable that has any kind of upper-bound check anywhere in the program. This is
|
||||
* biased towards being inclusive because there are a lot of valid ways of doing an
|
||||
* upper bounds checks if we don't consider where it occurs, for example:
|
||||
* ```
|
||||
* if (x < 10) { sink(x); }
|
||||
*
|
||||
* if (10 > y) { sink(y); }
|
||||
*
|
||||
* if (z > 10) { z = 10; }
|
||||
* sink(z);
|
||||
* ```
|
||||
*/
|
||||
// TODO: This coarse overapproximation, ported from the old taint tracking
|
||||
// library, could be replaced with an actual semantic check that a particular
|
||||
// variable _access_ is guarded by an upper-bound check. We probably don't want
|
||||
// to do this right away since it could expose a lot of FPs that were
|
||||
// previously suppressed by this predicate by coincidence.
|
||||
private predicate hasUpperBoundsCheck(Variable var) {
|
||||
exists(RelationalOperation oper, VariableAccess access |
|
||||
oper.getAnOperand() = access and
|
||||
access.getTarget() = var and
|
||||
// Comparing to 0 is not an upper bound check
|
||||
not oper.getAnOperand().getValue() = "0"
|
||||
)
|
||||
}
|
||||
|
||||
private predicate nodeIsBarrierEqualityCandidate(
|
||||
DataFlow::Node node, Operand access, Variable checkedVar
|
||||
) {
|
||||
readsVariable(node.asInstruction(), checkedVar) and
|
||||
any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
|
||||
}
|
||||
|
||||
cached
|
||||
private module Cached {
|
||||
cached
|
||||
predicate nodeIsBarrier(DataFlow::Node node) {
|
||||
exists(Variable checkedVar |
|
||||
readsVariable(node.asInstruction(), checkedVar) and
|
||||
hasUpperBoundsCheck(checkedVar)
|
||||
)
|
||||
or
|
||||
exists(Variable checkedVar, Operand access |
|
||||
/*
|
||||
* This node is guarded by a condition that forces the accessed variable
|
||||
* to equal something else. For example:
|
||||
* ```
|
||||
* x = taintsource()
|
||||
* if (x == 10) {
|
||||
* taintsink(x); // not considered tainted
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
|
||||
readsVariable(access.getDef(), checkedVar)
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
predicate nodeIsBarrierIn(DataFlow::Node node) {
|
||||
// don't use dataflow into taint sources, as this leads to duplicate results.
|
||||
exists(Expr source | isUserInput(source, _) |
|
||||
node = DataFlow::exprNode(source)
|
||||
or
|
||||
// This case goes together with the similar (but not identical) rule in
|
||||
// `getNodeForSource`.
|
||||
node = DataFlow::definitionByReferenceNodeFromArgument(source)
|
||||
)
|
||||
or
|
||||
// don't use dataflow into binary instructions if both operands are unpredictable
|
||||
exists(BinaryInstruction iTo |
|
||||
iTo = node.asInstruction() and
|
||||
not predictableInstruction(iTo.getLeft()) and
|
||||
not predictableInstruction(iTo.getRight()) and
|
||||
// propagate taint from either the pointer or the offset, regardless of predictability
|
||||
not iTo instanceof PointerArithmeticInstruction
|
||||
)
|
||||
or
|
||||
// don't use dataflow through calls to pure functions if two or more operands
|
||||
// are unpredictable
|
||||
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
|
||||
iTo = node.asInstruction() and
|
||||
isPureFunction(iTo.getStaticCallTarget().getName()) and
|
||||
iFrom1 = iTo.getAnArgument() and
|
||||
iFrom2 = iTo.getAnArgument() and
|
||||
not predictableInstruction(iFrom1) and
|
||||
not predictableInstruction(iFrom2) and
|
||||
iFrom1 != iFrom2
|
||||
)
|
||||
}
|
||||
|
||||
cached
|
||||
Element adjustedSink(DataFlow::Node sink) {
|
||||
// TODO: is it more appropriate to use asConvertedExpr here and avoid
|
||||
// `getConversion*`? Or will that cause us to miss some cases where there's
|
||||
// flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
|
||||
// pretend there was flow to the converted `Expr` for the sake of
|
||||
// compatibility.
|
||||
sink.asExpr().getConversion*() = result
|
||||
or
|
||||
// For compatibility, send flow from arguments to parameters, even for
|
||||
// functions with no body.
|
||||
exists(FunctionCall call, int i |
|
||||
sink.asExpr() = call.getArgument(pragma[only_bind_into](i)) and
|
||||
result = resolveCall(call).getParameter(pragma[only_bind_into](i))
|
||||
)
|
||||
or
|
||||
// For compatibility, send flow into a `Variable` if there is flow to any
|
||||
// Load or Store of that variable.
|
||||
exists(CopyInstruction copy |
|
||||
copy.getSourceValue() = sink.asInstruction() and
|
||||
(
|
||||
readsVariable(copy, result) or
|
||||
writesVariable(copy, result)
|
||||
) and
|
||||
not hasUpperBoundsCheck(result)
|
||||
)
|
||||
or
|
||||
// For compatibility, send flow into a `NotExpr` even if it's part of a
|
||||
// short-circuiting condition and thus might get skipped.
|
||||
result.(NotExpr).getOperand() = sink.asExpr()
|
||||
or
|
||||
// Taint postfix and prefix crement operations when their operand is tainted.
|
||||
result.(CrementOperation).getAnOperand() = sink.asExpr()
|
||||
or
|
||||
// Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted.
|
||||
result.(AssignOperation).getAnOperand() = sink.asExpr()
|
||||
or
|
||||
result =
|
||||
sink.asOperand()
|
||||
.(SideEffectOperand)
|
||||
.getUse()
|
||||
.(ReadSideEffectInstruction)
|
||||
.getArgumentDef()
|
||||
.getUnconvertedResultExpression()
|
||||
}
|
||||
|
||||
/**
|
||||
* Step to return value of a modeled function when an input taints the
|
||||
* dereference of the return value.
|
||||
*/
|
||||
cached
|
||||
predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
|
||||
n1.asOperand() = callInput(call, modelIn) and
|
||||
(
|
||||
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
|
||||
or
|
||||
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
|
||||
) and
|
||||
call.getStaticCallTarget() = func and
|
||||
modelOut.isReturnValueDeref() and
|
||||
call = n2.asInstruction()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private import Cached
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is
|
||||
* computed from user input in a way that users can probably
|
||||
* control the exact output of the computation.
|
||||
*
|
||||
* This doesn't include data flow through global variables.
|
||||
* If you need that you must call `taintedIncludingGlobalVars`.
|
||||
*/
|
||||
cached
|
||||
predicate tainted(Expr source, Element tainted) {
|
||||
exists(DefaultTaintTrackingCfg cfg, DataFlow::Node sink |
|
||||
cfg.hasFlow(getNodeForSource(source), sink) and
|
||||
tainted = adjustedSink(sink)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`, where the taint passed
|
||||
* through a global variable named `globalVar`.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is
|
||||
* computed from user input in a way that users can probably
|
||||
* control the exact output of the computation.
|
||||
*
|
||||
* This version gives the same results as tainted but also includes
|
||||
* data flow through global variables.
|
||||
*
|
||||
* The parameter `globalVar` is the qualified name of the last global variable
|
||||
* used to move the value from source to tainted. If the taint did not pass
|
||||
* through a global variable, then `globalVar = ""`.
|
||||
*/
|
||||
cached
|
||||
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
|
||||
tainted(source, tainted) and
|
||||
globalVar = ""
|
||||
or
|
||||
exists(
|
||||
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg,
|
||||
DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
|
||||
|
|
||||
global = variableNode.getVariable() and
|
||||
toCfg.hasFlow(getNodeForSource(source), variableNode) and
|
||||
fromCfg.hasFlow(variableNode, sink) and
|
||||
tainted = adjustedSink(sink) and
|
||||
global = globalVarFromId(globalVar)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the global variable whose qualified name is `id`. Use this predicate
|
||||
* together with `taintedIncludingGlobalVars`. Example:
|
||||
*
|
||||
* ```
|
||||
* exists(string varName |
|
||||
* taintedIncludingGlobalVars(source, tainted, varName) and
|
||||
* var = globalVarFromId(varName)
|
||||
* )
|
||||
* ```
|
||||
*/
|
||||
GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() }
|
||||
|
||||
/**
|
||||
* Provides definitions for augmenting source/sink pairs with data-flow paths
|
||||
* between them. From a `@kind path-problem` query, import this module in the
|
||||
* global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath`
|
||||
* in place of `tainted`.
|
||||
*
|
||||
* Importing this module will also import the query predicates that contain the
|
||||
* taint paths.
|
||||
*/
|
||||
module TaintedWithPath {
|
||||
private newtype TSingleton = MkSingleton()
|
||||
|
||||
/**
|
||||
* A taint-tracking configuration that matches sources and sinks in the same
|
||||
* way as the `tainted` predicate.
|
||||
*
|
||||
* Override `isSink` and `taintThroughGlobals` as needed, but do not provide
|
||||
* a characteristic predicate.
|
||||
*/
|
||||
class TaintTrackingConfiguration extends TSingleton {
|
||||
/** Override this to specify which elements are sources in this configuration. */
|
||||
predicate isSource(Expr source) { exists(getNodeForSource(source)) }
|
||||
|
||||
/** Override this to specify which elements are sinks in this configuration. */
|
||||
abstract predicate isSink(Element e);
|
||||
|
||||
/** Override this to specify which expressions are barriers in this configuration. */
|
||||
predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) }
|
||||
|
||||
/**
|
||||
* Override this predicate to `any()` to allow taint to flow through global
|
||||
* variables.
|
||||
*/
|
||||
predicate taintThroughGlobals() { none() }
|
||||
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { result = "TaintTrackingConfiguration" }
|
||||
}
|
||||
|
||||
private class AdjustedConfiguration extends TaintTracking3::Configuration {
|
||||
AdjustedConfiguration() { this = "AdjustedConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
exists(TaintTrackingConfiguration cfg, Expr e |
|
||||
cfg.isSource(e) and source = getNodeForExpr(e)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink)))
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
// Steps into and out of global variables
|
||||
exists(TaintTrackingConfiguration cfg | cfg.taintThroughGlobals() |
|
||||
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
|
||||
or
|
||||
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
|
||||
)
|
||||
or
|
||||
additionalTaintStep(n1, n2)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e))
|
||||
}
|
||||
|
||||
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
|
||||
}
|
||||
|
||||
/*
|
||||
* A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the
|
||||
* inverse of) `adjustedSink`. For example, an `Expr` maps to all its
|
||||
* conversions, and a `Variable` maps to all loads and stores from it. Because
|
||||
* the path node is part of the tuple that constitutes the alert, this leads
|
||||
* to duplicate alerts.
|
||||
*
|
||||
* To avoid showing duplicates, we edit the graph to replace the final node
|
||||
* coming from the data-flow library with a node that matches exactly the
|
||||
* `Element` sink that's requested.
|
||||
*
|
||||
* The same is done for sources.
|
||||
*/
|
||||
|
||||
private newtype TPathNode =
|
||||
TWrapPathNode(DataFlow3::PathNode n) or
|
||||
// There's a single newtype constructor for both sources and sinks since
|
||||
// that makes it easiest to deal with the case where source = sink.
|
||||
TEndpointPathNode(Element e) {
|
||||
exists(AdjustedConfiguration cfg, DataFlow3::Node sourceNode, DataFlow3::Node sinkNode |
|
||||
cfg.hasFlow(sourceNode, sinkNode)
|
||||
|
|
||||
sourceNode = getNodeForExpr(e) and
|
||||
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e))
|
||||
or
|
||||
e = adjustedSink(sinkNode) and
|
||||
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSink(e))
|
||||
)
|
||||
}
|
||||
|
||||
/** An opaque type used for the nodes of a data-flow path. */
|
||||
class PathNode extends TPathNode {
|
||||
/** Gets a textual representation of this element. */
|
||||
string toString() { none() }
|
||||
|
||||
/**
|
||||
* Holds if this element is at the specified location.
|
||||
* The location spans column `startcolumn` of line `startline` to
|
||||
* column `endcolumn` of line `endline` in file `filepath`.
|
||||
* For more information, see
|
||||
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
|
||||
*/
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
none()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* INTERNAL: Do not use.
|
||||
*/
|
||||
module Private {
|
||||
/** Gets a predecessor `PathNode` of `pathNode`, if any. */
|
||||
PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) }
|
||||
|
||||
/** Gets the element that `pathNode` wraps, if any. */
|
||||
Element getElementFromPathNode(PathNode pathNode) {
|
||||
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
|
||||
result = node.asInstruction().getAst()
|
||||
or
|
||||
result = node.asOperand().getDef().getAst()
|
||||
)
|
||||
or
|
||||
result = pathNode.(EndpointPathNode).inner()
|
||||
}
|
||||
}
|
||||
|
||||
private class WrapPathNode extends PathNode, TWrapPathNode {
|
||||
DataFlow3::PathNode inner() { this = TWrapPathNode(result) }
|
||||
|
||||
override string toString() { result = this.inner().toString() }
|
||||
|
||||
override predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
private class EndpointPathNode extends PathNode, TEndpointPathNode {
|
||||
Expr inner() { this = TEndpointPathNode(result) }
|
||||
|
||||
override string toString() { result = this.inner().toString() }
|
||||
|
||||
override predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
this.inner()
|
||||
.getLocation()
|
||||
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
}
|
||||
}
|
||||
|
||||
/** A PathNode whose `Element` is a source. It may also be a sink. */
|
||||
private class InitialPathNode extends EndpointPathNode {
|
||||
InitialPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSource(this.inner())) }
|
||||
}
|
||||
|
||||
/** A PathNode whose `Element` is a sink. It may also be a source. */
|
||||
private class FinalPathNode extends EndpointPathNode {
|
||||
FinalPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSink(this.inner())) }
|
||||
}
|
||||
|
||||
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
|
||||
query predicate edges(PathNode a, PathNode b) {
|
||||
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner())
|
||||
or
|
||||
// To avoid showing trivial-looking steps, we _replace_ the last node instead
|
||||
// of adding an edge out of it.
|
||||
exists(WrapPathNode sinkNode |
|
||||
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
or
|
||||
// Same for the first node
|
||||
exists(WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner())
|
||||
)
|
||||
or
|
||||
// Finally, handle the case where the path goes directly from a source to a
|
||||
// sink, meaning that they both need to be translated.
|
||||
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
|
||||
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if there is flow from `arg` to `out` across a call that can by summarized by the flow
|
||||
* from `par` to `ret` within it, in the graph of data flow path explanations.
|
||||
*/
|
||||
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
|
||||
DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner())
|
||||
or
|
||||
// To avoid showing trivial-looking steps, we _replace_ the last node instead
|
||||
// of adding an edge out of it.
|
||||
exists(WrapPathNode sinkNode |
|
||||
DataFlow3::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
or
|
||||
// Same for the first node
|
||||
exists(WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner())
|
||||
)
|
||||
or
|
||||
// Finally, handle the case where the path goes directly from a source to a
|
||||
// sink, meaning that they both need to be translated.
|
||||
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
|
||||
DataFlow3::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
|
||||
ret.(WrapPathNode).inner(), sinkNode.inner()) and
|
||||
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
|
||||
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `n` is a node in the graph of data flow path explanations. */
|
||||
query predicate nodes(PathNode n, string key, string val) {
|
||||
key = "semmle.label" and val = n.toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` may contain taint from `source`, where `sourceNode` and
|
||||
* `sinkNode` are the corresponding `PathNode`s that can be used in a query
|
||||
* to provide path explanations. Extend `TaintTrackingConfiguration` to use
|
||||
* this predicate.
|
||||
*
|
||||
* A tainted expression is either directly user input, or is computed from
|
||||
* user input in a way that users can probably control the exact output of
|
||||
* the computation.
|
||||
*/
|
||||
predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
|
||||
exists(AdjustedConfiguration cfg, DataFlow3::Node flowSource, DataFlow3::Node flowSink |
|
||||
source = sourceNode.(InitialPathNode).inner() and
|
||||
flowSource = getNodeForExpr(source) and
|
||||
cfg.hasFlow(flowSource, flowSink) and
|
||||
tainted = adjustedSink(flowSink) and
|
||||
tainted = sinkNode.(FinalPathNode).inner()
|
||||
)
|
||||
}
|
||||
|
||||
private predicate isGlobalVariablePathNode(WrapPathNode n) {
|
||||
n.inner().getNode().asVariable() instanceof GlobalOrNamespaceVariable
|
||||
}
|
||||
|
||||
private predicate edgesWithoutGlobals(PathNode a, PathNode b) {
|
||||
edges(a, b) and
|
||||
not isGlobalVariablePathNode(a) and
|
||||
not isGlobalVariablePathNode(b)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `tainted` can be reached from a taint source without passing
|
||||
* through a global variable.
|
||||
*/
|
||||
predicate taintedWithoutGlobals(Element tainted) {
|
||||
exists(AdjustedConfiguration cfg, PathNode sourceNode, FinalPathNode sinkNode |
|
||||
cfg.isSource(sourceNode.(WrapPathNode).inner().getNode()) and
|
||||
edgesWithoutGlobals+(sourceNode, sinkNode) and
|
||||
tainted = sinkNode.inner()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -121,7 +121,9 @@ private predicate moveToDependingOnSide(Expr src, Expr dest) {
|
||||
* (this is done to avoid false positives). Because of this we need to track if the tainted element came from an argument
|
||||
* or not, and for that we use destFromArg
|
||||
*/
|
||||
private predicate betweenFunctionsValueMoveTo(Element src, Element dest, boolean destFromArg) {
|
||||
deprecated private predicate betweenFunctionsValueMoveTo(
|
||||
Element src, Element dest, boolean destFromArg
|
||||
) {
|
||||
not unreachable(src) and
|
||||
not unreachable(dest) and
|
||||
(
|
||||
@@ -162,13 +164,13 @@ private predicate betweenFunctionsValueMoveTo(Element src, Element dest, boolean
|
||||
// predicate folding for proper join-order
|
||||
// bad magic: pushes down predicate that ruins join-order
|
||||
pragma[nomagic]
|
||||
private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) {
|
||||
deprecated private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) {
|
||||
called = resolveCall(call) and
|
||||
p = called.getParameter(i)
|
||||
}
|
||||
|
||||
/** A variable for which flow through is allowed. */
|
||||
library class FlowVariable extends Variable {
|
||||
deprecated library class FlowVariable extends Variable {
|
||||
FlowVariable() {
|
||||
(
|
||||
this instanceof LocalScopeVariable or
|
||||
@@ -179,11 +181,11 @@ library class FlowVariable extends Variable {
|
||||
}
|
||||
|
||||
/** A local scope variable for which flow through is allowed. */
|
||||
library class FlowLocalScopeVariable extends Variable {
|
||||
deprecated library class FlowLocalScopeVariable extends Variable {
|
||||
FlowLocalScopeVariable() { this instanceof LocalScopeVariable }
|
||||
}
|
||||
|
||||
private predicate insideFunctionValueMoveTo(Element src, Element dest) {
|
||||
deprecated private predicate insideFunctionValueMoveTo(Element src, Element dest) {
|
||||
not unreachable(src) and
|
||||
not unreachable(dest) and
|
||||
(
|
||||
@@ -324,7 +326,7 @@ private predicate unionAccess(Variable v, Field f, FieldAccess a) {
|
||||
a.getQualifier() = v.getAnAccess()
|
||||
}
|
||||
|
||||
GlobalOrNamespaceVariable globalVarFromId(string id) {
|
||||
deprecated GlobalOrNamespaceVariable globalVarFromId(string id) {
|
||||
if result instanceof NamespaceVariable
|
||||
then id = result.getNamespace() + "::" + result.getName()
|
||||
else id = result.getName()
|
||||
@@ -353,7 +355,7 @@ private predicate hasUpperBoundsCheck(Variable var) {
|
||||
}
|
||||
|
||||
cached
|
||||
private predicate taintedWithArgsAndGlobalVars(
|
||||
deprecated private predicate taintedWithArgsAndGlobalVars(
|
||||
Element src, Element dest, boolean destFromArg, string globalVar
|
||||
) {
|
||||
isUserInput(src, _) and
|
||||
@@ -395,7 +397,7 @@ private predicate taintedWithArgsAndGlobalVars(
|
||||
* This doesn't include data flow through global variables.
|
||||
* If you need that you must call taintedIncludingGlobalVars.
|
||||
*/
|
||||
predicate tainted(Expr source, Element tainted) {
|
||||
deprecated predicate tainted(Expr source, Element tainted) {
|
||||
taintedWithArgsAndGlobalVars(source, tainted, _, "")
|
||||
}
|
||||
|
||||
@@ -410,7 +412,7 @@ predicate tainted(Expr source, Element tainted) {
|
||||
* The parameter `globalVar` is the name of the last global variable used to move the
|
||||
* value from source to tainted.
|
||||
*/
|
||||
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
|
||||
deprecated predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
|
||||
taintedWithArgsAndGlobalVars(source, tainted, _, globalVar)
|
||||
}
|
||||
|
||||
@@ -541,14 +543,14 @@ private predicate returnArgument(Function f, int sourceArg) {
|
||||
* targets a virtual method, simple data flow analysis is performed
|
||||
* in order to identify target(s).
|
||||
*/
|
||||
Function resolveCall(Call call) {
|
||||
deprecated Function resolveCall(Call call) {
|
||||
result = call.getTarget()
|
||||
or
|
||||
result = call.(DataSensitiveCallExpr).resolve()
|
||||
}
|
||||
|
||||
/** A data sensitive call expression. */
|
||||
abstract library class DataSensitiveCallExpr extends Expr {
|
||||
abstract deprecated library class DataSensitiveCallExpr extends Expr {
|
||||
DataSensitiveCallExpr() { not unreachable(this) }
|
||||
|
||||
abstract Expr getSrc();
|
||||
@@ -579,7 +581,7 @@ abstract library class DataSensitiveCallExpr extends Expr {
|
||||
}
|
||||
|
||||
/** Call through a function pointer. */
|
||||
library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall {
|
||||
deprecated library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall {
|
||||
override Expr getSrc() { result = getExpr() }
|
||||
|
||||
override Function resolve() {
|
||||
@@ -588,7 +590,8 @@ library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall {
|
||||
}
|
||||
|
||||
/** Call to a virtual function. */
|
||||
library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr, FunctionCall {
|
||||
deprecated library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr,
|
||||
FunctionCall {
|
||||
DataSensitiveOverriddenFunctionCall() {
|
||||
exists(getTarget().(VirtualFunction).getAnOverridingFunction())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user