Merge branch 'main' into fewer-dataflow-branches

This commit is contained in:
Mathias Vorreiter Pedersen
2023-12-08 09:30:01 +00:00
79 changed files with 12353 additions and 3566 deletions

View File

@@ -52,12 +52,13 @@ class Options extends string {
/**
* Holds if a call to this function will never return.
*
* By default, this holds for `exit`, `_exit`, `abort`, `__assert_fail`,
* `longjmp`, `__builtin_unreachable` and any function with a
* `noreturn` attribute or specifier.
* By default, this holds for `exit`, `_exit`, `_Exit`, `abort`,
* `__assert_fail`, `longjmp`, `__builtin_unreachable` and any
* function with a `noreturn` or `__noreturn__` attribute or
* `noreturn` specifier.
*/
predicate exits(Function f) {
f.getAnAttribute().hasName("noreturn")
f.getAnAttribute().hasName(["noreturn", "__noreturn__"])
or
f.getASpecifier().hasName("noreturn")
or

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The `Guards` library has been replaced with the API-compatible `IRGuards` implementation, which has better precision in some cases.

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The deprecated `DefaultTaintTracking` library has been removed.

View File

@@ -7,371 +7,7 @@ import cpp
import semmle.code.cpp.controlflow.BasicBlocks
import semmle.code.cpp.controlflow.SSA
import semmle.code.cpp.controlflow.Dominance
/**
* A Boolean condition that guards one or more basic blocks. This includes
* operands of logical operators but not switch statements.
*/
class GuardCondition extends Expr {
GuardCondition() { is_condition(this) }
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`.
*
* Illustration:
*
* ```
* [ (testIsTrue) ]
* [ this ----------------succ ---- controlled ]
* [ | | ]
* [ (testIsFalse) | ------ ... ]
* [ other ]
* ```
*
* The predicate holds if all paths to `controlled` go via the `testIsTrue`
* edge of the control-flow graph. In other words, the `testIsTrue` edge
* must dominate `controlled`. This means that `controlled` must be
* dominated by both `this` and `succ` (the target of the `testIsTrue`
* edge). It also means that any other edge into `succ` must be a back-edge
* from a node which is dominated by `succ`.
*
* The short-circuit boolean operations have slightly surprising behavior
* here: because the operation itself only dominates one branch (due to
* being short-circuited) then it will only control blocks dominated by the
* true (for `&&`) or false (for `||`) branch.
*/
cached
predicate controls(BasicBlock controlled, boolean testIsTrue) {
// This condition must determine the flow of control; that is, this
// node must be a top-level condition.
this.controlsBlock(controlled, testIsTrue)
or
exists(BinaryLogicalOperation binop, GuardCondition lhs, GuardCondition rhs |
this = binop and
lhs = binop.getLeftOperand() and
rhs = binop.getRightOperand() and
lhs.controls(controlled, testIsTrue) and
rhs.controls(controlled, testIsTrue)
)
or
exists(GuardCondition ne, GuardCondition operand |
this = operand and
operand = ne.(NotExpr).getOperand() and
ne.controls(controlled, testIsTrue.booleanNot())
)
}
/** Holds if (determined by this guard) `left < right + k` evaluates to `isLessThan` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
compares_lt(this, left, right, k, isLessThan, testIsTrue)
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` in `block`.
* If `isLessThan = false` then this implies `left >= right + k`.
*/
cached
predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) {
exists(boolean testIsTrue |
compares_lt(this, left, right, k, isLessThan, testIsTrue) and this.controls(block, testIsTrue)
)
}
/** Holds if (determined by this guard) `left == right + k` evaluates to `areEqual` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
compares_eq(this, left, right, k, areEqual, testIsTrue)
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` in `block`.
* If `areEqual = false` then this implies `left != right + k`.
*/
cached
predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) {
exists(boolean testIsTrue |
compares_eq(this, left, right, k, areEqual, testIsTrue) and this.controls(block, testIsTrue)
)
}
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`. This helper
* predicate does not necessarily hold for binary logical operations like
* `&&` and `||`. See the detailed explanation on predicate `controls`.
*/
private predicate controlsBlock(BasicBlock controlled, boolean testIsTrue) {
exists(BasicBlock thisblock | thisblock.contains(this) |
exists(BasicBlock succ |
testIsTrue = true and succ = this.getATrueSuccessor()
or
testIsTrue = false and succ = this.getAFalseSuccessor()
|
bbDominates(succ, controlled) and
forall(BasicBlock pred | pred.getASuccessor() = succ |
pred = thisblock or bbDominates(succ, pred) or not reachable(pred)
)
)
)
}
}
private predicate is_condition(Expr guard) {
guard.isCondition()
or
is_condition(guard.(BinaryLogicalOperation).getAnOperand())
or
exists(NotExpr cond | is_condition(cond) and cond.getOperand() = guard)
}
/*
* Simplification of equality expressions:
* Simplify conditions in the source to the canonical form l op r + k.
*/
/**
* Holds if `left == right + k` is `areEqual` given that test is `testIsTrue`.
*
* Beware making mistaken logical implications here relating `areEqual` and `testIsTrue`.
*/
private predicate compares_eq(
Expr test, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
/* The simple case where the test *is* the comparison so areEqual = testIsTrue xor eq. */
exists(boolean eq | simple_comparison_eq(test, left, right, k, eq) |
areEqual = true and testIsTrue = eq
or
areEqual = false and testIsTrue = eq.booleanNot()
)
or
logical_comparison_eq(test, left, right, k, areEqual, testIsTrue)
or
/* a == b + k => b == a - k */
exists(int mk | k = -mk | compares_eq(test, right, left, mk, areEqual, testIsTrue))
or
complex_eq(test, left, right, k, areEqual, testIsTrue)
or
/* (x is true => (left == right + k)) => (!x is false => (left == right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_eq(test.(NotExpr).getOperand(), left, right, k, areEqual, isFalse)
)
}
/**
* If `test => part` and `part => left == right + k` then `test => left == right + k`.
* Similarly for the case where `test` is false.
*/
private predicate logical_comparison_eq(
BinaryLogicalOperation test, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(boolean partIsTrue, Expr part | test.impliesValue(part, partIsTrue, testIsTrue) |
compares_eq(part, left, right, k, areEqual, partIsTrue)
)
}
/** Rearrange various simple comparisons into `left == right + k` form. */
private predicate simple_comparison_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual
) {
left = cmp.getLeftOperand() and
cmp.getOperator() = "==" and
right = cmp.getRightOperand() and
k = 0 and
areEqual = true
or
left = cmp.getLeftOperand() and
cmp.getOperator() = "!=" and
right = cmp.getRightOperand() and
k = 0 and
areEqual = false
}
private predicate complex_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
sub_eq(cmp, left, right, k, areEqual, testIsTrue)
or
add_eq(cmp, left, right, k, areEqual, testIsTrue)
}
// left - x == right + c => left == right + (c+x)
// left == (right - x) + c => left == right + (c-x)
private predicate sub_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(SubExpr lhs, int c, int x |
compares_eq(cmp, lhs, right, c, areEqual, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRightOperand()) and
k = c + x
)
or
exists(SubExpr rhs, int c, int x |
compares_eq(cmp, left, rhs, c, areEqual, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRightOperand()) and
k = c - x
)
}
// left + x == right + c => left == right + (c-x)
// left == (right + x) + c => left == right + (c+x)
private predicate add_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(AddExpr lhs, int c, int x |
compares_eq(cmp, lhs, right, c, areEqual, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRightOperand())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeftOperand())
) and
k = c - x
)
or
exists(AddExpr rhs, int c, int x |
compares_eq(cmp, left, rhs, c, areEqual, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRightOperand())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeftOperand())
) and
k = c + x
)
}
/*
* Simplification of inequality expressions:
* Simplify conditions in the source to the canonical form l < r + k.
*/
/** Holds if `left < right + k` evaluates to `isLt` given that test is `testIsTrue`. */
private predicate compares_lt(
Expr test, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
/* In the simple case, the test is the comparison, so isLt = testIsTrue */
simple_comparison_lt(test, left, right, k) and isLt = true and testIsTrue = true
or
simple_comparison_lt(test, left, right, k) and isLt = false and testIsTrue = false
or
logical_comparison_lt(test, left, right, k, isLt, testIsTrue)
or
complex_lt(test, left, right, k, isLt, testIsTrue)
or
/* (not (left < right + k)) => (left >= right + k) */
exists(boolean isGe | isLt = isGe.booleanNot() |
compares_ge(test, left, right, k, isGe, testIsTrue)
)
or
/* (x is true => (left < right + k)) => (!x is false => (left < right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_lt(test.(NotExpr).getOperand(), left, right, k, isLt, isFalse)
)
}
/** `(a < b + k) => (b > a - k) => (b >= a + (1-k))` */
private predicate compares_ge(
Expr test, Expr left, Expr right, int k, boolean isGe, boolean testIsTrue
) {
exists(int onemk | k = 1 - onemk | compares_lt(test, right, left, onemk, isGe, testIsTrue))
}
/**
* If `test => part` and `part => left < right + k` then `test => left < right + k`.
* Similarly for the case where `test` evaluates false.
*/
private predicate logical_comparison_lt(
BinaryLogicalOperation test, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(boolean partIsTrue, Expr part | test.impliesValue(part, partIsTrue, testIsTrue) |
compares_lt(part, left, right, k, isLt, partIsTrue)
)
}
/** Rearrange various simple comparisons into `left < right + k` form. */
private predicate simple_comparison_lt(ComparisonOperation cmp, Expr left, Expr right, int k) {
left = cmp.getLeftOperand() and
cmp.getOperator() = "<" and
right = cmp.getRightOperand() and
k = 0
or
left = cmp.getLeftOperand() and
cmp.getOperator() = "<=" and
right = cmp.getRightOperand() and
k = 1
or
right = cmp.getLeftOperand() and
cmp.getOperator() = ">" and
left = cmp.getRightOperand() and
k = 0
or
right = cmp.getLeftOperand() and
cmp.getOperator() = ">=" and
left = cmp.getRightOperand() and
k = 1
}
private predicate complex_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
sub_lt(cmp, left, right, k, isLt, testIsTrue)
or
add_lt(cmp, left, right, k, isLt, testIsTrue)
}
// left - x < right + c => left < right + (c+x)
// left < (right - x) + c => left < right + (c-x)
private predicate sub_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(SubExpr lhs, int c, int x |
compares_lt(cmp, lhs, right, c, isLt, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRightOperand()) and
k = c + x
)
or
exists(SubExpr rhs, int c, int x |
compares_lt(cmp, left, rhs, c, isLt, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRightOperand()) and
k = c - x
)
}
// left + x < right + c => left < right + (c-x)
// left < (right + x) + c => left < right + (c+x)
private predicate add_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(AddExpr lhs, int c, int x |
compares_lt(cmp, lhs, right, c, isLt, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRightOperand())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeftOperand())
) and
k = c - x
)
or
exists(AddExpr rhs, int c, int x |
compares_lt(cmp, left, rhs, c, isLt, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRightOperand())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeftOperand())
) and
k = c + x
)
}
/** The `int` value of integer constant expression. */
private int int_value(Expr e) {
e.getUnderlyingType() instanceof IntegralType and
result = e.getValue().toInt()
}
import IRGuards
/** An `SsaDefinition` with an additional predicate `isLt`. */
class GuardedSsa extends SsaDefinition {

View File

@@ -1,21 +0,0 @@
/**
* DEPRECATED: Use `semmle.code.cpp.ir.dataflow.TaintTracking` as a replacement.
*
* An IR taint tracking library that uses an IR DataFlow configuration to track
* taint from user inputs as defined by `semmle.code.cpp.security.Security`.
*/
import cpp
import semmle.code.cpp.security.Security
private import semmle.code.cpp.ir.dataflow.internal.DefaultTaintTrackingImpl as DefaultTaintTrackingImpl
deprecated predicate predictableOnlyFlow = DefaultTaintTrackingImpl::predictableOnlyFlow/1;
deprecated predicate tainted = DefaultTaintTrackingImpl::tainted/2;
deprecated predicate taintedIncludingGlobalVars =
DefaultTaintTrackingImpl::taintedIncludingGlobalVars/3;
deprecated predicate globalVarFromId = DefaultTaintTrackingImpl::globalVarFromId/1;
deprecated module TaintedWithPath = DefaultTaintTrackingImpl::TaintedWithPath;

View File

@@ -1,668 +0,0 @@
/**
* INTERNAL: Do not use.
*
* An IR taint tracking library that uses an IR DataFlow configuration to track
* taint from user inputs as defined by `semmle.code.cpp.security.Security`.
*/
import cpp
import semmle.code.cpp.security.Security
private import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.ResolveCall
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.models.interfaces.Taint
private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.ir.dataflow.TaintTracking
private import semmle.code.cpp.ir.dataflow.TaintTracking2
private import semmle.code.cpp.ir.dataflow.TaintTracking3
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
/**
* A predictable instruction is one where an external user can predict
* the value. For example, a literal in the source code is considered
* predictable.
*/
private predicate predictableInstruction(Instruction instr) {
instr instanceof ConstantInstruction
or
instr instanceof StringConstantInstruction
or
// This could be a conversion on a string literal
predictableInstruction(instr.(UnaryInstruction).getUnary())
}
/**
* Functions that we should only allow taint to flow through (to the return
* value) if all but the source argument are 'predictable'. This is done to
* emulate the old security library's implementation rather than due to any
* strong belief that this is the right approach.
*
* Note that the list itself is not very principled; it consists of all the
* functions listed in the old security library's [default] `isPureFunction`
* that have more than one argument, but are not in the old taint tracking
* library's `returnArgument` predicate.
*/
predicate predictableOnlyFlow(string name) {
name =
[
"strcasestr", "strchnul", "strchr", "strchrnul", "strcmp", "strcspn", "strncmp", "strndup",
"strnlen", "strrchr", "strspn", "strstr", "strtod", "strtof", "strtol", "strtoll", "strtoq",
"strtoul"
]
}
private DataFlow::Node getNodeForSource(Expr source) {
isUserInput(source, _) and
result = getNodeForExpr(source)
}
private DataFlow::Node getNodeForExpr(Expr node) {
node = DataFlow::ExprFlowCached::asExprInternal(result)
or
// Some of the sources in `isUserInput` are intended to match the value of
// an expression, while others (those modeled below) are intended to match
// the taint that propagates out of an argument, like the `char *` argument
// to `gets`. It's impossible here to tell which is which, but the "access
// to argv" source is definitely not intended to match an output argument,
// and it causes false positives if we let it.
//
// This case goes together with the similar (but not identical) rule in
// `nodeIsBarrierIn`.
result = DataFlow::definitionByReferenceNodeFromArgument(node) and
not argv(node.(VariableAccess).getTarget())
}
private predicate conflatePointerAndPointee(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Flow from `op` to `*op`.
exists(Operand operand, int indirectionIndex |
nodeHasOperand(nodeFrom, operand, indirectionIndex) and
nodeHasOperand(nodeTo, operand, indirectionIndex - 1)
)
or
// Flow from `instr` to `*instr`.
exists(Instruction instr, int indirectionIndex |
nodeHasInstruction(nodeFrom, instr, indirectionIndex) and
nodeHasInstruction(nodeTo, instr, indirectionIndex - 1)
)
}
private module DefaultTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
predicate isBarrierIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
conflatePointerAndPointee(nodeFrom, nodeTo)
}
}
private module DefaultTaintTrackingFlow = TaintTracking::Global<DefaultTaintTrackingConfig>;
private module ToGlobalVarTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
predicate isSink(DataFlow::Node sink) { sink.asVariable() instanceof GlobalOrNamespaceVariable }
predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
or
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
}
predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
predicate isBarrierIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
}
private module ToGlobalVarTaintTrackingFlow = TaintTracking::Global<ToGlobalVarTaintTrackingConfig>;
private module FromGlobalVarTaintTrackingConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
// This set of sources should be reasonably small, which is good for
// performance since the set of sinks is very large.
ToGlobalVarTaintTrackingFlow::flowTo(source)
}
predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
// Additional step for flow out of variables. There is no flow _into_
// variables in this configuration, so this step only serves to take flow
// out of a variable that's a source.
readsVariable(n2.asInstruction(), n1.asVariable())
}
predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
predicate isBarrierIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
}
private module FromGlobalVarTaintTrackingFlow =
TaintTracking::Global<FromGlobalVarTaintTrackingConfig>;
private predicate readsVariable(LoadInstruction load, Variable var) {
load.getSourceAddress().(VariableAddressInstruction).getAstVariable() = var
}
private predicate writesVariable(StoreInstruction store, Variable var) {
store.getDestinationAddress().(VariableAddressInstruction).getAstVariable() = var
}
/**
* A variable that has any kind of upper-bound check anywhere in the program. This is
* biased towards being inclusive because there are a lot of valid ways of doing an
* upper bounds checks if we don't consider where it occurs, for example:
* ```
* if (x < 10) { sink(x); }
*
* if (10 > y) { sink(y); }
*
* if (z > 10) { z = 10; }
* sink(z);
* ```
*/
// TODO: This coarse overapproximation, ported from the old taint tracking
// library, could be replaced with an actual semantic check that a particular
// variable _access_ is guarded by an upper-bound check. We probably don't want
// to do this right away since it could expose a lot of FPs that were
// previously suppressed by this predicate by coincidence.
private predicate hasUpperBoundsCheck(Variable var) {
exists(RelationalOperation oper, VariableAccess access |
oper.getAnOperand() = access and
access.getTarget() = var and
// Comparing to 0 is not an upper bound check
not oper.getAnOperand().getValue() = "0"
)
}
private predicate nodeIsBarrierEqualityCandidate(
DataFlow::Node node, Operand access, Variable checkedVar
) {
exists(Instruction instr | instr = node.asOperand().getDef() |
readsVariable(instr, checkedVar) and
any(IRGuardCondition guard).ensuresEq(access, _, _, instr.getBlock(), true)
)
}
cached
private module Cached {
cached
predicate nodeIsBarrier(DataFlow::Node node) {
exists(Variable checkedVar, Instruction instr | instr = node.asOperand().getDef() |
readsVariable(instr, checkedVar) and
hasUpperBoundsCheck(checkedVar)
)
or
exists(Variable checkedVar, Operand access |
/*
* This node is guarded by a condition that forces the accessed variable
* to equal something else. For example:
* ```
* x = taintsource()
* if (x == 10) {
* taintsink(x); // not considered tainted
* }
* ```
*/
nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
readsVariable(access.getDef(), checkedVar)
)
}
cached
predicate nodeIsBarrierIn(DataFlow::Node node) {
// don't use dataflow into taint sources, as this leads to duplicate results.
exists(Expr source | isUserInput(source, _) |
source = DataFlow::ExprFlowCached::asExprInternal(node)
or
// This case goes together with the similar (but not identical) rule in
// `getNodeForSource`.
node = DataFlow::definitionByReferenceNodeFromArgument(source)
)
or
// don't use dataflow into binary instructions if both operands are unpredictable
exists(BinaryInstruction iTo |
iTo = node.asInstruction() and
not predictableInstruction(iTo.getLeft()) and
not predictableInstruction(iTo.getRight()) and
// propagate taint from either the pointer or the offset, regardless of predictability
not iTo instanceof PointerArithmeticInstruction
)
or
// don't use dataflow through calls to pure functions if two or more operands
// are unpredictable
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
iTo = node.asInstruction() and
isPureFunction(iTo.getStaticCallTarget().getName()) and
iFrom1 = iTo.getAnArgument() and
iFrom2 = iTo.getAnArgument() and
not predictableInstruction(iFrom1) and
not predictableInstruction(iFrom2) and
iFrom1 != iFrom2
)
}
cached
Element adjustedSink(DataFlow::Node sink) {
// TODO: is it more appropriate to use asConvertedExpr here and avoid
// `getConversion*`? Or will that cause us to miss some cases where there's
// flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
// pretend there was flow to the converted `Expr` for the sake of
// compatibility.
sink.asExpr().getConversion*() = result
or
// For compatibility, send flow from arguments to parameters, even for
// functions with no body.
exists(FunctionCall call, int i |
sink.asExpr() = call.getArgument(pragma[only_bind_into](i)) and
result = resolveCall(call).getParameter(pragma[only_bind_into](i))
)
or
// For compatibility, send flow into a `Variable` if there is flow to any
// Load or Store of that variable.
exists(CopyInstruction copy |
copy.getSourceValue() = sink.asInstruction() and
(
readsVariable(copy, result) or
writesVariable(copy, result)
) and
not hasUpperBoundsCheck(result)
)
or
// For compatibility, send flow into a `NotExpr` even if it's part of a
// short-circuiting condition and thus might get skipped.
result.(NotExpr).getOperand() = sink.asExpr()
or
// Taint postfix and prefix crement operations when their operand is tainted.
result.(CrementOperation).getAnOperand() = sink.asExpr()
or
// Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted.
result.(AssignOperation).getAnOperand() = sink.asExpr()
or
result =
sink.asOperand()
.(SideEffectOperand)
.getUse()
.(ReadSideEffectInstruction)
.getArgumentDef()
.getUnconvertedResultExpression()
}
/**
* Step to return value of a modeled function when an input taints the
* dereference of the return value.
*/
cached
predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
n1 = callInput(call, modelIn) and
(
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
or
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
) and
call.getStaticCallTarget() = func and
modelOut.isReturnValueDeref() and
call = n2.asInstruction()
)
}
}
private import Cached
/**
* Holds if `tainted` may contain taint from `source`.
*
* A tainted expression is either directly user input, or is
* computed from user input in a way that users can probably
* control the exact output of the computation.
*
* This doesn't include data flow through global variables.
* If you need that you must call `taintedIncludingGlobalVars`.
*/
cached
predicate tainted(Expr source, Element tainted) {
exists(DataFlow::Node sink |
DefaultTaintTrackingFlow::flow(getNodeForSource(source), sink) and
tainted = adjustedSink(sink)
)
}
/**
* Holds if `tainted` may contain taint from `source`, where the taint passed
* through a global variable named `globalVar`.
*
* A tainted expression is either directly user input, or is
* computed from user input in a way that users can probably
* control the exact output of the computation.
*
* This version gives the same results as tainted but also includes
* data flow through global variables.
*
* The parameter `globalVar` is the qualified name of the last global variable
* used to move the value from source to tainted. If the taint did not pass
* through a global variable, then `globalVar = ""`.
*/
cached
predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
tainted(source, tainted) and
globalVar = ""
or
exists(
DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
|
global = variableNode.getVariable() and
ToGlobalVarTaintTrackingFlow::flow(getNodeForSource(source), variableNode) and
FromGlobalVarTaintTrackingFlow::flow(variableNode, sink) and
tainted = adjustedSink(sink) and
global = globalVarFromId(globalVar)
)
}
/**
* Gets the global variable whose qualified name is `id`. Use this predicate
* together with `taintedIncludingGlobalVars`. Example:
*
* ```
* exists(string varName |
* taintedIncludingGlobalVars(source, tainted, varName) and
* var = globalVarFromId(varName)
* )
* ```
*/
GlobalOrNamespaceVariable globalVarFromId(string id) { id = result.getQualifiedName() }
/**
* Provides definitions for augmenting source/sink pairs with data-flow paths
* between them. From a `@kind path-problem` query, import this module in the
* global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath`
* in place of `tainted`.
*
* Importing this module will also import the query predicates that contain the
* taint paths.
*/
module TaintedWithPath {
private newtype TSingleton = MkSingleton()
/**
* A taint-tracking configuration that matches sources and sinks in the same
* way as the `tainted` predicate.
*
* Override `isSink` and `taintThroughGlobals` as needed, but do not provide
* a characteristic predicate.
*/
class TaintTrackingConfiguration extends TSingleton {
/** Override this to specify which elements are sources in this configuration. */
predicate isSource(Expr source) { exists(getNodeForSource(source)) }
/** Override this to specify which elements are sinks in this configuration. */
abstract predicate isSink(Element e);
/** Override this to specify which expressions are barriers in this configuration. */
predicate isBarrier(Expr e) { nodeIsBarrier(getNodeForExpr(e)) }
/**
* Override this predicate to `any()` to allow taint to flow through global
* variables.
*/
predicate taintThroughGlobals() { none() }
/** Gets a textual representation of this element. */
string toString() { result = "TaintTrackingConfiguration" }
}
private module AdjustedConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(TaintTrackingConfiguration cfg, Expr e |
cfg.isSource(e) and source = getNodeForExpr(e)
)
}
predicate isSink(DataFlow::Node sink) {
exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink)))
}
predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
conflatePointerAndPointee(n1, n2)
or
// Steps into and out of global variables
exists(TaintTrackingConfiguration cfg | cfg.taintThroughGlobals() |
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
or
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
)
or
additionalTaintStep(n1, n2)
}
predicate isBarrier(DataFlow::Node node) {
exists(TaintTrackingConfiguration cfg, Expr e | cfg.isBarrier(e) and node = getNodeForExpr(e))
}
predicate isBarrierIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
predicate neverSkip(Node node) { none() }
}
private module AdjustedFlow = TaintTracking::Global<AdjustedConfig>;
/*
* A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the
* inverse of) `adjustedSink`. For example, an `Expr` maps to all its
* conversions, and a `Variable` maps to all loads and stores from it. Because
* the path node is part of the tuple that constitutes the alert, this leads
* to duplicate alerts.
*
* To avoid showing duplicates, we edit the graph to replace the final node
* coming from the data-flow library with a node that matches exactly the
* `Element` sink that's requested.
*
* The same is done for sources.
*/
private newtype TPathNode =
TWrapPathNode(AdjustedFlow::PathNode n) or
// There's a single newtype constructor for both sources and sinks since
// that makes it easiest to deal with the case where source = sink.
TEndpointPathNode(Element e) {
exists(DataFlow::Node sourceNode, DataFlow::Node sinkNode |
AdjustedFlow::flow(sourceNode, sinkNode)
|
sourceNode = getNodeForExpr(e) and
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSource(e))
or
e = adjustedSink(sinkNode) and
exists(TaintTrackingConfiguration ttCfg | ttCfg.isSink(e))
)
}
/** An opaque type used for the nodes of a data-flow path. */
class PathNode extends TPathNode {
/** Gets a textual representation of this element. */
string toString() { none() }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
none()
}
}
/**
* INTERNAL: Do not use.
*/
module Private {
/** Gets a predecessor `PathNode` of `pathNode`, if any. */
PathNode getAPredecessor(PathNode pathNode) { edges(result, pathNode) }
/** Gets the element that `pathNode` wraps, if any. */
Element getElementFromPathNode(PathNode pathNode) {
exists(DataFlow::Node node | node = pathNode.(WrapPathNode).inner().getNode() |
result = node.asInstruction().getAst()
or
result = node.asOperand().getDef().getAst()
)
or
result = pathNode.(EndpointPathNode).inner()
}
}
private class WrapPathNode extends PathNode, TWrapPathNode {
AdjustedFlow::PathNode inner() { this = TWrapPathNode(result) }
override string toString() { result = this.inner().toString() }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
private class EndpointPathNode extends PathNode, TEndpointPathNode {
Expr inner() { this = TEndpointPathNode(result) }
override string toString() { result = this.inner().toString() }
override predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
this.inner()
.getLocation()
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/** A PathNode whose `Element` is a source. It may also be a sink. */
private class InitialPathNode extends EndpointPathNode {
InitialPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSource(this.inner())) }
}
/** A PathNode whose `Element` is a sink. It may also be a source. */
private class FinalPathNode extends EndpointPathNode {
FinalPathNode() { exists(TaintTrackingConfiguration cfg | cfg.isSink(this.inner())) }
}
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) {
AdjustedFlow::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner())
or
// To avoid showing trivial-looking steps, we _replace_ the last node instead
// of adding an edge out of it.
exists(WrapPathNode sinkNode |
AdjustedFlow::PathGraph::edges(a.(WrapPathNode).inner(), sinkNode.inner()) and
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
)
or
// Same for the first node
exists(WrapPathNode sourceNode |
AdjustedFlow::PathGraph::edges(sourceNode.inner(), b.(WrapPathNode).inner()) and
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner())
)
or
// Finally, handle the case where the path goes directly from a source to a
// sink, meaning that they both need to be translated.
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
AdjustedFlow::PathGraph::edges(sourceNode.inner(), sinkNode.inner()) and
sourceNode.inner().getNode() = getNodeForExpr(a.(InitialPathNode).inner()) and
b.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
)
}
/**
* Holds if there is flow from `arg` to `out` across a call that can by summarized by the flow
* from `par` to `ret` within it, in the graph of data flow path explanations.
*/
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
AdjustedFlow::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner())
or
// To avoid showing trivial-looking steps, we _replace_ the last node instead
// of adding an edge out of it.
exists(WrapPathNode sinkNode |
AdjustedFlow::PathGraph::subpaths(arg.(WrapPathNode).inner(), par.(WrapPathNode).inner(),
ret.(WrapPathNode).inner(), sinkNode.inner()) and
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
)
or
// Same for the first node
exists(WrapPathNode sourceNode |
AdjustedFlow::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
ret.(WrapPathNode).inner(), out.(WrapPathNode).inner()) and
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner())
)
or
// Finally, handle the case where the path goes directly from a source to a
// sink, meaning that they both need to be translated.
exists(WrapPathNode sinkNode, WrapPathNode sourceNode |
AdjustedFlow::PathGraph::subpaths(sourceNode.inner(), par.(WrapPathNode).inner(),
ret.(WrapPathNode).inner(), sinkNode.inner()) and
sourceNode.inner().getNode() = getNodeForExpr(arg.(InitialPathNode).inner()) and
out.(FinalPathNode).inner() = adjustedSink(sinkNode.inner().getNode())
)
}
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
key = "semmle.label" and val = n.toString()
}
/**
* Holds if `tainted` may contain taint from `source`, where `sourceNode` and
* `sinkNode` are the corresponding `PathNode`s that can be used in a query
* to provide path explanations. Extend `TaintTrackingConfiguration` to use
* this predicate.
*
* A tainted expression is either directly user input, or is computed from
* user input in a way that users can probably control the exact output of
* the computation.
*/
predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
exists(DataFlow::Node flowSource, DataFlow::Node flowSink |
source = sourceNode.(InitialPathNode).inner() and
flowSource = getNodeForExpr(source) and
AdjustedFlow::flow(flowSource, flowSink) and
tainted = adjustedSink(flowSink) and
tainted = sinkNode.(FinalPathNode).inner()
)
}
private predicate isGlobalVariablePathNode(WrapPathNode n) {
n.inner().getNode().asVariable() instanceof GlobalOrNamespaceVariable
or
n.inner().getNode().asIndirectVariable() instanceof GlobalOrNamespaceVariable
}
private predicate edgesWithoutGlobals(PathNode a, PathNode b) {
edges(a, b) and
not isGlobalVariablePathNode(a) and
not isGlobalVariablePathNode(b)
}
/**
* Holds if `tainted` can be reached from a taint source without passing
* through a global variable.
*/
predicate taintedWithoutGlobals(Element tainted) {
exists(PathNode sourceNode, FinalPathNode sinkNode |
AdjustedConfig::isSource(sourceNode.(WrapPathNode).inner().getNode()) and
edgesWithoutGlobals+(sourceNode, sinkNode) and
tainted = sinkNode.inner()
)
}
}

View File

@@ -1,10 +0,0 @@
/**
* Support for tracking tainted data through the program. This is an alias for
* `semmle.code.cpp.ir.dataflow.DefaultTaintTracking` provided for backwards
* compatibility.
*
* Prefer to use `semmle.code.cpp.dataflow.TaintTracking` or
* `semmle.code.cpp.ir.dataflow.TaintTracking` when designing new queries.
*/
import semmle.code.cpp.ir.dataflow.DefaultTaintTracking

View File

@@ -1,654 +0,0 @@
/**
* DEPRECATED: we now use `semmle.code.cpp.ir.dataflow.DefaultTaintTracking`,
* which is based on the IR but designed to behave similarly to this old
* library.
*
* Provides the implementation of `semmle.code.cpp.security.TaintTracking`. Do
* not import this file directly.
*/
import cpp
import Security
/** Expressions that change the value of a variable */
private predicate valueSource(Expr expr) {
exists(AssignExpr ae | expr = ae.getLValue())
or
exists(FunctionCall fc, int i |
userInputArgument(fc, i) and
expr = fc.getArgument(i)
)
or
exists(FunctionCall c, int arg |
copyValueBetweenArguments(c.getTarget(), _, arg) and
expr = c.getArgument(arg)
)
or
exists(FunctionCall c, int arg |
c.getTarget().getParameter(arg).getType() instanceof ReferenceType and
expr = c.getArgument(arg)
)
}
/** Expressions that are inside an expression that changes the value of a variable */
private predicate insideValueSource(Expr expr) {
valueSource(expr)
or
insideValueSource(expr.getParent()) and
// A modification of array[offset] does not modify offset
not expr.getParent().(ArrayExpr).getArrayOffset() = expr
}
private predicate isPointer(Type type) {
type instanceof PointerType or
isPointer(type.(ReferenceType).getBaseType())
}
/**
* Tracks data flow from src to dest.
* If this is used in the left side of an assignment src and dest should be swapped
*/
private predicate moveToDependingOnSide(Expr src, Expr dest) {
exists(ParenthesisExpr e |
src = e.getAChild() and
dest = e
)
or
exists(ArrayExpr e |
src = e.getArrayBase() and
dest = e
)
or
exists(PointerDereferenceExpr e |
src = e.getOperand() and
dest = e
)
or
exists(AddressOfExpr e |
src = e.getOperand() and
dest = e
)
or
// if var+offset is tainted, then so is var
exists(VariableAccess base, BinaryOperation binop |
dest = binop and
(base = binop.getLeftOperand() or base = binop.getRightOperand()) and
isPointer(base.getType()) and
base.getTarget() instanceof LocalScopeVariable and
src = base and
// flow through pointer-pointer subtraction is dubious, the result should be
// a number bounded by the size of the pointed-to thing.
not binop instanceof PointerDiffExpr
)
or
exists(UnaryOperation unop |
dest = unop and
unop.getAnOperand() = src
)
or
exists(BinaryOperation binop |
dest = binop and
binop.getLeftOperand() = src and
predictable(binop.getRightOperand())
)
or
exists(BinaryOperation binop |
dest = binop and
binop.getRightOperand() = src and
predictable(binop.getLeftOperand())
)
or
exists(Cast cast |
dest = cast and
src = cast.getExpr()
)
or
exists(ConditionalExpr cond |
cond = dest and
(
cond.getThen() = src or
cond.getElse() = src
)
)
}
/**
* Track value flow between functions.
* Handles the following cases:
* - If an argument to a function is tainted, all the usages of the parameter inside the function are tainted
* - If a function obtains input from the user internally and returns it, all calls to the function are tainted
* - If an argument to a function is tainted and that parameter is returned, all calls to the function are not tainted
* (this is done to avoid false positives). Because of this we need to track if the tainted element came from an argument
* or not, and for that we use destFromArg
*/
deprecated private predicate betweenFunctionsValueMoveTo(
Element src, Element dest, boolean destFromArg
) {
not unreachable(src) and
not unreachable(dest) and
(
exists(Call call, int i |
src = call.getArgument(i) and
resolveCallWithParam(call, _, i, dest) and
destFromArg = true
)
or
// Only move the return of the function to the function itself if the value didn't came from an
// argument, or else we would taint all the calls to one function if one argument is tainted
// somewhere
exists(Function f, ReturnStmt ret |
ret.getEnclosingFunction() = f and
src = ret.getExpr() and
destFromArg = false and
dest = f
)
or
exists(Call call, Function f |
f = resolveCall(call) and
src = f and
dest = call and
destFromArg = false
)
or
// If a parameter of type reference is tainted inside a function, taint the argument too
exists(Call call, int pi, Parameter p |
resolveCallWithParam(call, _, pi, p) and
p.getType() instanceof ReferenceType and
src = p and
dest = call.getArgument(pi) and
destFromArg = false
)
)
}
// predicate folding for proper join-order
// bad magic: pushes down predicate that ruins join-order
pragma[nomagic]
deprecated private predicate resolveCallWithParam(Call call, Function called, int i, Parameter p) {
called = resolveCall(call) and
p = called.getParameter(i)
}
/** A variable for which flow through is allowed. */
deprecated library class FlowVariable extends Variable {
FlowVariable() {
(
this instanceof LocalScopeVariable or
this instanceof GlobalOrNamespaceVariable
) and
not argv(this)
}
}
/** A local scope variable for which flow through is allowed. */
deprecated library class FlowLocalScopeVariable extends Variable {
FlowLocalScopeVariable() { this instanceof LocalScopeVariable }
}
deprecated private predicate insideFunctionValueMoveTo(Element src, Element dest) {
not unreachable(src) and
not unreachable(dest) and
(
// Taint all variable usages when one is tainted
// This function taints global variables but doesn't taint from a global variable (see globalVariableValueMoveTo)
exists(FlowLocalScopeVariable v |
src = v and
dest = v.getAnAccess() and
not insideValueSource(dest)
)
or
exists(FlowVariable v |
src = v.getAnAccess() and
dest = v and
insideValueSource(src)
)
or
// Taint all union usages when one is tainted
// This function taints global variables but doesn't taint from a global variable (see globalVariableValueMoveTo)
exists(FlowLocalScopeVariable v, FieldAccess a |
unionAccess(v, _, a) and
src = v and
dest = a and
not insideValueSource(dest)
)
or
exists(FlowVariable v, FieldAccess a |
unionAccess(v, _, a) and
src = a and
dest = v and
insideValueSource(src)
)
or
// If a pointer is tainted, taint the original variable
exists(FlowVariable p, FlowVariable v, AddressOfExpr e |
p.getAnAssignedValue() = e and
e.getOperand() = v.getAnAccess() and
src = p and
dest = v
)
or
// If a reference is tainted, taint the original variable
exists(FlowVariable r, FlowVariable v |
r.getType() instanceof ReferenceType and
r.getInitializer().getExpr() = v.getAnAccess() and
src = r and
dest = v
)
or
exists(Variable var |
var = dest and
var.getInitializer().getExpr() = src
)
or
exists(AssignExpr ae |
src = ae.getRValue() and
dest = ae.getLValue()
)
or
exists(CommaExpr comma |
comma = dest and
comma.getRightOperand() = src
)
or
exists(FunctionCall c, int sourceArg, int destArg |
copyValueBetweenArguments(c.getTarget(), sourceArg, destArg) and
// Only consider copies from `printf`-like functions if the format is a string
(
exists(FormattingFunctionCall ffc, FormatLiteral format |
ffc = c and
format = ffc.getFormat() and
format.getConversionChar(sourceArg - ffc.getTarget().getNumberOfParameters()) = ["s", "S"]
)
or
not c.(FormattingFunctionCall).getFormat() instanceof FormatLiteral
or
not c instanceof FormattingFunctionCall
) and
src = c.getArgument(sourceArg) and
dest = c.getArgument(destArg)
)
or
exists(FunctionCall c, int sourceArg |
returnArgument(c.getTarget(), sourceArg) and
src = c.getArgument(sourceArg) and
dest = c
)
or
exists(FormattingFunctionCall formattingSend, int arg, FormatLiteral format |
dest = formattingSend and
formattingSend.getArgument(arg) = src and
format = formattingSend.getFormat() and
format.getConversionChar(arg - formattingSend.getTarget().getNumberOfParameters()) =
["s", "S", "@"]
)
or
// Expressions computed from tainted data are also tainted
exists(FunctionCall call | dest = call and isPureFunction(call.getTarget().getName()) |
call.getAnArgument() = src and
forall(Expr arg | arg = call.getAnArgument() | arg = src or predictable(arg)) and
// flow through `strlen` tends to cause dubious results, if the length is
// bounded.
not call.getTarget().getName() = "strlen"
)
or
exists(Element a, Element b |
moveToDependingOnSide(a, b) and
if insideValueSource(a) then (src = b and dest = a) else (src = a and dest = b)
)
)
}
/**
* Handles data flow from global variables to its usages.
* The tainting for the global variable itself is done at insideFunctionValueMoveTo.
*/
private predicate globalVariableValueMoveTo(GlobalOrNamespaceVariable src, Expr dest) {
not unreachable(dest) and
(
exists(GlobalOrNamespaceVariable v |
src = v and
dest = v.getAnAccess() and
not insideValueSource(dest)
)
or
exists(GlobalOrNamespaceVariable v, FieldAccess a |
unionAccess(v, _, a) and
src = v and
dest = a and
not insideValueSource(dest)
)
)
}
private predicate unionAccess(Variable v, Field f, FieldAccess a) {
f.getDeclaringType() instanceof Union and
a.getTarget() = f and
a.getQualifier() = v.getAnAccess()
}
deprecated GlobalOrNamespaceVariable globalVarFromId(string id) {
if result instanceof NamespaceVariable
then id = result.getNamespace() + "::" + result.getName()
else id = result.getName()
}
/**
* A variable that has any kind of upper-bound check anywhere in the program. This is
* biased towards being inclusive because there are a lot of valid ways of doing an
* upper bounds checks if we don't consider where it occurs, for example:
* ```
* if (x < 10) { sink(x); }
*
* if (10 > y) { sink(y); }
*
* if (z > 10) { z = 10; }
* sink(z);
* ```
*/
private predicate hasUpperBoundsCheck(Variable var) {
exists(RelationalOperation oper, VariableAccess access |
oper.getAnOperand() = access and
access.getTarget() = var and
// Comparing to 0 is not an upper bound check
not oper.getAnOperand().getValue() = "0"
)
}
cached
deprecated private predicate taintedWithArgsAndGlobalVars(
Element src, Element dest, boolean destFromArg, string globalVar
) {
isUserInput(src, _) and
not unreachable(src) and
dest = src and
destFromArg = false and
globalVar = ""
or
exists(Element other, boolean otherFromArg, string otherGlobalVar |
taintedWithArgsAndGlobalVars(src, other, otherFromArg, otherGlobalVar)
|
not unreachable(dest) and
not hasUpperBoundsCheck(dest) and
(
// Direct flow from one expression to another.
betweenFunctionsValueMoveTo(other, dest, destFromArg) and
(destFromArg = true or otherFromArg = false) and
globalVar = otherGlobalVar
or
insideFunctionValueMoveTo(other, dest) and
destFromArg = otherFromArg and
globalVar = otherGlobalVar
or
exists(GlobalOrNamespaceVariable v |
v = other and
globalVariableValueMoveTo(v, dest) and
destFromArg = false and
v = globalVarFromId(globalVar)
)
)
)
}
/**
* A tainted expression is either directly user input, or is
* computed from user input in a way that users can probably
* control the exact output of the computation.
*
* This doesn't include data flow through global variables.
* If you need that you must call taintedIncludingGlobalVars.
*/
deprecated predicate tainted(Expr source, Element tainted) {
taintedWithArgsAndGlobalVars(source, tainted, _, "")
}
/**
* A tainted expression is either directly user input, or is
* computed from user input in a way that users can probably
* control the exact output of the computation.
*
* This version gives the same results as tainted but also includes
* data flow through global variables.
*
* The parameter `globalVar` is the name of the last global variable used to move the
* value from source to tainted.
*/
deprecated predicate taintedIncludingGlobalVars(Expr source, Element tainted, string globalVar) {
taintedWithArgsAndGlobalVars(source, tainted, _, globalVar)
}
/**
* A predictable expression is one where an external user can predict
* the value. For example, a literal in the source code is considered
* predictable.
*/
private predicate predictable(Expr expr) {
expr instanceof Literal
or
exists(BinaryOperation binop | binop = expr |
predictable(binop.getLeftOperand()) and predictable(binop.getRightOperand())
)
or
exists(UnaryOperation unop | unop = expr | predictable(unop.getOperand()))
}
private int maxArgIndex(Function f) {
result =
max(FunctionCall fc, int toMax |
fc.getTarget() = f and toMax = fc.getNumberOfArguments() - 1
|
toMax
)
}
/** Functions that copy the value of one argument to another */
private predicate copyValueBetweenArguments(Function f, int sourceArg, int destArg) {
f.hasGlobalOrStdName("memcpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("__builtin___memcpy_chk") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("memmove") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("strcat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("_mbscat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("wcscat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("strncat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("_mbsncat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("wcsncat") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("strcpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("_mbscpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("wcscpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("strncpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("_mbsncpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalOrStdName("wcsncpy") and sourceArg = 1 and destArg = 0
or
f.hasGlobalName("inet_aton") and sourceArg = 0 and destArg = 1
or
f.hasGlobalName("inet_pton") and sourceArg = 1 and destArg = 2
or
f.hasGlobalOrStdName("strftime") and sourceArg in [2 .. maxArgIndex(f)] and destArg = 0
or
exists(FormattingFunction ff | ff = f |
sourceArg in [ff.getFormatParameterIndex() .. maxArgIndex(f)] and
destArg = ff.getOutputParameterIndex(false)
)
}
/** Functions where if one of the arguments is tainted, the result should be tainted */
private predicate returnArgument(Function f, int sourceArg) {
f.hasGlobalName("memcpy") and sourceArg = 0
or
f.hasGlobalName("__builtin___memcpy_chk") and sourceArg = 0
or
f.hasGlobalOrStdName("memmove") and sourceArg = 0
or
f.hasGlobalOrStdName("strcat") and sourceArg = 0
or
f.hasGlobalName("_mbscat") and sourceArg = 0
or
f.hasGlobalOrStdName("wcsncat") and sourceArg = 0
or
f.hasGlobalOrStdName("strncat") and sourceArg = 0
or
f.hasGlobalName("_mbsncat") and sourceArg = 0
or
f.hasGlobalOrStdName("wcsncat") and sourceArg = 0
or
f.hasGlobalOrStdName("strcpy") and sourceArg = 0
or
f.hasGlobalName("_mbscpy") and sourceArg = 0
or
f.hasGlobalOrStdName("wcscpy") and sourceArg = 0
or
f.hasGlobalOrStdName("strncpy") and sourceArg = 0
or
f.hasGlobalName("_mbsncpy") and sourceArg = 0
or
f.hasGlobalOrStdName("wcsncpy") and sourceArg = 0
or
f.hasGlobalName("inet_ntoa") and sourceArg = 0
or
f.hasGlobalName("inet_addr") and sourceArg = 0
or
f.hasGlobalName("inet_network") and sourceArg = 0
or
f.hasGlobalName("inet_ntoa") and sourceArg = 0
or
f.hasGlobalName("inet_makeaddr") and
(sourceArg = 0 or sourceArg = 1)
or
f.hasGlobalName("inet_lnaof") and sourceArg = 0
or
f.hasGlobalName("inet_netof") and sourceArg = 0
or
f.hasGlobalName("gethostbyname") and sourceArg = 0
or
f.hasGlobalName("gethostbyaddr") and sourceArg = 0
}
/**
* Resolve potential target function(s) for `call`.
*
* If `call` is a call through a function pointer (`ExprCall`) or
* targets a virtual method, simple data flow analysis is performed
* in order to identify target(s).
*/
deprecated Function resolveCall(Call call) {
result = call.getTarget()
or
result = call.(DataSensitiveCallExpr).resolve()
}
/** A data sensitive call expression. */
abstract deprecated library class DataSensitiveCallExpr extends Expr {
DataSensitiveCallExpr() { not unreachable(this) }
abstract Expr getSrc();
cached
abstract Function resolve();
/**
* Whether `src` can flow to this call expression.
*
* Searches backwards from `getSrc()` to `src`.
*/
predicate flowsFrom(Element src, boolean allowFromArg) {
src = this.getSrc() and allowFromArg = true
or
exists(Element other, boolean allowOtherFromArg | this.flowsFrom(other, allowOtherFromArg) |
exists(boolean otherFromArg | betweenFunctionsValueMoveToStatic(src, other, otherFromArg) |
otherFromArg = true and allowOtherFromArg = true and allowFromArg = true
or
otherFromArg = false and allowFromArg = false
)
or
insideFunctionValueMoveTo(src, other) and allowFromArg = allowOtherFromArg
or
globalVariableValueMoveTo(src, other) and allowFromArg = true
)
}
}
/** Call through a function pointer. */
deprecated library class DataSensitiveExprCall extends DataSensitiveCallExpr, ExprCall {
override Expr getSrc() { result = this.getExpr() }
override Function resolve() {
exists(FunctionAccess fa | this.flowsFrom(fa, true) | result = fa.getTarget())
}
}
/** Call to a virtual function. */
deprecated library class DataSensitiveOverriddenFunctionCall extends DataSensitiveCallExpr,
FunctionCall
{
DataSensitiveOverriddenFunctionCall() {
exists(this.getTarget().(VirtualFunction).getAnOverridingFunction())
}
override Expr getSrc() { result = this.getQualifier() }
override MemberFunction resolve() {
exists(NewExpr new |
this.flowsFrom(new, true) and
memberFunctionFromNewExpr(new, result) and
result.overrides*(this.getTarget().(VirtualFunction))
)
}
}
private predicate memberFunctionFromNewExpr(NewExpr new, MemberFunction f) {
f = new.getAllocatedType().(Class).getAMemberFunction()
}
/** Same as `betweenFunctionsValueMoveTo`, but calls are resolved to their static target. */
private predicate betweenFunctionsValueMoveToStatic(Element src, Element dest, boolean destFromArg) {
not unreachable(src) and
not unreachable(dest) and
(
exists(FunctionCall call, Function called, int i |
src = call.getArgument(i) and
called = call.getTarget() and
dest = called.getParameter(i) and
destFromArg = true
)
or
// Only move the return of the function to the function itself if the value didn't came from an
// argument, or else we would taint all the calls to one function if one argument is tainted
// somewhere
exists(Function f, ReturnStmt ret |
ret.getEnclosingFunction() = f and
src = ret.getExpr() and
destFromArg = false and
dest = f
)
or
exists(FunctionCall call, Function f |
call.getTarget() = f and
src = f and
dest = call and
destFromArg = false
)
or
// If a parameter of type reference is tainted inside a function, taint the argument too
exists(FunctionCall call, Function f, int pi, Parameter p |
call.getTarget() = f and
f.getParameter(pi) = p and
p.getType() instanceof ReferenceType and
src = p and
dest = call.getArgument(pi) and
destFromArg = false
)
)
}