Files
codeql/cpp/ql/lib/semmle/code/cpp/controlflow/DefinitionsAndUses.qll
2023-10-13 12:46:56 +02:00

384 lines
13 KiB
Plaintext

/**
* Provides classes and predicates for reasoning about definitions and uses of variables.
*/
import cpp
private import semmle.code.cpp.controlflow.StackVariableReachability
private import semmle.code.cpp.dataflow.EscapesTree
/**
* Computed relation: A "definition-use-pair" for a particular variable.
* Intuitively, this means that `def` is an assignment to `var`, and
* `use` is a read of `var` at which the value assigned by `def` may
* be read. (There can be more than one definition reaching a single
* use, and a single definition can reach many uses.)
*/
predicate definitionUsePair(SemanticStackVariable var, Expr def, Expr use) {
exists(Use u |
u = use and
def.(Def).reaches(true, var, u) and
u.getVariable(false) = var
)
}
/**
* Holds if the definition `def` of some stack variable can reach `node`, which
* is a definition or use, without crossing definitions of the same variable.
*/
predicate definitionReaches(Expr def, Expr node) { def.(Def).reaches(true, _, node) }
private predicate hasAddressOfAccess(SemanticStackVariable var) {
var.getAnAccess().isAddressOfAccessNonConst()
}
/**
* A use/use pair is a pair of uses of a particular variable `var`
* where the same value might be read (meaning that there is a
* control-flow path from `first` to `second` without crossing
* a definition of `var`).
*/
predicate useUsePair(SemanticStackVariable var, Expr first, Expr second) {
(
/*
* If the address of `var` is used anywhere, we require that
* a definition of `var` can reach the first use. This is to
* rule out examples such as this:
* ```
* int x = 0;
* int& y = x;
* use(x);
* y = 1;
* use(x); // not a use-use pair with the use above
* ```
*/
hasAddressOfAccess(var) implies definitionUsePair(var, _, first)
) and
// If `first` is both a def and a use, like `x` in `f(x)` when `f` takes a
// reference parameter, it'll play the role of a use first and a def second.
// We are not interested in uses that follow its role as a def.
not definition(var, first) and
exists(Use u |
u = second and
first.(Use).reaches(false, var, u) and
u.getVariable(false) = var
)
}
/**
* Holds if `va` is a use of the parameter `p` that could
* observe the passed-in value.
*/
predicate parameterUsePair(Parameter p, VariableAccess va) {
not parameterIsOverwritten(_, p) and va.getTarget() = p
or
exists(ParameterDef pd | pd.reaches(true, p, va.(Use)))
}
/**
* Utility class: A definition or use of a stack variable.
*/
class DefOrUse extends ControlFlowNodeBase {
DefOrUse() {
// Uninstantiated templates are purely syntax, and only on instantiation
// will they be complete with information about types, conversions, call
// targets, etc.
not this.(ControlFlowNode).isFromUninstantiatedTemplate(_)
}
/**
* Gets the target variable of this definition or use.
*
* The `isDef` parameter is needed in order to ensure disjointness
* of definitions and uses; in a variable initialization such as
* `int x = y`, `y` is both a definition of `x`, as well as a use of
* `y`, and `isDef` is used to distinguish the two situations.
*/
abstract SemanticStackVariable getVariable(boolean isDef);
pragma[noinline]
private predicate reaches_helper(boolean isDef, SemanticStackVariable v, BasicBlock bb, int i) {
this.getVariable(isDef) = v and
bb.getNode(i) = this
}
/**
* Holds if the value of `v` in this control-flow node reaches
* `defOrUse` along some control-flow path without crossing a
* definition of `v`.
*/
cached
predicate reaches(boolean isDef, SemanticStackVariable v, DefOrUse defOrUse) {
/*
* Implementation detail: this predicate and its private auxiliary
* predicates are instances of the more general predicates in
* StackVariableReachability.qll, and should be kept in sync.
*
* Unfortunately, caching of abstract predicates does not work well, so the
* predicates are duplicated for now.
*/
exists(BasicBlock bb, int i | this.reaches_helper(isDef, v, bb, i) |
exists(int j |
j > i and
(bbDefAt(bb, j, v, defOrUse) or bbUseAt(bb, j, v, defOrUse)) and
not exists(int k | this.firstBarrierAfterThis(isDef, k, v) and k < j)
)
or
not this.firstBarrierAfterThis(isDef, _, v) and
bbSuccessorEntryReachesDefOrUse(bb, v, defOrUse, _)
)
}
private predicate firstBarrierAfterThis(boolean isDef, int j, SemanticStackVariable v) {
exists(BasicBlock bb, int i |
this.getVariable(isDef) = v and
bb.getNode(i) = this and
j = min(int k | bbBarrierAt(bb, k, v, _) and k > i)
)
}
}
/** A definition of a stack variable. */
class Def extends DefOrUse {
Def() { definition(_, this) }
override SemanticStackVariable getVariable(boolean isDef) {
definition(result, this) and isDef = true
}
}
/** Holds if parameter `p` is potentially overwritten in the body of `f`. */
private predicate parameterIsOverwritten(Function f, Parameter p) {
f.getAParameter() = p and
definitionBarrier(p, _)
}
/** A definition of a parameter. */
class ParameterDef extends DefOrUse {
ParameterDef() {
// Optimization: parameters that are not overwritten do not require
// reachability analysis
exists(Function f | parameterIsOverwritten(f, _) | this = f.getEntryPoint())
}
override SemanticStackVariable getVariable(boolean isDef) {
exists(Function f | parameterIsOverwritten(f, result) | this = f.getEntryPoint()) and
isDef = true
}
}
/** A use of a stack variable. */
class Use extends DefOrUse {
Use() { useOfVar(_, this) }
override SemanticStackVariable getVariable(boolean isDef) {
useOfVar(result, this) and isDef = false
}
}
private predicate bbUseAt(BasicBlock bb, int i, SemanticStackVariable v, Use use) {
bb.getNode(i) = use and
use.getVariable(false) = v
}
private predicate bbDefAt(BasicBlock bb, int i, SemanticStackVariable v, Def def) {
bb.getNode(i) = def and
def.getVariable(true) = v
}
private predicate bbBarrierAt(BasicBlock bb, int i, SemanticStackVariable v, ControlFlowNode node) {
bb.getNode(i) = node and
definitionBarrier(v, node)
}
/**
* Holds if the entry node of a _successor_ of basic block `bb` can
* reach `defOrUse` without going through a barrier for `v`.
* `defOrUse` may either be in the successor block, or in another
* basic block reachable from the successor.
*
* `skipsFirstLoopAlwaysTrueUponEntry` indicates whether the first loop
* on the path to `defOrUse` (if any), where the condition is provably
* true upon entry, is skipped (including the step from `bb` to the
* successor).
*/
private predicate bbSuccessorEntryReachesDefOrUse(
BasicBlock bb, SemanticStackVariable v, DefOrUse defOrUse,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry)
|
bbEntryReachesDefOrUseLocally(succ, v, defOrUse) and
succSkipsFirstLoopAlwaysTrueUponEntry = false and
not excludeReachesFunction(bb.getEnclosingFunction())
or
not bbBarrierAt(succ, _, v, _) and
bbSuccessorEntryReachesDefOrUse(succ, v, defOrUse, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesDefOrUseLocally(
BasicBlock bb, SemanticStackVariable v, DefOrUse defOrUse
) {
exists(int n | bbDefAt(bb, n, v, defOrUse) or bbUseAt(bb, n, v, defOrUse) |
not exists(int m | m < n | bbBarrierAt(bb, m, v, _))
)
}
/**
* Holds if `barrier` is either a (potential) definition of `v` or follows an
* access that gets the address of `v`. In both cases, the value of
* `v` after `barrier` cannot be assumed to be the same as before.
*/
predicate definitionBarrier(SemanticStackVariable v, ControlFlowNode barrier) {
definition(v, barrier)
or
exists(VariableAccess va |
// `va = barrier` does not work, as that could generate an
// incorrect use-use pair (a barrier must exist _between_
// uses):
//
// x = 0;
// int& y = x; // use1
// y = 1;
// use(x); // use2
va.getASuccessor() = barrier and
va.getTarget() = v and
va.isAddressOfAccessNonConst() and
not exists(Call c | c.passesByReferenceNonConst(_, va)) // handled in definitionByReference
)
}
/**
* Holds if `def` is a (potential) assignment to stack variable `v`. That is,
* the variable may hold another value in the control-flow node(s)
* following `def` than before.
*/
predicate definition(SemanticStackVariable v, Expr def) {
def = v.getInitializer().getExpr()
or
variableAccessedAsValue(v.getAnAccess(), def.(Assignment).getLValue().getFullyConverted())
or
variableAccessedAsValue(v.getAnAccess(), def.(CrementOperation).getOperand().getFullyConverted())
or
exists(AsmStmt asmStmt |
def = asmStmt.getAChild() and
def = v.getAnAccess().getParent*()
)
or
definitionByReference(v.getAnAccess(), def)
}
/**
* Holds if `def` is a (definite) assignment to the stack variable `v`. `e` is
* the assigned expression.
*/
predicate exprDefinition(SemanticStackVariable v, ControlFlowNode def, Expr e) {
def = v.getInitializer().getExpr() and
def = e and
not v.getType() instanceof ReferenceType
or
exists(AssignExpr assign |
def = assign and
assign.getLValue() = v.getAnAccess() and
e = assign.getRValue()
)
}
pragma[noinline]
private predicate containsAssembly(Function f) { f = any(AsmStmt s).getEnclosingFunction() }
/**
* Holds if `va` is a variable passed by reference as argument `def`, where the
* callee potentially assigns the corresponding parameter. The
* definitions-and-uses library models assignment by reference as if it happens
* on evaluation of the argument, `def`.
*
* All library functions except `std::move` are assumed to assign
* call-by-reference parameters, and source code functions are assumed to
* assign call-by-reference parameters that are accessed somewhere within the
* function. The latter is an over-approximation, but avoids having to take
* aliasing of the parameter into account.
*/
predicate definitionByReference(VariableAccess va, Expr def) {
exists(Call c, int i |
c.passesByReferenceNonConst(i, va) and
def = c.getArgument(i) and
forall(Function f | f = c.getTarget() and f.hasEntryPoint() |
exists(f.getParameter(i).getAnAccess())
or
f.isVarargs() and i >= f.getNumberOfParameters()
or
// If the callee contains an AsmStmt, then it is better to
// be conservative and assume that the parameter can be
// modified.
containsAssembly(f)
)
)
or
// Extractor artifact when using templates; an expression call where the
// target expression is an unknown literal. Since we cannot know what
// these calls represent, assume they assign all their arguments
exists(ExprCall c, Literal l |
l = c.getExpr() and
c.getAnArgument() = va and
not exists(l.getValue()) and
def = va
)
}
private predicate accessInSizeof(VariableAccess use) { use.getParent+() instanceof SizeofOperator }
/**
* Holds if `use` is a non-definition use of stack variable `v`. This will not
* include accesses on the LHS of an assignment (which don't retrieve the
* variable value), but _will_ include accesses in increment/decrement
* operations.
*/
predicate useOfVar(SemanticStackVariable v, VariableAccess use) {
use = v.getAnAccess() and
not exists(AssignExpr e | e.getLValue() = use) and
// sizeof accesses are resolved at compile-time
not accessInSizeof(use)
}
/**
* Same as `useOfVar(v, use)`, but with the extra condition that the
* access `use` actually reads the value of the stack variable `v` at
* run-time. (Non-examples include `&x` and function calls where the
* callee does not use the relevant parameter.)
*/
predicate useOfVarActual(SemanticStackVariable v, VariableAccess use) {
useOfVar(v, use) and
exists(Expr e |
variableAccessedAsValue(use, e) and
not exists(AssignExpr assign | e = assign.getLValue().getFullyConverted())
) and
// A call to a function that does not use the relevant parameter
not exists(Call c, int i |
c.getArgument(i) = use and
c.getTarget().hasEntryPoint() and
not exists(c.getTarget().getParameter(i).getAnAccess())
)
}
/**
* A function that should be excluded from 'reaches' analysis.
*
* The current implementation performs badly in some cases where a
* function has both a huge number of def/uses and a huge number of
* basic blocks, typically in generated code. We exclude these
* functions based on the former because it is cheaper to calculate.
*/
private predicate excludeReachesFunction(Function f) {
exists(int defOrUses |
defOrUses =
count(Def def | def.(ControlFlowNode).getControlFlowScope() = f) +
count(Use use | use.(ControlFlowNode).getControlFlowScope() = f) and
defOrUses >= 13000
)
}