Packaging: Refactor the cpp libraries

This PR separates the core cpp packs into `codeql/cpp-queries` and
`codeql/cpp-all`.

There are very few lines of code changed. Almost all changes are moving
files around.
This commit is contained in:
Andrew Eisenberg
2021-08-16 17:23:27 -07:00
parent e566fb9c5a
commit 2c5dd2dfa3
421 changed files with 175 additions and 159 deletions

View File

@@ -0,0 +1,282 @@
/**
* Provides a library for reasoning about control flow at the granularity of basic blocks.
* This is usually much more efficient than reasoning directly at the level of `ControlFlowNode`s.
*/
import cpp
private import internal.PrimitiveBasicBlocks
private import internal.ConstantExprs
/*
* `BasicBlock`s are refinements of `PrimitiveBasicBlock`s, taking
* impossible CFG edges into account (using the `successors_adapted`
* relation). The refinement manifests itself in two changes:
*
* - The successor relation on `BasicBlock`s uses `successors_adapted`
* (instead of `successors_extended` used by `PrimtiveBasicBlock`s). Consequently,
* some edges between `BasicBlock`s may be removed. Example:
* ```
* x = 1; // s1
* if (true) { // s2
* x = 2; // s3
* } else {
* x = 3; // s4
* }
* ```
* The `BasicBlock` successor edge from the basic block containing `s1`
* and `s2` to the basic block containing `s4` is removed.
*
* - `PrimitiveBasicBlock`s may be split up into two or more
* `BasicBlock`s: Internal nodes of `PrimitiveBasicBlock`s whose
* predecessor edges have been removed (unreachable code) will be entry
* points of new `BasicBlock`s. Consequently, each entry point of a
* `PrimitiveBasicBlock` will also be an entry point of a `BasicBlock`,
* but the converse does not necessarily hold. Example:
* ```
* x = 1; // s5
* abort(); // s6
* x = 2; // s7
* ```
* `s5`-`s7` belong to the same `PrimitiveBasicBlock`, but the CFG edge
* from `s6` to `s7` is impossible, so `s7` will be the entry point of
* its own (unreachable) `BasicBlock`.
*
* Note that, although possible, two or more `PrimitiveBasicBlock`s are
* never merged to one `BasicBlock`: Consider the first example above;
* it would be possible to define a single `BasicBlock` consisting of
* `s1`-`s3`, however, the result would be counter-intuitive.
*/
private import Cached
cached
private module Cached {
/**
* Any node that is the entry point of a primitive basic block is
* also the entry point of a basic block. In addition, all nodes
* with a primitive successor, where the predecessor has been pruned
* (that is, `getAPredecessor()` does not exist while a predecessor
* using the primitive `successors_extended` relation does exist), is also
* considered a basic block entry node.
*/
cached
predicate basic_block_entry_node(ControlFlowNode node) {
primitive_basic_block_entry_node(node) or
non_primitive_basic_block_entry_node(node)
}
private predicate non_primitive_basic_block_entry_node(ControlFlowNode node) {
not primitive_basic_block_entry_node(node) and
not exists(node.getAPredecessor()) and
successors_extended(node, _)
}
/**
* Holds if basic block `bb` equals a primitive basic block.
*
* There are two situations in which this is *not* the case:
*
* - Either the entry node of `bb` does not correspond to an
* entry node of a primitive basic block, or
* - The primitive basic block with the same entry node contains
* a (non-entry) node which is the entry node of a non-primitive
* basic block (that is, the primitive basic block has been split
* up).
*
* This predicate is used for performance optimization only:
* Whenever a `BasicBlock` equals a `PrimitiveBasicBlock`, we can
* reuse predicates already computed for `PrimitiveBasicBlocks`.
*/
private predicate equalsPrimitiveBasicBlock(BasicBlock bb) {
primitive_basic_block_entry_node(bb) and
not exists(int i |
i > 0 and
non_primitive_basic_block_entry_node(bb.(PrimitiveBasicBlock).getNode(i))
)
}
/** Holds if `node` is the `pos`th control-flow node in basic block `bb`. */
cached
predicate basic_block_member(ControlFlowNode node, BasicBlock bb, int pos) {
equalsPrimitiveBasicBlock(bb) and primitive_basic_block_member(node, bb, pos) // reuse already computed relation
or
non_primitive_basic_block_member(node, bb, pos)
}
private predicate non_primitive_basic_block_member(ControlFlowNode node, BasicBlock bb, int pos) {
not equalsPrimitiveBasicBlock(bb) and node = bb and pos = 0
or
not node instanceof BasicBlock and
exists(ControlFlowNode pred | successors_extended(pred, node) |
non_primitive_basic_block_member(pred, bb, pos - 1)
)
}
/** Gets the number of control-flow nodes in the basic block `bb`. */
cached
int bb_length(BasicBlock bb) {
if equalsPrimitiveBasicBlock(bb)
then result = bb.(PrimitiveBasicBlock).length() // reuse already computed relation
else result = strictcount(ControlFlowNode node | basic_block_member(node, bb, _))
}
/** Successor relation for basic blocks. */
cached
predicate bb_successor_cached(BasicBlock pred, BasicBlock succ) {
exists(ControlFlowNode last |
basic_block_member(last, pred, bb_length(pred) - 1) and
last.getASuccessor() = succ
)
}
}
predicate bb_successor = bb_successor_cached/2;
/**
* A basic block in the C/C++ control-flow graph.
*
* A basic block is a simple sequence of control-flow nodes,
* connected to each other and nothing else:
*
* ```
* A - B - C - D ABCD is a basic block
* ```
*
* Any incoming or outgoing edges break the block into two:
*
* ```
* A - B > C - D AB is a basic block and CD is a basic block (C has two incoming edges)
*
*
* A - B < C - D AB is a basic block and CD is a basic block (B has two outgoing edges)
* ```
*/
class BasicBlock extends ControlFlowNodeBase {
BasicBlock() { basic_block_entry_node(this) }
/** Holds if this basic block contains `node`. */
predicate contains(ControlFlowNode node) { basic_block_member(node, this, _) }
/** Gets the `ControlFlowNode` at position `pos` in this basic block. */
ControlFlowNode getNode(int pos) { basic_block_member(result, this, pos) }
/** Gets a `ControlFlowNode` in this basic block. */
ControlFlowNode getANode() { basic_block_member(result, this, _) }
/** Gets a `BasicBlock` that is a direct successor of this basic block. */
BasicBlock getASuccessor() { bb_successor(this, result) }
/** Gets a `BasicBlock` that is a direct predecessor of this basic block. */
BasicBlock getAPredecessor() { bb_successor(result, this) }
/**
* Gets a `BasicBlock` such that the control-flow edge `(this, result)` may be taken
* when the outgoing edge of this basic block is an expression that is true.
*/
BasicBlock getATrueSuccessor() { result.getStart() = this.getEnd().getATrueSuccessor() }
/**
* Gets a `BasicBlock` such that the control-flow edge `(this, result)` may be taken
* when the outgoing edge of this basic block is an expression that is false.
*/
BasicBlock getAFalseSuccessor() { result.getStart() = this.getEnd().getAFalseSuccessor() }
/** Gets the final `ControlFlowNode` of this basic block. */
ControlFlowNode getEnd() { basic_block_member(result, this, bb_length(this) - 1) }
/** Gets the first `ControlFlowNode` of this basic block. */
ControlFlowNode getStart() { result = this }
/** Gets the number of `ControlFlowNode`s in this basic block. */
int length() { result = bb_length(this) }
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*
* Yields no result if this basic block spans multiple source files.
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
hasLocationInfoInternal(filepath, startline, startcolumn, filepath, endline, endcolumn)
}
pragma[noinline]
private predicate hasLocationInfoInternal(
string file, int line, int col, string endf, int endl, int endc
) {
this.getStart().getLocation().hasLocationInfo(file, line, col, _, _) and
this.getEnd().getLocation().hasLocationInfo(endf, _, _, endl, endc)
}
/** Gets the function containing this basic block. */
Function getEnclosingFunction() { result = this.getStart().getControlFlowScope() }
/**
* Holds if this basic block is in a loop of the control-flow graph. This
* includes loops created by `goto` statements. This predicate may not hold
* even if this basic block is syntactically inside a `while` loop if the
* necessary back edges are unreachable.
*/
predicate inLoop() { this.getASuccessor+() = this }
/**
* DEPRECATED since version 1.11: this predicate does not match the standard
* definition of _loop header_.
*
* Holds if this basic block is in a loop of the control-flow graph and
* additionally has an incoming edge that is not part of any loop containing
* this basic block. A typical example would be the basic block that computes
* `x > 0` in an outermost loop `while (x > 0) { ... }`.
*/
deprecated predicate isLoopHeader() {
this.inLoop() and
exists(BasicBlock pred | pred = this.getAPredecessor() and not pred = this.getASuccessor+())
}
/**
* Holds if control flow may reach this basic block from a function entry
* point or any handler of a reachable `try` statement.
*/
predicate isReachable() {
exists(Function f | f.getBlock() = this)
or
exists(TryStmt t, BasicBlock tryblock |
// a `Handler` preceeds the `CatchBlock`, and is always the beginning
// of a new `BasicBlock` (see `primitive_basic_block_entry_node`).
this.(Handler).getTryStmt() = t and
tryblock.isReachable() and
tryblock.contains(t)
)
or
exists(BasicBlock pred | pred.getASuccessor() = this and pred.isReachable())
}
/** Means `not isReachable()`. */
predicate isUnreachable() { not this.isReachable() }
}
/** Correct relation for reachability of ControlFlowNodes. */
predicate unreachable(ControlFlowNode n) {
exists(BasicBlock bb | bb.contains(n) and bb.isUnreachable())
}
/**
* An entry point of a function.
*/
class EntryBasicBlock extends BasicBlock {
EntryBasicBlock() { exists(Function f | this = f.getEntryPoint()) }
}
/**
* A basic block whose last node is the exit point of a function.
*/
class ExitBasicBlock extends BasicBlock {
ExitBasicBlock() {
getEnd() instanceof Function or
aborting(getEnd())
}
}

View File

@@ -0,0 +1,145 @@
/**
* Provides a library for reasoning about control flow at the granularity of
* individual nodes in the control-flow graph.
*/
import cpp
import BasicBlocks
private import semmle.code.cpp.controlflow.internal.ConstantExprs
private import semmle.code.cpp.controlflow.internal.CFG
/**
* A control-flow node is either a statement or an expression; in addition,
* functions are control-flow nodes representing the exit point of the
* function. The graph represents one possible evaluation order out of all the
* ones the compiler might have picked.
*
* Control-flow nodes have successors and predecessors at the expression level,
* so control flow is accurately represented in expressions as well as between
* statements. Statements and initializers precede their contained expressions,
* and expressions deeper in the tree precede those higher up; for example, the
* statement `x = y + 1` gets a control-flow graph that looks like
*
* ```
* ExprStmt -> y -> 1 -> (+) -> x -> (=)
* ```
*
* The first control-flow node in a function is the body of the function (a
* block), and the last is the function itself, which is used to represent the
* exit point.
*
* Each `throw` expression or `Handler` has a path (along any necessary
* destructor calls) to its nearest enclosing `Handler` within the same
* function, or to the exit point of the function if there is no such
* `Handler`. There are no edges from function calls to `Handler`s.
*/
class ControlFlowNode extends Locatable, ControlFlowNodeBase {
/** Gets a direct successor of this control-flow node, if any. */
ControlFlowNode getASuccessor() { successors_adapted(this, result) }
/** Gets a direct predecessor of this control-flow node, if any. */
ControlFlowNode getAPredecessor() { this = result.getASuccessor() }
/** Gets the function containing this control-flow node. */
Function getControlFlowScope() {
none() // overridden in subclasses
}
/** Gets the smallest statement containing this control-flow node. */
Stmt getEnclosingStmt() {
none() // overridden in subclasses
}
/**
* Holds if this node is the top-level expression of a conditional statement,
* meaning that `this.getATrueSuccessor()` or `this.getAFalseSuccessor()`
* will have a result.
*/
predicate isCondition() {
exists(this.getATrueSuccessor()) or
exists(this.getAFalseSuccessor())
}
/**
* Gets a node such that the control-flow edge `(this, result)` may be
* taken when this expression is true.
*/
ControlFlowNode getATrueSuccessor() {
qlCFGTrueSuccessor(this, result) and
result = getASuccessor()
}
/**
* Gets a node such that the control-flow edge `(this, result)` may be
* taken when this expression is false.
*/
ControlFlowNode getAFalseSuccessor() {
qlCFGFalseSuccessor(this, result) and
result = getASuccessor()
}
/** Gets the `BasicBlock` containing this control-flow node. */
BasicBlock getBasicBlock() { result.getANode() = this }
}
import ControlFlowGraphPublic
/**
* An element that is convertible to `ControlFlowNode`. This class is similar
* to `ControlFlowNode` except that is has no member predicates apart from
* `toString`.
*
* This class can be used as base class for classes that want to inherit the
* extent of `ControlFlowNode` without inheriting its public member predicates.
*/
class ControlFlowNodeBase extends ElementBase, @cfgnode { }
/**
* DEPRECATED: Use `ControlFlowNode.getATrueSuccessor()` instead.
* Holds when `n2` is a control-flow node such that the control-flow
* edge `(n1, n2)` may be taken when `n1` is an expression that is true.
*/
deprecated predicate truecond_base(ControlFlowNodeBase n1, ControlFlowNodeBase n2) {
qlCFGTrueSuccessor(n1, n2)
}
/**
* DEPRECATED: Use `ControlFlowNode.getAFalseSuccessor()` instead.
* Holds when `n2` is a control-flow node such that the control-flow
* edge `(n1, n2)` may be taken when `n1` is an expression that is false.
*/
deprecated predicate falsecond_base(ControlFlowNodeBase n1, ControlFlowNodeBase n2) {
qlCFGFalseSuccessor(n1, n2)
}
/**
* An abstract class that can be extended to add additional edges to the
* control-flow graph. Instances of this class correspond to the source nodes
* of such edges, and the predicate `getAnEdgeTarget` should be overridden to
* produce the target nodes of each source.
*
* Changing the control-flow graph in some queries and not others can be
* expensive in execution time and disk space. Most cached predicates in the
* library depend on the control-flow graph, so these predicates will be
* computed and cached for each variation of the control-flow graph
* that is used.
*
* Edges added by this class will still be removed by the library if they
* appear to be unreachable. See the documentation on `ControlFlowNode` for
* more information about the control-flow graph.
*/
abstract class AdditionalControlFlowEdge extends ControlFlowNodeBase {
/** Gets a target node of this edge, where the source node is `this`. */
abstract ControlFlowNodeBase getAnEdgeTarget();
}
/**
* Holds if there is a control-flow edge from `source` to `target` in either
* the extractor-generated control-flow graph or in a subclass of
* `AdditionalControlFlowEdge`. Use this relation instead of `qlCFGSuccessor`.
*/
predicate successors_extended(ControlFlowNodeBase source, ControlFlowNodeBase target) {
qlCFGSuccessor(source, target)
or
source.(AdditionalControlFlowEdge).getAnEdgeTarget() = target
}

View File

@@ -0,0 +1,166 @@
/**
* Provides a simple data flow analysis to find expressions that are definitely
* null or that may be null.
*/
import cpp
import Nullness
import Dereferenced
/**
* INTERNAL: Do not use.
* A string that identifies a data flow analysis along with a set of member
* predicates that implement this analysis.
*/
abstract class DataflowAnnotation extends string {
DataflowAnnotation() { this = ["pointer-null", "pointer-valid"] }
/** Holds if this annotation is the default annotation. */
abstract predicate isDefault();
/** Holds if this annotation is generated when analyzing expression `e`. */
abstract predicate generatedOn(Expr e);
/**
* Holds if this annotation is generated for the variable `v` when
* the control-flow edge `(src, dest)` is taken.
*/
abstract predicate generatedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest);
/**
* Holds if this annotation is removed for the variable `v` when
* the control-flow edge `(src, dest)` is taken.
*/
abstract predicate killedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest);
/** Holds if expression `e` is given this annotation. */
predicate marks(Expr e) {
this.generatedOn(e) and reachable(e)
or
this.marks(e.(AssignExpr).getRValue())
or
exists(LocalScopeVariable v | this.marks(v, e) and e = v.getAnAccess())
}
/** Holds if the variable `v` accessed in control-flow node `n` is given this annotation. */
predicate marks(LocalScopeVariable v, ControlFlowNode n) {
v.getAnAccess().getEnclosingFunction().getBlock() = n and
this.isDefault()
or
this.marks(n.(Initializer).getExpr()) and
v.getInitializer() = n
or
exists(ControlFlowNode pred |
this.generatedBy(v, pred, n) and
not this.killedBy(v, pred, n) and
reachable(pred)
)
or
exists(ControlFlowNode pred |
this.assignedBy(v, pred, n) and
not this.killedBy(v, pred, n) and
reachable(pred)
)
or
exists(ControlFlowNode pred |
this.preservedBy(v, pred, n) and
not this.killedBy(v, pred, n) and
reachable(pred)
)
}
/**
* Holds if the variable `v` preserves this annotation when the control-flow
* edge `(src, dest)` is taken.
*/
predicate preservedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest) {
this.marks(v, src) and
src.getASuccessor() = dest and
not v.getInitializer() = dest and
not v.getAnAssignment() = src
}
/**
* Holds if the variable `v` is assigned this annotation when `src` is an assignment
* expression that assigns to `v` and the control-flow edge `(src, dest)` is taken.
*/
predicate assignedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest) {
this.marks(src.(AssignExpr).getRValue()) and
src = v.getAnAssignment() and
src.getASuccessor() = dest
}
}
/**
* INTERNAL: Do not use.
* Two analyses relating to nullness: `"pointer-null"` and `"pointer-valid"`.
* These analyses mark expressions that are possibly null or possibly non-null,
* respectively.
*/
class NullnessAnnotation extends DataflowAnnotation {
NullnessAnnotation() { this = ["pointer-null", "pointer-valid"] }
override predicate isDefault() { this = "pointer-valid" }
override predicate generatedOn(Expr e) {
exists(Variable v |
v.getAnAccess() = e and
(v instanceof GlobalVariable or v instanceof Field) and
this.isDefault()
)
or
e instanceof Call and this = "pointer-valid"
or
nullValue(e) and this = "pointer-null"
}
override predicate killedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest) {
src.(AnalysedExpr).getNullSuccessor(v) = dest and this = "pointer-valid"
or
src.(AnalysedExpr).getNonNullSuccessor(v) = dest and this = "pointer-null"
or
dest = src.getASuccessor() and callByReference(src, v) and not this.isDefault()
or
dest = src.getASuccessor() and deref(v, src) and this = "pointer-null"
}
override predicate generatedBy(LocalScopeVariable v, ControlFlowNode src, ControlFlowNode dest) {
dest = src.getASuccessor() and
callByReference(src, v) and
this.isDefault()
}
}
/**
* Holds if evaluation of `op` dereferences `v`.
*/
predicate deref(Variable v, Expr op) { dereferencedByOperation(op, v.getAnAccess()) }
/**
* Holds if `call` passes `v` by reference, either with an explicit address-of
* operator or implicitly as a C++ reference. Both const and non-const
* references are included.
*/
predicate callByReference(Call call, Variable v) {
exists(Expr arg |
call.getAnArgument() = arg and
(
arg.(AddressOfExpr).getAChild() = v.getAnAccess()
or
v.getAnAccess() = arg and arg.getConversion*() instanceof ReferenceToExpr
)
)
}
/**
* Holds if a simple data-flow analysis determines that `e` is definitely null.
*/
predicate definitelyNull(Expr e) {
"pointer-null".(NullnessAnnotation).marks(e) and
not "pointer-valid".(NullnessAnnotation).marks(e)
}
/**
* Holds if a simple data-flow analysis determines that `e` may be null.
*/
predicate maybeNull(Expr e) { "pointer-null".(NullnessAnnotation).marks(e) }

View File

@@ -0,0 +1,383 @@
/**
* Provides classes and predicates for reasoning about definitions and uses of variables.
*/
import cpp
private import semmle.code.cpp.controlflow.StackVariableReachability
private import semmle.code.cpp.dataflow.EscapesTree
/**
* Computed relation: A "definition-use-pair" for a particular variable.
* Intuitively, this means that `def` is an assignment to `var`, and
* `use` is a read of `var` at which the value assigned by `def` may
* be read. (There can be more than one definition reaching a single
* use, and a single definition can reach many uses.)
*/
predicate definitionUsePair(SemanticStackVariable var, Expr def, Expr use) {
exists(Use u |
u = use and
def.(Def).reaches(true, var, u) and
u.getVariable(false) = var
)
}
/**
* Holds if the definition `def` of some stack variable can reach `node`, which
* is a definition or use, without crossing definitions of the same variable.
*/
predicate definitionReaches(Expr def, Expr node) { def.(Def).reaches(true, _, node.(DefOrUse)) }
private predicate hasAddressOfAccess(SemanticStackVariable var) {
var.getAnAccess().isAddressOfAccessNonConst()
}
/**
* A use/use pair is a pair of uses of a particular variable `var`
* where the same value might be read (meaning that there is a
* control-flow path from `first` to `second` without crossing
* a definition of `var`).
*/
predicate useUsePair(SemanticStackVariable var, Expr first, Expr second) {
(
/*
* If the address of `var` is used anywhere, we require that
* a definition of `var` can reach the first use. This is to
* rule out examples such as this:
* ```
* int x = 0;
* int& y = x;
* use(x);
* y = 1;
* use(x); // not a use-use pair with the use above
* ```
*/
hasAddressOfAccess(var) implies definitionUsePair(var, _, first)
) and
// If `first` is both a def and a use, like `x` in `f(x)` when `f` takes a
// reference parameter, it'll play the role of a use first and a def second.
// We are not interested in uses that follow its role as a def.
not definition(var, first) and
exists(Use u |
u = second and
first.(Use).reaches(false, var, u) and
u.getVariable(false) = var
)
}
/**
* Holds if `va` is a use of the parameter `p` that could
* observe the passed-in value.
*/
predicate parameterUsePair(Parameter p, VariableAccess va) {
not parameterIsOverwritten(_, p) and va.getTarget() = p
or
exists(ParameterDef pd | pd.reaches(true, p, va.(Use)))
}
/**
* Utility class: A definition or use of a stack variable.
*/
library class DefOrUse extends ControlFlowNodeBase {
DefOrUse() {
// Uninstantiated templates are purely syntax, and only on instantiation
// will they be complete with information about types, conversions, call
// targets, etc.
not this.(ControlFlowNode).isFromUninstantiatedTemplate(_)
}
/**
* Gets the target variable of this definition or use.
*
* The `isDef` parameter is needed in order to ensure disjointness
* of definitions and uses; in a variable initialization such as
* `int x = y`, `y` is both a definition of `x`, as well as a use of
* `y`, and `isDef` is used to distinguish the two situations.
*/
abstract SemanticStackVariable getVariable(boolean isDef);
pragma[noinline]
private predicate reaches_helper(boolean isDef, SemanticStackVariable v, BasicBlock bb, int i) {
getVariable(isDef) = v and
bb.getNode(i) = this
}
/**
* Holds if the value of `v` in this control-flow node reaches
* `defOrUse` along some control-flow path without crossing a
* definition of `v`.
*/
cached
predicate reaches(boolean isDef, SemanticStackVariable v, DefOrUse defOrUse) {
/*
* Implementation detail: this predicate and its private auxiliary
* predicates are instances of the more general predicates in
* StackVariableReachability.qll, and should be kept in sync.
*
* Unfortunately, caching of abstract predicates does not work well, so the
* predicates are duplicated for now.
*/
exists(BasicBlock bb, int i | reaches_helper(isDef, v, bb, i) |
exists(int j |
j > i and
(bbDefAt(bb, j, v, defOrUse) or bbUseAt(bb, j, v, defOrUse)) and
not exists(int k | firstBarrierAfterThis(isDef, k, v) and k < j)
)
or
not firstBarrierAfterThis(isDef, _, v) and
bbSuccessorEntryReachesDefOrUse(bb, v, defOrUse, _)
)
}
private predicate firstBarrierAfterThis(boolean isDef, int j, SemanticStackVariable v) {
exists(BasicBlock bb, int i |
getVariable(isDef) = v and
bb.getNode(i) = this and
j = min(int k | bbBarrierAt(bb, k, v, _) and k > i)
)
}
}
/** A definition of a stack variable. */
library class Def extends DefOrUse {
Def() { definition(_, this) }
override SemanticStackVariable getVariable(boolean isDef) {
definition(result, this) and isDef = true
}
}
/** Holds if parameter `p` is potentially overwritten in the body of `f`. */
private predicate parameterIsOverwritten(Function f, Parameter p) {
f.getAParameter() = p and
definitionBarrier(p, _)
}
/** A definition of a parameter. */
library class ParameterDef extends DefOrUse {
ParameterDef() {
// Optimization: parameters that are not overwritten do not require
// reachability analysis
exists(Function f | parameterIsOverwritten(f, _) | this = f.getEntryPoint())
}
override SemanticStackVariable getVariable(boolean isDef) {
exists(Function f | parameterIsOverwritten(f, result) | this = f.getEntryPoint()) and
isDef = true
}
}
/** A use of a stack variable. */
library class Use extends DefOrUse {
Use() { useOfVar(_, this) }
override SemanticStackVariable getVariable(boolean isDef) {
useOfVar(result, this) and isDef = false
}
}
private predicate bbUseAt(BasicBlock bb, int i, SemanticStackVariable v, Use use) {
bb.getNode(i) = use and
use.getVariable(false) = v
}
private predicate bbDefAt(BasicBlock bb, int i, SemanticStackVariable v, Def def) {
bb.getNode(i) = def and
def.getVariable(true) = v
}
private predicate bbBarrierAt(BasicBlock bb, int i, SemanticStackVariable v, ControlFlowNode node) {
bb.getNode(i) = node and
definitionBarrier(v, node)
}
/**
* Holds if the entry node of a _successor_ of basic block `bb` can
* reach `defOrUse` without going through a barrier for `v`.
* `defOrUse` may either be in the successor block, or in another
* basic block reachable from the successor.
*
* `skipsFirstLoopAlwaysTrueUponEntry` indicates whether the first loop
* on the path to `defOrUse` (if any), where the condition is provably
* true upon entry, is skipped (including the step from `bb` to the
* successor).
*/
private predicate bbSuccessorEntryReachesDefOrUse(
BasicBlock bb, SemanticStackVariable v, DefOrUse defOrUse,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry)
|
bbEntryReachesDefOrUseLocally(succ, v, defOrUse) and
succSkipsFirstLoopAlwaysTrueUponEntry = false and
not excludeReachesFunction(bb.getEnclosingFunction())
or
not bbBarrierAt(succ, _, v, _) and
bbSuccessorEntryReachesDefOrUse(succ, v, defOrUse, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesDefOrUseLocally(
BasicBlock bb, SemanticStackVariable v, DefOrUse defOrUse
) {
exists(int n | bbDefAt(bb, n, v, defOrUse) or bbUseAt(bb, n, v, defOrUse) |
not exists(int m | m < n | bbBarrierAt(bb, m, v, _))
)
}
/**
* Holds if `barrier` is either a (potential) definition of `v` or follows an
* access that gets the address of `v`. In both cases, the value of
* `v` after `barrier` cannot be assumed to be the same as before.
*/
predicate definitionBarrier(SemanticStackVariable v, ControlFlowNode barrier) {
definition(v, barrier)
or
exists(VariableAccess va |
// `va = barrier` does not work, as that could generate an
// incorrect use-use pair (a barrier must exist _between_
// uses):
//
// x = 0;
// int& y = x; // use1
// y = 1;
// use(x); // use2
va.getASuccessor() = barrier and
va.getTarget() = v and
va.isAddressOfAccessNonConst() and
not exists(Call c | c.passesByReferenceNonConst(_, va)) // handled in definitionByReference
)
}
/**
* Holds if `def` is a (potential) assignment to stack variable `v`. That is,
* the variable may hold another value in the control-flow node(s)
* following `def` than before.
*/
predicate definition(SemanticStackVariable v, Expr def) {
def = v.getInitializer().getExpr()
or
variableAccessedAsValue(v.getAnAccess(), def.(Assignment).getLValue().getFullyConverted())
or
variableAccessedAsValue(v.getAnAccess(), def.(CrementOperation).getOperand().getFullyConverted())
or
exists(AsmStmt asmStmt |
def = asmStmt.getAChild() and
def = v.getAnAccess().getParent*()
)
or
definitionByReference(v.getAnAccess(), def)
}
/**
* Holds if `def` is a (definite) assignment to the stack variable `v`. `e` is
* the assigned expression.
*/
predicate exprDefinition(SemanticStackVariable v, ControlFlowNode def, Expr e) {
def = v.getInitializer().getExpr() and
def = e and
not v.getType() instanceof ReferenceType
or
exists(AssignExpr assign |
def = assign and
assign.getLValue() = v.getAnAccess() and
e = assign.getRValue()
)
}
pragma[noinline]
private predicate containsAssembly(Function f) { f = any(AsmStmt s).getEnclosingFunction() }
/**
* Holds if `va` is a variable passed by reference as argument `def`, where the
* callee potentially assigns the corresponding parameter. The
* definitions-and-uses library models assignment by reference as if it happens
* on evaluation of the argument, `def`.
*
* All library functions except `std::move` are assumed to assign
* call-by-reference parameters, and source code functions are assumed to
* assign call-by-reference parameters that are accessed somewhere within the
* function. The latter is an over-approximation, but avoids having to take
* aliasing of the parameter into account.
*/
predicate definitionByReference(VariableAccess va, Expr def) {
exists(Call c, int i |
c.passesByReferenceNonConst(i, va) and
def = c.getArgument(i) and
forall(Function f | f = c.getTarget() and f.hasEntryPoint() |
exists(f.getParameter(i).getAnAccess())
or
f.isVarargs() and i >= f.getNumberOfParameters()
or
// If the callee contains an AsmStmt, then it is better to
// be conservative and assume that the parameter can be
// modified.
containsAssembly(f)
)
)
or
// Extractor artifact when using templates; an expression call where the
// target expression is an unknown literal. Since we cannot know what
// these calls represent, assume they assign all their arguments
exists(ExprCall c, Literal l |
l = c.getExpr() and
c.getAnArgument() = va and
not exists(l.getValue()) and
def = va
)
}
private predicate accessInSizeof(VariableAccess use) { use.getParent+() instanceof SizeofOperator }
/**
* Holds if `use` is a non-definition use of stack variable `v`. This will not
* include accesses on the LHS of an assignment (which don't retrieve the
* variable value), but _will_ include accesses in increment/decrement
* operations.
*/
predicate useOfVar(SemanticStackVariable v, VariableAccess use) {
use = v.getAnAccess() and
not exists(AssignExpr e | e.getLValue() = use) and
// sizeof accesses are resolved at compile-time
not accessInSizeof(use)
}
/**
* Same as `useOfVar(v, use)`, but with the extra condition that the
* access `use` actually reads the value of the stack variable `v` at
* run-time. (Non-examples include `&x` and function calls where the
* callee does not use the relevant parameter.)
*/
predicate useOfVarActual(SemanticStackVariable v, VariableAccess use) {
useOfVar(v, use) and
exists(Expr e |
variableAccessedAsValue(use, e) and
not exists(AssignExpr assign | e = assign.getLValue().getFullyConverted())
) and
// A call to a function that does not use the relevant parameter
not exists(Call c, int i |
c.getArgument(i) = use and
c.getTarget().hasEntryPoint() and
not exists(c.getTarget().getParameter(i).getAnAccess())
)
}
/**
* A function that should be excluded from 'reaches' analysis.
*
* The current implementation performs badly in some cases where a
* function has both a huge number of def/uses and a huge number of
* basic blocks, typically in generated code. We exclude these
* functions based on the former because it is cheaper to calculate.
*/
private predicate excludeReachesFunction(Function f) {
exists(int defOrUses |
defOrUses =
count(Def def | def.(ControlFlowNode).getControlFlowScope() = f) +
count(Use use | use.(ControlFlowNode).getControlFlowScope() = f) and
defOrUses >= 13000
)
}

View File

@@ -0,0 +1,110 @@
/**
* Provides predicates for detecting whether an expression dereferences a pointer.
*/
import cpp
import Nullness
import semmle.code.cpp.models.interfaces.ArrayFunction
/**
* Holds if the call `fc` will dereference argument `i`.
*/
predicate callDereferences(FunctionCall fc, int i) {
exists(ArrayFunction af |
fc.getTarget() = af and
(
af.hasArrayInput(i) or
af.hasArrayOutput(i)
)
)
or
exists(FormattingFunction ff |
fc.getTarget() = ff and
i >= ff.getFirstFormatArgumentIndex() and
fc.getArgument(i).getType() instanceof PointerType
)
}
/**
* Holds if evaluation of `op` dereferences `e`.
*/
predicate dereferencedByOperation(Expr op, Expr e) {
exists(PointerDereferenceExpr deref |
deref.getAChild() = e and
deref = op and
not deref.getParent*() instanceof SizeofOperator
)
or
exists(CrementOperation crement | dereferencedByOperation(e, op) and crement.getOperand() = e)
or
exists(ArrayExpr ae |
(
not ae.getParent() instanceof AddressOfExpr and
not ae.getParent*() instanceof SizeofOperator
) and
ae = op and
(
e = ae.getArrayBase() and e.getType() instanceof PointerType
or
e = ae.getArrayOffset() and e.getType() instanceof PointerType
)
)
or
exists(AddressOfExpr addof, ArrayExpr ae |
dereferencedByOperation(addof, op) and
addof.getOperand() = ae and
(e = ae.getArrayBase() or e = ae.getArrayOffset()) and
e.getType() instanceof PointerType
)
or
exists(UnaryArithmeticOperation arithop |
dereferencedByOperation(arithop, op) and
e = arithop.getAnOperand() and
e.getType() instanceof PointerType
)
or
exists(BinaryArithmeticOperation arithop |
dereferencedByOperation(arithop, op) and
e = arithop.getAnOperand() and
e.getType() instanceof PointerType
)
or
exists(FunctionCall fc, int i |
(callDereferences(fc, i) or functionCallDereferences(fc, i)) and
e = fc.getArgument(i) and
op = fc
)
or
// ptr->Field
e = op.(FieldAccess).getQualifier() and isClassPointerType(e.getType())
or
// ptr->method()
e = op.(Call).getQualifier() and isClassPointerType(e.getType())
}
private predicate isClassPointerType(Type t) {
t.getUnderlyingType().(PointerType).getBaseType().getUnderlyingType() instanceof Class
}
/**
* Holds if `e` will be dereferenced after being evaluated.
*/
predicate dereferenced(Expr e) { dereferencedByOperation(_, e) }
pragma[noinline]
private predicate functionCallDereferences(FunctionCall fc, int i) {
functionDereferences(fc.getTarget(), i)
}
/**
* Holds if the body of a function `f` is likely to dereference its `i`th
* parameter unconditionally. This analysis does not account for reassignment.
*/
predicate functionDereferences(Function f, int i) {
exists(VariableAccess access, Parameter p |
p = f.getParameter(i) and
dereferenced(access) and
access = p.getAnAccess() and
not checkedValid(p, access)
)
}

View File

@@ -0,0 +1,160 @@
/**
* Provides dominance predicates for control-flow nodes.
*
* These variations of the _dominance relation_ are used for computing SSA
* form. Formally, a node `d` _dominates_ a node `n` if all paths from the
* function entry point to `n` go through `d`; this applies within a function
* and only for nodes reachable from the entry point. Unreachable nodes are not
* part the dominance relation.
*/
import cpp
/**
* In rare cases, the same node is used in multiple control-flow scopes. This
* confuses the dominance analysis, so this predicate is used to exclude them.
*/
pragma[noinline]
private predicate hasMultiScopeNode(Function f) {
exists(ControlFlowNode node |
node.getControlFlowScope() = f and
node.getControlFlowScope() != f
)
}
/** Holds if `entry` is the entry point of a function. */
predicate functionEntry(ControlFlowNode entry) {
exists(Function function |
function.getEntryPoint() = entry and
not hasMultiScopeNode(function)
)
}
/** Holds if `exit` is the exit node of a function. */
predicate functionExit(ControlFlowNode exit) {
exists(Function function |
function = exit and
not hasMultiScopeNode(function)
)
}
/**
* Holds if `dest` is an immediate successor of `src` in the control-flow graph.
*/
private predicate nodeSucc(ControlFlowNode src, ControlFlowNode dest) { src.getASuccessor() = dest }
/**
* Holds if `pred` is an immediate predecessor of `src` in the control-flow graph.
*/
private predicate nodePred(ControlFlowNode src, ControlFlowNode pred) {
src.getAPredecessor() = pred
}
/**
* Holds if `dominator` is an immediate dominator of `node` in the control-flow
* graph.
*/
predicate iDominates(ControlFlowNode dominator, ControlFlowNode node) =
idominance(functionEntry/1, nodeSucc/2)(_, dominator, node)
/**
* Holds if `postDominator` is an immediate post-dominator of `node` in the control-flow
* graph.
*/
predicate iPostDominates(ControlFlowNode postDominator, ControlFlowNode node) =
idominance(functionExit/1, nodePred/2)(_, postDominator, node)
/**
* Holds if `dominator` is a strict dominator of `node` in the control-flow
* graph. Being strict means that `dominator != node`.
*/
predicate strictlyDominates(ControlFlowNode dominator, ControlFlowNode node) {
iDominates+(dominator, node)
}
/**
* Holds if `postDominator` is a strict post-dominator of `node` in the control-flow
* graph. Being strict means that `postDominator != node`.
*/
predicate strictlyPostDominates(ControlFlowNode postDominator, ControlFlowNode node) {
iPostDominates+(postDominator, node)
}
/**
* Holds if `dominator` is a dominator of `node` in the control-flow graph. This
* is reflexive.
*/
predicate dominates(ControlFlowNode dominator, ControlFlowNode node) {
strictlyDominates(dominator, node) or dominator = node
}
/**
* Holds if `postDominator` is a post-dominator of `node` in the control-flow graph. This
* is reflexive.
*/
predicate postDominates(ControlFlowNode postDominator, ControlFlowNode node) {
strictlyPostDominates(postDominator, node) or postDominator = node
}
/*
* Dominance predicates for basic blocks.
*/
/**
* Holds if `dominator` is an immediate dominator of `node` in the control-flow
* graph of basic blocks.
*/
predicate bbIDominates(BasicBlock dom, BasicBlock node) =
idominance(functionEntry/1, bb_successor/2)(_, dom, node)
/**
* Holds if `pred` is a predecessor of `succ` in the control-flow graph of
* basic blocks.
*/
private predicate bb_predecessor(BasicBlock succ, BasicBlock pred) { bb_successor(pred, succ) }
/** Holds if `exit` is an `ExitBasicBlock`. */
private predicate bb_exit(ExitBasicBlock exit) { any() }
/**
* Holds if `postDominator` is an immediate post-dominator of `node` in the control-flow
* graph of basic blocks.
*/
predicate bbIPostDominates(BasicBlock pDom, BasicBlock node) =
idominance(bb_exit/1, bb_predecessor/2)(_, pDom, node)
/**
* Holds if `dominator` is a strict dominator of `node` in the control-flow
* graph of basic blocks. Being strict means that `dominator != node`.
*/
// magic prevents fastTC
pragma[nomagic]
predicate bbStrictlyDominates(BasicBlock dominator, BasicBlock node) {
bbIDominates+(dominator, node)
}
/**
* Holds if `postDominator` is a strict post-dominator of `node` in the control-flow
* graph of basic blocks. Being strict means that `postDominator != node`.
*/
// magic prevents fastTC
pragma[nomagic]
predicate bbStrictlyPostDominates(BasicBlock postDominator, BasicBlock node) {
bbIPostDominates+(postDominator, node)
}
/**
* Holds if `dominator` is a dominator of `node` in the control-flow graph of
* basic blocks. This is reflexive.
*/
predicate bbDominates(BasicBlock dominator, BasicBlock node) {
bbStrictlyDominates(dominator, node) or dominator = node
}
/**
* Holds if `postDominator` is a post-dominator of `node` in the control-flow graph of
* basic blocks. This is reflexive.
*/
predicate bbPostDominates(BasicBlock postDominator, BasicBlock node) {
bbStrictlyPostDominates(postDominator, node) or postDominator = node
}

View File

@@ -0,0 +1,384 @@
/**
* Provides classes and predicates for reasoning about guards and the control
* flow elements controlled by those guards.
*/
import cpp
import semmle.code.cpp.controlflow.BasicBlocks
import semmle.code.cpp.controlflow.SSA
import semmle.code.cpp.controlflow.Dominance
/**
* A Boolean condition that guards one or more basic blocks. This includes
* operands of logical operators but not switch statements.
*/
class GuardCondition extends Expr {
GuardCondition() { is_condition(this) }
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`.
*
* Illustration:
*
* ```
* [ (testIsTrue) ]
* [ this ----------------succ ---- controlled ]
* [ | | ]
* [ (testIsFalse) | ------ ... ]
* [ other ]
* ```
*
* The predicate holds if all paths to `controlled` go via the `testIsTrue`
* edge of the control-flow graph. In other words, the `testIsTrue` edge
* must dominate `controlled`. This means that `controlled` must be
* dominated by both `this` and `succ` (the target of the `testIsTrue`
* edge). It also means that any other edge into `succ` must be a back-edge
* from a node which is dominated by `succ`.
*
* The short-circuit boolean operations have slightly surprising behavior
* here: because the operation itself only dominates one branch (due to
* being short-circuited) then it will only control blocks dominated by the
* true (for `&&`) or false (for `||`) branch.
*/
cached
predicate controls(BasicBlock controlled, boolean testIsTrue) {
// This condition must determine the flow of control; that is, this
// node must be a top-level condition.
this.controlsBlock(controlled, testIsTrue)
or
exists(BinaryLogicalOperation binop, GuardCondition lhs, GuardCondition rhs |
this = binop and
lhs = binop.getLeftOperand() and
rhs = binop.getRightOperand() and
lhs.controls(controlled, testIsTrue) and
rhs.controls(controlled, testIsTrue)
)
or
exists(GuardCondition ne, GuardCondition operand |
this = operand and
operand = ne.(NotExpr).getOperand() and
ne.controls(controlled, testIsTrue.booleanNot())
)
}
/** Holds if (determined by this guard) `left < right + k` evaluates to `isLessThan` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
compares_lt(this, left, right, k, isLessThan, testIsTrue)
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` in `block`.
* If `isLessThan = false` then this implies `left >= right + k`.
*/
cached
predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) {
exists(boolean testIsTrue |
compares_lt(this, left, right, k, isLessThan, testIsTrue) and this.controls(block, testIsTrue)
)
}
/** Holds if (determined by this guard) `left == right + k` evaluates to `areEqual` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
compares_eq(this, left, right, k, areEqual, testIsTrue)
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` in `block`.
* If `areEqual = false` then this implies `left != right + k`.
*/
cached
predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) {
exists(boolean testIsTrue |
compares_eq(this, left, right, k, areEqual, testIsTrue) and this.controls(block, testIsTrue)
)
}
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`. This helper
* predicate does not necessarily hold for binary logical operations like
* `&&` and `||`. See the detailed explanation on predicate `controls`.
*/
private predicate controlsBlock(BasicBlock controlled, boolean testIsTrue) {
exists(BasicBlock thisblock | thisblock.contains(this) |
exists(BasicBlock succ |
testIsTrue = true and succ = this.getATrueSuccessor()
or
testIsTrue = false and succ = this.getAFalseSuccessor()
|
bbDominates(succ, controlled) and
forall(BasicBlock pred | pred.getASuccessor() = succ |
pred = thisblock or bbDominates(succ, pred) or not reachable(pred)
)
)
)
}
}
private predicate is_condition(Expr guard) {
guard.isCondition()
or
is_condition(guard.(BinaryLogicalOperation).getAnOperand())
or
exists(NotExpr cond | is_condition(cond) and cond.getOperand() = guard)
}
/*
* Simplification of equality expressions:
* Simplify conditions in the source to the canonical form l op r + k.
*/
/**
* Holds if `left == right + k` is `areEqual` given that test is `testIsTrue`.
*
* Beware making mistaken logical implications here relating `areEqual` and `testIsTrue`.
*/
private predicate compares_eq(
Expr test, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
/* The simple case where the test *is* the comparison so areEqual = testIsTrue xor eq. */
exists(boolean eq | simple_comparison_eq(test, left, right, k, eq) |
areEqual = true and testIsTrue = eq
or
areEqual = false and testIsTrue = eq.booleanNot()
)
or
logical_comparison_eq(test, left, right, k, areEqual, testIsTrue)
or
/* a == b + k => b == a - k */
exists(int mk | k = -mk | compares_eq(test, right, left, mk, areEqual, testIsTrue))
or
complex_eq(test, left, right, k, areEqual, testIsTrue)
or
/* (x is true => (left == right + k)) => (!x is false => (left == right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_eq(test.(NotExpr).getOperand(), left, right, k, areEqual, isFalse)
)
}
/**
* If `test => part` and `part => left == right + k` then `test => left == right + k`.
* Similarly for the case where `test` is false.
*/
private predicate logical_comparison_eq(
BinaryLogicalOperation test, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(boolean partIsTrue, Expr part | test.impliesValue(part, partIsTrue, testIsTrue) |
compares_eq(part, left, right, k, areEqual, partIsTrue)
)
}
/** Rearrange various simple comparisons into `left == right + k` form. */
private predicate simple_comparison_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual
) {
left = cmp.getLeftOperand() and
cmp.getOperator() = "==" and
right = cmp.getRightOperand() and
k = 0 and
areEqual = true
or
left = cmp.getLeftOperand() and
cmp.getOperator() = "!=" and
right = cmp.getRightOperand() and
k = 0 and
areEqual = false
}
private predicate complex_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
sub_eq(cmp, left, right, k, areEqual, testIsTrue)
or
add_eq(cmp, left, right, k, areEqual, testIsTrue)
}
// left - x == right + c => left == right + (c+x)
// left == (right - x) + c => left == right + (c-x)
private predicate sub_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(SubExpr lhs, int c, int x |
compares_eq(cmp, lhs, right, c, areEqual, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRightOperand()) and
k = c + x
)
or
exists(SubExpr rhs, int c, int x |
compares_eq(cmp, left, rhs, c, areEqual, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRightOperand()) and
k = c - x
)
}
// left + x == right + c => left == right + (c-x)
// left == (right + x) + c => left == right + (c+x)
private predicate add_eq(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue
) {
exists(AddExpr lhs, int c, int x |
compares_eq(cmp, lhs, right, c, areEqual, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRightOperand())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeftOperand())
) and
k = c - x
)
or
exists(AddExpr rhs, int c, int x |
compares_eq(cmp, left, rhs, c, areEqual, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRightOperand())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeftOperand())
) and
k = c + x
)
}
/*
* Simplification of inequality expressions:
* Simplify conditions in the source to the canonical form l < r + k.
*/
/** Holds if `left < right + k` evaluates to `isLt` given that test is `testIsTrue`. */
private predicate compares_lt(
Expr test, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
/* In the simple case, the test is the comparison, so isLt = testIsTrue */
simple_comparison_lt(test, left, right, k) and isLt = true and testIsTrue = true
or
simple_comparison_lt(test, left, right, k) and isLt = false and testIsTrue = false
or
logical_comparison_lt(test, left, right, k, isLt, testIsTrue)
or
complex_lt(test, left, right, k, isLt, testIsTrue)
or
/* (not (left < right + k)) => (left >= right + k) */
exists(boolean isGe | isLt = isGe.booleanNot() |
compares_ge(test, left, right, k, isGe, testIsTrue)
)
or
/* (x is true => (left < right + k)) => (!x is false => (left < right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_lt(test.(NotExpr).getOperand(), left, right, k, isLt, isFalse)
)
}
/** `(a < b + k) => (b > a - k) => (b >= a + (1-k))` */
private predicate compares_ge(
Expr test, Expr left, Expr right, int k, boolean isGe, boolean testIsTrue
) {
exists(int onemk | k = 1 - onemk | compares_lt(test, right, left, onemk, isGe, testIsTrue))
}
/**
* If `test => part` and `part => left < right + k` then `test => left < right + k`.
* Similarly for the case where `test` evaluates false.
*/
private predicate logical_comparison_lt(
BinaryLogicalOperation test, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(boolean partIsTrue, Expr part | test.impliesValue(part, partIsTrue, testIsTrue) |
compares_lt(part, left, right, k, isLt, partIsTrue)
)
}
/** Rearrange various simple comparisons into `left < right + k` form. */
private predicate simple_comparison_lt(ComparisonOperation cmp, Expr left, Expr right, int k) {
left = cmp.getLeftOperand() and
cmp.getOperator() = "<" and
right = cmp.getRightOperand() and
k = 0
or
left = cmp.getLeftOperand() and
cmp.getOperator() = "<=" and
right = cmp.getRightOperand() and
k = 1
or
right = cmp.getLeftOperand() and
cmp.getOperator() = ">" and
left = cmp.getRightOperand() and
k = 0
or
right = cmp.getLeftOperand() and
cmp.getOperator() = ">=" and
left = cmp.getRightOperand() and
k = 1
}
private predicate complex_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
sub_lt(cmp, left, right, k, isLt, testIsTrue)
or
add_lt(cmp, left, right, k, isLt, testIsTrue)
}
// left - x < right + c => left < right + (c+x)
// left < (right - x) + c => left < right + (c-x)
private predicate sub_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(SubExpr lhs, int c, int x |
compares_lt(cmp, lhs, right, c, isLt, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRightOperand()) and
k = c + x
)
or
exists(SubExpr rhs, int c, int x |
compares_lt(cmp, left, rhs, c, isLt, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRightOperand()) and
k = c - x
)
}
// left + x < right + c => left < right + (c-x)
// left < (right + x) + c => left < right + (c+x)
private predicate add_lt(
ComparisonOperation cmp, Expr left, Expr right, int k, boolean isLt, boolean testIsTrue
) {
exists(AddExpr lhs, int c, int x |
compares_lt(cmp, lhs, right, c, isLt, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRightOperand())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeftOperand())
) and
k = c - x
)
or
exists(AddExpr rhs, int c, int x |
compares_lt(cmp, left, rhs, c, isLt, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRightOperand())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeftOperand())
) and
k = c + x
)
}
/** The `int` value of integer constant expression. */
private int int_value(Expr e) {
e.getUnderlyingType() instanceof IntegralType and
result = e.getValue().toInt()
}
/** An `SsaDefinition` with an additional predicate `isLt`. */
class GuardedSsa extends SsaDefinition {
/** Holds if this `SsaDefinition` is guarded such that `this(var) < gt + k` is `testIsTrue` in `block`. */
predicate isLt(StackVariable var, Expr gt, int k, BasicBlock block, boolean testIsTrue) {
exists(Expr luse, GuardCondition test | this.getAUse(var) = luse |
test.ensuresLt(luse, gt, k, block, testIsTrue)
)
}
}

View File

@@ -0,0 +1,703 @@
/**
* Provides classes and predicates for reasoning about guards and the control
* flow elements controlled by those guards.
*/
import cpp
import semmle.code.cpp.ir.IR
/**
* Holds if `block` consists of an `UnreachedInstruction`.
*
* We avoiding reporting an unreached block as being controlled by a guard. The unreached block
* has the AST for the `Function` itself, which tends to confuse mapping between the AST `BasicBlock`
* and the `IRBlock`.
*/
pragma[noinline]
private predicate isUnreachedBlock(IRBlock block) {
block.getFirstInstruction() instanceof UnreachedInstruction
}
/**
* A Boolean condition in the AST that guards one or more basic blocks. This includes
* operands of logical operators but not switch statements.
*/
cached
class GuardCondition extends Expr {
cached
GuardCondition() {
exists(IRGuardCondition ir | this = ir.getUnconvertedResultExpression())
or
// no binary operators in the IR
exists(GuardCondition gc | this.(BinaryLogicalOperation).getAnOperand() = gc)
or
// the IR short-circuits if(!x)
// don't produce a guard condition for `y = !x` and other non-short-circuited cases
not exists(Instruction inst | this.getFullyConverted() = inst.getAST()) and
exists(IRGuardCondition ir | this.(NotExpr).getOperand() = ir.getAST())
}
/**
* Holds if this condition controls `controlled`, meaning that `controlled` is only
* entered if the value of this condition is `testIsTrue`.
*
* Illustration:
*
* ```
* [ (testIsTrue) ]
* [ this ----------------succ ---- controlled ]
* [ | | ]
* [ (testIsFalse) | ------ ... ]
* [ other ]
* ```
*
* The predicate holds if all paths to `controlled` go via the `testIsTrue`
* edge of the control-flow graph. In other words, the `testIsTrue` edge
* must dominate `controlled`. This means that `controlled` must be
* dominated by both `this` and `succ` (the target of the `testIsTrue`
* edge). It also means that any other edge into `succ` must be a back-edge
* from a node which is dominated by `succ`.
*
* The short-circuit boolean operations have slightly surprising behavior
* here: because the operation itself only dominates one branch (due to
* being short-circuited) then it will only control blocks dominated by the
* true (for `&&`) or false (for `||`) branch.
*/
cached
predicate controls(BasicBlock controlled, boolean testIsTrue) { none() }
/** Holds if (determined by this guard) `left < right + k` evaluates to `isLessThan` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
none()
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` in `block`.
* If `isLessThan = false` then this implies `left >= right + k`.
*/
cached
predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) { none() }
/** Holds if (determined by this guard) `left == right + k` evaluates to `areEqual` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
none()
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` in `block`.
* If `areEqual = false` then this implies `left != right + k`.
*/
cached
predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) { none() }
}
/**
* A binary logical operator in the AST that guards one or more basic blocks.
*/
private class GuardConditionFromBinaryLogicalOperator extends GuardCondition {
GuardConditionFromBinaryLogicalOperator() {
exists(GuardCondition gc | this.(BinaryLogicalOperation).getAnOperand() = gc)
}
override predicate controls(BasicBlock controlled, boolean testIsTrue) {
exists(BinaryLogicalOperation binop, GuardCondition lhs, GuardCondition rhs |
this = binop and
lhs = binop.getLeftOperand() and
rhs = binop.getRightOperand() and
lhs.controls(controlled, testIsTrue) and
rhs.controls(controlled, testIsTrue)
)
}
override predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
exists(boolean partIsTrue, GuardCondition part |
this.(BinaryLogicalOperation).impliesValue(part, partIsTrue, testIsTrue)
|
part.comparesLt(left, right, k, isLessThan, partIsTrue)
)
}
override predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) {
exists(boolean testIsTrue |
comparesLt(left, right, k, isLessThan, testIsTrue) and this.controls(block, testIsTrue)
)
}
override predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
exists(boolean partIsTrue, GuardCondition part |
this.(BinaryLogicalOperation).impliesValue(part, partIsTrue, testIsTrue)
|
part.comparesEq(left, right, k, areEqual, partIsTrue)
)
}
override predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) {
exists(boolean testIsTrue |
comparesEq(left, right, k, areEqual, testIsTrue) and this.controls(block, testIsTrue)
)
}
}
/**
* A `!` operator in the AST that guards one or more basic blocks, and does not have a corresponding
* IR instruction.
*/
private class GuardConditionFromShortCircuitNot extends GuardCondition, NotExpr {
GuardConditionFromShortCircuitNot() {
not exists(Instruction inst | this.getFullyConverted() = inst.getAST()) and
exists(IRGuardCondition ir | getOperand() = ir.getAST())
}
override predicate controls(BasicBlock controlled, boolean testIsTrue) {
getOperand().(GuardCondition).controls(controlled, testIsTrue.booleanNot())
}
override predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
getOperand().(GuardCondition).comparesLt(left, right, k, isLessThan, testIsTrue.booleanNot())
}
override predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) {
getOperand().(GuardCondition).ensuresLt(left, right, k, block, isLessThan.booleanNot())
}
override predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
getOperand().(GuardCondition).comparesEq(left, right, k, areEqual, testIsTrue.booleanNot())
}
override predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) {
getOperand().(GuardCondition).ensuresEq(left, right, k, block, areEqual.booleanNot())
}
}
/**
* A Boolean condition in the AST that guards one or more basic blocks and has a corresponding IR
* instruction.
*/
private class GuardConditionFromIR extends GuardCondition {
IRGuardCondition ir;
GuardConditionFromIR() { this = ir.getUnconvertedResultExpression() }
override predicate controls(BasicBlock controlled, boolean testIsTrue) {
// This condition must determine the flow of control; that is, this
// node must be a top-level condition.
this.controlsBlock(controlled, testIsTrue)
}
/** Holds if (determined by this guard) `left < right + k` evaluates to `isLessThan` if this expression evaluates to `testIsTrue`. */
override predicate comparesLt(Expr left, Expr right, int k, boolean isLessThan, boolean testIsTrue) {
exists(Instruction li, Instruction ri |
li.getUnconvertedResultExpression() = left and
ri.getUnconvertedResultExpression() = right and
ir.comparesLt(li.getAUse(), ri.getAUse(), k, isLessThan, testIsTrue)
)
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` in `block`.
* If `isLessThan = false` then this implies `left >= right + k`.
*/
override predicate ensuresLt(Expr left, Expr right, int k, BasicBlock block, boolean isLessThan) {
exists(Instruction li, Instruction ri, boolean testIsTrue |
li.getUnconvertedResultExpression() = left and
ri.getUnconvertedResultExpression() = right and
ir.comparesLt(li.getAUse(), ri.getAUse(), k, isLessThan, testIsTrue) and
this.controls(block, testIsTrue)
)
}
/** Holds if (determined by this guard) `left == right + k` evaluates to `areEqual` if this expression evaluates to `testIsTrue`. */
override predicate comparesEq(Expr left, Expr right, int k, boolean areEqual, boolean testIsTrue) {
exists(Instruction li, Instruction ri |
li.getUnconvertedResultExpression() = left and
ri.getUnconvertedResultExpression() = right and
ir.comparesEq(li.getAUse(), ri.getAUse(), k, areEqual, testIsTrue)
)
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` in `block`.
* If `areEqual = false` then this implies `left != right + k`.
*/
override predicate ensuresEq(Expr left, Expr right, int k, BasicBlock block, boolean areEqual) {
exists(Instruction li, Instruction ri, boolean testIsTrue |
li.getUnconvertedResultExpression() = left and
ri.getUnconvertedResultExpression() = right and
ir.comparesEq(li.getAUse(), ri.getAUse(), k, areEqual, testIsTrue) and
this.controls(block, testIsTrue)
)
}
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`. This helper
* predicate does not necessarily hold for binary logical operations like
* `&&` and `||`. See the detailed explanation on predicate `controls`.
*/
private predicate controlsBlock(BasicBlock controlled, boolean testIsTrue) {
exists(IRBlock irb |
forex(IRGuardCondition inst | inst = ir | inst.controls(irb, testIsTrue)) and
irb.getAnInstruction().getAST().(ControlFlowNode).getBasicBlock() = controlled and
not isUnreachedBlock(irb)
)
}
}
/**
* A Boolean condition in the IR that guards one or more basic blocks. This includes
* operands of logical operators but not switch statements. Note that `&&` and `||`
* don't have an explicit representation in the IR, and therefore will not appear as
* IRGuardConditions.
*/
cached
class IRGuardCondition extends Instruction {
ConditionalBranchInstruction branch;
cached
IRGuardCondition() { branch = get_branch_for_condition(this) }
/**
* Holds if this condition controls `controlled`, meaning that `controlled` is only
* entered if the value of this condition is `testIsTrue`.
*
* Illustration:
*
* ```
* [ (testIsTrue) ]
* [ this ----------------succ ---- controlled ]
* [ | | ]
* [ (testIsFalse) | ------ ... ]
* [ other ]
* ```
*
* The predicate holds if all paths to `controlled` go via the `testIsTrue`
* edge of the control-flow graph. In other words, the `testIsTrue` edge
* must dominate `controlled`. This means that `controlled` must be
* dominated by both `this` and `succ` (the target of the `testIsTrue`
* edge). It also means that any other edge into `succ` must be a back-edge
* from a node which is dominated by `succ`.
*
* The short-circuit boolean operations have slightly surprising behavior
* here: because the operation itself only dominates one branch (due to
* being short-circuited) then it will only control blocks dominated by the
* true (for `&&`) or false (for `||`) branch.
*/
cached
predicate controls(IRBlock controlled, boolean testIsTrue) {
// This condition must determine the flow of control; that is, this
// node must be a top-level condition.
this.controlsBlock(controlled, testIsTrue)
or
exists(IRGuardCondition ne |
this = ne.(LogicalNotInstruction).getUnary() and
ne.controls(controlled, testIsTrue.booleanNot())
)
}
/**
* Holds if the control-flow edge `(pred, succ)` may be taken only if
* the value of this condition is `testIsTrue`.
*/
cached
predicate controlsEdge(IRBlock pred, IRBlock succ, boolean testIsTrue) {
pred.getASuccessor() = succ and
controls(pred, testIsTrue)
or
succ = getBranchSuccessor(testIsTrue) and
branch.getCondition() = this and
branch.getBlock() = pred
}
/**
* Gets the block to which `branch` jumps directly when this condition is `testIsTrue`.
*
* This predicate is intended to help with situations in which an inference can only be made
* based on an edge between a block with multiple successors and a block with multiple
* predecessors. For example, in the following situation, an inference can be made about the
* value of `x` at the end of the `if` statement, but there is no block which is controlled by
* the `if` statement when `x >= y`.
* ```
* if (x < y) {
* x = y;
* }
* return x;
* ```
*/
private IRBlock getBranchSuccessor(boolean testIsTrue) {
branch.getCondition() = this and
(
testIsTrue = true and
result.getFirstInstruction() = branch.getTrueSuccessor()
or
testIsTrue = false and
result.getFirstInstruction() = branch.getFalseSuccessor()
)
}
/** Holds if (determined by this guard) `left < right + k` evaluates to `isLessThan` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesLt(Operand left, Operand right, int k, boolean isLessThan, boolean testIsTrue) {
compares_lt(this, left, right, k, isLessThan, testIsTrue)
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` in `block`.
* If `isLessThan = false` then this implies `left >= right + k`.
*/
cached
predicate ensuresLt(Operand left, Operand right, int k, IRBlock block, boolean isLessThan) {
exists(boolean testIsTrue |
compares_lt(this, left, right, k, isLessThan, testIsTrue) and this.controls(block, testIsTrue)
)
}
/**
* Holds if (determined by this guard) `left < right + k` must be `isLessThan` on the edge from
* `pred` to `succ`. If `isLessThan = false` then this implies `left >= right + k`.
*/
cached
predicate ensuresLtEdge(
Operand left, Operand right, int k, IRBlock pred, IRBlock succ, boolean isLessThan
) {
exists(boolean testIsTrue |
compares_lt(this, left, right, k, isLessThan, testIsTrue) and
this.controlsEdge(pred, succ, testIsTrue)
)
}
/** Holds if (determined by this guard) `left == right + k` evaluates to `areEqual` if this expression evaluates to `testIsTrue`. */
cached
predicate comparesEq(Operand left, Operand right, int k, boolean areEqual, boolean testIsTrue) {
compares_eq(this, left, right, k, areEqual, testIsTrue)
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` in `block`.
* If `areEqual = false` then this implies `left != right + k`.
*/
cached
predicate ensuresEq(Operand left, Operand right, int k, IRBlock block, boolean areEqual) {
exists(boolean testIsTrue |
compares_eq(this, left, right, k, areEqual, testIsTrue) and this.controls(block, testIsTrue)
)
}
/**
* Holds if (determined by this guard) `left == right + k` must be `areEqual` on the edge from
* `pred` to `succ`. If `areEqual = false` then this implies `left != right + k`.
*/
cached
predicate ensuresEqEdge(
Operand left, Operand right, int k, IRBlock pred, IRBlock succ, boolean areEqual
) {
exists(boolean testIsTrue |
compares_eq(this, left, right, k, areEqual, testIsTrue) and
this.controlsEdge(pred, succ, testIsTrue)
)
}
/**
* Holds if this condition controls `block`, meaning that `block` is only
* entered if the value of this condition is `testIsTrue`. This helper
* predicate does not necessarily hold for binary logical operations like
* `&&` and `||`. See the detailed explanation on predicate `controls`.
*/
private predicate controlsBlock(IRBlock controlled, boolean testIsTrue) {
not isUnreachedBlock(controlled) and
//
// For this block to control the block `controlled` with `testIsTrue` the
// following must hold: Execution must have passed through the test; that
// is, `this` must strictly dominate `controlled`. Execution must have
// passed through the `testIsTrue` edge leaving `this`.
//
// Although "passed through the true edge" implies that
// `getBranchSuccessor(true)` dominates `controlled`, the reverse is not
// true, as flow may have passed through another edge to get to
// `getBranchSuccessor(true)`, so we need to assert that
// `getBranchSuccessor(true)` dominates `controlled` *and* that all
// predecessors of `getBranchSuccessor(true)` are either `this` or
// dominated by `getBranchSuccessor(true)`.
//
// For example, in the following snippet:
//
// if (x)
// controlled;
// false_successor;
// uncontrolled;
//
// `false_successor` dominates `uncontrolled`, but not all of its
// predecessors are `this` (`if (x)`) or dominated by itself. Whereas in
// the following code:
//
// if (x)
// while (controlled)
// also_controlled;
// false_successor;
// uncontrolled;
//
// the block `while (controlled)` is controlled because all of its
// predecessors are `this` (`if (x)`) or (in the case of `also_controlled`)
// dominated by itself.
//
// The additional constraint on the predecessors of the test successor implies
// that `this` strictly dominates `controlled` so that isn't necessary to check
// directly.
exists(IRBlock succ |
succ = this.getBranchSuccessor(testIsTrue) and
this.hasDominatingEdgeTo(succ) and
succ.dominates(controlled)
)
}
/**
* Holds if `(this, succ)` is an edge that dominates `succ`, that is, all other
* predecessors of `succ` are dominated by `succ`. This implies that `this` is the
* immediate dominator of `succ`.
*
* This is a necessary and sufficient condition for an edge to dominate anything,
* and in particular `bb1.hasDominatingEdgeTo(bb2) and bb2.dominates(bb3)` means
* that the edge `(bb1, bb2)` dominates `bb3`.
*/
private predicate hasDominatingEdgeTo(IRBlock succ) {
exists(IRBlock branchBlock | branchBlock = this.getBranchBlock() |
branchBlock.immediatelyDominates(succ) and
branchBlock.getASuccessor() = succ and
forall(IRBlock pred | pred = succ.getAPredecessor() and pred != branchBlock |
succ.dominates(pred)
or
// An unreachable `pred` is vacuously dominated by `succ` since all
// paths from the entry to `pred` go through `succ`. Such vacuous
// dominance is not included in the `dominates` predicate since that
// could cause quadratic blow-up.
not pred.isReachableFromFunctionEntry()
)
)
}
pragma[noinline]
private IRBlock getBranchBlock() { result = branch.getBlock() }
}
private ConditionalBranchInstruction get_branch_for_condition(Instruction guard) {
result.getCondition() = guard
or
exists(LogicalNotInstruction cond |
result = get_branch_for_condition(cond) and cond.getUnary() = guard
)
}
/**
* Holds if `left == right + k` is `areEqual` given that test is `testIsTrue`.
*
* Beware making mistaken logical implications here relating `areEqual` and `testIsTrue`.
*/
private predicate compares_eq(
Instruction test, Operand left, Operand right, int k, boolean areEqual, boolean testIsTrue
) {
/* The simple case where the test *is* the comparison so areEqual = testIsTrue xor eq. */
exists(boolean eq | simple_comparison_eq(test, left, right, k, eq) |
areEqual = true and testIsTrue = eq
or
areEqual = false and testIsTrue = eq.booleanNot()
)
or
// I think this is handled by forwarding in controlsBlock.
//or
//logical_comparison_eq(test, left, right, k, areEqual, testIsTrue)
/* a == b + k => b == a - k */
exists(int mk | k = -mk | compares_eq(test, right, left, mk, areEqual, testIsTrue))
or
complex_eq(test, left, right, k, areEqual, testIsTrue)
or
/* (x is true => (left == right + k)) => (!x is false => (left == right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_eq(test.(LogicalNotInstruction).getUnary(), left, right, k, areEqual, isFalse)
)
}
/** Rearrange various simple comparisons into `left == right + k` form. */
private predicate simple_comparison_eq(
CompareInstruction cmp, Operand left, Operand right, int k, boolean areEqual
) {
left = cmp.getLeftOperand() and
cmp instanceof CompareEQInstruction and
right = cmp.getRightOperand() and
k = 0 and
areEqual = true
or
left = cmp.getLeftOperand() and
cmp instanceof CompareNEInstruction and
right = cmp.getRightOperand() and
k = 0 and
areEqual = false
}
private predicate complex_eq(
CompareInstruction cmp, Operand left, Operand right, int k, boolean areEqual, boolean testIsTrue
) {
sub_eq(cmp, left, right, k, areEqual, testIsTrue)
or
add_eq(cmp, left, right, k, areEqual, testIsTrue)
}
/*
* Simplification of inequality expressions
* Simplify conditions in the source to the canonical form l < r + k.
*/
/** Holds if `left < right + k` evaluates to `isLt` given that test is `testIsTrue`. */
private predicate compares_lt(
Instruction test, Operand left, Operand right, int k, boolean isLt, boolean testIsTrue
) {
/* In the simple case, the test is the comparison, so isLt = testIsTrue */
simple_comparison_lt(test, left, right, k) and isLt = true and testIsTrue = true
or
simple_comparison_lt(test, left, right, k) and isLt = false and testIsTrue = false
or
complex_lt(test, left, right, k, isLt, testIsTrue)
or
/* (not (left < right + k)) => (left >= right + k) */
exists(boolean isGe | isLt = isGe.booleanNot() |
compares_ge(test, left, right, k, isGe, testIsTrue)
)
or
/* (x is true => (left < right + k)) => (!x is false => (left < right + k)) */
exists(boolean isFalse | testIsTrue = isFalse.booleanNot() |
compares_lt(test.(LogicalNotInstruction).getUnary(), left, right, k, isLt, isFalse)
)
}
/** `(a < b + k) => (b > a - k) => (b >= a + (1-k))` */
private predicate compares_ge(
Instruction test, Operand left, Operand right, int k, boolean isGe, boolean testIsTrue
) {
exists(int onemk | k = 1 - onemk | compares_lt(test, right, left, onemk, isGe, testIsTrue))
}
/** Rearrange various simple comparisons into `left < right + k` form. */
private predicate simple_comparison_lt(CompareInstruction cmp, Operand left, Operand right, int k) {
left = cmp.getLeftOperand() and
cmp instanceof CompareLTInstruction and
right = cmp.getRightOperand() and
k = 0
or
left = cmp.getLeftOperand() and
cmp instanceof CompareLEInstruction and
right = cmp.getRightOperand() and
k = 1
or
right = cmp.getLeftOperand() and
cmp instanceof CompareGTInstruction and
left = cmp.getRightOperand() and
k = 0
or
right = cmp.getLeftOperand() and
cmp instanceof CompareGEInstruction and
left = cmp.getRightOperand() and
k = 1
}
private predicate complex_lt(
CompareInstruction cmp, Operand left, Operand right, int k, boolean isLt, boolean testIsTrue
) {
sub_lt(cmp, left, right, k, isLt, testIsTrue)
or
add_lt(cmp, left, right, k, isLt, testIsTrue)
}
// left - x < right + c => left < right + (c+x)
// left < (right - x) + c => left < right + (c-x)
private predicate sub_lt(
CompareInstruction cmp, Operand left, Operand right, int k, boolean isLt, boolean testIsTrue
) {
exists(SubInstruction lhs, int c, int x |
compares_lt(cmp, lhs.getAUse(), right, c, isLt, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRight()) and
k = c + x
)
or
exists(SubInstruction rhs, int c, int x |
compares_lt(cmp, left, rhs.getAUse(), c, isLt, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRight()) and
k = c - x
)
}
// left + x < right + c => left < right + (c-x)
// left < (right + x) + c => left < right + (c+x)
private predicate add_lt(
CompareInstruction cmp, Operand left, Operand right, int k, boolean isLt, boolean testIsTrue
) {
exists(AddInstruction lhs, int c, int x |
compares_lt(cmp, lhs.getAUse(), right, c, isLt, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRight())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeft())
) and
k = c - x
)
or
exists(AddInstruction rhs, int c, int x |
compares_lt(cmp, left, rhs.getAUse(), c, isLt, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRight())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeft())
) and
k = c + x
)
}
// left - x == right + c => left == right + (c+x)
// left == (right - x) + c => left == right + (c-x)
private predicate sub_eq(
CompareInstruction cmp, Operand left, Operand right, int k, boolean areEqual, boolean testIsTrue
) {
exists(SubInstruction lhs, int c, int x |
compares_eq(cmp, lhs.getAUse(), right, c, areEqual, testIsTrue) and
left = lhs.getLeftOperand() and
x = int_value(lhs.getRight()) and
k = c + x
)
or
exists(SubInstruction rhs, int c, int x |
compares_eq(cmp, left, rhs.getAUse(), c, areEqual, testIsTrue) and
right = rhs.getLeftOperand() and
x = int_value(rhs.getRight()) and
k = c - x
)
}
// left + x == right + c => left == right + (c-x)
// left == (right + x) + c => left == right + (c+x)
private predicate add_eq(
CompareInstruction cmp, Operand left, Operand right, int k, boolean areEqual, boolean testIsTrue
) {
exists(AddInstruction lhs, int c, int x |
compares_eq(cmp, lhs.getAUse(), right, c, areEqual, testIsTrue) and
(
left = lhs.getLeftOperand() and x = int_value(lhs.getRight())
or
left = lhs.getRightOperand() and x = int_value(lhs.getLeft())
) and
k = c - x
)
or
exists(AddInstruction rhs, int c, int x |
compares_eq(cmp, left, rhs.getAUse(), c, areEqual, testIsTrue) and
(
right = rhs.getLeftOperand() and x = int_value(rhs.getRight())
or
right = rhs.getRightOperand() and x = int_value(rhs.getLeft())
) and
k = c + x
)
}
/** The int value of integer constant expression. */
private int int_value(Instruction i) { result = i.(IntegerConstantInstruction).getValue().toInt() }

View File

@@ -0,0 +1,393 @@
/**
* DEPRECATED: Use `StackVariableReachability` instead.
*/
import cpp
/**
* DEPRECATED: Use `StackVariableReachability` instead.
*
* A reachability analysis for control-flow nodes involving stack variables.
* This defines sources, sinks, and any other configurable aspect of the
* analysis. Multiple analyses can coexist. To create an analysis, extend this
* class with a subclass whose characteristic predicate is a unique singleton
* string. For example, write
*
* ```
* class MyAnalysisConfiguration extends LocalScopeVariableReachability {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Override `isBarrier`.
* }
* ```
*
* Then, to query whether there is flow between some source and sink, call the
* `reaches` predicate on an instance of `MyAnalysisConfiguration`.
*/
abstract deprecated class LocalScopeVariableReachability extends string {
bindingset[this]
LocalScopeVariableReachability() { length() >= 0 }
/** Holds if `node` is a source for the reachability analysis using variable `v`. */
abstract predicate isSource(ControlFlowNode node, LocalScopeVariable v);
/** Holds if `sink` is a (potential) sink for the reachability analysis using variable `v`. */
abstract predicate isSink(ControlFlowNode node, LocalScopeVariable v);
/** Holds if `node` is a barrier for the reachability analysis using variable `v`. */
abstract predicate isBarrier(ControlFlowNode node, LocalScopeVariable v);
/**
* Holds if the source node `source` can reach the sink `sink` without crossing
* a barrier. This is (almost) equivalent to the following QL predicate but
* uses basic blocks internally for better performance:
*
* ```
* predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* reachesImpl(source, v, sink)
* and
* isSink(sink, v)
* }
*
* predicate reachesImpl(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* sink = source.getASuccessor() and isSource(source, v)
* or
* exists(ControlFlowNode mid | reachesImpl(source, v, mid) |
* not isBarrier(mid, v)
* and
* sink = mid.getASuccessor()
* )
* }
* ```
*
* In addition to using a better performing implementation, this analysis
* accounts for loops where the condition is provably true upon entry.
*/
predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
/*
* Implementation detail: the predicates in this class are a generalization of
* those in DefinitionsAndUses.qll, and should be kept in sync.
*
* Unfortunately, caching of abstract predicates does not work well, so the
* predicates in DefinitionsAndUses.qll cannot use this library.
*/
exists(BasicBlock bb, int i |
isSource(source, v) and
bb.getNode(i) = source and
not bb.isUnreachable()
|
exists(int j |
j > i and
sink = bb.getNode(j) and
isSink(sink, v) and
not exists(int k | isBarrier(bb.getNode(k), v) | k in [i + 1 .. j - 1])
)
or
not exists(int k | isBarrier(bb.getNode(k), v) | k > i) and
bbSuccessorEntryReaches(bb, v, sink, _)
)
}
private predicate bbSuccessorEntryReaches(
BasicBlock bb, SemanticStackVariable v, ControlFlowNode node,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry)
|
bbEntryReachesLocally(succ, v, node) and
succSkipsFirstLoopAlwaysTrueUponEntry = false
or
not isBarrier(succ.getNode(_), v) and
bbSuccessorEntryReaches(succ, v, node, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesLocally(
BasicBlock bb, SemanticStackVariable v, ControlFlowNode node
) {
exists(int n |
node = bb.getNode(n) and
isSink(node, v)
|
not exists(this.firstBarrierIndexIn(bb, v))
or
n <= this.firstBarrierIndexIn(bb, v)
)
}
private int firstBarrierIndexIn(BasicBlock bb, SemanticStackVariable v) {
result = min(int m | isBarrier(bb.getNode(m), v))
}
}
/**
* Holds if `bb` contains the entry point `loop` for a loop at position `i`.
* The condition of that loop is provably true upon entry but not provably
* true in general (if it were, the false-successor had already been removed
* from the CFG).
*
* Examples:
* ```
* for (int i = 0; i < 2; i++) { } // always true upon entry
* for (int i = 0; true; i++) { } // always true
* ```
*/
private predicate bbLoopEntryConditionAlwaysTrueAt(BasicBlock bb, int i, ControlFlowNode loop) {
exists(Expr condition |
loopConditionAlwaysTrueUponEntry(loop, condition) and
not conditionAlwaysTrue(condition) and
bb.getNode(i) = loop
)
}
/**
* Basic block `pred` contains all or part of the condition belonging to a loop,
* and there is an edge from `pred` to `succ` that concludes the condition.
* If the edge corrseponds with the loop condition being found to be `true`, then
* `skipsLoop` is `false`. Otherwise the edge corresponds with the loop condition
* being found to be `false` and `skipsLoop` is `true`. Non-concluding edges
* within a complex loop condition are not matched by this predicate.
*/
private predicate bbLoopConditionAlwaysTrueUponEntrySuccessor(
BasicBlock pred, BasicBlock succ, boolean skipsLoop
) {
exists(Expr cond |
loopConditionAlwaysTrueUponEntry(_, cond) and
cond.getAChild*() = pred.getEnd() and
succ = pred.getASuccessor() and
not cond.getAChild*() = succ.getStart() and
(
succ = pred.getAFalseSuccessor() and
skipsLoop = true
or
succ = pred.getATrueSuccessor() and
skipsLoop = false
)
)
}
/**
* Loop invariant for `bbSuccessorEntryReaches`:
*
* - `succ` is a successor of `pred`.
* - `predSkipsFirstLoopAlwaysTrueUponEntry`: whether the path from
* `pred` (via `succ`) skips the first loop where the condition is
* provably true upon entry.
* - `succSkipsFirstLoopAlwaysTrueUponEntry`: whether the path from
* `succ` skips the first loop where the condition is provably true
* upon entry.
* - If `pred` contains the entry point of a loop where the condition
* is provably true upon entry, then `succ` is not allowed to skip
* that loop (`succSkipsFirstLoopAlwaysTrueUponEntry = false`).
*/
predicate bbSuccessorEntryReachesLoopInvariant(
BasicBlock pred, BasicBlock succ, boolean predSkipsFirstLoopAlwaysTrueUponEntry,
boolean succSkipsFirstLoopAlwaysTrueUponEntry
) {
succ = pred.getASuccessor() and
(succSkipsFirstLoopAlwaysTrueUponEntry = true or succSkipsFirstLoopAlwaysTrueUponEntry = false) and
(
// The edge from `pred` to `succ` is from a loop condition provably
// true upon entry, so the value of `predSkipsFirstLoopAlwaysTrueUponEntry`
// is determined by whether the true edge or the false edge is chosen,
// regardless of the value of `succSkipsFirstLoopAlwaysTrueUponEntry`.
bbLoopConditionAlwaysTrueUponEntrySuccessor(pred, succ, predSkipsFirstLoopAlwaysTrueUponEntry)
or
// The edge from `pred` to `succ` is _not_ from a loop condition provably
// true upon entry, so the values of `predSkipsFirstLoopAlwaysTrueUponEntry`
// and `succSkipsFirstLoopAlwaysTrueUponEntry` must be the same.
not bbLoopConditionAlwaysTrueUponEntrySuccessor(pred, succ, _) and
succSkipsFirstLoopAlwaysTrueUponEntry = predSkipsFirstLoopAlwaysTrueUponEntry and
// Moreover, if `pred` contains the entry point of a loop where the
// condition is provably true upon entry, then `succ` is not allowed
// to skip that loop, and hence `succSkipsFirstLoopAlwaysTrueUponEntry = false`.
(
bbLoopEntryConditionAlwaysTrueAt(pred, _, _)
implies
succSkipsFirstLoopAlwaysTrueUponEntry = false
)
)
}
/**
* DEPRECATED: Use `StackVariableReachabilityWithReassignment` instead.
*
* Reachability analysis for control-flow nodes involving stack variables.
* Unlike `LocalScopeVariableReachability`, this analysis takes variable
* reassignments into account.
*
* This class is used like `LocalScopeVariableReachability`, except that
* subclasses should override `isSourceActual` and `isSinkActual` instead of
* `isSource` and `isSink`, and that there is a `reachesTo` predicate in
* addition to `reaches`.
*/
abstract deprecated class LocalScopeVariableReachabilityWithReassignment extends LocalScopeVariableReachability {
bindingset[this]
LocalScopeVariableReachabilityWithReassignment() { length() >= 0 }
/** Override this predicate rather than `isSource` (`isSource` is used internally). */
abstract predicate isSourceActual(ControlFlowNode node, LocalScopeVariable v);
/** Override this predicate rather than `isSink` (`isSink` is used internally). */
abstract predicate isSinkActual(ControlFlowNode node, LocalScopeVariable v);
/**
* Holds if the source node `source` can reach the sink `sink` without crossing
* a barrier, taking reassignments into account. This is (almost) equivalent
* to the following QL predicate, but uses basic blocks internally for better
* performance:
*
* ```
* predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* reachesImpl(source, v, sink)
* and
* isSinkActual(sink, v)
* }
*
* predicate reachesImpl(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* isSourceActual(source, v)
* and
* (
* sink = source.getASuccessor()
* or
* exists(ControlFlowNode mid, SemanticStackVariable v0 | reachesImpl(source, v0, mid) |
* // ordinary successor
* not isBarrier(mid, v) and
* sink = mid.getASuccessor() and
* v = v0
* or
* // reassigned from v0 to v
* exprDefinition(v, mid, v0.getAnAccess()) and
* sink = mid.getASuccessor()
* )
* )
* }
* ```
*
* In addition to using a better performing implementation, this analysis
* accounts for loops where the condition is provably true upon entry.
*/
override predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
reachesTo(source, v, sink, _)
}
/**
* As `reaches`, but also specifies the last variable it was reassigned to (`v0`).
*/
predicate reachesTo(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink, SemanticStackVariable v0
) {
exists(ControlFlowNode def |
actualSourceReaches(source, v, def, v0) and
LocalScopeVariableReachability.super.reaches(def, v0, sink) and
isSinkActual(sink, v0)
)
}
private predicate actualSourceReaches(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode def, SemanticStackVariable v0
) {
isSourceActual(source, v) and def = source and v0 = v
or
exists(ControlFlowNode source1, SemanticStackVariable v1 |
actualSourceReaches(source, v, source1, v1)
|
reassignment(source1, v1, def, v0)
)
}
private predicate reassignment(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode def, SemanticStackVariable v0
) {
LocalScopeVariableReachability.super.reaches(source, v, def) and
exprDefinition(v0, def, v.getAnAccess())
}
final override predicate isSource(ControlFlowNode node, LocalScopeVariable v) {
isSourceActual(node, v)
or
// Reassignment generates a new (non-actual) source
reassignment(_, _, node, v)
}
final override predicate isSink(ControlFlowNode node, LocalScopeVariable v) {
isSinkActual(node, v)
or
// Reassignment generates a new (non-actual) sink
exprDefinition(_, node, v.getAnAccess())
}
}
/**
* DEPRECATED: Use `StackVariableReachabilityExt` instead.
*
* Same as `LocalScopeVariableReachability`, but `isBarrier` works on control-flow
* edges rather than nodes and is therefore parameterized by the original
* source node as well. Otherwise, this class is used like
* `LocalScopeVariableReachability`.
*/
abstract deprecated class LocalScopeVariableReachabilityExt extends string {
bindingset[this]
LocalScopeVariableReachabilityExt() { length() >= 0 }
/** `node` is a source for the reachability analysis using variable `v`. */
abstract predicate isSource(ControlFlowNode node, LocalScopeVariable v);
/** `sink` is a (potential) sink for the reachability analysis using variable `v`. */
abstract predicate isSink(ControlFlowNode node, LocalScopeVariable v);
/** `node` is a barrier for the reachability analysis using variable `v` and starting from `source`. */
abstract predicate isBarrier(
ControlFlowNode source, ControlFlowNode node, ControlFlowNode next, LocalScopeVariable v
);
/** See `LocalScopeVariableReachability.reaches`. */
predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
exists(BasicBlock bb, int i |
isSource(source, v) and
bb.getNode(i) = source and
not bb.isUnreachable()
|
exists(int j |
j > i and
sink = bb.getNode(j) and
isSink(sink, v) and
not exists(int k | isBarrier(source, bb.getNode(k), bb.getNode(k + 1), v) |
k in [i .. j - 1]
)
)
or
not exists(int k | isBarrier(source, bb.getNode(k), bb.getNode(k + 1), v) | k >= i) and
bbSuccessorEntryReaches(source, bb, v, sink, _)
)
}
private predicate bbSuccessorEntryReaches(
ControlFlowNode source, BasicBlock bb, SemanticStackVariable v, ControlFlowNode node,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry) and
not isBarrier(source, bb.getEnd(), succ.getStart(), v)
|
bbEntryReachesLocally(source, succ, v, node) and
succSkipsFirstLoopAlwaysTrueUponEntry = false
or
not exists(int k | isBarrier(source, succ.getNode(k), succ.getNode(k + 1), v)) and
bbSuccessorEntryReaches(source, succ, v, node, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesLocally(
ControlFlowNode source, BasicBlock bb, SemanticStackVariable v, ControlFlowNode node
) {
isSource(source, v) and
exists(int n | node = bb.getNode(n) and isSink(node, v) |
not exists(int m | m < n | isBarrier(source, bb.getNode(m), bb.getNode(m + 1), v))
)
}
}

View File

@@ -0,0 +1,291 @@
/**
* Provides classes and predicates for working with null values and checks for nullness.
*/
import cpp
import DefinitionsAndUses
/**
* A C/C++ literal whose value is considered null.
*/
abstract class NullValue extends Expr { }
/**
* A C/C++ literal whose value is zero.
*/
class Zero extends NullValue {
Zero() { this.(Literal).getValue() = "0" }
}
/**
* Holds if `var` is null when `checkExpr` evaluates to a true value.
*/
cached
predicate nullCheckExpr(Expr checkExpr, Variable var) {
exists(LocalScopeVariable v, AnalysedExpr expr |
var = v and
checkExpr = expr
|
exists(NotExpr notexpr, AnalysedExpr child |
expr = notexpr and notexpr.getOperand() = child and validCheckExpr(child, v)
)
or
exists(EQExpr eq, AnalysedExpr child, NullValue zero, int i |
expr = eq and
eq.getChild(i) = child and
validCheckExpr(child, v) and
eq.getChild(1 - i) = zero
)
or
exists(NEExpr neq, AnalysedExpr child, NullValue zero, int i |
expr = neq and
neq.getChild(i) = child and
nullCheckExpr(child, v) and
neq.getChild(1 - i) = zero
)
or
exists(LogicalAndExpr op, AnalysedExpr child |
expr = op and
op.getRightOperand() = child and
nullCheckExpr(child, v)
)
or
exists(AssignExpr assign, AnalysedExpr child |
expr = assign and
assign.getRValue() = child and
nullCheckExpr(child, v) and
not expr.isDef(v)
)
or
exists(FunctionCall fc, AnalysedExpr child |
expr = fc and
fc.getTarget().hasGlobalName("__builtin_expect") and
fc.getArgument(0) = child and
nullCheckExpr(child, v)
)
)
}
/**
* Holds if `var` is non-null when `checkExpr` evaluates to a true value.
*/
cached
predicate validCheckExpr(Expr checkExpr, Variable var) {
exists(AnalysedExpr expr, LocalScopeVariable v |
v = var and
expr = checkExpr
|
expr.isUse(v)
or
expr.isDef(v)
or
exists(NotExpr notexpr, AnalysedExpr child |
expr = notexpr and notexpr.getOperand() = child and nullCheckExpr(child, v)
)
or
exists(EQExpr eq, AnalysedExpr child, NullValue zero, int i |
expr = eq and
eq.getChild(i) = child and
nullCheckExpr(child, v) and
eq.getChild(1 - i) = zero
)
or
exists(NEExpr neq, AnalysedExpr child, NullValue zero, int i |
expr = neq and
neq.getChild(i) = child and
validCheckExpr(child, v) and
neq.getChild(1 - i) = zero
)
or
exists(LogicalAndExpr op, AnalysedExpr child |
expr = op and
op.getRightOperand() = child and
validCheckExpr(child, v)
)
or
exists(AssignExpr assign, AnalysedExpr child |
expr = assign and
assign.getRValue() = child and
validCheckExpr(child, v) and
not expr.isDef(v)
)
or
exists(FunctionCall fc, AnalysedExpr child |
expr = fc and
fc.getTarget().hasGlobalName("__builtin_expect") and
fc.getArgument(0) = child and
validCheckExpr(child, v)
)
)
}
/**
* An expression that has been extended with member predicates that provide
* information about the role of this expression in nullness checks.
*/
class AnalysedExpr extends Expr {
/**
* Holds if `v` is null when this expression evaluates to a true value.
*/
predicate isNullCheck(LocalScopeVariable v) { nullCheckExpr(this, v) }
/**
* Holds if `v` is non-null when this expression evaluates to a true value.
*/
predicate isValidCheck(LocalScopeVariable v) { validCheckExpr(this, v) }
/**
* Gets a successor of `this` in the control flow graph, where that successor
* is among the nodes to which control may flow when `this` tests `v` to be
* _null_.
*/
ControlFlowNode getNullSuccessor(LocalScopeVariable v) {
this.isNullCheck(v) and result = this.getATrueSuccessor()
or
this.isValidCheck(v) and result = this.getAFalseSuccessor()
}
/**
* Gets a successor of `this` in the control flow graph, where that successor
* is among the nodes to which control may flow when `this` tests `v` to be
* _not null_.
*/
ControlFlowNode getNonNullSuccessor(LocalScopeVariable v) {
this.isNullCheck(v) and result = this.getAFalseSuccessor()
or
this.isValidCheck(v) and result = this.getATrueSuccessor()
}
/**
* DEPRECATED: Use `getNonNullSuccessor` instead, which does the same.
*/
deprecated ControlFlowNode getValidSuccessor(LocalScopeVariable v) {
this.isValidCheck(v) and result = this.getATrueSuccessor()
or
this.isNullCheck(v) and result = this.getAFalseSuccessor()
}
/**
* Holds if this is a `VariableAccess` of `v` nested inside a condition.
*/
predicate isUse(LocalScopeVariable v) {
this.inCondition() and
this = v.getAnAccess()
}
/**
* Holds if this is an `Assignment` to `v` nested inside a condition.
*/
predicate isDef(LocalScopeVariable v) {
this.inCondition() and
this.(Assignment).getLValue() = v.getAnAccess()
}
/**
* Holds if `this` occurs at or below the controlling expression of an `if`,
* `while`, `?:`, or similar.
*/
predicate inCondition() {
this.isCondition() or
this.getParent().(AnalysedExpr).inCondition()
}
}
/**
* Holds if `var` is likely to be null at `node`.
*/
cached
predicate checkedNull(Variable var, ControlFlowNode node) {
exists(LocalScopeVariable v | v = var |
exists(AnalysedExpr e | e.getNullSuccessor(v) = node)
or
exists(ControlFlowNode pred |
pred = node.getAPredecessor() and
not pred.(AnalysedExpr).isDef(v) and
checkedNull(v, pred)
)
)
}
/**
* Holds if `var` is likely to be non-null at `node`.
*/
cached
predicate checkedValid(Variable var, ControlFlowNode node) {
exists(LocalScopeVariable v | v = var |
exists(AnalysedExpr e | e.getNonNullSuccessor(v) = node)
or
exists(ControlFlowNode pred |
pred = node.getAPredecessor() and
not pred.(AnalysedExpr).isDef(v) and
checkedValid(v, pred)
)
)
}
/**
* Holds if `val` is a null literal or a call to a function that may return a
* null literal.
*/
predicate nullValue(Expr val) {
val instanceof NullValue
or
callMayReturnNull(val)
or
nullValue(val.(Assignment).getRValue())
}
/**
* Holds if the evaluation of `n` may have the effect of, directly or
* indirectly, assigning a null literal to `var`.
*/
predicate nullInit(Variable v, ControlFlowNode n) {
exists(Initializer init |
init = n and
nullValue(init.getExpr()) and
v.getInitializer() = init
)
or
exists(AssignExpr assign |
assign = n and
nullValue(assign.getRValue()) and
assign.getLValue() = v.getAnAccess()
)
}
/**
* Holds if `call` may, directly or indirectly, evaluate to a null literal.
*/
predicate callMayReturnNull(Call call) {
exists(Options opts |
if opts.overrideReturnsNull(call)
then opts.returnsNull(call)
else mayReturnNull(call.(FunctionCall).getTarget())
)
}
/**
* Holds if `f` may, directly or indirectly, return a null literal.
*/
predicate mayReturnNull(Function f) {
f.hasGlobalOrStdName("malloc")
or
f.hasGlobalOrStdName("calloc")
or
// f.hasGlobalName("strchr")
// or
// f.hasGlobalName("strstr")
// or
exists(ReturnStmt ret |
nullValue(ret.getExpr()) and
ret.getEnclosingFunction() = f
)
or
exists(ReturnStmt ret, Expr returned, ControlFlowNode init, LocalScopeVariable v |
ret.getExpr() = returned and
nullInit(v, init) and
definitionUsePair(v, init, returned) and
not checkedValid(v, returned) and
ret.getEnclosingFunction() = f
)
}

View File

@@ -0,0 +1,161 @@
/**
* Provides classes and predicates for SSA representation (Static Single Assignment form).
*/
import cpp
import semmle.code.cpp.controlflow.Dominance
import SSAUtils
/**
* The SSA logic comes in two versions: the standard SSA and range-analysis RangeSSA.
* This class provides the standard SSA logic.
*/
library class StandardSSA extends SSAHelper {
StandardSSA() { this = 0 }
}
/**
* A definition of one or more SSA variables, including phi node definitions.
* An _SSA variable_, as defined in the literature, is effectively the pair of
* an `SsaDefinition d` and a `StackVariable v`, written `(d, v)` in this
* documentation. Note that definitions and uses can be coincident due to the
* presence of parameter definitions and phi nodes.
*
* Not all `StackVariable`s of a function have SSA definitions. If the variable
* has its address taken, either explicitly or implicitly, then it is excluded
* from analysis. `SsaDefinition`s are not generated in locations that are
* statically seen to be unreachable.
*/
class SsaDefinition extends ControlFlowNodeBase {
SsaDefinition() { exists(StandardSSA x | x.ssa_defn(_, this, _, _)) }
/**
* Gets a variable corresponding to an SSA StackVariable defined by
* this definition.
*/
StackVariable getAVariable() { exists(StandardSSA x | x.ssa_defn(result, this, _, _)) }
/**
* Gets a string representation of the SSA variable represented by the pair
* `(this, v)`.
*/
string toString(StackVariable v) { exists(StandardSSA x | result = x.toString(this, v)) }
/** Gets a use of the SSA variable represented by the pair `(this, v)`. */
VariableAccess getAUse(StackVariable v) { exists(StandardSSA x | result = x.getAUse(this, v)) }
/**
* Gets the control-flow node for this definition. This will usually be the
* control-flow node that assigns to this variable as a side effect, but
* there are some exceptions. If `this` is defined by initialization, the
* result is the value of `Initializer.getExpr()` for that initialization.
* If `this` is a function parameter (see `definedByParameter`), the result
* will be the function entry point. If `this` variable is defined by being
* passed as a reference in a function call, including overloaded
* operators, the result will be the `VariableAccess` expression for this
* parameter. If `this` is a phi node (see `isPhiNode`), the result will be
* the node where control flow is joined from multiple paths.
*/
ControlFlowNode getDefinition() { result = this }
/** Gets the `BasicBlock` containing this definition. */
BasicBlock getBasicBlock() { result.contains(getDefinition()) }
/** Holds if this definition is a phi node for variable `v`. */
predicate isPhiNode(StackVariable v) { exists(StandardSSA x | x.phi_node(v, this.(BasicBlock))) }
/** Gets the location of this definition. */
Location getLocation() { result = this.(ControlFlowNode).getLocation() }
/** Holds if the SSA variable `(this, p)` is defined by parameter `p`. */
predicate definedByParameter(Parameter p) { this = p.getFunction().getEntryPoint() }
/**
* Holds if the SSA variable `(result, v)` is an input to the phi definition
* `(this, v)`.
*/
SsaDefinition getAPhiInput(StackVariable v) {
this.isPhiNode(v) and
result.reachesEndOfBB(v, this.(BasicBlock).getAPredecessor())
}
/**
* Gets the expression assigned to the SSA variable `(this, v)`, if any,
* when it is not a phi definition. The following is an exhaustive list of
* expressions that may be the result of this predicate.
*
* - The contained expression of an `Initializer`.
* - The right-hand side of an `AssignExpr`.
* - An `AssignOperation`.
* - A `CrementOperation`.
*
* In all cases except `PostfixCrementOperation`, the variable `v` will be
* equal to the result of this predicate after evaluation of
* `this.getDefinition()`.
*
* If the SSA variable is defined in other ways than those four (such as
* function parameters or `f(&x)`) there is no result. These cases are
* instead covered via `definedByParameter` and `getDefinition`,
* respectively.
*/
Expr getDefiningValue(StackVariable v) {
exists(ControlFlowNode def | def = this.getDefinition() |
def = v.getInitializer().getExpr() and def = result
or
exists(AssignExpr assign |
def = assign and
assign.getLValue() = v.getAnAccess() and
result = assign.getRValue()
)
or
exists(AssignOperation assign |
def = assign and
assign.getLValue() = v.getAnAccess() and
result = assign
)
or
exists(CrementOperation crement |
def = crement and
crement.getOperand() = v.getAnAccess() and
result = crement
)
)
}
/** Holds if `(this, v)` reaches the end of basic block `b`. */
predicate reachesEndOfBB(StackVariable v, BasicBlock b) {
exists(StandardSSA x | x.ssaDefinitionReachesEndOfBB(v, this, b))
}
/**
* Gets a definition that ultimately defines this variable and is not
* itself a phi node.
*/
SsaDefinition getAnUltimateSsaDefinition(StackVariable v) {
result = this.getAPhiInput(v).getAnUltimateSsaDefinition(v)
or
v = this.getAVariable() and
not this.isPhiNode(v) and
result = this
}
/**
* Gets a possible defining expression for `v` at this SSA definition,
* recursing backwards through phi definitions. Not all definitions have a
* defining expression---see the documentation for `getDefiningValue`.
*/
Expr getAnUltimateDefiningValue(StackVariable v) {
result = this.getAnUltimateSsaDefinition(v).getDefiningValue(v)
}
/**
* DEPRECATED: this is the old name for `getAnUltimateDefiningValue`. The
* name was confusing as it seemed analogous to `getDefinition` rather than
* `getDefiningValue`. The SSA libraries for other languages use the name
* `getAnUltimateSsaDefinition` to refer to a predicate named
* `getAnUltimateSsaDefinition` in this class.
*/
deprecated Expr getAnUltimateDefinition(StackVariable v) {
result = this.getAnUltimateDefiningValue(v)
}
}

View File

@@ -0,0 +1,311 @@
/**
* Provides classes and predicates for use in the SSA library.
*/
import cpp
import semmle.code.cpp.controlflow.Dominance
import semmle.code.cpp.controlflow.SSA // must be imported for proper caching of SSAHelper
import semmle.code.cpp.rangeanalysis.RangeSSA // must be imported for proper caching of SSAHelper
/**
* The dominance frontier of a block `x` is the set of all blocks `w` such that
* `x` dominates a predecessor of `w` but does not strictly dominate `w`.
*
* This implementation is equivalent to:
*
* bbDominates(x, w.getAPredecessor()) and not bbStrictlyDominates(x, w)
*/
private predicate dominanceFrontier(BasicBlock x, BasicBlock w) {
x = w.getAPredecessor() and not bbIDominates(x, w)
or
exists(BasicBlock prev | dominanceFrontier(prev, w) |
bbIDominates(x, prev) and
not bbIDominates(x, w)
)
}
/**
* Extended version of `definition` that also includes parameters.
*/
predicate var_definition(StackVariable v, ControlFlowNode node) {
not addressTakenVariable(v) and
not unreachable(node) and
(
if isReferenceVar(v)
then
// Assignments to reference variables modify the referenced
// value, not the reference itself. So reference variables only
// have two kinds of definition: initializers and parameters.
node = v.getInitializer().getExpr()
else definition(v, node)
)
or
v instanceof Parameter and
exists(BasicBlock b |
b.getStart() = node and
not exists(b.getAPredecessor()) and
b = v.(Parameter).getFunction().getEntryPoint()
)
}
/**
* Stack variables that have their address taken are excluded from the
* analysis because the pointer could be used to change the value at
* any moment.
*/
private predicate addressTakenVariable(StackVariable var) {
// If the type of the variable is a reference type, then it is safe (as
// far as SSA is concerned) to take its address, because this does not
// enable the variable to be modified indirectly. Obviously the
// referenced value can change, but that is not the same thing as
// changing which value the reference points to. SSA tracks the latter,
// but the target of a reference is immutable so reference variables
// always have exactly one definition.
not isReferenceVar(var) and
// Find a VariableAccess that takes the address of `var`.
exists(VariableAccess va |
va = var.getAnAccess() and
va.isAddressOfAccessNonConst() and
// If the address is passed to a function then we will trust that it
// is only used to modify the variable for the duration of the
// function call.
not exists(Call call | call.passesByReferenceNonConst(_, va))
)
}
/**
* Holds if `v` is a stack-allocated reference-typed local variable. We don't
* build SSA for such variables since they are likely to change values even
* when not syntactically mentioned. For the same reason,
* `addressTakenVariable` is used to prevent tracking variables that may be
* aliased by such a reference.
*
* Reference-typed parameters are treated as if they weren't references.
* That's because it's in practice highly unlikely that they alias other data
* accessible from the function body.
*/
private predicate isReferenceVar(StackVariable v) {
v.getUnspecifiedType() instanceof ReferenceType and
not v instanceof Parameter
}
/**
* This predicate is the same as `var_definition`, but annotated with
* the basic block and index of the control flow node.
*/
private predicate variableUpdate(StackVariable v, ControlFlowNode n, BasicBlock b, int i) {
var_definition(v, n) and n = b.getNode(i)
}
private predicate ssa_use(StackVariable v, VariableAccess node, BasicBlock b, int index) {
useOfVar(v, node) and b.getNode(index) = node
}
private predicate live_at_start_of_bb(StackVariable v, BasicBlock b) {
exists(int i | ssa_use(v, _, b, i) | not exists(int j | variableUpdate(v, _, b, j) | j < i))
or
live_at_exit_of_bb(v, b) and not variableUpdate(v, _, b, _)
}
pragma[noinline]
private predicate live_at_exit_of_bb(StackVariable v, BasicBlock b) {
live_at_start_of_bb(v, b.getASuccessor())
}
/** Common SSA logic for standard SSA and range-analysis SSA. */
cached
library class SSAHelper extends int {
/* 0 = StandardSSA, 1 = RangeSSA */
cached
SSAHelper() { this in [0 .. 1] }
/**
* Override to insert a custom phi node for variable `v` at the start of
* basic block `b`.
*/
cached
predicate custom_phi_node(StackVariable v, BasicBlock b) { none() }
/**
* Remove any custom phi nodes that are invalid.
*/
private predicate sanitized_custom_phi_node(StackVariable v, BasicBlock b) {
custom_phi_node(v, b) and
not addressTakenVariable(v) and
not isReferenceVar(v) and
b.isReachable()
}
/**
* Holds if there is a phi node for variable `v` at the start of basic block
* `b`.
*/
cached
predicate phi_node(StackVariable v, BasicBlock b) {
frontier_phi_node(v, b) or sanitized_custom_phi_node(v, b)
}
/**
* A phi node is required for variable `v` at the start of basic block `b`
* if there exists a basic block `x` such that `b` is in the dominance
* frontier of `x` and `v` is defined in `x` (including phi-nodes as
* definitions). This is known as the iterated dominance frontier. See
* Modern Compiler Implementation by Andrew Appel.
*/
private predicate frontier_phi_node(StackVariable v, BasicBlock b) {
exists(BasicBlock x | dominanceFrontier(x, b) and ssa_defn_rec(v, x)) and
/* We can also eliminate those nodes where the variable is not live on any incoming edge */
live_at_start_of_bb(v, b)
}
private predicate ssa_defn_rec(StackVariable v, BasicBlock b) {
phi_node(v, b)
or
variableUpdate(v, _, b, _)
}
/**
* Holds if `v` is defined, for the purpose of SSA, at `node`, which is at
* position `index` in block `b`. This includes definitions from phi nodes.
*/
cached
predicate ssa_defn(StackVariable v, ControlFlowNode node, BasicBlock b, int index) {
phi_node(v, b) and b.getStart() = node and index = -1
or
variableUpdate(v, node, b, index)
}
/*
* The construction of SSA form ensures that each use of a variable is
* dominated by its definition. A definition of an SSA variable therefore
* reaches a `ControlFlowNode` if it is the _closest_ SSA variable definition
* that dominates the node. If two definitions dominate a node then one must
* dominate the other, so therefore the definition of _closest_ is given by the
* dominator tree. Thus, reaching definitions can be calculated in terms of
* dominance.
*/
/**
* A ranking of the indices `i` at which there is an SSA definition or use of
* `v` in the basic block `b`.
*
* Basic block indices are translated to rank indices in order to skip
* irrelevant indices at which there is no definition or use when traversing
* basic blocks.
*/
private predicate defUseRank(StackVariable v, BasicBlock b, int rankix, int i) {
i = rank[rankix](int j | ssa_defn(v, _, b, j) or ssa_use(v, _, b, j))
}
/**
* Gets the maximum rank index for the given variable `v` and basic block
* `b`. This will be the number of defs/uses of `v` in `b` plus one, where
* the extra rank at the end represents a position past the last node in
* the block.
*/
private int lastRank(StackVariable v, BasicBlock b) {
result = max(int rankix | defUseRank(v, b, rankix, _)) + 1
}
/**
* Holds if SSA variable `(v, def)` is defined at rank index `rankix` in
* basic block `b`.
*/
private predicate ssaDefRank(StackVariable v, ControlFlowNode def, BasicBlock b, int rankix) {
exists(int i |
ssa_defn(v, def, b, i) and
defUseRank(v, b, rankix, i)
)
}
/**
* Holds if SSA variable `(v, def)` reaches the rank index `rankix` in its
* own basic block `b` before being overwritten by another definition of
* `v` that comes _at or after_ the reached node. Reaching a node means
* that the definition is visible to any _use_ at that node.
*/
private predicate ssaDefReachesRank(StackVariable v, ControlFlowNode def, BasicBlock b, int rankix) {
// A definition should not reach its own node unless a loop allows it.
// When nodes are both definitions and uses for the same variable, the
// use is understood to happen _before_ the definition. Phi nodes are
// at rankidx -1 and will therefore always reach the first node in the
// basic block.
ssaDefRank(v, def, b, rankix - 1)
or
ssaDefReachesRank(v, def, b, rankix - 1) and
rankix <= lastRank(v, b) and // Without this, the predicate would be infinite.
not ssaDefRank(v, _, b, rankix - 1) // Range is inclusive of but not past next def.
}
/** Holds if SSA variable `(v, def)` reaches the end of block `b`. */
cached
predicate ssaDefinitionReachesEndOfBB(StackVariable v, ControlFlowNode def, BasicBlock b) {
live_at_exit_of_bb(v, b) and ssaDefReachesRank(v, def, b, lastRank(v, b))
or
exists(BasicBlock idom |
ssaDefinitionReachesEndOfBB(v, def, idom) and
noDefinitionsSinceIDominator(v, idom, b)
)
}
/**
* Helper predicate for ssaDefinitionReachesEndOfBB. If there is no
* definition of `v` in basic block `b`, then any definition of `v`
* that reaches the end of `idom` (the immediate dominator of `b`) also
* reaches the end of `b`.
*/
pragma[noinline]
private predicate noDefinitionsSinceIDominator(StackVariable v, BasicBlock idom, BasicBlock b) {
bbIDominates(idom, b) and // It is sufficient to traverse the dominator graph, cf. discussion above.
live_at_exit_of_bb(v, b) and
not ssa_defn(v, _, b, _)
}
/**
* Holds if SSA variable `(v, def)` reaches `use` within the same basic
* block, where `use` is a `VariableAccess` of `v`.
*/
private predicate ssaDefinitionReachesUseWithinBB(StackVariable v, ControlFlowNode def, Expr use) {
exists(BasicBlock b, int rankix, int i |
ssaDefReachesRank(v, def, b, rankix) and
defUseRank(v, b, rankix, i) and
ssa_use(v, use, b, i)
)
}
/**
* Holds if SSA variable `(v, def)` reaches the control-flow node `use`.
*/
private predicate ssaDefinitionReaches(StackVariable v, ControlFlowNode def, Expr use) {
ssaDefinitionReachesUseWithinBB(v, def, use)
or
exists(BasicBlock b |
ssa_use(v, use, b, _) and
ssaDefinitionReachesEndOfBB(v, def, b.getAPredecessor()) and
not ssaDefinitionReachesUseWithinBB(v, _, use)
)
}
/**
* Gets a string representation of the SSA variable represented by the pair
* `(node, v)`.
*/
cached
string toString(ControlFlowNode node, StackVariable v) {
if phi_node(v, node.(BasicBlock))
then result = "SSA phi(" + v.getName() + ")"
else (
ssa_defn(v, node, _, _) and result = "SSA def(" + v.getName() + ")"
)
}
/**
* Holds if SSA variable `(v, def)` reaches `result`, where `result` is an
* access of `v`.
*/
cached
VariableAccess getAUse(ControlFlowNode def, StackVariable v) {
ssaDefinitionReaches(v, def, result) and
ssa_use(v, result, _, _)
}
}

View File

@@ -0,0 +1,388 @@
/**
* Provides a library for working with local (intra-procedural) control-flow
* reachability involving stack variables.
*/
import cpp
/**
* A reachability analysis for control-flow nodes involving stack variables.
* This defines sources, sinks, and any other configurable aspect of the
* analysis. Multiple analyses can coexist. To create an analysis, extend this
* class with a subclass whose characteristic predicate is a unique singleton
* string. For example, write
*
* ```
* class MyAnalysisConfiguration extends StackVariableReachability {
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
* // Override `isSource` and `isSink`.
* // Override `isBarrier`.
* }
* ```
*
* Then, to query whether there is flow between some source and sink, call the
* `reaches` predicate on an instance of `MyAnalysisConfiguration`.
*/
abstract class StackVariableReachability extends string {
bindingset[this]
StackVariableReachability() { length() >= 0 }
/** Holds if `node` is a source for the reachability analysis using variable `v`. */
abstract predicate isSource(ControlFlowNode node, StackVariable v);
/** Holds if `sink` is a (potential) sink for the reachability analysis using variable `v`. */
abstract predicate isSink(ControlFlowNode node, StackVariable v);
/** Holds if `node` is a barrier for the reachability analysis using variable `v`. */
abstract predicate isBarrier(ControlFlowNode node, StackVariable v);
/**
* Holds if the source node `source` can reach the sink `sink` without crossing
* a barrier. This is (almost) equivalent to the following QL predicate but
* uses basic blocks internally for better performance:
*
* ```
* predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* reachesImpl(source, v, sink)
* and
* isSink(sink, v)
* }
*
* predicate reachesImpl(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* sink = source.getASuccessor() and isSource(source, v)
* or
* exists(ControlFlowNode mid | reachesImpl(source, v, mid) |
* not isBarrier(mid, v)
* and
* sink = mid.getASuccessor()
* )
* }
* ```
*
* In addition to using a better performing implementation, this analysis
* accounts for loops where the condition is provably true upon entry.
*/
predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
/*
* Implementation detail: the predicates in this class are a generalization of
* those in DefinitionsAndUses.qll, and should be kept in sync.
*
* Unfortunately, caching of abstract predicates does not work well, so the
* predicates in DefinitionsAndUses.qll cannot use this library.
*/
exists(BasicBlock bb, int i |
isSource(source, v) and
bb.getNode(i) = source and
not bb.isUnreachable()
|
exists(int j |
j > i and
sink = bb.getNode(j) and
isSink(sink, v) and
not exists(int k | isBarrier(bb.getNode(k), v) | k in [i + 1 .. j - 1])
)
or
not exists(int k | isBarrier(bb.getNode(k), v) | k > i) and
bbSuccessorEntryReaches(bb, v, sink, _)
)
}
private predicate bbSuccessorEntryReaches(
BasicBlock bb, SemanticStackVariable v, ControlFlowNode node,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry)
|
bbEntryReachesLocally(succ, v, node) and
succSkipsFirstLoopAlwaysTrueUponEntry = false
or
not isBarrier(succ.getNode(_), v) and
bbSuccessorEntryReaches(succ, v, node, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesLocally(
BasicBlock bb, SemanticStackVariable v, ControlFlowNode node
) {
exists(int n |
node = bb.getNode(n) and
isSink(node, v)
|
not exists(this.firstBarrierIndexIn(bb, v))
or
n <= this.firstBarrierIndexIn(bb, v)
)
}
private int firstBarrierIndexIn(BasicBlock bb, SemanticStackVariable v) {
result = min(int m | isBarrier(bb.getNode(m), v))
}
}
/**
* Holds if `bb` contains the entry point `loop` for a loop at position `i`.
* The condition of that loop is provably true upon entry but not provably
* true in general (if it were, the false-successor had already been removed
* from the CFG).
*
* Examples:
* ```
* for (int i = 0; i < 2; i++) { } // always true upon entry
* for (int i = 0; true; i++) { } // always true
* ```
*/
private predicate bbLoopEntryConditionAlwaysTrueAt(BasicBlock bb, int i, ControlFlowNode loop) {
exists(Expr condition |
loopConditionAlwaysTrueUponEntry(loop, condition) and
not conditionAlwaysTrue(condition) and
bb.getNode(i) = loop
)
}
/**
* Basic block `pred` contains all or part of the condition belonging to a loop,
* and there is an edge from `pred` to `succ` that concludes the condition.
* If the edge corrseponds with the loop condition being found to be `true`, then
* `skipsLoop` is `false`. Otherwise the edge corresponds with the loop condition
* being found to be `false` and `skipsLoop` is `true`. Non-concluding edges
* within a complex loop condition are not matched by this predicate.
*/
private predicate bbLoopConditionAlwaysTrueUponEntrySuccessor(
BasicBlock pred, BasicBlock succ, boolean skipsLoop
) {
exists(Expr cond |
loopConditionAlwaysTrueUponEntry(_, cond) and
cond.getAChild*() = pred.getEnd() and
succ = pred.getASuccessor() and
not cond.getAChild*() = succ.getStart() and
(
succ = pred.getAFalseSuccessor() and
skipsLoop = true
or
succ = pred.getATrueSuccessor() and
skipsLoop = false
)
)
}
/**
* Loop invariant for `bbSuccessorEntryReaches`:
*
* - `succ` is a successor of `pred`.
* - `predSkipsFirstLoopAlwaysTrueUponEntry`: whether the path from
* `pred` (via `succ`) skips the first loop where the condition is
* provably true upon entry.
* - `succSkipsFirstLoopAlwaysTrueUponEntry`: whether the path from
* `succ` skips the first loop where the condition is provably true
* upon entry.
* - If `pred` contains the entry point of a loop where the condition
* is provably true upon entry, then `succ` is not allowed to skip
* that loop (`succSkipsFirstLoopAlwaysTrueUponEntry = false`).
*/
predicate bbSuccessorEntryReachesLoopInvariant(
BasicBlock pred, BasicBlock succ, boolean predSkipsFirstLoopAlwaysTrueUponEntry,
boolean succSkipsFirstLoopAlwaysTrueUponEntry
) {
succ = pred.getASuccessor() and
(succSkipsFirstLoopAlwaysTrueUponEntry = true or succSkipsFirstLoopAlwaysTrueUponEntry = false) and
(
// The edge from `pred` to `succ` is from a loop condition provably
// true upon entry, so the value of `predSkipsFirstLoopAlwaysTrueUponEntry`
// is determined by whether the true edge or the false edge is chosen,
// regardless of the value of `succSkipsFirstLoopAlwaysTrueUponEntry`.
bbLoopConditionAlwaysTrueUponEntrySuccessor(pred, succ, predSkipsFirstLoopAlwaysTrueUponEntry)
or
// The edge from `pred` to `succ` is _not_ from a loop condition provably
// true upon entry, so the values of `predSkipsFirstLoopAlwaysTrueUponEntry`
// and `succSkipsFirstLoopAlwaysTrueUponEntry` must be the same.
not bbLoopConditionAlwaysTrueUponEntrySuccessor(pred, succ, _) and
succSkipsFirstLoopAlwaysTrueUponEntry = predSkipsFirstLoopAlwaysTrueUponEntry and
// Moreover, if `pred` contains the entry point of a loop where the
// condition is provably true upon entry, then `succ` is not allowed
// to skip that loop, and hence `succSkipsFirstLoopAlwaysTrueUponEntry = false`.
(
bbLoopEntryConditionAlwaysTrueAt(pred, _, _)
implies
succSkipsFirstLoopAlwaysTrueUponEntry = false
)
)
}
/**
* Reachability analysis for control-flow nodes involving stack variables.
* Unlike `StackVariableReachability`, this analysis takes variable
* reassignments into account.
*
* This class is used like `StackVariableReachability`, except that
* subclasses should override `isSourceActual` and `isSinkActual` instead of
* `isSource` and `isSink`, and that there is a `reachesTo` predicate in
* addition to `reaches`.
*/
abstract class StackVariableReachabilityWithReassignment extends StackVariableReachability {
bindingset[this]
StackVariableReachabilityWithReassignment() { length() >= 0 }
/** Override this predicate rather than `isSource` (`isSource` is used internally). */
abstract predicate isSourceActual(ControlFlowNode node, StackVariable v);
/** Override this predicate rather than `isSink` (`isSink` is used internally). */
abstract predicate isSinkActual(ControlFlowNode node, StackVariable v);
/**
* Holds if the source node `source` can reach the sink `sink` without crossing
* a barrier, taking reassignments into account. This is (almost) equivalent
* to the following QL predicate, but uses basic blocks internally for better
* performance:
*
* ```
* predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* reachesImpl(source, v, sink)
* and
* isSinkActual(sink, v)
* }
*
* predicate reachesImpl(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
* isSourceActual(source, v)
* and
* (
* sink = source.getASuccessor()
* or
* exists(ControlFlowNode mid, SemanticStackVariable v0 | reachesImpl(source, v0, mid) |
* // ordinary successor
* not isBarrier(mid, v) and
* sink = mid.getASuccessor() and
* v = v0
* or
* // reassigned from v0 to v
* exprDefinition(v, mid, v0.getAnAccess()) and
* sink = mid.getASuccessor()
* )
* )
* }
* ```
*
* In addition to using a better performing implementation, this analysis
* accounts for loops where the condition is provably true upon entry.
*/
override predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
reachesTo(source, v, sink, _)
}
/**
* As `reaches`, but also specifies the last variable it was reassigned to (`v0`).
*/
predicate reachesTo(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink, SemanticStackVariable v0
) {
exists(ControlFlowNode def |
actualSourceReaches(source, v, def, v0) and
StackVariableReachability.super.reaches(def, v0, sink) and
isSinkActual(sink, v0)
)
}
private predicate actualSourceReaches(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode def, SemanticStackVariable v0
) {
isSourceActual(source, v) and def = source and v0 = v
or
exists(ControlFlowNode source1, SemanticStackVariable v1 |
actualSourceReaches(source, v, source1, v1)
|
reassignment(source1, v1, def, v0)
)
}
private predicate reassignment(
ControlFlowNode source, SemanticStackVariable v, ControlFlowNode def, SemanticStackVariable v0
) {
StackVariableReachability.super.reaches(source, v, def) and
exprDefinition(v0, def, v.getAnAccess())
}
final override predicate isSource(ControlFlowNode node, StackVariable v) {
isSourceActual(node, v)
or
// Reassignment generates a new (non-actual) source
reassignment(_, _, node, v)
}
final override predicate isSink(ControlFlowNode node, StackVariable v) {
isSinkActual(node, v)
or
// Reassignment generates a new (non-actual) sink
exprDefinition(_, node, v.getAnAccess())
}
}
/**
* Same as `StackVariableReachability`, but `isBarrier` works on control-flow
* edges rather than nodes and is therefore parameterized by the original
* source node as well. Otherwise, this class is used like
* `StackVariableReachability`.
*/
abstract class StackVariableReachabilityExt extends string {
bindingset[this]
StackVariableReachabilityExt() { length() >= 0 }
/** `node` is a source for the reachability analysis using variable `v`. */
abstract predicate isSource(ControlFlowNode node, StackVariable v);
/** `sink` is a (potential) sink for the reachability analysis using variable `v`. */
abstract predicate isSink(ControlFlowNode node, StackVariable v);
/** `node` is a barrier for the reachability analysis using variable `v` and starting from `source`. */
abstract predicate isBarrier(
ControlFlowNode source, ControlFlowNode node, ControlFlowNode next, StackVariable v
);
/** See `StackVariableReachability.reaches`. */
predicate reaches(ControlFlowNode source, SemanticStackVariable v, ControlFlowNode sink) {
exists(BasicBlock bb, int i |
isSource(source, v) and
bb.getNode(i) = source and
not bb.isUnreachable()
|
exists(int j |
j > i and
sink = bb.getNode(j) and
isSink(sink, v) and
not exists(int k | isBarrier(source, bb.getNode(k), bb.getNode(k + 1), v) |
k in [i .. j - 1]
)
)
or
not exists(int k | isBarrier(source, bb.getNode(k), bb.getNode(k + 1), v) | k >= i) and
bbSuccessorEntryReaches(source, bb, v, sink, _)
)
}
private predicate bbSuccessorEntryReaches(
ControlFlowNode source, BasicBlock bb, SemanticStackVariable v, ControlFlowNode node,
boolean skipsFirstLoopAlwaysTrueUponEntry
) {
exists(BasicBlock succ, boolean succSkipsFirstLoopAlwaysTrueUponEntry |
bbSuccessorEntryReachesLoopInvariant(bb, succ, skipsFirstLoopAlwaysTrueUponEntry,
succSkipsFirstLoopAlwaysTrueUponEntry) and
not isBarrier(source, bb.getEnd(), succ.getStart(), v)
|
bbEntryReachesLocally(source, succ, v, node) and
succSkipsFirstLoopAlwaysTrueUponEntry = false
or
not exists(int k | isBarrier(source, succ.getNode(k), succ.getNode(k + 1), v)) and
bbSuccessorEntryReaches(source, succ, v, node, succSkipsFirstLoopAlwaysTrueUponEntry)
)
}
private predicate bbEntryReachesLocally(
ControlFlowNode source, BasicBlock bb, SemanticStackVariable v, ControlFlowNode node
) {
isSource(source, v) and
exists(int n | node = bb.getNode(n) and isSink(node, v) |
not exists(int m | m < n | isBarrier(source, bb.getNode(m), bb.getNode(m + 1), v))
)
}
}

View File

@@ -0,0 +1,179 @@
// NOTE: There are two copies of this file, and they must be kept identical:
// - semmle/code/cpp/controlflow/SubBasicBlocks.qll
// - semmle/code/cpp/dataflow/internal/SubBasicBlocks.qll
//
// The second one is a private copy of the `SubBasicBlocks` library for
// internal use by the data flow library. Having an extra copy prevents
// non-monotonic recursion errors in queries that use both the data flow
// library and the `SubBasicBlocks` library.
/**
* Provides the `SubBasicBlock` class, used for partitioning basic blocks in
* smaller pieces.
*/
import cpp
/**
* An abstract class that directs where in the control-flow graph a new
* `SubBasicBlock` must start. If a `ControlFlowNode` is an instance of this
* class, that node is guaranteed to be the first node in a `SubBasicBlock`.
* If multiple libraries use the `SubBasicBlock` library, basic blocks may be
* split in more places than either library expects, but nothing should break
* as a direct result of that.
*/
abstract class SubBasicBlockCutNode extends ControlFlowNode {
SubBasicBlockCutNode() {
// Some control-flow nodes are not in any basic block. This includes
// `Conversion`s, expressions that are evaluated at compile time, default
// arguments, and `Function`s without implementation.
exists(this.getBasicBlock())
}
}
/**
* A block that can be smaller than or equal to a `BasicBlock`. Use this class
* when `ControlFlowNode` is too fine-grained and `BasicBlock` too
* coarse-grained. Their successor graph is like that of basic blocks, except
* that the blocks are split up with an extra edge right before any instance of
* the abstract class `SubBasicBlockCutNode`. Users of this library must
* therefore extend `SubBasicBlockCutNode` to direct where basic blocks will be
* split up.
*/
class SubBasicBlock extends ControlFlowNodeBase {
SubBasicBlock() {
this instanceof BasicBlock
or
this instanceof SubBasicBlockCutNode
}
/** Gets the basic block in which this `SubBasicBlock` is contained. */
BasicBlock getBasicBlock() { result = this.(ControlFlowNode).getBasicBlock() }
/**
* Holds if this `SubBasicBlock` comes first in its basic block. This is the
* only condition under which a `SubBasicBlock` may have multiple
* predecessors.
*/
predicate firstInBB() { exists(BasicBlock bb | this.getRankInBasicBlock(bb) = 1) }
/**
* Holds if this `SubBasicBlock` comes last in its basic block. This is the
* only condition under which a `SubBasicBlock` may have multiple successors.
*/
predicate lastInBB() {
exists(BasicBlock bb | this.getRankInBasicBlock(bb) = countSubBasicBlocksInBasicBlock(bb))
}
/**
* Gets the (1-based) rank of this `SubBasicBlock` among the other `SubBasicBlock`s in
* its containing basic block `bb`, where `bb` is equal to `getBasicBlock()`.
*/
int getRankInBasicBlock(BasicBlock bb) {
exists(int thisIndexInBB |
thisIndexInBB = this.getIndexInBasicBlock(bb) and
thisIndexInBB = rank[result](int i | i = any(SubBasicBlock n).getIndexInBasicBlock(bb))
)
}
/**
* DEPRECATED: use `getRankInBasicBlock` instead. Note that this predicate
* returns a 0-based position, while `getRankInBasicBlock` returns a 1-based
* position.
*/
deprecated int getPosInBasicBlock(BasicBlock bb) { result = getRankInBasicBlock(bb) - 1 }
pragma[noinline]
private int getIndexInBasicBlock(BasicBlock bb) { this = bb.getNode(result) }
/** Gets a successor in the control-flow graph of `SubBasicBlock`s. */
SubBasicBlock getASuccessor() {
this.lastInBB() and
result = this.getBasicBlock().getASuccessor()
or
exists(BasicBlock bb | result.getRankInBasicBlock(bb) = this.getRankInBasicBlock(bb) + 1)
}
/**
* Gets the `index`th control-flow node in this `SubBasicBlock`. Indexes
* start from 0, and the node at index 0 always exists and compares equal
* to `this`.
*/
ControlFlowNode getNode(int index) {
exists(BasicBlock bb |
exists(int outerIndex |
result = bb.getNode(outerIndex) and
index = outerToInnerIndex(bb, outerIndex)
)
)
}
/**
* Gets the index of the node in this `SubBasicBlock` that has `indexInBB` in
* `bb`, where `bb` is equal to `getBasicBlock()`.
*/
// This predicate is factored out of `getNode` to ensure a good join order.
// It's sensitive to bad magic, so it has `pragma[nomagic]` on it. For
// example, it can get very slow if `getNode` is pragma[nomagic], which could
// mean it might get very slow if `getNode` is used in the wrong context.
pragma[nomagic]
private int outerToInnerIndex(BasicBlock bb, int indexInBB) {
indexInBB = result + this.getIndexInBasicBlock(bb) and
result = [0 .. this.getNumberOfNodes() - 1]
}
/** Gets a control-flow node in this `SubBasicBlock`. */
ControlFlowNode getANode() { result = this.getNode(_) }
/** Holds if `this` contains `node`. */
predicate contains(ControlFlowNode node) { node = this.getANode() }
/** Gets a predecessor in the control-flow graph of `SubBasicBlock`s. */
SubBasicBlock getAPredecessor() { result.getASuccessor() = this }
/**
* Gets a node such that the control-flow edge `(this, result)` may be taken
* when the final node of this `SubBasicBlock` is a conditional expression
* and evaluates to true.
*/
SubBasicBlock getATrueSuccessor() {
this.lastInBB() and
result = this.getBasicBlock().getATrueSuccessor()
}
/**
* Gets a node such that the control-flow edge `(this, result)` may be taken
* when the final node of this `SubBasicBlock` is a conditional expression
* and evaluates to false.
*/
SubBasicBlock getAFalseSuccessor() {
this.lastInBB() and
result = this.getBasicBlock().getAFalseSuccessor()
}
/**
* Gets the number of control-flow nodes in this `SubBasicBlock`. There is
* always at least one.
*/
int getNumberOfNodes() {
exists(BasicBlock bb |
if this.lastInBB()
then result = bb.length() - this.getIndexInBasicBlock(bb)
else result = this.getASuccessor().getIndexInBasicBlock(bb) - this.getIndexInBasicBlock(bb)
)
}
/** Gets the last control-flow node in this `SubBasicBlock`. */
ControlFlowNode getEnd() { result = this.getNode(this.getNumberOfNodes() - 1) }
/** Gets the first control-flow node in this `SubBasicBlock`. */
ControlFlowNode getStart() { result = this }
/** Gets the function that contains this `SubBasicBlock`. */
pragma[noinline]
Function getEnclosingFunction() { result = this.getStart().getControlFlowScope() }
}
/** Gets the number of `SubBasicBlock`s in the given basic block. */
private int countSubBasicBlocksInBasicBlock(BasicBlock bb) {
result = strictcount(SubBasicBlock sbb | sbb.getBasicBlock() = bb)
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,110 @@
/**
* INTERNAL: use the `BasicBlocks` library instead.
* This library defines `PrimitiveBasicBlock`s, an intermediate stage in the
* computation of `BasicBlock`s.
*/
/*
* Unlike `BasicBlock`s, `PrimitiveBasicBlock`s are constructed using
* the primitive `successors_extended` relation only. That is, impossible
* edges removed in `successors_adapted` are still taken into account.
*
* `PrimitiveBasicBlock`s are used as helper entities for actually
* constructing `successors_adapted`, hence the need for two "layers"
* of basic blocks. In addition to being used to construct
* `successors_adapted`, the relations for `BasicBlocks` (e.g.,
* `basic_block_entry_node`) can reuse the relations computed here
* (e.g, `primitive_basic_block_entry_node`), as most `BasicBlock`s
* will coincide with `PrimitiveBasicBlock`s.
*/
import cpp
private class Node = ControlFlowNodeBase;
import Cached
cached
private module Cached {
/** Holds if `node` is the entry node of a primitive basic block. */
cached
predicate primitive_basic_block_entry_node(Node node) {
// The entry point of the CFG is the start of a BB.
exists(Function f | f.getEntryPoint() = node)
or
// If the node has more than one predecessor,
// or the node's predecessor has more than one successor,
// then the node is the start of a new primitive basic block.
strictcount(Node pred | successors_extended(pred, node)) > 1
or
exists(ControlFlowNode pred | successors_extended(pred, node) |
strictcount(ControlFlowNode other | successors_extended(pred, other)) > 1
)
or
// If the node has zero predecessors then it is the start of
// a BB. However, the C++ AST contains many nodes with zero
// predecessors and zero successors, which are not members of
// the CFG. So we exclude all of those trivial BBs by requiring
// that the node have at least one successor.
not successors_extended(_, node) and successors_extended(node, _)
or
// An exception handler is always the start of a new basic block. We
// don't generate edges for [possible] exceptions, but in practice control
// flow could reach the handler from anywhere inside the try block that
// could throw an exception of a corresponding type. A `Handler` usually
// needs to be considered reachable (see also `BasicBlock.isReachable`).
node instanceof Handler
}
/** Holds if `n2` follows `n1` in a `PrimitiveBasicBlock`. */
private predicate member_step(Node n1, Node n2) {
successors_extended(n1, n2) and
not n2 instanceof PrimitiveBasicBlock
}
/** Holds if `node` is the `pos`th control-flow node in primitive basic block `bb`. */
cached
predicate primitive_basic_block_member(Node node, PrimitiveBasicBlock bb, int pos) {
primitive_basic_block_entry_node(bb) and node = bb and pos = 0
or
exists(Node prev |
member_step(prev, node) and
primitive_basic_block_member(prev, bb, pos - 1)
)
}
/** Gets the number of control-flow nodes in the primitive basic block `bb`. */
cached
int primitive_bb_length(PrimitiveBasicBlock bb) {
result = strictcount(Node node | primitive_basic_block_member(node, bb, _))
}
/** Successor relation for primitive basic blocks. */
cached
predicate primitive_bb_successor(PrimitiveBasicBlock pred, PrimitiveBasicBlock succ) {
exists(Node last |
primitive_basic_block_member(last, pred, primitive_bb_length(pred) - 1) and
successors_extended(last, succ)
)
}
}
/**
* A primitive basic block in the C/C++ control-flow graph constructed using
* the primitive `successors_extended` relation only.
*/
class PrimitiveBasicBlock extends Node {
PrimitiveBasicBlock() { primitive_basic_block_entry_node(this) }
predicate contains(Node node) { primitive_basic_block_member(node, this, _) }
Node getNode(int pos) { primitive_basic_block_member(result, this, pos) }
Node getANode() { primitive_basic_block_member(result, this, _) }
PrimitiveBasicBlock getASuccessor() { primitive_bb_successor(this, result) }
PrimitiveBasicBlock getAPredecessor() { primitive_bb_successor(result, this) }
int length() { result = primitive_bb_length(this) }
}