C++: Make the two must-flow queries use the new must-flow library

This commit is contained in:
Mathias Vorreiter Pedersen
2022-03-08 11:31:19 +00:00
parent ee9c0dcb83
commit 8c5b3368e1
2 changed files with 85 additions and 297 deletions

View File

@@ -18,157 +18,70 @@ import cpp
// recomputing the IR.
private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.DataFlow::DataFlow
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
/** Holds if `f` has a name that we intrepret as evidence of intentionally returning the value of the stack pointer. */
predicate intentionallyReturnsStackPointer(Function f) {
f.getName().toLowerCase().matches(["%stack%", "%sp%"])
}
/**
* Holds if `source` is a node that represents the use of a stack variable
*/
predicate isSource(Node source) {
exists(VariableAddressInstruction var, Function func |
var = source.asInstruction() and
func = var.getEnclosingFunction() and
var.getASTVariable() instanceof StackVariable and
// Pointer-to-member types aren't properly handled in the dbscheme.
not var.getResultType() instanceof PointerToMemberType and
// Rule out FPs caused by extraction errors.
not any(ErrorExpr e).getEnclosingFunction() = func and
not intentionallyReturnsStackPointer(func)
)
}
class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
ReturnStackAllocatedMemoryConfig() { this = "ReturnStackAllocatedMemoryConfig" }
/**
* Holds if `sink` is a node that represents the `StoreInstruction` that is subsequently used in
* a `ReturnValueInstruction`. We use the `StoreInstruction` instead of the instruction that defines the
* `ReturnValueInstruction`'s source value oprand because the former has better location information.
*/
predicate isSink(Node sink) {
exists(StoreInstruction store |
store.getDestinationAddress().(VariableAddressInstruction).getIRVariable() instanceof
IRReturnVariable and
sink.asOperand() = store.getSourceValueOperand()
)
}
/** Holds if `node1` _must_ flow to `node2`. */
predicate step(Node node1, Node node2) {
instructionToOperandStep(node1.asInstruction(), node2.asOperand())
or
operandToInstructionStep(node1.asOperand(), node2.asInstruction())
}
predicate instructionToOperandStep(Instruction instr, Operand operand) { operand.getDef() = instr }
/**
* Holds if `operand` flows to the result of `instr`.
*
* This predicate ignores flow through `PhiInstruction`s to create a 'must flow' relation. It also
* intentionally conflates addresses of fields and their object, and pointer offsets with their
* base pointer as this allows us to detect cases where an object's address flows to a return statement
* via a field. For example:
*
* ```cpp
* struct S { int x, y };
* int* test() {
* S s;
* return &s.x; // BAD: &s.x is an address of a variable on the stack.
* }
* ```
*/
predicate operandToInstructionStep(Operand operand, Instruction instr) {
instr.(CopyInstruction).getSourceValueOperand() = operand
or
instr.(ConvertInstruction).getUnaryOperand() = operand
or
instr.(CheckedConvertOrNullInstruction).getUnaryOperand() = operand
or
instr.(InheritanceConversionInstruction).getUnaryOperand() = operand
or
instr.(FieldAddressInstruction).getObjectAddressOperand() = operand
or
instr.(PointerOffsetInstruction).getLeftOperand() = operand
}
/** Holds if a source node flows to `n`. */
predicate branchlessLocalFlow0(Node n) {
isSource(n)
or
exists(Node mid |
branchlessLocalFlow0(mid) and
step(mid, n)
)
}
/** Holds if `n` is reachable through some source node, and `n` also eventually reaches a sink. */
predicate branchlessLocalFlow1(Node n) {
branchlessLocalFlow0(n) and
(
isSink(n)
or
exists(Node mid |
branchlessLocalFlow1(mid) and
step(n, mid)
)
)
}
newtype TLocalPathNode =
TLocalPathNodeMid(Node n) {
branchlessLocalFlow1(n) and
(
isSource(n) or
exists(LocalPathNodeMid mid | step(mid.getNode(), n))
override predicate isSource(DataFlow::Node source) {
// Holds if `source` is a node that represents the use of a stack variable
exists(VariableAddressInstruction var, Function func |
var = source.asInstruction() and
func = var.getEnclosingFunction() and
var.getASTVariable() instanceof StackVariable and
// Pointer-to-member types aren't properly handled in the dbscheme.
not var.getResultType() instanceof PointerToMemberType and
// Rule out FPs caused by extraction errors.
not any(ErrorExpr e).getEnclosingFunction() = func and
not intentionallyReturnsStackPointer(func)
)
}
abstract class LocalPathNode extends TLocalPathNode {
Node n;
override predicate isSink(DataFlow::Node sink) {
// Holds if `sink` is a node that represents the `StoreInstruction` that is subsequently used in
// a `ReturnValueInstruction`.
// We use the `StoreInstruction` instead of the instruction that defines the
// `ReturnValueInstruction`'s source value oprand because the former has better location information.
exists(StoreInstruction store |
store.getDestinationAddress().(VariableAddressInstruction).getIRVariable() instanceof
IRReturnVariable and
sink.asOperand() = store.getSourceValueOperand()
)
}
/** Gets the underlying node. */
Node getNode() { result = n }
/** Gets a textual representation of this node. */
string toString() { result = n.toString() }
/** Gets the location of this element. */
Location getLocation() { result = n.getLocation() }
/** Gets a successor `LocalPathNode`, if any. */
LocalPathNode getASuccessor() { step(this.getNode(), result.getNode()) }
/**
* This configuration intentionally conflates addresses of fields and their object, and pointer offsets
* with their base pointer as this allows us to detect cases where an object's address flows to a
* return statement via a field. For example:
*
* ```cpp
* struct S { int x, y };
* int* test() {
* S s;
* return &s.x; // BAD: &s.x is an address of a variable on the stack.
* }
*/
override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
node2.asInstruction().(FieldAddressInstruction).getObjectAddressOperand() = node1.asOperand()
or
node2.asInstruction().(PointerOffsetInstruction).getLeftOperand() = node1.asOperand()
}
}
class LocalPathNodeMid extends LocalPathNode, TLocalPathNodeMid {
LocalPathNodeMid() { this = TLocalPathNodeMid(n) }
}
class LocalPathNodeSink extends LocalPathNodeMid {
LocalPathNodeSink() { isSink(this.getNode()) }
}
/**
* Holds if `source` is a source node, `sink` is a sink node, and there's flow
* from `source` to `sink` using `step` relation.
*/
predicate hasFlow(LocalPathNode source, LocalPathNodeSink sink) {
isSource(source.getNode()) and
source.getASuccessor+() = sink
}
predicate reach(LocalPathNode n) { n instanceof LocalPathNodeSink or reach(n.getASuccessor()) }
query predicate edges(LocalPathNode a, LocalPathNode b) { a.getASuccessor() = b and reach(b) }
query predicate nodes(LocalPathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
from LocalPathNode source, LocalPathNodeSink sink, VariableAddressInstruction var
from
MustFlowPathNode source, MustFlowPathNode sink, VariableAddressInstruction var,
ReturnStackAllocatedMemoryConfig conf
where
hasFlow(source, sink) and
source.getNode().asInstruction() = var
conf.hasFlowPath(source, sink) and
source.getNode().asInstruction() = var and
// Only raise an alert if we're returning from the _same_ callable as the on that
// declared the stack variable.
var.getEnclosingFunction() = sink.getNode().getEnclosingCallable()
select sink.getNode(), source, sink, "May return stack-allocated memory from $@.", var.getAST(),
var.getAST().toString()

View File

@@ -17,169 +17,47 @@
import cpp
// We don't actually use the global value numbering library in this query, but without it we end up
// recomputing the IR.
private import semmle.code.cpp.valuenumbering.GlobalValueNumbering
private import semmle.code.cpp.ir.IR
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
bindingset[n, result]
int unbind(int n) { result >= n and result <= n }
class UnsafeUseOfThisConfig extends MustFlowConfiguration {
UnsafeUseOfThisConfig() { this = "UnsafeUseOfThisConfig" }
/** Holds if `p` is the `n`'th parameter of the non-virtual function `f`. */
predicate parameterOf(Parameter p, Function f, int n) {
not f.isVirtual() and f.getParameter(n) = p
}
override predicate isSource(DataFlow::Node source) { isSource(source, _, _) }
/**
* Holds if `instr` is the `n`'th argument to a call to the non-virtual function `f`, and
* `init` is the corresponding initiazation instruction that receives the value of `instr` in `f`.
*/
predicate flowIntoParameter(
CallInstruction call, Instruction instr, Function f, int n, InitializeParameterInstruction init
) {
not f.isVirtual() and
call.getPositionalArgument(n) = instr and
f = call.getStaticCallTarget() and
getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
init.getParameter().getIndex() = unbind(n)
}
/**
* Holds if `instr` is an argument to a call to the function `f`, and `init` is the
* corresponding initialization instruction that receives the value of `instr` in `f`.
*/
pragma[noinline]
predicate getPositionalArgumentInitParam(
CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
) {
exists(int n |
parameterOf(_, f, n) and
flowIntoParameter(call, instr, f, unbind(n), init)
)
}
/**
* Holds if `instr` is the qualifier to a call to the non-virtual function `f`, and
* `init` is the corresponding initiazation instruction that receives the value of
* `instr` in `f`.
*/
pragma[noinline]
predicate getThisArgumentInitParam(
CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
) {
not f.isVirtual() and
call.getStaticCallTarget() = f and
getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
call.getThisArgument() = instr and
init.getIRVariable() instanceof IRThisVariable
override predicate isSink(DataFlow::Node sink) { isSink(sink, _) }
}
/** Holds if `instr` is a `this` pointer used by the call instruction `call`. */
predicate isSink(Instruction instr, CallInstruction call) {
predicate isSink(DataFlow::Node sink, CallInstruction call) {
exists(PureVirtualFunction func |
call.getStaticCallTarget() = func and
call.getThisArgument() = instr and
call.getThisArgument() = sink.asInstruction() and
// Weed out implicit calls to destructors of a base class
not func instanceof Destructor
)
}
/** Holds if `init` initializes the `this` pointer in class `c`. */
predicate isSource(InitializeParameterInstruction init, string msg, Class c) {
(
exists(Constructor func |
not func instanceof CopyConstructor and
not func instanceof MoveConstructor and
func = init.getEnclosingFunction() and
msg = "construction"
)
or
init.getEnclosingFunction() instanceof Destructor and msg = "destruction"
) and
init.getIRVariable() instanceof IRThisVariable and
init.getEnclosingFunction().getDeclaringType() = c
}
/**
* Holds if `instr` flows to a sink (which is a use of the value of `instr` as a `this` pointer).
*/
predicate flowsToSink(Instruction instr, Instruction sink) {
flowsFromSource(instr) and
(
isSink(instr, _) and instr = sink
or
exists(Instruction mid |
successor(instr, mid) and
flowsToSink(mid, sink)
)
predicate isSource(DataFlow::Node source, string msg, Class c) {
exists(InitializeParameterInstruction init | init = source.asInstruction() |
(
exists(Constructor func |
not func instanceof CopyConstructor and
not func instanceof MoveConstructor and
func = init.getEnclosingFunction() and
msg = "construction"
)
or
init.getEnclosingFunction() instanceof Destructor and msg = "destruction"
) and
init.getIRVariable() instanceof IRThisVariable and
init.getEnclosingFunction().getDeclaringType() = c
)
}
/** Holds if `instr` flows from a source. */
predicate flowsFromSource(Instruction instr) {
isSource(instr, _, _)
or
exists(Instruction mid |
successor(mid, instr) and
flowsFromSource(mid)
)
}
/** Holds if `f` is the enclosing non-virtual function of `init`. */
predicate getEnclosingNonVirtualFunctionInitializeParameter(
InitializeParameterInstruction init, Function f
) {
not f.isVirtual() and
init.getEnclosingFunction() = f
}
/** Holds if `f` is the enclosing non-virtual function of `init`. */
predicate getEnclosingNonVirtualFunctionInitializeIndirection(
InitializeIndirectionInstruction init, Function f
) {
not f.isVirtual() and
init.getEnclosingFunction() = f
}
/**
* Holds if `instr` is an argument (or argument indirection) to a call, and
* `succ` is the corresponding initialization instruction in the call target.
*/
predicate flowThroughCallable(Instruction instr, Instruction succ) {
// Flow from an argument to a parameter
exists(CallInstruction call, InitializeParameterInstruction init | init = succ |
getPositionalArgumentInitParam(call, instr, init, call.getStaticCallTarget())
or
getThisArgumentInitParam(call, instr, init, call.getStaticCallTarget())
)
or
// Flow from argument indirection to parameter indirection
exists(
CallInstruction call, ReadSideEffectInstruction read, InitializeIndirectionInstruction init
|
init = succ and
read.getPrimaryInstruction() = call and
getEnclosingNonVirtualFunctionInitializeIndirection(init, call.getStaticCallTarget())
|
exists(int n |
read.getSideEffectOperand().getAnyDef() = instr and
read.getIndex() = n and
init.getParameter().getIndex() = unbind(n)
)
or
call.getThisArgument() = instr and
init.getIRVariable() instanceof IRThisVariable
)
}
/** Holds if `instr` flows to `succ`. */
predicate successor(Instruction instr, Instruction succ) {
succ.(CopyInstruction).getSourceValue() = instr or
succ.(CheckedConvertOrNullInstruction).getUnary() = instr or
succ.(ChiInstruction).getTotal() = instr or
succ.(ConvertInstruction).getUnary() = instr or
succ.(InheritanceConversionInstruction).getUnary() = instr or
flowThroughCallable(instr, succ)
}
/**
* Holds if:
* - `source` is an initialization of a `this` pointer of type `sourceClass`, and
@@ -188,22 +66,19 @@ predicate successor(Instruction instr, Instruction succ) {
* - `msg` is a string describing whether `source` is from a constructor or destructor.
*/
predicate flows(
Instruction source, string msg, Class sourceClass, Instruction sink, CallInstruction call
MustFlowPathNode source, string msg, Class sourceClass, MustFlowPathNode sink,
CallInstruction call
) {
isSource(source, msg, sourceClass) and
flowsToSink(source, sink) and
isSink(sink, call)
exists(UnsafeUseOfThisConfig conf |
conf.hasFlowPath(source, sink) and
isSource(source.getNode(), msg, sourceClass) and
isSink(sink.getNode(), call)
)
}
query predicate edges(Instruction a, Instruction b) { successor(a, b) and flowsToSink(b, _) }
query predicate nodes(Instruction n, string key, string val) {
flowsToSink(n, _) and
key = "semmle.label" and
val = n.toString()
}
from Instruction source, Instruction sink, CallInstruction call, string msg, Class sourceClass
from
MustFlowPathNode source, MustFlowPathNode sink, CallInstruction call, string msg,
Class sourceClass
where
flows(source, msg, sourceClass, sink, call) and
// Only raise an alert if there is no override of the pure virtual function in any base class.