Merge pull request #7682 from MathiasVP/rewrite-return-stack-allocated-memory-to-use-ir

C++: Use the IR for `cpp/return-stack-allocated-memory`.
This commit is contained in:
Mathias Vorreiter Pedersen
2022-01-21 14:57:30 +00:00
committed by GitHub
5 changed files with 382 additions and 62 deletions

View File

@@ -4,4 +4,4 @@ Record* fixRecord(Record* r) {
myRecord.fix();
return &myRecord; //returns reference to myRecord, which is a stack-allocated object
}
}

View File

@@ -3,7 +3,7 @@
* @description A function returns a pointer to a stack-allocated region of
* memory. This memory is deallocated at the end of the function,
* which may lead the caller to dereference a dangling pointer.
* @kind problem
* @kind path-problem
* @id cpp/return-stack-allocated-memory
* @problem.severity warning
* @precision high
@@ -12,59 +12,151 @@
*/
import cpp
import semmle.code.cpp.dataflow.EscapesTree
import semmle.code.cpp.models.interfaces.PointerWrapper
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.DataFlow::DataFlow
/**
* Holds if `n1` may flow to `n2`, ignoring flow through fields because these
* are currently modeled as an overapproximation that assumes all objects may
* alias.
* Holds if `source` is a node that represents the use of a stack variable.
*/
predicate conservativeDataFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
DataFlow::localFlowStep(n1, n2) and
not n2.asExpr() instanceof FieldAccess and
not hasNontrivialConversion(n2.asExpr())
}
/**
* Holds if `e` has a conversion that changes it from lvalue to pointer or
* back. As the data-flow library does not support conversions, we cannot track
* data flow through such expressions.
*/
predicate hasNontrivialConversion(Expr e) {
e instanceof Conversion and
not (
e instanceof Cast
or
e instanceof ParenthesisExpr
predicate isSource(Node source) {
exists(VariableAddressInstruction var |
var = source.asInstruction() and
var.getASTVariable() instanceof StackVariable and
// Pointer-to-member types aren't properly handled in the dbscheme.
not var.getResultType() instanceof PointerToMemberType and
// Rule out FPs caused by extraction errors.
not any(ErrorExpr e).getEnclosingFunction() = var.getEnclosingFunction()
)
or
// A smart pointer can be stack-allocated while the data it points to is heap-allocated.
// So we exclude such "conversions" from this predicate.
e = any(PointerWrapper wrapper).getAnUnwrapperFunction().getACallToThisFunction()
or
hasNontrivialConversion(e.getConversion())
}
from StackVariable var, VariableAccess va, ReturnStmt r
where
not var.getUnspecifiedType() instanceof ReferenceType and
not r.isFromUninstantiatedTemplate(_) and
va = var.getAnAccess() and
/**
 * Holds if `sink` is the source-value operand of a `StoreInstruction` whose destination
 * address is the `VariableAddressInstruction` of an `IRReturnVariable`, that is, a store that
 * is subsequently used by a `ReturnValueInstruction`.
 *
 * The store's operand is used instead of the instruction that defines the
 * `ReturnValueInstruction`'s source value operand because the former has better location
 * information.
 */
predicate isSink(Node sink) {
  exists(StoreInstruction store, VariableAddressInstruction dest |
    dest = store.getDestinationAddress() and
    dest.getIRVariable() instanceof IRReturnVariable and
    store.getSourceValueOperand() = sink.asOperand()
  )
}
/**
 * Holds if `node1` _must_ flow to `node2` in a single step.
 *
 * A step goes either from an operand node to an instruction node
 * (`operandToInstructionStep`) or from an instruction node to an operand node
 * (`instructionToOperandStep`).
 */
predicate step(Node node1, Node node2) {
  operandToInstructionStep(node1.asOperand(), node2.asInstruction())
  or
  instructionToOperandStep(node1.asInstruction(), node2.asOperand())
}
/** Holds if `instr` is the definition of `operand`, as given by `operand.getDef()`. */
predicate instructionToOperandStep(Instruction instr, Operand operand) {
  instr = operand.getDef()
}
/**
 * Holds if the value of `operand` flows to the result of `instr`.
 *
 * Flow through `PhiInstruction`s is deliberately left out so that the relation is a
 * 'must flow' relation. The address of a field is intentionally conflated with the address
 * of its enclosing object, and a pointer offset with its base pointer. This makes it possible
 * to detect an object's address reaching a return statement via one of its fields:
 *
 * ```cpp
 * struct S { int x, y; };
 * int* test() {
 *   S s;
 *   return &s.x; // BAD: &s.x is an address of a variable on the stack.
 * }
 * ```
 */
predicate operandToInstructionStep(Operand operand, Instruction instr) {
  // Plain copies of the value.
  operand = instr.(CopyInstruction).getSourceValueOperand()
  or
  // Conversions of the value.
  operand = instr.(ConvertInstruction).getUnaryOperand()
  or
  operand = instr.(CheckedConvertOrNullInstruction).getUnaryOperand()
  or
  operand = instr.(InheritanceConversionInstruction).getUnaryOperand()
  or
  // Object address -> field address.
  operand = instr.(FieldAddressInstruction).getObjectAddressOperand()
  or
  // Base pointer -> offset pointer.
  operand = instr.(PointerOffsetInstruction).getLeftOperand()
}
/**
 * Holds if `n` is a source node, or is reachable from a source node by one or more
 * applications of `step`.
 */
predicate branchlessLocalFlow0(Node n) {
  isSource(n)
  or
  exists(Node pred |
    step(pred, n) and
    branchlessLocalFlow0(pred)
  )
}
/** Holds if `n` is reachable through some source node, and `n` also eventually reaches a sink. */
predicate branchlessLocalFlow1(Node n) {
branchlessLocalFlow0(n) and
(
// To check if the address escapes directly from `e` in `return e`, we need
// to check the fully-converted `e` in case there are implicit
// array-to-pointer conversions or reference conversions.
variableAddressEscapesTree(va, r.getExpr().getFullyConverted())
isSink(n)
or
// The data flow library doesn't support conversions, so here we check that
// the address escapes into some expression `pointerToLocal`, which flows
// in one or more steps to a returned expression.
exists(Expr pointerToLocal |
variableAddressEscapesTree(va, pointerToLocal.getFullyConverted()) and
not hasNontrivialConversion(pointerToLocal) and
conservativeDataFlowStep+(DataFlow::exprNode(pointerToLocal), DataFlow::exprNode(r.getExpr()))
exists(Node mid |
branchlessLocalFlow1(mid) and
step(n, mid)
)
)
select r, "May return stack-allocated memory from $@.", va, va.toString()
}
/**
 * The underlying type of path nodes. A `TLocalPathNodeMid` value exists for each `Node`
 * that satisfies `branchlessLocalFlow1` and is either a source or one `step` after a node
 * that already has a path node.
 */
newtype TLocalPathNode =
TLocalPathNodeMid(Node n) {
branchlessLocalFlow1(n) and
(
isSource(n) or
// Recursive case: `n` directly follows a node that is already a `LocalPathNodeMid`.
exists(LocalPathNodeMid mid | step(mid.getNode(), n))
)
}
/**
 * A node in the path graph of this query, wrapping a data-flow `Node`.
 *
 * The field `n` holds the wrapped node; this class does not constrain it itself.
 */
abstract class LocalPathNode extends TLocalPathNode {
  Node n;

  /** Gets the data-flow node wrapped by this path node. */
  Node getNode() { n = result }

  /** Gets a textual representation of this path node (that of the wrapped node). */
  string toString() { n.toString() = result }

  /** Gets the location of this path node (that of the wrapped node). */
  Location getLocation() { n.getLocation() = result }

  /** Gets a path node whose wrapped node is one `step` after this one's, if any. */
  LocalPathNode getASuccessor() {
    exists(Node next | next = result.getNode() | step(this.getNode(), next))
  }
}
/** A `LocalPathNode` that corresponds to the `TLocalPathNodeMid` branch of `TLocalPathNode`. */
class LocalPathNodeMid extends LocalPathNode, TLocalPathNodeMid {
// Binds the inherited field `n` to the node carried by this `TLocalPathNodeMid` value.
LocalPathNodeMid() { this = TLocalPathNodeMid(n) }
}
/** A `LocalPathNodeMid` whose underlying node satisfies `isSink`. */
class LocalPathNodeSink extends LocalPathNodeMid {
  LocalPathNodeSink() { exists(Node sinkNode | sinkNode = this.getNode() | isSink(sinkNode)) }
}
/**
 * Holds if the node underlying `source` satisfies `isSource` and `sink` is reachable from
 * `source` by following `getASuccessor` one or more times.
 */
predicate hasFlow(LocalPathNode source, LocalPathNodeSink sink) {
  sink = source.getASuccessor+() and
  isSource(source.getNode())
}
/** Holds if `n` is a sink path node, or has a successor for which `reach` holds. */
predicate reach(LocalPathNode n) {
  n instanceof LocalPathNodeSink
  or
  exists(LocalPathNode succ | succ = n.getASuccessor() | reach(succ))
}
/** Holds if `b` is a successor of `a` and `reach(b)` holds; the edge relation of the path graph. */
query predicate edges(LocalPathNode a, LocalPathNode b) {
  reach(b) and
  b = a.getASuccessor()
}
/**
 * Holds if `reach(n)` holds, `key` is `"semmle.label"`, and `val` is the textual
 * representation of `n`; the node relation of the path graph.
 */
query predicate nodes(LocalPathNode n, string key, string val) {
  key = "semmle.label" and
  reach(n) and
  val = n.toString()
}
// Reports every sink path node that `hasFlow` connects to a source path node. The alert is
// placed on the sink's underlying node, and the message links to the AST element of the
// source's `VariableAddressInstruction`.
from LocalPathNode source, LocalPathNodeSink sink, VariableAddressInstruction var
where
hasFlow(source, sink) and
// `var` is the instruction underlying the source path node.
source.getNode().asInstruction() = var
select sink.getNode(), source, sink, "May return stack-allocated memory from $@.", var.getAST(),
var.getAST().toString()

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* The `cpp/return-stack-allocated-memory` query has been improved to produce fewer false positives. The
query has also been converted to a `path-problem` query.