Merge pull request #21331 from jketema/must-flow

C++: Modernize `MustFlow` and fix `allowInterproceduralFlow` in the case of direct recursion
This commit is contained in:
Jeroen Ketema
2026-02-17 17:36:58 +01:00
committed by GitHub
7 changed files with 189 additions and 196 deletions

View File

@@ -0,0 +1,4 @@
---
category: fix
---
* The `allowInterproceduralFlow` predicate of must-flow data flow configurations now correctly handles direct recursion.

View File

@@ -0,0 +1,4 @@
---
category: breaking
---
* `MustFlow`, the inter-procedural must-flow data flow analysis library, has been re-worked to use parameterized modules. Like in the case of data flow and taint tracking, instead of extending the `MustFlowConfiguration` class, the user should now implement a module with the `MustFlow::ConfigSig` signature, and instantiate the `MustFlow::Global` parameterized module with the implemented module.

View File

@@ -8,81 +8,143 @@ private import cpp
private import semmle.code.cpp.ir.IR
/**
* A configuration of a data flow analysis that performs must-flow analysis. This is different
* from `DataFlow.qll` which performs may-flow analysis (i.e., it finds paths where the source _may_
* flow to the sink).
*
* Like in `DataFlow.qll`, each use of the `MustFlow.qll` library must define its own unique extension
* of this abstract class. To create a configuration, extend this class with a subclass whose
* characteristic predicate is a unique singleton string and override `isSource`, `isSink` (and
* `isAdditionalFlowStep` if additional steps are required).
* Provides an inter-procedural must-flow data flow analysis.
*/
abstract class MustFlowConfiguration extends string {
bindingset[this]
MustFlowConfiguration() { any() }
module MustFlow {
/**
* Holds if `source` is a relevant data flow source.
* An input configuration of a data flow analysis that performs must-flow analysis. This is different
* from `DataFlow.qll` which performs may-flow analysis (i.e., it finds paths where the source _may_
* flow to the sink).
*/
abstract predicate isSource(Instruction source);
signature module ConfigSig {
/**
* Holds if `source` is a relevant data flow source.
*/
predicate isSource(Instruction source);
/**
* Holds if `sink` is a relevant data flow sink.
*/
abstract predicate isSink(Operand sink);
/**
* Holds if `sink` is a relevant data flow sink.
*/
predicate isSink(Operand sink);
/**
* Holds if data flow through `instr` is prohibited.
*/
predicate isBarrier(Instruction instr) { none() }
/**
* Holds if data flow through `instr` is prohibited.
*/
default predicate isBarrier(Instruction instr) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
*/
predicate isAdditionalFlowStep(Operand node1, Instruction node2) { none() }
/**
* Holds if the additional flow step from `node1` to `node2` must be taken
* into account in the analysis.
*/
default predicate isAdditionalFlowStep(Operand node1, Instruction node2) { none() }
/** Holds if this configuration allows flow from arguments to parameters. */
predicate allowInterproceduralFlow() { any() }
/**
* Holds if data must flow from `source` to `sink` for this configuration.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
final predicate hasFlowPath(MustFlowPathNode source, MustFlowPathSink sink) {
this.isSource(source.getInstruction()) and
source.getASuccessor*() = sink
/** Holds if this configuration allows flow from arguments to parameters. */
default predicate allowInterproceduralFlow() { any() }
}
}
/** Holds if `node` flows from a source. */
pragma[nomagic]
private predicate flowsFromSource(Instruction node, MustFlowConfiguration config) {
not config.isBarrier(node) and
(
config.isSource(node)
or
exists(Instruction mid |
step(mid, node, config) and
flowsFromSource(mid, pragma[only_bind_into](config))
)
)
}
/**
* Constructs a global must-flow computation.
*/
module Global<ConfigSig Config> {
import Config
/** Holds if `node` flows to a sink. */
pragma[nomagic]
private predicate flowsToSink(Instruction node, MustFlowConfiguration config) {
flowsFromSource(node, pragma[only_bind_into](config)) and
(
config.isSink(node.getAUse())
or
exists(Instruction mid |
step(node, mid, config) and
flowsToSink(mid, pragma[only_bind_into](config))
)
)
/**
* Holds if data must flow from `source` to `sink`.
*
* The corresponding paths are generated from the end-points and the graph
* included in the module `PathGraph`.
*/
predicate flowPath(PathNode source, PathSink sink) {
isSource(source.getInstruction()) and
source.getASuccessor*() = sink
}
/** Holds if `node` flows from a source. */
pragma[nomagic]
private predicate flowsFromSource(Instruction node) {
not isBarrier(node) and
(
isSource(node)
or
exists(Instruction mid |
step(mid, node) and
flowsFromSource(mid)
)
)
}
/** Holds if `node` flows to a sink. */
pragma[nomagic]
private predicate flowsToSink(Instruction node) {
flowsFromSource(node) and
(
isSink(node.getAUse())
or
exists(Instruction mid |
step(node, mid) and
flowsToSink(mid)
)
)
}
/** Holds if `nodeFrom` flows to `nodeTo`. */
private predicate step(Instruction nodeFrom, Instruction nodeTo) {
Cached::localStep(nodeFrom, nodeTo)
or
allowInterproceduralFlow() and
Cached::flowThroughCallable(nodeFrom, nodeTo)
or
isAdditionalFlowStep(nodeFrom.getAUse(), nodeTo)
}
private newtype TLocalPathNode =
MkLocalPathNode(Instruction n) {
flowsToSink(n) and
(
isSource(n)
or
exists(PathNode mid | step(mid.getInstruction(), n))
)
}
/** A `Node` that is in a path from a source to a sink. */
class PathNode extends TLocalPathNode {
Instruction n;
PathNode() { this = MkLocalPathNode(n) }
/** Gets the underlying node. */
Instruction getInstruction() { result = n }
/** Gets a textual representation of this node. */
string toString() { result = n.getAst().toString() }
/** Gets the location of this element. */
Location getLocation() { result = n.getLocation() }
/** Gets a successor node, if any. */
PathNode getASuccessor() { step(this.getInstruction(), result.getInstruction()) }
}
private class PathSink extends PathNode {
PathSink() { isSink(this.getInstruction().getAUse()) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph {
private predicate reach(PathNode n) { n instanceof PathSink or reach(n.getASuccessor()) }
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b and reach(b) }
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(PathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
}
}
}
cached
@@ -102,7 +164,7 @@ private module Cached {
not f.isVirtual() and
call.getPositionalArgument(n) = instr and
f = call.getStaticCallTarget() and
getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
isEnclosingNonVirtualFunctionInitializeParameter(init, f) and
init.getParameter().getIndex() = pragma[only_bind_into](pragma[only_bind_out](n))
}
@@ -111,7 +173,7 @@ private module Cached {
* corresponding initialization instruction that receives the value of `instr` in `f`.
*/
pragma[noinline]
private predicate getPositionalArgumentInitParam(
private predicate isPositionalArgumentInitParam(
CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
) {
exists(int n |
@@ -126,18 +188,18 @@ private module Cached {
* `instr` in `f`.
*/
pragma[noinline]
private predicate getThisArgumentInitParam(
private predicate isThisArgumentInitParam(
CallInstruction call, Instruction instr, InitializeParameterInstruction init, Function f
) {
not f.isVirtual() and
call.getStaticCallTarget() = f and
getEnclosingNonVirtualFunctionInitializeParameter(init, f) and
isEnclosingNonVirtualFunctionInitializeParameter(init, f) and
call.getThisArgument() = instr and
init.getIRVariable() instanceof IRThisVariable
}
/** Holds if `f` is the enclosing non-virtual function of `init`. */
private predicate getEnclosingNonVirtualFunctionInitializeParameter(
private predicate isEnclosingNonVirtualFunctionInitializeParameter(
InitializeParameterInstruction init, Function f
) {
not f.isVirtual() and
@@ -145,7 +207,7 @@ private module Cached {
}
/** Holds if `f` is the enclosing non-virtual function of `init`. */
private predicate getEnclosingNonVirtualFunctionInitializeIndirection(
private predicate isEnclosingNonVirtualFunctionInitializeIndirection(
InitializeIndirectionInstruction init, Function f
) {
not f.isVirtual() and
@@ -153,15 +215,16 @@ private module Cached {
}
/**
* Holds if `instr` is an argument (or argument indirection) to a call, and
* `succ` is the corresponding initialization instruction in the call target.
* Holds if `argument` is an argument (or argument indirection) to a call, and
* `parameter` is the corresponding initialization instruction in the call target.
*/
private predicate flowThroughCallable(Instruction argument, Instruction parameter) {
cached
predicate flowThroughCallable(Instruction argument, Instruction parameter) {
// Flow from an argument to a parameter
exists(CallInstruction call, InitializeParameterInstruction init | init = parameter |
getPositionalArgumentInitParam(call, argument, init, call.getStaticCallTarget())
isPositionalArgumentInitParam(call, argument, init, call.getStaticCallTarget())
or
getThisArgumentInitParam(call, argument, init, call.getStaticCallTarget())
isThisArgumentInitParam(call, argument, init, call.getStaticCallTarget())
)
or
// Flow from argument indirection to parameter indirection
@@ -170,7 +233,7 @@ private module Cached {
|
init = parameter and
read.getPrimaryInstruction() = call and
getEnclosingNonVirtualFunctionInitializeIndirection(init, call.getStaticCallTarget())
isEnclosingNonVirtualFunctionInitializeIndirection(init, call.getStaticCallTarget())
|
exists(int n |
read.getSideEffectOperand().getAnyDef() = argument and
@@ -205,92 +268,10 @@ private module Cached {
}
cached
predicate step(Instruction nodeFrom, Instruction nodeTo) {
predicate localStep(Instruction nodeFrom, Instruction nodeTo) {
exists(Operand mid |
instructionToOperandStep(nodeFrom, mid) and
operandToInstructionStep(mid, nodeTo)
)
or
flowThroughCallable(nodeFrom, nodeTo)
}
}
/**
* Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this
* predicate ensures that joins go from `n` to the result instead of the other
* way around.
*/
pragma[inline]
private IRFunction getEnclosingCallable(Instruction n) {
pragma[only_bind_into](result) = pragma[only_bind_out](n).getEnclosingIRFunction()
}
/** Holds if `nodeFrom` flows to `nodeTo`. */
private predicate step(Instruction nodeFrom, Instruction nodeTo, MustFlowConfiguration config) {
exists(config) and
Cached::step(pragma[only_bind_into](nodeFrom), pragma[only_bind_into](nodeTo)) and
(
config.allowInterproceduralFlow()
or
getEnclosingCallable(nodeFrom) = getEnclosingCallable(nodeTo)
)
or
config.isAdditionalFlowStep(nodeFrom.getAUse(), nodeTo)
}
private newtype TLocalPathNode =
MkLocalPathNode(Instruction n, MustFlowConfiguration config) {
flowsToSink(n, config) and
(
config.isSource(n)
or
exists(MustFlowPathNode mid | step(mid.getInstruction(), n, config))
)
}
/** A `Node` that is in a path from a source to a sink. */
class MustFlowPathNode extends TLocalPathNode {
Instruction n;
MustFlowPathNode() { this = MkLocalPathNode(n, _) }
/** Gets the underlying node. */
Instruction getInstruction() { result = n }
/** Gets a textual representation of this node. */
string toString() { result = n.getAst().toString() }
/** Gets the location of this element. */
Location getLocation() { result = n.getLocation() }
/** Gets a successor node, if any. */
MustFlowPathNode getASuccessor() {
step(this.getInstruction(), result.getInstruction(), this.getConfiguration())
}
/** Gets the associated configuration. */
MustFlowConfiguration getConfiguration() { this = MkLocalPathNode(_, result) }
}
private class MustFlowPathSink extends MustFlowPathNode {
MustFlowPathSink() { this.getConfiguration().isSink(this.getInstruction().getAUse()) }
}
/**
* Provides the query predicates needed to include a graph in a path-problem query.
*/
module PathGraph {
private predicate reach(MustFlowPathNode n) {
n instanceof MustFlowPathSink or reach(n.getASuccessor())
}
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
query predicate edges(MustFlowPathNode a, MustFlowPathNode b) {
a.getASuccessor() = b and reach(b)
}
/** Holds if `n` is a node in the graph of data flow path explanations. */
query predicate nodes(MustFlowPathNode n, string key, string val) {
reach(n) and key = "semmle.label" and val = n.toString()
}
}

View File

@@ -16,17 +16,15 @@
import cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
import ReturnStackAllocatedMemory::PathGraph
/** Holds if `f` has a name that we interpret as evidence of intentionally returning the value of the stack pointer. */
predicate intentionallyReturnsStackPointer(Function f) {
f.getName().toLowerCase().matches(["%stack%", "%sp%"])
}
class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
ReturnStackAllocatedMemoryConfig() { this = "ReturnStackAllocatedMemoryConfig" }
override predicate isSource(Instruction source) {
module ReturnStackAllocatedMemoryConfig implements MustFlow::ConfigSig {
predicate isSource(Instruction source) {
exists(Function func |
// Rule out FPs caused by extraction errors.
not func.hasErrors() and
@@ -50,7 +48,7 @@ class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
)
}
override predicate isSink(Operand sink) {
predicate isSink(Operand sink) {
// Holds if `sink` is a node that represents the `StoreInstruction` that is subsequently used in
// a `ReturnValueInstruction`.
// We use the `StoreInstruction` instead of the instruction that defines the
@@ -72,7 +70,7 @@ class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
// int* px = id(&x);
// }
// ```
override predicate allowInterproceduralFlow() { none() }
predicate allowInterproceduralFlow() { none() }
/**
* This configuration intentionally conflates addresses of fields and their object, and pointer offsets
@@ -87,20 +85,22 @@ class ReturnStackAllocatedMemoryConfig extends MustFlowConfiguration {
* }
* ```
*/
override predicate isAdditionalFlowStep(Operand node1, Instruction node2) {
predicate isAdditionalFlowStep(Operand node1, Instruction node2) {
node2.(FieldAddressInstruction).getObjectAddressOperand() = node1
or
node2.(PointerOffsetInstruction).getLeftOperand() = node1
}
override predicate isBarrier(Instruction n) { n.getResultType() instanceof ErroneousType }
predicate isBarrier(Instruction n) { n.getResultType() instanceof ErroneousType }
}
module ReturnStackAllocatedMemory = MustFlow::Global<ReturnStackAllocatedMemoryConfig>;
from
MustFlowPathNode source, MustFlowPathNode sink, Instruction instr,
ReturnStackAllocatedMemoryConfig conf
ReturnStackAllocatedMemory::PathNode source, ReturnStackAllocatedMemory::PathNode sink,
Instruction instr
where
conf.hasFlowPath(pragma[only_bind_into](source), pragma[only_bind_into](sink)) and
ReturnStackAllocatedMemory::flowPath(pragma[only_bind_into](source), pragma[only_bind_into](sink)) and
source.getInstruction() = instr
select sink.getInstruction(), source, sink, "May return stack-allocated memory from $@.",
instr.getAst(), instr.getAst().toString()

View File

@@ -15,7 +15,7 @@
import cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
import UninitializedLocal::PathGraph
/**
* Auxiliary predicate: Types that don't require initialization
@@ -70,25 +70,26 @@ predicate isSinkImpl(Instruction sink, VariableAccess va) {
)
}
class MustFlow extends MustFlowConfiguration {
MustFlow() { this = "MustFlow" }
override predicate isSource(Instruction source) {
module UninitializedLocalConfig implements MustFlow::ConfigSig {
predicate isSource(Instruction source) {
source instanceof UninitializedInstruction and
exists(Type t | t = source.getResultType() | not allocatedType(t))
}
override predicate isSink(Operand sink) { isSinkImpl(sink.getDef(), _) }
predicate isSink(Operand sink) { isSinkImpl(sink.getDef(), _) }
override predicate allowInterproceduralFlow() { none() }
predicate allowInterproceduralFlow() { none() }
override predicate isBarrier(Instruction instr) { instr instanceof ChiInstruction }
predicate isBarrier(Instruction instr) { instr instanceof ChiInstruction }
}
module UninitializedLocal = MustFlow::Global<UninitializedLocalConfig>;
from
VariableAccess va, LocalVariable v, MustFlow conf, MustFlowPathNode source, MustFlowPathNode sink
VariableAccess va, LocalVariable v, UninitializedLocal::PathNode source,
UninitializedLocal::PathNode sink
where
conf.hasFlowPath(source, sink) and
UninitializedLocal::flowPath(source, sink) and
isSinkImpl(sink.getInstruction(), va) and
v = va.getTarget()
select va, source, sink, "The variable $@ may not be initialized at this access.", v, v.getName()

View File

@@ -17,16 +17,16 @@
import cpp
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.dataflow.MustFlow
import PathGraph
import UnsafeUseOfThis::PathGraph
class UnsafeUseOfThisConfig extends MustFlowConfiguration {
UnsafeUseOfThisConfig() { this = "UnsafeUseOfThisConfig" }
module UnsafeUseOfThisConfig implements MustFlow::ConfigSig {
predicate isSource(Instruction source) { isSource(source, _, _) }
override predicate isSource(Instruction source) { isSource(source, _, _) }
override predicate isSink(Operand sink) { isSink(sink, _) }
predicate isSink(Operand sink) { isSink(sink, _) }
}
module UnsafeUseOfThis = MustFlow::Global<UnsafeUseOfThisConfig>;
/** Holds if `sink` is a `this` pointer used by the call instruction `call`. */
predicate isSink(Operand sink, CallInstruction call) {
exists(PureVirtualFunction func |
@@ -66,19 +66,17 @@ predicate isSource(InitializeParameterInstruction source, string msg, Class c) {
* - `msg` is a string describing whether `source` is from a constructor or destructor.
*/
predicate flows(
MustFlowPathNode source, string msg, Class sourceClass, MustFlowPathNode sink,
UnsafeUseOfThis::PathNode source, string msg, Class sourceClass, UnsafeUseOfThis::PathNode sink,
CallInstruction call
) {
exists(UnsafeUseOfThisConfig conf |
conf.hasFlowPath(source, sink) and
isSource(source.getInstruction(), msg, sourceClass) and
isSink(sink.getInstruction().getAUse(), call)
)
UnsafeUseOfThis::flowPath(source, sink) and
isSource(source.getInstruction(), msg, sourceClass) and
isSink(sink.getInstruction().getAUse(), call)
}
from
MustFlowPathNode source, MustFlowPathNode sink, CallInstruction call, string msg,
Class sourceClass
UnsafeUseOfThis::PathNode source, UnsafeUseOfThis::PathNode sink, CallInstruction call,
string msg, Class sourceClass
where
flows(source, msg, sourceClass, sink, call) and
// Only raise an alert if there is no override of the pure virtual function in any base class.

View File

@@ -250,3 +250,8 @@ void* test_strndupa(const char* s, size_t size) {
return s2; // BAD
}
int* f_rec(int *p) {
int x;
int* px = f_rec(&x); // GOOD
return p;
}