C++: Move a bunch of newtypes and predicates into a cached module.

This commit is contained in:
Mathias Vorreiter Pedersen
2026-02-27 11:58:53 +00:00
parent 6e0c5615fe
commit 86bd0c0dc3
2 changed files with 189 additions and 199 deletions

View File

@@ -16,27 +16,30 @@ private import semmle.code.cpp.dataflow.ExternalFlow as External
cached
private module Cached {
cached
module Nodes0 {
cached
newtype TIRDataFlowNode0 =
TInstructionNode0(Instruction i) {
not Ssa::ignoreInstruction(i) and
not exists(Operand op |
not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op)
) and
// We exclude `void`-typed instructions because they cannot contain data.
// However, if the instruction is a glvalue, and their type is `void`, then the result
// type of the instruction is really `void*`, and thus we still want to have a dataflow
// node for it.
(not i.getResultType() instanceof VoidType or i.isGLValue())
} or
TMultipleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op))
} or
TSingleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op))
}
}
newtype TIRDataFlowNode0 =
TInstructionNode0(Instruction i) {
not Ssa::ignoreInstruction(i) and
not exists(Operand op |
not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op)
) and
// We exclude `void`-typed instructions because they cannot contain data.
// However, if the instruction is a glvalue, and their type is `void`, then the result
// type of the instruction is really `void*`, and thus we still want to have a dataflow
// node for it.
(not i.getResultType() instanceof VoidType or i.isGLValue())
} or
TMultipleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op))
} or
TSingleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op))
}
cached
newtype TContentApprox =
TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or
TUnionApproxContent(string s) { unionHasApproxName(_, s) } or
TElementApproxContent()
/**
* Gets an additional term that is added to the `join` and `branch` computations to reflect
@@ -59,16 +62,155 @@ private module Cached {
result = countNumberOfBranchesUsingParameter(switch, p)
)
}
cached
newtype TDataFlowCallable =
TSourceCallable(Cpp::Declaration decl) or
TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c)
cached
newtype TDataFlowCall =
TNormalCall(CallInstruction call) or
TSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/**
* Holds if data can flow from `node1` to `node2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*/
cached
predicate jumpStep(Node n1, Node n2) {
exists(GlobalLikeVariable v |
exists(Ssa::GlobalUse globalUse |
v = globalUse.getVariable() and
n1.(FinalGlobalValue).getGlobalUse() = globalUse
|
globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and
v = n2.asVariable()
or
v = n2.asIndirectVariable(globalUse.getIndirection())
)
or
exists(Ssa::GlobalDef globalDef |
v = globalDef.getVariable() and
n2.(InitialGlobalValue).getGlobalDef() = globalDef
|
globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and
v = n1.asVariable()
or
v = n1.asIndirectVariable(globalDef.getIndirection())
)
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(),
n2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*
* The boolean `certain` is true if the destination address does not involve
* any pointer arithmetic, and false otherwise.
*/
cached
predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) {
exists(
PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads,
StoreInstruction store, FieldContent fc
|
postFieldUpdate = node2 and
fc = c and
nodeHasInstruction(node1, pragma[only_bind_into](store),
pragma[only_bind_into](indirectionIndex1)) and
postFieldUpdate.getIndirectionIndex() = 1 and
numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(),
store.getDestinationAddressOperand(), numberOfLoads, certain) and
fc.getAField() = postFieldUpdate.getUpdatedField() and
getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode()) and
certain = true
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
cached
predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) }
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
cached
predicate readStep(Node node1, ContentSet c, Node node2) {
exists(
FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc
|
fc = c and
nodeHasOperand(node2, operand, indirectionIndex2) and
// The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
// in `storeStep`.
nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and
fc.getAField() = fa1.getField() and
getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
cached
predicate clearsContent(Node n, ContentSet c) {
n =
any(PostUpdateNode pun, Content d |
d.impliesClearOf(c) and storeStepImpl(_, d, pun, true)
|
pun
).getPreUpdateNode() and
(
not exists(Operand op, Cpp::Operation p |
n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
(
p instanceof Cpp::AssignPointerAddExpr or
p instanceof Cpp::AssignPointerSubExpr or
p instanceof Cpp::CrementOperation
)
|
p.getAnOperand() = op.getUse().getAst()
)
or
forex(PostUpdateNode pun, Content d |
pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and
storeStepImpl(_, d, pun, true) and
pun.getPreUpdateNode() = n
|
c.(Content).getIndirectionIndex() = d.getIndirectionIndex()
)
)
}
}
import Cached
private import Nodes0
/**
* A module for calculating the number of stars (i.e., `*`s) needed for various
* dataflow node `toString` predicates.
*/
module NodeStars {
private int getNumberOfIndirections(Node n) {
result = n.(RawIndirectOperand).getIndirectionIndex()
or
@@ -828,85 +970,10 @@ private int getMinIndirectionForGlobalDef(Ssa::GlobalDef def) {
result = getMinIndirectionsForType(def.getUnspecifiedType())
}
/**
* Holds if data can flow from `node1` to `node2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*/
predicate jumpStep(Node n1, Node n2) {
exists(GlobalLikeVariable v |
exists(Ssa::GlobalUse globalUse |
v = globalUse.getVariable() and
n1.(FinalGlobalValue).getGlobalUse() = globalUse
|
globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and
v = n2.asVariable()
or
v = n2.asIndirectVariable(globalUse.getIndirection())
)
or
exists(Ssa::GlobalDef globalDef |
v = globalDef.getVariable() and
n2.(InitialGlobalValue).getGlobalDef() = globalDef
|
globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and
v = n1.asVariable()
or
v = n1.asIndirectVariable(globalDef.getIndirection())
)
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(),
n2.(FlowSummaryNode).getSummaryNode())
}
bindingset[c]
pragma[inline_late]
private int getIndirectionIndexLate(Content c) { result = c.getIndirectionIndex() }
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*
* The boolean `certain` is true if the destination address does not involve
* any pointer arithmetic, and false otherwise. This has to do with whether a
* store step can be used to clear a field (see `clearsContent`).
*/
predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) {
exists(
PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads,
StoreInstruction store, FieldContent fc
|
postFieldUpdate = node2 and
fc = c and
nodeHasInstruction(node1, pragma[only_bind_into](store),
pragma[only_bind_into](indirectionIndex1)) and
postFieldUpdate.getIndirectionIndex() = 1 and
numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(),
store.getDestinationAddressOperand(), numberOfLoads, certain) and
fc.getAField() = postFieldUpdate.getUpdatedField() and
getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode()) and
certain = true
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) }
/**
* Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like
* operations and exactly `n` `LoadInstruction` operations.
*/
private predicate numberOfLoadsFromOperandRec(
Operand operandFrom, Operand operandTo, int ind, boolean certain
) {
@@ -957,63 +1024,6 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex)
hasInstructionAndIndex(node, instr, indirectionIndex)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, ContentSet c, Node node2) {
exists(
FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc
|
fc = c and
nodeHasOperand(node2, operand, indirectionIndex2) and
// The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
// in `storeStep`.
nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and
fc.getAField() = fa1.getField() and
getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
predicate clearsContent(Node n, ContentSet c) {
n =
any(PostUpdateNode pun, Content d | d.impliesClearOf(c) and storeStepImpl(_, d, pun, true) | pun)
.getPreUpdateNode() and
(
// The crement operations and pointer addition and subtraction self-assign. We do not
// want to clear the contents if it is indirectly pointed at by any of these operations,
// as part of the contents might still be accessible afterwards. If there is no such
// indirection clearing the contents is safe.
not exists(Operand op, Cpp::Operation p |
n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
(
p instanceof Cpp::AssignPointerAddExpr or
p instanceof Cpp::AssignPointerSubExpr or
p instanceof Cpp::CrementOperation
)
|
p.getAnOperand() = op.getUse().getAst()
)
or
forex(PostUpdateNode pun, Content d |
pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and
storeStepImpl(_, d, pun, true) and
pun.getPreUpdateNode() = n
|
c.(Content).getIndirectionIndex() = d.getIndirectionIndex()
)
)
}
/**
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
@@ -1046,11 +1056,6 @@ class CastNode extends Node {
CastNode() { none() } // stub implementation
}
cached
private newtype TDataFlowCallable =
TSourceCallable(Cpp::Declaration decl) or
TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c)
/**
* A callable, which may be:
* - a function (that may contain code)
@@ -1134,15 +1139,6 @@ class DataFlowType extends TypeFinal {
string toString() { result = "" }
}
cached
private newtype TDataFlowCall =
TNormalCall(CallInstruction call) or
TSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
private predicate summarizedCallableIsManual(SummarizedCallable sc) {
sc.asSummarizedCallable().hasManualModel()
}
@@ -1523,12 +1519,6 @@ private predicate fieldHasApproxName(Field f, string s) {
private predicate unionHasApproxName(Cpp::Union u, string s) { s = u.getName().charAt(0) }
cached
private newtype TContentApprox =
TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or
TUnionApproxContent(string s) { unionHasApproxName(_, s) } or
TElementApproxContent()
/** An approximated `Content`. */
class ContentApprox extends TContentApprox {
string toString() { none() } // overridden in subclasses

View File

@@ -55,26 +55,6 @@ private CppType getThisType(Cpp::MemberFunction f, boolean isGLValue) {
result.hasType(f.getTypeOfThis(), isGLValue)
}
/**
* Gets the C++ type of the instruction `i`.
*
* This is equivalent to `i.getResultLanguageType()` with the exception
* of instructions that directly references a `this` IRVariable. In this
* case, `i.getResultLanguageType()` gives an unknown type, whereas the
* predicate gives the expected type (i.e., a potentially cv-qualified
* type `A*` where `A` is the declaring type of the member function that
* contains `i`).
*/
cached
CppType getResultLanguageType(Instruction i) {
if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable
then
if i.isGLValue()
then result = getThisType(i.getEnclosingFunction(), true)
else result = getThisType(i.getEnclosingFunction(), false)
else result = i.getResultLanguageType()
}
/**
* Gets the C++ type of the operand `operand`.
* This is equivalent to the type of the operand's defining instruction.
@@ -572,6 +552,26 @@ private class BaseCallInstruction extends BaseSourceVariableInstruction, CallIns
cached
private module Cached {
/**
* Gets the C++ type of the instruction `i`.
*
* This is equivalent to `i.getResultLanguageType()` with the exception
* of instructions that directly references a `this` IRVariable. In this
* case, `i.getResultLanguageType()` gives an unknown type, whereas the
* predicate gives the expected type (i.e., a potentially cv-qualified
* type `A*` where `A` is the declaring type of the member function that
* contains `i`).
*/
cached
CppType getResultLanguageType(Instruction i) {
if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable
then
if i.isGLValue()
then result = getThisType(i.getEnclosingFunction(), true)
else result = getThisType(i.getEnclosingFunction(), false)
else result = i.getResultLanguageType()
}
/** Holds if `op` is the only use of its defining instruction, and that op is used in a conversation */
private predicate isConversion(Operand op) {
exists(Instruction def, Operand use |