Merge pull request #21390 from MathiasVP/less-reevaluation-4

C++: Reduce re-evaluation
This commit is contained in:
Mathias Vorreiter Pedersen
2026-03-03 15:09:55 +00:00
committed by GitHub
17 changed files with 2285 additions and 2233 deletions

View File

@@ -555,6 +555,7 @@ private Locatable getSupportedFunctionTemplateArgument(Function templateFunction
* Normalize the `n`'th parameter of `f` by replacing template names
* with `func:N` (where `N` is the index of the template).
*/
pragma[nomagic]
private string getTypeNameWithoutFunctionTemplates(Function f, int n, int remaining) {
exists(Function templateFunction |
templateFunction = getFullyTemplatedFunction(f) and

View File

@@ -201,7 +201,7 @@ module SourceSinkInterpretationInput implements
string toString() {
result = this.asElement().toString()
or
result = this.asNode().toString()
result = this.asNode().toStringImpl()
or
result = this.asCall().toString()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
private import cpp as Cpp
private import DataFlowUtil
private import DataFlowNodes
private import semmle.code.cpp.ir.IR
private import DataFlowDispatch
private import semmle.code.cpp.ir.internal.IRCppLanguage
@@ -16,28 +17,42 @@ private import semmle.code.cpp.dataflow.ExternalFlow as External
cached
private module Cached {
cached
module Nodes0 {
cached
newtype TIRDataFlowNode0 =
TInstructionNode0(Instruction i) {
not Ssa::ignoreInstruction(i) and
not exists(Operand op |
not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op)
) and
// We exclude `void`-typed instructions because they cannot contain data.
// However, if the instruction is a glvalue, and their type is `void`, then the result
// type of the instruction is really `void*`, and thus we still want to have a dataflow
// node for it.
(not i.getResultType() instanceof VoidType or i.isGLValue())
} or
TMultipleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op))
} or
TSingleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op))
}
newtype TIRDataFlowNode0 =
TInstructionNode0(Instruction i) {
not Ssa::ignoreInstruction(i) and
not exists(Operand op |
not Ssa::ignoreOperand(op) and i = Ssa::getIRRepresentationOfOperand(op)
) and
// We exclude `void`-typed instructions because they cannot contain data.
// However, if the instruction is a glvalue, and their type is `void`, then the result
// type of the instruction is really `void*`, and thus we still want to have a dataflow
// node for it.
(not i.getResultType() instanceof VoidType or i.isGLValue())
} or
TMultipleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and not exists(Ssa::getIRRepresentationOfOperand(op))
} or
TSingleUseOperandNode0(Operand op) {
not Ssa::ignoreOperand(op) and exists(Ssa::getIRRepresentationOfOperand(op))
}
cached
string toStringCached(Node n) {
result = toExprString(n)
or
not exists(toExprString(n)) and
result = n.toStringImpl()
}
cached
Location getLocationCached(Node n) { result = n.getLocationImpl() }
cached
newtype TContentApprox =
TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or
TUnionApproxContent(string s) { unionHasApproxName(_, s) } or
TElementApproxContent()
/**
* Gets an additional term that is added to the `join` and `branch` computations to reflect
* an additional forward or backwards branching factor that is not taken into account
@@ -59,38 +74,174 @@ private module Cached {
result = countNumberOfBranchesUsingParameter(switch, p)
)
}
}
import Cached
private import Nodes0
cached
newtype TDataFlowCallable =
TSourceCallable(Cpp::Declaration decl) or
TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c)
/**
* A module for calculating the number of stars (i.e., `*`s) needed for various
* dataflow node `toString` predicates.
*/
module NodeStars {
private int getNumberOfIndirections(Node n) {
result = n.(RawIndirectOperand).getIndirectionIndex()
cached
newtype TDataFlowCall =
TNormalCall(CallInstruction call) or
TSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
/**
* Holds if data can flow from `node1` to `node2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*/
cached
predicate jumpStep(Node n1, Node n2) {
exists(GlobalLikeVariable v |
exists(Ssa::GlobalUse globalUse |
v = globalUse.getVariable() and
n1.(FinalGlobalValue).getGlobalUse() = globalUse
|
globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and
v = n2.asVariable()
or
v = n2.asIndirectVariable(globalUse.getIndirection())
)
or
exists(Ssa::GlobalDef globalDef |
v = globalDef.getVariable() and
n2.(InitialGlobalValue).getGlobalDef() = globalDef
|
globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and
v = n1.asVariable()
or
v = n1.asIndirectVariable(globalDef.getIndirection())
)
)
or
result = n.(RawIndirectInstruction).getIndirectionIndex()
or
result = n.(VariableNode).getIndirectionIndex()
or
result = n.(PostUpdateNodeImpl).getIndirectionIndex()
or
result = n.(FinalParameterNode).getIndirectionIndex()
or
result = n.(BodyLessParameterNodeImpl).getIndirectionIndex()
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(),
n2.(FlowSummaryNode).getSummaryNode())
}
/**
* Gets the number of stars (i.e., `*`s) needed to produce the `toString`
* output for `n`.
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*
* The boolean `certain` is true if the destination address does not involve
* any pointer arithmetic, and false otherwise.
*/
string stars(Node n) { result = repeatStars(getNumberOfIndirections(n)) }
cached
predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) {
exists(
PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads,
StoreInstruction store, FieldContent fc
|
postFieldUpdate = node2 and
fc = c and
nodeHasInstruction(node1, pragma[only_bind_into](store),
pragma[only_bind_into](indirectionIndex1)) and
postFieldUpdate.getIndirectionIndex() = 1 and
numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(),
store.getDestinationAddressOperand(), numberOfLoads, certain) and
fc.getAField() = postFieldUpdate.getUpdatedField() and
getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode()) and
certain = true
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
cached
predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) }
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
cached
predicate readStep(Node node1, ContentSet c, Node node2) {
exists(
FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc
|
fc = c and
nodeHasOperand(node2, operand, indirectionIndex2) and
// The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
// in `storeStep`.
nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and
fc.getAField() = fa1.getField() and
getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
cached
predicate clearsContent(Node n, ContentSet c) {
n =
any(PostUpdateNode pun, Content d |
d.impliesClearOf(c) and storeStepImpl(_, d, pun, true)
|
pun
).getPreUpdateNode() and
(
not exists(Operand op, Cpp::Operation p |
n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
(
p instanceof Cpp::AssignPointerAddExpr or
p instanceof Cpp::AssignPointerSubExpr or
p instanceof Cpp::CrementOperation
)
|
p.getAnOperand() = op.getUse().getAst()
)
or
forex(PostUpdateNode pun, Content d |
pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and
storeStepImpl(_, d, pun, true) and
pun.getPreUpdateNode() = n
|
c.(Content).getIndirectionIndex() = d.getIndirectionIndex()
)
)
}
}
import NodeStars
import Cached
private int getNumberOfIndirections(Node n) {
result = n.(RawIndirectOperand).getIndirectionIndex()
or
result = n.(RawIndirectInstruction).getIndirectionIndex()
or
result = n.(VariableNode).getIndirectionIndex()
or
result = n.(PostUpdateNodeImpl).getIndirectionIndex()
or
result = n.(FinalParameterNode).getIndirectionIndex()
or
result = n.(BodyLessParameterNodeImpl).getIndirectionIndex()
}
/**
* Gets the number of stars (i.e., `*`s) needed to produce the `toString`
* output for `n`.
*/
string stars(Node n) { result = repeatStars(getNumberOfIndirections(n)) }
/**
* A cut-down `DataFlow::Node` class that does not depend on the output of SSA.
@@ -828,85 +979,10 @@ private int getMinIndirectionForGlobalDef(Ssa::GlobalDef def) {
result = getMinIndirectionsForType(def.getUnspecifiedType())
}
/**
* Holds if data can flow from `node1` to `node2` in a way that loses the
* calling context. For example, this would happen with flow through a
* global or static variable.
*/
predicate jumpStep(Node n1, Node n2) {
exists(GlobalLikeVariable v |
exists(Ssa::GlobalUse globalUse |
v = globalUse.getVariable() and
n1.(FinalGlobalValue).getGlobalUse() = globalUse
|
globalUse.getIndirection() = getMinIndirectionForGlobalUse(globalUse) and
v = n2.asVariable()
or
v = n2.asIndirectVariable(globalUse.getIndirection())
)
or
exists(Ssa::GlobalDef globalDef |
v = globalDef.getVariable() and
n2.(InitialGlobalValue).getGlobalDef() = globalDef
|
globalDef.getIndirection() = getMinIndirectionForGlobalDef(globalDef) and
v = n1.asVariable()
or
v = n1.asIndirectVariable(globalDef.getIndirection())
)
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryJumpStep(n1.(FlowSummaryNode).getSummaryNode(),
n2.(FlowSummaryNode).getSummaryNode())
}
bindingset[c]
pragma[inline_late]
private int getIndirectionIndexLate(Content c) { result = c.getIndirectionIndex() }
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*
* The boolean `certain` is true if the destination address does not involve
* any pointer arithmetic, and false otherwise. This has to do with whether a
* store step can be used to clear a field (see `clearsContent`).
*/
predicate storeStepImpl(Node node1, Content c, Node node2, boolean certain) {
exists(
PostFieldUpdateNode postFieldUpdate, int indirectionIndex1, int numberOfLoads,
StoreInstruction store, FieldContent fc
|
postFieldUpdate = node2 and
fc = c and
nodeHasInstruction(node1, pragma[only_bind_into](store),
pragma[only_bind_into](indirectionIndex1)) and
postFieldUpdate.getIndirectionIndex() = 1 and
numberOfLoadsFromOperand(postFieldUpdate.getFieldAddress(),
store.getDestinationAddressOperand(), numberOfLoads, certain) and
fc.getAField() = postFieldUpdate.getUpdatedField() and
getIndirectionIndexLate(fc) = 1 + indirectionIndex1 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode()) and
certain = true
}
/**
* Holds if data can flow from `node1` to `node2` via an assignment to `f`.
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, ContentSet c, Node node2) { storeStepImpl(node1, c, node2, _) }
/**
* Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like
* operations and exactly `n` `LoadInstruction` operations.
*/
private predicate numberOfLoadsFromOperandRec(
Operand operandFrom, Operand operandTo, int ind, boolean certain
) {
@@ -957,63 +1033,6 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex)
hasInstructionAndIndex(node, instr, indirectionIndex)
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate readStep(Node node1, ContentSet c, Node node2) {
exists(
FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2, FieldContent fc
|
fc = c and
nodeHasOperand(node2, operand, indirectionIndex2) and
// The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
// in `storeStep`.
nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _) and
fc.getAField() = fa1.getField() and
getIndirectionIndexLate(fc) = indirectionIndex2 + numberOfLoads
)
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c,
node2.(FlowSummaryNode).getSummaryNode())
}
/**
* Holds if values stored inside content `c` are cleared at node `n`.
*/
predicate clearsContent(Node n, ContentSet c) {
n =
any(PostUpdateNode pun, Content d | d.impliesClearOf(c) and storeStepImpl(_, d, pun, true) | pun)
.getPreUpdateNode() and
(
// The crement operations and pointer addition and subtraction self-assign. We do not
// want to clear the contents if it is indirectly pointed at by any of these operations,
// as part of the contents might still be accessible afterwards. If there is no such
// indirection clearing the contents is safe.
not exists(Operand op, Cpp::Operation p |
n.(IndirectOperand).hasOperandAndIndirectionIndex(op, _) and
(
p instanceof Cpp::AssignPointerAddExpr or
p instanceof Cpp::AssignPointerSubExpr or
p instanceof Cpp::CrementOperation
)
|
p.getAnOperand() = op.getUse().getAst()
)
or
forex(PostUpdateNode pun, Content d |
pragma[only_bind_into](d).impliesClearOf(pragma[only_bind_into](c)) and
storeStepImpl(_, d, pun, true) and
pun.getPreUpdateNode() = n
|
c.(Content).getIndirectionIndex() = d.getIndirectionIndex()
)
)
}
/**
* Holds if the value that is being tracked is expected to be stored inside content `c`
* at node `n`.
@@ -1046,11 +1065,6 @@ class CastNode extends Node {
CastNode() { none() } // stub implementation
}
cached
private newtype TDataFlowCallable =
TSourceCallable(Cpp::Declaration decl) or
TSummarizedCallable(FlowSummaryImpl::Public::SummarizedCallable c)
/**
* A callable, which may be:
* - a function (that may contain code)
@@ -1134,15 +1148,6 @@ class DataFlowType extends TypeFinal {
string toString() { result = "" }
}
cached
private newtype TDataFlowCall =
TNormalCall(CallInstruction call) or
TSummaryCall(
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver
) {
FlowSummaryImpl::Private::summaryCallbackRange(c, receiver)
}
private predicate summarizedCallableIsManual(SummarizedCallable sc) {
sc.asSummarizedCallable().hasManualModel()
}
@@ -1523,12 +1528,6 @@ private predicate fieldHasApproxName(Field f, string s) {
private predicate unionHasApproxName(Cpp::Union u, string s) { s = u.getName().charAt(0) }
cached
private newtype TContentApprox =
TFieldApproxContent(string s) { fieldHasApproxName(_, s) } or
TUnionApproxContent(string s) { unionHasApproxName(_, s) } or
TElementApproxContent()
/** An approximated `Content`. */
class ContentApprox extends TContentApprox {
string toString() { none() } // overridden in subclasses

View File

@@ -6,8 +6,8 @@ private import cpp
private import semmle.code.cpp.ir.IR
private import DataFlowUtil
private import DataFlowPrivate
private import semmle.code.cpp.ir.implementation.raw.internal.TranslatedExpr
private import semmle.code.cpp.ir.implementation.raw.internal.InstructionTag
private import DataFlowNodes
private import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as IRConstruction
cached
private module Cached {
@@ -73,17 +73,9 @@ private module Cached {
// a result for `getConvertedResultExpression`. We remap this here so that
// this `ConvertInstruction` maps to the result of the expression that
// represents the extent.
exists(TranslatedNonConstantAllocationSize tas |
result = tas.getExtent().getExpr() and
instr = tas.getInstruction(AllocationExtentConvertTag())
)
result = IRConstruction::Raw::getAllocationExtentConvertExpr(instr)
or
// There's no instruction that returns `ParenthesisExpr`, but some queries
// expect this
exists(TranslatedTransparentConversion ttc |
result = ttc.getExpr().(ParenthesisExpr) and
instr = ttc.getResult()
)
result = IRConstruction::Raw::getTransparentConversionParenthesisExpr(instr)
or
// Certain expressions generate `CopyValueInstruction`s only when they
// are needed. Examples of this include crement operations and compound
@@ -112,10 +104,10 @@ private module Cached {
// needed, and in that case the only value that will propagate forward in
// the program is the value that's been updated. So in those cases we just
// use the result of `node.asDefinition()` as the result of `node.asExpr()`.
exists(TranslatedCoreExpr tco |
tco.getInstruction(_) = instr and
tco.producesExprResult() and
result = asDefinitionImpl0(instr)
exists(StoreInstruction store |
store = instr and
IRConstruction::Raw::instructionProducesExprResult(store) and
result = asDefinitionImpl0(store)
)
or
// IR construction breaks an array aggregate literal `{1, 2, 3}` into a
@@ -145,18 +137,9 @@ private module Cached {
// For an expression such as `i += 2` we pretend that the generated
// `StoreInstruction` contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedAssignOperation tao |
store = tao.getInstruction(AssignmentStoreTag()) and
result = tao.getExpr()
)
result = IRConstruction::Raw::getAssignOperationStoreExpr(store)
or
// Similarly for `i++` and `++i` we pretend that the generated
// `StoreInstruction` contains the result of the expression even though
// this isn't totally aligned with the C/C++ standard.
exists(TranslatedCrementOperation tco |
store = tco.getInstruction(CrementStoreTag()) and
result = tco.getExpr()
)
result = IRConstruction::Raw::getCrementOperationStoreExpr(store)
}
/**
@@ -166,11 +149,7 @@ private module Cached {
*/
private predicate excludeAsDefinitionResult(StoreInstruction store) {
// Exclude the store to the temporary generated by a ternary expression.
exists(TranslatedConditionalExpr tce |
store = tce.getInstruction(ConditionValueFalseStoreTag())
or
store = tce.getInstruction(ConditionValueTrueStoreTag())
)
IRConstruction::Raw::isConditionalExprTempStore(store)
}
/**

View File

@@ -6,6 +6,7 @@
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs
private import DataFlowUtil
private import DataFlowNodes
private import DataFlowPrivate
private import SsaImpl as Ssa

View File

@@ -6,6 +6,7 @@ private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes
private import PrintIRUtilities
/** A property provider for local IR dataflow store steps. */

View File

@@ -2,6 +2,7 @@ private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes
private import SsaImpl as Ssa
private import PrintIRUtilities

View File

@@ -6,6 +6,7 @@ private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes
private Instruction getInstruction(Node n, string stars) {
result = [n.asInstruction(), n.(RawIndirectInstruction).getInstruction()] and

View File

@@ -10,8 +10,9 @@ private import semmle.code.cpp.models.interfaces.PartialFlow as PartialFlow
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as FIO
private import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.dataflow.internal.ModelUtil
private import semmle.code.cpp.ir.implementation.raw.internal.TranslatedInitialization
private import semmle.code.cpp.ir.implementation.raw.internal.IRConstruction as IRConstruction
private import DataFlowPrivate
private import DataFlowNodes
import SsaImplCommon
private module SourceVariables {
@@ -438,10 +439,7 @@ private predicate sourceVariableHasBaseAndIndex(SourceVariable v, BaseSourceVari
* initialize `v`.
*/
private Instruction getInitializationTargetAddress(IRVariable v) {
exists(TranslatedVariableInitialization init |
init.getIRVariable() = v and
result = init.getTargetAddress()
)
result = IRConstruction::Raw::getInitializationTargetAddress(v)
}
/** An initial definition of an SSA variable address. */

View File

@@ -4,47 +4,12 @@ import semmle.code.cpp.ir.internal.IRCppLanguage
private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects
private import DataFlowImplCommon as DataFlowImplCommon
private import DataFlowUtil
private import DataFlowNodes
private import semmle.code.cpp.models.interfaces.PointerWrapper
private import DataFlowPrivate
private import TypeFlow
private import semmle.code.cpp.ir.ValueNumbering
/**
* Holds if `operand` is an operand that is not used by the dataflow library.
* Ignored operands are not recognized as uses by SSA, and they don't have a
* corresponding `(Indirect)OperandNode`.
*/
predicate ignoreOperand(Operand operand) {
operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand() or
operand = any(Instruction instr | ignoreInstruction(instr)).getAUse() or
operand instanceof MemoryOperand
}
/**
* Holds if `instr` is an instruction that is not used by the dataflow library.
* Ignored instructions are not recognized as reads/writes by SSA, and they
* don't have a corresponding `(Indirect)InstructionNode`.
*/
predicate ignoreInstruction(Instruction instr) {
DataFlowImplCommon::forceCachingInSameStage() and
(
instr instanceof CallSideEffectInstruction or
instr instanceof CallReadSideEffectInstruction or
instr instanceof ExitFunctionInstruction or
instr instanceof EnterFunctionInstruction or
instr instanceof WriteSideEffectInstruction or
instr instanceof PhiInstruction or
instr instanceof ReadSideEffectInstruction or
instr instanceof ChiInstruction or
instr instanceof InitializeIndirectionInstruction or
instr instanceof AliasedDefinitionInstruction or
instr instanceof AliasedUseInstruction or
instr instanceof InitializeNonLocalInstruction or
instr instanceof ReturnIndirectionInstruction or
instr instanceof UninitializedGroupInstruction
)
}
/**
* Gets the C++ type of `this` in the member function `f`.
* The result is a glvalue if `isGLValue` is true, and
@@ -55,26 +20,6 @@ private CppType getThisType(Cpp::MemberFunction f, boolean isGLValue) {
result.hasType(f.getTypeOfThis(), isGLValue)
}
/**
* Gets the C++ type of the instruction `i`.
*
* This is equivalent to `i.getResultLanguageType()` with the exception
* of instructions that directly references a `this` IRVariable. In this
* case, `i.getResultLanguageType()` gives an unknown type, whereas the
* predicate gives the expected type (i.e., a potentially cv-qualified
* type `A*` where `A` is the declaring type of the member function that
* contains `i`).
*/
cached
CppType getResultLanguageType(Instruction i) {
if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable
then
if i.isGLValue()
then result = getThisType(i.getEnclosingFunction(), true)
else result = getThisType(i.getEnclosingFunction(), false)
else result = i.getResultLanguageType()
}
/**
* Gets the C++ type of the operand `operand`.
* This is equivalent to the type of the operand's defining instruction.
@@ -347,10 +292,6 @@ predicate isWrite(Node0Impl value, Operand address, boolean certain) {
)
}
predicate isAdditionalConversionFlow(Operand opFrom, Instruction instrTo) {
any(Indirection ind).isAdditionalConversionFlow(opFrom, instrTo)
}
newtype TBaseSourceVariable =
// Each IR variable gets its own source variable
TBaseIRVariable(IRVariable var) or
@@ -572,6 +513,69 @@ private class BaseCallInstruction extends BaseSourceVariableInstruction, CallIns
cached
private module Cached {
/**
* Holds if `operand` is an operand that is not used by the dataflow library.
* Ignored operands are not recognized as uses by SSA, and they don't have a
* corresponding `(Indirect)OperandNode`.
*/
cached
predicate ignoreOperand(Operand operand) {
operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand() or
operand = any(Instruction instr | ignoreInstruction(instr)).getAUse() or
operand instanceof MemoryOperand
}
/**
* Holds if `instr` is an instruction that is not used by the dataflow library.
* Ignored instructions are not recognized as reads/writes by SSA, and they
* don't have a corresponding `(Indirect)InstructionNode`.
*/
cached
predicate ignoreInstruction(Instruction instr) {
DataFlowImplCommon::forceCachingInSameStage() and
(
instr instanceof CallSideEffectInstruction or
instr instanceof CallReadSideEffectInstruction or
instr instanceof ExitFunctionInstruction or
instr instanceof EnterFunctionInstruction or
instr instanceof WriteSideEffectInstruction or
instr instanceof PhiInstruction or
instr instanceof ReadSideEffectInstruction or
instr instanceof ChiInstruction or
instr instanceof InitializeIndirectionInstruction or
instr instanceof AliasedDefinitionInstruction or
instr instanceof AliasedUseInstruction or
instr instanceof InitializeNonLocalInstruction or
instr instanceof ReturnIndirectionInstruction or
instr instanceof UninitializedGroupInstruction
)
}
cached
predicate isAdditionalConversionFlow(Operand opFrom, Instruction instrTo) {
any(Indirection ind).isAdditionalConversionFlow(opFrom, instrTo)
}
/**
* Gets the C++ type of the instruction `i`.
*
* This is equivalent to `i.getResultLanguageType()` with the exception
* of instructions that directly references a `this` IRVariable. In this
* case, `i.getResultLanguageType()` gives an unknown type, whereas the
* predicate gives the expected type (i.e., a potentially cv-qualified
* type `A*` where `A` is the declaring type of the member function that
* contains `i`).
*/
cached
CppType getResultLanguageType(Instruction i) {
if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable
then
if i.isGLValue()
then result = getThisType(i.getEnclosingFunction(), true)
else result = getThisType(i.getEnclosingFunction(), false)
else result = i.getResultLanguageType()
}
/** Holds if `op` is the only use of its defining instruction, and that op is used in a conversation */
private predicate isConversion(Operand op) {
exists(Instruction def, Operand use |

View File

@@ -5,64 +5,81 @@ private import semmle.code.cpp.models.interfaces.DataFlow
private import semmle.code.cpp.models.interfaces.SideEffect
private import DataFlowUtil
private import DataFlowPrivate
private import DataFlowNodes
private import SsaImpl as Ssa
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.cpp.ir.dataflow.FlowSteps
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step. This relation is only used for local taint flow
* (for example `TaintTracking::localTaint(source, sink)`) so it may contain
* special cases that should only apply to local taint flow.
*/
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// dataflow step
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
// taint flow step
localAdditionalTaintStep(nodeFrom, nodeTo, _)
or
// models-as-data summarized flow for local data flow (i.e. special case for flow
// through calls to modeled functions, without relying on global dataflow to join
// the dots).
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _)
cached
private module Cached {
private import DataFlowImplCommon as DataFlowImplCommon
/**
* This predicate exists to collapse the `cached` predicates in this module with the
* `cached` predicates in other C/C++ dataflow files, which is then collapsed
* with the `cached` predicates in `DataFlowImplCommon.qll`.
*/
cached
predicate forceCachingInSameStage() { DataFlowImplCommon::forceCachingInSameStage() }
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step. This relation is only used for local taint flow
* (for example `TaintTracking::localTaint(source, sink)`) so it may contain
* special cases that should only apply to local taint flow.
*/
cached
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// dataflow step
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
// taint flow step
localAdditionalTaintStep(nodeFrom, nodeTo, _)
or
// models-as-data summarized flow for local data flow (i.e. special case for flow
// through calls to modeled functions, without relying on global dataflow to join
// the dots).
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(nodeFrom, nodeTo, _)
}
/**
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
* different objects.
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and
model = ""
or
modeledTaintStep(nodeFrom, nodeTo, model)
or
// Flow from (the indirection of) an operand of a pointer arithmetic instruction to the
// indirection of the pointer arithmetic instruction. This provides flow from `source`
// in `x[source]` to the result of the associated load instruction.
exists(PointerArithmeticInstruction pai, int indirectionIndex |
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
) and
model = ""
or
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and
model = ""
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
nodeTo.(FlowSummaryNode).getSummaryNode(), false, model)
or
// object->field conflation for content that is a `TaintInheritingContent`.
exists(DataFlow::ContentSet f |
readStep(nodeFrom, f, nodeTo) and
f.getAReadContent() instanceof TaintInheritingContent
) and
model = ""
}
}
/**
* Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
* different objects.
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and
model = ""
or
modeledTaintStep(nodeFrom, nodeTo, model)
or
// Flow from (the indirection of) an operand of a pointer arithmetic instruction to the
// indirection of the pointer arithmetic instruction. This provides flow from `source`
// in `x[source]` to the result of the associated load instruction.
exists(PointerArithmeticInstruction pai, int indirectionIndex |
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
) and
model = ""
or
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and
model = ""
or
// models-as-data summarized flow
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
nodeTo.(FlowSummaryNode).getSummaryNode(), false, model)
or
// object->field conflation for content that is a `TaintInheritingContent`.
exists(DataFlow::ContentSet f |
readStep(nodeFrom, f, nodeTo) and
f.getAReadContent() instanceof TaintInheritingContent
) and
model = ""
}
import Cached
/**
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
@@ -196,7 +213,7 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut, string
// Taint flow from a pointer argument to an output, when the model specifies flow from the deref
// to that output, but the deref is not modeled in the IR for the caller.
exists(
CallInstruction call, DataFlow::SideEffectOperandNode indirectArgument, Function func,
CallInstruction call, SideEffectOperandNode indirectArgument, Function func,
FunctionInput modelIn, FunctionOutput modelOut
|
indirectArgument = callInput(call, modelIn) and

View File

@@ -15,6 +15,7 @@ private import TranslatedCall
private import TranslatedStmt
private import TranslatedFunction
private import TranslatedGlobalVar
private import TranslatedInitialization
TranslatedElement getInstructionTranslatedElement(Instruction instruction) {
instruction = TRawInstruction(result, _)
@@ -194,6 +195,89 @@ module Raw {
Expr getInstructionUnconvertedResultExpression(Instruction instruction) {
result = getInstructionConvertedResultExpression(instruction).getUnconverted()
}
/**
* Gets the expression associated with the instruction `instr` that computes
* the `Convert` instruction on the extent expression of an allocation.
*/
cached
Expr getAllocationExtentConvertExpr(Instruction instr) {
exists(TranslatedNonConstantAllocationSize tas |
instr = tas.getInstruction(AllocationExtentConvertTag()) and
result = tas.getExtent().getExpr()
)
}
/**
* Gets the `ParenthesisExpr` associated with a transparent conversion
* instruction, if any.
*/
cached
ParenthesisExpr getTransparentConversionParenthesisExpr(Instruction instr) {
exists(TranslatedTransparentConversion ttc |
result = ttc.getExpr() and
instr = ttc.getResult()
)
}
/**
* Holds if `instr` belongs to a `TranslatedCoreExpr` that produces an
* expression result. This indicates that the instruction represents a
* definition whose result should be mapped back to the expression.
*/
cached
predicate instructionProducesExprResult(Instruction instr) {
exists(TranslatedCoreExpr tco |
tco.getInstruction(_) = instr and
tco.producesExprResult()
)
}
/**
* Gets the expression associated with a `StoreInstruction` generated
* by an `TranslatedAssignOperation`.
*/
cached
Expr getAssignOperationStoreExpr(StoreInstruction store) {
exists(TranslatedAssignOperation tao |
store = tao.getInstruction(AssignmentStoreTag()) and
result = tao.getExpr()
)
}
/**
* Gets the expression associated with a `StoreInstruction` generated
* by an `TranslatedCrementOperation`.
*/
cached
Expr getCrementOperationStoreExpr(StoreInstruction store) {
exists(TranslatedCrementOperation tco |
store = tco.getInstruction(CrementStoreTag()) and
result = tco.getExpr()
)
}
/**
* Holds if `store` is a `StoreInstruction` that defines the temporary
* `IRVariable` generated as part of the translation of a ternary expression.
*/
cached
predicate isConditionalExprTempStore(StoreInstruction store) {
exists(TranslatedConditionalExpr tce |
store = tce.getInstruction(ConditionValueFalseStoreTag())
or
store = tce.getInstruction(ConditionValueTrueStoreTag())
)
}
/** Gets the instruction that computes the address used to initialize `v`. */
cached
Instruction getInitializationTargetAddress(IRVariable v) {
exists(TranslatedVariableInitialization init |
init.getIRVariable() = v and
result = init.getTargetAddress()
)
}
}
class TStageInstruction = TRawInstruction or TRawUnreachedInstruction;

View File

@@ -15,6 +15,7 @@
import cpp
import semmle.code.cpp.rangeanalysis.RangeAnalysisUtils
import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes
/** Gets a loop that contains `e`. */
Loop getAnEnclosingLoopOfExpr(Expr e) { result = getAnEnclosingLoopOfStmt(e.getEnclosingStmt()) }
@@ -45,9 +46,9 @@ private Expr getExpr(DataFlow::Node node) {
or
result = node.asOperand().getUse().getAst()
or
result = node.(DataFlow::RawIndirectInstruction).getInstruction().getAst()
result = node.(RawIndirectInstruction).getInstruction().getAst()
or
result = node.(DataFlow::RawIndirectOperand).getOperand().getUse().getAst()
result = node.(RawIndirectOperand).getOperand().getUse().getAst()
}
/**
@@ -208,7 +209,7 @@ class LoopWithAlloca extends Stmt {
this.conditionRequiresInequality(va, _, _) and
DataFlow::localFlow(result, DataFlow::exprNode(va)) and
// Phi nodes will be preceded by nodes that represent actual definitions
not result instanceof DataFlow::SsaSynthNode and
not result instanceof SsaSynthNode and
// A source is outside the loop if it's not inside the loop
not exists(Expr e | e = getExpr(result) | this = getAnEnclosingLoopOfExpr(e))
)

View File

@@ -8,6 +8,7 @@ private import semmle.code.cpp.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.cpp.ir.dataflow.internal.DataFlowImplSpecific
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes as DataFlowNodes
private import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.cpp.ir.dataflow.internal.TaintTrackingImplSpecific
private import semmle.code.cpp.dataflow.new.TaintTracking as Tt
@@ -403,7 +404,7 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig {
}
predicate apiSource(DataFlow::Node source) {
DataFlowPrivate::nodeHasOperand(source, any(DataFlow::FieldAddress fa), 1)
DataFlowPrivate::nodeHasOperand(source, any(DataFlowNodes::FieldAddress fa), 1)
or
source instanceof DataFlow::ParameterNode
}
@@ -416,7 +417,7 @@ private module SinkModelGeneratorInput implements SinkModelGeneratorInputSig {
result = "Argument[" + DataFlow::repeatStars(indirectionIndex) + argumentIndex + "]"
)
or
DataFlowPrivate::nodeHasOperand(source, any(DataFlow::FieldAddress fa), 1) and
DataFlowPrivate::nodeHasOperand(source, any(DataFlowNodes::FieldAddress fa), 1) and
result = qualifierString()
}

View File

@@ -1,6 +1,7 @@
import testModels
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil
private import semmle.code.cpp.ir.dataflow.internal.DataFlowNodes
string describe(DataFlow::Node n) {
n instanceof ParameterNode and result = "ParameterNode"