C++: Instantiate the type tracking module inside a reusable module like it's done in Java.

This commit is contained in:
Mathias Vorreiter Pedersen
2025-08-19 12:48:07 +02:00
parent caf7464f3b
commit d4188d59a8

View File

@@ -1,8 +1,11 @@
private import cpp
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.ir.dataflow.DataFlow
private import DataFlowPrivate as DataFlowPrivate
private import DataFlowUtil
private import DataFlowImplCommon as DataFlowImplCommon
private import codeql.typetracking.TypeTracking
private import SsaImpl as SsaImpl
/**
* Holds if `f` has name `qualifiedName` and `nparams` parameter count. This is
@@ -81,174 +84,200 @@ private DataFlowPrivate::DataFlowCallable nonVirtualDispatch(DataFlowPrivate::Da
.viableTarget(call.asCallInstruction().getUnconvertedResultExpression())
}
private class RelevantNode extends Node {
RelevantNode() { this.getType().stripType() instanceof Class }
}
private signature DataFlowPrivate::DataFlowCallable methodDispatchSig(
DataFlowPrivate::DataFlowCall c
);
private predicate ignoreConstructor(Expr e) {
e instanceof ConstructorDirectInit or
e instanceof ConstructorVirtualInit or
e instanceof ConstructorDelegationInit or
exists(ConstructorFieldInit init | init.getExpr() = e)
}
/**
* Provides virtual dispatch support compatible with the original
* implementation of `semmle.code.cpp.security.TaintTracking`.
* Holds if `n` is either:
* - the post-update node of a qualifier after a call to a constructor which
* constructs an object containing at least one virtual function.
* - a node which represents a derived-to-base instruction that converts from `c`.
*/
private module VirtualDispatch {
/** A call that may dispatch differently depending on the qualifier value. */
abstract class DataSensitiveCall extends DataFlowCall {
/**
* Gets the node whose value determines the target of this call. This node
* could be the qualifier of a virtual dispatch or the function-pointer
* expression in a call to a function pointer. What they have in common is
* that we need to find out which data flows there, and then it's up to the
* `resolve` predicate to stitch that information together and resolve the
* call.
*/
abstract Node getDispatchValue();
private predicate lambdaSourceImpl(RelevantNode n, Class c) {
// Object construction
exists(CallInstruction call, ThisArgumentOperand qualifier, Call e |
qualifier = call.getThisArgumentOperand() and
n.(PostUpdateNode).getPreUpdateNode().asOperand() = qualifier and
call.getStaticCallTarget() instanceof Constructor and
qualifier.getType().stripType() = c and
c.getABaseClass*().getAMemberFunction().isVirtual() and
e = call.getUnconvertedResultExpression() and
not ignoreConstructor(e)
|
exists(c.getABaseClass())
or
exists(c.getADerivedClass())
)
or
// Conversion to a base class
exists(ConvertToBaseInstruction convert |
// Only keep the most specific cast
not convert.getUnary() instanceof ConvertToBaseInstruction and
n.asInstruction() = convert and
convert.getDerivedClass() = c and
c.getABaseClass*().getAMemberFunction().isVirtual()
)
}
/** Gets a candidate target for this call. */
abstract Function resolve();
private module TrackVirtualDispatch<methodDispatchSig/1 lambdaDispatch0> {
/**
* Gets a possible runtime target of `c` using both static call-target
* information, and call-target resolution from `lambdaDispatch0`.
*/
private DataFlowPrivate::DataFlowCallable dispatch(DataFlowPrivate::DataFlowCall c) {
result = nonVirtualDispatch(c) or
result = lambdaDispatch0(c)
}
/**
* Whether `src` can flow to this call.
*
* Searches backwards from `getDispatchValue()` to `src`. The `allowFromArg`
* parameter is true when the search is allowed to continue backwards into
* a parameter; non-recursive callers should pass `_` for `allowFromArg`.
*/
predicate flowsFrom(Node src, boolean allowFromArg) {
src = this.getDispatchValue() and allowFromArg = true
or
exists(Node other, boolean allowOtherFromArg | this.flowsFrom(other, allowOtherFromArg) |
// Call argument
exists(DataFlowCall call, Position i |
other.(ParameterNode).isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and
src.(ArgumentNode).argumentOf(call, pragma[only_bind_into](pragma[only_bind_out](i)))
) and
allowOtherFromArg = true and
allowFromArg = true
private module TtInput implements TypeTrackingInput<Location> {
final class Node = RelevantNode;
class LocalSourceNode extends Node {
LocalSourceNode() {
this instanceof ParameterNode
or
// Call return
exists(DataFlowCall call, ReturnKind returnKind |
other = getAnOutNode(call, returnKind) and
returnNodeWithKindAndEnclosingCallable(src, returnKind, call.getStaticCallTarget())
) and
allowFromArg = false
this instanceof DataFlowPrivate::OutNode
or
// Local flow
localFlowStep(src, other) and
allowFromArg = allowOtherFromArg
DataFlowPrivate::readStep(_, _, this)
or
// Flow from global variable to load.
exists(LoadInstruction load, GlobalOrNamespaceVariable var |
var = src.asVariable() and
other.asInstruction() = load and
addressOfGlobal(load.getSourceAddress(), var) and
// The `allowFromArg` concept doesn't play a role when `src` is a
// global variable, so we just set it to a single arbitrary value for
// performance.
allowFromArg = true
)
or
// Flow from store to global variable.
exists(StoreInstruction store, GlobalOrNamespaceVariable var |
var = other.asVariable() and
store = src.asInstruction() and
storeIntoGlobal(store, var) and
// Setting `allowFromArg` to `true` like in the base case means we
// treat a store to a global variable like the dispatch itself: flow
// may come from anywhere.
allowFromArg = true
DataFlowPrivate::storeStep(_, _, this)
or
DataFlowPrivate::jumpStep(_, this)
or
lambdaSourceImpl(this, _)
}
}
final private class ContentSetFinal = ContentSet;
class Content extends ContentSetFinal {
Content() {
exists(DataFlow::Content c |
this.isSingleton(c) and
c.getIndirectionIndex() = 1
)
}
}
class ContentFilter extends Content {
Content getAMatchingContent() { result = this }
}
predicate compatibleContents(Content storeContents, Content loadContents) {
storeContents = loadContents
}
predicate simpleLocalSmallStep(Node nodeFrom, Node nodeTo) {
nodeFrom.getFunction() instanceof Function and
simpleLocalFlowStep(nodeFrom, nodeTo, _)
}
predicate levelStepNoCall(Node n1, LocalSourceNode n2) { none() }
predicate levelStepCall(Node n1, LocalSourceNode n2) { none() }
predicate storeStep(Node n1, Node n2, Content f) { DataFlowPrivate::storeStep(n1, f, n2) }
predicate callStep(Node n1, LocalSourceNode n2) {
exists(DataFlowPrivate::DataFlowCall call, DataFlowPrivate::Position pos |
n1.(DataFlowPrivate::ArgumentNode).argumentOf(call, pos) and
n2.(ParameterNode).isParameterOf(dispatch(call), pos)
)
}
predicate returnStep(Node n1, LocalSourceNode n2) {
exists(DataFlowPrivate::DataFlowCallable callable, DataFlowPrivate::DataFlowCall call |
n1.(DataFlowPrivate::ReturnNode).getEnclosingCallable() = callable and
callable = dispatch(call) and
n2 = DataFlowPrivate::getAnOutNode(call, n1.(DataFlowPrivate::ReturnNode).getKind())
)
}
predicate loadStep(Node n1, LocalSourceNode n2, Content f) {
DataFlowPrivate::readStep(n1, f, n2)
}
predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content f1, Content f2) { none() }
predicate withContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter f) { none() }
predicate withoutContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter f) { none() }
predicate jumpStep(Node n1, LocalSourceNode n2) { DataFlowPrivate::jumpStep(n1, n2) }
predicate hasFeatureBacktrackStoreTarget() { none() }
}
pragma[noinline]
private predicate storeIntoGlobal(StoreInstruction store, GlobalOrNamespaceVariable var) {
addressOfGlobal(store.getDestinationAddress(), var)
private predicate lambdaSource(RelevantNode n) { lambdaSourceImpl(n, _) }
/**
* Holds if `n` is the qualifier of `call` which targets the virtual member
* function `mf`.
*/
private predicate lambdaSinkImpl(RelevantNode n, CallInstruction call, MemberFunction mf) {
n.asOperand() = call.getThisArgumentOperand() and
call.getStaticCallTarget() = mf and
mf.isVirtual()
}
/** Holds if `addressInstr` is an instruction that produces the address of `var`. */
private predicate addressOfGlobal(Instruction addressInstr, GlobalOrNamespaceVariable var) {
// Access directly to the global variable
addressInstr.(VariableAddressInstruction).getAstVariable() = var
or
// Access to a field on a global union
exists(FieldAddressInstruction fa |
fa = addressInstr and
fa.getObjectAddress().(VariableAddressInstruction).getAstVariable() = var and
fa.getField().getDeclaringType() instanceof Union
private predicate lambdaSink(RelevantNode n) { lambdaSinkImpl(n, _, _) }
private import TypeTracking<Location, TtInput>::TypeTrack<lambdaSource/1>::Graph<lambdaSink/1>
private predicate edgePlus(PathNode n1, PathNode n2) = fastTC(edges/2)(n1, n2)
/**
* Gets the most specific implementation of `mf` that may be called when the
* qualifier has runtime type `c`.
*/
private MemberFunction mostSpecific(MemberFunction mf, Class c) {
lambdaSinkImpl(_, _, mf) and
mf.getAnOverridingFunction*() = result and
(
result.getDeclaringType() = c
or
not c.getAMemberFunction().getAnOverriddenFunction*() = mf and
result = mostSpecific(mf, c.getABaseClass())
)
}
/**
* A ReturnNode with its ReturnKind and its enclosing callable.
*
* Used to fix a join ordering issue in flowsFrom.
* Gets a possible pair of end-points `(p1, p2)` where:
* - `p1` is a derived-to-base conversion that converts from some
* class `derived`, and
* - `p2` is the qualifier of a call to a virtual function that may
* target `callable`, and
* - `callable` is the most specific implementation that may be called when
* the qualifier has type `derived`.
*/
pragma[noinline]
private predicate returnNodeWithKindAndEnclosingCallable(
ReturnNode node, ReturnKind kind, DataFlowCallable callable
private predicate pairCand(
PathNode p1, PathNode p2, DataFlowPrivate::DataFlowCallable callable,
DataFlowPrivate::DataFlowCall call
) {
node.getKind() = kind and
node.getFunction() = callable.getUnderlyingCallable()
exists(Class derived, MemberFunction mf |
lambdaSourceImpl(p1.getNode(), derived) and
lambdaSinkImpl(p2.getNode(), call.asCallInstruction(), mf) and
p1.isSource() and
p2.isSink() and
callable.asSourceCallable() = mostSpecific(mf, derived)
)
}
/** Call through a function pointer. */
private class DataSensitiveExprCall extends DataSensitiveCall {
DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) }
override Node getDispatchValue() { result.asOperand() = this.getCallTargetOperand() }
override Function resolve() {
exists(FunctionInstruction fi |
this.flowsFrom(instructionNode(fi), _) and
result = fi.getFunctionSymbol()
) and
(
this.getNumberOfArguments() <= result.getEffectiveNumberOfParameters() and
this.getNumberOfArguments() >= result.getEffectiveNumberOfParameters()
or
result.isVarargs()
)
}
}
/** Call to a virtual function. */
private class DataSensitiveOverriddenFunctionCall extends DataSensitiveCall {
DataSensitiveOverriddenFunctionCall() {
exists(
this.getStaticCallTarget()
.getUnderlyingCallable()
.(VirtualFunction)
.getAnOverridingFunction()
)
}
override Node getDispatchValue() { result.asInstruction() = this.getArgument(-1) }
override MemberFunction resolve() {
exists(Class overridingClass |
this.overrideMayAffectCall(overridingClass, result) and
this.hasFlowFromCastFrom(overridingClass)
)
}
/**
* Holds if `this` is a virtual function call whose static target is
* overridden by `overridingFunction` in `overridingClass`.
*/
pragma[noinline]
private predicate overrideMayAffectCall(Class overridingClass, MemberFunction overridingFunction) {
overridingFunction.getAnOverriddenFunction+() =
this.getStaticCallTarget().getUnderlyingCallable().(VirtualFunction) and
overridingFunction.getDeclaringType() = overridingClass
}
/**
* Holds if the qualifier of `this` has flow from an upcast from
* `derivedClass`.
*/
pragma[noinline]
private predicate hasFlowFromCastFrom(Class derivedClass) {
exists(ConvertToBaseInstruction toBase |
this.flowsFrom(instructionNode(toBase), _) and
derivedClass = toBase.getDerivedClass()
)
}
/** Gets a possible run-time target of `call`. */
DataFlowPrivate::DataFlowCallable lambdaDispatch(DataFlowPrivate::DataFlowCall call) {
exists(PathNode p1, PathNode p2 | p1 = p2 or edgePlus(p1, p2) | pairCand(p1, p2, result, call))
}
}