C++: Add alert provenance plumbing.

This commit is contained in:
Anders Schack-Mulligen
2024-02-07 11:37:45 +01:00
parent 6991f5452f
commit f202661912
8 changed files with 135 additions and 111 deletions

View File

@@ -286,6 +286,10 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
 * Holds if `source` is a known source associated with the model name `model`.
 * NOTE(review): presumably consumed by the shared data-flow library for alert
 * provenance - confirm. Defined as `none()` here, so it has no results.
 */
predicate knownSourceModel(Node source, string model) { none() }
/**
 * Holds if `sink` is a known sink associated with the model name `model`.
 * NOTE(review): presumably consumed by the shared data-flow library for alert
 * provenance - confirm. Defined as `none()` here, so it has no results.
 */
predicate knownSinkModel(Node sink, string model) { none() }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.

View File

@@ -516,7 +516,7 @@ private module ThisFlow {
*/
cached
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
simpleLocalFlowStep(nodeFrom, nodeTo, _)
or
// Field flow is not strictly a "step" but covers the whole function
// transitively. There's no way to get a step-like relation out of the global
@@ -530,64 +530,67 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
* This is the local flow predicate that's used as a building block in global
* data flow. It may have less flow than the `localFlowStep` predicate.
*/
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Expr -> Expr
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())
or
// Assignment -> LValue post-update node
//
// This is used for assignments whose left-hand side is not a variable
// assignment or a storeStep but is still modeled by other means. It could be
// a call to `operator*` or `operator[]` where taint should flow to the
// post-update node of the qualifier.
exists(AssignExpr assign |
nodeFrom.asExpr() = assign and
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr() = assign.getLValue()
)
or
// Node -> FlowVar -> VariableAccess
exists(FlowVar var |
(
exprToVarStep(nodeFrom.asExpr(), var)
or
varSourceBaseCase(var, nodeFrom.asParameter())
or
varSourceBaseCase(var, nodeFrom.asUninitialized())
or
var.definedPartiallyAt(nodeFrom.asPartialDefinition())
) and
varToNodeStep(var, nodeTo)
)
or
// Expr -> DefinitionByReferenceNode
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
or
// `this` -> adjacent-`this`
ThisFlow::adjacentThisRefs(nodeFrom, nodeTo)
or
// post-update-`this` -> following-`this`-ref
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
or
// In `f(&x->a)`, this step provides the flow from post-`&` to post-`x->a`,
// from which there is field flow to `x` via reverse read.
exists(PartialDefinition def, Expr inner, Expr outer |
def.definesExpressions(inner, outer) and
inner = nodeTo.(InnerPartialDefinitionNode).getPreUpdateNode().asExpr() and
outer = nodeFrom.(PartialDefinitionNode).getPreUpdateNode().asExpr()
)
or
// Reverse flow: data that flows from the post-update node of a reference
// returned by a function call, back into the qualifier of that function.
// This allows data to flow 'in' through references returned by a modeled
// function such as `operator[]`.
exists(DataFlowFunction f, Call call, FunctionInput inModel, FunctionOutput outModel |
call.getTarget() = f and
inModel.isReturnValueDeref() and
outModel.isQualifierObject() and
f.hasDataFlow(inModel, outModel) and
nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr() = call and
nodeTo.asDefiningArgument() = call.getQualifier()
)
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
// The parenthesized disjunction below lists the individual step kinds. None of
// them binds `model`; the final conjunct fixes it to the empty string for all
// of them.
(
// Expr -> Expr
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())
or
// Assignment -> LValue post-update node
//
// This is used for assignments whose left-hand side is not a variable
// assignment or a storeStep but is still modeled by other means. It could be
// a call to `operator*` or `operator[]` where taint should flow to the
// post-update node of the qualifier.
exists(AssignExpr assign |
nodeFrom.asExpr() = assign and
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr() = assign.getLValue()
)
or
// Node -> FlowVar -> VariableAccess
exists(FlowVar var |
(
exprToVarStep(nodeFrom.asExpr(), var)
or
varSourceBaseCase(var, nodeFrom.asParameter())
or
varSourceBaseCase(var, nodeFrom.asUninitialized())
or
var.definedPartiallyAt(nodeFrom.asPartialDefinition())
) and
varToNodeStep(var, nodeTo)
)
or
// Expr -> DefinitionByReferenceNode
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
or
// `this` -> adjacent-`this`
ThisFlow::adjacentThisRefs(nodeFrom, nodeTo)
or
// post-update-`this` -> following-`this`-ref
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
or
// In `f(&x->a)`, this step provides the flow from post-`&` to post-`x->a`,
// from which there is field flow to `x` via reverse read.
exists(PartialDefinition def, Expr inner, Expr outer |
def.definesExpressions(inner, outer) and
inner = nodeTo.(InnerPartialDefinitionNode).getPreUpdateNode().asExpr() and
outer = nodeFrom.(PartialDefinitionNode).getPreUpdateNode().asExpr()
)
or
// Reverse flow: data that flows from the post-update node of a reference
// returned by a function call, back into the qualifier of that function.
// This allows data to flow 'in' through references returned by a modeled
// function such as `operator[]`.
exists(DataFlowFunction f, Call call, FunctionInput inModel, FunctionOutput outModel |
call.getTarget() = f and
inModel.isReturnValueDeref() and
outModel.isQualifierObject() and
f.hasDataFlow(inModel, outModel) and
nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr() = call and
nodeTo.asDefiningArgument() = call.getQualifier()
)
) and
// No disjunct above supplies a model name, so every step reports "". This
// includes the reverse-flow case, even though it consults a `DataFlowFunction`.
model = ""
}
/**

View File

@@ -32,8 +32,8 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) {
* Holds if the additional step from `src` to `sink` should be included in all
* global taint flow configurations.
*/
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
localAdditionalTaintStep(src, sink)
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink, string model) {
// `localAdditionalTaintStep` is called with two arguments here and does not
// bind `model`, so the model is fixed to the empty string.
localAdditionalTaintStep(src, sink) and model = ""
}
/**

View File

@@ -1020,6 +1020,10 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
/**
 * Holds if `source` is a known source associated with the model name `model`.
 * NOTE(review): presumably consumed by the shared data-flow library for alert
 * provenance - confirm. Defined as `none()` here, so it has no results.
 */
predicate knownSourceModel(Node source, string model) { none() }
/**
 * Holds if `sink` is a known sink associated with the model name `model`.
 * NOTE(review): presumably consumed by the shared data-flow library for alert
 * provenance - confirm. Defined as `none()` here, so it has no results.
 */
predicate knownSinkModel(Node sink, string model) { none() }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
* side-effect, resulting in a summary from `p` to itself.
@@ -1096,7 +1100,7 @@ private predicate localFlowStepWithSummaries(Node node1, Node node2) {
or
readStep(node1, _, node2)
or
DataFlowImplCommon::argumentValueFlowsThrough(node1, _, node2)
DataFlowImplCommon::argumentValueFlowsThrough(node1, _, node2, _)
}
/** Holds if `node` flows to a node that is used in a `SwitchInstruction`. */

View File

@@ -1892,7 +1892,7 @@ private module Cached {
* (intra-procedural) step.
*/
cached
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo, _) }
private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) {
nodeFrom != nodeTo and
@@ -1962,41 +1962,45 @@ private module Cached {
* data flow. It may have less flow than the `localFlowStep` predicate.
*/
cached
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
// Post update node -> Node flow
Ssa::postUpdateFlow(nodeFrom, nodeTo)
or
// Def-use/Use-use flow
Ssa::ssaFlow(nodeFrom, nodeTo)
or
// Operand -> Instruction flow
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
or
// Instruction -> Operand flow
exists(Instruction iFrom, Operand opTo |
iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand()
|
simpleOperandLocalFlowStep(iFrom, opTo) and
// Omit when the instruction node also represents the operand.
not iFrom = Ssa::getIRRepresentationOfOperand(opTo)
)
or
// Phi node -> Node flow
Ssa::fromPhiNode(nodeFrom, nodeTo)
or
// Indirect operand -> (indirect) instruction flow
indirectionOperandFlow(nodeFrom, nodeTo)
or
// Indirect instruction -> indirect operand flow
indirectionInstructionFlow(nodeFrom, nodeTo)
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
// `model` is "" for every step kind in the parenthesized group and for
// `reverseFlow`; `modelFlow` binds `model` itself.
(
// Post update node -> Node flow
Ssa::postUpdateFlow(nodeFrom, nodeTo)
or
// Def-use/Use-use flow
Ssa::ssaFlow(nodeFrom, nodeTo)
or
// Operand -> Instruction flow
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
or
// Instruction -> Operand flow
exists(Instruction iFrom, Operand opTo |
iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand()
|
simpleOperandLocalFlowStep(iFrom, opTo) and
// Omit when the instruction node also represents the operand.
not iFrom = Ssa::getIRRepresentationOfOperand(opTo)
)
or
// Phi node -> Node flow
Ssa::fromPhiNode(nodeFrom, nodeTo)
or
// Indirect operand -> (indirect) instruction flow
indirectionOperandFlow(nodeFrom, nodeTo)
or
// Indirect instruction -> indirect operand flow
indirectionInstructionFlow(nodeFrom, nodeTo)
) and
// `and` binds tighter than `or`, so this conjunct applies only to the
// parenthesized group above, not to the disjuncts that follow.
model = ""
or
// Flow through modeled functions
modelFlow(nodeFrom, nodeTo)
modelFlow(nodeFrom, nodeTo, model)
or
// Reverse flow: data that flows from the definition node back into the indirection returned
// by a function. This allows data to flow 'in' through references returned by a modeled
// function such as `operator[]`.
reverseFlow(nodeFrom, nodeTo)
reverseFlow(nodeFrom, nodeTo) and
model = ""
}
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
@@ -2011,12 +2015,13 @@ private module Cached {
opTo.getDef() = iFrom
}
private predicate modelFlow(Node nodeFrom, Node nodeTo) {
private predicate modelFlow(Node nodeFrom, Node nodeTo, string model) {
exists(
CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
|
call.getStaticCallTarget() = func and
func.hasDataFlow(modelIn, modelOut)
func.hasDataFlow(modelIn, modelOut) and
model = "DataFlowFunction"
|
nodeFrom = callInput(call, modelIn) and
nodeTo = callOutput(call, modelOut)

View File

@@ -10,7 +10,7 @@ private import PrintIRUtilities
*/
private string getFromFlow(Node node2, int order1, int order2) {
exists(Node node1 |
simpleLocalFlowStep(node1, node2) and
// Any model is accepted (`_`); only the predecessor node `node1` is used.
simpleLocalFlowStep(node1, node2, _) and
result = nodeId(node1, order1, order2)
)
}
@@ -20,7 +20,7 @@ private string getFromFlow(Node node2, int order1, int order2) {
*/
private string getToFlow(Node node1, int order1, int order2) {
exists(Node node2 |
simpleLocalFlowStep(node1, node2) and
// Any model is accepted (`_`); only the successor node `node2` is used.
simpleLocalFlowStep(node1, node2, _) and
result = nodeId(node2, order1, order2)
)
}

View File

@@ -15,7 +15,7 @@ private import semmle.code.cpp.ir.dataflow.FlowSteps
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
DataFlow::localFlowStep(nodeFrom, nodeTo)
or
localAdditionalTaintStep(nodeFrom, nodeTo)
// This predicate has no model column, so the model is ignored with `_`.
localAdditionalTaintStep(nodeFrom, nodeTo, _)
}
/**
@@ -24,10 +24,11 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
* different objects.
*/
cached
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and
model = ""
or
modeledTaintStep(nodeFrom, nodeTo)
modeledTaintStep(nodeFrom, nodeTo, model)
or
// Flow from (the indirection of) an operand of a pointer arithmetic instruction to the
// indirection of the pointer arithmetic instruction. This provides flow from `source`
@@ -35,15 +36,18 @@ predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeT
exists(PointerArithmeticInstruction pai, int indirectionIndex |
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
)
) and
model = ""
or
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo)
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and
model = ""
or
// object->field conflation for content that is a `TaintInheritingContent`.
exists(DataFlow::ContentSet f |
readStep(nodeFrom, f, nodeTo) and
f.getAReadContent() instanceof TaintInheritingContent
)
) and
model = ""
}
/**
@@ -120,8 +124,8 @@ predicate localExprTaint(Expr e1, Expr e2) {
* Holds if the additional step from `src` to `sink` should be included in all
* global taint flow configurations.
*/
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
localAdditionalTaintStep(src, sink)
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink, string model) {
// Forwards `model` unchanged from `localAdditionalTaintStep`.
localAdditionalTaintStep(src, sink, model)
}
/**
@@ -141,7 +145,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
* Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
* modeled function.
*/
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut, string model) {
// Normal taint steps
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
call.getStaticCallTarget() = func and
@@ -150,7 +154,8 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
nodeIn = callInput(call, modelIn) and nodeOut = callOutput(call, modelOut)
or
exists(int d | nodeIn = callInput(call, modelIn, d) and nodeOut = callOutput(call, modelOut, d))
)
) and
model = "TaintFunction"
or
// Taint flow from one argument to another and data flow from an argument to a
// return value. This happens in functions like `strcat` and `memcpy`. We
@@ -167,7 +172,8 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
modelMidOut.isParameterDeref(indexMid) and
modelMidIn.isParameter(indexMid)
modelMidIn.isParameter(indexMid) and
model = "TaintFunction"
)
or
// Taint flow from a pointer argument to an output, when the model specifies flow from the deref
@@ -180,9 +186,11 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
indirectArgument.hasAddressOperandAndIndirectionIndex(nodeIn.asOperand(), _) and
call.getStaticCallTarget() = func and
(
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
model = "DataFlowFunction"
or
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
func.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
model = "TaintFunction"
) and
nodeOut = callOutput(call, modelOut)
)

View File

@@ -35,7 +35,7 @@ module XxeConfig implements DataFlow::StateConfigSig {
) {
// create additional flow steps for `XxeFlowStateTransformer`s
state2 = node2.asIndirectExpr().(XxeFlowStateTransformer).transform(state1) and
DataFlow::simpleLocalFlowStep(node1, node2)
DataFlow::simpleLocalFlowStep(node1, node2, _)
}
predicate isBarrier(DataFlow::Node node, FlowState flowstate) {