Shared: Refactor the model generator and put the heuristic queries in their own module.

This commit is contained in:
Michael Nebel
2025-04-15 14:10:17 +02:00
parent 2357a69d55
commit f78be91af2

View File

@@ -338,10 +338,6 @@ module MakeModelGenerator<
}
}
/**
* Gets the output string for the return node `node`, as computed by
* `PrintReturnNodeExt` instantiated with `paramReturnNodeAsOutput/2`.
*/
private string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
final private class SummaryTargetApiFinal = SummaryTargetApi;
class DataFlowSummaryTargetApi extends SummaryTargetApiFinal {
@@ -352,16 +348,6 @@ module MakeModelGenerator<
class DataFlowSinkTargetApi = SinkTargetApi;
/**
* The input to the model printing module: the API classes that models are
* printed for, and the provenance string to use.
*/
private module ModelPrintingInput implements Printing::ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
// The provenance string supplied to the printing module for models printed with this input.
string getProvenance() { result = "df-generated" }
}
/** The model printing module instantiated with `ModelPrintingInput`. */
module ModelPrinting = Printing::ModelPrinting<ModelPrintingInput>;
/**
* Holds if `c` is a relevant content kind, where the underlying type is relevant.
*/
@@ -369,19 +355,6 @@ module MakeModelGenerator<
isRelevantType(getUnderlyingContentType(c))
}
/**
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*
* A read step is accepted when `f` has no underlying content type or when
* `isRelevantTypeInContent(f)` holds. A store step is accepted only when
* `containerContent(f)` holds.
*/
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::ContentSet f |
DataFlow::readStep(node1, f, node2) and
// Partially restrict the content types used for intermediate steps.
(not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f))
)
or
// Store steps are only followed for container-like content.
exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f))
}
/**
* Holds if content `c` is either a field, a synthetic field or language specific
* content of a relevant type or a container like content.
@@ -393,177 +366,302 @@ module MakeModelGenerator<
}
/**
* Gets the MaD string representation of the parameter node `p`.
* Provides classes and predicates related to capturing summary models
* based on heuristic data flow.
*/
string parameterNodeAsInput(DataFlow::ParameterNode p) {
result = parameterAccess(asParameter(p))
or
result = qualifierString() and p instanceof InstanceParameterNode
}
module Heuristic {
private module ModelPrintingInput implements Printing::ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
/**
* Gets the MaD input string representation of `source`.
*/
string asInputArgument(NodeExtended source) { result = getInputArgument(source) }
class SourceOrSinkApi = SourceOrSinkTargetApi;
/**
* Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`).
*/
private string captureQualifierFlow(DataFlowSummaryTargetApi api) {
exists(ReturnNodeExt ret |
api = returnNodeEnclosingCallable(ret) and
isOwnInstanceAccessNode(ret)
) and
result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue")
}
private int accessPathLimit0() { result = 2 }
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
abstract private class TaintState extends TTaintState {
abstract string toString();
}
/**
* A FlowState representing a tainted read.
*/
private class TaintRead extends TaintState, TTaintRead {
private int step;
TaintRead() { this = TTaintRead(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintRead(" + step + ")" }
}
/**
* A FlowState representing a tainted write.
*/
private class TaintStore extends TaintState, TTaintStore {
private int step;
TaintStore() { this = TTaintStore(step) }
/**
* Gets the flow state step number.
*/
int getStep() { result = step }
override string toString() { result = "TaintStore(" + step + ")" }
}
/**
* A data flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*/
module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
exists(Callable c |
c = getEnclosingCallable(source) and
c instanceof DataFlowSummaryTargetApi and
not isUninterestingForHeuristicDataFlowModels(c)
) and
state.(TaintRead).getStep() = 0
string getProvenance() { result = "df-generated" }
}
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) and
(state instanceof TaintRead or state instanceof TaintStore)
module ModelPrinting = Printing::ModelPrinting<ModelPrintingInput>;
private string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c |
node1 = n1.asNode() and
node2 = n2.asNode() and
DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
/**
* Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`.
*/
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(DataFlow::ContentSet f |
DataFlow::readStep(node1, f, node2) and
// Partially restrict the content types used for intermediate steps.
(not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f))
)
or
exists(DataFlow::ContentSet c |
DataFlow::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f))
}
/**
 * Gets the MaD string representation of the parameter node `p`: the
 * qualifier string when `p` is an instance parameter node, or the parameter
 * access string of the parameter that `p` represents.
 */
string parameterNodeAsInput(DataFlow::ParameterNode p) {
  p instanceof InstanceParameterNode and result = qualifierString()
  or
  result = parameterAccess(asParameter(p))
}
/**
 * Gets the MaD input string representation of `source`, as given by
 * `getInputArgument`.
 */
private string asInputArgument(NodeExtended source) {
  getInputArgument(source) = result
}
/**
 * Gets the lifted value model from the qualifier to `ReturnValue` for `api`,
 * if `api` encloses a return node that is an access to its own instance
 * (the `fluent` programming pattern, where `this` is returned).
 */
private string captureQualifierFlow(DataFlowSummaryTargetApi api) {
  result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue") and
  exists(ReturnNodeExt thisReturn |
    isOwnInstanceAccessNode(thisReturn) and
    returnNodeEnclosingCallable(thisReturn) = api
  )
}
/** Gets the upper bound on the step numbers of the taint flow states. */
private int accessPathLimit0() { result = 2 }
/**
* The taint flow states: one read state per step number in
* `[0 .. accessPathLimit0()]` and one store state per step number in
* `[1 .. accessPathLimit0()]`.
*/
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
/** A taint flow state; the abstract base class over the `TTaintState` values. */
abstract private class TaintState extends TTaintState {
/** Gets a textual representation of this flow state. */
abstract string toString();
}
/**
 * A flow state representing a tainted read, identified by its step number.
 */
private class TaintRead extends TaintState, TTaintRead {
  /**
   * Gets the step number of this flow state.
   */
  int getStep() { this = TTaintRead(result) }

  override string toString() { result = "TaintRead(" + this.getStep() + ")" }
}
/**
 * A flow state representing a tainted write, identified by its step number.
 */
private class TaintStore extends TaintState, TTaintStore {
  /**
   * Gets the step number of this flow state.
   */
  int getStep() { this = TTaintStore(result) }

  override string toString() { result = "TaintStore(" + this.getStep() + ")" }
}
/**
* A data flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*
* The flow state counts read and store steps: flow starts in read state 0,
* each additional read step increments the read count, and each additional
* store step either enters store state 1 from a read state or increments the
* store count. The state types bound both counts by `accessPathLimit0()`.
*/
private module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
/**
* Holds if `source` is a parameter node whose enclosing callable is a summary
* target API that is not excluded from heuristic models, and `state` is the
* initial read state (step 0).
*/
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
exists(Callable c |
c = getEnclosingCallable(source) and
c instanceof DataFlowSummaryTargetApi and
not isUninterestingForHeuristicDataFlowModels(c)
) and
state.(TaintRead).getStep() = 0
}
/**
* Holds if `sink` is a return node that is not an access to the enclosing
* instance, in a callable for which no qualifier flow model is captured.
* Any read or store state is accepted.
*/
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink))) and
(state instanceof TaintRead or state instanceof TaintStore)
}
/**
* Holds if there is a store or read step of relevant content from `node1` in
* `state1` to `node2` in `state2`.
*/
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
// Store step: enter store state 1 from any read state, or increment the store count.
exists(DataFlow::NodeEx n1, DataFlow::NodeEx n2, DataFlow::ContentSet c |
node1 = n1.asNode() and
node2 = n2.asNode() and
DataFlow::storeEx(n1, c.getAStoreContent(), n2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
)
or
// Read step: increment the read count. No case here leads from a store state to a read state.
exists(DataFlow::ContentSet c |
DataFlow::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
/** Holds if `n` has a type for which `isRelevantType` does not hold. */
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
/** Gets the flow feature `FeatureEqualSourceSinkCallContext`. */
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
/** Taint tracking with flow state, instantiated with `PropagateFlowConfig`. */
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
 * Gets the lifted taint model of `api` from the parameter node `p` to the
 * return node `returnNodeExt`, provided that `api` is the enclosing callable
 * of both nodes and that the input and output strings differ.
 *
 * This predicate does not itself check that there is flow from `p` to
 * `returnNodeExt`.
 */
string captureThroughFlow0(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
  api = getEnclosingCallable(p) and
  api = getEnclosingCallable(returnNodeExt) and
  exists(string src, string dst |
    src = parameterNodeAsInput(p) and
    dst = getOutput(returnNodeExt) and
    src != dst
  |
    result = ModelPrinting::asLiftedTaintModel(api, src, dst)
  )
}
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
/**
 * Gets the summary model(s) of `api`, one for each parameter node and return
 * node of `api` that `PropagateFlow` finds flow between.
 */
private string captureThroughFlow(DataFlowSummaryTargetApi api) {
  exists(DataFlow::ParameterNode param, ReturnNodeExt ret | PropagateFlow::flow(param, ret) |
    result = captureThroughFlow0(api, param, ret)
  )
}
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
/**
 * Gets the summary model(s) of `api`: the qualifier flow model if `api` is a
 * fluent API, and the models for flow from parameters to the return value or
 * parameter.
 */
string captureFlow(DataFlowSummaryTargetApi api) {
  result = [captureQualifierFlow(api), captureThroughFlow(api)]
}
}
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
 * Gets the neutral summary model for `api`, if any.
 * A neutral model is produced only for a relevant `api`, and only when no
 * summary model is captured for any API with the same lifted API as `api`.
 */
string captureNoFlow(DataFlowSummaryTargetApi api) {
  api.isRelevant() and
  not exists(DataFlowSummaryTargetApi other |
    other.lift() = api.lift() and
    exists(captureFlow(other))
  ) and
  result = ModelPrinting::asNeutralSummaryModel(api)
}
/**
* Gets the lifted taint model of `api` from the parameter node `p` to the
* return node `returnNodeExt`, provided that `api` is the enclosing callable
* of both nodes and that the input and output strings differ.
*/
string captureThroughFlow0(
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
exists(string input, string output |
getEnclosingCallable(p) = api and
getEnclosingCallable(returnNodeExt) = api and
input = parameterNodeAsInput(p) and
output = getOutput(returnNodeExt) and
// No model is produced when the input and output strings are identical.
input != output and
result = ModelPrinting::asLiftedTaintModel(api, input, output)
)
}
/**
* A data flow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API exposes an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
sourceNode(source, kind)
)
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
*/
private string captureThroughFlow(DataFlowSummaryTargetApi api) {
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
PropagateFlow::flow(p, returnNodeExt) and
result = captureThroughFlow0(api, p, returnNodeExt)
)
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi
}
/**
* Gets the summary model(s) of `api`, if there is flow from parameters to the
* return value or parameter or if `api` is a fluent API.
*/
string captureFlow(DataFlowSummaryTargetApi api) {
result = captureQualifierFlow(api) or
result = captureThroughFlow(api)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
/**
* Gets the neutral summary model for `api`, if any.
* A neutral summary model is generated, if we are not generating
* a summary model that applies to `api`.
*/
string captureNoFlow(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0 |
exists(captureFlow(api0)) and api0.lift() = api.lift()
) and
api.isRelevant() and
result = ModelPrinting::asNeutralSummaryModel(api)
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
/** Taint tracking instantiated with `PropagateFromSourceConfig`. */
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;

/**
 * Gets the source model(s) of `api`, if there is flow from an existing known
 * source to a return node of `api`, unless `irrelevantSourceSinkApi` holds
 * for the callable enclosing the source and `api`.
 */
string captureSource(DataFlowSourceTargetApi api) {
  exists(NodeExtended src, ReturnNodeExt ret, string kind |
    sourceNode(src, kind) and
    PropagateFromSource::flow(src, ret) and
    getEnclosingCallable(ret) = api and
    not irrelevantSourceSinkApi(getEnclosingCallable(src), api)
  |
    result = ModelPrinting::asSourceModel(api, getOutput(ret), kind)
  )
}
/**
* A data flow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
/** Holds if `source` is an API source node whose enclosing callable is a sink target API. */
predicate isSource(DataFlow::Node source) {
apiSource(source) and
getEnclosingCallable(source) instanceof DataFlowSinkTargetApi
}
/** Holds if `sink` is a known sink node of a relevant sink kind. */
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind))
}
/**
* Holds if `node` has a type for which `isRelevantType` does not hold, or if
* `sinkModelSanitizer(node)` holds.
*/
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t))
or
sinkModelSanitizer(node)
}
/** Gets the flow feature `FeatureHasSourceCallContext`. */
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureHasSourceCallContext
}
/** Holds if `isRelevantTaintStep` holds from `node1` to `node2`. */
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
/** Taint tracking instantiated with `PropagateToSinkConfig`. */
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;

/**
 * Gets the sink model(s) of `api`, if there is flow from a source node
 * enclosed by `api` to an existing known sink.
 */
string captureSink(DataFlowSinkTargetApi api) {
  exists(NodeExtended origin, NodeExtended knownSink, string kind |
    sinkNode(knownSink, kind) and
    PropagateToSink::flow(origin, knownSink) and
    getEnclosingCallable(origin) = api
  |
    result = ModelPrinting::asSinkModel(api, asInputArgument(origin), kind)
  )
}
}
/**
@@ -943,7 +1041,7 @@ module MakeModelGenerator<
* 2. If content based flow does not yield any summary for an API, then we try and
* generate flow summaries using the non-content based summary generator.
*/
string captureMixedFlow(DataFlowSummaryTargetApi api, boolean lift) {
string captureFlow(DataFlowSummaryTargetApi api, boolean lift) {
result = ContentSensitive::captureFlow(api, lift)
or
not exists(DataFlowSummaryTargetApi api0 |
@@ -953,7 +1051,7 @@ module MakeModelGenerator<
api0.lift() = api.lift() and
exists(ContentSensitive::captureFlow(api0, true))
) and
result = captureFlow(api) and
result = Heuristic::captureFlow(api) and
lift = true
}
@@ -962,9 +1060,9 @@ module MakeModelGenerator<
* A neutral summary model is generated, if we are not generating
* a mixed summary model that applies to `api`.
*/
string captureMixedNeutral(DataFlowSummaryTargetApi api) {
string captureNeutral(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0, boolean lift |
exists(captureMixedFlow(api0, lift)) and
exists(captureFlow(api0, lift)) and
(
lift = false and
(api0 = api or api0 = api.lift())
@@ -973,96 +1071,6 @@ module MakeModelGenerator<
)
) and
api.isRelevant() and
result = ModelPrinting::asNeutralSummaryModel(api)
}
/**
* A data flow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API exposes an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
/** Holds if `source` is a known source node of a relevant source kind. */
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
sourceNode(source, kind)
)
}
/** Holds if `sink` is a return node whose enclosing callable is a source target API. */
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
getEnclosingCallable(sink) instanceof DataFlowSourceTargetApi
}
/** Gets the flow feature `FeatureHasSinkCallContext`. */
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
/** Holds if `n` has a type for which `isRelevantType` does not hold. */
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
}
/** Holds if `isRelevantTaintStep` holds from `node1` to `node2`. */
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
/** Taint tracking instantiated with `PropagateFromSourceConfig`. */
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;
/**
* Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`.
*/
string captureSource(DataFlowSourceTargetApi api) {
exists(NodeExtended source, ReturnNodeExt sink, string kind |
PropagateFromSource::flow(source, sink) and
sourceNode(source, kind) and
api = getEnclosingCallable(sink) and
// Skip pairs that `irrelevantSourceSinkApi` excludes for the source's enclosing callable and `api`.
not irrelevantSourceSinkApi(getEnclosingCallable(source), api) and
result = ModelPrinting::asSourceModel(api, getOutput(sink), kind)
)
}
/**
* A data flow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
/** Holds if `source` is an API source node whose enclosing callable is a sink target API. */
predicate isSource(DataFlow::Node source) {
apiSource(source) and
getEnclosingCallable(source) instanceof DataFlowSinkTargetApi
}
/** Holds if `sink` is a known sink node of a relevant sink kind. */
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind))
}
/**
* Holds if `node` has a type for which `isRelevantType` does not hold, or if
* `sinkModelSanitizer(node)` holds.
*/
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t))
or
sinkModelSanitizer(node)
}
/** Gets the flow feature `FeatureHasSourceCallContext`. */
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
/** Holds if `isRelevantTaintStep` holds from `node1` to `node2`. */
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
/** Taint tracking instantiated with `PropagateToSinkConfig`. */
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;
/**
* Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink.
*/
string captureSink(DataFlowSinkTargetApi api) {
exists(NodeExtended src, NodeExtended sink, string kind |
PropagateToSink::flow(src, sink) and
sinkNode(sink, kind) and
api = getEnclosingCallable(src) and
result = ModelPrinting::asSinkModel(api, asInputArgument(src), kind)
)
result = Heuristic::ModelPrinting::asNeutralSummaryModel(api)
}
}