Merge pull request #17521 from michaelnebel/modelgen/moreimprovements

C#/Java: Content based model generation improvements.
This commit is contained in:
Michael Nebel
2024-09-30 11:22:30 +02:00
committed by GitHub
40 changed files with 626 additions and 251 deletions

View File

@@ -272,7 +272,10 @@ module MakeImplContentDataFlow<LocationSig Location, InputSig<Location> Lang> {
)
}
private ContentSet getAtIndex(int i) {
/**
* Gets the content set at index `i` in this access path, if any.
*/
ContentSet getAtIndex(int i) {
i = 0 and
result = this.getHead()
or
@@ -286,23 +289,6 @@ module MakeImplContentDataFlow<LocationSig Location, InputSig<Location> Lang> {
i >= 0 and
result = TAccessPathCons(this.getAtIndex(i), this.reverse0(i - 1))
}
/**
* Gets the length of this access path: 0 for the empty access path, and
* one more than the length of the tail otherwise.
*/
private int length() {
// Base case: the empty access path.
result = 0 and this = TAccessPathNil()
or
// Recursive case: one more than the length of the tail.
result = 1 + this.getTail().length()
}
/**
* Gets the reversed access path, if any.
*
* Note that not all access paths have a reverse as these are not
* included by default in the IPA type.
*
* The result is obtained from `reverse0` applied at the last index
* (`length() - 1`) of this access path.
*/
AccessPath reverse() { result = this.reverse0(this.length() - 1) }
}
/**

View File

@@ -171,6 +171,7 @@ module Make<
[
"ai", // AI (machine learning)
"df", // Dataflow (model generator)
"dfc", // Content dataflow (model generator)
"tb", // Type based (model generator)
"hq", // Heuristic query
]

View File

@@ -30,11 +30,6 @@ signature module ModelGeneratorInputSig<LocationSig Location, InputSig<Location>
* A Callable.
*/
class Callable {
/**
* Gets the number of parameters of this callable.
*/
int getNumberOfParameters();
/**
* Gets a string representation of this callable.
*/
@@ -561,6 +556,16 @@ module MakeModelGenerator<
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
/**
* Input to `Printing::ModelPrinting` for content based models.
* Models printed with this input get the provenance `dfc-generated`.
*/
private module ContentModelPrintingInput implements Printing::ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
/** Gets the provenance string attached to content based generated models. */
string getProvenance() { result = "dfc-generated" }
}
/** The model printer instantiated with `ContentModelPrintingInput`. */
private module ContentModelPrinting = Printing::ModelPrinting<ContentModelPrintingInput>;
/**
* Gets the output string for the return node `node`, as computed by
* `PrintReturnNodeExt` instantiated with `paramReturnNodeAsContentOutput/2`.
*/
private string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
@@ -576,15 +581,7 @@ module MakeModelGenerator<
}
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
i = 0 and
result = "." + printContent(head)
or
i > 0 and result = getContent(tail, i - 1)
)
result = "." + printContent(ap.getAtIndex(i))
}
/**
@@ -605,12 +602,7 @@ module MakeModelGenerator<
* Holds if the access path `ap` contains a field or synthetic field access.
*/
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
mentionsField(tail) or isField(head)
)
isField(ap.getAtIndex(_))
}
private predicate apiFlow(
@@ -626,28 +618,35 @@ module MakeModelGenerator<
/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to a combinatorial
* Content flow is only relevant for an API on a parameter, if
* #content flow from parameter <= 3
* If an API produces more content flow on a parameter, it is likely that
* 1. Types are not sufficiently constrained on the parameter leading to a combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of an object's internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
private DataFlow::ParameterNode parameter;
ContentDataFlowSummaryTargetApi() {
count(string input, string output |
exists(
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
PropagateContentFlow::AccessPath stores
|
apiFlow(this, p, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
apiFlow(this, parameter, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(parameter) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
)
) <= 2 * this.getNumberOfParameters() + 3
) <= 3
}
/**
* Gets a parameter node of this API for which there are at most 3 possible models, if any.
*/
DataFlow::ParameterNode getARelevantParameterNode() { result = parameter }
}
pragma[nomagic]
@@ -658,20 +657,38 @@ module MakeModelGenerator<
) {
PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and
returnNodeExt.getEnclosingCallable() = api and
p.(NodeExtended).getEnclosingCallable() = api
p.(NodeExtended).getEnclosingCallable() = api and
p = api.getARelevantParameterNode()
}
/**
* Holds if any of the content sets in `path` translates into a synthetic field.
*/
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
exists(PropagateContentFlow::AccessPath tail, DataFlow::ContentSet head |
head = path.getHead() and
tail = path.getTail()
|
exists(getSyntheticName(head)) or
hasSyntheticContent(tail)
)
exists(getSyntheticName(path.getAtIndex(_)))
}
/**
* Gets the synthetic name of the content set at index `i` in `ap`, if any.
* There is no result for an index whose content set has no synthetic name.
*/
private string getHashAtIndex(PropagateContentFlow::AccessPath ap, int i) {
result = getSyntheticName(ap.getAtIndex(i))
}
/**
* Gets the `.`-separated concatenation of the synthetic names in `ap`,
* ordered by descending index (that is, in reverse access path order).
* Has no result if no content set in `ap` has a synthetic name, as
* `strictconcat` does not hold for an empty set of values.
*/
private string getReversedHash(PropagateContentFlow::AccessPath ap) {
result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i desc)
}
/**
* Gets the `.`-separated concatenation of the synthetic names in `ap`,
* ordered by ascending index (that is, in access path order).
* Has no result if no content set in `ap` has a synthetic name, as
* `strictconcat` does not hold for an empty set of values.
*/
private string getHash(PropagateContentFlow::AccessPath ap) {
result = strictconcat(int i | | getHashAtIndex(ap, i), "." order by i)
}
/**
* Gets all access paths that contain the synthetic fields
* from `ap` in reverse order (if `ap` contains at least one synthetic field).
* These are the possible candidates for synthetic path continuations.
*
* Candidates are found by comparing the concatenated synthetic names of `ap`
* (in index order) with the concatenated synthetic names of the result
* (in reverse index order). Only content sets that have a synthetic name
* contribute to this comparison.
*/
private PropagateContentFlow::AccessPath getSyntheticPathCandidate(
PropagateContentFlow::AccessPath ap
) {
getHash(ap) = getReversedHash(result)
}
/**
@@ -737,7 +754,7 @@ module MakeModelGenerator<
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(t, read, midType, mid) and
reachesSynthExit(midType, mid.reverse())
reachesSynthExit(midType, getSyntheticPathCandidate(mid))
)
}
@@ -753,7 +770,7 @@ module MakeModelGenerator<
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(midType, mid, t, store) and
synthEntryReaches(midType, mid.reverse())
synthEntryReaches(midType, getSyntheticPathCandidate(mid))
)
}
@@ -782,14 +799,15 @@ module MakeModelGenerator<
Type t1, PropagateContentFlow::AccessPath read, Type t2,
PropagateContentFlow::AccessPath store
) {
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
synthPathEntry(t1, read, t2, store) and
reachesSynthExit(t2, getSyntheticPathCandidate(store))
or
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
exists(PropagateContentFlow::AccessPath store0 | getSyntheticPathCandidate(store0) = read |
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
or
synthEntryReaches(t1, store0) and
step(t1, read, t2, store) and
reachesSynthExit(t2, store.reverse())
reachesSynthExit(t2, getSyntheticPathCandidate(store))
)
}
}
@@ -828,26 +846,64 @@ module MakeModelGenerator<
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
(
if mentionsField(reads) or mentionsField(stores)
then lift = false and api.isRelevant()
else lift = true
)
)
}
/**
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
* the return value or a parameter).
* the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false.
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*/
string captureFlow(ContentDataFlowSummaryTargetApi api) {
exists(string input, string output, boolean lift, boolean preservesValue |
string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) {
exists(string input, string output, boolean preservesValue |
captureFlow0(api, input, output, _, lift) and
preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and
result = ModelPrinting::asModel(api, input, output, preservesValue, lift)
result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift)
)
}
}
/**
* Gets the summary model(s) for `api`, if any. `lift` is true if the model is lifted
* otherwise false.
* The following heuristic is applied:
* 1. If content based flow yields at least one summary for an API, then we use that.
* 2. If content based flow does not yield any summary for an API, then we try and
* generate flow summaries using the non-content based summary generator.
*
* NOTE(review): in the fallback case `lift` is bound to `true`, so the absence
* check only considers lifted content based summaries; an API that only has
* unlifted content based summaries still gets the fallback models. Confirm
* that this is intended.
*/
string captureMixedFlow(DataFlowSummaryTargetApi api, boolean lift) {
// Content based summaries take precedence.
result = ContentSensitive::captureFlow(api, lift)
or
// Fallback: non-content based summaries, which are always reported as lifted.
not exists(ContentSensitive::captureFlow(api, lift)) and
result = captureFlow(api) and
lift = true
}
/**
* Gets the neutral summary model for `api`, if any.
* A neutral summary model is generated, if we are not generating
* a mixed summary model that applies to `api`.
*
* A mixed summary model for `api0` applies to `api` if the model is unlifted
* and `api0` is `api` itself, or if the model is lifted and `api0` and `api`
* have the same `lift()` result. Only relevant APIs (`isRelevant()`) get a
* neutral model.
*/
string captureMixedNeutral(DataFlowSummaryTargetApi api) {
// No mixed summary model may apply to `api`.
not exists(DataFlowSummaryTargetApi api0, boolean lift |
exists(captureMixedFlow(api0, lift)) and
(
// An unlifted model only applies to the exact API it was generated for.
lift = false and api0 = api
or
// A lifted model applies to every API with the same lifted API.
lift = true and api0.lift() = api.lift()
)
) and
api.isRelevant() and
result = ModelPrinting::asNeutralSummaryModel(api)
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.