Java: add draft of generated vs manual MaD metrics query

This commit is contained in:
Jami Cogswell
2022-12-06 22:15:19 -05:00
parent 5d43c431c0
commit b82f9b1911
7 changed files with 286 additions and 6 deletions

View File

@@ -241,9 +241,19 @@ module Public {
}
/**
 * Holds if the summary is auto generated and not manually generated.
 *
 * This default implementation is `none()`, so it never holds.
 */
predicate isAutoGenerated() { none() }
/**
 * Holds if the summary is manually generated and not auto generated.
 *
 * This default implementation is `none()`, so it never holds.
 */
predicate isManuallyGenerated() { none() }
/**
 * Holds if the summary is both auto generated and manually generated.
 *
 * This default implementation is `none()`, so it never holds.
 */
predicate isBothAutoAndManuallyGenerated() { none() }
}
/** A callable with a flow summary stating there is no flow via the callable. */
@@ -991,6 +1001,20 @@ module Private {
not summaryElement(this, _, _, _, false)
}
/**
 * Holds if `summaryElement` has a row for this callable with the given
 * `inSpec`, `outSpec` and `kind` whose last argument is `false` (manual),
 * and there is no row at all for this callable whose last argument is
 * `true` (generated).
 */
private predicate relevantSummaryElementManual(
  AccessPath inSpec, AccessPath outSpec, string kind
) {
  // Exclude callables that have any generated row, whatever its specs.
  not summaryElement(this, _, _, _, true) and
  summaryElement(this, inSpec, outSpec, kind, false)
}
/**
 * Holds if `summaryElement` has a row for this callable with the given
 * `inSpec`, `outSpec` and `kind`, and this callable has at least one
 * generated row (last argument `true`) and at least one manual row
 * (last argument `false`).
 *
 * The generated and manual rows are not required to share the same
 * `inSpec`/`outSpec`/`kind`. The "generated only" and "manual only"
 * predicates exclude a callable as soon as it has *any* row of the other
 * provenance, so requiring identical rows here would leave callables with
 * differing generated and manual rows in none of the three categories.
 */
private predicate relevantSummaryElementBothGeneratedAndManual(
  AccessPath inSpec, AccessPath outSpec, string kind
) {
  summaryElement(this, inSpec, outSpec, kind, _) and
  summaryElement(this, _, _, _, true) and
  summaryElement(this, _, _, _, false)
}
private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) {
summaryElement(this, inSpec, outSpec, kind, false)
or
@@ -1012,6 +1036,12 @@ module Private {
}
// Holds if `relevantSummaryElementGenerated` holds for any input spec, output spec and kind.
override predicate isAutoGenerated() { this.relevantSummaryElementGenerated(_, _, _) }
// Holds if `relevantSummaryElementManual` holds for any input spec, output spec and kind.
override predicate isManuallyGenerated() { this.relevantSummaryElementManual(_, _, _) }
// Holds if `relevantSummaryElementBothGeneratedAndManual` holds for any
// input spec, output spec and kind.
override predicate isBothAutoAndManuallyGenerated() {
this.relevantSummaryElementBothGeneratedAndManual(_, _, _)
}
}
/** Holds if component `c` of specification `spec` cannot be parsed. */

View File

@@ -0,0 +1,100 @@
/**
* @id java/summary/generated-vs-manual-coverage
* @name Metrics of generated versus manual MaD coverage
* @description Expose metrics for the number of API endpoints covered by generated versus manual MaD models.
* @kind table
* @tags summary
*/
//import java // not needed I guess
import semmle.code.java.dataflow.FlowSummary // for SummarizedCallable
import utils.modelgenerator.internal.CaptureModels // for DataFlowTargetApi
// ! improve QLDoc?
/**
 * A `SummarizedCallable` whose underlying callable is also a
 * `DataFlowTargetApi`, i.e. a member of the intersection of the two.
 *
 * Callables that are the method of a `FunctionalExpr` are excluded.
 */
class MadModeledCallable extends SummarizedCallableBase {
  // TODO: find a better name for this class.
  MadModeledCallable() {
    exists(DataFlowTargetApi api |
      api = this.asCallable() and
      // TODO: drop this conjunct if `DataFlowTargetApi` itself is adjusted to
      // exclude `FunctionalExpr` (see static-team slack thread).
      not api = any(FunctionalExpr lambda).asMethod()
    ) and
    this instanceof SummarizedCallable
  }
}
// ! move to other file
/**
 * Gets the number of `MadModeledCallable`s declared in `package` that fall
 * into the given `provenance` category.
 *
 * `provenance` is one of:
 * - `"generated"`: `isAutoGenerated()` holds (generated and not manual),
 * - `"manual"`: `isManuallyGenerated()` holds (manual and not generated),
 * - `"both"`: `isBothAutoAndManuallyGenerated()` holds.
 *
 * `package` ranges only over packages that declare at least one
 * `MadModeledCallable`; for such a package the result is `0` when no
 * callable matches the category.
 */
float getNumMadModels(string package, string provenance) {
  provenance in ["generated", "manual", "both"] and
  // Restrict `package` to packages that contain at least one modeled callable.
  package = any(MadModeledCallable mc).asCallable().getDeclaringType().getPackage().toString() and
  result =
    count(MadModeledCallable callable |
      callable.asCallable().getDeclaringType().getPackage().toString() = package and
      (
        // generated and NOT manual = "auto-only"
        provenance = "generated" and
        callable.(SummarizedCallable).isAutoGenerated()
        or
        // manual and NOT generated = "manual-only"
        provenance = "manual" and
        callable.(SummarizedCallable).isManuallyGenerated()
        or
        // BOTH generated and manual = "both"
        provenance = "both" and
        callable.(SummarizedCallable).isBothAutoAndManuallyGenerated()
      )
    )
}
// ! move to other file
/**
 * Gets the number of `DataFlowTargetApi`s declared in `package` that are not
 * the callable of any `SummarizedCallable`.
 *
 * Methods of `FunctionalExpr`s are ignored, both when choosing the packages
 * to report and when counting. `package` ranges only over packages that
 * declare at least one remaining `DataFlowTargetApi`.
 */
float getNumApisWithoutMadModel(string package) {
  // TODO: drop the `FunctionalExpr` exclusions below if `DataFlowTargetApi`
  // itself is adjusted to exclude them (see static-team slack thread).
  exists(DataFlowTargetApi anyApi |
    not anyApi = any(FunctionalExpr lambda).asMethod() and
    anyApi.getDeclaringType().getPackage().toString() = package
  ) and
  result =
    count(DataFlowTargetApi api |
      api.getDeclaringType().getPackage().toString() = package and
      // remove lambdas
      not api = any(FunctionalExpr lambda).asMethod() and
      // set minus with SummarizedCallables
      not api = any(SummarizedCallable sc).asCallable()
    )
}
// ! Note: adjust metric formulas as needed after more discussion with Yorck
/*
* metric1:
* Proportion of manual models covered by automation: “both” / (“both” + “manual-only”)
* Auto-generated vs all positive manual (percentage of manual models covered by auto-generation)
*/
/*
* metric2:
* Coverage relative to total number of APIs: (“auto-only” + “both” + “manual-only”) / “all”
* Auto-generated vs specific pos+neg subset (top-N manual, random)
*/
// Per-package table of MaD coverage counts and the two derived metrics.
//
// Columns:
// - generated / manual / both: number of modeled APIs per provenance category
// - notModeled: number of target APIs without any MaD model
// - all: sum of the four counts above
// - metric1: both / (both + manual)
// - metric2: (generated + both + manual) / all
//
// Both metrics are reported as 0 when their denominator is 0, rather than
// dividing by zero. This happens for metric1 whenever a package has no
// manual or "both" models (for example, only auto-generated models).
from
  string package, float generated, float manual, float both, float notModeled, float all,
  float metric1, float metric2
where
  generated = getNumMadModels(package, "generated") and
  manual = getNumMadModels(package, "manual") and
  both = getNumMadModels(package, "both") and
  notModeled = getNumApisWithoutMadModel(package) and // ! better name for this?, "none" is a reserved keyword :(
  all = generated + manual + both + notModeled and
  (
    if both + manual = 0
    then metric1 = 0.0
    else metric1 = both / (both + manual)
  ) and
  (
    if all = 0
    then metric2 = 0.0
    else metric2 = (generated + both + manual) / all
  )
select package, generated, manual, both, notModeled, all, metric1, metric2 order by package