Java: Update flow summary impl and external flow to support provenance and include testing in Csv model validation.

This commit is contained in:
Michael Nebel
2022-06-13 10:56:15 +02:00
parent 649757c27f
commit 4622b69c5d
2 changed files with 57 additions and 38 deletions

View File

@@ -62,6 +62,10 @@
* sources "remote" indicates a default remote flow source, and for summaries * sources "remote" indicates a default remote flow source, and for summaries
* "taint" indicates a default additional taint step and "value" indicates a * "taint" indicates a default additional taint step and "value" indicates a
* globally applicable value-preserving step. * globally applicable value-preserving step.
* 9. The `provenance` column is a tag to indicate the origin of the summary.
* There are two supported values: "generated" and "manual". "generated" means that
* the model has been emitted by the model generator tool and "manual" means
* that the model has been written by hand.
*/ */
import java import java
@@ -415,17 +419,10 @@ private predicate summaryModel(string row) {
any(SummaryModelCsv s).row(row) any(SummaryModelCsv s).row(row)
} }
bindingset[input]
private predicate getKind(string input, string kind, boolean generated) {
input.splitAt(":", 0) = "generated" and kind = input.splitAt(":", 1) and generated = true
or
not input.matches("%:%") and kind = input and generated = false
}
/** Holds if a source model exists for the given parameters. */ /** Holds if a source model exists for the given parameters. */
predicate sourceModel( predicate sourceModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext, string namespace, string type, boolean subtypes, string name, string signature, string ext,
string output, string kind, boolean generated string output, string kind, string provenance
) { ) {
exists(string row | exists(string row |
sourceModel(row) and sourceModel(row) and
@@ -437,14 +434,15 @@ predicate sourceModel(
row.splitAt(";", 4) = signature and row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = output and row.splitAt(";", 6) = output and
exists(string k | row.splitAt(";", 7) = k and getKind(k, kind, generated)) row.splitAt(";", 7) = kind and
row.splitAt(";", 8) = provenance
) )
} }
/** Holds if a sink model exists for the given parameters. */ /** Holds if a sink model exists for the given parameters. */
predicate sinkModel( predicate sinkModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext, string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind, boolean generated string input, string kind, string provenance
) { ) {
exists(string row | exists(string row |
sinkModel(row) and sinkModel(row) and
@@ -456,22 +454,23 @@ predicate sinkModel(
row.splitAt(";", 4) = signature and row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and row.splitAt(";", 6) = input and
exists(string k | row.splitAt(";", 7) = k and getKind(k, kind, generated)) row.splitAt(";", 7) = kind and
row.splitAt(";", 8) = provenance
) )
} }
/** Holds if a summary model exists for the given parameters. */ /** Holds if a summary model exists for the given parameters. */
predicate summaryModel( predicate summaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext, string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, boolean generated string input, string output, string kind, string provenance
) { ) {
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, generated, _) summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance, _)
} }
/** Holds if a summary model `row` exists for the given parameters. */ /** Holds if a summary model `row` exists for the given parameters. */
predicate summaryModel( predicate summaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext, string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, boolean generated, string row string input, string output, string kind, string provenance, string row
) { ) {
summaryModel(row) and summaryModel(row) and
row.splitAt(";", 0) = namespace and row.splitAt(";", 0) = namespace and
@@ -483,7 +482,8 @@ predicate summaryModel(
row.splitAt(";", 5) = ext and row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and row.splitAt(";", 6) = input and
row.splitAt(";", 7) = output and row.splitAt(";", 7) = output and
exists(string k | row.splitAt(";", 8) = k and getKind(k, kind, generated)) row.splitAt(";", 8) = kind and
row.splitAt(";", 9) = provenance
} }
private predicate relevantPackage(string package) { private predicate relevantPackage(string package) {
@@ -517,25 +517,25 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int
part = "source" and part = "source" and
n = n =
strictcount(string subpkg, string type, boolean subtypes, string name, string signature, strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
string ext, string output, boolean generated | string ext, string output, string provenance |
canonicalPkgLink(package, subpkg) and canonicalPkgLink(package, subpkg) and
sourceModel(subpkg, type, subtypes, name, signature, ext, output, kind, generated) sourceModel(subpkg, type, subtypes, name, signature, ext, output, kind, provenance)
) )
or or
part = "sink" and part = "sink" and
n = n =
strictcount(string subpkg, string type, boolean subtypes, string name, string signature, strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
string ext, string input, boolean generated | string ext, string input, string provenance |
canonicalPkgLink(package, subpkg) and canonicalPkgLink(package, subpkg) and
sinkModel(subpkg, type, subtypes, name, signature, ext, input, kind, generated) sinkModel(subpkg, type, subtypes, name, signature, ext, input, kind, provenance)
) )
or or
part = "summary" and part = "summary" and
n = n =
strictcount(string subpkg, string type, boolean subtypes, string name, string signature, strictcount(string subpkg, string type, boolean subtypes, string name, string signature,
string ext, string input, string output, boolean generated | string ext, string input, string output, string provenance |
canonicalPkgLink(package, subpkg) and canonicalPkgLink(package, subpkg) and
summaryModel(subpkg, type, subtypes, name, signature, ext, input, output, kind, generated) summaryModel(subpkg, type, subtypes, name, signature, ext, input, output, kind, provenance)
) )
) )
} }
@@ -544,12 +544,16 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int
module CsvValidation { module CsvValidation {
/** Holds if some row in a CSV-based flow model appears to contain typos. */ /** Holds if some row in a CSV-based flow model appears to contain typos. */
query predicate invalidModelRow(string msg) { query predicate invalidModelRow(string msg) {
exists(string pred, string namespace, string type, string name, string signature, string ext | exists(
sourceModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "source" string pred, string namespace, string type, string name, string signature, string ext,
string provenance
|
sourceModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "source"
or or
sinkModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "sink" sinkModel(namespace, type, _, name, signature, ext, _, _, provenance) and pred = "sink"
or or
summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary" summaryModel(namespace, type, _, name, signature, ext, _, _, _, provenance) and
pred = "summary"
| |
not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model." msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
@@ -565,6 +569,9 @@ module CsvValidation {
or or
not ext.regexpMatch("|Annotated") and not ext.regexpMatch("|Annotated") and
msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model." msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
or
not provenance = ["manual", "generated"] and
msg = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
) )
or or
exists(string pred, string input, string part | exists(string pred, string input, string part |
@@ -596,18 +603,18 @@ module CsvValidation {
) )
or or
exists(string pred, string row, int expect | exists(string pred, string row, int expect |
sourceModel(row) and expect = 8 and pred = "source" sourceModel(row) and expect = 9 and pred = "source"
or or
sinkModel(row) and expect = 8 and pred = "sink" sinkModel(row) and expect = 9 and pred = "sink"
or or
summaryModel(row) and expect = 9 and pred = "summary" summaryModel(row) and expect = 10 and pred = "summary"
| |
exists(int cols | exists(int cols |
cols = 1 + max(int n | exists(row.splitAt(";", n))) and cols = 1 + max(int n | exists(row.splitAt(";", n))) and
cols != expect and cols != expect and
msg = msg =
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols + "Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
"." " in " + row + "."
) )
or or
exists(string b | exists(string b |
@@ -617,9 +624,8 @@ module CsvValidation {
) )
) )
or or
exists(string row, string k, string kind | summaryModel(row) | exists(string row, string kind | summaryModel(row) |
k = row.splitAt(";", 8) and kind = row.splitAt(";", 8) and
getKind(k, kind, _) and
not kind = ["taint", "value"] and not kind = ["taint", "value"] and
msg = "Invalid kind \"" + kind + "\" in summary model." msg = "Invalid kind \"" + kind + "\" in summary model."
) )

View File

@@ -55,6 +55,13 @@ DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) {
exists(rk) exists(rk)
} }
/**
 * Gets `true` when `provenance` is exactly the string "generated", and
 * `false` for any other provenance value.
 */
bindingset[provenance]
private boolean isGenerated(string provenance) {
  if provenance = "generated" then result = true else result = false
}
/** /**
* Holds if an external flow summary exists for `c` with input specification * Holds if an external flow summary exists for `c` with input specification
* `input`, output specification `output`, kind `kind`, and a flag `generated` * `input`, output specification `output`, kind `kind`, and a flag `generated`
@@ -62,9 +69,11 @@ DataFlowType getCallbackReturnType(DataFlowType t, ReturnKind rk) {
*/ */
predicate summaryElement(Callable c, string input, string output, string kind, boolean generated) { predicate summaryElement(Callable c, string input, string output, string kind, boolean generated) {
exists( exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext string namespace, string type, boolean subtypes, string name, string signature, string ext,
string provenance
| |
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, generated) and summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and
generated = isGenerated(provenance) and
c = interpretElement(namespace, type, subtypes, name, signature, ext) c = interpretElement(namespace, type, subtypes, name, signature, ext)
) )
} }
@@ -122,9 +131,11 @@ class SourceOrSinkElement = Top;
*/ */
predicate sourceElement(SourceOrSinkElement e, string output, string kind, boolean generated) { predicate sourceElement(SourceOrSinkElement e, string output, string kind, boolean generated) {
exists( exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext string namespace, string type, boolean subtypes, string name, string signature, string ext,
string provenance
| |
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, generated) and sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and
generated = isGenerated(provenance) and
e = interpretElement(namespace, type, subtypes, name, signature, ext) e = interpretElement(namespace, type, subtypes, name, signature, ext)
) )
} }
@@ -136,9 +147,11 @@ predicate sourceElement(SourceOrSinkElement e, string output, string kind, boole
*/ */
predicate sinkElement(SourceOrSinkElement e, string input, string kind, boolean generated) { predicate sinkElement(SourceOrSinkElement e, string input, string kind, boolean generated) {
exists( exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext string namespace, string type, boolean subtypes, string name, string signature, string ext,
string provenance
| |
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, generated) and sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance) and
generated = isGenerated(provenance) and
e = interpretElement(namespace, type, subtypes, name, signature, ext) e = interpretElement(namespace, type, subtypes, name, signature, ext)
) )
} }