Merge pull request #16985 from geoffw0/madprov

C++: Support MaD alert provenance
This commit is contained in:
Geoffrey White
2024-07-17 16:25:49 +01:00
committed by GitHub
6 changed files with 137 additions and 59 deletions

View File

@@ -0,0 +1,4 @@
---
category: feature
---
* Models-as-data alert provenance information has been extended to the C/C++ language. Any qltests that include the edges relation in their output (for example, `.qlref`s that reference path-problem queries) will need to be have their expected output updated accordingly.

View File

@@ -146,7 +146,7 @@ predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) }
/** Holds if a source model exists for the given parameters. */
predicate sourceModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string output, string kind, string provenance
string output, string kind, string provenance, string model
) {
exists(string row |
sourceModel(row) and
@@ -160,16 +160,20 @@ predicate sourceModel(
row.splitAt(";", 6) = output and
row.splitAt(";", 7) = kind
) and
provenance = "manual"
provenance = "manual" and
model = ""
or
Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance,
_)
exists(QlBuiltins::ExtensionId madId |
Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind,
provenance, madId) and
model = "MaD:" + madId.toString()
)
}
/** Holds if a sink model exists for the given parameters. */
predicate sinkModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind, string provenance
string input, string kind, string provenance, string model
) {
exists(string row |
sinkModel(row) and
@@ -183,9 +187,14 @@ predicate sinkModel(
row.splitAt(";", 6) = input and
row.splitAt(";", 7) = kind
) and
provenance = "manual"
provenance = "manual" and
model = ""
or
Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, _)
exists(QlBuiltins::ExtensionId madId |
Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance,
madId) and
model = "MaD:" + madId.toString()
)
}
/**
@@ -195,7 +204,7 @@ predicate sinkModel(
*/
private predicate summaryModel0(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, string provenance
string input, string output, string kind, string provenance, string model
) {
exists(string row |
summaryModel(row) and
@@ -210,10 +219,14 @@ private predicate summaryModel0(
row.splitAt(";", 7) = output and
row.splitAt(";", 8) = kind
) and
provenance = "manual"
provenance = "manual" and
model = ""
or
Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind,
provenance, _)
exists(QlBuiltins::ExtensionId madId |
Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind,
provenance, madId) and
model = "MaD:" + madId.toString()
)
}
/**
@@ -234,19 +247,20 @@ private predicate expandInputAndOutput(
*/
predicate summaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, string provenance
string input, string output, string kind, string provenance, string model
) {
exists(string input0, string output0 |
summaryModel0(namespace, type, subtypes, name, signature, ext, input0, output0, kind, provenance) and
summaryModel0(namespace, type, subtypes, name, signature, ext, input0, output0, kind,
provenance, model) and
expandInputAndOutput(input0, input, output0, output,
[0 .. Private::getMaxElementContentIndirectionIndex() - 1])
)
}
private predicate relevantNamespace(string namespace) {
sourceModel(namespace, _, _, _, _, _, _, _, _) or
sinkModel(namespace, _, _, _, _, _, _, _, _) or
summaryModel(namespace, _, _, _, _, _, _, _, _, _)
sourceModel(namespace, _, _, _, _, _, _, _, _, _) or
sinkModel(namespace, _, _, _, _, _, _, _, _, _) or
summaryModel(namespace, _, _, _, _, _, _, _, _, _, _)
}
private predicate namespaceLink(string shortns, string longns) {
@@ -276,17 +290,17 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa
part = "source" and
n =
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string output, string provenance |
string ext, string output, string provenance, string model |
canonicalNamespaceLink(namespace, subns) and
sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance)
sourceModel(subns, type, subtypes, name, signature, ext, output, kind, provenance, model)
)
or
part = "sink" and
n =
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string input, string provenance |
string ext, string input, string provenance, string model |
canonicalNamespaceLink(namespace, subns) and
sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance)
sinkModel(subns, type, subtypes, name, signature, ext, input, kind, provenance, model)
)
or
part = "summary" and
@@ -294,7 +308,7 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa
strictcount(string subns, string type, boolean subtypes, string name, string signature,
string ext, string input, string output, string provenance |
canonicalNamespaceLink(namespace, subns) and
summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance)
summaryModel(subns, type, subtypes, name, signature, ext, input, output, kind, provenance, _)
)
)
}
@@ -303,9 +317,9 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa
module CsvValidation {
private string getInvalidModelInput() {
exists(string pred, AccessPath input, string part |
sinkModel(_, _, _, _, _, _, input, _, _) and pred = "sink"
sinkModel(_, _, _, _, _, _, input, _, _, _) and pred = "sink"
or
summaryModel(_, _, _, _, _, _, input, _, _, _) and pred = "summary"
summaryModel(_, _, _, _, _, _, input, _, _, _, _) and pred = "summary"
|
(
invalidSpecComponent(input, part) and
@@ -322,9 +336,9 @@ module CsvValidation {
private string getInvalidModelOutput() {
exists(string pred, string output, string part |
sourceModel(_, _, _, _, _, _, output, _, _) and pred = "source"
sourceModel(_, _, _, _, _, _, output, _, _, _) and pred = "source"
or
summaryModel(_, _, _, _, _, _, _, output, _, _) and pred = "summary"
summaryModel(_, _, _, _, _, _, _, output, _, _, _) and pred = "summary"
|
invalidSpecComponent(output, part) and
not part = "" and
@@ -334,11 +348,11 @@ module CsvValidation {
}
private module KindValConfig implements SharedModelVal::KindValidationConfigSig {
predicate summaryKind(string kind) { summaryModel(_, _, _, _, _, _, _, _, kind, _) }
predicate summaryKind(string kind) { summaryModel(_, _, _, _, _, _, _, _, kind, _, _) }
predicate sinkKind(string kind) { sinkModel(_, _, _, _, _, _, _, kind, _) }
predicate sinkKind(string kind) { sinkModel(_, _, _, _, _, _, _, kind, _, _) }
predicate sourceKind(string kind) { sourceModel(_, _, _, _, _, _, _, kind, _) }
predicate sourceKind(string kind) { sourceModel(_, _, _, _, _, _, _, kind, _, _) }
}
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
@@ -379,11 +393,11 @@ module CsvValidation {
private string getInvalidModelSignature() {
exists(string pred, string namespace, string type, string name, string signature, string ext |
sourceModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "source"
sourceModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "source"
or
sinkModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "sink"
sinkModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "sink"
or
summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary"
summaryModel(namespace, type, _, name, signature, ext, _, _, _, _, _) and pred = "summary"
|
not namespace.regexpMatch("[a-zA-Z0-9_\\.:]*") and
result = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
@@ -415,9 +429,9 @@ module CsvValidation {
private predicate elementSpec(
string namespace, string type, boolean subtypes, string name, string signature, string ext
) {
sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _) or
sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _) or
summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _)
sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or
sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or
summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _)
}
/** Gets the fully templated version of `f`. */
@@ -867,9 +881,9 @@ private module Cached {
* model.
*/
cached
predicate sourceNode(DataFlow::Node node, string kind) {
predicate sourceNode(DataFlow::Node node, string kind, string model) {
exists(SourceSinkInterpretationInput::InterpretNode n |
isSourceNode(n, kind, _) and n.asNode() = node // TODO
isSourceNode(n, kind, model) and n.asNode() = node
)
}
@@ -878,40 +892,57 @@ private module Cached {
* model.
*/
cached
predicate sinkNode(DataFlow::Node node, string kind) {
predicate sinkNode(DataFlow::Node node, string kind, string model) {
exists(SourceSinkInterpretationInput::InterpretNode n |
isSinkNode(n, kind, _) and n.asNode() = node // TODO
isSinkNode(n, kind, model) and n.asNode() = node
)
}
}
import Cached
/**
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
predicate sourceNode(DataFlow::Node node, string kind) { sourceNode(node, kind, _) }
/**
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
predicate sinkNode(DataFlow::Node node, string kind) { sinkNode(node, kind, _) }
private predicate interpretSummary(
Function f, string input, string output, string kind, string provenance
Function f, string input, string output, string kind, string provenance, string model
) {
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance,
model) and
f = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
// adapter class for converting Mad summaries to `SummarizedCallable`s
private class SummarizedCallableAdapter extends SummarizedCallable {
SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _) }
SummarizedCallableAdapter() { interpretSummary(this, _, _, _, _, _) }
private predicate relevantSummaryElementManual(string input, string output, string kind) {
private predicate relevantSummaryElementManual(
string input, string output, string kind, string model
) {
exists(Provenance provenance |
interpretSummary(this, input, output, kind, provenance) and
interpretSummary(this, input, output, kind, provenance, model) and
provenance.isManual()
)
}
private predicate relevantSummaryElementGenerated(string input, string output, string kind) {
private predicate relevantSummaryElementGenerated(
string input, string output, string kind, string model
) {
exists(Provenance provenance |
interpretSummary(this, input, output, kind, provenance) and
interpretSummary(this, input, output, kind, provenance, model) and
provenance.isGenerated()
)
}
@@ -920,18 +951,17 @@ private class SummarizedCallableAdapter extends SummarizedCallable {
string input, string output, boolean preservesValue, string model
) {
exists(string kind |
this.relevantSummaryElementManual(input, output, kind)
this.relevantSummaryElementManual(input, output, kind, model)
or
not this.relevantSummaryElementManual(_, _, _) and
this.relevantSummaryElementGenerated(input, output, kind)
not this.relevantSummaryElementManual(_, _, _, _) and
this.relevantSummaryElementGenerated(input, output, kind, model)
|
if kind = "value" then preservesValue = true else preservesValue = false
) and
model = "" // TODO
)
}
override predicate hasProvenance(Provenance provenance) {
interpretSummary(this, _, _, _, provenance)
interpretSummary(this, _, _, _, provenance, _)
}
}

View File

@@ -112,9 +112,8 @@ module SourceSinkInterpretationInput implements
exists(
string namespace, string type, boolean subtypes, string name, string signature, string ext
|
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and
e = interpretElement(namespace, type, subtypes, name, signature, ext) and
model = "" // TODO
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, model) and
e = interpretElement(namespace, type, subtypes, name, signature, ext)
)
}
@@ -128,9 +127,8 @@ module SourceSinkInterpretationInput implements
exists(
string package, string type, boolean subtypes, string name, string signature, string ext
|
sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) and
e = interpretElement(package, type, subtypes, name, signature, ext) and
model = "" // TODO
sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, model) and
e = interpretElement(package, type, subtypes, name, signature, ext)
)
}

View File

@@ -11,6 +11,7 @@ private import Node0ToString
private import ModelUtil
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO
private import semmle.code.cpp.models.interfaces.DataFlow as DF
private import semmle.code.cpp.dataflow.ExternalFlow as External
cached
private module Cached {
@@ -1333,9 +1334,9 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) {
/** Extra data-flow steps needed for lambda flow analysis. */
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
predicate knownSourceModel(Node source, string model) { none() }
predicate knownSourceModel(Node source, string model) { External::sourceNode(source, _, model) }
predicate knownSinkModel(Node sink, string model) { none() }
predicate knownSinkModel(Node sink, string model) { External::sinkNode(sink, _, model) }
/**
* Holds if flow is allowed to pass from parameter `p` and back to itself as a

View File

@@ -1,2 +1,46 @@
testFailures
failures
edges
| asio_streams.cpp:56:18:56:23 | [summary param] *0 in buffer | asio_streams.cpp:56:18:56:23 | [summary] to write: ReturnValue in buffer | provenance | MaD:10 |
| asio_streams.cpp:87:34:87:44 | read_until output argument | asio_streams.cpp:91:7:91:17 | recv_buffer | provenance | Src:MaD:2 |
| asio_streams.cpp:87:34:87:44 | read_until output argument | asio_streams.cpp:93:29:93:39 | *recv_buffer | provenance | Src:MaD:2 Sink:MaD:6 |
| asio_streams.cpp:97:37:97:44 | call to source | asio_streams.cpp:98:7:98:14 | send_str | provenance | TaintFunction |
| asio_streams.cpp:97:37:97:44 | call to source | asio_streams.cpp:100:64:100:71 | *send_str | provenance | TaintFunction |
| asio_streams.cpp:100:44:100:62 | call to buffer | asio_streams.cpp:100:44:100:62 | call to buffer | provenance | |
| asio_streams.cpp:100:44:100:62 | call to buffer | asio_streams.cpp:101:7:101:17 | send_buffer | provenance | |
| asio_streams.cpp:100:44:100:62 | call to buffer | asio_streams.cpp:103:29:103:39 | *send_buffer | provenance | Sink:MaD:6 |
| asio_streams.cpp:100:64:100:71 | *send_str | asio_streams.cpp:56:18:56:23 | [summary param] *0 in buffer | provenance | |
| asio_streams.cpp:100:64:100:71 | *send_str | asio_streams.cpp:100:44:100:62 | call to buffer | provenance | MaD:10 |
| test.cpp:4:5:4:11 | [summary param] 0 in ymlStep | test.cpp:4:5:4:11 | [summary] to write: ReturnValue in ymlStep | provenance | MaD:2 |
| test.cpp:7:10:7:18 | call to ymlSource | test.cpp:7:10:7:18 | call to ymlSource | provenance | Src:MaD:0 |
| test.cpp:7:10:7:18 | call to ymlSource | test.cpp:11:10:11:10 | x | provenance | Sink:MaD:1 |
| test.cpp:7:10:7:18 | call to ymlSource | test.cpp:13:18:13:18 | x | provenance | |
| test.cpp:13:10:13:16 | call to ymlStep | test.cpp:13:10:13:16 | call to ymlStep | provenance | |
| test.cpp:13:10:13:16 | call to ymlStep | test.cpp:15:10:15:10 | y | provenance | Sink:MaD:1 |
| test.cpp:13:18:13:18 | x | test.cpp:4:5:4:11 | [summary param] 0 in ymlStep | provenance | |
| test.cpp:13:18:13:18 | x | test.cpp:13:10:13:16 | call to ymlStep | provenance | MaD:2 |
nodes
| asio_streams.cpp:56:18:56:23 | [summary param] *0 in buffer | semmle.label | [summary param] *0 in buffer |
| asio_streams.cpp:56:18:56:23 | [summary] to write: ReturnValue in buffer | semmle.label | [summary] to write: ReturnValue in buffer |
| asio_streams.cpp:87:34:87:44 | read_until output argument | semmle.label | read_until output argument |
| asio_streams.cpp:91:7:91:17 | recv_buffer | semmle.label | recv_buffer |
| asio_streams.cpp:93:29:93:39 | *recv_buffer | semmle.label | *recv_buffer |
| asio_streams.cpp:97:37:97:44 | call to source | semmle.label | call to source |
| asio_streams.cpp:98:7:98:14 | send_str | semmle.label | send_str |
| asio_streams.cpp:100:44:100:62 | call to buffer | semmle.label | call to buffer |
| asio_streams.cpp:100:44:100:62 | call to buffer | semmle.label | call to buffer |
| asio_streams.cpp:100:64:100:71 | *send_str | semmle.label | *send_str |
| asio_streams.cpp:101:7:101:17 | send_buffer | semmle.label | send_buffer |
| asio_streams.cpp:103:29:103:39 | *send_buffer | semmle.label | *send_buffer |
| test.cpp:4:5:4:11 | [summary param] 0 in ymlStep | semmle.label | [summary param] 0 in ymlStep |
| test.cpp:4:5:4:11 | [summary] to write: ReturnValue in ymlStep | semmle.label | [summary] to write: ReturnValue in ymlStep |
| test.cpp:7:10:7:18 | call to ymlSource | semmle.label | call to ymlSource |
| test.cpp:7:10:7:18 | call to ymlSource | semmle.label | call to ymlSource |
| test.cpp:11:10:11:10 | x | semmle.label | x |
| test.cpp:13:10:13:16 | call to ymlStep | semmle.label | call to ymlStep |
| test.cpp:13:10:13:16 | call to ymlStep | semmle.label | call to ymlStep |
| test.cpp:13:18:13:18 | x | semmle.label | x |
| test.cpp:15:10:15:10 | y | semmle.label | y |
subpaths
| asio_streams.cpp:100:64:100:71 | *send_str | asio_streams.cpp:56:18:56:23 | [summary param] *0 in buffer | asio_streams.cpp:56:18:56:23 | [summary] to write: ReturnValue in buffer | asio_streams.cpp:100:44:100:62 | call to buffer |
| test.cpp:13:18:13:18 | x | test.cpp:4:5:4:11 | [summary param] 0 in ymlStep | test.cpp:4:5:4:11 | [summary] to write: ReturnValue in ymlStep | test.cpp:13:10:13:16 | call to ymlStep |

View File

@@ -1,6 +1,7 @@
import TestUtilities.dataflow.FlowTestCommon
import cpp
import semmle.code.cpp.security.FlowSources
import IRTest::IRFlow::PathGraph
module IRTest {
private import semmle.code.cpp.ir.IR