Remove CSV model infrastructure from ExternalFlow.qll

Remove SourceModelCsv, SinkModelCsv, SummaryModelCsv classes,
single-argument CSV predicates, CSV parsing in MadInput, and
CSV-specific validation checks. Simplify MadInput to only contain
the namespace separator. Convert test models to .ext.yml format.

Agent-Logs-Url: https://github.com/github/codeql/sessions/89ff81fe-5585-446d-99e2-6fe6966495c5

Co-authored-by: owen-mc <62447351+owen-mc@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-26 15:07:39 +00:00
committed by GitHub
parent 64a52ba07f
commit d69bcca687
4 changed files with 94 additions and 256 deletions

View File

@@ -1,4 +1,4 @@
---
category: minorAnalysis
category: breaking
---
* ZeroMQ and `getc`-family models have been migrated from inline CSV specifications in QL files to `.model.yml` data extension files in the `ext/` directory.
* ZeroMQ and `getc`-family models have been migrated from inline CSV specifications in QL files to `.model.yml` data extension files in the `ext/` directory. The `SourceModelCsv`, `SinkModelCsv`, and `SummaryModelCsv` classes and the associated CSV parsing infrastructure have been removed from `ExternalFlow.qll`. New models should be added as `.model.yml` files in the `ext/` directory.

View File

@@ -1,9 +1,10 @@
/**
* INTERNAL use only. This is an experimental API subject to change without notice.
*
* Provides classes and predicates for dealing with flow models specified in CSV format.
* Provides classes and predicates for dealing with flow models specified
* in data extension files.
*
* The CSV specification has the following columns:
* The specification has the following columns:
* - Sources:
* `namespace; type; subtypes; name; signature; ext; output; kind`
* - Sinks:
@@ -104,117 +105,9 @@ private import internal.FlowSummaryImpl::Private
private import internal.FlowSummaryImpl::Private::External
private import internal.ExternalFlowExtensions::Extensions as Extensions
private import codeql.mad.ModelValidation as SharedModelVal
private import codeql.util.Unit
private import codeql.mad.static.ModelsAsData as SharedMaD
/**
* A unit class for adding additional source model rows.
*
* Extend this class to add additional source definitions.
*/
class SourceModelCsv extends Unit {
/** Holds if `row` specifies a source definition. */
abstract predicate row(string row);
}
/**
* A unit class for adding additional sink model rows.
*
* Extend this class to add additional sink definitions.
*/
class SinkModelCsv extends Unit {
/** Holds if `row` specifies a sink definition. */
abstract predicate row(string row);
}
/**
* A unit class for adding additional summary model rows.
*
* Extend this class to add additional flow summary definitions.
*/
class SummaryModelCsv extends Unit {
/** Holds if `row` specifies a summary definition. */
abstract predicate row(string row);
}
/** Holds if `row` is a source model. */
predicate sourceModel(string row) { any(SourceModelCsv s).row(row) }
/** Holds if `row` is a sink model. */
predicate sinkModel(string row) { any(SinkModelCsv s).row(row) }
/** Holds if `row` is a summary model. */
predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) }
private module MadInput implements SharedMaD::InputSig {
/** Holds if a source model exists for the given parameters. */
predicate additionalSourceModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string output, string kind, string provenance, string model
) {
exists(string row |
sourceModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = output and
row.splitAt(";", 7) = kind
) and
provenance = "manual" and
model = ""
}
/** Holds if a sink model exists for the given parameters. */
predicate additionalSinkModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind, string provenance, string model
) {
exists(string row |
sinkModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and
row.splitAt(";", 7) = kind
) and
provenance = "manual" and
model = ""
}
/**
* Holds if a summary model exists for the given parameters.
*
* This predicate does not expand `@` to `*`s.
*/
predicate additionalSummaryModel(
string namespace, string type, boolean subtypes, string name, string signature, string ext,
string input, string output, string kind, string provenance, string model
) {
exists(string row |
summaryModel(row) and
row.splitAt(";", 0) = namespace and
row.splitAt(";", 1) = type and
row.splitAt(";", 2) = subtypes.toString() and
subtypes = [true, false] and
row.splitAt(";", 3) = name and
row.splitAt(";", 4) = signature and
row.splitAt(";", 5) = ext and
row.splitAt(";", 6) = input and
row.splitAt(";", 7) = output and
row.splitAt(";", 8) = kind
) and
provenance = "manual" and
model = ""
}
string namespaceSegmentSeparator() { result = "::" }
}
@@ -250,7 +143,7 @@ predicate summaryModel(
)
}
/** Provides a query predicate to check the CSV data for validation errors. */
/** Provides a query predicate to check the data for validation errors. */
module CsvValidation {
private string getInvalidModelInput() {
exists(string pred, AccessPath input, string part |
@@ -294,40 +187,6 @@ module CsvValidation {
private module KindVal = SharedModelVal::KindValidation<KindValConfig>;
private string getInvalidModelSubtype() {
exists(string pred, string row |
sourceModel(row) and pred = "source"
or
sinkModel(row) and pred = "sink"
or
summaryModel(row) and pred = "summary"
|
exists(string b |
b = row.splitAt(";", 2) and
not b = ["true", "false"] and
result = "Invalid boolean \"" + b + "\" in " + pred + " model."
)
)
}
private string getInvalidModelColumnCount() {
exists(string pred, string row, int expect |
sourceModel(row) and expect = 8 and pred = "source"
or
sinkModel(row) and expect = 8 and pred = "sink"
or
summaryModel(row) and expect = 9 and pred = "summary"
|
exists(int cols |
cols = 1 + max(int n | exists(row.splitAt(";", n))) and
cols != expect and
result =
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
"."
)
)
}
private string getInvalidModelSignature() {
exists(string pred, string namespace, string type, string name, string signature, string ext |
sourceModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "source"
@@ -366,13 +225,12 @@ module CsvValidation {
)
}
/** Holds if some row in a CSV-based flow model appears to contain typos. */
/** Holds if some row in a MaD flow model appears to contain typos. */
query predicate invalidModelRow(string msg) {
msg =
[
getInvalidModelSignature(), getInvalidModelInput(), getInvalidModelOutput(),
getInvalidModelSubtype(), getInvalidModelColumnCount(), KindVal::getInvalidModelKind(),
getIncorrectConstructorSummaryOutput()
KindVal::getInvalidModelKind(), getIncorrectConstructorSummaryOutput()
]
}
}
@@ -1026,7 +884,7 @@ private module Cached {
}
/**
* Holds if `node` is specified as a source with the given kind in a CSV flow
* Holds if `node` is specified as a source with the given kind in a MaD flow
* model.
*/
cached
@@ -1037,7 +895,7 @@ private module Cached {
}
/**
* Holds if `node` is specified as a sink with the given kind in a CSV flow
* Holds if `node` is specified as a sink with the given kind in a MaD flow
* model.
*/
cached

View File

@@ -0,0 +1,84 @@
extensions:
- addsTo:
pack: codeql/cpp-all
extensible: sourceModel
data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance
- ["", "", False, "localMadSource", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "remoteMadSource", "", "", "ReturnValue", "remote", "manual"]
- ["", "", False, "localMadSourceVoid", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "localMadSourceHasBody", "", "", "ReturnValue", "local", "manual"]
- ["", "", False, "remoteMadSourceIndirect", "", "", "ReturnValue[*]", "remote", "manual"]
- ["", "", False, "remoteMadSourceDoubleIndirect", "", "", "ReturnValue[**]", "remote", "manual"]
- ["", "", False, "remoteMadSourceIndirectArg0", "", "", "Argument[*0]", "remote", "manual"]
- ["", "", False, "remoteMadSourceIndirectArg1", "", "", "Argument[*1]", "remote", "manual"]
- ["", "", False, "remoteMadSourceVar", "", "", "", "remote", "manual"]
- ["", "", False, "remoteMadSourceVarIndirect", "", "", "*", "remote", "manual"]
- ["", "", False, "remoteMadSourceParam0", "", "", "Parameter[0]", "remote", "manual"]
- ["MyNamespace", "", False, "namespaceLocalMadSource", "", "", "ReturnValue", "local", "manual"]
- ["MyNamespace", "", False, "namespaceLocalMadSourceVar", "", "", "", "local", "manual"]
- ["MyNamespace::MyNamespace2", "", False, "namespace2LocalMadSource", "", "", "ReturnValue", "local", "manual"]
- ["", "MyClass", True, "memberRemoteMadSource", "", "", "ReturnValue", "remote", "manual"]
- ["", "MyClass", True, "memberRemoteMadSourceIndirectArg0", "", "", "Argument[*0]", "remote", "manual"]
- ["", "MyClass", True, "memberRemoteMadSourceVar", "", "", "", "remote", "manual"]
- ["", "MyClass", True, "subtypeRemoteMadSource1", "", "", "ReturnValue", "remote", "manual"]
- ["", "MyClass", False, "subtypeNonSource", "", "", "ReturnValue", "remote", "manual"]
- ["", "MyClass", True, "qualifierSource", "", "", "Argument[-1]", "remote", "manual"]
- ["", "MyClass", True, "qualifierFieldSource", "", "", "Argument[-1].val", "remote", "manual"]
- ["", "MyDerivedClass", False, "subtypeRemoteMadSource2", "", "", "ReturnValue", "remote", "manual"]
- addsTo:
pack: codeql/cpp-all
extensible: sinkModel
data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance
- ["", "", False, "madSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
- ["", "", False, "madSinkArg1", "", "", "Argument[1]", "test-sink", "manual"]
- ["", "", False, "madSinkArg01", "", "", "Argument[0..1]", "test-sink", "manual"]
- ["", "", False, "madSinkArg02", "", "", "Argument[0,2]", "test-sink", "manual"]
- ["", "", False, "madSinkIndirectArg0", "", "", "Argument[*0]", "test-sink", "manual"]
- ["", "", False, "madSinkDoubleIndirectArg0", "", "", "Argument[**0]", "test-sink", "manual"]
- ["", "", False, "madSinkVar", "", "", "", "test-sink", "manual"]
- ["", "", False, "madSinkVarIndirect", "", "", "*", "test-sink", "manual"]
- ["", "", False, "madSinkParam0", "", "", "Parameter[0]", "test-sink", "manual"]
- ["", "MyClass", True, "memberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
- ["", "MyClass", True, "memberMadSinkVar", "", "", "", "test-sink", "manual"]
- ["", "MyClass", True, "qualifierSink", "", "", "Argument[-1]", "test-sink", "manual"]
- ["", "MyClass", True, "qualifierArg0Sink", "", "", "Argument[-1..0]", "test-sink", "manual"]
- ["", "MyClass", True, "qualifierFieldSink", "", "", "Argument[-1].val", "test-sink", "manual"]
- ["MyNamespace", "MyClass", True, "namespaceMemberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
- ["MyNamespace", "MyClass", True, "namespaceStaticMemberMadSinkArg0", "", "", "Argument[0]", "test-sink", "manual"]
- ["MyNamespace", "MyClass", True, "namespaceMemberMadSinkVar", "", "", "", "test-sink", "manual"]
- ["MyNamespace", "MyClass", True, "namespaceStaticMemberMadSinkVar", "", "", "", "test-sink", "manual"]
- addsTo:
pack: codeql/cpp-all
extensible: summaryModel
data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance
- ["", "", False, "madArg0ToReturn", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0ToReturnIndirect", "", "", "Argument[0]", "ReturnValue[*]", "taint", "manual"]
- ["", "", False, "madArg0ToReturnValueFlow", "", "", "Argument[0]", "ReturnValue", "value", "manual"]
- ["", "", False, "madArg0IndirectToReturn", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0DoubleIndirectToReturn", "", "", "Argument[**0]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0NotIndirectToReturn", "", "", "Argument[0]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0ToArg1Indirect", "", "", "Argument[0]", "Argument[*1]", "taint", "manual"]
- ["", "", False, "madArg0IndirectToArg1Indirect", "", "", "Argument[*0]", "Argument[*1]", "taint", "manual"]
- ["", "", False, "madArgsComplex", "", "", "Argument[*0..1,2]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madAndImplementedComplex", "", "", "Argument[2]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArgsAny", "", "", "Argument", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0FieldToReturn", "", "", "Argument[0].Field[value]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0IndirectFieldToReturn", "", "", "Argument[*0].Field[value]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0FieldIndirectToReturn", "", "", "Argument[0].Field[*ptr]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madArg0ToReturnField", "", "", "Argument[0]", "ReturnValue.Field[value]", "taint", "manual"]
- ["", "", False, "madArg0ToReturnIndirectField", "", "", "Argument[0]", "ReturnValue[*].Field[value]", "taint", "manual"]
- ["", "", False, "madArg0ToReturnFieldIndirect", "", "", "Argument[0]", "ReturnValue.Field[*ptr]", "taint", "manual"]
- ["", "", False, "madFieldToFieldVar", "", "", "Field[value]", "Field[value2]", "taint", "manual"]
- ["", "", False, "madFieldToIndirectFieldVar", "", "", "Field[value]", "Field[*ptr]", "taint", "manual"]
- ["", "", False, "madIndirectFieldToFieldVar", "", "", "", "Field[value]", "Field[value2]", "manual"]
- ["", "MyClass", True, "madArg0ToSelf", "", "", "Argument[0]", "Argument[-1]", "taint", "manual"]
- ["", "MyClass", True, "madSelfToReturn", "", "", "Argument[-1]", "ReturnValue", "taint", "manual"]
- ["", "MyClass", True, "madArg0ToField", "", "", "Argument[0]", "Argument[-1].Field[val]", "taint", "manual"]
- ["", "MyClass", True, "madFieldToReturn", "", "", "Argument[-1].Field[val]", "ReturnValue", "taint", "manual"]
- ["MyNamespace", "MyClass", True, "namespaceMadSelfToReturn", "", "", "Argument[-1]", "ReturnValue", "taint", "manual"]
- ["", "", False, "madCallArg0ReturnToReturn", "", "", "Argument[0].ReturnValue", "ReturnValue", "value", "manual"]
- ["", "", False, "madCallArg0ReturnToReturnFirst", "", "", "Argument[0].ReturnValue", "ReturnValue.Field[first]", "value", "manual"]
- ["", "", False, "madCallArg0WithValue", "", "", "Argument[1]", "Argument[0].Parameter[0]", "value", "manual"]
- ["", "", False, "madCallReturnValueIgnoreFunction", "", "", "Argument[1]", "ReturnValue", "value", "manual"]
- ["", "StructWithTypedefInParameter<T>", True, "parameter_ref_to_return_ref", "(const T &)", "", "Argument[*0]", "ReturnValue[*]", "value", "manual"]
- ["", "", False, "receive_array", "(int[20])", "", "Argument[*0]", "ReturnValue", "taint", "manual"]

View File

@@ -1,105 +1 @@
import semmle.code.cpp.security.FlowSources
/**
* Models-as-data source models for this test.
*/
private class TestSources extends SourceModelCsv {
override predicate row(string row) {
row =
[
";;false;localMadSource;;;ReturnValue;local",
";;false;remoteMadSource;;;ReturnValue;remote",
";;false;localMadSourceVoid;;;ReturnValue;local",
";;false;localMadSourceHasBody;;;ReturnValue;local",
";;false;remoteMadSourceIndirect;;;ReturnValue[*];remote",
";;false;remoteMadSourceDoubleIndirect;;;ReturnValue[**];remote",
";;false;remoteMadSourceIndirectArg0;;;Argument[*0];remote",
";;false;remoteMadSourceIndirectArg1;;;Argument[*1];remote",
";;false;remoteMadSourceVar;;;;remote",
";;false;remoteMadSourceVarIndirect;;;*;remote", // not correctly expressed
";;false;remoteMadSourceParam0;;;Parameter[0];remote",
"MyNamespace;;false;namespaceLocalMadSource;;;ReturnValue;local",
"MyNamespace;;false;namespaceLocalMadSourceVar;;;;local",
"MyNamespace::MyNamespace2;;false;namespace2LocalMadSource;;;ReturnValue;local",
";MyClass;true;memberRemoteMadSource;;;ReturnValue;remote",
";MyClass;true;memberRemoteMadSourceIndirectArg0;;;Argument[*0];remote",
";MyClass;true;memberRemoteMadSourceVar;;;;remote",
";MyClass;true;subtypeRemoteMadSource1;;;ReturnValue;remote",
";MyClass;false;subtypeNonSource;;;ReturnValue;remote", // the tests define this in MyDerivedClass, so it should *not* be recongized as a source
";MyClass;true;qualifierSource;;;Argument[-1];remote",
";MyClass;true;qualifierFieldSource;;;Argument[-1].val;remote",
";MyDerivedClass;false;subtypeRemoteMadSource2;;;ReturnValue;remote",
]
}
}
/**
* Models-as-data sink models for this test.
*/
private class TestSinks extends SinkModelCsv {
override predicate row(string row) {
row =
[
";;false;madSinkArg0;;;Argument[0];test-sink",
";;false;madSinkArg1;;;Argument[1];test-sink",
";;false;madSinkArg01;;;Argument[0..1];test-sink",
";;false;madSinkArg02;;;Argument[0,2];test-sink",
";;false;madSinkIndirectArg0;;;Argument[*0];test-sink",
";;false;madSinkDoubleIndirectArg0;;;Argument[**0];test-sink",
";;false;madSinkVar;;;;test-sink",
";;false;madSinkVarIndirect;;;*;test-sink", // not correctly expressed
";;false;madSinkParam0;;;Parameter[0];test-sink",
";MyClass;true;memberMadSinkArg0;;;Argument[0];test-sink",
";MyClass;true;memberMadSinkVar;;;;test-sink",
";MyClass;true;qualifierSink;;;Argument[-1];test-sink",
";MyClass;true;qualifierArg0Sink;;;Argument[-1..0];test-sink",
";MyClass;true;qualifierFieldSink;;;Argument[-1].val;test-sink",
"MyNamespace;MyClass;true;namespaceMemberMadSinkArg0;;;Argument[0];test-sink",
"MyNamespace;MyClass;true;namespaceStaticMemberMadSinkArg0;;;Argument[0];test-sink",
"MyNamespace;MyClass;true;namespaceMemberMadSinkVar;;;;test-sink",
"MyNamespace;MyClass;true;namespaceStaticMemberMadSinkVar;;;;test-sink",
]
}
}
/**
* Models-as-data summary models for this test.
*/
private class TestSummaries extends SummaryModelCsv {
override predicate row(string row) {
row =
[
";;false;madArg0ToReturn;;;Argument[0];ReturnValue;taint",
";;false;madArg0ToReturnIndirect;;;Argument[0];ReturnValue[*];taint",
";;false;madArg0ToReturnValueFlow;;;Argument[0];ReturnValue;value",
";;false;madArg0IndirectToReturn;;;Argument[*0];ReturnValue;taint",
";;false;madArg0DoubleIndirectToReturn;;;Argument[**0];ReturnValue;taint",
";;false;madArg0NotIndirectToReturn;;;Argument[0];ReturnValue;taint",
";;false;madArg0ToArg1Indirect;;;Argument[0];Argument[*1];taint",
";;false;madArg0IndirectToArg1Indirect;;;Argument[*0];Argument[*1];taint",
";;false;madArgsComplex;;;Argument[*0..1,2];ReturnValue;taint",
";;false;madAndImplementedComplex;;;Argument[2];ReturnValue;taint",
";;false;madArgsAny;;;Argument;ReturnValue;taint", // (syntax not supported)
";;false;madArg0FieldToReturn;;;Argument[0].Field[value];ReturnValue;taint",
";;false;madArg0IndirectFieldToReturn;;;Argument[*0].Field[value];ReturnValue;taint",
";;false;madArg0FieldIndirectToReturn;;;Argument[0].Field[*ptr];ReturnValue;taint",
";;false;madArg0ToReturnField;;;Argument[0];ReturnValue.Field[value];taint",
";;false;madArg0ToReturnIndirectField;;;Argument[0];ReturnValue[*].Field[value];taint",
";;false;madArg0ToReturnFieldIndirect;;;Argument[0];ReturnValue.Field[*ptr];taint",
";;false;madFieldToFieldVar;;;Field[value];Field[value2];taint",
";;false;madFieldToIndirectFieldVar;;;Field[value];Field[*ptr];taint",
";;false;madIndirectFieldToFieldVar;;;;Field[value];Field[value2];taint", // not correctly expressed
";MyClass;true;madArg0ToSelf;;;Argument[0];Argument[-1];taint",
";MyClass;true;madSelfToReturn;;;Argument[-1];ReturnValue;taint",
";MyClass;true;madArg0ToField;;;Argument[0];Argument[-1].Field[val];taint",
";MyClass;true;madFieldToReturn;;;Argument[-1].Field[val];ReturnValue;taint",
"MyNamespace;MyClass;true;namespaceMadSelfToReturn;;;Argument[-1];ReturnValue;taint",
";;false;madCallArg0ReturnToReturn;;;Argument[0].ReturnValue;ReturnValue;value",
";;false;madCallArg0ReturnToReturnFirst;;;Argument[0].ReturnValue;ReturnValue.Field[first];value",
";;false;madCallArg0WithValue;;;Argument[1];Argument[0].Parameter[0];value",
";;false;madCallReturnValueIgnoreFunction;;;Argument[1];ReturnValue;value",
";StructWithTypedefInParameter<T>;true;parameter_ref_to_return_ref;(const T &);;Argument[*0];ReturnValue[*];value",
";;false;receive_array;(int[20]);;Argument[*0];ReturnValue;taint"
]
}
}