From 34130d50d30cab90b26767bb5f6c3bc3e9f106ac Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 23 May 2024 17:25:29 +0100 Subject: [PATCH 01/15] C++: Add library tests for YML extension models. --- .../dataflow/external-models/flow.expected | 2 ++ .../dataflow/external-models/flow.ql | 24 +++++++++++++++++++ .../dataflow/external-models/sinks.expected | 0 .../dataflow/external-models/sinks.ql | 7 ++++++ .../dataflow/external-models/sources.expected | 0 .../dataflow/external-models/sources.ql | 7 ++++++ .../dataflow/external-models/steps.expected | 0 .../dataflow/external-models/steps.ql | 8 +++++++ .../dataflow/external-models/test.cpp | 16 +++++++++++++ .../external-models/validatemodels.expected | 7 ++++++ .../external-models/validatemodels.ql | 2 ++ 11 files changed, 73 insertions(+) create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/flow.expected create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/flow.ql create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/sinks.expected create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/sinks.ql create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/sources.expected create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/sources.ql create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/steps.expected create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/steps.ql create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/test.cpp create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.expected b/cpp/ql/test/library-tests/dataflow/external-models/flow.expected new file mode 100644 index 00000000000..8ec8033d086 --- /dev/null +++ 
b/cpp/ql/test/library-tests/dataflow/external-models/flow.expected @@ -0,0 +1,2 @@ +testFailures +failures diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql new file mode 100644 index 00000000000..36e858b8abd --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql @@ -0,0 +1,24 @@ +import TestUtilities.dataflow.FlowTestCommon +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow + +module IRTest { + private import semmle.code.cpp.ir.IR + private import semmle.code.cpp.ir.dataflow.TaintTracking + + /** Common data flow configuration to be used by tests. */ + module TestAllocationConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + sourceNode(source, _) + } + + predicate isSink(DataFlow::Node sink) { + sinkNode(sink, "test-sink") + } + } + + module IRFlow = TaintTracking::Global; +} + +import MakeTest> diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql new file mode 100644 index 00000000000..d3bafd1c369 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql @@ -0,0 +1,7 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow + +from DataFlow::Node node, string kind +where sinkNode(node, kind) +select node, kind diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.ql 
b/cpp/ql/test/library-tests/dataflow/external-models/sources.ql new file mode 100644 index 00000000000..ed79d740f88 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.ql @@ -0,0 +1,7 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow + +from DataFlow::Node node, string kind +where sourceNode(node, kind) +select node, kind diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.ql b/cpp/ql/test/library-tests/dataflow/external-models/steps.ql new file mode 100644 index 00000000000..2c141d8334b --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.ql @@ -0,0 +1,8 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow +import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl + +from DataFlow::Node node1, DataFlow::Node node2 +where FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(node1, node2, _) +select node1, node2 diff --git a/cpp/ql/test/library-tests/dataflow/external-models/test.cpp b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp new file mode 100644 index 00000000000..1d9c5711f3d --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp @@ -0,0 +1,16 @@ + +int ymlSource(); +void ymlSink(int value); +int ymlStep(int value); + +void test() { + int x = ymlSource(); + + ymlSink(0); + + ymlSink(x); // $ MISSING: ir + + int y = ymlStep(x); + + ymlSink(y); // $ MISSING: ir +} diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected new file mode 100644 index 00000000000..6e8576979d2 --- /dev/null +++ 
b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected @@ -0,0 +1,7 @@ +| Dubious namespace "" in sink model. | +| Dubious namespace "" in source model. | +| Dubious namespace "" in summary model. | +| Dubious type "" in sink model. | +| Dubious type "" in source model. | +| Dubious type "" in summary model. | +| Invalid kind "remote-sink" in sink model. | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql new file mode 100644 index 00000000000..a162349b7cd --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql @@ -0,0 +1,2 @@ +import cpp +import semmle.code.cpp.dataflow.ExternalFlow::CsvValidation From 94413c8c2e9aac54caf09d8a5606b8650593b04a Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 28 May 2024 15:42:48 +0100 Subject: [PATCH 02/15] C++: Implement YML extension models. --- cpp/ql/lib/ext/empty.model.yml | 15 +++++++++++ cpp/ql/lib/qlpack.yml | 2 ++ .../semmle/code/cpp/dataflow/ExternalFlow.qll | 9 +++++++ .../internal/ExternalFlowExtensions.qll | 27 +++++++++++++++++++ .../dataflow/external-models/flow.ext.yml | 16 +++++++++++ .../dataflow/external-models/sinks.expected | 3 +++ .../dataflow/external-models/sinks.ext.yml | 6 +++++ .../dataflow/external-models/sources.expected | 1 + .../dataflow/external-models/sources.ext.yml | 6 +++++ .../dataflow/external-models/steps.expected | 1 + .../dataflow/external-models/steps.ext.yml | 6 +++++ .../dataflow/external-models/test.cpp | 4 +-- 12 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 cpp/ql/lib/ext/empty.model.yml create mode 100644 cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml create mode 100644 
cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml diff --git a/cpp/ql/lib/ext/empty.model.yml b/cpp/ql/lib/ext/empty.model.yml new file mode 100644 index 00000000000..6f160b62d7a --- /dev/null +++ b/cpp/ql/lib/ext/empty.model.yml @@ -0,0 +1,15 @@ +extensions: + # Make sure that the extensible model predicates have at least one definition + # to avoid errors about undefined extensionals. + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: [] diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index f0ef22f89d6..a1787458c6d 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -14,4 +14,6 @@ dependencies: codeql/tutorial: ${workspace} codeql/util: ${workspace} codeql/xml: ${workspace} +dataExtensions: + - ext/*.model.yml warnOnImplicitThis: true diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index 236989f55d0..1266661b913 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -78,6 +78,7 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External +private import internal.ExternalFlowExtensions as Extensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.util.Unit @@ -138,6 +139,9 @@ predicate sourceModel( row.splitAt(";", 7) = kind ) and provenance = "manual" + or + Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, + _) } /** Holds if a sink model exists for the given parameters. 
*/ @@ -158,6 +162,8 @@ predicate sinkModel( row.splitAt(";", 7) = kind ) and provenance = "manual" + or + Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, _) } /** Holds if a summary model exists for the given parameters. */ @@ -179,6 +185,9 @@ predicate summaryModel( row.splitAt(";", 8) = kind ) and provenance = "manual" + or + Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, + provenance, _) } private predicate relevantNamespace(string namespace) { diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll new file mode 100644 index 00000000000..cd1af34c8d8 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll @@ -0,0 +1,27 @@ +/** + * This module provides extensible predicates for defining MaD models. + */ + +/** + * Holds if an external source model exists for the given parameters. + */ +extensible predicate sourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if an external sink model exists for the given parameters. + */ +extensible predicate sinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if an external summary model exists for the given parameters. 
+ */ +extensible predicate summaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml new file mode 100644 index 00000000000..42ca51bc424 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml @@ -0,0 +1,16 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["", "", False, "ymlSource", "", "", "ReturnValue", "local", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["", "", False, "ymlSink", "", "", "Argument[0]", "test-sink", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance + - ["", "", False, "ymlStep", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected index e69de29bb2d..bce01f9259b 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected @@ -0,0 +1,3 @@ +| test.cpp:9:10:9:10 | 0 | test-sink | +| test.cpp:11:10:11:10 | x | test-sink | +| test.cpp:15:10:15:10 | y | test-sink | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml new file mode 100644 index 00000000000..b2ee15edfd3 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml 
@@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["", "", False, "ymlSink", "", "", "Argument[0]", "test-sink", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected index e69de29bb2d..35c33437a57 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected @@ -0,0 +1 @@ +| test.cpp:7:10:7:18 | call to ymlSource | local | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml new file mode 100644 index 00000000000..91bf18cf79b --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["", "", False, "ymlSource", "", "", "ReturnValue", "local", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected index e69de29bb2d..6815274189b 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected @@ -0,0 +1 @@ +| test.cpp:13:18:13:18 | x | test.cpp:13:10:13:16 | call to ymlStep | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml new file mode 100644 index 00000000000..c8a195b7aa6 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, 
type, subtypes, name, signature, ext, input, output, kind, provenance + - ["", "", False, "ymlStep", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/test.cpp b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp index 1d9c5711f3d..aa50f6715f2 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp @@ -8,9 +8,9 @@ void test() { ymlSink(0); - ymlSink(x); // $ MISSING: ir + ymlSink(x); // $ ir int y = ymlStep(x); - ymlSink(y); // $ MISSING: ir + ymlSink(y); // $ ir } From e87593af756272a0476ea0ed43eac41387487119 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 28 May 2024 15:54:20 +0100 Subject: [PATCH 03/15] C++: Add the doc (copy from csharp). --- .../codeql-language-guides/codeql-for-cpp.rst | 4 + .../customizing-library-models-for-cpp.rst | 349 ++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100644 docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst diff --git a/docs/codeql/codeql-language-guides/codeql-for-cpp.rst b/docs/codeql/codeql-language-guides/codeql-for-cpp.rst index a564f54042f..584f8c63bf6 100644 --- a/docs/codeql/codeql-language-guides/codeql-for-cpp.rst +++ b/docs/codeql/codeql-language-guides/codeql-for-cpp.rst @@ -21,6 +21,8 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat using-range-analsis-in-cpp hash-consing-and-value-numbering advanced-dataflow-scenarios-cpp + customizing-library-models-for-cpp + - :doc:`Basic query for C and C++ code `: Learn to write and run a simple CodeQL query. 
@@ -46,3 +48,5 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat - :doc:`Hash consing and value numbering `: You can use specialized CodeQL libraries to recognize expressions that are syntactically identical or compute the same value at runtime in C and C++ codebases. - :doc:`Advanced C/C++ dataflow scenarios `: You can track precise data flow in C and C++ codebases by distinguishing between a pointer and its indirection(s). + +- :doc:`Customizing library models for C and C++ `: You can model frameworks and libraries that your codebase depends on using data extensions and publish them as CodeQL model packs. diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst new file mode 100644 index 00000000000..39b5ee30ee4 --- /dev/null +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst @@ -0,0 +1,349 @@ +.. _customizing-library-models-for-csharp: + +Customizing library models for C# +================================= + +You can model the methods and callables that control data flow in any framework or library. This is especially useful for custom frameworks or niche libraries, that are not supported by the standard CodeQL libraries. + +.. include:: ../reusables/beta-note-customizing-library-models.rst + +About this article +------------------ + +This article contains reference material about how to define custom models for sources, sinks, and flow summaries for C# dependencies in data extension files. + +About data extensions +--------------------- + +You can customize analysis by defining models (summaries, sinks, and sources) of your code's C#/.NET dependencies in data extension files. Each model defines the behavior of one or more elements of your library or framework, such as methods, properties, and callables. 
When you run dataflow analysis, these models expand the potential sources and sinks tracked by dataflow analysis and improve the precision of results. + +Most of the security queries search for paths from a source of untrusted input to a sink that represents a vulnerability. This is known as taint tracking. Each source is a starting point for dataflow analysis to track tainted data and each sink is an end point. + +Taint tracking queries also need to know how data can flow through elements that are not included in the source code. These are modeled as summaries. A summary model enables queries to synthesize the flow behavior through elements in dependency code that is not stored in your repository. + +Syntax used to define an element in an extension file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each model of an element is defined using a data extension where each tuple constitutes a model. +A data extension file to extend the standard C# queries included with CodeQL is a YAML file with the form: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: <name of extensible predicate> + data: + - <tuple1> + - <tuple2> + - ... + +Each YAML file may contain one or more top-level extensions. + +- ``addsTo`` defines the CodeQL pack name and extensible predicate that the extension is injected into. +- ``data`` defines one or more rows of tuples that are injected as values into the extensible predicate. The number of columns and their types must match the definition of the extensible predicate. + +Data extensions use union semantics, which means that the tuples of all extensions for a single extensible predicate are combined, duplicates are removed, and all of the remaining tuples are queryable by referencing the extensible predicate. 
+ +Publish data extension files in a CodeQL model pack to share +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can group one or more data extension files into a CodeQL model pack and publish it to the GitHub Container Registry. This makes it easy for anyone to download the model pack and use it to extend their analysis. For more information, see `Creating a CodeQL model pack `__ and `Publishing and using CodeQL packs `__ in the CodeQL CLI documentation. + +Extensible predicates used to create custom models in C# +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CodeQL library for C# analysis exposes the following extensible predicates: + +- ``sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determines which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models `." +- ``sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance)``. This is used to model sinks where tainted data may be used in a way that makes the code vulnerable. +- ``summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance)``. This is used to model flow through elements. +- ``neutralModel(namespace, type, name, signature, kind, provenance)``. This is similar to a summary model but used to model the flow of values that have only a minor impact on the dataflow analysis. Manual neutral models (those with a provenance such as ``manual`` or ``ai-manual``) can be used to override generated summary models (those with a provenance such as ``df-generated``), so that the summary model will be ignored. Other than that, neutral models have no effect. + +The extensible predicates are populated using the models defined in data extension files. 
+ +Examples of custom model definitions +------------------------------------ + +The examples in this section are taken from the standard CodeQL C# query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries. + +Example: Taint sink in the ``System.Data.SqlClient`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows how the C# query pack models the argument of the ``SqlCommand`` constructor as a SQL injection sink. +This is the constructor of the ``SqlCommand`` class, which is located in the ``System.Data.SqlClient`` namespace. + +.. code-block:: csharp + + public static void TaintSink(SqlConnection conn, string query) { + SqlCommand command = new SqlCommand(query, conn); // The first argument to this constructor is a SQL injection sink. + ... + } + +We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, signature, ext, input, kind, provenance) extensible predicate by updating a data extension file. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: sinkModel + data: + - ["System.Data.SqlClient", "SqlCommand", False, "SqlCommand", "(System.String,System.Data.SqlClient.SqlConnection)", "", "Argument[0]", "sql-injection", "manual"] + +Since we want to add a new sink, we need to add a tuple to the ``sinkModel`` extensible predicate. +The first five values identify the callable (in this case a method) to be modeled as a sink. + +- The first value ``System.Data.SqlClient`` is the namespace name. +- The second value ``SqlCommand`` is the name of the class (type) that contains the method. +- The third value ``False`` is a flag that indicates whether or not the sink also applies to all overrides of the method. +- The fourth value ``SqlCommand`` is the method name. Constructors are named after the class. 
+- The fifth value ``(System.String,System.Data.SqlClient.SqlConnection)`` is the method input type signature. The type names must be fully qualified. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the sink. + +- The seventh value ``Argument[0]`` is the ``access path`` to the first argument passed to the method, which means that this is the location of the sink. +- The eighth value ``sql-injection`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. In this case - the SQL injection queries. +- The ninth value ``manual`` is the provenance of the sink, which is used to identify the origin of the sink. + +Example: Taint source from the ``System.Net.Sockets`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example shows how the C# query pack models the return value from the ``GetStream`` method as a ``remote`` source. +This is the ``GetStream`` method in the ``TcpClient`` class, which is located in the ``System.Net.Sockets`` namespace. + +.. code-block:: csharp + + public static void Tainted(TcpClient client) { + NetworkStream stream = client.GetStream(); // The return value of this method is a remote source of taint. + ... + } + +We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, signature, ext, output, kind, provenance) extensible predicate by updating a data extension file. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: sourceModel + data: + - ["System.Net.Sockets", "TcpClient", False, "GetStream", "()", "", "ReturnValue", "remote", "manual"] + + +Since we are adding a new source, we need to add a tuple to the ``sourceModel`` extensible predicate. +The first five values identify the callable (in this case a method) to be modeled as a source. 
+ +- The first value ``System.Net.Sockets`` is the namespace name. +- The second value ``TcpClient`` is the name of the class (type) that contains the source. +- The third value ``False`` is a flag that indicates whether or not the source also applies to all overrides of the method. +- The fourth value ``GetStream`` is the method name. +- The fifth value ``()`` is the method input type signature. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source. + +- The seventh value ``ReturnValue`` is the access path to the return of the method, which means that it is the return value that should be considered a source of tainted input. +- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. As an example the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models `." +- The ninth value ``manual`` is the provenance of the source, which is used to identify the origin of the source. + +Example: Add flow through the ``Concat`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example shows how the C# query pack models flow through a method for a simple case. +This pattern covers many of the cases where we need to summarize flow through a method that is stored in a library or framework outside the repository. + +.. code-block:: csharp + + public static void TaintFlow(string s1, string s2) { + string t = String.Concat(s1, s2); // There is taint flow from s1 and s2 to t. + ... + } + +We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: + +.. 
code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: summaryModel + data: + - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0]", "ReturnValue", "taint", "manual"] + - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[1]", "ReturnValue", "taint", "manual"] + +Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. +Each tuple defines flow from one argument to the return value. +The first row defines flow from the first argument (``s1`` in the example) to the return value (``t`` in the example) and the second row defines flow from the second argument (``s2`` in the example) to the return value (``t`` in the example). + +The first five values identify the callable (in this case a method) to be modeled as a summary. +These are the same for both of the rows above as we are adding two summaries for the same method. + +- The first value ``System`` is the namespace name. +- The second value ``String`` is the class (type) name. +- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. +- The fourth value ``Concat`` is the method name. +- The fifth value ``(System.Object,System.Object)`` is the method input type signature. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary. + +- The seventh value is the access path to the input (where data flows from). ``Argument[0]`` is the access path to the first argument (``s1`` in the example) and ``Argument[1]`` is the access path to the second argument (``s2`` in the example). +- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value. 
+- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call. +- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. + +It would also be possible to merge the two rows into one by using a comma-separated list in the seventh value. This would be useful if the method has many arguments and the flow is the same for all of them. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: summaryModel + data: + - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0,1]", "ReturnValue", "taint", "manual"] + +This row defines flow from both the first and the second argument to the return value. The seventh value ``Argument[0,1]`` is shorthand for specifying an access path to both ``Argument[0]`` and ``Argument[1]``. + +Example: Add flow through the ``Trim`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example shows how the C# query pack models flow through a method for a simple case. + +.. code-block:: csharp + + public static void TaintFlow(string s) { + string t = s.Trim(); // There is taint flow from s to t. + ... + } + +We need to add a tuple to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: summaryModel + data: + - ["System", "String", False, "Trim", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"] + +Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. +Each tuple defines flow from one argument to the return value. +The first row defines flow from the qualifier of the method call (``s`` in the example) to the return value (``t`` in the example). 
+ +The first five values identify the callable (in this case a method) to be modeled as a summary. +As this example contains a single row, they identify one summary for the method. + +- The first value ``System`` is the namespace name. +- The second value ``String`` is the class (type) name. +- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. +- The fourth value ``Trim`` is the method name. +- The fifth value ``()`` is the method input type signature. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary. + +- The seventh value is the access path to the input (where data flows from). ``Argument[this]`` is the access path to the qualifier (``s`` in the example). +- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value. +- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call. +- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. + +Example: Add flow through the ``Select`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example shows how the C# query pack models a more complex flow through a method. +Here we model flow through higher order methods and collection types, as well as how to handle extension methods and generics. + +.. code-block:: csharp + + public static void TaintFlow(IEnumerable stream) { + IEnumerable lines = stream.Select(item => item + "\n"); + ... + } + +We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: + +.. 
code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: summaryModel + data: + - ["System.Linq", "Enumerable", False, "Select", "(System.Collections.Generic.IEnumerable,System.Func)", "", "Argument[0].Element", "Argument[1].Parameter[0]", "value", "manual"] + - ["System.Linq", "Enumerable", False, "Select", "(System.Collections.Generic.IEnumerable,System.Func)", "", "Argument[1].ReturnValue", "ReturnValue.Element", "value", "manual"] + + +Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. +Each tuple defines part of the flow that comprises the total flow through the ``Select`` method. +The first five values identify the callable (in this case a method) to be modeled as a summary. +These are the same for both of the rows above as we are adding two summaries for the same method. + +- The first value ``System.Linq`` is the namespace name. +- The second value ``Enumerable`` is the class (type) name. +- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. +- The fourth value ``Select`` is the method name, along with the type parameters for the method. The names of the generic type parameters provided in the model must match the names of the generic type parameters in the method signature in the source code. +- The fifth value ``(System.Collections.Generic.IEnumerable,System.Func)`` is the method input type signature. The generics in the signature must match the generics in the method signature in the source code. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary definition. + +- The seventh value is the access path to the ``input`` (where data flows from). +- The eighth value is the access path to the ``output`` (where data flows to). 
+ +For the first row: + +- The seventh value is ``Argument[0].Element``, which is the access path to the elements of the qualifier (the elements of the enumerable ``stream`` in the example). +- The eighth value is ``Argument[1].Parameter[0]``, which is the access path to the first parameter of the ``System.Func`` argument of ``Select`` (the lambda parameter ``item`` in the example). + +For the second row: + +- The seventh value is ``Argument[1].ReturnValue``, which is the access path to the return value of the ``System.Func`` argument of ``Select`` (the return value of the lambda in the example). +- The eighth value is ``ReturnValue.Element``, which is the access path to the elements of the return value of ``Select`` (the elements of the enumerable ``lines`` in the example). + +For the remaining values for both rows: + +- The ninth value ``value`` is the kind of the flow. ``value`` means that the value is preserved. +- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. + +That is, the first row specifies that values can flow from the elements of the qualifier enumerable into the first argument of the function provided to ``Select``. The second row specifies that values can flow from the return value of the function to the elements of the enumerable returned from ``Select``. + +Example: Add a ``neutral`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example shows how we can model a method as being neutral with respect to flow. We will also cover how to model a property by modeling the getter of the ``Now`` property of the ``DateTime`` class as neutral. +A neutral model is used to define that there is no flow through a method. + +.. code-block:: csharp + + public static void TaintFlow() { + System.DateTime t = System.DateTime.Now; // There is no flow from Now to t. + ... 
+ } + +We need to add a tuple to the ``neutralModel``\(namespace, type, name, signature, kind, provenance) extensible predicate by updating a data extension file. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/csharp-all + extensible: neutralModel + data: + - ["System", "DateTime", "get_Now", "()", "summary", "manual"] + + +Since we are adding a neutral model, we need to add tuples to the ``neutralModel`` extensible predicate. +The first four values identify the callable (in this case the getter of the ``Now`` property) to be modeled as a neutral, the fifth value is the kind, and the sixth value is the provenance (origin) of the neutral. + +- The first value ``System`` is the namespace name. +- The second value ``DateTime`` is the class (type) name. +- The third value ``get_Now`` is the method name. Getter and setter methods are named ``get_`` and ``set_`` respectively. +- The fourth value ``()`` is the method input type signature. +- The fifth value ``summary`` is the kind of the neutral. +- The sixth value ``manual`` is the provenance of the neutral. + +.. _threat-models-csharp: + +Threat models +------------- + +.. include:: ../reusables/threat-model-description.rst From af6a08893ac3e778c4b23a31a1d89605f5a03947 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 28 May 2024 16:39:08 +0100 Subject: [PATCH 04/15] C++: Update the doc text for C/C++. --- .../customizing-library-models-for-cpp.rst | 99 +++++++------------ 1 file changed, 33 insertions(+), 66 deletions(-) diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst index 39b5ee30ee4..3bd6d61f3b9 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst @@ -1,7 +1,7 @@ -.. _customizing-library-models-for-csharp: +.. 
_customizing-library-models-for-cpp: -Customizing library models for C# -================================= +Customizing library models for C and C++ +======================================== You can model the methods and callables that control data flow in any framework or library. This is especially useful for custom frameworks or niche libraries, that are not supported by the standard CodeQL libraries. @@ -10,14 +10,14 @@ You can model the methods and callables that control data flow in any framework About this article ------------------ -This article contains reference material about how to define custom models for sources, sinks, and flow summaries for C# dependencies in data extension files. +This article contains reference material about how to define custom models for sources, sinks, and flow summaries for C and C++ dependencies in data extension files. About data extensions --------------------- -You can customize analysis by defining models (summaries, sinks, and sources) of your code's C#/.NET dependencies in data extension files. Each model defines the behavior of one or more elements of your library or framework, such as methods, properties, and callables. When you run dataflow analysis, these models expand the potential sources and sinks tracked by dataflow analysis and improve the precision of results. +You can customize analysis by defining models (summaries, sinks, and sources) of your code's C and C++ dependencies in data extension files. Each model defines the behavior of one or more elements of your library or framework, such as callables. When you run dataflow analysis, these models expand the potential sources and sinks tracked by dataflow analysis and improve the precision of results. -Most of the security queries search for paths from a source of untrusted input to a sink that represents a vulnerability. This is known as taint tracking. Each source is a starting point for dataflow analysis to track tainted data and each sink is an end point. 
+Many of the security queries search for paths from a source of untrusted input to a sink that represents a vulnerability. This is known as taint tracking. Each source is a starting point for dataflow analysis to track tainted data and each sink is an end point. Taint tracking queries also need to know how data can flow through elements that are not included in the source code. These are modeled as summaries. A summary model enables queries to synthesize the flow behavior through elements in dependency code that is not stored in your repository. @@ -25,13 +25,13 @@ Syntax used to define an element in an extension file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each model of an element is defined using a data extension where each tuple constitutes a model. -A data extension file to extend the standard C# queries included with CodeQL is a YAML file with the form: +A data extension file to extend the standard CPP queries included with CodeQL is a YAML file with the form: .. code-block:: yaml extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: data: - @@ -50,30 +50,31 @@ Publish data extension files in a CodeQL model pack to share You can group one or more data extension files into a CodeQL model pack and publish it to the GitHub Container Registry. This makes it easy for anyone to download the model pack and use it to extend their analysis. For more information, see `Creating a CodeQL model pack `__ and `Publishing and using CodeQL packs `__ in the CodeQL CLI documentation. 
-Extensible predicates used to create custom models in C# -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Extensible predicates used to create custom models in C and C++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The CodeQL library for C# analysis exposes the following extensible predicates: +The CodeQL library for CPP analysis exposes the following extensible predicates: -- ``sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determine which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models `." +- ``sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determine which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models `." - ``sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance)``. This is used to model sinks where tainted data may be used in a way that makes the code vulnerable. - ``summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance)``. This is used to model flow through elements. -- ``neutralModel(namespace, type, name, signature, kind, provenance)``. This is similar to a summary model but used to model the flow of values that have only a minor impact on the dataflow analysis. Manual neutral models (those with a provenance such as ``manual`` or ``ai-manual``) can be used to override generated summary models (those with a provenance such as ``df-generated``), so that the summary model will be ignored. 
Other than that, neutral models have no effect. The extensible predicates are populated using the models defined in data extension files. Examples of custom model definitions ------------------------------------ -The examples in this section are taken from the standard CodeQL C# query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries. +TODO: one good example might do, but we currently have zero. + +The examples in this section are taken from the standard CodeQL CPP query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries. Example: Taint sink in the ``System.Data.SqlClient`` namespace ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the C# query pack models the argument of the ``SqlCommand`` constructor as a SQL injection sink. +This example shows how the CPP query pack models the argument of the ``SqlCommand`` constructor as a SQL injection sink. This is the constructor of the ``SqlCommand`` class, which is located in the ``System.Data.SqlClient`` namespace. -.. code-block:: csharp +.. code-block:: csharp TODO public static void TaintSink(SqlConnection conn, string query) { SqlCommand command = new SqlCommand(query, connection) // The argument to this method is a SQL injection sink. 
@@ -86,7 +87,7 @@ We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, si extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: sinkModel data: - ["System.Data.SqlClient", "SqlCommand", False, "SqlCommand", "(System.String,System.Data.SqlClient.SqlConnection)", "", "Argument[0]", "sql-injection", "manual"] @@ -109,10 +110,10 @@ The remaining values are used to define the ``access path``, the ``kind``, and t Example: Taint source from the ``System.Net.Sockets`` namespace ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the C# query pack models the return value from the ``GetStream`` method as a ``remote`` source. +This example shows how the CPP query pack models the return value from the ``GetStream`` method as a ``remote`` source. This is the ``GetStream`` method in the ``TcpClient`` class, which is located in the ``System.Net.Sockets`` namespace. -.. code-block:: csharp +.. code-block:: csharp TODO public static void Tainted(TcpClient client) { NetworkStream stream = client.GetStream(); // The return value of this method is a remote source of taint. @@ -125,7 +126,7 @@ We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: sourceModel data: - ["System.Net.Sockets", "TcpClient", False, "GetStream", "()", "", "ReturnValue", "remote", "manual"] @@ -144,15 +145,15 @@ The sixth value should be left empty and is out of scope for this documentation. The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source. - The seventh value ``ReturnValue`` is the access path to the return of the method, which means that it is the return value that should be considered a source of tainted input. -- The eighth value ``remote`` is the kind of the source. 
The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. As an example the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models `." +- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. As an example the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models `." - The ninth value ``manual`` is the provenance of the source, which is used to identify the origin of the source. Example: Add flow through the ``Concat`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the C# query pack models flow through a method for a simple case. +This example shows how the CPP query pack models flow through a method for a simple case. This pattern covers many of the cases where we need to summarize flow through a method that is stored in a library or framework outside the repository. -.. code-block:: csharp +.. code-block:: cpp TODO public static void TaintFlow(string s1, string s2) { string t = String.Concat(s1, s2); // There is taint flow from s1 and s2 to t. 
@@ -165,7 +166,7 @@ We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: summaryModel data: - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0]", "ReturnValue", "taint", "manual"] @@ -198,7 +199,7 @@ It would also be possible to merge the two rows into one by using a comma-separa extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: summaryModel data: - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0,1]", "ReturnValue", "taint", "manual"] @@ -207,9 +208,9 @@ This row defines flow from both the first and the second argument to the return Example: Add flow through the ``Trim`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the C# query pack models flow through a method for a simple case. +This example shows how the CPP query pack models flow through a method for a simple case. -.. code-block:: csharp +.. code-block:: cpp TODO public static void TaintFlow(string s) { string t = s.Trim(); // There is taint flow from s to t. @@ -222,7 +223,7 @@ We need to add a tuple to the ``summaryModel``\(namespace, type, subtypes, name, extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: summaryModel data: - ["System", "String", False, "Trim", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"] @@ -250,10 +251,10 @@ The remaining values are used to define the ``access path``, the ``kind``, and t Example: Add flow through the ``Select`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the C# query pack models a more complex flow through a method. +This example shows how the CPP query pack models a more complex flow through a method. Here we model flow through higher order methods and collection types, as well as how to handle extension methods and generics. -.. 
code-block:: csharp +.. code-block:: cpp TODO public static void TaintFlow(IEnumerable stream) { IEnumerable lines = stream.Select(item => item + "\n"); @@ -266,7 +267,7 @@ We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, extensions: - addsTo: - pack: codeql/csharp-all + pack: codeql/cpp-all extensible: summaryModel data: - ["System.Linq", "Enumerable", False, "Select", "(System.Collections.Generic.IEnumerable,System.Func)", "", "Argument[0].Element", "Argument[1].Parameter[0]", "value", "manual"] @@ -307,41 +308,7 @@ For the remaining values for both rows: That is, the first row specifies that values can flow from the elements of the qualifier enumerable into the first argument of the function provided to ``Select``. The second row specifies that values can flow from the return value of the function to the elements of the enumerable returned from ``Select``. -Example: Add a ``neutral`` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how we can model a method as being neutral with respect to flow. We will also cover how to model a property by modeling the getter of the ``Now`` property of the ``DateTime`` class as neutral. -A neutral model is used to define that there is no flow through a method. - -.. code-block:: csharp - - public static void TaintFlow() { - System.DateTime t = System.DateTime.Now; // There is no flow from Now to t. - ... - } - -We need to add a tuple to the ``neutralModel``\(namespace, type, name, signature, kind, provenance) extensible predicate by updating a data extension file. - -.. code-block:: yaml - - extensions: - - addsTo: - pack: codeql/csharp-all - extensible: neutralModel - data: - - ["System", "DateTime", "get_Now", "()", "summary", "manual"] - - -Since we are adding a neutral model, we need to add tuples to the ``neutralModel`` extensible predicate. 
-The first four values identify the callable (in this case the getter of the ``Now`` property) to be modeled as a neutral, the fifth value is the kind, and the sixth value is the provenance (origin) of the neutral. - -- The first value ``System`` is the namespace name. -- The second value ``DateTime`` is the class (type) name. -- The third value ``get_Now`` is the method name. Getter and setter methods are named ``get_`` and ``set_`` respectively. -- The fourth value ``()`` is the method input type signature. -- The fifth value ``summary`` is the kind of the neutral. -- The sixth value ``manual`` is the provenance of the neutral. - -.. _threat-models-csharp: +.. _threat-models-cpp: Threat models ------------- From 8a5b5d220a09ca3bec352f9894f0243dd0aa9467 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 31 May 2024 19:01:37 +0100 Subject: [PATCH 05/15] C++: Add source/sink test cases for Boost::Asio. --- .../source-sink-tests/asio_streams.cpp | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp diff --git a/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp new file mode 100644 index 00000000000..9813839091b --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp @@ -0,0 +1,89 @@ + +// --- stub library headers --- + +namespace std { + typedef unsigned long size_t; + #define SIZE_MAX 0xFFFFFFFF + + template class allocator { + }; + + template struct char_traits { + }; + + template, class Allocator = allocator > + class basic_string { + public: + basic_string(const charT* s, const Allocator& a = Allocator()); + }; + + typedef basic_string string; +}; + +namespace boost { + namespace system { + class error_code { + public: + operator bool() const; + }; + }; + + namespace asio { + template + class 
basic_stream_socket /*: public basic_socket*/ { + }; + + namespace ip { + class tcp { + public: + typedef basic_stream_socket socket; + }; + }; + + template> class basic_streambuf { + public: + basic_streambuf( + std::size_t maximum_size = SIZE_MAX, + const Allocator &allocator = Allocator()); + }; + + typedef basic_streambuf<> streambuf; + + class mutable_buffer { + }; + + template + mutable_buffer buffer(std::basic_string & data); + + template std::size_t read_until( + SyncReadStream &s, + asio::basic_streambuf &b, + char delim, + boost::system::error_code &ec); + + template std::size_t write( + SyncWriteStream &s, + const ConstBufferSequence &buffers, + boost::system::error_code &ec, + int constraint = 0); // simplified + }; +}; + +// --- test code --- + +void test(boost::asio::ip::tcp::socket &socket) { + boost::asio::streambuf recv_buffer; + boost::system::error_code error; + + boost::asio::read_until(socket, recv_buffer, '\0', error); // $ MISSING: remote_source + if (error) { + // ... + } + + std::string send_str = std::string("message"); + boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); + boost::asio::write(socket, send_buffer, error); // $ MISSING: remote_sink + if (error) { + // ... + } +} From 7e5b7346c0afa0e58afde62ccd0fa307152f2839 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:41:23 +0100 Subject: [PATCH 06/15] C++: Add flow test cases for Boost::Asio. 
--- .../dataflow/external-models/asio_streams.cpp | 107 ++++++++++++++++++ .../dataflow/external-models/flow.ql | 15 ++- 2 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp diff --git a/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp new file mode 100644 index 00000000000..645691dba9d --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp @@ -0,0 +1,107 @@ + +// --- stub library headers --- + +namespace std { + typedef unsigned long size_t; + #define SIZE_MAX 0xFFFFFFFF + + template class allocator { + }; + + template struct char_traits { + }; + + template, class Allocator = allocator > + class basic_string { + public: + basic_string(const charT* s, const Allocator& a = Allocator()); + }; + + typedef basic_string string; +}; + +namespace boost { + namespace system { + class error_code { + public: + operator bool() const; + }; + }; + + namespace asio { + template + class basic_stream_socket /*: public basic_socket*/ { + }; + + namespace ip { + class tcp { + public: + typedef basic_stream_socket socket; + }; + }; + + template> class basic_streambuf { + public: + basic_streambuf( + std::size_t maximum_size = SIZE_MAX, + const Allocator &allocator = Allocator()); + }; + + typedef basic_streambuf<> streambuf; + + class mutable_buffer { + }; + + template + mutable_buffer buffer(std::basic_string & data); + + template std::size_t read_until( + SyncReadStream &s, + asio::basic_streambuf &b, + char delim, + boost::system::error_code &ec); + + template std::size_t write( + SyncWriteStream &s, + const ConstBufferSequence &buffers, + boost::system::error_code &ec, + int constraint = 0); // simplified + }; +}; + +// --- test code --- + +char *source(); +void sink(char *); +void sink(std::string); +void sink(boost::asio::streambuf); +void sink(boost::asio::mutable_buffer); 
+ +char *getenv(const char *name); +int send(int, const void*, int, int); + +void test(boost::asio::ip::tcp::socket &socket) { + boost::asio::streambuf recv_buffer; + boost::system::error_code error; + + boost::asio::read_until(socket, recv_buffer, '\0', error); + if (error) { + // ... + } + sink(recv_buffer); // $ MISSING: ir + + boost::asio::write(socket, recv_buffer, error); // $ MISSING: ir + + // --- + + std::string send_str = std::string(source()); + sink(send_str); // $ ir + + boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); + sink(send_buffer); // $ MISSING: ir + + boost::asio::write(socket, send_buffer, error); // $ MISSING: ir + if (error) { + // ... + } +} diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql index 36e858b8abd..99338d20558 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql @@ -1,7 +1,7 @@ import TestUtilities.dataflow.FlowTestCommon import cpp import semmle.code.cpp.ir.dataflow.DataFlow -import semmle.code.cpp.dataflow.ExternalFlow +import semmle.code.cpp.security.FlowSources module IRTest { private import semmle.code.cpp.ir.IR @@ -10,11 +10,22 @@ module IRTest { /** Common data flow configuration to be used by tests. 
*/ module TestAllocationConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { + // external flow source node sourceNode(source, _) + or + // test source function + source.asExpr().(FunctionCall).getTarget().getName() = "source" } predicate isSink(DataFlow::Node sink) { - sinkNode(sink, "test-sink") + // external flow sink node + sinkNode(sink, _) + or + // test sink function + exists(FunctionCall call | + call.getTarget().getName() = "sink" and + sink.asExpr() = call.getAnArgument() + ) } } From f479649727b1cc4b78de4d187e86637b22faf964 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:37:35 +0100 Subject: [PATCH 07/15] C++: Add Boost::Asio models. --- cpp/ql/lib/ext/Boost.Asio.model.yml | 26 +++++++++++++++++++ .../dataflow/external-models/asio_streams.cpp | 8 +++--- .../dataflow/external-models/sinks.expected | 2 ++ .../dataflow/external-models/sources.expected | 1 + .../dataflow/external-models/steps.expected | 1 + .../external-models/validatemodels.expected | 3 +++ .../source-sink-tests/asio_streams.cpp | 4 +-- 7 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 cpp/ql/lib/ext/Boost.Asio.model.yml diff --git a/cpp/ql/lib/ext/Boost.Asio.model.yml b/cpp/ql/lib/ext/Boost.Asio.model.yml new file mode 100644 index 00000000000..652fd65595d --- /dev/null +++ b/cpp/ql/lib/ext/Boost.Asio.model.yml @@ -0,0 +1,26 @@ +extensions: + # partial model of the Boost::Asio network library +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["boost::asio", "", False, "read", "", "", "Argument[1*]", "remote", "manual"] + - ["boost::asio", "", False, "read_at", "", "", "Argument[*2]", "remote", "manual"] + - ["boost::asio", "", False, "read_until", "", "", "Argument[*1]", "remote", "manual"] + - ["boost::asio", "", False, "async_read", "", "", "Argument[1*]", 
"remote", "manual"] + - ["boost::asio", "", False, "async_read_at", "", "", "Argument[*2]", "remote", "manual"] + - ["boost::asio", "", False, "async_read_until", "", "", "Argument[*1]", "remote", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["boost::asio", "", False, "write", "", "", "Argument[*1]", "remote-sink", "manual"] + - ["boost::asio", "", False, "write_at", "", "", "Argument[*2]", "remote-sink", "manual"] + - ["boost::asio", "", False, "async_write", "", "", "Argument[*1]", "remote-sink", "manual"] + - ["boost::asio", "", False, "async_write_at", "", "", "Argument[*2]", "remote-sink", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance + - ["boost::asio", "", False, "buffer", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp index 645691dba9d..401091122b8 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp +++ b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp @@ -88,9 +88,9 @@ void test(boost::asio::ip::tcp::socket &socket) { if (error) { // ... } - sink(recv_buffer); // $ MISSING: ir + sink(recv_buffer); // $ ir - boost::asio::write(socket, recv_buffer, error); // $ MISSING: ir + boost::asio::write(socket, recv_buffer, error); // $ ir // --- @@ -98,9 +98,9 @@ void test(boost::asio::ip::tcp::socket &socket) { sink(send_str); // $ ir boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); - sink(send_buffer); // $ MISSING: ir + sink(send_buffer); // $ ir - boost::asio::write(socket, send_buffer, error); // $ MISSING: ir + boost::asio::write(socket, send_buffer, error); // $ ir if (error) { // ... 
} diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected index bce01f9259b..392c0bc03c1 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected @@ -1,3 +1,5 @@ +| asio_streams.cpp:93:29:93:39 | *recv_buffer | remote-sink | +| asio_streams.cpp:103:29:103:39 | *send_buffer | remote-sink | | test.cpp:9:10:9:10 | 0 | test-sink | | test.cpp:11:10:11:10 | x | test-sink | | test.cpp:15:10:15:10 | y | test-sink | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected index 35c33437a57..aa85e74fc03 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected @@ -1 +1,2 @@ +| asio_streams.cpp:87:34:87:44 | read_until output argument | remote | | test.cpp:7:10:7:18 | call to ymlSource | local | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected index 6815274189b..2bc7fb6b49a 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected @@ -1 +1,2 @@ +| asio_streams.cpp:100:64:100:71 | *send_str | asio_streams.cpp:100:44:100:62 | call to buffer | | test.cpp:13:18:13:18 | x | test.cpp:13:10:13:16 | call to ymlStep | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected index 6e8576979d2..9ad0e33d3c0 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected @@ -1,6 +1,9 @@ | Dubious namespace "" in sink model. 
| | Dubious namespace "" in source model. | | Dubious namespace "" in summary model. | +| Dubious namespace "boost::asio" in sink model. | +| Dubious namespace "boost::asio" in source model. | +| Dubious namespace "boost::asio" in summary model. | | Dubious type "" in sink model. | | Dubious type "" in source model. | | Dubious type "" in summary model. | diff --git a/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp index 9813839091b..bbcf41b0e36 100644 --- a/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp +++ b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp @@ -75,14 +75,14 @@ void test(boost::asio::ip::tcp::socket &socket) { boost::asio::streambuf recv_buffer; boost::system::error_code error; - boost::asio::read_until(socket, recv_buffer, '\0', error); // $ MISSING: remote_source + boost::asio::read_until(socket, recv_buffer, '\0', error); // $ remote_source if (error) { // ... } std::string send_str = std::string("message"); boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); - boost::asio::write(socket, send_buffer, error); // $ MISSING: remote_sink + boost::asio::write(socket, send_buffer, error); // $ remote_sink if (error) { // ... } From 79e9198b2013acecc1af1eb714efdfdec7e829c0 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:55:49 +0100 Subject: [PATCH 08/15] C++: An empty models-as-data namespace / type is not 'dubious' in CPP. 
--- cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll | 4 ++-- .../dataflow/external-models/validatemodels.expected | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index 1266661b913..f0b3006b779 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -330,10 +330,10 @@ module CsvValidation { or summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary" | - not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and + not namespace.regexpMatch("[a-zA-Z0-9_\\.]*") and result = "Dubious namespace \"" + namespace + "\" in " + pred + " model." or - not type.regexpMatch("[a-zA-Z0-9_<>,\\+]+") and + not type.regexpMatch("[a-zA-Z0-9_<>,\\+]*") and result = "Dubious type \"" + type + "\" in " + pred + " model." or not name.regexpMatch("[a-zA-Z0-9_<>,]*") and diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected index 9ad0e33d3c0..f40618dc235 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected @@ -1,10 +1,4 @@ -| Dubious namespace "" in sink model. | -| Dubious namespace "" in source model. | -| Dubious namespace "" in summary model. | | Dubious namespace "boost::asio" in sink model. | | Dubious namespace "boost::asio" in source model. | | Dubious namespace "boost::asio" in summary model. | -| Dubious type "" in sink model. | -| Dubious type "" in source model. | -| Dubious type "" in summary model. | | Invalid kind "remote-sink" in sink model. 
| From 9c2b4c9446c6e9d54611c19a322a795bc9fd9aef Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:20:35 +0100 Subject: [PATCH 09/15] C++: Update the doc examples for C/C++. --- .../customizing-library-models-for-cpp.rst | 272 +++++------------- 1 file changed, 70 insertions(+), 202 deletions(-) diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst index 3bd6d61f3b9..77372f4a1e7 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst @@ -61,64 +61,19 @@ The CodeQL library for CPP analysis exposes the following extensible predicates: The extensible predicates are populated using the models defined in data extension files. -Examples of custom model definitions +Example of custom model definitions ------------------------------------ -TODO: one good example might do, but we currently have zero. - The examples in this section are taken from the standard CodeQL CPP query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries. -Example: Taint sink in the ``System.Data.SqlClient`` namespace -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example: Taint source from the ``boost::asio`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the CPP query pack models the argument of the ``SqlCommand`` constructor as a SQL injection sink. -This is the constructor of the ``SqlCommand`` class, which is located in the ``System.Data.SqlClient`` namespace. +This example shows how the CPP query pack models the second argument of the ``read_until`` function as a ``remote`` source. -.. code-block:: csharp TODO +..
code-block:: cpp - public static void TaintSink(SqlConnection conn, string query) { - SqlCommand command = new SqlCommand(query, connection) // The argument to this method is a SQL injection sink. - ... - } - -We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, signature, ext, input, kind, provenance) extensible predicate by updating a data extension file. - -.. code-block:: yaml - - extensions: - - addsTo: - pack: codeql/cpp-all - extensible: sinkModel - data: - - ["System.Data.SqlClient", "SqlCommand", False, "SqlCommand", "(System.String,System.Data.SqlClient.SqlConnection)", "", "Argument[0]", "sql-injection", "manual"] - -Since we want to add a new sink, we need to add a tuple to the ``sinkModel`` extensible predicate. -The first five values identify the callable (in this case a method) to be modeled as a sink. - -- The first value ``System.Data.SqlClient`` is the namespace name. -- The second value ``SqlCommand`` is the name of the class (type) that contains the method. -- The third value ``False`` is a flag that indicates whether or not the sink also applies to all overrides of the method. -- The fourth value ``SqlCommand`` is the method name. Constructors are named after the class. -- The fifth value ``(System.String,System.Data.SqlClient.SqlConnection)`` is the method input type signature. The type names must be fully qualified. - -The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the sink. - -- The seventh value ``Argument[0]`` is the ``access path`` to the first argument passed to the method, which means that this is the location of the sink. -- The eighth value ``sql-injection`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. In this case - the SQL injection queries. 
-- The ninth value ``manual`` is the provenance of the sink, which is used to identify the origin of the sink. - -Example: Taint source from the ``System.Net.Sockets`` namespace -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the CPP query pack models the return value from the ``GetStream`` method as a ``remote`` source. -This is the ``GetStream`` method in the ``TcpClient`` class, which is located in the ``System.Net.Sockets`` namespace. - -.. code-block:: csharp TODO - - public static void Tainted(TcpClient client) { - NetworkStream stream = client.GetStream(); // The return value of this method is a remote source of taint. - ... - } + boost::asio::read_until(socket, recv_buffer, '\0', error); We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, signature, ext, output, kind, provenance) extensible predicate by updating a data extension file. @@ -129,36 +84,68 @@ We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, pack: codeql/cpp-all extensible: sourceModel data: - - ["System.Net.Sockets", "TcpClient", False, "GetStream", "()", "", "ReturnValue", "remote", "manual"] - + - ["boost::asio", "", False, "read_until", "", "", "Argument[*1]", "remote", "manual"] Since we are adding a new source, we need to add a tuple to the ``sourceModel`` extensible predicate. -The first five values identify the callable (in this case a method) to be modeled as a source. +The first five values identify the callable (in this case a free function) to be modeled as a source. -- The first value ``System.Net.Sockets`` is the namespace name. -- The second value ``TcpClient`` is the name of the class (type) that contains the source. -- The third value ``False`` is a flag that indicates whether or not the source also applies to all overrides of the method. -- The fourth value ``GetStream`` is the method name. -- The fifth value ``()`` is the method input type signature. 
+- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank. +- The third value ``False`` is a flag that indicates whether or not the source also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"read_until"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``read_until``. The sixth value should be left empty and is out of scope for this documentation. The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source. -- The seventh value ``ReturnValue`` is the access path to the return of the method, which means that it is the return value that should be considered a source of tainted input. -- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. As an example the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models `." -- The ninth value ``manual`` is the provenance of the source, which is used to identify the origin of the source. +- The seventh value ``"Argument[*1]"`` is the ``access path``, which means that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The eighth value ``"remote"`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data.
For more information, see ":ref:`Threat models `." +- The ninth value ``"manual"`` is the provenance of the source, which is used to identify the origin of the source. -Example: Add flow through the ``Concat`` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the CPP query pack models flow through a method for a simple case. -This pattern covers many of the cases where we need to summarize flow through a method that is stored in a library or framework outside the repository. +Example: Taint sink in the ``boost::asio`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. code-block:: cpp TODO +This example shows how the CPP query pack models the second argument of the ``boost::asio::write`` function as a remote flow sink. A remote flow sink is where data is transmitted to other machines across a network, which is used for example by the "Cleartext transmission of sensitive information" (``cpp/cleartext-transmission``) query. - public static void TaintFlow(string s1, string s2) { - string t = String.Concat(s1, s2); // There is taint flow from s1 and s2 to t. - ... - } +.. code-block:: cpp + + boost::asio::write(socket, send_buffer, error); + +We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, signature, ext, input, kind, provenance) extensible predicate by updating a data extension file. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: + - ["boost::asio", "", False, "write", "", "", "Argument[*1]", "remote-sink", "manual"] + +Since we want to add a new sink, we need to add a tuple to the ``sinkModel`` extensible predicate. +The first five values identify the callable (in this case a free function) to be modeled as a sink. + +- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank.
+- The third value ``False`` is a flag that indicates whether or not the sink also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"write"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``write``. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the sink. + +- The seventh value ``"Argument[*1]"`` is the ``access path``, which means that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The eighth value ``"remote-sink"`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. +- The ninth value ``"manual"`` is the provenance of the sink, which is used to identify the origin of the sink. + +Example: Add flow through the ``boost::asio::buffer`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows how the CPP query pack models flow through a function for a simple case. + +.. 
code-block:: cpp + + boost::asio::write(socket, boost::asio::buffer(send_str), error); We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: @@ -169,144 +156,25 @@ We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, pack: codeql/cpp-all extensible: summaryModel data: - - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0]", "ReturnValue", "taint", "manual"] - - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[1]", "ReturnValue", "taint", "manual"] + - ["boost::asio", "", False, "buffer", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"] -Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. -Each tuple defines flow from one argument to the return value. -The first row defines flow from the first argument (``s1`` in the example) to the return value (``t`` in the example) and the second row defines flow from the second argument (``s2`` in the example) to the return value (``t`` in the example). +Since we are adding flow through a function, we need to add tuples to the ``summaryModel`` extensible predicate. -The first five values identify the callable (in this case a method) to be modeled as a summary. -These are the same for both of the rows above as we are adding two summaries for the same method. +The first five values identify the callable (in this case a free function) to be modeled as a summary. -- The first value ``System`` is the namespace name. -- The second value ``String`` is the class (type) name. -- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. -- The fourth value ``Concat`` is the method name. -- The fifth value ``(System.Object,System.Object)`` is the method input type signature.
+- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank. +- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"buffer"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``buffer``. The sixth value should be left empty and is out of scope for this documentation. The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary. -- The seventh value is the access path to the input (where data flows from). ``Argument[0]`` is the access path to the first argument (``s1`` in the example) and ``Argument[1]`` is the access path to the second argument (``s2`` in the example). -- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value. -- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call. -- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. - -It would also be possible to merge the two rows into one by using a comma-separated list in the seventh value. This would be useful if the method has many arguments and the flow is the same for all of them. - -..
code-block:: yaml - - extensions: - - addsTo: - pack: codeql/cpp-all - extensible: summaryModel - data: - - ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0,1]", "ReturnValue", "taint", "manual"] - -This row defines flow from both the first and the second argument to the return value. The seventh value ``Argument[0,1]`` is shorthand for specifying an access path to both ``Argument[0]`` and ``Argument[1]``. - -Example: Add flow through the ``Trim`` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the CPP query pack models flow through a method for a simple case. - -.. code-block:: cpp TODO - - public static void TaintFlow(string s) { - string t = s.Trim(); // There is taint flow from s to t. - ... - } - -We need to add a tuple to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: - -.. code-block:: yaml - - extensions: - - addsTo: - pack: codeql/cpp-all - extensible: summaryModel - data: - - ["System", "String", False, "Trim", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"] - -Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. -Each tuple defines flow from one argument to the return value. -The first row defines flow from the qualifier of the method call (``s1`` in the example) to the return value (``t`` in the example). - -The first five values identify the callable (in this case a method) to be modeled as a summary. -These are the same for both of the rows above as we are adding two summaries for the same method. - -- The first value ``System`` is the namespace name. -- The second value ``String`` is the class (type) name. -- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. -- The fourth value ``Trim`` is the method name. 
-- The fifth value ``()`` is the method input type signature. - -The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary. - -- The seventh value is the access path to the input (where data flows from). ``Argument[this]`` is the access path to the qualifier (``s`` in the example). -- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value. -- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call. -- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. - -Example: Add flow through the ``Select`` method -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example shows how the CPP query pack models a more complex flow through a method. -Here we model flow through higher order methods and collection types, as well as how to handle extension methods and generics. - -.. code-block:: cpp TODO - - public static void TaintFlow(IEnumerable stream) { - IEnumerable lines = stream.Select(item => item + "\n"); - ... - } - -We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: - -.. 
code-block:: yaml - - extensions: - - addsTo: - pack: codeql/cpp-all - extensible: summaryModel - data: - - ["System.Linq", "Enumerable", False, "Select", "(System.Collections.Generic.IEnumerable,System.Func)", "", "Argument[0].Element", "Argument[1].Parameter[0]", "value", "manual"] - - ["System.Linq", "Enumerable", False, "Select", "(System.Collections.Generic.IEnumerable,System.Func)", "", "Argument[1].ReturnValue", "ReturnValue.Element", "value", "manual"] - - -Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate. -Each tuple defines part of the flow that comprises the total flow through the ``Select`` method. -The first five values identify the callable (in this case a method) to be modeled as a summary. -These are the same for both of the rows above as we are adding two summaries for the same method. - -- The first value ``System.Linq`` is the namespace name. -- The second value ``Enumerable`` is the class (type) name. -- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. -- The fourth value ``Select`` is the method name, along with the type parameters for the method. The names of the generic type parameters provided in the model must match the names of the generic type parameters in the method signature in the source code. -- The fifth value ``(System.Collections.Generic.IEnumerable,System.Func)`` is the method input type signature. The generics in the signature must match the generics in the method signature in the source code. - -The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary definition. - -- The seventh value is the access path to the ``input`` (where data flows from). -- The eighth value is the access path to the ``output`` (where data flows to). 
- -For the first row: - -- The seventh value is ``Argument[0].Element``, which is the access path to the elements of the qualifier (the elements of the enumerable ``stream`` in the example). -- The eight value is ``Argument[1].Parameter[0]``, which is the access path to the first parameter of the ``System.Func`` argument of ``Select`` (the lambda parameter ``item`` in the example). - -For the second row: - -- The seventh value is ``Argument[1].ReturnValue``, which is the access path to the return value of the ``System.Func`` argument of ``Select`` (the return value of the lambda in the example). -- The eighth value is ``ReturnValue.Element``, which is the access path to the elements of the return value of ``Select`` (the elements of the enumerable ``lines`` in the example). - -For the remaining values for both rows: - -- The ninth value ``value`` is the kind of the flow. ``value`` means that the value is preserved. -- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary. - -That is, the first row specifies that values can flow from the elements of the qualifier enumerable into the first argument of the function provided to ``Select``. The second row specifies that values can flow from the return value of the function to the elements of the enumerable returned from ``Select``. +- The seventh value is the access path to the input (where data flows from). ``Argument[*0]`` is the access path to the first indirection (or pointed-to value, ``*``) of the first argument (``Argument[0]``) passed to the function. +- The eighth value ``"ReturnValue"`` is the access path to the output (where data flows to), in this case the return value. +- The ninth value ``"taint"`` is the kind of the flow. ``taint`` means that taint is propagated through the call. +- The tenth value ``"manual"`` is the provenance of the summary, which is used to identify the origin of the summary. .. 
_threat-models-cpp: From 38c47a4b11612f1df30e8d35804e705a11d208a5 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:47:25 +0100 Subject: [PATCH 10/15] C++: Add change notes. --- cpp/ql/lib/change-notes/2024-06-14-boost-asio.md | 4 ++++ .../change-notes/2024-06-14-models-as-data-yml-extensions.md | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 cpp/ql/lib/change-notes/2024-06-14-boost-asio.md create mode 100644 cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md diff --git a/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md b/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md new file mode 100644 index 00000000000..b62e9e4d4cc --- /dev/null +++ b/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* A partial model for the `Boost.Asio` network library has been added. This includes sources, sinks and summaries for certain functions in `Boost.Asio`, such as `read_until` and `write`. diff --git a/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md b/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md new file mode 100644 index 00000000000..7229959f92a --- /dev/null +++ b/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Data models can now be added with data extensions. In this way source, sink and summary models can be added in extension `.model.yml` files, rather than by writing classes in QL code. New models should be added in the `lib/ext` folder. From f9ef72eca60e80cad6326f8b5458fb75d5ad013a Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:31:44 +0100 Subject: [PATCH 11/15] C++: Fix typos. 
--- cpp/ql/lib/ext/Boost.Asio.model.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/ql/lib/ext/Boost.Asio.model.yml b/cpp/ql/lib/ext/Boost.Asio.model.yml index 652fd65595d..436d36fdfc2 100644 --- a/cpp/ql/lib/ext/Boost.Asio.model.yml +++ b/cpp/ql/lib/ext/Boost.Asio.model.yml @@ -5,10 +5,10 @@ extensions: pack: codeql/cpp-all extensible: sourceModel data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance - - ["boost::asio", "", False, "read", "", "", "Argument[1*]", "remote", "manual"] + - ["boost::asio", "", False, "read", "", "", "Argument[*1]", "remote", "manual"] - ["boost::asio", "", False, "read_at", "", "", "Argument[*2]", "remote", "manual"] - ["boost::asio", "", False, "read_until", "", "", "Argument[*1]", "remote", "manual"] - - ["boost::asio", "", False, "async_read", "", "", "Argument[1*]", "remote", "manual"] + - ["boost::asio", "", False, "async_read", "", "", "Argument[*1]", "remote", "manual"] - ["boost::asio", "", False, "async_read_at", "", "", "Argument[*2]", "remote", "manual"] - ["boost::asio", "", False, "async_read_until", "", "", "Argument[*1]", "remote", "manual"] - addsTo: From 894497218dfe066d12ddc993f3523c962c2cd35a Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:49:13 +0100 Subject: [PATCH 12/15] Shared: Recognize 'remote-sink' in ModelValidation.qll. 
--- .../dataflow/external-models/validatemodels.expected | 1 - shared/mad/codeql/mad/ModelValidation.qll | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected index f40618dc235..c0215759ea4 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected @@ -1,4 +1,3 @@ | Dubious namespace "boost::asio" in sink model. | | Dubious namespace "boost::asio" in source model. | | Dubious namespace "boost::asio" in summary model. | -| Invalid kind "remote-sink" in sink model. | diff --git a/shared/mad/codeql/mad/ModelValidation.qll b/shared/mad/codeql/mad/ModelValidation.qll index 20bcdd1908c..d403ecdb053 100644 --- a/shared/mad/codeql/mad/ModelValidation.qll +++ b/shared/mad/codeql/mad/ModelValidation.qll @@ -41,7 +41,9 @@ module KindValidation { "database-store", "format-string", "hash-iteration-count", "predicate-injection", "preferences-store", "tls-protocol-version", "transmission", "webview-fetch", "xxe", // Go-only currently, but may be shared in the future - "jwt" + "jwt", + // CPP-only currently + "remote-sink" ] or this.matches([ From 7aec488d8a58ba2c98a576664228608d5cb3ff32 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:53:00 +0100 Subject: [PATCH 13/15] C++: Permit ':' in models-as-data namespaces. 
--- cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll | 2 +- .../dataflow/external-models/validatemodels.expected | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index f0b3006b779..bb893dc74c0 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -330,7 +330,7 @@ module CsvValidation { or summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary" | - not namespace.regexpMatch("[a-zA-Z0-9_\\.]*") and + not namespace.regexpMatch("[a-zA-Z0-9_\\.:]*") and result = "Dubious namespace \"" + namespace + "\" in " + pred + " model." or not type.regexpMatch("[a-zA-Z0-9_<>,\\+]*") and diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected index c0215759ea4..e69de29bb2d 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected @@ -1,3 +0,0 @@ -| Dubious namespace "boost::asio" in sink model. | -| Dubious namespace "boost::asio" in source model. | -| Dubious namespace "boost::asio" in summary model. | From 38acfcf42ec973c04244f440f70f8bdbc31d8310 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:07:27 +0100 Subject: [PATCH 14/15] C++: Doc tweaks addressing review comments. 
--- .../customizing-library-models-for-cpp.rst | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst index 77372f4a1e7..29e8be5a4ae 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst @@ -96,11 +96,11 @@ The first five values identify the callable (in this case a free function) to be - The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``read_until``. The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source. +The remaining values are used to define the output specification, the ``kind``, and the ``provenance`` (origin) of the source. -- The seventh value ``"Argument[*1]"`` is the ``access path``, which means that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The seventh value ``"Argument[*1]"`` is the output specification, which means in this case that the source is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. - The eighth value ``"remote"`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. For more information, see ":ref:`Threat models `." -- The ninth value ``"manual"`` is the provenance of the source, which is used to identify the origin of the source. 
+- The ninth value ``"manual"`` is the provenance of the source, which is used to identify the origin of the source model. Example: Taint sink in the ``boost::asio`` namespace ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,11 +132,11 @@ The first five values identify the callable (in this case a free function) to be - The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``write``. The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the sink. +The remaining values are used to define the input specification, the ``kind``, and the ``provenance`` (origin) of the sink. -- The seventh value ``"Argument[*1]"`` is the ``access path``, which means that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The seventh value ``"Argument[*1]"`` is the input specification, which means in this case that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. - The eighth value ``"remote-sink"`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. -- The ninth value ``"manual"`` is the provenance of the sink, which is used to identify the origin of the sink. 
Example: Add flow through the ``boost::asio::buffer`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -169,12 +169,12 @@ The first five values identify the callable (in this case free function) to be m - The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``buffer``. The sixth value should be left empty and is out of scope for this documentation. -The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary. +The remaining values are used to define the input and output specifications, the ``kind``, and the ``provenance`` (origin) of the summary. -- The seventh value is the access path to the input (where data flows from). ``Argument[*0]`` is the access path to the first indirection (or pointed-to value, ``*``) of the first argument (``Argument[0]``) passed to the function. -- The eighth value ``"ReturnValue"`` is the access path to the output (where data flows to), in this case the return value. +- The seventh value is the input specification (where data flows from). ``Argument[*0]`` specifies the first indirection (or pointed-to value, ``*``) of the first argument (``Argument[0]``) passed to the function. +- The eighth value ``"ReturnValue"`` is the output specification (where data flows to), in this case the return value. - The ninth value ``"taint"`` is the kind of the flow. ``taint`` means that taint is propagated through the call. -- The tenth value ``"manual"`` is the provenance of the summary, which is used to identify the origin of the summary. +- The tenth value ``"manual"`` is the provenance of the summary, which is used to identify the origin of the summary model. .. 
_threat-models-cpp: From 8acb11924d53bd409a914da6b53529f70af63cb4 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:22:38 +0100 Subject: [PATCH 15/15] C++: Remove redundant import. --- cpp/ql/test/library-tests/dataflow/external-models/flow.ql | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql index 99338d20558..d6c2a70c4d9 100644 --- a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql @@ -1,6 +1,5 @@ import TestUtilities.dataflow.FlowTestCommon import cpp -import semmle.code.cpp.ir.dataflow.DataFlow import semmle.code.cpp.security.FlowSources module IRTest {