diff --git a/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md b/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md new file mode 100644 index 00000000000..b62e9e4d4cc --- /dev/null +++ b/cpp/ql/lib/change-notes/2024-06-14-boost-asio.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* A partial model for the `Boost.Asio` network library has been added. This includes sources, sinks and summaries for certain functions in `Boost.Asio`, such as `read_until` and `write`. diff --git a/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md b/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md new file mode 100644 index 00000000000..7229959f92a --- /dev/null +++ b/cpp/ql/lib/change-notes/2024-06-14-models-as-data-yml-extensions.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Data models can now be added with data extensions. In this way source, sink and summary models can be added in extension `.model.yml` files, rather than by writing classes in QL code. New models should be added in the `lib/ext` folder. 
diff --git a/cpp/ql/lib/ext/Boost.Asio.model.yml b/cpp/ql/lib/ext/Boost.Asio.model.yml new file mode 100644 index 00000000000..436d36fdfc2 --- /dev/null +++ b/cpp/ql/lib/ext/Boost.Asio.model.yml @@ -0,0 +1,25 @@ + # partial model of the Boost::Asio network library +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["boost::asio", "", False, "read", "", "", "Argument[*1]", "remote", "manual"] + - ["boost::asio", "", False, "read_at", "", "", "Argument[*2]", "remote", "manual"] + - ["boost::asio", "", False, "read_until", "", "", "Argument[*1]", "remote", "manual"] + - ["boost::asio", "", False, "async_read", "", "", "Argument[*1]", "remote", "manual"] + - ["boost::asio", "", False, "async_read_at", "", "", "Argument[*2]", "remote", "manual"] + - ["boost::asio", "", False, "async_read_until", "", "", "Argument[*1]", "remote", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["boost::asio", "", False, "write", "", "", "Argument[*1]", "remote-sink", "manual"] + - ["boost::asio", "", False, "write_at", "", "", "Argument[*2]", "remote-sink", "manual"] + - ["boost::asio", "", False, "async_write", "", "", "Argument[*1]", "remote-sink", "manual"] + - ["boost::asio", "", False, "async_write_at", "", "", "Argument[*2]", "remote-sink", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance + - ["boost::asio", "", False, "buffer", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"] diff --git a/cpp/ql/lib/ext/empty.model.yml b/cpp/ql/lib/ext/empty.model.yml new file mode 100644 index 00000000000..6f160b62d7a --- /dev/null +++ b/cpp/ql/lib/ext/empty.model.yml @@ -0,0 +1,15 @@ +extensions: + # Make sure that the extensible model predicates 
have at least one definition + # to avoid errors about undefined extensionals. + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: [] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: [] diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index 4d378c49259..35aaae8593a 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -14,4 +14,6 @@ dependencies: codeql/tutorial: ${workspace} codeql/util: ${workspace} codeql/xml: ${workspace} +dataExtensions: + - ext/*.model.yml warnOnImplicitThis: true diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index cbb212e50e2..d3d7d6b2442 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -78,6 +78,7 @@ private import internal.FlowSummaryImpl private import internal.FlowSummaryImpl::Public private import internal.FlowSummaryImpl::Private private import internal.FlowSummaryImpl::Private::External +private import internal.ExternalFlowExtensions as Extensions private import codeql.mad.ModelValidation as SharedModelVal private import codeql.util.Unit @@ -138,6 +139,9 @@ predicate sourceModel( row.splitAt(";", 7) = kind ) and provenance = "manual" + or + Extensions::sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, + _) } /** Holds if a sink model exists for the given parameters. */ @@ -158,6 +162,8 @@ predicate sinkModel( row.splitAt(";", 7) = kind ) and provenance = "manual" + or + Extensions::sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance, _) } /** Holds if a summary model exists for the given parameters. 
*/ @@ -179,6 +185,9 @@ predicate summaryModel( row.splitAt(";", 8) = kind ) and provenance = "manual" + or + Extensions::summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, + provenance, _) } private predicate relevantNamespace(string namespace) { @@ -323,10 +332,10 @@ module CsvValidation { or summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary" | - not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and + not namespace.regexpMatch("[a-zA-Z0-9_\\.:]*") and result = "Dubious namespace \"" + namespace + "\" in " + pred + " model." or - not type.regexpMatch("[a-zA-Z0-9_<>,\\+]+") and + not type.regexpMatch("[a-zA-Z0-9_<>,\\+]*") and result = "Dubious type \"" + type + "\" in " + pred + " model." or not name.regexpMatch("[a-zA-Z0-9_<>,]*") and diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll new file mode 100644 index 00000000000..cd1af34c8d8 --- /dev/null +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll @@ -0,0 +1,27 @@ +/** + * This module provides extensible predicates for defining MaD models. + */ + +/** + * Holds if an external source model exists for the given parameters. + */ +extensible predicate sourceModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if an external sink model exists for the given parameters. + */ +extensible predicate sinkModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string kind, string provenance, QlBuiltins::ExtensionId madId +); + +/** + * Holds if an external summary model exists for the given parameters. 
+ */ +extensible predicate summaryModel( + string namespace, string type, boolean subtypes, string name, string signature, string ext, + string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId +); diff --git a/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp new file mode 100644 index 00000000000..401091122b8 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/asio_streams.cpp @@ -0,0 +1,107 @@ + +// --- stub library headers --- + +namespace std { + typedef unsigned long size_t; + #define SIZE_MAX 0xFFFFFFFF + + template class allocator { + }; + + template struct char_traits { + }; + + template, class Allocator = allocator > + class basic_string { + public: + basic_string(const charT* s, const Allocator& a = Allocator()); + }; + + typedef basic_string string; +}; + +namespace boost { + namespace system { + class error_code { + public: + operator bool() const; + }; + }; + + namespace asio { + template + class basic_stream_socket /*: public basic_socket*/ { + }; + + namespace ip { + class tcp { + public: + typedef basic_stream_socket socket; + }; + }; + + template> class basic_streambuf { + public: + basic_streambuf( + std::size_t maximum_size = SIZE_MAX, + const Allocator &allocator = Allocator()); + }; + + typedef basic_streambuf<> streambuf; + + class mutable_buffer { + }; + + template + mutable_buffer buffer(std::basic_string & data); + + template std::size_t read_until( + SyncReadStream &s, + asio::basic_streambuf &b, + char delim, + boost::system::error_code &ec); + + template std::size_t write( + SyncWriteStream &s, + const ConstBufferSequence &buffers, + boost::system::error_code &ec, + int constraint = 0); // simplified + }; +}; + +// --- test code --- + +char *source(); +void sink(char *); +void sink(std::string); +void sink(boost::asio::streambuf); +void sink(boost::asio::mutable_buffer); + +char *getenv(const char 
*name); +int send(int, const void*, int, int); + +void test(boost::asio::ip::tcp::socket &socket) { + boost::asio::streambuf recv_buffer; + boost::system::error_code error; + + boost::asio::read_until(socket, recv_buffer, '\0', error); + if (error) { + // ... + } + sink(recv_buffer); // $ ir + + boost::asio::write(socket, recv_buffer, error); // $ ir + + // --- + + std::string send_str = std::string(source()); + sink(send_str); // $ ir + + boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); + sink(send_buffer); // $ ir + + boost::asio::write(socket, send_buffer, error); // $ ir + if (error) { + // ... + } +} diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.expected b/cpp/ql/test/library-tests/dataflow/external-models/flow.expected new file mode 100644 index 00000000000..8ec8033d086 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.expected @@ -0,0 +1,2 @@ +testFailures +failures diff --git a/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml new file mode 100644 index 00000000000..42ca51bc424 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ext.yml @@ -0,0 +1,16 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["", "", False, "ymlSource", "", "", "ReturnValue", "local", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["", "", False, "ymlSink", "", "", "Argument[0]", "test-sink", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance + - ["", "", False, "ymlStep", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] \ No newline at end of file diff --git 
a/cpp/ql/test/library-tests/dataflow/external-models/flow.ql b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql new file mode 100644 index 00000000000..d6c2a70c4d9 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/flow.ql @@ -0,0 +1,34 @@ +import TestUtilities.dataflow.FlowTestCommon +import cpp +import semmle.code.cpp.security.FlowSources + +module IRTest { + private import semmle.code.cpp.ir.IR + private import semmle.code.cpp.ir.dataflow.TaintTracking + + /** Common data flow configuration to be used by tests. */ + module TestAllocationConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + // external flow source node + sourceNode(source, _) + or + // test source function + source.asExpr().(FunctionCall).getTarget().getName() = "source" + } + + predicate isSink(DataFlow::Node sink) { + // external flow sink node + sinkNode(sink, _) + or + // test sink function + exists(FunctionCall call | + call.getTarget().getName() = "sink" and + sink.asExpr() = call.getAnArgument() + ) + } + } + + module IRFlow = TaintTracking::Global; +} + +import MakeTest> diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected new file mode 100644 index 00000000000..392c0bc03c1 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.expected @@ -0,0 +1,5 @@ +| asio_streams.cpp:93:29:93:39 | *recv_buffer | remote-sink | +| asio_streams.cpp:103:29:103:39 | *send_buffer | remote-sink | +| test.cpp:9:10:9:10 | 0 | test-sink | +| test.cpp:11:10:11:10 | x | test-sink | +| test.cpp:15:10:15:10 | y | test-sink | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml new file mode 100644 index 00000000000..b2ee15edfd3 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ext.yml @@ -0,0 +1,6 @@ +extensions: + - 
addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: # namespace, type, subtypes, name, signature, ext, input, kind, provenance + - ["", "", False, "ymlSink", "", "", "Argument[0]", "test-sink", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql new file mode 100644 index 00000000000..d3bafd1c369 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sinks.ql @@ -0,0 +1,7 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow + +from DataFlow::Node node, string kind +where sinkNode(node, kind) +select node, kind diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.expected b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected new file mode 100644 index 00000000000..aa85e74fc03 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.expected @@ -0,0 +1,2 @@ +| asio_streams.cpp:87:34:87:44 | read_until output argument | remote | +| test.cpp:7:10:7:18 | call to ymlSource | local | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml new file mode 100644 index 00000000000..91bf18cf79b --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.ext.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance + - ["", "", False, "ymlSource", "", "", "ReturnValue", "local", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/sources.ql b/cpp/ql/test/library-tests/dataflow/external-models/sources.ql new file mode 100644 index 00000000000..ed79d740f88 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/sources.ql @@ -0,0 +1,7 @@ +import cpp +import 
semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow + +from DataFlow::Node node, string kind +where sourceNode(node, kind) +select node, kind diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.expected b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected new file mode 100644 index 00000000000..2bc7fb6b49a --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.expected @@ -0,0 +1,2 @@ +| asio_streams.cpp:100:64:100:71 | *send_str | asio_streams.cpp:100:44:100:62 | call to buffer | +| test.cpp:13:18:13:18 | x | test.cpp:13:10:13:16 | call to ymlStep | diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml b/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml new file mode 100644 index 00000000000..c8a195b7aa6 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.ext.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: # namespace, type, subtypes, name, signature, ext, input, output, kind, provenance + - ["", "", False, "ymlStep", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] diff --git a/cpp/ql/test/library-tests/dataflow/external-models/steps.ql b/cpp/ql/test/library-tests/dataflow/external-models/steps.ql new file mode 100644 index 00000000000..2c141d8334b --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/steps.ql @@ -0,0 +1,8 @@ +import cpp +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.dataflow.ExternalFlow +import semmle.code.cpp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl + +from DataFlow::Node node1, DataFlow::Node node2 +where FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(node1, node2, _) +select node1, node2 diff --git a/cpp/ql/test/library-tests/dataflow/external-models/test.cpp b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp new file mode 100644 index 00000000000..aa50f6715f2 
--- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/test.cpp @@ -0,0 +1,16 @@ + +int ymlSource(); +void ymlSink(int value); +int ymlStep(int value); + +void test() { + int x = ymlSource(); + + ymlSink(0); + + ymlSink(x); // $ ir + + int y = ymlStep(x); + + ymlSink(y); // $ ir +} diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.expected new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql new file mode 100644 index 00000000000..a162349b7cd --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/external-models/validatemodels.ql @@ -0,0 +1,2 @@ +import cpp +import semmle.code.cpp.dataflow.ExternalFlow::CsvValidation diff --git a/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp new file mode 100644 index 00000000000..bbcf41b0e36 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/source-sink-tests/asio_streams.cpp @@ -0,0 +1,89 @@ + +// --- stub library headers --- + +namespace std { + typedef unsigned long size_t; + #define SIZE_MAX 0xFFFFFFFF + + template class allocator { + }; + + template struct char_traits { + }; + + template, class Allocator = allocator > + class basic_string { + public: + basic_string(const charT* s, const Allocator& a = Allocator()); + }; + + typedef basic_string string; +}; + +namespace boost { + namespace system { + class error_code { + public: + operator bool() const; + }; + }; + + namespace asio { + template + class basic_stream_socket /*: public basic_socket*/ { + }; + + namespace ip { + class tcp { + public: + typedef basic_stream_socket socket; + }; + }; + + template> class basic_streambuf { + public: + basic_streambuf( + std::size_t maximum_size = SIZE_MAX, + const 
Allocator &allocator = Allocator()); + }; + + typedef basic_streambuf<> streambuf; + + class mutable_buffer { + }; + + template + mutable_buffer buffer(std::basic_string & data); + + template std::size_t read_until( + SyncReadStream &s, + asio::basic_streambuf &b, + char delim, + boost::system::error_code &ec); + + template std::size_t write( + SyncWriteStream &s, + const ConstBufferSequence &buffers, + boost::system::error_code &ec, + int constraint = 0); // simplified + }; +}; + +// --- test code --- + +void test(boost::asio::ip::tcp::socket &socket) { + boost::asio::streambuf recv_buffer; + boost::system::error_code error; + + boost::asio::read_until(socket, recv_buffer, '\0', error); // $ remote_source + if (error) { + // ... + } + + std::string send_str = std::string("message"); + boost::asio::mutable_buffer send_buffer = boost::asio::buffer(send_str); + boost::asio::write(socket, send_buffer, error); // $ remote_sink + if (error) { + // ... + } +} diff --git a/docs/codeql/codeql-language-guides/codeql-for-cpp.rst b/docs/codeql/codeql-language-guides/codeql-for-cpp.rst index a564f54042f..584f8c63bf6 100644 --- a/docs/codeql/codeql-language-guides/codeql-for-cpp.rst +++ b/docs/codeql/codeql-language-guides/codeql-for-cpp.rst @@ -21,6 +21,8 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat using-range-analsis-in-cpp hash-consing-and-value-numbering advanced-dataflow-scenarios-cpp + customizing-library-models-for-cpp + - :doc:`Basic query for C and C++ code `: Learn to write and run a simple CodeQL query. @@ -46,3 +48,5 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat - :doc:`Hash consing and value numbering `: You can use specialized CodeQL libraries to recognize expressions that are syntactically identical or compute the same value at runtime in C and C++ codebases. 
- :doc:`Advanced C/C++ dataflow scenarios <advanced-dataflow-scenarios-cpp>`: You can track precise data flow in C and C++ codebases by distinguishing between a pointer and its indirection(s). + +- :doc:`Customizing library models for C and C++ <customizing-library-models-for-cpp>`: You can model frameworks and libraries that your codebase depends on using data extensions and publish them as CodeQL model packs. diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst new file mode 100644 index 00000000000..29e8be5a4ae --- /dev/null +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-cpp.rst @@ -0,0 +1,184 @@ +.. _customizing-library-models-for-cpp: + +Customizing library models for C and C++ +======================================== + +You can model the methods and callables that control data flow in any framework or library. This is especially useful for custom frameworks or niche libraries that are not supported by the standard CodeQL libraries. + +.. include:: ../reusables/beta-note-customizing-library-models.rst + +About this article +------------------ + +This article contains reference material about how to define custom models for sources, sinks, and flow summaries for C and C++ dependencies in data extension files. + +About data extensions +--------------------- + +You can customize analysis by defining models (summaries, sinks, and sources) of your code's C and C++ dependencies in data extension files. Each model defines the behavior of one or more elements of your library or framework, such as callables. When you run dataflow analysis, these models expand the potential sources and sinks tracked by dataflow analysis and improve the precision of results. + +Many of the security queries search for paths from a source of untrusted input to a sink that represents a vulnerability. This is known as taint tracking. 
Each source is a starting point for dataflow analysis to track tainted data and each sink is an end point. + +Taint tracking queries also need to know how data can flow through elements that are not included in the source code. These are modeled as summaries. A summary model enables queries to synthesize the flow behavior through elements in dependency code that is not stored in your repository. + +Syntax used to define an element in an extension file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each model of an element is defined using a data extension where each tuple constitutes a model. +A data extension file to extend the standard CPP queries included with CodeQL is a YAML file with the form: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: <name of extensible predicate> + data: + - <tuple1> + - <tuple2> + - ... + +Each YAML file may contain one or more top-level extensions. + +- ``addsTo`` defines the CodeQL pack name and extensible predicate that the extension is injected into. +- ``data`` defines one or more rows of tuples that are injected as values into the extensible predicate. The number of columns and their types must match the definition of the extensible predicate. + +Data extensions use union semantics, which means that the tuples of all extensions for a single extensible predicate are combined, duplicates are removed, and all of the remaining tuples are queryable by referencing the extensible predicate. + +Publish data extension files in a CodeQL model pack to share +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can group one or more data extension files into a CodeQL model pack and publish it to the GitHub Container Registry. This makes it easy for anyone to download the model pack and use it to extend their analysis. For more information, see `Creating a CodeQL model pack <https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack>`__ and `Publishing and using CodeQL packs <https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/publishing-and-using-codeql-packs>`__ in the CodeQL CLI documentation. 
+ +Extensible predicates used to create custom models in C and C++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The CodeQL library for CPP analysis exposes the following extensible predicates: + +- ``sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determines which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models <threat-models-cpp>`." +- ``sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance)``. This is used to model sinks where tainted data may be used in a way that makes the code vulnerable. +- ``summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance)``. This is used to model flow through elements. + +The extensible predicates are populated using the models defined in data extension files. + +Example of custom model definitions +------------------------------------ + +The examples in this section are taken from the standard CodeQL CPP query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries. + +Example: Taint source from the ``boost::asio`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows how the CPP query pack models the data read into the second argument of the ``read_until`` function as a ``remote`` source. + +.. code-block:: cpp + + boost::asio::read_until(socket, recv_buffer, '\0', error); + +We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, signature, ext, output, kind, provenance) extensible predicate by updating a data extension file. + +.. 
code-block:: yaml + + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sourceModel + data: + - ["boost::asio", "", False, "read_until", "", "", "Argument[*1]", "remote", "manual"] + +Since we are adding a new source, we need to add a tuple to the ``sourceModel`` extensible predicate. +The first five values identify the callable (in this case a free function) to be modeled as a source. + +- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank. +- The third value ``False`` is a flag that indicates whether or not the source also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"read_until"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``read_until``. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the output specification, the ``kind``, and the ``provenance`` (origin) of the source. + +- The seventh value ``"Argument[*1]"`` is the output specification, which means in this case that the source is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The eighth value ``"remote"`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. For more information, see ":ref:`Threat models <threat-models-cpp>`." +- The ninth value ``"manual"`` is the provenance of the source, which is used to identify the origin of the source model. 
+ +Example: Taint sink in the ``boost::asio`` namespace +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows how the CPP query pack models the second argument of the ``boost::asio::write`` function as a remote flow sink. A remote flow sink is where data is transmitted to other machines across a network, which is used for example by the "Cleartext transmission of sensitive information" (``cpp/cleartext-transmission``) query. + +.. code-block:: cpp + + boost::asio::write(socket, send_buffer, error); + +We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, signature, ext, input, kind, provenance) extensible predicate by updating a data extension file. + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: sinkModel + data: + - ["boost::asio", "", False, "write", "", "", "Argument[*1]", "remote-sink", "manual"] + +Since we want to add a new sink, we need to add a tuple to the ``sinkModel`` extensible predicate. +The first five values identify the callable (in this case a free function) to be modeled as a sink. + +- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank. +- The third value ``False`` is a flag that indicates whether or not the sink also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"write"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. In this case, we want the model to include all functions in ``boost::asio`` called ``write``. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the input specification, the ``kind``, and the ``provenance`` (origin) of the sink. 
+ +- The seventh value ``"Argument[*1]"`` is the input specification, which means in this case that the sink is the first indirection (or pointed-to value, ``*``) of the second argument (``Argument[1]``) passed to the function. +- The eighth value ``"remote-sink"`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. +- The ninth value ``"manual"`` is the provenance of the sink, which is used to identify the origin of the sink model. + +Example: Add flow through the ``boost::asio::buffer`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows how the CPP query pack models flow through a function for a simple case. + +.. code-block:: cpp + + boost::asio::write(socket, boost::asio::buffer(send_str), error); + +We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file: + +.. code-block:: yaml + + extensions: + - addsTo: + pack: codeql/cpp-all + extensible: summaryModel + data: + - ["boost::asio", "", False, "buffer", "", "", "Argument[*0]", "ReturnValue", "taint", "manual"] + +Since we are adding flow through a function, we need to add tuples to the ``summaryModel`` extensible predicate. + +The first five values identify the callable (in this case a free function) to be modeled as a summary. + +- The first value ``"boost::asio"`` is the namespace name. +- The second value ``""`` is the name of the type (class) that contains the method. Because we're modelling a free function, the type is left blank. +- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method. For a free function, this should be ``False``. +- The fourth value ``"buffer"`` is the function name. +- The fifth value is the function input type signature, which can be used to narrow down between functions that have the same name. 
In this case, we want the model to include all functions in ``boost::asio`` called ``buffer``. + +The sixth value should be left empty and is out of scope for this documentation. +The remaining values are used to define the input and output specifications, the ``kind``, and the ``provenance`` (origin) of the summary. + +- The seventh value is the input specification (where data flows from). ``Argument[*0]`` specifies the first indirection (or pointed-to value, ``*``) of the first argument (``Argument[0]``) passed to the function. +- The eighth value ``"ReturnValue"`` is the output specification (where data flows to), in this case the return value. +- The ninth value ``"taint"`` is the kind of the flow. ``taint`` means that taint is propagated through the call. +- The tenth value ``"manual"`` is the provenance of the summary, which is used to identify the origin of the summary model. + +.. _threat-models-cpp: + +Threat models +------------- + +.. include:: ../reusables/threat-model-description.rst diff --git a/shared/mad/codeql/mad/ModelValidation.qll b/shared/mad/codeql/mad/ModelValidation.qll index 20bcdd1908c..d403ecdb053 100644 --- a/shared/mad/codeql/mad/ModelValidation.qll +++ b/shared/mad/codeql/mad/ModelValidation.qll @@ -41,7 +41,9 @@ module KindValidation { "database-store", "format-string", "hash-iteration-count", "predicate-injection", "preferences-store", "tls-protocol-version", "transmission", "webview-fetch", "xxe", // Go-only currently, but may be shared in the future - "jwt" + "jwt", + // CPP-only currently + "remote-sink" ] or this.matches([