Swift: add location support to TSP diagnostics

This required a bit of an overhaul of the original integration of
JSON diagnostics into binlog.

The problem is that it is quite hard to add a kind of metadata to
binlog entries without changing its code. Another problem is that when
wanting to avoid double evaluation of logging macro arguments one
cannot really add a separate "diagnose" step easily.

The proposed solution consists in two things:
* hook into a binlog plumbing function by providing a better overload
  resolution match, which happens after logging macro expansion,
  bypassing the problem of double evaluation
* in that hook, produce the diagnostic directly, without waiting to
  reconstruct the diagnostics entry from the binlog serialized entry.

This allows to forgo the weird category to diagnostic mapping, and now a
diagnostics emission simply happens when a diagnostic source is given
as the first argument after the log format string. A flavour of
diganostics sources with locations is then added with the same
mechanism, allowing to write something like
```cpp
LOG_ERROR("[{}] ouch!", internalError.withLocation("foo.swift", 32));
```
This commit is contained in:
Paolo Tranquilli
2023-05-11 17:52:02 +02:00
parent 82e780d175
commit 3f2a059b3b
10 changed files with 245 additions and 134 deletions

View File

@@ -6,6 +6,7 @@ cc_library(
deps = [
"@absl//absl/strings",
"@binlog",
"@fmt",
"@json",
],
)

View File

@@ -1,7 +1,6 @@
#include "swift/logging/SwiftDiagnostics.h"
#include <binlog/Entries.hpp>
#include <nlohmann/json.hpp>
#include "absl/strings/str_join.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
@@ -9,17 +8,9 @@
namespace codeql {
namespace {
Logger& logger() {
static Logger ret{"diagnostics"};
return ret;
}
} // namespace
void SwiftDiagnosticsSource::emit(std::ostream& out,
std::string_view timestamp,
std::string_view message) const {
nlohmann::json entry = {
nlohmann::json SwiftDiagnosticsSource::json(const std::chrono::system_clock::time_point& timestamp,
std::string_view message) const {
return {
{"source",
{
{"id", sourceId()},
@@ -35,34 +26,28 @@ void SwiftDiagnosticsSource::emit(std::ostream& out,
{"severity", "error"},
{"helpLinks", std::vector<std::string_view>(absl::StrSplit(helpLinks, ' '))},
{"plaintextMessage", absl::StrCat(message, ".\n\n", action, ".")},
{"timestamp", timestamp},
{"timestamp", fmt::format("{:%FT%T%z}", timestamp)},
};
out << entry << '\n';
}
std::string SwiftDiagnosticsSource::sourceId() const {
auto ret = absl::StrJoin({extractorName, programName, id}, "/");
std::replace(ret.begin(), ret.end(), '_', '-');
return ret;
}
void SwiftDiagnosticsSource::registerImpl(const SwiftDiagnosticsSource* source) {
auto [it, inserted] = map().emplace(source->id, source);
CODEQL_ASSERT(inserted, "duplicate diagnostics source detected with id {}", source->id);
return absl::StrJoin({extractorName, programName, id}, "/");
}
void SwiftDiagnosticsDumper::write(const char* buffer, std::size_t bufferSize) {
binlog::Range range{buffer, bufferSize};
binlog::RangeEntryStream input{range};
while (auto event = events.nextEvent(input)) {
const auto& source = SwiftDiagnosticsSource::get(event->source->category);
std::ostringstream oss;
timestampedMessagePrinter.printEvent(oss, *event, events.writerProp(), events.clockSync());
// TODO(C++20) use oss.view() directly
auto data = oss.str();
std::string_view view = data;
using ViewPair = std::pair<std::string_view, std::string_view>;
auto [timestamp, message] = ViewPair(absl::StrSplit(view, absl::MaxSplits(' ', 1)));
source.emit(output, timestamp, message);
}
nlohmann::json SwiftDiagnosticsSourceWithLocation::json(
const std::chrono::system_clock::time_point& timestamp,
std::string_view message) const {
auto ret = source.json(timestamp, message);
auto& location = ret["location"] = {{"file", file}};
if (startLine) location["startLine"] = startLine;
if (startColumn) location["startColumn"] = startColumn;
if (endLine) location["endLine"] = endLine;
if (endColumn) location["endColumn"] = endColumn;
return ret;
}
std::string SwiftDiagnosticsSourceWithLocation::str() const {
return absl::StrCat(source.id, "@", file, ":", startLine, ":", startColumn, ":", endLine, ":",
endColumn);
}
} // namespace codeql

View File

@@ -1,7 +1,6 @@
#pragma once
#include <binlog/EventStream.hpp>
#include <binlog/PrettyPrinter.hpp>
#include <binlog/binlog.hpp>
#include <string>
#include <vector>
#include <unordered_map>
@@ -10,11 +9,31 @@
#include <filesystem>
#include <sstream>
#include <mutex>
#include <fmt/format.h>
#include <fmt/chrono.h>
#include <nlohmann/json.hpp>
namespace codeql {
extern const std::string_view programName;
struct SwiftDiagnosticsSource;
struct SwiftDiagnosticsSourceWithLocation {
const SwiftDiagnosticsSource& source;
std::string_view file;
unsigned startLine;
unsigned startColumn;
unsigned endLine;
unsigned endColumn;
// see SwiftDiagnosticsSource::json
nlohmann::json json(const std::chrono::system_clock::time_point& timestamp,
std::string_view message) const;
std::string str() const;
};
// Models a diagnostic source for Swift, holding static information that goes out into a diagnostic
// These are internally stored into a map on id's. A specific error log can use binlog's category
// as id, which will then be used to recover the diagnostic source while dumping.
@@ -29,36 +48,25 @@ struct SwiftDiagnosticsSource {
// for the moment, we only output errors, so no need to store the severity
// registers a diagnostics source for later retrieval with get, if not done yet
template <const SwiftDiagnosticsSource* Source>
static void ensureRegistered() {
static std::once_flag once;
std::call_once(once, registerImpl, Source);
}
// gets a previously inscribed SwiftDiagnosticsSource for the given id. Will abort if none exists
static const SwiftDiagnosticsSource& get(const std::string& id) { return *map().at(id); }
// emit a JSON diagnostics for this source with the given timestamp and message to out
// create a JSON diagnostics for this source with the given timestamp and message to out
// A plaintextMessage is used that includes both the message and the action to take. Dots are
// appended to both. The id is used to construct the source id in the form
// `swift/<prog name>/<id with '-' replacing '_'>`
void emit(std::ostream& out, std::string_view timestamp, std::string_view message) const;
// `swift/<prog name>/<id>`
nlohmann::json json(const std::chrono::system_clock::time_point& timestamp,
std::string_view message) const;
SwiftDiagnosticsSourceWithLocation withLocation(std::string_view file,
unsigned startLine = 0,
unsigned startColumn = 0,
unsigned endLine = 0,
unsigned endColumn = 0) const {
return {*this, file, startLine, startColumn, endLine, endColumn};
}
private:
static void registerImpl(const SwiftDiagnosticsSource* source);
std::string sourceId() const;
using Map = std::unordered_map<std::string, const SwiftDiagnosticsSource*>;
static Map& map() {
static Map ret;
return ret;
}
};
// An output modeling binlog's output stream concept that intercepts binlog entries and translates
// them to appropriate diagnostics JSON entries
class SwiftDiagnosticsDumper {
public:
// opens path for writing out JSON entries. Returns whether the operation was successful.
@@ -69,22 +77,65 @@ class SwiftDiagnosticsDumper {
void flush() { output.flush(); }
// write out binlog entries as corresponding JSON diagnostics entries. Expects all entries to have
// a category equal to an id of a previously created SwiftDiagnosticSource.
void write(const char* buffer, std::size_t bufferSize);
template <typename Source>
void write(const Source& source,
const std::chrono::system_clock::time_point& timestamp,
std::string_view message) {
if (output) {
output << source.json(timestamp, message) << '\n';
}
}
bool good() const { return output.good(); }
explicit operator bool() const { return good(); }
private:
binlog::EventStream events;
std::ofstream output;
binlog::PrettyPrinter timestampedMessagePrinter{"%u %m", "%Y-%m-%dT%H:%M:%S.%NZ"};
};
} // namespace codeql
namespace codeql_diagnostics {
constexpr codeql::SwiftDiagnosticsSource internal_error{
"internal_error",
constexpr codeql::SwiftDiagnosticsSource internalError{
"internal-error",
"Internal error",
"Contact us about this issue",
};
} // namespace codeql_diagnostics
} // namespace codeql
namespace mserialize {
// log diagnostic sources using just their id, using binlog/mserialize internal plumbing
template <>
struct CustomTag<codeql::SwiftDiagnosticsSource, void> : detail::BuiltinTag<std::string> {
using T = codeql::SwiftDiagnosticsSource;
};
template <>
struct CustomSerializer<codeql::SwiftDiagnosticsSource, void> {
template <typename OutputStream>
static void serialize(const codeql::SwiftDiagnosticsSource& source, OutputStream& out) {
mserialize::serialize(source.id, out);
}
static size_t serialized_size(const codeql::SwiftDiagnosticsSource& source) {
return mserialize::serialized_size(source.id);
}
};
template <>
struct CustomTag<codeql::SwiftDiagnosticsSourceWithLocation, void>
: detail::BuiltinTag<std::string> {
using T = codeql::SwiftDiagnosticsSourceWithLocation;
};
template <>
struct CustomSerializer<codeql::SwiftDiagnosticsSourceWithLocation, void> {
template <typename OutputStream>
static void serialize(const codeql::SwiftDiagnosticsSourceWithLocation& source,
OutputStream& out) {
mserialize::serialize(source.str(), out);
}
static size_t serialized_size(const codeql::SwiftDiagnosticsSourceWithLocation& source) {
return mserialize::serialized_size(source.str());
}
};
} // namespace mserialize

View File

@@ -60,7 +60,7 @@ std::vector<std::string> Log::collectLevelRulesAndReturnProblems(const char* env
// expect comma-separated <glob pattern>:<log severity>
std::regex comma{","};
std::regex levelAssignment{
R"((?:([*./\w]+)|(?:out:(binary|text|console|diagnostics))):()" LEVEL_REGEX_PATTERN ")"};
R"((?:([*./\w]+)|(?:out:(binary|text|console))):()" LEVEL_REGEX_PATTERN ")"};
std::cregex_token_iterator begin{levels, levels + strlen(levels), comma, -1};
std::cregex_token_iterator end{};
for (auto it = begin; it != end; ++it) {
@@ -84,8 +84,6 @@ std::vector<std::string> Log::collectLevelRulesAndReturnProblems(const char* env
text.level = level;
} else if (out == "console") {
console.level = level;
} else if (out == "diagnostics") {
diagnostics.level = level;
}
}
} else {
@@ -132,30 +130,27 @@ void Log::configure() {
text.level = Level::no_logs;
}
}
if (diagnostics) {
std::filesystem::path diagFile =
getEnvOr("CODEQL_EXTRACTOR_SWIFT_DIAGNOSTIC_DIR", "extractor-out/diagnostics");
if (auto diagDir = getEnvOr("CODEQL_EXTRACTOR_SWIFT_DIAGNOSTIC_DIR", nullptr)) {
std::filesystem::path diagFile = diagDir;
diagFile /= programName;
diagFile /= logBaseName;
diagFile.replace_extension(".jsonl");
std::error_code ec;
std::filesystem::create_directories(diagFile.parent_path(), ec);
if (!ec) {
if (!diagnostics.output.open(diagFile)) {
if (!diagnostics.open(diagFile)) {
problems.emplace_back("Unable to open diagnostics json file " + diagFile.string());
diagnostics.level = Level::no_logs;
}
} else {
problems.emplace_back("Unable to create diagnostics directory " +
diagFile.parent_path().string() + ": " + ec.message());
diagnostics.level = Level::no_logs;
}
}
for (const auto& problem : problems) {
LOG_ERROR("{}", problem);
}
LOG_INFO("Logging configured (binary: {}, text: {}, console: {}, diagnostics: {})", binary.level,
text.level, console.level, diagnostics.level);
text.level, console.level, diagnostics.good());
initialized = true;
flushImpl();
}
@@ -169,7 +164,7 @@ void Log::flushImpl() {
binary.output.flush();
}
if (diagnostics) {
diagnostics.output.flush();
diagnostics.flush();
}
}
@@ -190,7 +185,6 @@ Log& Log::write(const char* buffer, std::streamsize size) {
if (text) text.write(buffer, size);
if (binary) binary.write(buffer, size);
if (console) console.write(buffer, size);
if (diagnostics) diagnostics.write(buffer, size);
return *this;
}
@@ -198,5 +192,4 @@ Logger& Log::logger() {
static Logger ret{getLoggerConfigurationImpl("logging")};
return ret;
}
} // namespace codeql

View File

@@ -23,6 +23,11 @@
// * passing a logger around using a `Logger& logger` function parameter
// They are created with a name that appears in the logs and can be used to filter debug levels (see
// `Logger`).
// If the first argument after the format is a SwiftDiagnosticSource or
// SwiftDiagnosticSourceWithLocation, a JSON diagnostic entry is emitted. In this case the
// format string **must** start with "[{}] " (which is checked at compile time), and everything
// following that is used to form the message in the diagnostics using fmt::format instead of the
// internal binlog formatting. The two are fairly compatible though.
#define LOG_CRITICAL(...) LOG_WITH_LEVEL(critical, __VA_ARGS__)
#define LOG_ERROR(...) LOG_WITH_LEVEL(error, __VA_ARGS__)
#define LOG_WARNING(...) LOG_WITH_LEVEL(warning, __VA_ARGS__)
@@ -30,11 +35,18 @@
#define LOG_DEBUG(...) LOG_WITH_LEVEL(debug, __VA_ARGS__)
#define LOG_TRACE(...) LOG_WITH_LEVEL(trace, __VA_ARGS__)
#define CODEQL_GET_SECOND(...) CODEQL_GET_SECOND_I(__VA_ARGS__, 0, 0)
#define CODEQL_GET_SECOND_I(X, Y, ...) Y
// only do the actual logging if the picked up `Logger` instance is configured to handle the
// provided log level. `LEVEL` must be a compile-time constant. `logger()` is evaluated once
// TODO(C++20) replace non-standard ##__VA_ARGS__ with __VA_OPT__(,) __VA_ARGS__
#define LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, CATEGORY, ...) \
do { \
constexpr codeql::Log::Level _level = ::codeql::Log::Level::LEVEL; \
static_assert(::codeql::detail::checkLogArgs<decltype(CODEQL_GET_SECOND(__VA_ARGS__))>( \
MSERIALIZE_FIRST(__VA_ARGS__)), \
"diagnostics logs must have format starting with \"[{}]\""); \
constexpr auto _level = ::codeql::Log::Level::LEVEL; \
::codeql::Logger& _logger = logger(); \
if (_level >= _logger.level()) { \
BINLOG_CREATE_SOURCE_AND_EVENT(_logger.writer(), _level, CATEGORY, ::binlog::clockNow(), \
@@ -47,17 +59,6 @@
#define LOG_WITH_LEVEL(LEVEL, ...) LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, , __VA_ARGS__)
// Emit errors with a specified diagnostics ID. This must be the name of a `SwiftDiagnosticsSource`
// defined in the `codeql_diagnostics` namespace, which must have `id` equal to its name.
#define DIAGNOSE_CRITICAL(ID, ...) DIAGNOSE_WITH_LEVEL(critical, ID, __VA_ARGS__)
#define DIAGNOSE_ERROR(ID, ...) DIAGNOSE_WITH_LEVEL(error, ID, __VA_ARGS__)
#define DIAGNOSE_WITH_LEVEL(LEVEL, ID, ...) \
do { \
::codeql::SwiftDiagnosticsSource::ensureRegistered<&::codeql_diagnostics::ID>(); \
LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, ID, __VA_ARGS__); \
} while (false)
// avoid calling into binlog's original macros
#undef BINLOG_CRITICAL
#undef BINLOG_CRITICAL_W
@@ -125,6 +126,13 @@ class Log {
return instance().getLoggerConfigurationImpl(name);
}
template <typename Source>
static void diagnose(const Source& source,
const std::chrono::system_clock::time_point& time,
std::string_view message) {
instance().diagnostics.write(source, time, message);
}
private:
static constexpr const char* format = "%u %S [%n] %m (%G:%L)\n";
static bool initialized;
@@ -140,14 +148,13 @@ class Log {
void configure();
void flushImpl();
LoggerConfiguration getLoggerConfigurationImpl(std::string_view name);
// make `session.consume(*this)` work, which requires access to `write`
friend binlog::Session;
Log& write(const char* buffer, std::streamsize size);
struct OnlyWithCategory {};
// Output filtered according to a configured log level
template <typename Output>
struct FilteredOutput {
@@ -159,12 +166,6 @@ class Log {
FilteredOutput(Level level, Args&&... args)
: level{level}, output{std::forward<Args>(args)...}, filter{filterOnLevel()} {}
template <typename... Args>
FilteredOutput(OnlyWithCategory, Level level, Args&&... args)
: level{level},
output{std::forward<Args>(args)...},
filter{filterOnLevelAndNonEmptyCategory()} {}
FilteredOutput& write(const char* buffer, std::streamsize size) {
filter.writeAllowed(buffer, size, output);
return *this;
@@ -174,12 +175,6 @@ class Log {
return [this](const binlog::EventSource& src) { return src.severity >= level; };
}
binlog::EventFilter::Predicate filterOnLevelAndNonEmptyCategory() const {
return [this](const binlog::EventSource& src) {
return !src.category.empty() && src.severity >= level;
};
}
// if configured as `no_logs`, the output is effectively disabled
explicit operator bool() const { return level < Level::no_logs; }
};
@@ -192,7 +187,7 @@ class Log {
FilteredOutput<std::ofstream> binary{Level::no_logs};
FilteredOutput<binlog::TextOutputStream> text{Level::info, textFile, format};
FilteredOutput<binlog::TextOutputStream> console{Level::warning, std::cerr, format};
FilteredOutput<SwiftDiagnosticsDumper> diagnostics{OnlyWithCategory{}, Level::error};
SwiftDiagnosticsDumper diagnostics{};
LevelRules sourceRules;
std::vector<std::string> collectLevelRulesAndReturnProblems(const char* envVar);
};
@@ -228,4 +223,71 @@ class Logger {
Log::Level level_;
};
namespace detail {
constexpr std::string_view diagnosticsFormatPrefix = "[{}] ";
template <typename T>
constexpr bool checkLogArgs(std::string_view format) {
using Type = std::remove_cv_t<std::remove_reference_t<T>>;
constexpr bool isDiagnostic = std::is_same_v<Type, SwiftDiagnosticsSource> ||
std::is_same_v<Type, SwiftDiagnosticsSourceWithLocation>;
return !isDiagnostic ||
format.substr(0, diagnosticsFormatPrefix.size()) == diagnosticsFormatPrefix;
}
template <typename Writer, typename Source, typename... T>
void binlogAddEventIgnoreFirstOverload(Writer& writer,
std::uint64_t eventSourceId,
std::uint64_t clock,
const char* format,
const Source& source,
T&&... t) {
std::chrono::system_clock::time_point point{
std::chrono::duration_cast<std::chrono::system_clock::duration>(
std::chrono::nanoseconds{clock})};
constexpr auto offset = ::codeql::detail::diagnosticsFormatPrefix.size();
::codeql::Log::diagnose(source, point, fmt::format(format + offset, t...));
writer.addEvent(eventSourceId, clock, source, std::forward<T>(t)...);
}
} // namespace detail
} // namespace codeql
// we intercept this binlog plumbing function providing better overload resolution matches in
// case the first non-format argument is a diagnostic source, and emit it in that case with the
// same timestamp
namespace binlog::detail {
template <typename Writer, size_t N, typename... T>
void addEventIgnoreFirst(Writer& writer,
std::uint64_t eventSourceId,
std::uint64_t clock,
const char (&format)[N],
const codeql::SwiftDiagnosticsSource& source,
T&&... t) {
codeql::detail::binlogAddEventIgnoreFirstOverload(writer, eventSourceId, clock, format, source,
std::forward<T>(t)...);
}
template <typename Writer, size_t N, typename... T>
void addEventIgnoreFirst(Writer& writer,
std::uint64_t eventSourceId,
std::uint64_t clock,
const char (&format)[N],
codeql::SwiftDiagnosticsSourceWithLocation&& source,
T&&... t) {
codeql::detail::binlogAddEventIgnoreFirstOverload(writer, eventSourceId, clock, format, source,
std::forward<T>(t)...);
}
template <typename Writer, size_t N, typename... T>
void addEventIgnoreFirst(Writer& writer,
std::uint64_t eventSourceId,
std::uint64_t clock,
const char (&format)[N],
const codeql::SwiftDiagnosticsSourceWithLocation& source,
T&&... t) {
codeql::detail::binlogAddEventIgnoreFirstOverload(writer, eventSourceId, clock, format, source,
std::forward<T>(t)...);
}
} // namespace binlog::detail