Swift: add support to output JSON diagnostics

New `DIAGNOSE_ERROR` and `DIAGNOSE_CRITICAL` macros are added.

These accept an ID which should indicate a diagnostic source via
a function definition in `codeql::diagnostics`, together with the usual
format + arguments accepted by other `LOG_*` macros.

When the log is flushed, these special logs will result in an error JSON
diagnostic entry in the database.
This commit is contained in:
Paolo Tranquilli
2023-05-03 12:01:50 +02:00
parent 0ad529dff8
commit 8de2f9958e
6 changed files with 245 additions and 27 deletions

View File

@@ -22,7 +22,7 @@
using namespace std::string_literals;
const std::string_view codeql::logRootName = "extractor";
const std::string_view codeql::programName = "extractor";
// must be called before processFrontendOptions modifies output paths
static void lockOutputSwiftModuleTraps(codeql::SwiftExtractorState& state,
@@ -220,6 +220,7 @@ int main(int argc, char** argv, char** envp) {
codeql::Logger logger{"main"};
LOG_INFO("calling extractor with arguments \"{}\"", argDump(argc, argv));
LOG_DEBUG("environment:\n{}\n", envDump(envp));
DIAGNOSE_ERROR(internal_error, "prout {}", 42);
}
auto openInterception = codeql::setupFileInterception(configuration);

View File

@@ -5,6 +5,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
"@binlog",
"@date",
"@json",
],
)

View File

@@ -0,0 +1,73 @@
#include "swift/log/SwiftDiagnostics.h"
#include <date/date.h>
#include <binlog/Entries.hpp>
#include <nlohmann/json.hpp>
namespace codeql {
SwiftDiagnosticsSource::SwiftDiagnosticsSource(std::string_view internalId,
std::string&& name,
std::vector<std::string>&& helpLinks,
std::string&& action)
: name{std::move(name)}, helpLinks{std::move(helpLinks)}, action{std::move(action)} {
id = extractorName;
id += '/';
id += programName;
id += '/';
std::transform(internalId.begin(), internalId.end(), std::back_inserter(id),
[](char c) { return c == '_' ? '-' : c; });
}
void SwiftDiagnosticsSource::create(std::string_view id,
std::string name,
std::vector<std::string> helpLinks,
std::string action) {
auto [it, inserted] = map().emplace(
id, SwiftDiagnosticsSource{id, std::move(name), std::move(helpLinks), std::move(action)});
assert(inserted);
}
void SwiftDiagnosticsSource::emit(std::ostream& out,
std::string_view timestamp,
std::string_view message) const {
nlohmann::json entry;
auto& source = entry["source"];
source["id"] = id;
source["name"] = name;
source["extractorName"] = extractorName;
auto& visibility = entry["visibility"];
visibility["statusPage"] = true;
visibility["cliSummaryTable"] = true;
visibility["telemetry"] = true;
entry["severity"] = "error";
entry["helpLinks"] = helpLinks;
std::string plaintextMessage{message};
plaintextMessage += ".\n\n";
plaintextMessage += action;
plaintextMessage += '.';
entry["plaintextMessage"] = plaintextMessage;
entry["timestamp"] = timestamp;
out << entry << '\n';
}
void SwiftDiagnosticsDumper::write(const char* buffer, std::size_t bufferSize) {
binlog::Range range{buffer, bufferSize};
binlog::RangeEntryStream input{range};
while (auto event = events.nextEvent(input)) {
const auto& source = SwiftDiagnosticsSource::get(event->source->category);
std::ostringstream oss;
timestampedMessagePrinter.printEvent(oss, *event, events.writerProp(), events.clockSync());
auto data = oss.str();
std::string_view view = data;
auto sep = view.find(' ');
assert(sep != std::string::npos);
auto timestamp = view.substr(0, sep);
auto message = view.substr(sep + 1);
source.emit(output, timestamp, message);
}
}
} // namespace codeql

View File

@@ -0,0 +1,86 @@
#pragma once
#include <binlog/EventStream.hpp>
#include <binlog/PrettyPrinter.hpp>
#include <string>
#include <vector>
#include <unordered_map>
#include <cassert>
#include <fstream>
#include <filesystem>
namespace codeql {
extern const std::string_view programName;
// Models a diagnostic source for Swift, holding static information that goes out into a diagnostic
// These are internally stored into a map on id's. A specific error log can use binlog's category
// as id, which will then be used to recover the diagnostic source while dumping.
class SwiftDiagnosticsSource {
public:
// creates a SwiftDiagnosticsSource with the given data
static void create(std::string_view id,
std::string name,
std::vector<std::string> helpLinks,
std::string action);
// gets a previously created SwiftDiagnosticsSource for the given id. Will abort if none exists
static const SwiftDiagnosticsSource& get(const std::string& id) { return map().at(id); }
// emit a JSON diagnostics for this source with the given timestamp and message to out
// A plaintextMessage is used that includes both the message and the action to take. Dots are
// appended to both. The id is used to construct the source id in the form
// `swift/<prog name>/<id with '-' replacing '_'>`
void emit(std::ostream& out, std::string_view timestamp, std::string_view message) const;
private:
using Map = std::unordered_map<std::string, SwiftDiagnosticsSource>;
std::string id;
std::string name;
static constexpr std::string_view extractorName = "swift";
// for the moment, we only output errors, so no need to store the severity
std::vector<std::string> helpLinks;
std::string action;
static Map& map() {
static Map ret;
return ret;
}
SwiftDiagnosticsSource(std::string_view internalId,
std::string&& name,
std::vector<std::string>&& helpLinks,
std::string&& action);
};
// An output modeling binlog's output stream concept that intercepts binlog entries and translates
// them to appropriate diagnostics JSON entries
class SwiftDiagnosticsDumper {
public:
// opens path for writing out JSON entries. Returns whether the operation was successful.
bool open(const std::filesystem::path& path) {
output.open(path);
return output.good();
}
// write out binlog entries as corresponding JSON diagnostics entries. Expects all entries to have
// a category equal to an id of a previously created SwiftDiagnosticSource.
void write(const char* buffer, std::size_t bufferSize);
private:
binlog::EventStream events;
std::ofstream output;
binlog::PrettyPrinter timestampedMessagePrinter{"%u %m", "%Y-%m-%dT%H:%M:%S.%NZ"};
};
namespace diagnostics {
inline void internal_error() {
SwiftDiagnosticsSource::create("internal_error", "Internal error", {},
"Contact us about this issue");
}
} // namespace diagnostics
} // namespace codeql

View File

@@ -50,7 +50,7 @@ Log::Level matchToLevel(std::csub_match m) {
} // namespace
std::vector<std::string> Log::collectSeverityRulesAndReturnProblems(const char* envVar) {
std::vector<std::string> Log::collectLevelRulesAndReturnProblems(const char* envVar) {
std::vector<std::string> problems;
if (auto levels = getEnvOr(envVar, nullptr)) {
// expect comma-separated <glob pattern>:<log severity>
@@ -92,12 +92,13 @@ std::vector<std::string> Log::collectSeverityRulesAndReturnProblems(const char*
void Log::configure() {
// as we are configuring logging right now, we collect problems and log them at the end
auto problems = collectSeverityRulesAndReturnProblems("CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS");
auto problems = collectLevelRulesAndReturnProblems("CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS");
auto now = std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
if (text || binary) {
std::filesystem::path logFile = getEnvOr("CODEQL_EXTRACTOR_SWIFT_LOG_DIR", "extractor-out/log");
logFile /= "swift";
logFile /= logRootName;
logFile /= std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
logFile /= programName;
logFile /= now;
std::error_code ec;
std::filesystem::create_directories(logFile.parent_path(), ec);
if (!ec) {
@@ -123,6 +124,25 @@ void Log::configure() {
binary.level = Level::no_logs;
text.level = Level::no_logs;
}
if (diagnostics) {
std::filesystem::path diagFile =
getEnvOr("CODEQL_EXTRACTOR_SWIFT_DIAGNOSTIC_DIR", "extractor-out/diagnostics");
diagFile /= programName;
diagFile /= now;
diagFile.replace_extension(".jsonl");
std::error_code ec;
std::filesystem::create_directories(diagFile.parent_path(), ec);
if (!ec) {
if (!diagnostics.output.open(diagFile)) {
problems.emplace_back("Unable to open diagnostics json file " + diagFile.string());
diagnostics.level = Level::no_logs;
}
} else {
problems.emplace_back("Unable to create diagnostics directory " +
diagFile.parent_path().string() + ": " + ec.message());
diagnostics.level = Level::no_logs;
}
}
}
for (const auto& problem : problems) {
LOG_ERROR("{}", problem);
@@ -137,7 +157,7 @@ void Log::flushImpl() {
}
Log::LoggerConfiguration Log::getLoggerConfigurationImpl(std::string_view name) {
LoggerConfiguration ret{session, std::string{logRootName}};
LoggerConfiguration ret{session, std::string{programName}};
ret.fullyQualifiedName += '/';
ret.fullyQualifiedName += name;
ret.level = std::min({binary.level, text.level, console.level});
@@ -153,6 +173,7 @@ Log& Log::write(const char* buffer, std::streamsize size) {
if (text) text.write(buffer, size);
if (binary) binary.write(buffer, size);
if (console) console.write(buffer, size);
if (diagnostics) diagnostics.write(buffer, size);
return *this;
}

View File

@@ -12,6 +12,8 @@
#include <binlog/adapt_stdoptional.hpp>
#include <binlog/adapt_stdvariant.hpp>
#include "swift/log/SwiftDiagnostics.h"
// Logging macros. These will call `logger()` to get a Logger instance, picking up any `logger`
// defined in the current scope. Domain-specific loggers can be added or used by either:
// * providing a class field called `logger` (as `Logger::operator()()` returns itself)
@@ -20,23 +22,39 @@
// * passing a logger around using a `Logger& logger` function parameter
// They are created with a name that appears in the logs and can be used to filter debug levels (see
// `Logger`).
#define LOG_CRITICAL(...) LOG_WITH_LEVEL(codeql::Log::Level::critical, __VA_ARGS__)
#define LOG_ERROR(...) LOG_WITH_LEVEL(codeql::Log::Level::error, __VA_ARGS__)
#define LOG_WARNING(...) LOG_WITH_LEVEL(codeql::Log::Level::warning, __VA_ARGS__)
#define LOG_INFO(...) LOG_WITH_LEVEL(codeql::Log::Level::info, __VA_ARGS__)
#define LOG_DEBUG(...) LOG_WITH_LEVEL(codeql::Log::Level::debug, __VA_ARGS__)
#define LOG_TRACE(...) LOG_WITH_LEVEL(codeql::Log::Level::trace, __VA_ARGS__)
#define LOG_CRITICAL(...) LOG_WITH_LEVEL(critical, __VA_ARGS__)
#define LOG_ERROR(...) LOG_WITH_LEVEL(error, __VA_ARGS__)
#define LOG_WARNING(...) LOG_WITH_LEVEL(warning, __VA_ARGS__)
#define LOG_INFO(...) LOG_WITH_LEVEL(info, __VA_ARGS__)
#define LOG_DEBUG(...) LOG_WITH_LEVEL(debug, __VA_ARGS__)
#define LOG_TRACE(...) LOG_WITH_LEVEL(trace, __VA_ARGS__)
// only do the actual logging if the picked up `Logger` instance is configured to handle the
// provided log level. `LEVEL` must be a compile-time constant. `logger()` is evaluated once
#define LOG_WITH_LEVEL(LEVEL, ...) \
do { \
constexpr codeql::Log::Level _level = LEVEL; \
codeql::Logger& _logger = logger(); \
if (_level >= _logger.level()) { \
BINLOG_CREATE_SOURCE_AND_EVENT(_logger.writer(), _level, /* category */, binlog::clockNow(), \
__VA_ARGS__); \
} \
#define LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, CATEGORY, ...) \
do { \
constexpr codeql::Log::Level _level = codeql::Log::Level::LEVEL; \
codeql::Logger& _logger = logger(); \
if (_level >= _logger.level()) { \
BINLOG_CREATE_SOURCE_AND_EVENT(_logger.writer(), _level, CATEGORY, binlog::clockNow(), \
__VA_ARGS__); \
} \
} while (false)
#define LOG_WITH_LEVEL(LEVEL, ...) LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, , __VA_ARGS__)
// Emit errors with a specified diagnostics ID. This must be the name of a function in the
// codeql::diagnostics namespace, which must call SwiftDiagnosticSource::create with ID as first
// argument. This function will be called at most once during the program execution.
// See codeql::diagnostics::internal_error below as an example.
#define DIAGNOSE_CRITICAL(ID, ...) DIAGNOSE_WITH_LEVEL(critical, ID, __VA_ARGS__)
#define DIAGNOSE_ERROR(ID, ...) DIAGNOSE_WITH_LEVEL(error, ID, __VA_ARGS__)
#define DIAGNOSE_WITH_LEVEL(LEVEL, ID, ...) \
do { \
static int _ignore = (codeql::diagnostics::ID(), 0); \
std::ignore = _ignore; \
LOG_WITH_LEVEL_AND_CATEGORY(LEVEL, ID, __VA_ARGS__); \
} while (false)
// avoid calling into binlog's original macros
@@ -68,7 +86,7 @@
namespace codeql {
// tools should define this to tweak the root name of all loggers
extern const std::string_view logRootName;
extern const std::string_view programName;
// This class is responsible for the global log state (outputs, log level rules, flushing)
// State is stored in the singleton `Log::instance()`.
@@ -76,7 +94,7 @@ extern const std::string_view logRootName;
// `Log::configure("extractor")`). Then, `Log::flush()` should be regularly called.
// Logging is configured upon first usage. This consists in
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_DIR` to choose where to dump the log
// file(s). Log files will go to a subdirectory thereof named after `logRootName`
// file(s). Log files will go to a subdirectory thereof named after `programName`
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` to configure levels for
// loggers and outputs. This must have the form of a comma separated `spec:level` list, where
// `spec` is either a glob pattern (made up of alphanumeric, `/`, `*` and `.` characters) for
@@ -122,23 +140,40 @@ class Log {
friend binlog::Session;
Log& write(const char* buffer, std::streamsize size);
struct OnlyWithCategory {};
// Output filtered according to a configured log level
template <typename Output>
struct FilteredOutput {
binlog::Severity level;
Output output;
binlog::EventFilter filter{
[this](const binlog::EventSource& src) { return src.severity >= level; }};
binlog::EventFilter filter;
template <typename... Args>
FilteredOutput(Level level, Args&&... args)
: level{level}, output{std::forward<Args>(args)...} {}
: level{level}, output{std::forward<Args>(args)...}, filter{filterOnLevel()} {}
template <typename... Args>
FilteredOutput(OnlyWithCategory, Level level, Args&&... args)
: level{level},
output{std::forward<Args>(args)...},
filter{filterOnLevelAndNonEmptyCategory()} {}
FilteredOutput& write(const char* buffer, std::streamsize size) {
filter.writeAllowed(buffer, size, output);
return *this;
}
binlog::EventFilter::Predicate filterOnLevel() const {
return [this](const binlog::EventSource& src) { return src.severity >= level; };
}
binlog::EventFilter::Predicate filterOnLevelAndNonEmptyCategory() const {
return [this](const binlog::EventSource& src) {
return !src.category.empty() && src.severity >= level;
};
}
// if configured as `no_logs`, the output is effectively disabled
explicit operator bool() const { return level < Level::no_logs; }
};
@@ -151,14 +186,15 @@ class Log {
FilteredOutput<std::ofstream> binary{Level::no_logs};
FilteredOutput<binlog::TextOutputStream> text{Level::info, textFile, format};
FilteredOutput<binlog::TextOutputStream> console{Level::warning, std::cerr, format};
FilteredOutput<SwiftDiagnosticsDumper> diagnostics{OnlyWithCategory{}, Level::error};
LevelRules sourceRules;
std::vector<std::string> collectSeverityRulesAndReturnProblems(const char* envVar);
std::vector<std::string> collectLevelRulesAndReturnProblems(const char* envVar);
};
// This class represent a named domain-specific logger, responsible for pushing logs using the
// underlying `binlog::SessionWriter` class. This has a configured log level, so that logs on this
// `Logger` with a level lower than the configured one are no-ops. The level is configured based
// on rules matching `<logRootName>/<name>` in `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` (see above).
// on rules matching `<programName>/<name>` in `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` (see above).
// `<name>` is provided in the constructor. If no rule matches the name, the log level defaults to
// the minimum level of all outputs.
class Logger {