Swift: introduce SwiftDispatcher

This commit is contained in:
Alex Denisov
2022-05-11 10:28:08 +02:00
parent 8f8ece63e7
commit d0e2e2bec8
6 changed files with 331 additions and 11 deletions

View File

@@ -6,6 +6,8 @@ swift_cc_binary(
"SwiftExtractor.cpp",
"SwiftExtractor.h",
"SwiftExtractorConfiguration.h",
"SwiftDispatcher.h",
"SwiftTagTraits.h",
"main.cpp",
],
visibility = ["//swift:__pkg__"],

View File

@@ -0,0 +1,178 @@
#pragma once
#include "swift/extractor/trap/TrapArena.h"
#include "swift/extractor/trap/TrapLabelStore.h"
// autogenerated file
#include "swift/extractor/trap/TrapClasses.h"
#include "swift/extractor/SwiftTagTraits.h"
#include <swift/AST/SourceFile.h>
#include <swift/Basic/SourceManager.h>
#include <llvm/Support/FileSystem.h>
namespace codeql {
namespace detail {
// The `getKindName` helpers produce the kind spelling that is embedded in "TBD" TRAP entries, so
// that an AST node which is not properly emitted yet is visibly marked as such.
// This generic version forwards to `Parent::getKindName(kind)` and converts the result with
// `.str()`.
// TODO: To be replaced with QL counterpart
template <typename Parent, typename Kind>
inline std::string getKindName(Kind kind) {
  auto kindName = Parent::getKindName(kind);
  return kindName.str();
}
// Specialization for type kinds: the case labels are generated by expanding TYPE over
// swift/AST/TypeNodes.def, each yielding the spelling of its CLASS argument. A kind without a
// generated case yields "Unknown".
template <>
inline std::string getKindName<swift::TypeBase, swift::TypeKind>(swift::TypeKind kind) {
  const char* spelling = "Unknown";
  switch (kind) {
#define TYPE(CLASS, PARENT)    \
  case swift::TypeKind::CLASS: \
    spelling = #CLASS;         \
    break;
#include "swift/AST/TypeNodes.def"
    default:
      break;
  }
  return spelling;
}
// Specialization for type representation kinds: the case labels are generated by expanding
// TYPEREPR over swift/AST/TypeReprNodes.def, each yielding the spelling of its CLASS argument. A
// kind without a generated case yields "Unknown".
template <>
inline std::string getKindName<swift::TypeRepr, swift::TypeReprKind>(swift::TypeReprKind kind) {
  const char* spelling = "Unknown";
  switch (kind) {
#define TYPEREPR(CLASS, PARENT)    \
  case swift::TypeReprKind::CLASS: \
    spelling = #CLASS;             \
    break;
#include "swift/AST/TypeReprNodes.def"
    default:
      break;
  }
  return spelling;
}
} // namespace detail
// The main reponsibilities of the SwiftDispatcher are as follows:
// * redirect specific AST node emission to a corresponding visitor (statements, expressions, etc.)
// * storing TRAP labels for emitted AST nodes (in the TrapLabelStore) to avoid re-emission
// Since SwiftDispatcher sees all the AST nodes, it also attaches a location to every 'locatable'
// node (AST nodes that are not types: declarations, statements, expressions, etc.).
class SwiftDispatcher {
public:
SwiftDispatcher(const swift::SourceManager& sourceManager, TrapArena& arena, TrapOutput& trap)
: sourceManager{sourceManager}, arena{arena}, trap{trap} {}
template <typename T>
void extract(T* entity) {
fetchLabel(entity);
}
private:
// This method gives a TRAP label for already emitted AST node.
// If the AST node was not emitted yet, then the emission is dispatched to a corresponding
// visitor (see `visit(T *)` methods below).
template <typename E>
TrapLabel<ToTag<E>> fetchLabel(E* e) {
// this is required so we avoid any recursive loop: a `fetchLabel` during the visit of `e` might
// end up calling `fetchLabel` on `e` itself, so we want the visit of `e` to call `fetchLabel`
// only after having called `assignNewLabel` on `e`
assert(!waitingForNewLabel && "fetchLabel called before assignNewLabel");
if (auto l = store.get(e)) {
return *l;
}
waitingForNewLabel = getCanonicalPtr(e);
visit(e);
if (auto l = store.get(e)) {
if constexpr (!std::is_base_of_v<swift::TypeBase, E>) {
attachLocation(e, *l);
}
return *l;
}
assert(!"assignNewLabel not called during visit");
return {};
}
// Due to the lazy emission approach, we must assign a label to a corresponding AST node before
// it actually gets emitted to handle recursive cases such as recursive calls, or recursive type
// declarations
template <typename E>
TrapLabel<ToTag<E>> assignNewLabel(E* e) {
assert(waitingForNewLabel == getCanonicalPtr(e) && "assignNewLabel called on wrong entity");
auto label = getLabel<ToTag<E>>();
trap.assignStar(label);
store.insert(e, label);
waitingForNewLabel = nullptr;
return label;
}
template <typename Tag>
TrapLabel<Tag> getLabel() {
return arena.allocateLabel<Tag>();
}
// This is a helper method to emit TRAP entries for AST nodes that we don't fully support yet.
template <typename Parent, typename Child>
void TBD(Child* entity, const std::string& suffix) {
using namespace std::string_literals;
auto label = assignNewLabel(entity);
auto kind = detail::getKindName<Parent>(static_cast<const Parent*>(entity)->getKind());
auto name = "TBD ("s + kind + suffix + ")";
if constexpr (std::is_same_v<Parent, swift::TypeBase>) {
trap.emit(UnknownTypesTrap{label, name});
} else {
trap.emit(UnknownAstNodesTrap{label, name});
}
}
template <typename Locatable>
void attachLocation(Locatable locatable, TrapLabel<LocatableTag> locatableLabel) {
attachLocation(&locatable, locatableLabel);
}
// Emits a Location TRAP entry and attaches it to an AST node
template <typename Locatable>
void attachLocation(Locatable* locatable, TrapLabel<LocatableTag> locatableLabel) {
auto start = locatable->getStartLoc();
auto end = locatable->getEndLoc();
if (!start.isValid() || !end.isValid()) {
// invalid locations seem to come from entities synthesized by the compiler
return;
}
std::string filepath = getFilepath(start);
auto fileLabel = arena.allocateLabel<FileTag>();
trap.assignKey(fileLabel, filepath);
/// TODO: do not emit duplicate trap entries for Files
trap.emit(FilesTrap{fileLabel, filepath});
auto [startLine, startColumn] = sourceManager.getLineAndColumnInBuffer(start);
auto [endLine, endColumn] = sourceManager.getLineAndColumnInBuffer(end);
auto locLabel = arena.allocateLabel<LocationTag>();
trap.assignKey(locLabel, '{', fileLabel, "}:", startLine, ':', startColumn, ':', endLine, ':',
endColumn);
trap.emit(LocationsTrap{locLabel, fileLabel, startLine, startColumn, endLine, endColumn});
trap.emit(LocatablesTrap{locatableLabel, locLabel});
}
std::string getFilepath(swift::SourceLoc loc) {
/// TODO: this needs more testing
std::string displayName = sourceManager.getDisplayNameForLoc(loc).str();
llvm::SmallString<PATH_MAX> filePath(displayName);
if (std::error_code ec = llvm::sys::fs::make_absolute(filePath)) {
std::cerr << "Cannot make absolute path: '" << displayName << "': " << ec.message() << "\n";
return {};
}
llvm::sys::path::remove_dots(filePath);
return filePath.str().str();
}
// TODO: The following methods are supposed to redirect TRAP emission to correpsonding visitors,
// which are to be introduced in follow-up PRs
void visit(swift::Decl* decl) { TBD<swift::Decl>(decl, "Decl"); }
void visit(swift::Stmt* stmt) { TBD<swift::Stmt>(stmt, "Stmt"); }
void visit(swift::Expr* expr) { TBD<swift::Expr>(expr, "Expr"); }
void visit(swift::Pattern* pattern) { TBD<swift::Pattern>(pattern, "Pattern"); }
void visit(swift::TypeRepr* type) { TBD<swift::TypeRepr>(type, "TypeRepr"); }
void visit(swift::TypeBase* type) { TBD<swift::TypeBase>(type, "Type"); }
const swift::SourceManager& sourceManager;
TrapArena& arena;
TrapOutput& trap;
TrapLabelStore store;
const void* waitingForNewLabel{nullptr};
};
} // namespace codeql

View File

@@ -13,12 +13,14 @@
#include <llvm/Support/Path.h>
#include "swift/extractor/trap/TrapClasses.h"
#include "swift/extractor/trap/TrapArena.h"
#include "swift/extractor/trap/TrapOutput.h"
#include "swift/extractor/SwiftDispatcher.h"
using namespace codeql;
static void extractFile(const SwiftExtractorConfiguration& config, swift::SourceFile& file) {
static void extractFile(const SwiftExtractorConfiguration& config,
swift::CompilerInstance& compiler,
swift::SourceFile& file) {
if (std::error_code ec = llvm::sys::fs::create_directories(config.trapDir)) {
std::cerr << "Cannot create TRAP directory: " << ec.message() << "\n";
return;
@@ -79,12 +81,17 @@ static void extractFile(const SwiftExtractorConfiguration& config, swift::Source
TrapOutput trap{trapStream};
TrapArena arena{};
auto label = arena.allocateLabel<FileTag>();
trap.assignStar(label);
File f{};
f.id = label;
f.name = srcFilePath.str().str();
trap.emit(f);
// In the case of empty files, the dispatcher is not called, but we still want to 'record' the
// fact that the file was extracted
auto fileLabel = arena.allocateLabel<FileTag>();
trap.assignKey(fileLabel, srcFilePath.str().str());
trap.emit(FilesTrap{fileLabel, srcFilePath.str().str()});
SwiftDispatcher dispatcher(compiler.getSourceMgr(), arena, trap);
for (swift::Decl* decl : file.getTopLevelDecls()) {
dispatcher.extract(decl);
}
// TODO: Pick a better name to avoid collisions
std::string trapName = file.getFilename().str() + ".trap";
@@ -108,11 +115,11 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
module->getFiles().front()->getKind() == swift::FileUnitKind::Source) {
// We can only call getMainSourceFile if the first file is of a Source kind
swift::SourceFile& file = module->getMainSourceFile();
extractFile(config, file);
extractFile(config, compiler, file);
}
} else {
for (auto s : compiler.getPrimarySourceFiles()) {
extractFile(config, *s);
extractFile(config, compiler, *s);
}
}
}

View File

@@ -0,0 +1,71 @@
#pragma once
#include <swift/AST/ASTVisitor.h>
// autogenerated header
#include "swift/extractor/trap/TrapTags.h"
namespace codeql {
// codegen goes with QL acronym convention (Sil instead of SIL), we need to remap it to Swift's
// convention, so that the `TYPE##Tag` names pasted together by the macros below resolve
using SILBlockStorageTypeTag = SilBlockStorageTypeTag;
using SILBoxTypeTag = SilBoxTypeTag;
using SILFunctionTypeTag = SilFunctionTypeTag;
using SILTokenTypeTag = SilTokenTypeTag;
// Specializes `ToTagFunctor` for `swift::TYPE` so that its nested `type` names TAG.
// The expansion ends without a semicolon; it is supplied at the point of use.
// (`ToTagFunctor` and `ToTagOverride` are declared outside this header.)
#define MAP_TYPE_TO_TAG(TYPE, TAG) \
template <> \
struct ToTagFunctor<swift::TYPE> { \
using type = TAG; \
}
// Shorthand for the common case where the tag is spelled as TYPE followed by `Tag`.
#define MAP_TAG(TYPE) MAP_TYPE_TO_TAG(TYPE, TYPE##Tag)
// Maps TYPE like MAP_TAG does, and additionally checks at compile time that `PARENT##Tag` is a
// base of `TYPE##Tag`.
// NOTE(review): `std::is_base_of_v` lives in <type_traits>, which this header does not include
// directly.
#define MAP_SUBTAG(TYPE, PARENT) \
MAP_TAG(TYPE); \
static_assert(std::is_base_of_v<PARENT##Tag, TYPE##Tag>, \
#PARENT "Tag must be a base of " #TYPE "Tag");
// Specializes `ToTagOverride` for `swift::TYPE` so that its nested `type` names TAG, and checks
// at compile time that TAG derives from `TYPE##Tag`.
#define OVERRIDE_TAG(TYPE, TAG) \
template <> \
struct ToTagOverride<swift::TYPE> { \
using type = TAG; \
}; \
static_assert(std::is_base_of_v<TYPE##Tag, TAG>, "override is not a subtag");
// Statements: the root maps to StmtTag, then every ABSTRACT_STMT/STMT entry expanded from
// StmtNodes.def maps `<CLASS>Stmt` to `<CLASS>StmtTag`, checked against its parent's tag.
MAP_TAG(Stmt);
#define ABSTRACT_STMT(CLASS, PARENT) MAP_SUBTAG(CLASS##Stmt, PARENT)
#define STMT(CLASS, PARENT) ABSTRACT_STMT(CLASS, PARENT)
#include "swift/AST/StmtNodes.def"
// Expressions: same scheme, driven by ExprNodes.def.
MAP_TAG(Expr);
#define ABSTRACT_EXPR(CLASS, PARENT) MAP_SUBTAG(CLASS##Expr, PARENT)
#define EXPR(CLASS, PARENT) ABSTRACT_EXPR(CLASS, PARENT)
#include "swift/AST/ExprNodes.def"
// Declarations: same scheme, driven by DeclNodes.def.
MAP_TAG(Decl);
#define ABSTRACT_DECL(CLASS, PARENT) MAP_SUBTAG(CLASS##Decl, PARENT)
#define DECL(CLASS, PARENT) ABSTRACT_DECL(CLASS, PARENT)
#include "swift/AST/DeclNodes.def"
// Patterns: same scheme, driven by PatternNodes.def.
MAP_TAG(Pattern);
#define ABSTRACT_PATTERN(CLASS, PARENT) MAP_SUBTAG(CLASS##Pattern, PARENT)
#define PATTERN(CLASS, PARENT) ABSTRACT_PATTERN(CLASS, PARENT)
#include "swift/AST/PatternNodes.def"
// Type representations: only the root class is mapped here.
MAP_TAG(TypeRepr);
// Types: the root class is swift::TypeBase while its tag is TypeTag, so the pair is spelled out
// explicitly instead of going through MAP_TAG; the subclasses follow the usual scheme, driven by
// TypeNodes.def.
MAP_TYPE_TO_TAG(TypeBase, TypeTag);
#define ABSTRACT_TYPE(CLASS, PARENT) MAP_SUBTAG(CLASS##Type, PARENT)
#define TYPE(CLASS, PARENT) ABSTRACT_TYPE(CLASS, PARENT)
#include "swift/AST/TypeNodes.def"
// FuncDecl and VarDecl additionally get an override pointing at their `Concrete...` tags.
OVERRIDE_TAG(FuncDecl, ConcreteFuncDeclTag);
OVERRIDE_TAG(VarDecl, ConcreteVarDeclTag);
// Clean up the helper macros so they do not leak into files including this header.
#undef MAP_TAG
#undef MAP_SUBTAG
#undef MAP_TYPE_TO_TAG
#undef OVERRIDE_TAG
// All the other macros defined here are undefined by the .def files
} // namespace codeql

View File

@@ -14,6 +14,9 @@ class UntypedTrapLabel {
friend class std::hash<UntypedTrapLabel>;
// we want to have access to the untyped, underlying id
friend class TrapLabelStore;
protected:
UntypedTrapLabel() : id_{0xffffffffffffffff} {}
UntypedTrapLabel(uint64_t id) : id_{id} {}
@@ -33,7 +36,6 @@ class TrapLabel : public UntypedTrapLabel {
friend class TrapLabel;
using UntypedTrapLabel::UntypedTrapLabel;
public:
using Tag = TagParam;

View File

@@ -0,0 +1,60 @@
#pragma once
#include <cassert>
#include <optional>
#include <unordered_map>
#include <swift/AST/ASTVisitor.h>
#include "swift/extractor/trap/TrapLabel.h"
#include "swift/extractor/trap/TrapTagTraits.h"
// autogenerated file
#include "swift/extractor/trap/TrapTags.h"
namespace codeql {
// Subclass pointers do not necessarily coincide with superclass ones in case of multiple
// inheritance. Each overload below therefore takes its argument as a pointer to one of the root
// AST classes before erasing the type, so that the resulting `const void*` does not depend on the
// static type the caller happened to hold.
inline const void* getCanonicalPtr(const swift::Decl* e) {
  return static_cast<const void*>(e);
}

inline const void* getCanonicalPtr(const swift::Stmt* e) {
  return static_cast<const void*>(e);
}

inline const void* getCanonicalPtr(const swift::Expr* e) {
  return static_cast<const void*>(e);
}

inline const void* getCanonicalPtr(const swift::Pattern* e) {
  return static_cast<const void*>(e);
}

inline const void* getCanonicalPtr(const swift::TypeRepr* e) {
  return static_cast<const void*>(e);
}

inline const void* getCanonicalPtr(const swift::TypeBase* e) {
  return static_cast<const void*>(e);
}
// The extraction is done in a lazy/on-demand fashion:
// Each emitted TRAP entry for an AST node gets a TRAP label assigned to it.
// To avoid re-emission, we store the "AST node <> label" entry in the TrapLabelStore.
// Entries are keyed by the canonical pointer of the node (see `getCanonicalPtr`) and hold the
// untyped numeric id of the label.
class TrapLabelStore {
 public:
  // Returns the label previously stored for `e`, or `std::nullopt` when `e` was never inserted.
  // The typed label is rebuilt from the stored raw id, using the tag given by `ToTag<T>`.
  // Lookup does not modify the store, hence the method is `const`.
  template <typename T>
  std::optional<TrapLabel<ToTag<T>>> get(const T* e) const {
    if (auto found = store_.find(getCanonicalPtr(e)); found != store_.end()) {
      return TrapLabel<ToTag<T>>::unsafeCreateFromExplicitId(found->second);
    }
    return std::nullopt;
  }

  // Records `l` as the label of `e`. An entity is expected to be inserted at most once: a
  // repeated insertion trips the assertion in debug builds, and leaves the existing entry
  // untouched otherwise.
  template <typename T>
  void insert(const T* e, TrapLabel<ToTag<T>> l) {
    // `inserted` is only read by the assertion, which is compiled out under NDEBUG
    [[maybe_unused]] const bool inserted = store_.emplace(getCanonicalPtr(e), l.id_).second;
    assert(inserted && "already inserted");
  }

 private:
  std::unordered_map<const void*, uint64_t> store_;
};
} // namespace codeql