Merge pull request #12335 from github/alexdenisov/extract-lazy-declarations

Swift: extract lazy declarations
This commit is contained in:
AlexDenisov
2023-03-03 16:06:20 +01:00
committed by GitHub
11 changed files with 123 additions and 115 deletions

View File

@@ -1,73 +0,0 @@
#pragma once
#include <array>
namespace codeql {
// Names of symbols of the Swift `Builtin` module, stored as string literals (the element type
// is deduced as `const char*`, the array size from the number of entries).
//
// The table is declared `inline` so that every translation unit including this header refers
// to one single definition instead of getting its own internal-linkage copy.
inline constexpr std::array swiftBuiltins = {
    // generic helpers and opaque builtin types
    "zeroInitializer",
    "BridgeObject",
    "Word",
    "NativeObject",
    "RawPointer",
    "Executor",
    "Job",
    "RawUnsafeContinuation",
    "addressof",
    "initialize",
    "reinterpretCast",
    // integer types
    "Int1",
    "Int8",
    "Int16",
    "Int32",
    "Int64",
    "IntLiteral",
    // floating point types
    "FPIEEE16",
    "FPIEEE32",
    "FPIEEE64",
    "FPIEEE80",
    // integer vector types
    "Vec2xInt8",
    "Vec4xInt8",
    "Vec8xInt8",
    "Vec16xInt8",
    "Vec32xInt8",
    "Vec64xInt8",
    "Vec2xInt16",
    "Vec4xInt16",
    "Vec8xInt16",
    "Vec16xInt16",
    "Vec32xInt16",
    "Vec64xInt16",
    "Vec2xInt32",
    "Vec4xInt32",
    "Vec8xInt32",
    "Vec16xInt32",
    "Vec32xInt32",
    "Vec64xInt32",
    "Vec2xInt64",
    "Vec4xInt64",
    "Vec8xInt64",
    "Vec16xInt64",
    "Vec32xInt64",
    "Vec64xInt64",
    // floating point vector types
    "Vec2xFPIEEE16",
    "Vec4xFPIEEE16",
    "Vec8xFPIEEE16",
    "Vec16xFPIEEE16",
    "Vec32xFPIEEE16",
    "Vec64xFPIEEE16",
    "Vec2xFPIEEE32",
    "Vec4xFPIEEE32",
    "Vec8xFPIEEE32",
    "Vec16xFPIEEE32",
    "Vec32xFPIEEE32",
    "Vec64xFPIEEE32",
    "Vec2xFPIEEE64",
    "Vec4xFPIEEE64",
    "Vec8xFPIEEE64",
    "Vec16xFPIEEE64",
    "Vec32xFPIEEE64",
    "Vec64xFPIEEE64",
    // actor executor helpers
    "buildDefaultActorExecutorRef",
    "buildMainActorExecutorRef",
};
}  // namespace codeql

View File

@@ -10,10 +10,10 @@
#include "swift/extractor/translators/SwiftVisitor.h"
#include "swift/extractor/infra/TargetDomains.h"
#include "swift/extractor/SwiftBuiltinSymbols.h"
#include "swift/extractor/infra/file/Path.h"
#include "swift/extractor/infra/SwiftLocationExtractor.h"
#include "swift/extractor/infra/SwiftBodyEmissionStrategy.h"
#include "swift/extractor/mangler/SwiftMangler.h"
using namespace codeql;
using namespace std::string_literals;
@@ -43,10 +43,16 @@ static void archiveFile(const SwiftExtractorConfiguration& config, swift::Source
}
}
static fs::path getFilename(swift::ModuleDecl& module, swift::SourceFile* primaryFile) {
static fs::path getFilename(swift::ModuleDecl& module,
swift::SourceFile* primaryFile,
const swift::Decl* lazyDeclaration) {
if (primaryFile) {
return resolvePath(primaryFile->getFilename());
}
if (lazyDeclaration) {
SwiftMangler mangler;
return mangler.mangledName(*lazyDeclaration);
}
// PCM clang module
if (module.isNonSwiftModule()) {
// Several modules with different names might come from .pcm (clang module) files
@@ -72,49 +78,42 @@ static fs::path getFilename(swift::ModuleDecl& module, swift::SourceFile* primar
return resolvePath(filename);
}
/* The builtin module is special, as it does not publish any top-level declarations.
 * It creates (and caches) declarations on demand when a lookup is carried out
 * (see BuiltinUnit in swift/AST/FileUnit.h for the cache details, and getBuiltinValueDecl in
 * swift/AST/Builtins.h for the creation details).
 * As we want to create the Builtin trap file once and for all, so that it works for other
 * extraction runs, we pre-populate the builtin trap with what we expect rather than collecting
 * what we need. This list might therefore need to be expanded.
 * Note that while swift/AST/Builtins.def has a list of builtin symbols, it does not contain
 * all the information required to instantiate builtin variants.
 * Other possible approaches:
 * * create one trap per builtin declaration when encountered
 * * expand the list to all possible builtins (of which there are a lot)
 */
static void getBuiltinDecls(swift::ModuleDecl& builtinModule,
llvm::SmallVector<swift::Decl*>& decls) {
llvm::SmallVector<swift::ValueDecl*> values;
for (auto symbol : swiftBuiltins) {
builtinModule.lookupValue(builtinModule.getASTContext().getIdentifier(symbol),
swift::NLKind::QualifiedLookup, values);
static llvm::SmallVector<const swift::Decl*> getTopLevelDecls(swift::ModuleDecl& module,
swift::SourceFile* primaryFile,
const swift::Decl* lazyDeclaration) {
llvm::SmallVector<const swift::Decl*> ret;
if (lazyDeclaration) {
ret.push_back(lazyDeclaration);
return ret;
}
decls.insert(decls.end(), values.begin(), values.end());
ret.push_back(&module);
llvm::SmallVector<swift::Decl*> topLevelDecls;
if (primaryFile) {
primaryFile->getTopLevelDecls(topLevelDecls);
} else {
module.getTopLevelDecls(topLevelDecls);
}
ret.insert(ret.end(), topLevelDecls.data(), topLevelDecls.data() + topLevelDecls.size());
return ret;
}
static llvm::SmallVector<swift::Decl*> getTopLevelDecls(swift::ModuleDecl& module,
swift::SourceFile* primaryFile = nullptr) {
llvm::SmallVector<swift::Decl*> ret;
ret.push_back(&module);
static TrapType getTrapType(swift::SourceFile* primaryFile, const swift::Decl* lazyDeclaration) {
if (primaryFile) {
primaryFile->getTopLevelDecls(ret);
} else if (module.isBuiltinModule()) {
getBuiltinDecls(module, ret);
} else {
module.getTopLevelDecls(ret);
return TrapType::source;
}
return ret;
if (lazyDeclaration) {
return TrapType::lazy_declaration;
}
return TrapType::module;
}
static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
SwiftExtractorState& state,
swift::CompilerInstance& compiler,
swift::ModuleDecl& module,
swift::SourceFile* primaryFile = nullptr) {
auto filename = getFilename(module, primaryFile);
swift::SourceFile* primaryFile,
const swift::Decl* lazyDeclaration) {
auto filename = getFilename(module, primaryFile, lazyDeclaration);
if (primaryFile) {
state.sourceFiles.push_back(filename);
}
@@ -122,10 +121,13 @@ static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
// The extractor can be called several times from different processes with
// the same input file(s). Using `TargetFile` the first process will win, and the following
// will just skip the work
const auto trapType = primaryFile ? TrapType::source : TrapType::module;
const auto trapType = getTrapType(primaryFile, lazyDeclaration);
auto trap = createTargetTrapDomain(state, filename, trapType);
if (!trap) {
// another process arrived first, nothing to do for us
if (lazyDeclaration) {
state.emittedDeclarations.insert(lazyDeclaration);
}
return {};
}
@@ -143,9 +145,10 @@ static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
SwiftLocationExtractor locationExtractor(*trap);
locationExtractor.emitFile(primaryFile);
SwiftBodyEmissionStrategy bodyEmissionStrategy(module, primaryFile);
SwiftVisitor visitor(compiler.getSourceMgr(), *trap, locationExtractor, bodyEmissionStrategy);
auto topLevelDecls = getTopLevelDecls(module, primaryFile);
SwiftBodyEmissionStrategy bodyEmissionStrategy(module, primaryFile, lazyDeclaration);
SwiftVisitor visitor(compiler.getSourceMgr(), state, *trap, locationExtractor,
bodyEmissionStrategy);
auto topLevelDecls = getTopLevelDecls(module, primaryFile, lazyDeclaration);
for (auto decl : topLevelDecls) {
visitor.extract(decl);
}
@@ -198,10 +201,12 @@ void codeql::extractSwiftFiles(SwiftExtractorState& state, swift::CompilerInstan
continue;
}
archiveFile(state.configuration, *sourceFile);
encounteredModules = extractDeclarations(state, compiler, *module, sourceFile);
encounteredModules =
extractDeclarations(state, compiler, *module, sourceFile, /*lazy declaration*/ nullptr);
}
if (!isFromSourceFile) {
encounteredModules = extractDeclarations(state, compiler, *module);
encounteredModules = extractDeclarations(state, compiler, *module, /*source file*/ nullptr,
/*lazy declaration*/ nullptr);
}
for (auto encountered : encounteredModules) {
if (state.encounteredModules.count(encountered) == 0) {
@@ -211,3 +216,37 @@ void codeql::extractSwiftFiles(SwiftExtractorState& state, swift::CompilerInstan
}
}
}
// Removes from `state.pendingDeclarations` every declaration that is already recorded in
// `state.emittedDeclarations`, so that only declarations still to be extracted remain pending.
static void cleanupPendingDeclarations(SwiftExtractorState& state) {
  auto& pending = state.pendingDeclarations;
  for (auto it = pending.begin(); it != pending.end();) {
    if (state.emittedDeclarations.count(*it) != 0) {
      // `erase` hands back the iterator following the removed element
      it = pending.erase(it);
    } else {
      ++it;
    }
  }
}
// Runs a single round of lazy declaration extraction: already emitted declarations are first
// dropped from the pending set, then each remaining pending declaration is extracted on its
// own, using the module it belongs to and no primary source file.
static void extractLazy(SwiftExtractorState& state, swift::CompilerInstance& compiler) {
  cleanupPendingDeclarations(state);
  // Work on a snapshot: `state` is handed to the extraction below, which may register
  // further pending declarations while this loop is running.
  const std::vector<const swift::Decl*> snapshot(state.pendingDeclarations.begin(),
                                                 state.pendingDeclarations.end());
  for (const swift::Decl* lazyDeclaration : snapshot) {
    swift::ModuleDecl& owningModule = *lazyDeclaration->getModuleContext();
    extractDeclarations(state, compiler, owningModule, /*source file*/ nullptr, lazyDeclaration);
  }
}
// Extracts all lazy declarations recorded in `state.pendingDeclarations`.
//
// Extraction is repeated in rounds (see `extractLazy`) for as long as declarations are still
// pending. As a safety net against a set that never drains, the number of rounds is capped:
// if declarations are still pending once the cap is reached, an error is printed to stderr
// and the process is aborted.
void codeql::extractExtractLazyDeclarations(SwiftExtractorState& state,
                                            swift::CompilerInstance& compiler) {
  // Just in case
  constexpr int upperBound = 100;
  for (int iteration = 0; iteration < upperBound && !state.pendingDeclarations.empty();
       ++iteration) {
    extractLazy(state, compiler);
  }
  // Only give up when there is work left. Checking the round counter instead would also
  // abort a run that legitimately finished on the very last allowed round.
  if (!state.pendingDeclarations.empty()) {
    std::cerr << "Swift extractor reached upper bound while extracting lazy declarations\n";
    abort();
  }
}

View File

@@ -7,4 +7,5 @@
namespace codeql {
// Extracts the modules and source files of the given compiler instance, recording results in
// `state`.
void extractSwiftFiles(SwiftExtractorState& state, swift::CompilerInstance& compiler);
// Extracts the declarations registered in `state.pendingDeclarations`, one at a time, repeating
// for as long as declarations are pending. The number of rounds is bounded; the process is
// aborted rather than looping forever.
void extractExtractLazyDeclarations(SwiftExtractorState& state, swift::CompilerInstance& compiler);
} // namespace codeql

View File

@@ -24,6 +24,12 @@ struct SwiftExtractorState {
// The path for the modules outputted by the underlying frontend run, ignoring path redirection
std::vector<std::filesystem::path> originalOutputModules;
// All lazy named declarations that were already emitted
std::unordered_set<const swift::Decl*> emittedDeclarations;
// Lazy named declarations that were not yet emitted and will be emitted each one separately
std::unordered_set<const swift::Decl*> pendingDeclarations;
};
} // namespace codeql

View File

@@ -16,6 +16,9 @@ bool SwiftBodyEmissionStrategy::shouldEmitDeclBody(const swift::Decl& decl) {
if (module != &currentModule) {
return false;
}
if (currentLazyDeclaration && currentLazyDeclaration != &decl) {
return false;
}
// ModuleDecl is a special case: if it passed the previous test, it is the current module
// but it never has a source file, so we short circuit to emit it in any case
if (!currentPrimarySourceFile || decl.getKind() == swift::DeclKind::Module) {

View File

@@ -8,13 +8,17 @@ namespace codeql {
class SwiftBodyEmissionStrategy {
public:
SwiftBodyEmissionStrategy(swift::ModuleDecl& currentModule,
swift::SourceFile* currentPrimarySourceFile)
: currentModule(currentModule), currentPrimarySourceFile(currentPrimarySourceFile) {}
swift::SourceFile* currentPrimarySourceFile,
const swift::Decl* currentLazyDeclaration)
: currentModule(currentModule),
currentPrimarySourceFile(currentPrimarySourceFile),
currentLazyDeclaration(currentLazyDeclaration) {}
bool shouldEmitDeclBody(const swift::Decl& decl);
private:
swift::ModuleDecl& currentModule;
swift::SourceFile* currentPrimarySourceFile;
const swift::Decl* currentLazyDeclaration;
};
} // namespace codeql

View File

@@ -12,6 +12,7 @@
#include "swift/extractor/trap/generated/TrapClasses.h"
#include "swift/extractor/infra/SwiftLocationExtractor.h"
#include "swift/extractor/infra/SwiftBodyEmissionStrategy.h"
#include "swift/extractor/config/SwiftExtractorState.h"
namespace codeql {
@@ -45,10 +46,12 @@ class SwiftDispatcher {
// All references and pointers passed as parameters to this constructor are supposed to outlive
// the SwiftDispatcher: each of them is stored as a reference member, nothing is copied.
// `state` is the shared extractor state in which emitted and pending lazy declarations are
// recorded (see `extractedDeclaration` and `skippedDeclaration`).
SwiftDispatcher(const swift::SourceManager& sourceManager,
SwiftExtractorState& state,
TrapDomain& trap,
SwiftLocationExtractor& locationExtractor,
SwiftBodyEmissionStrategy& bodyEmissionStrategy)
: sourceManager{sourceManager},
state{state},
trap{trap},
locationExtractor{locationExtractor},
bodyEmissionStrategy{bodyEmissionStrategy} {}
@@ -248,7 +251,23 @@ class SwiftDispatcher {
locationExtractor.attachLocation(sourceManager, comment, entry.id);
}
// Records that `decl` has been emitted. Only lazy declarations (see `isLazyDeclaration`) are
// tracked; any other declaration is ignored.
void extractedDeclaration(const swift::Decl& decl) {
  if (!isLazyDeclaration(decl)) {
    return;
  }
  state.emittedDeclarations.insert(&decl);
}
// Records that `decl` was not emitted, so that it is queued as pending. Only lazy
// declarations (see `isLazyDeclaration`) are tracked; any other declaration is ignored.
void skippedDeclaration(const swift::Decl& decl) {
  if (!isLazyDeclaration(decl)) {
    return;
  }
  state.pendingDeclarations.insert(&decl);
}
private:
// Tells whether `decl` is handled as a lazy declaration, which is the case when its module
// is either the builtin module or the module named `__ObjC`.
bool isLazyDeclaration(const swift::Decl& decl) {
  swift::ModuleDecl* owner = decl.getModuleContext();
  if (owner->isBuiltinModule()) {
    return true;
  }
  return owner->getName().str() == "__ObjC";
}
template <typename T, typename = void>
struct HasSize : std::false_type {};
@@ -302,6 +321,7 @@ class SwiftDispatcher {
virtual void visit(const swift::CapturedValue* capture) = 0;
const swift::SourceManager& sourceManager;
SwiftExtractorState& state;
TrapDomain& trap;
Store store;
SwiftLocationExtractor& locationExtractor;

View File

@@ -13,6 +13,8 @@ static const char* typeToStr(TrapType type) {
return "invocations";
case TrapType::linkage:
return "linkage";
case TrapType::lazy_declaration:
return "lazy_decls";
default:
return "";
}

View File

@@ -12,6 +12,7 @@ enum class TrapType {
module,
invocation,
linkage,
lazy_declaration,
};
std::filesystem::path getTrapPath(const SwiftExtractorState& state,

View File

@@ -92,6 +92,7 @@ class Observer : public swift::FrontendObserver {
// Frontend observer hook: runs the extraction steps for `compiler`, all sharing the same `state`.
void performedSemanticAnalysis(swift::CompilerInstance& compiler) override {
codeql::extractSwiftFiles(state, compiler);
codeql::extractSwiftInvocation(state, compiler, invocationTrap);
// Runs last: it consumes the pending lazy declarations accumulated in `state`
// (presumably by the extraction steps above -- confirm against the dispatcher).
codeql::extractExtractLazyDeclarations(state, compiler);
}
private:

View File

@@ -2,6 +2,7 @@
#include <swift/AST/Decl.h>
#include <swift/AST/ASTMangler.h>
#include <swift/AST/Module.h>
#include "swift/extractor/translators/TranslatorBase.h"
#include "swift/extractor/trap/generated/decl/TrapClasses.h"
@@ -70,8 +71,11 @@ class DeclTranslator : public AstTranslatorBase<DeclTranslator> {
std::optional<TrapClassOf<D>> entry;
auto id = dispatcher.assignNewLabel(decl, mangledName(decl));
if (dispatcher.shouldEmitDeclBody(decl)) {
dispatcher.extractedDeclaration(decl);
entry.emplace(id);
fillDecl(decl, *entry);
} else {
dispatcher.skippedDeclaration(decl);
}
return entry;
}