Swift: fix missing extractions from Builtin

There were missing extractions from the Builtin (and other) modules.

This was actually caused by two issues:
* we did not visit all required modules, as for example the `Builtin`
  module does not appear as being imported by anybody (together with
  another mysterious `__ObjC` module)
* moreover the `Builtin` module works internally by only creating
  declarations on demand, and does not provide a list of its top level
  declarations.

The first problem was solved by moving module collection into the actual
visiting. This may mean we extract fewer modules, as we now only extract
the modules we actually use something from (recursively). This change can
be reverted if we feel we need it.

The second one was solved by explicitly listing the builtin symbols
encountered during a normal extraction. This does mean this list needs
to be kept up to date.
This commit is contained in:
Paolo Tranquilli
2022-08-25 15:18:24 +02:00
parent 606b9e6e38
commit df3dc9677f
6 changed files with 69 additions and 48 deletions

View File

@@ -6,6 +6,7 @@
#include <queue>
#include <swift/AST/SourceFile.h>
#include <swift/AST/Builtins.h>
#include <swift/Basic/FileTypes.h>
#include <llvm/ADT/SmallString.h>
#include <llvm/Support/FileSystem.h>
@@ -68,22 +69,52 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri
return module.getModuleFilename().str();
}
// Looks up a fixed list of known symbol names in the given builtin module and returns every
// declaration found. The module is queried name by name because, as stated in the change
// description, `Builtin` creates its declarations on demand and does not expose a list of its
// top level declarations. The list below has to be kept up to date by hand.
static llvm::SmallVector<swift::ValueDecl*> getBuiltinDecls(swift::ModuleDecl& builtinModule) {
  static constexpr const char* builtinSymbols[] = {
      "zeroInitializer", "BridgeObject",   "Word",           "NativeObject",
      "RawPointer",      "Int1",           "Int8",           "Int16",
      "Int32",           "Int64",          "IntLiteral",     "FPIEEE16",
      "FPIEEE32",        "FPIEEE64",       "FPIEEE80",       "Vec2xInt8",
      "Vec4xInt8",       "Vec8xInt8",      "Vec16xInt8",     "Vec32xInt8",
      "Vec64xInt8",      "Vec2xInt16",     "Vec4xInt16",     "Vec8xInt16",
      "Vec16xInt16",     "Vec32xInt16",    "Vec64xInt16",    "Vec2xInt32",
      "Vec4xInt32",      "Vec8xInt32",     "Vec16xInt32",    "Vec32xInt32",
      "Vec64xInt32",     "Vec2xInt64",     "Vec4xInt64",     "Vec8xInt64",
      "Vec16xInt64",     "Vec32xInt64",    "Vec64xInt64",    "Vec2xFPIEEE16",
      "Vec4xFPIEEE16",   "Vec8xFPIEEE16",  "Vec16xFPIEEE16", "Vec32xFPIEEE16",
      "Vec64xFPIEEE16",  "Vec2xFPIEEE32",  "Vec4xFPIEEE32",  "Vec8xFPIEEE32",
      "Vec16xFPIEEE32",  "Vec32xFPIEEE32", "Vec64xFPIEEE32", "Vec2xFPIEEE64",
      "Vec4xFPIEEE64",   "Vec8xFPIEEE64",  "Vec16xFPIEEE64", "Vec32xFPIEEE64",
      "Vec64xFPIEEE64",
  };

  auto& astContext = builtinModule.getASTContext();
  llvm::SmallVector<swift::ValueDecl*> found;
  for (const char* name : builtinSymbols) {
    // each lookup appends whatever it finds for `name` to `found`
    builtinModule.lookupValue(astContext.getIdentifier(name), swift::NLKind::QualifiedLookup,
                              found);
  }
  return found;
}
// Returns the declarations to start the extraction from: the module declaration itself comes
// first, followed by
// - the top level declarations of `primaryFile`, when one is given;
// - the explicitly listed builtin declarations, when `module` is the builtin module;
// - the top level declarations of the whole module otherwise.
static llvm::SmallVector<swift::Decl*> getTopLevelDecls(swift::ModuleDecl& module,
                                                        swift::SourceFile* primaryFile = nullptr) {
  llvm::SmallVector<swift::Decl*> decls;
  decls.push_back(&module);

  if (primaryFile != nullptr) {
    primaryFile->getTopLevelDecls(decls);
    return decls;
  }

  if (!module.isBuiltinModule()) {
    module.getTopLevelDecls(decls);
    return decls;
  }

  // the builtin module is populated from our own symbol list instead of `getTopLevelDecls`
  for (swift::ValueDecl* builtinDecl : getBuiltinDecls(module)) {
    decls.push_back(builtinDecl);
  }
  return decls;
}
static void extractDeclarations(const SwiftExtractorConfiguration& config,
swift::CompilerInstance& compiler,
swift::ModuleDecl& module,
swift::SourceFile* primaryFile = nullptr) {
static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
const SwiftExtractorConfiguration& config,
swift::CompilerInstance& compiler,
swift::ModuleDecl& module,
swift::SourceFile* primaryFile = nullptr) {
auto filename = getFilename(module, primaryFile);
// The extractor can be called several times from different processes with
@@ -92,7 +123,7 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
auto trapTarget = createTargetTrapFile(config, filename);
if (!trapTarget) {
// another process arrived first, nothing to do for us
return;
return {};
}
TrapDomain trap{*trapTarget};
@@ -116,6 +147,7 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config,
for (auto& comment : comments) {
visitor.extract(comment);
}
return std::move(visitor).getEncounteredModules();
}
static std::unordered_set<std::string> collectInputFilenames(swift::CompilerInstance& compiler) {
@@ -132,40 +164,18 @@ static std::unordered_set<std::string> collectInputFilenames(swift::CompilerInst
return sourceFiles;
}
// Gathers the modules loaded in the compiler's AST context together with everything reachable
// from them through exported imports.
static std::unordered_set<swift::ModuleDecl*> collectModules(swift::CompilerInstance& compiler) {
  // getASTContext().getLoadedModules() does not provide all the modules available within the
  // program, so imported modules are followed (recursively) to see the whole "universe."
  std::unordered_set<swift::ModuleDecl*> seen;
  std::queue<swift::ModuleDecl*> pending;

  for (auto& [_, loaded] : compiler.getASTContext().getLoadedModules()) {
    pending.push(loaded);
    seen.insert(loaded);
  }

  while (!pending.empty()) {
    auto current = pending.front();
    pending.pop();

    llvm::SmallVector<swift::ImportedModule> imports;
    // TODO: we may need more than just Exported ones
    current->getImportedModules(imports, swift::ModuleDecl::ImportFilterKind::Exported);

    for (auto& entry : imports) {
      auto dependency = entry.importedModule;
      // `insert` reports whether the module was new; only new modules get queued
      if (seen.insert(dependency).second) {
        pending.push(dependency);
      }
    }
  }
  return seen;
}
void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
swift::CompilerInstance& compiler) {
auto inputFiles = collectInputFilenames(compiler);
auto modules = collectModules(compiler);
std::vector<swift::ModuleDecl*> todo = {compiler.getMainModule()};
std::unordered_set<swift::ModuleDecl*> processed = {};
for (auto& module : modules) {
while (!todo.empty()) {
auto module = todo.back();
todo.pop_back();
llvm::errs() << "processing module " << module->getName() << '\n';
bool isFromSourceFile = false;
std::unordered_set<swift::ModuleDecl*> encounteredModules;
for (auto file : module->getFiles()) {
auto sourceFile = llvm::dyn_cast<swift::SourceFile>(file);
if (!sourceFile) {
@@ -176,10 +186,16 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
continue;
}
archiveFile(config, *sourceFile);
extractDeclarations(config, compiler, *module, sourceFile);
encounteredModules = extractDeclarations(config, compiler, *module, sourceFile);
}
if (!isFromSourceFile) {
extractDeclarations(config, compiler, *module);
encounteredModules = extractDeclarations(config, compiler, *module);
}
processed.insert(module);
for (auto encountered : encounteredModules) {
if (processed.count(encountered) == 0) {
todo.push_back(encountered);
}
}
}
}

View File

@@ -55,6 +55,10 @@ class SwiftDispatcher {
}
}
const std::unordered_set<swift::ModuleDecl*> getEncounteredModules() && {
return std::move(encounteredModules);
}
template <typename Entry>
void emit(const Entry& entry) {
trap.emit(entry);
@@ -228,8 +232,16 @@ class SwiftDispatcher {
// - extracting a primary source file: in this mode, we extract several files belonging to the
// same module one by one. In this mode, we restrict emission only to the same file ignoring
// all the other files.
// This is also used to register the modules we encounter.
// TODO: calls to this function should be taken away from `DeclVisitor` and moved around with a
// clearer separation between naming entities (some decls, all types), deciding whether to emit
// them, and finally visiting them and emitting their contents (which should remain in the
// visitors). Then this double responsibility (carrying out the test and registering encountered
// modules) should also be cleaned up.
bool shouldEmitDeclBody(const swift::Decl& decl) {
if (decl.getModuleContext() != &currentModule) {
auto module = decl.getModuleContext();
if (module != &currentModule) {
encounteredModules.insert(module);
return false;
}
// ModuleDecl is a special case: if it passed the previous test, it is the current module
@@ -333,6 +345,7 @@ class SwiftDispatcher {
Store::Handle waitingForNewLabel{std::monostate{}};
swift::ModuleDecl& currentModule;
swift::SourceFile* currentPrimarySourceFile;
std::unordered_set<swift::ModuleDecl*> encounteredModules;
};
} // namespace codeql

View File

@@ -11,6 +11,7 @@ namespace codeql {
class SwiftVisitor : private SwiftDispatcher {
public:
using SwiftDispatcher::getEncounteredModules;
using SwiftDispatcher::SwiftDispatcher;
template <typename T>

View File

@@ -1,5 +1,6 @@
| file://:0:0:0:0 | A |
| file://:0:0:0:0 | B |
| file://:0:0:0:0 | PackageDescription |
| file://:0:0:0:0 | __ObjC |
| file://:0:0:0:0 | main |
| file://:0:0:0:0 | partial_modules |

View File

@@ -1,11 +1,6 @@
| Builtin.BridgeObject | getName: | Builtin.BridgeObject | getCanonicalType: | Builtin.BridgeObject |
| Builtin.DefaultActorStorage | getName: | Builtin.DefaultActorStorage | getCanonicalType: | Builtin.DefaultActorStorage |
| Builtin.Executor | getName: | Builtin.Executor | getCanonicalType: | Builtin.Executor |
| Builtin.FPIEEE32 | getName: | Builtin.FPIEEE32 | getCanonicalType: | Builtin.FPIEEE32 |
| Builtin.FPIEEE64 | getName: | Builtin.FPIEEE64 | getCanonicalType: | Builtin.FPIEEE64 |
| Builtin.IntLiteral | getName: | Builtin.IntLiteral | getCanonicalType: | Builtin.IntLiteral |
| Builtin.Job | getName: | Builtin.Job | getCanonicalType: | Builtin.Job |
| Builtin.NativeObject | getName: | Builtin.NativeObject | getCanonicalType: | Builtin.NativeObject |
| Builtin.RawPointer | getName: | Builtin.RawPointer | getCanonicalType: | Builtin.RawPointer |
| Builtin.RawUnsafeContinuation | getName: | Builtin.RawUnsafeContinuation | getCanonicalType: | Builtin.RawUnsafeContinuation |
| Builtin.UnsafeValueBuffer | getName: | Builtin.UnsafeValueBuffer | getCanonicalType: | Builtin.UnsafeValueBuffer |

View File

@@ -4,11 +4,6 @@ func foo(
_: Builtin.FPIEEE32,
_: Builtin.FPIEEE64,
_: Builtin.BridgeObject,
_: Builtin.DefaultActorStorage,
_: Builtin.Executor,
_: Builtin.Job,
_: Builtin.NativeObject,
_: Builtin.RawPointer,
_: Builtin.RawUnsafeContinuation,
_: Builtin.UnsafeValueBuffer
_: Builtin.RawPointer
) {}