#include "SwiftExtractor.h"

// NOTE(review): the targets of the angle-bracket includes were missing from the mangled source
// this file was restored from. The list below is reconstructed from the names this file uses;
// confirm it against the build before merging.
#include <filesystem>
#include <string>
#include <string_view>
#include <system_error>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include <llvm/ADT/SmallVector.h>
#include <llvm/Support/Casting.h>
#include <swift/AST/Decl.h>
#include <swift/AST/Module.h>
#include <swift/AST/SourceFile.h>
#include <swift/Frontend/Frontend.h>
#include <swift/Parse/Token.h>
#include <swift/Subsystems.h>

#include "swift/extractor/translators/SwiftVisitor.h"
#include "swift/extractor/infra/TargetDomains.h"
#include "swift/extractor/infra/file/Path.h"
#include "swift/extractor/infra/SwiftLocationExtractor.h"
#include "swift/extractor/infra/SwiftBodyEmissionStrategy.h"
#include "swift/logging/SwiftAssert.h"

using namespace codeql;
using namespace std::string_literals;
namespace fs = std::filesystem;

// Logger used by the logging macros in this translation unit.
Logger& main_logger::logger() {
  static Logger ret{"main"};
  return ret;
}

using namespace main_logger;

// Creates `dir` (including any missing parent directories) and asserts if that fails.
// `label` is only used to make the assertion message readable.
static void ensureDirectory(const char* label, const fs::path& dir) {
  std::error_code ec;
  fs::create_directories(dir, ec);
  CODEQL_ASSERT(!ec, "Cannot create {} directory ({})", label, ec);
}

// Copies the source file into the configured source archive directory, mirroring its resolved
// path below that directory. A failed copy is logged and otherwise ignored.
static void archiveFile(const SwiftExtractorConfiguration& config, swift::SourceFile& file) {
  auto source = codeql::resolvePath(file.getFilename());
  auto destination = config.sourceArchiveDir / source.relative_path();

  ensureDirectory("source archive destination", destination.parent_path());

  std::error_code ec;
  fs::copy(source, destination, fs::copy_options::overwrite_existing, ec);

  if (ec) {
    LOG_INFO(
        "Cannot archive source file {} -> {}, probably a harmless race with another process ({})",
        source, destination, ec);
  }
}

// Builds a file-name friendly identifier for `decl` with the shape
// `<module real name>/<decl kind>_[<base name>_]<hash of mangled name>`.
static std::string mangledDeclName(const swift::Decl& decl) {
  // SwiftRecursiveMangler mangled name cannot be used directly as it can be too long for file names
  // so we build some minimal human readable info for debuggability and append a hash
  auto ret = decl.getModuleContext()->getRealName().str().str();
  ret += '/';
  ret += swift::Decl::getKindName(decl.getKind());
  ret += '_';
  if (auto valueDecl = llvm::dyn_cast<swift::ValueDecl>(&decl); valueDecl && valueDecl->hasName()) {
    ret += valueDecl->getBaseName().userFacingName();
    ret += '_';
  }
  SwiftRecursiveMangler mangler;
  ret += mangler.mangleDecl(decl).hash();
  return ret;
}

// Picks the path that identifies the extraction target. In order of precedence: the primary
// source file, the lazy declaration, then a name derived from the module itself.
static fs::path getFilename(swift::ModuleDecl& module,
                            swift::SourceFile* primaryFile,
                            const swift::Decl* lazyDeclaration) {
  if (primaryFile) {
    return resolvePath(primaryFile->getFilename());
  }
  if (lazyDeclaration) {
    return mangledDeclName(*lazyDeclaration);
  }
  // PCM clang module
  if (module.isNonSwiftModule()) {
    // Several modules with different names might come from .pcm (clang module) files
    // In this case we want to differentiate them
    // Moreover, pcm files may come from caches located in different directories, but are
    // unambiguously identified by the base file name, so we can discard the absolute directory
    fs::path filename = "/pcms";
    filename /= fs::path{std::string_view{module.getModuleFilename()}}.filename();
    filename += "-";
    filename += module.getName().str();
    return filename;
  }
  if (module.isBuiltinModule()) {
    // The Builtin module has an empty filename, let's fix that
    return "/__Builtin__";
  }
  std::string_view filename = module.getModuleFilename();
  // there is a special case of a module without an actual filename reporting `<imports>`: in this
  // case we want to avoid the `<>` characters, in case a dirty DB is imported on Windows
  if (filename == "<imports>") {
    return "/__imports__";
  }
  return resolvePath(filename);
}

// Returns the declarations to visit for one extraction target:
//  * only the lazy declaration, when one is given;
//  * otherwise the module itself followed by the top level declarations of the primary file, or of
//    the whole module when there is no primary file.
static llvm::SmallVector<const swift::Decl*> getTopLevelDecls(swift::ModuleDecl& module,
                                                              swift::SourceFile* primaryFile,
                                                              const swift::Decl* lazyDeclaration) {
  llvm::SmallVector<const swift::Decl*> ret;
  if (lazyDeclaration) {
    ret.push_back(lazyDeclaration);
    return ret;
  }
  ret.push_back(&module);
  llvm::SmallVector<swift::Decl*> topLevelDecls;
  if (primaryFile) {
    primaryFile->getTopLevelDecls(topLevelDecls);
  } else {
    module.getTopLevelDecls(topLevelDecls);
  }
  ret.insert(ret.end(), topLevelDecls.begin(), topLevelDecls.end());
  return ret;
}

// Maps the kind of extraction target to its trap type, with the same precedence as `getFilename`.
static TrapType getTrapType(swift::SourceFile* primaryFile, const swift::Decl* lazyDeclaration) {
  if (primaryFile) {
    return TrapType::source;
  }
  if (lazyDeclaration) {
    return TrapType::lazy_declaration;
  }
  return TrapType::module;
}

// Extracts one target (a primary source file, a lazy declaration, or a whole module) into its own
// trap domain and returns the modules reported by the visitor. Returns an empty set when no trap
// domain could be obtained for the target.
static std::unordered_set<swift::ModuleDecl*> extractDeclarations(
    SwiftExtractorState& state,
    swift::CompilerInstance& compiler,
    swift::ModuleDecl& module,
    swift::SourceFile* primaryFile,
    const swift::Decl* lazyDeclaration) {
  auto filename = getFilename(module, primaryFile, lazyDeclaration);
  if (primaryFile) {
    state.sourceFiles.push_back(filename);
  }

  // The extractor can be called several times from different processes with
  // the same input file(s). Using `TargetFile` the first process will win, and the following
  // will just skip the work
  const auto trapType = getTrapType(primaryFile, lazyDeclaration);
  auto trap = createTargetTrapDomain(state, filename, trapType);
  if (!trap) {
    // another process arrived first, nothing to do for us
    if (lazyDeclaration) {
      // record it as emitted so that it is dropped from the pending declarations
      state.emittedDeclarations.insert(lazyDeclaration);
    }
    return {};
  }

  // Comments are collected from the token stream of the primary file, when it has a buffer.
  std::vector<swift::Token> comments;
  if (primaryFile && primaryFile->getBufferID()) {
    auto& sourceManager = compiler.getSourceMgr();
    auto tokens = swift::tokenize(compiler.getInvocation().getLangOptions(), sourceManager,
                                  primaryFile->getBufferID());
    for (auto& token : tokens) {
      if (token.getKind() == swift::tok::comment) {
        comments.push_back(token);
      }
    }
  }

  SwiftLocationExtractor locationExtractor(*trap);
  locationExtractor.emitFile(primaryFile);
  SwiftBodyEmissionStrategy bodyEmissionStrategy(module, primaryFile, lazyDeclaration);
  SwiftVisitor visitor(compiler.getSourceMgr(), state, *trap, locationExtractor,
                       bodyEmissionStrategy);
  auto topLevelDecls = getTopLevelDecls(module, primaryFile, lazyDeclaration);
  for (auto decl : topLevelDecls) {
    // TODO: Swift 6.2 is unavailable
    visitor.extract(decl);
  }
  for (auto& comment : comments) {
    visitor.extract(comment);
  }
  return std::move(visitor).getEncounteredModules();
}

// Returns the names of the Swift input files that this invocation should extract.
static std::unordered_set<std::string> collectInputFilenames(swift::CompilerInstance& compiler) {
  // The frontend can be called in many different ways.
  // At each invocation we only extract system and builtin modules and any input source files that
  // are primary inputs, or all of them if there are no primary inputs (whole module optimization)
  std::unordered_set<std::string> sourceFiles;
  const auto& inOuts = compiler.getInvocation().getFrontendOptions().InputsAndOutputs;
  for (auto& input : inOuts.getAllInputs()) {
    LOG_INFO("> {}", input.getFileName());
    if (input.getType() == swift::file_types::TY_Swift &&
        (!inOuts.hasPrimaryInputs() || input.isPrimary())) {
      sourceFiles.insert(input.getFileName());
    }
  }
  return sourceFiles;
}

// Returns the modules currently loaded in the compiler's AST context.
static std::vector<swift::ModuleDecl*> collectLoadedModules(swift::CompilerInstance& compiler) {
  std::vector<swift::ModuleDecl*> ret;
  for (const auto& [id, module] : compiler.getASTContext().getLoadedModules()) {
    std::ignore = id;  // only the module is needed
    ret.push_back(module);
  }
  return ret;
}

// Extracts every loaded module, plus any module encountered while doing so, using a worklist.
// Modules that contain source files are extracted file by file (input files only, which are also
// archived); all other modules are extracted as a whole.
void codeql::extractSwiftFiles(SwiftExtractorState& state, swift::CompilerInstance& compiler) {
  auto inputFiles = collectInputFilenames(compiler);
  std::vector<swift::ModuleDecl*> todo = collectLoadedModules(compiler);
  state.encounteredModules.insert(todo.begin(), todo.end());
  LOG_DEBUG("{} modules loaded", todo.size());

  while (!todo.empty()) {
    auto module = todo.back();
    todo.pop_back();
    bool isFromSourceFile = false;
    std::unordered_set<swift::ModuleDecl*> encounteredModules;
    for (auto file : module->getFiles()) {
      auto sourceFile = llvm::dyn_cast<swift::SourceFile>(file);
      if (!sourceFile) {
        continue;
      }
      isFromSourceFile = true;
      if (inputFiles.count(sourceFile->getFilename().str()) == 0) {
        LOG_DEBUG("skipping module {} from file {}, not in input files", module->getName().get(),
                  sourceFile->getFilename());
        continue;
      }
      LOG_DEBUG("extracting module {} from input file {}", module->getName().get(),
                sourceFile->getFilename());
      archiveFile(state.configuration, *sourceFile);
      // Accumulate rather than overwrite: a module may have several input files, and the modules
      // encountered by an earlier file must not be lost when a later file reports a different
      // (possibly empty) set.
      auto fromFile =
          extractDeclarations(state, compiler, *module, sourceFile, /*lazy declaration*/ nullptr);
      encounteredModules.insert(fromFile.begin(), fromFile.end());
    }
    if (!isFromSourceFile) {
      LOG_DEBUG("extracting module {} from non-source file", module->getName().get());
      encounteredModules = extractDeclarations(state, compiler, *module, /*source file*/ nullptr,
                                               /*lazy declaration*/ nullptr);
    }
    for (auto encountered : encounteredModules) {
      if (state.encounteredModules.count(encountered) == 0) {
        todo.push_back(encountered);
        state.encounteredModules.insert(encountered);
      }
    }
  }
}

// Drops from the pending declarations those that have already been emitted.
static void cleanupPendingDeclarations(SwiftExtractorState& state) {
  // iterate over a snapshot, as the pending container is modified in the loop
  std::vector<const swift::Decl*> worklist(std::begin(state.pendingDeclarations),
                                           std::end(state.pendingDeclarations));
  for (auto decl : worklist) {
    if (state.emittedDeclarations.count(decl)) {
      state.pendingDeclarations.erase(decl);
    }
  }
}

// Runs one round of lazy extraction over the declarations that are still pending.
static void extractLazy(SwiftExtractorState& state, swift::CompilerInstance& compiler) {
  cleanupPendingDeclarations(state);
  // iterate over a snapshot of the pending declarations taken before extraction starts
  std::vector<const swift::Decl*> worklist(std::begin(state.pendingDeclarations),
                                           std::end(state.pendingDeclarations));
  for (auto pending : worklist) {
    extractDeclarations(state, compiler, *pending->getModuleContext(), /*source file*/ nullptr,
                        pending);
  }
}

// Repeats lazy extraction rounds until no declaration is pending. Asserts if `upperBound` rounds
// were not enough.
void codeql::extractExtractLazyDeclarations(SwiftExtractorState& state,
                                            swift::CompilerInstance& compiler) {
  // Just in case
  const int upperBound = 100;
  int iteration = 0;
  while (!state.pendingDeclarations.empty()) {
    CODEQL_ASSERT(iteration++ < upperBound,
                  "Swift extractor reached upper bound while extracting lazy declarations");
    extractLazy(state, compiler);
  }
}