Swift: extract precompiled swiftmodule files

Previously we were not extracting any `swiftmodule` file that was not
a system or a built-in one. This was done to avoid re-extracting
`swiftmodule` files that were built previously in the same build, but it
turned out to be too eager, as there are legitimate cases where a
non-system, non-built-in precompiled swift module can be used. An
example of that is the `PackageDescription` module used in Swift
Package Manager manifest files (`Package.swift`).

We now relax the test and trigger module extraction on all loaded
modules that do not have source files (for modules that do have source
files, we trigger source file extraction instead). The catch is that we
also create empty trap files for the current output `swiftmodule` files
(including possible alias locations set up by Xcode).

This means that if a following extractor run loads a previously built
`swiftmodule` file, it will still trigger module extraction, but that
extraction will be skipped because its target file is already present
(this is done via the `TargetFile` semantics).
This commit is contained in:
Paolo Tranquilli
2022-07-29 10:55:26 +02:00
parent 9876c391fa
commit 065fecc57e
7 changed files with 46 additions and 16 deletions

View File

@@ -165,22 +165,31 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
auto inputFiles = collectInputFilenames(compiler);
auto modules = collectModules(compiler);
// we want to make sure any following extractor run will not try to extract things from
// the swiftmodule files we are creating in this run, as those things will already have been
// extracted from source with more information. We do this by creating empty trap files.
// TargetFile semantics will ensure any following run trying to extract that swiftmodule will just
// skip doing it
auto outputModuleTrapSuffix = "-" + compiler.getMainModule()->getName().str().str() + ".trap";
for (const auto& output : config.outputSwiftModules) {
TargetFile::create(output + outputModuleTrapSuffix, config.trapDir, config.getTempTrapDir());
}
for (auto& module : modules) {
// We only extract system and builtin modules here as the other "user" modules can be built
// during the build process and then re-used at a later stage. In this case, we extract the
// user code twice: once during the module build in a form of a source file, and then as
// a pre-built module during building of the dependent source files.
if (module->isSystemModule() || module->isBuiltinModule()) {
extractDeclarations(config, compiler, *module);
} else {
for (auto file : module->getFiles()) {
auto sourceFile = llvm::dyn_cast<swift::SourceFile>(file);
if (!sourceFile || inputFiles.count(sourceFile->getFilename().str()) == 0) {
continue;
}
archiveFile(config, *sourceFile);
extractDeclarations(config, compiler, *module, sourceFile);
bool isFromSourceFile = false;
for (auto file : module->getFiles()) {
auto sourceFile = llvm::dyn_cast<swift::SourceFile>(file);
if (!sourceFile) {
continue;
}
isFromSourceFile = true;
if (inputFiles.count(sourceFile->getFilename().str()) == 0) {
continue;
}
archiveFile(config, *sourceFile);
extractDeclarations(config, compiler, *module, sourceFile);
}
if (!isFromSourceFile) {
extractDeclarations(config, compiler, *module);
}
}
}

View File

@@ -32,5 +32,9 @@ struct SwiftExtractorConfiguration {
// A temporary directory that contains build artifacts generated by the extractor during the
// overall extraction process.
std::string getTempArtifactDir() const { return scratchDir + "/swift-extraction-artifacts"; }
// Output swiftmodule files. This also includes possible locations where Xcode internally moves
// modules
std::vector<std::string> outputSwiftModules;
};
} // namespace codeql

View File

@@ -163,7 +163,7 @@ static std::vector<std::string> computeModuleAliases(llvm::StringRef modulePath,
namespace codeql {
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
SwiftExtractorConfiguration& config,
const SwiftExtractorConfiguration& config,
std::vector<std::string>& CLIArgs) {
std::unordered_map<std::string, std::string> remapping;
@@ -323,5 +323,15 @@ std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& conf
return overlays;
}
// Collects the original (pre-remapping) paths of all remapped outputs that are swiftmodule files.
// Only the keys of `remapping` (old path -> new path) are inspected; the redirected paths are
// not used. Entries appear in the iteration order of the map, which is unspecified.
std::vector<std::string> getOutputSwiftModules(
const std::unordered_map<std::string, std::string>& remapping) {
std::vector<std::string> swiftModules;
for (const auto& entry : remapping) {
const std::string& originalPath = entry.first;
if (!llvm::StringRef(originalPath).endswith(".swiftmodule")) {
// not a swiftmodule output, nothing to record
continue;
}
swiftModules.push_back(originalPath);
}
return swiftModules;
}
} // namespace codeql

View File

@@ -13,7 +13,7 @@ struct SwiftExtractorConfiguration;
// artifacts produced by the actual Swift compiler.
// Returns the map containing remapping oldpath -> newPath.
std::unordered_map<std::string, std::string> rewriteOutputsInPlace(
SwiftExtractorConfiguration& config,
const SwiftExtractorConfiguration& config,
std::vector<std::string>& CLIArgs);
// Create directories for all the redirected new paths as the Swift compiler expects them to exist.
@@ -29,4 +29,7 @@ void storeRemappingForVFS(const SwiftExtractorConfiguration& config,
// This is separate from storeRemappingForVFS as we also collect files produced by other processes.
std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& config);
// Returns a list of output remapped swift module files
std::vector<std::string> getOutputSwiftModules(
const std::unordered_map<std::string, std::string>& remapping);
} // namespace codeql

View File

@@ -68,6 +68,7 @@ int main(int argc, char** argv) {
codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions);
codeql::ensureDirectoriesForNewPathsExist(remapping);
codeql::storeRemappingForVFS(configuration, remapping);
configuration.outputSwiftModules = codeql::getOutputSwiftModules(remapping);
std::vector<const char*> args;
for (auto& arg : configuration.patchedFrontendOptions) {

View File

@@ -1,4 +1,5 @@
| file://:0:0:0:0 | A |
| file://:0:0:0:0 | B |
| file://:0:0:0:0 | PackageDescription |
| file://:0:0:0:0 | main |
| file://:0:0:0:0 | partial_modules |

View File

@@ -62,6 +62,8 @@ def main(opts):
]
if opts.check_databases:
cmd.append("--check-databases")
else:
cmd.append("--no-check-databases")
if opts.learn:
cmd.append("--learn")
cmd.extend(str(t.parent) for t in succesful_db_creation)