Swift: open(2) interception

This commit is contained in:
Alex Denisov
2022-09-12 10:17:45 +02:00
parent 726772220c
commit c638789f3e
12 changed files with 151 additions and 13 deletions

View File

@@ -1,5 +1,6 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
_swift_prebuilt_version = "swift-5.6-RELEASE.42271.54"
_swift_sha_map = {
@@ -55,3 +56,11 @@ def codeql_workspace(repository_name = "codeql"):
"https://github.com/bazelbuild/rules_python/archive/refs/tags/0.8.1.tar.gz",
],
)
new_git_repository(
name = "fishhook",
commit = "aadc161ac3b80db07a9908851839a17ba63a9eb1",
shallow_since = "1634071885 -0400",
build_file = "//swift/tools/fishhook:BUILD.fishhook.bazel",
remote = "https://github.com/facebook/fishhook",
)

View File

@@ -10,6 +10,7 @@ swift_cc_binary(
deps = [
"//swift/extractor/infra",
"//swift/extractor/visitors",
"//swift/extractor/remapping",
"//swift/tools/prebuilt:swift-llvm-support",
],
)

View File

@@ -184,7 +184,6 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config,
while (!todo.empty()) {
auto module = todo.back();
todo.pop_back();
llvm::errs() << "processing module " << module->getName() << '\n';
bool isFromSourceFile = false;
std::unordered_set<swift::ModuleDecl*> encounteredModules;
for (auto file : module->getFiles()) {

View File

@@ -1,4 +1,4 @@
#include "SwiftOutputRewrite.h"
#include "swift/extractor/SwiftOutputRewrite.h"
#include "swift/extractor/SwiftExtractorConfiguration.h"
#include "swift/extractor/TargetTrapFile.h"

View File

@@ -32,4 +32,5 @@ std::vector<std::string> collectVFSFiles(const SwiftExtractorConfiguration& conf
// Creates empty trap files for output swiftmodule files
void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config,
const std::unordered_map<std::string, std::string>& remapping);
} // namespace codeql

View File

@@ -9,8 +9,9 @@
#include <swift/Basic/LLVMInitialize.h>
#include <swift/FrontendTool/FrontendTool.h>
#include "SwiftExtractor.h"
#include "SwiftOutputRewrite.h"
#include "swift/extractor/SwiftExtractor.h"
#include "swift/extractor/SwiftOutputRewrite.h"
#include "swift/extractor/remapping/SwiftOpenInterception.h"
using namespace std::string_literals;
@@ -21,14 +22,6 @@ class Observer : public swift::FrontendObserver {
public:
explicit Observer(const codeql::SwiftExtractorConfiguration& config) : config{config} {}
void parsedArgs(swift::CompilerInvocation& invocation) override {
auto& overlays = invocation.getSearchPathOptions().VFSOverlayFiles;
auto vfsFiles = codeql::collectVFSFiles(config);
for (auto& vfsFile : vfsFiles) {
overlays.push_back(vfsFile);
}
}
void performedSemanticAnalysis(swift::CompilerInstance& compiler) override {
codeql::extractSwiftFiles(config, compiler);
}
@@ -49,6 +42,7 @@ int main(int argc, char** argv) {
// TODO: print usage
return 1;
}
// Required by Swift/LLVM
PROGRAM_START(argc, argv);
INITIALIZE_LLVM();
@@ -58,6 +52,8 @@ int main(int argc, char** argv) {
configuration.sourceArchiveDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SOURCE_ARCHIVE_DIR", ".");
configuration.scratchDir = getenv_or("CODEQL_EXTRACTOR_SWIFT_SCRATCH_DIR", ".");
codeql::initInterception(configuration.getTempArtifactDir());
configuration.frontendOptions.reserve(argc - 1);
for (int i = 1; i < argc; i++) {
configuration.frontendOptions.push_back(argv[i]);
@@ -67,7 +63,6 @@ int main(int argc, char** argv) {
auto remapping =
codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions);
codeql::ensureDirectoriesForNewPathsExist(remapping);
codeql::storeRemappingForVFS(configuration, remapping);
codeql::lockOutputSwiftModuleTraps(configuration, remapping);
std::vector<const char*> args;
@@ -77,5 +72,8 @@ int main(int argc, char** argv) {
Observer observer(configuration);
int frontend_rc = swift::performFrontend(args, "swift-extractor", (void*)main, &observer);
codeql::remapArtifacts(remapping);
return frontend_rc;
}

View File

@@ -0,0 +1,23 @@
load("//swift:rules.bzl", "swift_cc_library")
# Library behind the "swift/extractor/remapping/SwiftOpenInterception.h" header.
# Exactly one platform-specific implementation file is compiled per target OS.
swift_cc_library(
name = "remapping",
# No default branch is provided: only Linux and macOS are listed here.
srcs = select({
"@platforms//os:linux": [
"SwiftOpenInterception.Linux.cpp",
],
"@platforms//os:macos": [
"SwiftOpenInterception.macOS.cpp",
],
}),
hdrs = glob(["*.h"]),
visibility = ["//swift:__subpackages__"],
deps = [
"//swift/tools/prebuilt:swift-llvm-support",
] + select({
# fishhook is only linked into the macOS build; Linux adds no extra deps.
"@platforms//os:linux": [],
"@platforms//os:macos": [
"@fishhook//:fishhook",
],
}),
)

View File

@@ -0,0 +1,8 @@
#include "swift/extractor/remapping/SwiftOpenInterception.h"
namespace codeql {
// Placeholder implementation: both entry points are intentionally empty, so in
// a build using this file no open(2) interception or artifact remapping happens.
// NOTE(review): presumably this is the Linux variant (the build file selects
// between a Linux and a macOS source) - a real implementation is still TBD.
void remapArtifacts(const std::unordered_map<std::string, std::string>& mapping) {}
void initInterception(const std::string& dir) {}
} // namespace codeql

View File

@@ -0,0 +1,11 @@
#pragma once
#include <string>
#include <unordered_map>
namespace codeql {
// Sets up interception of open(2) for the current process. `dir` is the
// directory the implementation uses for its hash-named remapping entries.
// Implementations are platform specific; one of them is currently an empty stub.
void initInterception(const std::string& dir);
// Publishes remapped artifacts so that later intercepted opens can find them.
// `mapping` is iterated as (original path, patched path) pairs.
// NOTE(review): the fishhook-based implementation only considers entries whose
// original path ends in ".swiftmodule" - confirm whether other artifact kinds
// are expected to be handled.
void remapArtifacts(const std::unordered_map<std::string, std::string>& mapping);
} // namespace codeql

View File

@@ -0,0 +1,79 @@
#include "swift/extractor/remapping/SwiftOpenInterception.h"

#include <fcntl.h>
#include <unistd.h>

#include <cstdarg>
#include <system_error>

#include <fishhook.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Path.h>
#include <llvm/Support/raw_ostream.h>
namespace codeql {
// Directory searched for hash-named entries; assigned by initInterception().
static std::string scratchDir;
// Slot handed to fishhook as the "replaced" pointer for `open`; it stays null
// until the rebinding in initInterception() has stored the original function.
static int (*original_open)(const char*, int, ...) = nullptr;
// Returns the MD5 digest of the contents of `filename` as a string, or an empty
// string when the file cannot be opened or its contents cannot be hashed.
//
// The file is opened through the saved original `open` so that the read is not
// routed through codeql_open again. If no original has been captured (the
// interception was never initialised, or the rebinding failed) the libc `open`
// is called directly instead of jumping through a null function pointer.
static std::string fileHash(const std::string& filename) {
  const int fd = original_open != nullptr ? original_open(filename.c_str(), O_RDONLY)
                                          : ::open(filename.c_str(), O_RDONLY);
  if (fd == -1) {
    return {};
  }
  auto maybeMD5 = llvm::sys::fs::md5_contents(fd);
  // The descriptor is only needed for hashing; release it before inspecting the result.
  close(fd);
  if (!maybeMD5) {
    return {};
  }
  return maybeMD5->digest().str().str();
}
// Replacement for open(2), installed by initInterception().
//
// When `path` ends in ".swiftmodule", names an existing file, and the scratch
// directory contains an entry named after the MD5 of that file's contents, that
// entry is opened instead. Every other request is forwarded unchanged to the
// original open(2).
//
// Parameters mirror open(2): `path`, `oflag` and, only when O_CREAT is set, a
// trailing mode argument. Returns whatever the original open(2) returns.
static int codeql_open(const char* path, int oflag, ...) {
  mode_t mode = 0;
  if ((oflag & O_CREAT) != 0) {
    // mode only applies to O_CREAT
    va_list ap;
    va_start(ap, oflag);
    // mode_t is promoted to int when passed through "..."
    mode = static_cast<mode_t>(va_arg(ap, int));
    va_end(ap);
  }

  if (path == nullptr) {
    // Constructing a std::string from a null pointer is undefined behaviour;
    // let the real open(2) report the invalid argument instead.
    return original_open(path, oflag, mode);
  }

  std::string newPath(path);
  // TODO: check file magic instead
  // The suffix test comes first so that unrelated opens cost no extra syscall.
  if (llvm::StringRef(newPath).endswith(".swiftmodule") && llvm::sys::fs::exists(newPath)) {
    const auto hash = fileHash(newPath);
    if (!hash.empty()) {
      auto hashed = scratchDir + "/" + hash;
      if (llvm::sys::fs::exists(hashed)) {
        newPath = std::move(hashed);
      }
    }
  }
  return original_open(newPath.c_str(), oflag, mode);
}
// Publishes remapped ".swiftmodule" artifacts in the scratch directory.
//
// For every (original, patched) pair whose original path ends in ".swiftmodule",
// the contents of the original file are hashed and a link named after that hash
// is created in the scratch directory, referring to the patched file.
// codeql_open() later looks these entries up by hash. Pairs whose original
// cannot be hashed, or whose patched file does not exist, are skipped.
//
// Failures are reported on stderr and never abort the loop. An entry that is
// already present is not treated as a failure: the artifact is already
// reachable under that hash.
void remapArtifacts(const std::unordered_map<std::string, std::string>& mapping) {
  for (const auto& [original, patched] : mapping) {
    // TODO: Check file magic instead
    if (!llvm::StringRef(original).endswith(".swiftmodule")) {
      continue;
    }
    const auto hash = fileHash(original);
    if (hash.empty() || !llvm::sys::fs::exists(patched)) {
      continue;
    }
    const auto hashed = scratchDir + "/" + hash;
    // Creates `hashed` as a link that refers to `patched`.
    const std::error_code ec = llvm::sys::fs::create_link(patched, hashed);
    if (ec && ec != std::errc::file_exists) {
      llvm::errs() << "Cannot remap file '" << patched << "' -> '" << hashed
                   << "': " << ec.message() << "\n";
    }
  }
}
void initInterception(const std::string& dir) {
scratchDir = dir;
struct rebinding binding[] = {
{"open", reinterpret_cast<void*>(codeql_open), reinterpret_cast<void**>(&original_open)}};
rebind_symbols(binding, 1);
}
} // namespace codeql

View File

View File

@@ -0,0 +1,9 @@
load("@//swift:rules.bzl", "swift_cc_library")
# Build definition applied to the externally fetched fishhook sources: every
# C file and header at the repository root becomes part of one library.
swift_cc_library(
name = "fishhook",
srcs = glob(["*.c"]),
hdrs = glob(["*.h"]),
# Lets dependents include the header as <fishhook.h>.
strip_include_prefix = ".",
# "@//" refers to the main workspace, since this file is evaluated inside the
# external fishhook repository.
visibility = ["@//swift:__subpackages__"],
)