Bazel/Swift: add zip imports to packs

2025-12-16 08:43:11 +01:00 · 2024-05-24 11:07:29 +02:00
parent 4d93e8a732
commit e8b857b79e
18 changed files with 805 additions and 36 deletions
--- a/.bazelrc
+++ b/.bazelrc
@@ -10,10 +10,10 @@ common --override_module=semmle_code=%workspace%/misc/bazel/semmle_code_stub

 build --repo_env=CC=clang --repo_env=CXX=clang++

-build:linux --cxxopt=-std=c++20
+build:linux --cxxopt=-std=c++20 --host_cxxopt=-std=c++20
 # we currently cannot built the swift extractor for ARM
-build:macos --cxxopt=-std=c++20 --copt=-arch --copt=x86_64 --linkopt=-arch --linkopt=x86_64
-build:windows --cxxopt=/std:c++20 --cxxopt=/Zc:preprocessor
+build:macos --cxxopt=-std=c++20 --host_cxxopt=-std=c++20 --copt=-arch --copt=x86_64 --linkopt=-arch --linkopt=x86_64
+build:windows --cxxopt=/std:c++20 --cxxopt=/Zc:preprocessor --host_cxxopt=/std:c++20 --host_cxxopt=/Zc:preprocessor

 # this requires developer mode, but is required to have pack installer functioning
 startup --windows_enable_symlinks
--- a/.gitattributes
+++ b/.gitattributes
@@ -77,4 +77,10 @@ ruby/extractor/cargo-bazel-lock.json -merge
 # auto-generated files for the C# build
 csharp/paket.lock linguist-generated=true
 # needs eol=crlf, as `paket` touches this file and saves it als crlf
-csharp/.paket/Paket.Restore.targets linguist-generated=true eol=crlf
+csharp/.paket/Paket.Restore.targets linguist-generated=true eol=crlf
+
+# ripunzip tool
+/misc/bazel/internal/bin/*/ripunzip* filter=lfs diff=lfs merge=lfs -text
+
+# swift prebuilt resources
+/swift/third_party/resource-dir/*.zip filter=lfs diff=lfs merge=lfs -text
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -57,6 +57,33 @@ use_repo(node, "nodejs", "nodejs_toolchains")
 go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk")
 go_sdk.download(version = "1.22.2")

+# `lfs_files` is a repository rule defined in `//misc/bazel:lfs.bzl`. Each call below declares an
+# external repository called `name` from the listed source files.
+lfs_files = use_repo_rule("//misc/bazel:lfs.bzl", "lfs_files")
+
+# one repository per OS for the prebuilt `ripunzip` tool
+lfs_files(
+    name = "ripunzip-linux",
+    srcs = ["//misc/bazel/internal/bin:linux/ripunzip"],
+)
+
+lfs_files(
+    name = "ripunzip-windows",
+    srcs = ["//misc/bazel/internal/bin:windows/ripunzip.exe"],
+)
+
+lfs_files(
+    name = "ripunzip-macos",
+    srcs = ["//misc/bazel/internal/bin:macos/ripunzip"],
+)
+
+# prebuilt swift resource directory zip (linux)
+lfs_files(
+    name = "swift-resource-dir-linux",
+    srcs = ["//swift/third_party/resource-dir:resource-dir-linux.zip"],
+)
+
+# NOTE(review): the macOS counterpart is disabled; the reason is not visible here
+#lfs_files(
+#    name = "swift-resource-dir-macos",
+#    srcs = ["//swift/third_party/resource-dir:resource-dir-macos.zip"],
+#)
+
 register_toolchains(
    "@nodejs_toolchains//:all",
 )
--- a/misc/bazel/internal/bin/BUILD.bazel
+++ b/misc/bazel/internal/bin/BUILD.bazel
@@ -0,0 +1,8 @@
+load("@bazel_skylib//rules:native_binary.bzl", "native_binary")
+
+# Exposes the prebuilt `ripunzip` under a single label: `src` selects the `@ripunzip-<os>`
+# repository matching the `@platforms//os:<os>` constraint (linux, windows or macos).
+# The output file is named `ripunzip.exe` for every OS.
+native_binary(
+    name = "ripunzip",
+    src = select({"@platforms//os:" + os: "@ripunzip-" + os for os in ("linux", "windows", "macos")}),
+    out = "ripunzip.exe",
+    visibility = ["//visibility:public"],
+)
--- a/misc/bazel/internal/bin/linux/ripunzip
+++ b/misc/bazel/internal/bin/linux/ripunzip
--- a/misc/bazel/internal/bin/macos/ripunzip
+++ b/misc/bazel/internal/bin/macos/ripunzip
--- a/misc/bazel/internal/bin/windows/ripunzip.exe
+++ b/misc/bazel/internal/bin/windows/ripunzip.exe
--- a/misc/bazel/internal/bin/zipmerge/BUILD.bazel
+++ b/misc/bazel/internal/bin/zipmerge/BUILD.bazel
@@ -0,0 +1,20 @@
+# The zipmerge implementation and its header, as a library separate from the entry point below.
+cc_library(
+    name = "lib",
+    srcs = [
+        "zipmerge.cpp",
+    ],
+    hdrs = ["zipmerge.h"],
+)
+
+# The command-line tool: only contains `main`, which forwards to the library.
+cc_binary(
+    name = "zipmerge",
+    srcs = [
+        "zipmerge_main.cpp",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":lib",
+    ],
+)
+
+#TODO port tests from internal repo
--- a/misc/bazel/internal/bin/zipmerge/zipmerge.cpp
+++ b/misc/bazel/internal/bin/zipmerge/zipmerge.cpp
@@ -0,0 +1,529 @@
+/*
+  Utility for munging zip files.
+
+  The high-level pseudo-code is:
+    for each input zip Z:
+      for each file F in Z:
+        F.name = adjust(F.name)
+        if F.name should be included:
+          write F to the output zip
+
+  File inclusion testing consists of two parts:
+    1. Don't include anything matching an explicit removal list.
+    2. If the same filename occurs in multiple input zips, only include the file from the last input
+       zip.
+
+  Filename adjustment consists of optionally prepending a prefix to the filename.
+*/
+
+#include "misc/bazel/internal/bin/zipmerge/zipmerge.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _WIN32
+#include <Windows.h>
+#define unlink(s) DeleteFileA(s)
+#else
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+#include <string_view>
+
+namespace {
+struct {
+  // Output stream, opened by zipmerge_main.
+  FILE* file;
+  // Number of bytes appended so far. It is also used as the offset recorded for the next
+  // local-file-header and for the start of the central directory.
+  uint32_t num_bytes_written;
+  // Number of entries copied into the output so far.
+  uint16_t num_files_written;
+} output_zip{};  // The zip file being written.
+
+struct {
+  // Heap buffer set up by set_filename_prefix (NULL when there is no prefix); when set, the last
+  // byte is '/'.
+  uint8_t* bytes;
+  // Number of valid bytes in `bytes`, including the trailing '/'.
+  uint16_t length;
+} filename_prefix{};  // A string to prepend to all filenames added to the output file.
+
+// Upper bound on the number of input zips accepted on the command line.
+constexpr size_t maximum_input_files = 1000;
+struct {
+  int count;
+  struct {
+    // Prefix given by the most recent --prefix= flag preceding this input, or NULL if none.
+    const char* prefix;
+    // Path of the input zip, as given on the command line.
+    const char* name;
+  } entries[maximum_input_files];
+} input_files;  // A list of input zip files.
+
+static bool verbose;                  // If true, more things are written to stdout.
+static const char* output_file_name;  // The name of the output zip file.
+static const char*
+    current_input_file_name;  // The name of the current input zip file (used for diagnostics).
+
+// Number of slots in the filename hash table. Lookups mask the index with (size - 1), so this
+// must be a power of two.
+constexpr size_t filename_hash_table_size = 0x20000;
+typedef struct {
+  uint32_t hash;        // Hash of the full (prefixed) filename.
+  uint32_t len;         // Length in bytes of the full filename.
+  const uint8_t* data;  // The full filename bytes; NULL marks an empty slot.
+} hash_entry_t;
+
+// A hash set containing the name of everything so far written to the output file.
+static hash_entry_t filename_hash_table[filename_hash_table_size];
+
+// Upper bound on the number of --remove flags accepted on the command line.
+constexpr size_t maximum_removals = 1000;
+struct removal_entry {
+  // A removal entry can either be a literal string, or a wildcard containing a single "*".
+  // In the former case, the literal string is called the head. In the latter case, the
+  // segment before the "*" is called the head, and the segment after the "*" is called the tail.
+  uint32_t head_len;
+  uint32_t tail_len;  // zero for literal removals, possibly zero for wildcard removals
+  const uint8_t* head;
+  const uint8_t* tail;  // NULL for literal removals, non-NULL for wildcard removals
+};
+
+struct {
+  int count;
+  removal_entry entries[maximum_removals];
+} removals;  // A list of files and directories to ignore in input files.
+
+// Sizes and signatures of zip file structures (central-directory, local-file-header,
+// end-of-central-directory). The sizes cover the fixed-length part of each record only; the
+// variable-length fields (filename, extra data, comment) follow it.
+constexpr size_t cd_size = 46;
+constexpr std::string_view cd_signature = "\x50\x4b\x01\x02";
+constexpr size_t lfh_size = 30;
+constexpr std::string_view lfh_signature = "\x50\x4b\x03\x04";
+constexpr size_t eocd_size = 22;
+
+// Append the bytes [src, src + len) to the output zip and account for them in
+// output_zip.num_bytes_written. Terminates the process if the write comes up short or if the
+// running total no longer fits in 32 bits.
+void append_data(const uint8_t* src, uint32_t len) {
+  const size_t written = fwrite(src, 1, len, output_zip.file);
+  if (written != len) {
+    printf("Error: Could not write %lu bytes to output file.\n", (unsigned long)len);
+    exit(1);
+  }
+
+  const uint32_t previous_total = output_zip.num_bytes_written;
+  const uint32_t updated_total = previous_total + len;
+  // Unsigned addition wraps around, so a smaller sum means the 32-bit counter overflowed.
+  if (updated_total < previous_total) {
+    printf("Error: Output zip file exceeds 4 gigabytes.\n");
+    exit(1);
+  }
+  output_zip.num_bytes_written = updated_total;
+}
+}  // namespace
+
+// Add the bytes [src, src + len) to the in-memory central-directory buffer (output_cd), growing
+// the buffer if it is too small. Terminates the process if the buffer cannot be grown or if the
+// central directory would exceed what its 32-bit length can describe.
+void append_cd(const uint8_t* src, uint32_t len) {
+  // Nothing to append; this also avoids handing a null src (e.g. an unset filename prefix) to
+  // memcpy.
+  if (len == 0) {
+    return;
+  }
+  // output_cd.length is a 32-bit counter: refuse to wrap around rather than under-allocate below.
+  if (len > UINT32_MAX - output_cd.length) {
+    printf("Error: Central directory of output zip exceeds 4 gigabytes.\n");
+    exit(1);
+  }
+  if ((output_cd.capacity - output_cd.length) < len) {
+    uint32_t new_capacity;
+    uint8_t* new_data;
+
+    // Grow by 50%, or to exactly what is needed if that is still not enough (this also covers a
+    // wrapped-around growth computation).
+    new_capacity = output_cd.capacity + (output_cd.capacity >> 1);
+    if (new_capacity < output_cd.length + len) new_capacity = output_cd.length + len;
+    new_data = (uint8_t*)realloc(output_cd.bytes, new_capacity);
+    if (!new_data) {
+      printf("Error: Could not grow central-directory buffer from %lu bytes to %lu bytes.\n",
+             (unsigned long)output_cd.capacity, (unsigned long)new_capacity);
+      exit(1);
+    }
+    output_cd.bytes = new_data;
+    output_cd.capacity = new_capacity;
+  }
+  memcpy(output_cd.bytes + output_cd.length, src, len);
+  output_cd.length += len;
+}
+
+namespace {
+// Copy a local-file-header and accompanying file data from an input file to the output file.
+// The input file is [input_file, input_file + input_file_len).
+// The offset within the input file of the local-file-header is given by lfh_offset.
+// The central-directory entry corresponding to the file is given by cd.
+//
+// The header written to the output mixes both sources: signature, version and extra-len come
+// from the input's local-file-header, everything in between comes from cd. The filename prefix is
+// written right after the header, followed by the original name, extra data and file data.
+// Terminates the process if the input is malformed.
+void copy_file_data(const uint8_t* input_file,
+                    size_t lfh_offset,
+                    const uint8_t* cd,
+                    size_t input_file_len) {
+  // The fixed-size part of the local-file-header must lie entirely within the input.
+  if (lfh_offset >= input_file_len || (size_t)(input_file_len - lfh_offset) < lfh_size) {
+    printf("Error: %s is invalid; central-directory references local-file-header at offset %llu, "
+           "but file is only %llu bytes.\n",
+           current_input_file_name, (unsigned long long)lfh_offset,
+           (unsigned long long)input_file_len);
+    exit(1);
+  }
+
+  const uint8_t* lfh = input_file + lfh_offset;
+  if (memcmp(lfh, lfh_signature.data(), lfh_signature.size()) != 0) {
+    printf("Error: Expected local-file-header signature at offset %llu of %s, but instead got %02x "
+           "%02x %02x %02x.\n",
+           (unsigned long long)lfh_offset, current_input_file_name, lfh[0], lfh[1], lfh[2], lfh[3]);
+    exit(1);
+  }
+
+  // Name and extra lengths are the ones of the input's local-file-header; the compressed size is
+  // taken from the central-directory entry.
+  size_t data_offset = lfh_offset + lfh_size;
+  uint16_t name_len = read2(lfh + 26);
+  uint16_t extra_len = read2(lfh + 28);
+  uint32_t data_len = read4(cd + 20);
+  append_data(lfh, 6);  // signature, version
+  // flags, compression, mod-time, mod-date, crc-32, compressed-size, uncompressed-size, name-len
+  append_data(cd + 8, 22);
+  append_data(lfh + 28, 2);  // extra-len
+
+  // Name, extra data and file data are contiguous in the input, so they are bounds-checked and
+  // copied as a single span.
+  size_t total_variable_len = (size_t)name_len + (size_t)extra_len + (size_t)data_len;
+  if ((size_t)(input_file_len - data_offset) < total_variable_len) {
+    printf(
+        "Error: %s is invalid; starting at offset %llu, reading a filename of %u bytes, extra data "
+        "of %u bytes, and %lu bytes of compressed data would exceed file size of %llu bytes.\n",
+        current_input_file_name, (unsigned long long)data_offset, (unsigned)name_len,
+        (unsigned)extra_len, (unsigned long)data_len, (unsigned long long)input_file_len);
+    exit(1);
+  }
+  append_data(filename_prefix.bytes, filename_prefix.length);
+  append_data(input_file + data_offset, (uint32_t)total_variable_len);
+}
+
+// Test whether the filename [full_name, full_name + len) is matched by the removal entry re.
+//
+// A literal entry matches when the name is exactly the head, or starts with the head followed
+// by '/'.
+// A wildcard entry matches when the name starts with the head and the tail occurs later, ending
+// either at the end of the name or right before a '/', without the "*" having to cover a '/'.
+bool removal_entry_matches(const struct removal_entry* re, const uint8_t* full_name, uint32_t len) {
+  if (len < re->head_len + re->tail_len) {
+    return false;
+  }
+  if (memcmp(full_name, re->head, re->head_len) != 0) {
+    return false;
+  }
+  if (re->tail) {
+    // i is the candidate end position of the tail; the "*" covers [head_len, i - tail_len).
+    for (uint32_t i = re->head_len + re->tail_len;; ++i) {
+      // The tail has to end at the end of the name or right before a '/'.
+      if (len == i || full_name[i] == '/') {
+        if (memcmp(full_name + i - re->tail_len, re->tail, re->tail_len) == 0) {
+          return true;
+        }
+      }
+      // Give up at the end of the name, or when the next byte the "*" would have to cover is '/'.
+      if (len == i || full_name[i - re->tail_len] == '/') {
+        return false;
+      }
+    }
+  } else {
+    return len == re->head_len || full_name[re->head_len] == '/';
+  }
+}
+}  // namespace
+
+// Test whether the file called [name, name + len) in the current input should be written to the
+// output. The current filename prefix is prepended before testing.
+//
+// Returns false if the prefixed name matches a removal entry or was already recorded by a previous
+// call. Otherwise the prefixed name is recorded in filename_hash_table (which takes ownership of
+// the allocated copy) and true is returned. Terminates the process if memory cannot be allocated.
+bool should_include_filename_now(const uint8_t* name, uint32_t len) {
+  const size_t alloc_size = (size_t)filename_prefix.length + (size_t)len + 1;
+  uint8_t* full_name = (uint8_t*)malloc(alloc_size);
+  if (!full_name) {
+    printf("Error: Could not allocate %lu bytes for a filename.\n", (unsigned long)alloc_size);
+    exit(1);
+  }
+  // filename_prefix.bytes is NULL when there is no prefix; do not hand a null pointer to memcpy.
+  if (filename_prefix.length != 0) {
+    memcpy(full_name, filename_prefix.bytes, filename_prefix.length);
+  }
+  memcpy(full_name + filename_prefix.length, name, len);
+  len += filename_prefix.length;
+
+  for (int i = 0; i < removals.count; ++i) {
+    if (removal_entry_matches(&removals.entries[i], full_name, len)) {
+      free(full_name);
+      return false;
+    }
+  }
+
+  uint32_t hash = 5381;
+  for (uint32_t i = 0; i < len; ++i) {
+    hash = (hash * 33) ^ full_name[i];
+  }
+
+  // Open addressing with linear probing: walk the slots starting at the hash until either the
+  // same name or an empty slot is found.
+  for (uint32_t idx = hash;; ++idx) {
+    hash_entry_t* e = filename_hash_table + (idx & (filename_hash_table_size - 1));
+    if (e->hash == hash && e->len == len && memcmp(e->data, full_name, len) == 0) {
+      free(full_name);
+      return false;
+    } else if (e->data == NULL) {
+      e->hash = hash;
+      e->len = len;
+      e->data = full_name;  // the table keeps the buffer, so it is not freed here
+      return true;
+    }
+  }
+}
+
+// Locate the end-of-central-directory record of a zip file held in memory.
+// Candidate positions are probed from the end of the file backwards, starting eocd_size bytes
+// before the end and going back at most 1024 further positions. Returns a pointer to the first
+// position carrying the eocd signature, or NULL if there is none.
+const uint8_t* find_eocd(const uint8_t* input_file, size_t input_file_len) {
+  const uint8_t* const file_end = input_file + input_file_len;
+  const size_t search_limit = 1024 + eocd_size;
+
+  size_t distance_from_end = eocd_size;
+  while (distance_from_end < search_limit && distance_from_end <= input_file_len) {
+    const uint8_t* probe = file_end - distance_from_end;
+    const bool signature_found =
+        memcmp(probe, eocd_signature.data(), eocd_signature.size()) == 0;
+    if (signature_found) {
+      return probe;
+    }
+    ++distance_from_end;
+  }
+  return NULL;
+}
+
+namespace {
+// Copy all appropriate files from an input zip to the output zip.
+// The input zip is [input_file, input_file + input_file_len). Its central directory is walked
+// entry by entry; every entry accepted by should_include_filename_now gets its central-directory
+// record appended to output_cd and its local-file-header and data copied to the output.
+// Terminates the process if the input is malformed, split over disks, or zip64.
+void process_input_file(const uint8_t* input_file, size_t input_file_len) {
+  const uint8_t* eocd = find_eocd(input_file, input_file_len);
+  if (!eocd) {
+    printf("Error: Could not find end-of-central-directory in %s.\n", current_input_file_name);
+    exit(1);
+  }
+  if (read2(eocd + 4) != 0 || read2(eocd + 6) != 0) {
+    printf("Error: %s is split over multiple disks, which is not supported.\n",
+           current_input_file_name);
+    exit(1);
+  }
+  // `!(uint16_t)~x` / `!~x` is true exactly when every bit of x is set (0xFFFF / 0xFFFFFFFF).
+  if (!(uint16_t)~read2(eocd + 8) || !(uint16_t)~read2(eocd + 10) || !~read4(eocd + 12) ||
+      !~read4(eocd + 16)) {
+    printf("Error: %s is zip64, which is not supported.\n", current_input_file_name);
+    exit(1);
+  }
+  uint16_t num_entries = read2(eocd + 10);
+  size_t cd_offset = read4(eocd + 16);
+
+  for (uint16_t i = 0; i < num_entries; ++i) {
+    // Work on a copy of the fixed-size part of the record, as some fields are rewritten below.
+    uint8_t cd[cd_size];
+    if (cd_offset >= input_file_len || (size_t)(input_file_len - cd_offset) < sizeof(cd)) {
+      printf("Error: %s is invalid; central-directory %u/%u would start at offset %llu, but file "
+             "is only %llu bytes.\n",
+             current_input_file_name, (unsigned)i, (unsigned)num_entries,
+             (unsigned long long)cd_offset, (unsigned long long)input_file_len);
+      exit(1);
+    }
+
+    memcpy(cd, input_file + cd_offset, sizeof(cd));
+    if (memcmp(cd, cd_signature.data(), cd_signature.size()) != 0) {
+      printf("Error: Expected central-directory signature at offset %llu of %s, but instead got "
+             "%02x %02x %02x %02x.\n",
+             (unsigned long long)cd_offset, current_input_file_name, cd[0], cd[1], cd[2], cd[3]);
+      exit(1);
+    }
+    cd[8] &= 0xF7;  // Clear the bit indicating that a local-file-footer follows the file data
+    // From here on cd_offset points at the variable-length part of the record (the filename).
+    cd_offset += sizeof(cd);
+
+    uint16_t name_len = read2(cd + 28);
+    if (((uint32_t)name_len + (uint32_t)filename_prefix.length) > 0xFFFFU) {
+      printf("Error: Combining prefix of %.*s with filename of %.*s results in a filename which is "
+             "too long.\n",
+             (int)filename_prefix.length, (const char*)filename_prefix.bytes, (int)name_len,
+             (const char*)(input_file + cd_offset));
+      exit(1);
+    }
+    // The output record describes the prefixed name and the file's position in the output zip.
+    write2(cd + 28, name_len + filename_prefix.length);
+    uint16_t extra_len = read2(cd + 30);
+    uint16_t comment_len = read2(cd + 32);
+    uint32_t offset = read4(cd + 42);
+    write4(cd + 42, output_zip.num_bytes_written);
+    if (!~offset || !~read4(cd + 20)) {
+      printf("Error: %s is zip64 (because of %.*s), which is not supported.\n",
+             current_input_file_name, (int)name_len, (const char*)(input_file + cd_offset));
+      exit(1);
+    }
+
+    size_t total_variable_len = (size_t)name_len + (size_t)extra_len + (size_t)comment_len;
+    if ((size_t)(input_file_len - cd_offset) < total_variable_len) {
+      // The fields being read start at cd_offset (inside the central directory), so that is the
+      // offset to report, not the local-file-header offset.
+      printf("Error: %s is invalid; starting at offset %llu, reading a filename of %u bytes, extra "
+             "data of %u bytes, and comment of %u bytes exceed file size of %llu bytes.\n",
+             current_input_file_name, (unsigned long long)cd_offset, (unsigned)name_len,
+             (unsigned)extra_len, (unsigned)comment_len, (unsigned long long)input_file_len);
+      exit(1);
+    }
+
+    bool should_include = should_include_filename_now(input_file + cd_offset, name_len);
+    if (verbose) {
+      printf("%s %.*s from %s\n", should_include ? "Using" : "Skipping", (int)name_len,
+             (const char*)(input_file + cd_offset), current_input_file_name);
+    }
+    if (should_include) {
+      // Central-directory record: fixed part, then prefix, then original name, extra and comment.
+      append_cd(cd, sizeof(cd));
+      append_cd(filename_prefix.bytes, filename_prefix.length);
+      append_cd(input_file + cd_offset, (uint32_t)total_variable_len);
+      copy_file_data(input_file, offset, cd, input_file_len);
+      if (output_zip.num_files_written == 0xFFFFU) {
+        printf("Error: Too many files in output zip.\n");
+        exit(1);
+      }
+      ++output_zip.num_files_written;
+    }
+    cd_offset += total_variable_len;
+  }
+}
+
+// Read a file into memory and pass it to process_input_file.
+// The file is memory-mapped read-only (CreateFileMapping/MapViewOfFile on Windows, mmap
+// elsewhere) and unmapped and closed again once processed. Terminates the process if the file
+// cannot be opened, sized or mapped.
+void read_and_process_input_file(const char* filename) {
+#ifdef _WIN32
+  HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+                            FILE_ATTRIBUTE_NORMAL, NULL);
+  if (file == INVALID_HANDLE_VALUE) {
+    printf("Error: Cannot open %s for reading.\n", filename);
+    exit(1);
+  }
+  LARGE_INTEGER size;
+  if (!GetFileSizeEx(file, &size)) {
+    printf("Error: Cannot determine size of %s.\n", filename);
+    exit(1);
+  }
+  // Only the low 32 bits of the size are used below.
+  if (size.HighPart != 0) {
+    printf("Error: Input file %s exceeds 4 gigabytes.\n", filename);
+    exit(1);
+  }
+  if (size.LowPart == 0) {
+    printf("Error: Input file %s is empty.\n", filename);
+    exit(1);
+  }
+  HANDLE mapping = CreateFileMappingA(file, NULL, PAGE_READONLY, 0, size.LowPart, NULL);
+  if (mapping == NULL) {
+    printf("Error: Cannot mmap %s (CreateFileMapping).\n", filename);
+    exit(1);
+  }
+  void* data = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, size.LowPart);
+  if (data == NULL) {
+    printf("Error: Cannot mmap %s (MapViewOfFile).\n", filename);
+    exit(1);
+  }
+  process_input_file((uint8_t*)data, size.LowPart);
+  UnmapViewOfFile(data);
+  CloseHandle(mapping);
+  CloseHandle(file);
+#else
+  int file = open(filename, O_RDONLY);
+  if (file == -1) {
+    printf("Error: Cannot open %s for reading.\n", filename);
+    exit(1);
+  }
+  struct stat st;
+  if (fstat(file, &st) == -1) {
+    printf("Error: Cannot stat %s.\n", filename);
+    exit(1);
+  }
+  // NOTE(review): unlike the Windows branch there is no explicit check for an empty or
+  // over-4-gigabyte file here; confirm whether relying on mmap/process_input_file is intended.
+  void* data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, file, 0);
+  if (data == MAP_FAILED) {
+    printf("Error: Cannot mmap %s.\n", filename);
+    exit(1);
+  }
+  process_input_file((uint8_t*)data, st.st_size);
+  munmap(data, st.st_size);
+  close(file);
+#endif
+}
+
+// Write the command-line synopsis to stdout (using argv[0] as the program name) and terminate
+// the process with exit code 1.
+void usage_and_exit(const char** argv) {
+  const char* program_name = argv[0];
+  printf(
+      "Usage: %s [-v|--verbose] [--remove=FILE] outfile.zip "
+      "[--prefix=PREFIX] infile1.zip [--prefix=PREFIX] infile2.zip ...\n",
+      program_name);
+  exit(1);
+}
+
+// Set filename_prefix based on a string from the command line.
+// Any previous prefix is released first. A NULL or empty prefix (after dropping one leading '/'
+// or '\\') leaves filename_prefix empty. Otherwise the prefix is stored with every '\\' replaced
+// by '/' and a trailing '/' appended. Terminates the process if the prefix does not fit the
+// 16-bit length or memory cannot be allocated.
+void set_filename_prefix(const char* prefix) {
+  free(filename_prefix.bytes);
+  filename_prefix.bytes = NULL;
+  filename_prefix.length = 0;
+
+  if (prefix == NULL) {
+    return;
+  }
+  if (*prefix == '/' || *prefix == '\\') {
+    ++prefix;
+  }
+  size_t len = strlen(prefix);
+  if (len == 0) {
+    return;
+  }
+  // The stored length (prefix plus trailing '/') is a uint16_t: reject what would be truncated.
+  if (len + 1 > 0xFFFFU) {
+    printf("Error: Prefix of %s is too long.\n", prefix);
+    exit(1);
+  }
+
+  filename_prefix.bytes = (uint8_t*)malloc(len + 1);
+  if (!filename_prefix.bytes) {
+    printf("Error: Could not allocate %lu bytes for the filename prefix.\n",
+           (unsigned long)(len + 1));
+    exit(1);
+  }
+  memcpy(filename_prefix.bytes, prefix, len);
+  for (size_t i = 0; i < len; ++i) {
+    if (filename_prefix.bytes[i] == '\\') filename_prefix.bytes[i] = '/';
+  }
+  filename_prefix.bytes[len] = '/';
+  filename_prefix.length = (uint16_t)(len + 1);
+}
+
+// Set various global variables based on the command line.
+// Expected shape: [-v|--verbose] [--remove=FILE]... outfile.zip ([--prefix=PREFIX] infile.zip)...
+// Leading flags fill `verbose` and `removals`; the first other argument is the output file name;
+// the remaining arguments fill `input_files`, each input getting the most recent --prefix= value
+// seen before it (NULL if none). Terminates the process on invalid usage.
+void parse_command_line(int argc, const char** argv) {
+  int i = 1;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strcmp(arg, "-v") == 0 || strcmp(arg, "--verbose") == 0) {
+      verbose = true;
+    } else if (strncmp(arg, "--remove=", 9) == 0) {
+      arg += 9;
+      if (*arg == '/' || *arg == '\\') ++arg;
+      if (removals.count == maximum_removals) {
+        printf("Error: Too many --remove flags.\n");
+        exit(1);
+      }
+      const char* star = strchr(arg, '*');
+      // The slot is only claimed by the single increment at the end of this branch.
+      struct removal_entry* re = &removals.entries[removals.count];
+      if (star == NULL) {
+        re->head_len = (uint32_t)strlen(arg);
+        re->tail_len = 0;
+        re->head = (const uint8_t*)arg;
+        re->tail = NULL;
+      } else {
+        if (strchr(star + 1, '*')) {
+          printf("Error: At most one * is permitted per removal (%s).\n", arg);
+          exit(1);
+        }
+        re->head_len = (uint32_t)(star - arg);
+        re->tail_len = (uint32_t)strlen(star + 1);
+        re->head = (const uint8_t*)arg;
+        re->tail = (const uint8_t*)(star + 1);
+      }
+      ++removals.count;
+    } else {
+      // First argument that is not a leading flag: the output file name.
+      break;
+    }
+  }
+
+  if (i == argc) {
+    printf("Error: Missing output file name.\n");
+    usage_and_exit(argv);
+  }
+  output_file_name = argv[i];
+  ++i;
+
+  const char* prefix = NULL;
+  for (; i < argc; ++i) {
+    const char* arg = argv[i];
+    if (strncmp(arg, "--prefix=", 9) == 0) {
+      // Applies to all following inputs until the next --prefix=.
+      prefix = arg + 9;
+    } else {
+      if (input_files.count == maximum_input_files) {
+        printf("Error: Too many input files.\n");
+        exit(1);
+      }
+      input_files.entries[input_files.count].prefix = prefix;
+      input_files.entries[input_files.count].name = arg;
+      ++input_files.count;
+    }
+  }
+
+  if (input_files.count <= 0) {
+    printf("Error: Missing input file names.\n");
+    usage_and_exit(argv);
+  }
+}
+}  // namespace
+
+// Entry point of the tool: parse the command line, copy the selected files of every input zip
+// into the output zip, then write the central directory and the end-of-central-directory record.
+// Returns 0 on success and 1 if the output file cannot be opened or completely written; most
+// other errors terminate the process with exit code 1 from within the helpers.
+int zipmerge_main(int argc, const char** argv) {
+  parse_command_line(argc, argv);
+
+  output_zip.file = fopen(output_file_name, "wb");
+  if (!output_zip.file) {
+    printf("Error: Cannot open %s for writing.\n", output_file_name);
+    return 1;
+  }
+
+  // Inputs are processed last to first: the first occurrence of a filename wins, so this makes
+  // the file from the last input zip take precedence.
+  for (int i = input_files.count - 1; i >= 0; --i) {
+    set_filename_prefix(input_files.entries[i].prefix);
+    current_input_file_name = input_files.entries[i].name;
+    read_and_process_input_file(current_input_file_name);
+  }
+
+  uint8_t eocd[eocd_size] = {0};
+  memcpy(eocd, eocd_signature.data(), eocd_signature.size());
+  write2(eocd + 8, output_zip.num_files_written);
+  write2(eocd + 10, output_zip.num_files_written);
+  write4(eocd + 12, output_cd.length);
+  // Must be read before the central directory is appended: it is the offset where it will start.
+  write4(eocd + 16, output_zip.num_bytes_written);
+  append_data(output_cd.bytes, output_cd.length);
+  append_data(eocd, sizeof(eocd));
+  // fclose flushes buffered data, so a failure here means the output zip is incomplete.
+  if (fclose(output_zip.file) != 0) {
+    printf("Error: Could not finish writing %s.\n", output_file_name);
+    return 1;
+  }
+  return 0;
+}
+
+// Zero out the global state of the tool: the output zip bookkeeping, the filename prefix, the
+// central-directory buffer, the input file list, the filename hash table and the removal list.
+// Buffers referenced by that state are not released.
+void reset() {
+  const auto zero_out = [](auto& object) { memset(&object, 0, sizeof(object)); };
+
+  zero_out(output_zip);
+  zero_out(filename_prefix);
+  zero_out(output_cd);
+  zero_out(input_files);
+  zero_out(filename_hash_table);
+  zero_out(removals);
+}
--- a/misc/bazel/internal/bin/zipmerge/zipmerge.h
+++ b/misc/bazel/internal/bin/zipmerge/zipmerge.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+#include <string_view>
+
+// Growable byte buffer.
+struct output_cd_t {
+  uint8_t* bytes;     // Start of the buffer; NULL while nothing has been appended.
+  uint32_t length;    // Number of bytes currently in use.
+  uint32_t capacity;  // Number of bytes allocated.
+};
+
+inline output_cd_t output_cd{};  // An in-memory buffer in which the central-directory records for
+                                 // the output file are accumulated.
+
+// Read and write little-endian integers (as the only supported host platforms are little-endian,
+// and all host platforms support unaligned memory accesses, these macros are currently very
+// simple).
+// read2/write2 access 2 bytes at ptr, read4/write4 access 4 bytes at ptr.
+// NOTE(review): these dereference a byte pointer cast to a wider integer type, so they depend on
+// the compiler tolerating that type-punned, possibly unaligned access; memcpy-based helpers would
+// not.
+#define read2(ptr) (*(uint16_t*)(ptr))
+#define read4(ptr) (*(uint32_t*)(ptr))
+#define write2(ptr, val) (*(uint16_t*)(ptr) = (val))
+#define write4(ptr, val) (*(uint32_t*)(ptr) = (val))
+
+// Add the bytes [src, src + len) to the output's central-directory.
+void append_cd(const uint8_t* src, uint32_t len);
+
+// Test whether a given filename should be included in the output zip.
+// Note that if a call returns true for a given filename, all future calls with the same filename
+// will return false.
+bool should_include_filename_now(const uint8_t* name, uint32_t len);
+
+// Signature bytes that start an end-of-central-directory record.
+inline constexpr std::string_view eocd_signature = "\x50\x4b\x05\x06";
+// Try to find the end-of-central-directory record in the zip file
+// [input_file, input_file + input_file_len). Returns a pointer to the record, or NULL if none
+// was found.
+const uint8_t* find_eocd(const uint8_t* input_file, size_t input_file_len);
+
+// Entry point of the tool, taking the process command line. Returns the process exit code
+// (0 on success); many errors terminate the process with exit code 1 instead of returning.
+int zipmerge_main(int argc, const char** argv);
+
+// Zero out the global state of the tool.
+void reset();
--- a/misc/bazel/internal/bin/zipmerge/zipmerge_main.cpp
+++ b/misc/bazel/internal/bin/zipmerge/zipmerge_main.cpp
@@ -0,0 +1,5 @@
+#include "misc/bazel/internal/bin/zipmerge/zipmerge.h"
+
+// Process entry point: hands the command line to zipmerge_main and exits with its result.
+int main(int argc, const char** argv) {
+  const int exit_code = zipmerge_main(argc, argv);
+  return exit_code;
+}
--- a/misc/bazel/internal/install.py
+++ b/misc/bazel/internal/install.py
@@ -5,17 +5,20 @@ import subprocess
 from python.runfiles import runfiles

 runfiles = runfiles.Create()
-if not runfiles:
-    raise Exception("Installer should be run with `bazel run`")
+assert runfiles, "Installer should be run with `bazel run`"

 parser = argparse.ArgumentParser()
 parser.add_argument("--destdir", type=pathlib.Path, required=True)
 parser.add_argument("--script", required=True)
 parser.add_argument("--build-file", required=True)
+parser.add_argument("--ripunzip", required=True)
+parser.add_argument("--zip-manifest", action="append", default=[], dest="zip_manifests")
 opts = parser.parse_args()

-script = runfiles.Rlocation(opts.script)
 build_file = runfiles.Rlocation(opts.build_file)
+script = runfiles.Rlocation(opts.script)
+ripunzip = runfiles.Rlocation(opts.ripunzip)
+zip_manifests = [runfiles.Rlocation(z) for z in opts.zip_manifests]
 destdir = pathlib.Path(build_file).parent / opts.destdir

 if destdir.exists():
@@ -23,3 +26,12 @@ if destdir.exists():

 destdir.mkdir(parents=True)
 subprocess.run([script, "--destdir", destdir], check=True)
+
+for zip_manifest in zip_manifests:
+    with open(zip_manifest) as manifest:
+        for line in manifest:
+            prefix, _, zip = line.partition(":")
+            assert zip, f"missing prefix for {prefix}, you should use prefix:zip format"
+            dest = destdir / prefix
+            dest.mkdir(parents=True, exist_ok=True)
+            subprocess.run([ripunzip, "unzip-file", zip, "-d", dest])
--- a/misc/bazel/pkg.bzl
+++ b/misc/bazel/pkg.bzl
@@ -7,7 +7,6 @@ load("@rules_pkg//pkg:mappings.bzl", "pkg_attributes", "pkg_filegroup", "pkg_fil
 load("@rules_pkg//pkg:pkg.bzl", "pkg_zip")
 load("@rules_pkg//pkg:providers.bzl", "PackageFilegroupInfo", "PackageFilesInfo")
 load("@rules_python//python:defs.bzl", "py_binary")
-load("//:defs.bzl", "codeql_platform")

 def _make_internal(name):
    def internal(suffix = "internal"):
@@ -22,27 +21,36 @@ def _get_subrule(label, suffix):
    return "%s/%s:%s-%s" % (path, pkg, pkg, suffix)

 _PackageFileWrapperInfo = provider(fields = {"pfi": "", "src": "", "arch_specific": ""})
+CodeqlZipInfo = provider(fields = {"prefix": "", "src": "", "arch_specific": ""})

 CodeqlFilesInfo = provider(
    doc = """Wrapper around `rules_pkg` `PackageFilesInfo` carrying information about generic and arch-specific files.""",
    fields = {
        "files": "list of `_PackageFileWrapperInfo`.",
+        "zips": "list of `CodeqlZipInfo`.",
    },
 )

+# Private attributes a rule must carry for `_detect_plat` to work on its `ctx`
+# (merge them into the rule's `attrs`).
+_PLAT_DETECTION_ATTRS = {
+    "_windows": attr.label(default = "@platforms//os:windows"),
+    "_macos": attr.label(default = "@platforms//os:macos"),
+}
+
+def _detect_plat(ctx):
+    """Returns the platform directory name for the target platform of `ctx`.
+
+    The result is `windows64` or `osx64` when the target platform has the corresponding OS
+    constraint, and `linux64` otherwise. The rule owning `ctx` must have the `_windows` and
+    `_macos` attributes.
+    """
+    candidates = [
+        (ctx.attr._windows, "windows64"),
+        (ctx.attr._macos, "osx64"),
+    ]
+    for constraint, plat in candidates:
+        if ctx.target_platform_has_constraint(constraint[platform_common.ConstraintValueInfo]):
+            return plat
+    return "linux64"
+
 def _codeql_pkg_filegroup_impl(ctx):
    prefix = ctx.attr.prefix
    if prefix:
        prefix += "/"
    generic_prefix = prefix
    if ctx.attr.arch_specific:
-        if ctx.target_platform_has_constraint(ctx.attr._windows[platform_common.ConstraintValueInfo]):
-            plat = "windows64"
-        elif ctx.target_platform_has_constraint(ctx.attr._macos[platform_common.ConstraintValueInfo]):
-            plat = "osx64"
-        else:
-            plat = "linux64"
-        prefix = prefix + plat + "/"
+        prefix = prefix + _detect_plat(ctx) + "/"

    def transform_pfi(pfi, src, prefix = prefix, arch_specific = ctx.attr.arch_specific):
        return _PackageFileWrapperInfo(
@@ -54,34 +62,45 @@ def _codeql_pkg_filegroup_impl(ctx):
            arch_specific = arch_specific,
        )

+    def transform_pfwi(pfwi):
+        return transform_pfi(
+            pfwi.pfi,
+            pfwi.src,
+            # if it was already arch-specific the plat prefix was already added
+            generic_prefix if pfwi.arch_specific else prefix,
+            pfwi.arch_specific or ctx.attr.arch_specific,
+        )
+
+    def transform_czi(czi):
+        return CodeqlZipInfo(
+            # if it was already arch-specific the plat prefix was already added
+            prefix = (generic_prefix if czi.arch_specific else prefix) + czi.prefix,
+            src = czi.src,
+            arch_specific = czi.arch_specific or ctx.attr.arch_specific,
+        )
+
    files = []
+    zips = []

    for src in ctx.attr.srcs:
        if PackageFilesInfo in src:
-            pfi = src[PackageFilesInfo]
-            files.append(transform_pfi(pfi, src.label))
+            files.append(transform_pfi(src[PackageFilesInfo], src.label))
        elif PackageFilegroupInfo in src:
            pfgi = src[PackageFilegroupInfo]
            if pfgi.pkg_dirs or pfgi.pkg_symlinks:
                fail("while assembling %s found %s which contains `pkg_dirs` or `pkg_symlinks` targets" %
                     (ctx.label, src.label) + ", which is not currently supported")
            files += [transform_pfi(pfi, src) for pfi, src in pfgi.pkg_files]
+        elif CodeqlZipInfo in src:
+            zips.append(transform_czi(src[CodeqlZipInfo]))
        else:
-            cfi = src[CodeqlFilesInfo]
-            files += [
-                transform_pfi(
-                    pfwi.pfi,
-                    pfwi.src,
-                    # if it was already arch specific the plat prefix was already added
-                    generic_prefix if pfwi.arch_specific else prefix,
-                    pfwi.arch_specific or ctx.attr.arch_specific,
-                )
-                for pfwi in cfi.files
-            ]
+            files += [transform_pfwi(pfwi) for pfwi in src[CodeqlFilesInfo].files]
+            zips += [transform_czi(czi) for czi in src[CodeqlFilesInfo].zips]

    return [
        CodeqlFilesInfo(
            files = files,
+            zips = zips,
        ),
        DefaultInfo(
            files = depset(transitive = [src[DefaultInfo].files for src in ctx.attr.srcs]),
@@ -99,14 +118,13 @@ codeql_pkg_filegroup = rule(
                [PackageFilesInfo, DefaultInfo],
                [PackageFilegroupInfo, DefaultInfo],
                [CodeqlFilesInfo, DefaultInfo],
+                [CodeqlZipInfo, DefaultInfo],
            ],
            default = [],
        ),
        "prefix": attr.string(doc = "Prefix to add to the files", default = ""),
        "arch_specific": attr.bool(doc = "Whether the included files should be treated as arch-specific"),
-        "_windows": attr.label(default = "@platforms//os:windows"),
-        "_macos": attr.label(default = "@platforms//os:macos"),
-    },
+    } | _PLAT_DETECTION_ATTRS,
 )

 def codeql_pkg_files(
@@ -169,6 +187,61 @@ def _extract_pkg_filegroup_impl(ctx):
        DefaultInfo(files = depset(transitive = files)),
    ]

+def _codeql_pkg_zip_import_impl(ctx):
+    """Wraps the single `src` zip file into a `CodeqlZipInfo` provider.
+
+    The prefix stored in the provider is made of the `prefix` attribute (when not
+    empty) followed, for arch-specific imports, by whatever `_detect_plat(ctx)`
+    returns. Each component that is present is terminated by a `/`.
+    """
+    components = []
+    if ctx.attr.prefix:
+        components.append(ctx.attr.prefix)
+    if ctx.attr.arch_specific:
+        components.append(_detect_plat(ctx))
+    zip_file = ctx.file.src
+    return [
+        CodeqlZipInfo(
+            prefix = "".join([component + "/" for component in components]),
+            src = zip_file,
+            arch_specific = ctx.attr.arch_specific,
+        ),
+        # also expose the zip itself, so that depending on this target builds/fetches it
+        DefaultInfo(files = depset([zip_file])),
+    ]
+
+codeql_pkg_zip_import = rule(
+    doc = "Wrap a zip file to be consumed by `codeql_pkg_filegroup` and `codeql_pack` rules",
+    implementation = _codeql_pkg_zip_import_impl,
+    attrs = {
+        "src": attr.label(
+            doc = "Zip file to wrap",
+            allow_single_file = True,
+            mandatory = True,
+        ),
+        "prefix": attr.string(
+            doc = "Posix path prefix to nest the zip contents into",
+        ),
+        "arch_specific": attr.bool(
+            doc = "Whether this is to be considered arch-specific",
+        ),
+        # the shared attributes below are merged in because the implementation calls `_detect_plat(ctx)`
+    } | _PLAT_DETECTION_ATTRS,
+)
+
+def _imported_zips_manifest_impl(ctx):
+    """Writes the two manifests listing the imported zips of `src` matching `arch_specific`.
+
+    Two files are needed, regardless of their format, because they are consumed in
+    different contexts:
+    * `zipmerge_out` is used by zipmerge, which runs in a build context and therefore
+      requires `File.path` to find the zips. Each line is `--prefix=<prefix> <path>`,
+      with the trailing `/` stripped from the prefix.
+    * `install_out` is used by the installation, which runs in a run context and
+      therefore requires `File.short_path`. Each line is `<prefix>:<short_path>`.
+
+    The returned `DefaultInfo` contains both manifests and the selected zip files.
+    """
+    wanted_arch_specific = ctx.attr.arch_specific
+    selected = [
+        czi
+        for czi in ctx.attr.src[CodeqlFilesInfo].zips
+        if czi.arch_specific == wanted_arch_specific
+    ]
+
+    zipmerge_lines = ["--prefix=%s %s" % (czi.prefix.rstrip("/"), czi.src.path) for czi in selected]
+    install_lines = ["%s:%s" % (czi.prefix, czi.src.short_path) for czi in selected]
+    ctx.actions.write(ctx.outputs.zipmerge_out, "\n".join(zipmerge_lines))
+    ctx.actions.write(ctx.outputs.install_out, "\n".join(install_lines))
+
+    manifests = [ctx.outputs.zipmerge_out, ctx.outputs.install_out]
+    return DefaultInfo(
+        files = depset(manifests + [czi.src for czi in selected]),
+    )
+
+# Private rule writing the zipmerge and install manifests for the zips imported by `src`.
+# It is instantiated by the `codeql_pack` macro, once per kind of zip.
+_imported_zips_manifests = rule(
+    implementation = _imported_zips_manifest_impl,
+    attrs = {
+        # `src` and both outputs are dereferenced unconditionally by the implementation, so they
+        # are mandatory: omitting one is reported as a missing attribute rather than as an
+        # obscure failure on `None` inside the implementation.
+        "src": attr.label(
+            mandatory = True,
+            providers = [CodeqlFilesInfo],
+            doc = "Target whose `CodeqlFilesInfo.zips` are listed in the manifests",
+        ),
+        "arch_specific": attr.bool(
+            doc = "Whether to list the arch-specific zips (if true) or the other ones (if false)",
+        ),
+        "zipmerge_out": attr.output(
+            mandatory = True,
+            doc = "Manifest with `--prefix=<prefix> <path>` lines, using `File.path`",
+        ),
+        "install_out": attr.output(
+            mandatory = True,
+            doc = "Manifest with `<prefix>:<short_path>` lines, using `File.short_path`",
+        ),
+    },
+)
+
 _extrac_pkg_filegroup = rule(
    implementation = _extract_pkg_filegroup_impl,
    attrs = {
@@ -219,11 +292,29 @@ def codeql_pack(
            visibility = ["//visibility:private"],
        )
        pkg_zip(
-            name = internal(kind + "-zip"),
+            name = internal(kind + "-zip-base"),
            srcs = [internal(kind + "-zip-contents")],
-            package_file_name = zip_filename + "-" + (codeql_platform if kind == "arch" else kind) + ".zip",
            visibility = visibility,
        )
+        _imported_zips_manifests(
+            name = internal(kind + "-zip-manifests"),
+            src = name,
+            zipmerge_out = internal(kind + "-zipmerge.params"),
+            install_out = internal(kind + "-install.params"),
+            arch_specific = kind == "arch",
+        )
+        native.genrule(
+            name = internal(kind + "-zip"),
+            tools = ["//misc/bazel/internal/bin/zipmerge", internal(kind + "-zipmerge.params")],
+            srcs = [internal(kind + "-zip-base"), internal(kind + "-zip-manifests")],
+            outs = ["%s-%s.zip" % (zip_filename, kind)],
+            cmd = " ".join([
+                "$(execpath //misc/bazel/internal/bin/zipmerge)",
+                "$@",
+                "$(execpath %s)" % internal(kind + "-zip-base"),
+                "$$(cat $(execpath %s))" % internal(kind + "-zipmerge.params"),
+            ]),
+        )
    pkg_install(
        name = internal("script"),
        srcs = [internal("generic"), internal("arch")],
@@ -239,13 +330,24 @@ def codeql_pack(
        name = internal("installer"),
        srcs = ["//misc/bazel/internal:install.py"],
        main = "//misc/bazel/internal:install.py",
-        data = [internal("build-file"), internal("script")],
+        data = [
+            internal("build-file"),
+            internal("script"),
+            internal("generic-install.params"),
+            internal("generic-zip-manifests"),
+            internal("arch-install.params"),
+            internal("arch-zip-manifests"),
+            "//misc/bazel/internal/bin:ripunzip",
+        ],
        deps = ["@rules_python//python/runfiles"],
        args = [
            "--build-file=$(rlocationpath %s)" % internal("build-file"),
            "--script=$(rlocationpath %s)" % internal("script"),
            "--destdir",
            install_dest,
+            "--ripunzip=$(rlocationpath //misc/bazel/internal/bin:ripunzip)",
+            "--zip-manifest=$(rlocationpath %s)" % internal("generic-install.params"),
+            "--zip-manifest=$(rlocationpath %s)" % internal("arch-install.params"),
        ],
        visibility = visibility,
    )
--- a/swift/BUILD.bazel
+++ b/swift/BUILD.bazel
@@ -64,8 +64,7 @@ codeql_pkg_filegroup(

 codeql_pkg_filegroup(
    name = "resource-dir",
-    srcs = ["//swift/third_party/swift-llvm-support:swift-resource-dir"],
-    arch_specific = True,
+    srcs = ["//swift/third_party/resource-dir"],
    prefix = "resource-dir",
 )

--- a/swift/third_party/resource-dir/BUILD.bazel
+++ b/swift/third_party/resource-dir/BUILD.bazel
@@ -0,0 +1,9 @@
+load("//misc/bazel:pkg.bzl", "codeql_pkg_zip_import")
+
+# Prebuilt Swift resource directory, imported into the pack as an arch-specific zip.
+# The zip is picked per OS from the `@swift-resource-dir-<os>` repositories.
+codeql_pkg_zip_import(
+    name = "resource-dir",
+    src = select({"@platforms//os:" + os: "@swift-resource-dir-" + os for os in ("linux", "macos")}),
+    arch_specific = True,
+    # there is no Windows zip to select: mark the target incompatible there, and leave it
+    # unconstrained elsewhere (without the default branch the select fails on Linux and macOS)
+    target_compatible_with = select({
+        "@platforms//os:windows": ["@platforms//:incompatible"],
+        "//conditions:default": [],
+    }),
+    visibility = ["//swift:__pkg__"],
+)
--- a/swift/third_party/resource-dir/resource-dir-linux.zip
+++ b/swift/third_party/resource-dir/resource-dir-linux.zip
--- a/swift/third_party/resource-dir/resource-dir-macos.zip
+++ b/swift/third_party/resource-dir/resource-dir-macos.zip
--- a/swift/third_party/resource-dir/update.sh
+++ b/swift/third_party/resource-dir/update.sh