extract src.zip files from codeql db .zips

This commit is contained in:
Michael Hohn
2025-03-03 14:56:27 -08:00
committed by =Michael Hohn
parent 8a1b0dd753
commit 056507dd51

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
import os
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, unzip, rm, cd
# Logging function
def log(level, message):
colors = {
"INFO": "\033[1;34m",
"WARN": "\033[1;33m",
"ERROR": "\033[1;31m",
"RESET": "\033[0m",
}
print(f"{colors[level]}[{level}] {message}{colors['RESET']}", file=sys.stderr)
# Expand environment variables in paths
def expand_path(path):
return local.env.expand(path)
# Process a single zip file
def process_zip_file(zip_path, output_base_dir):
zip_name = local.path(zip_path).stem # Remove .zip extension
output_dir = local.path(output_base_dir) / zip_name
mkdir("-p", output_dir)
try:
log("INFO", f"Processing {zip_path}")
# Extract file list to find src.zip
file_list = unzip("-l", zip_path).splitlines()
src_zip_entry = \
next((line.split()[-1]
for line in file_list if line.endswith("src.zip")), None)
if not src_zip_entry:
log("WARN", f"No src.zip found in {zip_path}")
return
# Extract src.zip to memory and write to output directory
log("INFO", f"{zip_path=} {src_zip_entry=} {output_dir=}")
unzip(zip_path, src_zip_entry, "-d", str(output_dir))
log("INFO", f"Extracted src.zip to {output_dir}")
except Exception as e:
log("ERROR", f"Error processing {zip_path}: {e}")
finally:
rm("-rf", output_dir / "__MACOSX") # Cleanup if macOS metadata exists
# Main application class
class Extractor(cli.Application):
"""
Extracts src.zip from zip archives and places them into named directories.
"""
output_base_dir = cli.SwitchAttr(
"--output_base_dir", str, mandatory=True,
help="Specify the directory where extracted src.zip files should be stored"
)
starting_path = cli.SwitchAttr(
"--starting_path", str, mandatory=True, help="Specify the directory containing zip files"
)
def main(self):
output_base_dir = expand_path(self.output_base_dir)
starting_path = expand_path(self.starting_path)
mkdir("-p", output_base_dir)
log("INFO", f"Searching for zip files in {starting_path}")
zip_files = find(starting_path, "-type", "f", "-name", "*.zip").splitlines()
if not zip_files:
log("WARN", "No zip files found in the specified directory.")
return
for zip_path in zip_files:
process_zip_file(zip_path, output_base_dir)
log("INFO", "Processing completed.")
if __name__ == "__main__":
Extractor.run()