Files
mrvacommander/client/qldbtools/bin/mc-hepc-init

121 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env
# Logging function
def log(level, message):
    """Print a colorized, timestamped message to stderr.

    Args:
        level: Severity label shown in brackets. "INFO", "WARN" and "ERROR"
            each have an ANSI color; any other label is printed uncolored.
        message: Text printed after the level tag.
    """
    # Function-scope import: the module-level imports do not include datetime.
    from datetime import datetime

    colors = {
        "INFO": "\033[1;34m",
        "WARN": "\033[1;33m",
        "ERROR": "\033[1;31m",
        "RESET": "\033[0m",
    }
    # Format local time in-process instead of forking `date` for every message.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # An unknown level falls back to no color rather than raising KeyError.
    color = colors.get(level, "")
    print(f"{color}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)
# Generate a CID (cumulative id)
def generate_cid(cli_version, creation_time, primary_language, sha):
    """Return a six-character hex id derived from a database's metadata.

    The four values are joined with single spaces, hashed with SHA-256, and
    the first six hex digits of the digest are returned.
    """
    joined_fields = "{} {} {} {}".format(
        cli_version, creation_time, primary_language, sha
    )
    digest = hashlib.sha256()
    digest.update(joined_fields.encode())
    return digest.hexdigest()[:6]
# Expand environment variables in paths
def expand_path(path):
    """Expand environment variables in ``path`` via plumbum's ``local.env``."""
    expanded = local.env.expand(path)
    return expanded
# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
    """Register one CodeQL database archive in the collection directory.

    Extracts ``codeql-database.yml`` from the archive into a temporary
    directory, derives an ``<owner>-<repo>-ctsj-<cid>.zip`` name from its
    contents, appends one JSON line describing the database to
    ``metadata.json`` in ``db_collection_dir``, and symlinks the new name to
    the archive.  Errors raised while doing so are logged as warnings and the
    archive is skipped.

    Args:
        zip_path: Path to the ``db.zip`` archive; may be relative to the
            current working directory.
        db_collection_dir: Directory that receives the symlink and
            ``metadata.json``.
    """
    temp_dir = mktemp("-d").strip()
    try:
        unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return
        yaml_path = yaml_files[0]
        with yaml_path.open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        creation_time = creation_metadata["creationTime"]

        # Owner and repo are the last two components of the source prefix.
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name

        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"
        metadata = {
            "git_branch": "HEAD",
            "git_commit_id": sha,
            "git_repo": repo,
            "ingestion_datetime_utc": str(creation_time),
            "result_url": result_url,
            "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name": f"codeql-{primary_language}",
            "tool_version": cli_version,
            "projname": f"{owner}/{repo}",
        }

        collection_dir = local.path(db_collection_dir)

        # NOTE(review): the record is appended on every call, even when the
        # link below already exists, so re-running over the same archives
        # repeats lines in metadata.json.
        metadata_file = collection_dir / "metadata.json"
        with metadata_file.open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = collection_dir / new_db_fname
        if not link_path.exists():
            # A relative symlink target is resolved against the link's own
            # directory, so a relative zip_path would produce a dangling link.
            # local.path() yields an absolute path, which resolves anywhere.
            ln("-sf", local.path(zip_path), link_path)
    except Exception as e:
        # Best effort: one bad archive must not abort the whole run.
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)
# Main application class
class DBProcessor(cli.Application):
    """
    DBProcessor processes db.zip files found in a starting directory,
    symlinks updated names in a collection directory,
    and adds a metadata information file "metadata.json" to the directory.
    """

    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
    )
    starting_path = cli.SwitchAttr(
        "--starting_path", str, mandatory=True, help="Specify the starting path"
    )

    def main(self):
        """Find non-empty db.zip files under the starting path and process each.

        Returns:
            1 when the starting path does not exist; otherwise None.
        """
        db_collection_dir = expand_path(self.db_collection_dir)
        starting_path = expand_path(self.starting_path)

        # Report a missing starting path clearly instead of letting find(1)
        # fail with an uncaught ProcessExecutionError traceback.
        if not local.path(starting_path).exists():
            log("ERROR", f"Starting path does not exist: {starting_path}")
            return 1

        mkdir("-p", db_collection_dir)
        log("INFO", f"Searching for db.zip files in {starting_path}")

        # find exits non-zero when it hits, e.g., an unreadable subdirectory,
        # even though it still printed the matches it could reach.
        # retcode=None keeps those results instead of raising and losing them.
        retcode, stdout, stderr = find[
            starting_path, "-type", "f", "-name", "db.zip", "-size", "+0c"
        ].run(retcode=None)
        if retcode != 0:
            log("WARN", f"find exited with status {retcode}: {stderr.strip()}")

        db_files = stdout.splitlines()
        if not db_files:
            log("WARN", "No db.zip files found in the specified starting path.")
            return

        for zip_path in db_files:
            process_db_file(zip_path, db_collection_dir)
        log("INFO", "Processing completed.")
# Script entry point: start the CLI application (run() is inherited from
# cli.Application) only when this file is executed directly, not on import.
if __name__ == "__main__":
    DBProcessor.run()