Change hepc-init to Python and add debugging configuration
This heavily uses plumbum to retain a shell-script style but add data structures that will be needed
This commit is contained in:
committed by
=Michael Hohn
parent
d486b6b4db
commit
537ebdf19d
24
client/qldbtools/.vscode/launch.json
vendored
Normal file
24
client/qldbtools/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "Python Debugger: Current File with Arguments",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"--db_collection_dir",
|
||||
"db-collection-py",
|
||||
"--starting_path",
|
||||
"$HOME/work-gh/mrva/mrva-open-source-download"
|
||||
],
|
||||
"justMyCode": true,
|
||||
"stopOnEntry": false
|
||||
}
|
||||
]
|
||||
}
|
||||
249
client/qldbtools/bin/hepc-init
Executable file → Normal file
249
client/qldbtools/bin/hepc-init
Executable file → Normal file
@@ -1,144 +1,115 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#* Utility functions
|
||||
log() {
|
||||
local level="$1"
|
||||
shift
|
||||
local color_reset="\033[0m"
|
||||
local color_info="\033[1;34m"
|
||||
local color_warn="\033[1;33m"
|
||||
local color_error="\033[1;31m"
|
||||
import json
|
||||
import hashlib
|
||||
import yaml
|
||||
import sys
|
||||
from plumbum import cli, local
|
||||
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env
|
||||
|
||||
local color
|
||||
case "$level" in
|
||||
INFO) color="$color_info" ;;
|
||||
WARN) color="$color_warn" ;;
|
||||
ERROR) color="$color_error" ;;
|
||||
*) color="$color_reset" ;;
|
||||
esac
|
||||
|
||||
echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
|
||||
}
|
||||
usage() {
|
||||
echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " --db_collection_dir <directory> Specify the database collection directory."
|
||||
echo " --starting_path <path> Specify the starting path."
|
||||
echo " -h Show this help message."
|
||||
exit 1
|
||||
}
|
||||
|
||||
|
||||
#* Initialize and parse arguments
|
||||
set -euo pipefail # exit on error, unset var, pipefail
|
||||
trap 'rm -fR /tmp/hepc.$$-*' EXIT
|
||||
|
||||
starting_dir=$(pwd)
|
||||
db_collection_dir=""
|
||||
starting_path=""
|
||||
|
||||
# Parse arguments
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--db_collection_dir)
|
||||
shift
|
||||
if [[ -z "$1" || "$1" == -* ]]; then
|
||||
echo "Error: --db_collection_dir requires a directory as an argument."
|
||||
usage
|
||||
fi
|
||||
db_collection_dir="$1"
|
||||
;;
|
||||
--starting_path)
|
||||
shift
|
||||
if [[ -z "$1" || "$1" == -* ]]; then
|
||||
echo "Error: --starting_path requires a path as an argument."
|
||||
usage
|
||||
fi
|
||||
starting_path="$1"
|
||||
;;
|
||||
-h)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
echo "Error: Unknown option '$1'."
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# Check if required arguments were provided
|
||||
if [[ -z "$db_collection_dir" ]]; then
|
||||
echo "Error: --db_collection_dir is required."
|
||||
usage
|
||||
fi
|
||||
|
||||
if [[ -z "$starting_path" ]]; then
|
||||
echo "Error: --starting_path is required."
|
||||
usage
|
||||
fi
|
||||
|
||||
#* Find all DBs
|
||||
log INFO "searching for db.zip files"
|
||||
find ${starting_path} -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths
|
||||
|
||||
#* Collect detailed information from the database files
|
||||
# Don't assume they are unique.
|
||||
log INFO "collecting information from db.zip files"
|
||||
mkdir -p $db_collection_dir
|
||||
cat /tmp/hepc.$$-paths | while read -r zip_path
|
||||
do
|
||||
log INFO "Extracting from ${zip_path}"
|
||||
zip_dir=$(dirname ${zip_path})
|
||||
zip_file=$(basename ${zip_path})
|
||||
unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
|
||||
# The content may be LANGUAGE/codeql-database.yml
|
||||
|
||||
#* For every database, create a metadata record.
|
||||
mkdir -p /tmp/hepc.$$-zip
|
||||
cd /tmp/hepc.$$-zip/*
|
||||
|
||||
# Information from codeql-database.yml
|
||||
primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
|
||||
sha=$(yq '.creationMetadata.sha' codeql-database.yml)
|
||||
cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
|
||||
creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
|
||||
sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
|
||||
repo=${sourceLocationPrefix##*/} # keep only last component
|
||||
# Get sourceLocationPrefix[-2]
|
||||
owner="${sourceLocationPrefix%/*}" # strip last component
|
||||
owner="${owner##*/}" # keep only last component
|
||||
|
||||
# cid for repository / db
|
||||
cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
|
||||
awk '{print substr($1, 1, 6)}')
|
||||
|
||||
# Prepare the metadata record for this DB.
|
||||
new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
|
||||
result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
|
||||
record='
|
||||
{
|
||||
"git_branch": "HEAD",
|
||||
"git_commit_id": "'${sha}'",
|
||||
"git_repo": "'${repo}'",
|
||||
"ingestion_datetime_utc": "'${creationTime}'",
|
||||
"result_url": "'${result_url}'",
|
||||
"tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
|
||||
"tool_name": "codeql-'${primaryLanguage}'",
|
||||
"tool_version": "'${cliVersion}'",
|
||||
"projname": "'${owner}/${repo}'"
|
||||
# Logging function
|
||||
def log(level, message):
|
||||
colors = {
|
||||
"INFO": "\033[1;34m",
|
||||
"WARN": "\033[1;33m",
|
||||
"ERROR": "\033[1;31m",
|
||||
"RESET": "\033[0m",
|
||||
}
|
||||
'
|
||||
cd "$starting_dir"
|
||||
rm -fR /tmp/hepc.$$-zip
|
||||
echo "$record" >> $db_collection_dir/metadata.json
|
||||
timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
|
||||
print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)
|
||||
|
||||
#* Link original file path to collection directory for serving. Use name including
|
||||
# the cid and field separator ctsj
|
||||
cd ${db_collection_dir}
|
||||
[ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}
|
||||
# Generate a CID
|
||||
def generate_cid(cli_version, creation_time, primary_language, sha):
    """Return a six-character hex id derived from a database's build fields.

    The four values are formatted, joined with single spaces, encoded with
    the default (UTF-8) codec and hashed with SHA-256; the first six hex
    digits of the digest are returned.
    """
    joined = "{} {} {} {}".format(cli_version, creation_time, primary_language, sha)
    digest = hashlib.sha256(joined.encode())
    return digest.hexdigest()[:6]
|
||||
|
||||
# Interim cleanup
|
||||
rm -fR "/tmp/hepc.$$-*"
|
||||
done
|
||||
# Expand environment variables in paths
|
||||
def expand_path(path):
    """Return *path* after expansion by plumbum's local environment.

    This is a thin wrapper around ``local.env.expand``.
    NOTE(review): plumbum documents that call as expanding environment
    variables and home shortcuts -- confirm against the installed version.
    """
    expanded = local.env.expand(path)
    return expanded
|
||||
|
||||
# Process a single db.zip file
|
||||
def process_db_file(zip_path, db_collection_dir):
    """Register one CodeQL database archive in the collection directory.

    Extracts ``codeql-database.yml`` from *zip_path* into a temporary
    directory, derives the owner, repository and content id from it, appends
    one JSON object (on its own line) to ``metadata.json`` inside
    *db_collection_dir*, and symlinks the archive into that directory as
    ``<owner>-<repo>-ctsj-<cid>.zip`` unless that name already exists.

    Any exception raised while processing is logged as a warning and not
    re-raised; the temporary directory is removed in every case.
    Returns None.
    """
    temp_dir = mktemp("-d").strip()
    try:
        unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return

        yaml_path = yaml_files[0]
        with yaml_path.open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        creation_time = creation_metadata["creationTime"]
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])

        # The last two components of sourceLocationPrefix are owner/repo.
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name
        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"

        metadata = {
            "git_branch": "HEAD",
            "git_commit_id": sha,
            "git_repo": repo,
            # str() because the YAML loader may hand back a non-string value
            # that json cannot serialize directly.
            "ingestion_datetime_utc": str(creation_time),
            "result_url": result_url,
            "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name": f"codeql-{primary_language}",
            "tool_version": cli_version,
            "projname": f"{owner}/{repo}",
        }

        # Append-only: one JSON object per line.
        metadata_file = local.path(db_collection_dir) / "metadata.json"
        with metadata_file.open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = local.path(db_collection_dir) / new_db_fname
        if not link_path.exists():
            # A relative symlink target is resolved against the directory
            # holding the link, not the current directory, so a relative
            # zip_path would leave a dangling link.  local.path() anchors
            # the archive path at the current directory, giving an absolute
            # target that stays valid from inside the collection directory.
            ln("-sf", local.path(zip_path), link_path)

    except Exception as e:
        # Best effort per archive: report and let the caller continue.
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)
|
||||
|
||||
# Main application class
|
||||
class DBProcessor(cli.Application):
    # Command-line entry point: finds every non-empty db.zip below
    # --starting_path and hands each one to process_db_file together with
    # --db_collection_dir.
    # NOTE(review): documented with comments rather than docstrings because
    # plumbum may use the class docstring as the --help description --
    # confirm before converting.

    # Both switches are mandatory string options.
    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir",
        str,
        mandatory=True,
        help="Specify the database collection directory",
    )
    starting_path = cli.SwitchAttr(
        "--starting_path",
        str,
        mandatory=True,
        help="Specify the starting path",
    )

    def main(self):
        # Expand both user-supplied locations before touching the file system.
        collection_dir = expand_path(self.db_collection_dir)
        search_root = expand_path(self.starting_path)

        mkdir("-p", collection_dir)
        log("INFO", f"Searching for db.zip files in {search_root}")

        # Only regular, non-empty files named db.zip are of interest.
        find_args = [search_root, "-type", "f", "-name", "db.zip", "-size", "+0c"]
        archives = find(*find_args).splitlines()

        if archives:
            for archive in archives:
                process_db_file(archive, collection_dir)
            log("INFO", "Processing completed.")
        else:
            log("WARN", "No db.zip files found in the specified starting path.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
DBProcessor.run()
|
||||
|
||||
144
client/qldbtools/bin/hepc-init.sh
Executable file
144
client/qldbtools/bin/hepc-init.sh
Executable file
@@ -0,0 +1,144 @@
|
||||
#!/bin/bash
|
||||
|
||||
#* Utility functions
|
||||
log() {
    # Write a timestamped message to stderr.
    #   $1    level; INFO, WARN and ERROR each select a colour, any other
    #         value falls back to the reset sequence
    #   $2..  message words, joined by "$*"
    local severity="$1"
    shift
    local reset="\033[0m"
    local tint="$reset"

    case "$severity" in
        INFO)  tint="\033[1;34m" ;;
        WARN)  tint="\033[1;33m" ;;
        ERROR) tint="\033[1;31m" ;;
    esac

    # -e so the escape sequences above are interpreted by echo.
    echo -e "${tint}[$(date +"%Y-%m-%d %H:%M:%S")] [${severity}] $*${reset}" >&2
}
|
||||
usage() {
    # Print the command-line synopsis on stdout, then terminate the script
    # with status 1 (used both for -h and for argument errors).
    printf '%s\n' \
        "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]" \
        "" \
        "Options:" \
        " --db_collection_dir <directory> Specify the database collection directory." \
        " --starting_path <path> Specify the starting path." \
        " -h Show this help message."
    exit 1
}
|
||||
|
||||
|
||||
#* Initialize and parse arguments
set -euo pipefail # exit on error, unset var, pipefail
# Remove every scratch file of this process (/tmp/hepc.<pid>-*) on exit.
trap 'rm -fR /tmp/hepc.$$-*' EXIT

starting_dir=$(pwd)
db_collection_dir=""
starting_path=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --db_collection_dir)
            shift
            # ${1:-} instead of $1: with `set -u`, a bare $1 aborts the script
            # with "unbound variable" when the option is the last word, and
            # the explanatory message below would never be printed.
            if [[ -z "${1:-}" || "${1:-}" == -* ]]; then
                echo "Error: --db_collection_dir requires a directory as an argument."
                usage
            fi
            db_collection_dir="$1"
            ;;
        --starting_path)
            shift
            # Same unset-safe test as above.
            if [[ -z "${1:-}" || "${1:-}" == -* ]]; then
                echo "Error: --starting_path requires a path as an argument."
                usage
            fi
            starting_path="$1"
            ;;
        -h)
            usage
            ;;
        *)
            echo "Error: Unknown option '$1'."
            usage
            ;;
    esac
    shift
done

# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
    echo "Error: --db_collection_dir is required."
    usage
fi

if [[ -z "$starting_path" ]]; then
    echo "Error: --starting_path is required."
    usage
fi
|
||||
|
||||
#* Find all DBs
# Every path expansion below is quoted so directories containing spaces work.
log INFO "searching for db.zip files"
find "${starting_path}" -type f -name "db.zip" -size +0c > "/tmp/hepc.$$-paths"

#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p "$db_collection_dir"

# Scratch directory for the extracted codeql-database.yml of one archive.
zip_tmp="/tmp/hepc.$$-zip"

# The loop never changes directory, so relative paths printed by find and a
# relative collection directory stay valid for every iteration.
while read -r zip_path
do
    log INFO "Extracting from ${zip_path}"
    rm -fR "$zip_tmp"
    mkdir -p "$zip_tmp"
    unzip -o -q "${zip_path}" '*codeql-database.yml' -d "$zip_tmp"

    #* For every database, create a metadata record.
    # The content may be LANGUAGE/codeql-database.yml or sit at the archive
    # root, so search for it instead of assuming one directory level.
    yml=$(find "$zip_tmp" -type f -name "codeql-database.yml" -print -quit)
    if [[ -z "$yml" ]]; then
        log WARN "No codeql-database.yml found in ${zip_path}"
        continue
    fi

    # Information from codeql-database.yml
    primaryLanguage=$(yq '.primaryLanguage' "$yml")
    sha=$(yq '.creationMetadata.sha' "$yml")
    cliVersion=$(yq '.creationMetadata.cliVersion' "$yml")
    creationTime=$(yq '.creationMetadata.creationTime' "$yml")
    sourceLocationPrefix=$(yq '.sourceLocationPrefix' "$yml")
    repo=${sourceLocationPrefix##*/}  # keep only last component
    # Get sourceLocationPrefix[-2]
    owner="${sourceLocationPrefix%/*}" # strip last component
    owner="${owner##*/}"               # keep only last component

    # cid for repository / db
    cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
              awk '{print substr($1, 1, 6)}')

    # Prepare the metadata record for this DB.
    new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
    result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
    record='
{
"git_branch": "HEAD",
"git_commit_id": "'${sha}'",
"git_repo": "'${repo}'",
"ingestion_datetime_utc": "'${creationTime}'",
"result_url": "'${result_url}'",
"tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
"tool_name": "codeql-'${primaryLanguage}'",
"tool_version": "'${cliVersion}'",
"projname": "'${owner}/${repo}'"
}
'
    echo "$record" >> "$db_collection_dir/metadata.json"

    #* Link original file path to collection directory for serving.  Use name including
    # the cid and field separator ctsj
    # A relative symlink target is resolved against the link's directory, so
    # link to the archive's absolute path.
    zip_abs="$(cd "$(dirname "${zip_path}")" && pwd)/$(basename "${zip_path}")"
    link_path="${db_collection_dir}/${new_db_fname}"
    [ -L "${link_path}" ] || ln -s "${zip_abs}" "${link_path}"

    # Interim cleanup.  Only the scratch directory is removed here; the paths
    # file is still being read and is removed by the EXIT trap.
    rm -fR "$zip_tmp"
done < "/tmp/hepc.$$-paths"
|
||||
11
client/qldbtools/qldbtools.code-workspace
Normal file
11
client/qldbtools/qldbtools.code-workspace
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"folders": [
|
||||
{
|
||||
"path": "."
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
"git.ignoreLimitWarning": true,
|
||||
"makefile.configureOnOpen": false
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user