Change hepc-init to Python and add debugging configuration
This relies heavily on plumbum to retain a shell-script style while adding the data structures that will be needed.
This commit is contained in:
committed by
=Michael Hohn
parent
d486b6b4db
commit
537ebdf19d
24
client/qldbtools/.vscode/launch.json
vendored
Normal file
24
client/qldbtools/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File with Arguments",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"args": [
|
||||||
|
"--db_collection_dir",
|
||||||
|
"db-collection-py",
|
||||||
|
"--starting_path",
|
||||||
|
"$HOME/work-gh/mrva/mrva-open-source-download"
|
||||||
|
],
|
||||||
|
"justMyCode": true,
|
||||||
|
"stopOnEntry": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
249
client/qldbtools/bin/hepc-init
Executable file → Normal file
249
client/qldbtools/bin/hepc-init
Executable file → Normal file
@@ -1,144 +1,115 @@
|
|||||||
#!/bin/bash
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
#* Utility functions
|
import json
|
||||||
log() {
|
import hashlib
|
||||||
local level="$1"
|
import yaml
|
||||||
shift
|
import sys
|
||||||
local color_reset="\033[0m"
|
from plumbum import cli, local
|
||||||
local color_info="\033[1;34m"
|
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env
|
||||||
local color_warn="\033[1;33m"
|
|
||||||
local color_error="\033[1;31m"
|
|
||||||
|
|
||||||
local color
|
# Logging function
|
||||||
case "$level" in
|
def log(level, message):
|
||||||
INFO) color="$color_info" ;;
|
colors = {
|
||||||
WARN) color="$color_warn" ;;
|
"INFO": "\033[1;34m",
|
||||||
ERROR) color="$color_error" ;;
|
"WARN": "\033[1;33m",
|
||||||
*) color="$color_reset" ;;
|
"ERROR": "\033[1;31m",
|
||||||
esac
|
"RESET": "\033[0m",
|
||||||
|
|
||||||
echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
|
|
||||||
}
|
|
||||||
usage() {
|
|
||||||
echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
|
|
||||||
echo
|
|
||||||
echo "Options:"
|
|
||||||
echo " --db_collection_dir <directory> Specify the database collection directory."
|
|
||||||
echo " --starting_path <path> Specify the starting path."
|
|
||||||
echo " -h Show this help message."
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#* Initialize and parse arguments
|
|
||||||
set -euo pipefail # exit on error, unset var, pipefail
|
|
||||||
trap 'rm -fR /tmp/hepc.$$-*' EXIT
|
|
||||||
|
|
||||||
starting_dir=$(pwd)
|
|
||||||
db_collection_dir=""
|
|
||||||
starting_path=""
|
|
||||||
|
|
||||||
# Parse arguments
|
|
||||||
while [[ $# -gt 0 ]]; do
|
|
||||||
case "$1" in
|
|
||||||
--db_collection_dir)
|
|
||||||
shift
|
|
||||||
if [[ -z "$1" || "$1" == -* ]]; then
|
|
||||||
echo "Error: --db_collection_dir requires a directory as an argument."
|
|
||||||
usage
|
|
||||||
fi
|
|
||||||
db_collection_dir="$1"
|
|
||||||
;;
|
|
||||||
--starting_path)
|
|
||||||
shift
|
|
||||||
if [[ -z "$1" || "$1" == -* ]]; then
|
|
||||||
echo "Error: --starting_path requires a path as an argument."
|
|
||||||
usage
|
|
||||||
fi
|
|
||||||
starting_path="$1"
|
|
||||||
;;
|
|
||||||
-h)
|
|
||||||
usage
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Error: Unknown option '$1'."
|
|
||||||
usage
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
shift
|
|
||||||
done
|
|
||||||
|
|
||||||
# Check if required arguments were provided
|
|
||||||
if [[ -z "$db_collection_dir" ]]; then
|
|
||||||
echo "Error: --db_collection_dir is required."
|
|
||||||
usage
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -z "$starting_path" ]]; then
|
|
||||||
echo "Error: --starting_path is required."
|
|
||||||
usage
|
|
||||||
fi
|
|
||||||
|
|
||||||
#* Find all DBs
|
|
||||||
log INFO "searching for db.zip files"
|
|
||||||
find ${starting_path} -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths
|
|
||||||
|
|
||||||
#* Collect detailed information from the database files
|
|
||||||
# Don't assume they are unique.
|
|
||||||
log INFO "collecting information from db.zip files"
|
|
||||||
mkdir -p $db_collection_dir
|
|
||||||
cat /tmp/hepc.$$-paths | while read -r zip_path
|
|
||||||
do
|
|
||||||
log INFO "Extracting from ${zip_path}"
|
|
||||||
zip_dir=$(dirname ${zip_path})
|
|
||||||
zip_file=$(basename ${zip_path})
|
|
||||||
unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
|
|
||||||
# The content may be LANGUAGE/codeql-database.yml
|
|
||||||
|
|
||||||
#* For every database, create a metadata record.
|
|
||||||
mkdir -p /tmp/hepc.$$-zip
|
|
||||||
cd /tmp/hepc.$$-zip/*
|
|
||||||
|
|
||||||
# Information from codeql-database.yml
|
|
||||||
primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
|
|
||||||
sha=$(yq '.creationMetadata.sha' codeql-database.yml)
|
|
||||||
cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
|
|
||||||
creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
|
|
||||||
sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
|
|
||||||
repo=${sourceLocationPrefix##*/} # keep only last component
|
|
||||||
# Get sourceLocationPrefix[-2]
|
|
||||||
owner="${sourceLocationPrefix%/*}" # strip last component
|
|
||||||
owner="${owner##*/}" # keep only last component
|
|
||||||
|
|
||||||
# cid for repository / db
|
|
||||||
cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
|
|
||||||
awk '{print substr($1, 1, 6)}')
|
|
||||||
|
|
||||||
# Prepare the metadata record for this DB.
|
|
||||||
new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
|
|
||||||
result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
|
|
||||||
record='
|
|
||||||
{
|
|
||||||
"git_branch": "HEAD",
|
|
||||||
"git_commit_id": "'${sha}'",
|
|
||||||
"git_repo": "'${repo}'",
|
|
||||||
"ingestion_datetime_utc": "'${creationTime}'",
|
|
||||||
"result_url": "'${result_url}'",
|
|
||||||
"tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
|
|
||||||
"tool_name": "codeql-'${primaryLanguage}'",
|
|
||||||
"tool_version": "'${cliVersion}'",
|
|
||||||
"projname": "'${owner}/${repo}'"
|
|
||||||
}
|
}
|
||||||
'
|
timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
|
||||||
cd "$starting_dir"
|
print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)
|
||||||
rm -fR /tmp/hepc.$$-zip
|
|
||||||
echo "$record" >> $db_collection_dir/metadata.json
|
|
||||||
|
|
||||||
#* Link original file path to collection directory for serving. Use name including
|
# Generate a CID
|
||||||
# the cid and field separator ctsj
|
def generate_cid(cli_version, creation_time, primary_language, sha):
|
||||||
cd ${db_collection_dir}
|
hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
|
||||||
[ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}
|
return hashlib.sha256(hash_input).hexdigest()[:6]
|
||||||
|
|
||||||
# Interim cleanup
|
# Expand environment variables in paths
|
||||||
rm -fR "/tmp/hepc.$$-*"
|
def expand_path(path):
|
||||||
done
|
return local.env.expand(path)
|
||||||
|
|
||||||
|
# Process a single db.zip file
|
||||||
|
def process_db_file(zip_path, db_collection_dir):
    """Register one CodeQL ``db.zip`` archive in the collection directory.

    Extracts ``codeql-database.yml`` from the archive, derives the owner, the
    repository name and a short content id from it, appends one JSON record
    (one object per line) to ``<db_collection_dir>/metadata.json`` and links
    the archive into the collection directory under the name
    ``<owner>-<repo>-ctsj-<cid>.zip``.

    Any failure is logged as a warning and swallowed so that a single bad
    archive does not stop the caller's scan.

    Args:
        zip_path: Path of the archive to process; may be relative to the
            current working directory.
        db_collection_dir: Directory that receives ``metadata.json`` and the
            symlink to the archive.
    """
    temp_dir = mktemp("-d").strip()
    try:
        # The symlink created below lives inside db_collection_dir, so a
        # relative target would be resolved relative to that directory and
        # dangle. Anchor the archive path to the current directory first.
        zip_abs = local.path(zip_path)

        unzip("-o", "-q", zip_abs, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return

        with yaml_files[0].open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        # NOTE(review): safe_load may hand this back as a datetime rather than
        # a string, hence the str() when it is stored below -- confirm the
        # textual form is what consumers of metadata.json expect.
        creation_time = creation_metadata["creationTime"]
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])

        # The last two components of sourceLocationPrefix are taken as
        # <owner>/<repo>.
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name
        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"

        metadata = {
            "git_branch": "HEAD",
            "git_commit_id": sha,
            "git_repo": repo,
            "ingestion_datetime_utc": str(creation_time),
            "result_url": result_url,
            "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name": f"codeql-{primary_language}",
            "tool_version": cli_version,
            "projname": f"{owner}/{repo}",
        }

        collection = local.path(db_collection_dir)
        with (collection / "metadata.json").open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = collection / new_db_fname
        if not link_path.exists():
            ln("-sf", zip_abs, link_path)

    except Exception as e:
        # Deliberately best-effort: report and move on to the next archive.
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)
|
||||||
|
|
||||||
|
# Main application class
|
||||||
|
# The class docstring doubles as the program description in --help output.
class DBProcessor(cli.Application):
    """Collect every non-empty db.zip found below --starting_path.

    Each archive is registered in --db_collection_dir: a metadata record is
    appended to metadata.json and a symlink to the archive is created.
    """

    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir", str, mandatory=True,
        help="Specify the database collection directory",
    )
    starting_path = cli.SwitchAttr(
        "--starting_path", str, mandatory=True,
        help="Specify the starting path",
    )

    def main(self):
        """Scan the starting path and process every archive found.

        Returns:
            ``None`` on success (including "nothing found"), or the non-zero
            exit status of ``find`` when the search failed without producing
            a single path; plumbum uses the value as the process exit code.
        """
        db_collection_dir = expand_path(self.db_collection_dir)
        starting_path = expand_path(self.starting_path)

        mkdir("-p", db_collection_dir)
        log("INFO", f"Searching for db.zip files in {starting_path}")

        # retcode=None disables plumbum's exit-status check: find exits
        # non-zero even when it only failed to descend into one directory,
        # and raising there would throw away every path it did print.
        retcode, found, errors = find[
            starting_path, "-type", "f", "-name", "db.zip", "-size", "+0c"
        ].run(retcode=None)
        db_files = found.splitlines()

        if retcode != 0:
            if not db_files:
                log("ERROR", f"find failed in {starting_path}: {errors.strip()}")
                return retcode
            log("WARN", f"find reported problems in {starting_path}: {errors.strip()}")

        if not db_files:
            log("WARN", "No db.zip files found in the specified starting path.")
            return

        for zip_path in db_files:
            process_db_file(zip_path, db_collection_dir)

        log("INFO", "Processing completed.")


if __name__ == "__main__":
    DBProcessor.run()
|
||||||
|
|||||||
144
client/qldbtools/bin/hepc-init.sh
Executable file
144
client/qldbtools/bin/hepc-init.sh
Executable file
@@ -0,0 +1,144 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
#* Utility functions
|
||||||
|
# log LEVEL MESSAGE...
# Write a timestamped, colorized message to stderr. LEVEL selects the color
# (INFO, WARN or ERROR); any other level is printed without color.
log() {
    local severity="$1"
    shift
    local reset="\033[0m"

    local tint
    case "$severity" in
        INFO)  tint="\033[1;34m" ;;
        WARN)  tint="\033[1;33m" ;;
        ERROR) tint="\033[1;31m" ;;
        *)     tint="$reset" ;;
    esac

    echo -e "${tint}[$(date +"%Y-%m-%d %H:%M:%S")] [$severity] $*${reset}" >&2
}
|
||||||
|
# usage
# Print the command-line synopsis to stdout and terminate with status 1.
usage() {
    cat <<EOF
Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]

Options:
 --db_collection_dir <directory> Specify the database collection directory.
 --starting_path <path> Specify the starting path.
 -h Show this help message.
EOF
    exit 1
}
|
||||||
|
|
||||||
|
|
||||||
|
#* Initialize and parse arguments
set -euo pipefail # exit on error, unset var, pipefail
# Remove every scratch file/directory of this process on exit.
trap 'rm -fR /tmp/hepc.$$-*' EXIT

starting_dir=$(pwd)
db_collection_dir=""
starting_path=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --db_collection_dir)
            shift
            # ${1:-} instead of $1: with `set -u` a bare $1 aborts the script
            # with "unbound variable" when the option is the last word, and
            # the message below would never be shown.
            if [[ -z "${1:-}" || "$1" == -* ]]; then
                echo "Error: --db_collection_dir requires a directory as an argument."
                usage
            fi
            db_collection_dir="$1"
            ;;
        --starting_path)
            shift
            if [[ -z "${1:-}" || "$1" == -* ]]; then
                echo "Error: --starting_path requires a path as an argument."
                usage
            fi
            starting_path="$1"
            ;;
        -h)
            usage
            ;;
        *)
            echo "Error: Unknown option '$1'."
            usage
            ;;
    esac
    shift
done

# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
    echo "Error: --db_collection_dir is required."
    usage
fi

if [[ -z "$starting_path" ]]; then
    echo "Error: --starting_path is required."
    usage
fi
|
||||||
|
|
||||||
|
#* Find all DBs
log INFO "searching for db.zip files"
find "${starting_path}" -type f -name "db.zip" -size +0c > "/tmp/hepc.$$-paths"

#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p "${db_collection_dir}"
while IFS= read -r zip_path
do
    log INFO "Extracting from ${zip_path}"

    # Make the archive path absolute. The symlink created at the end of the
    # iteration lives in the collection directory, so a relative target would
    # be resolved from there and dangle.
    zip_path="$(cd "$(dirname "${zip_path}")" && pwd)/$(basename "${zip_path}")"

    mkdir -p "/tmp/hepc.$$-zip"
    unzip -o -q "${zip_path}" '*codeql-database.yml' -d "/tmp/hepc.$$-zip"

    # The content may be LANGUAGE/codeql-database.yml or a top-level
    # codeql-database.yml; search for it instead of cd'ing into a glob, which
    # fails when the glob matches the file itself.
    yml_file=""
    IFS= read -r yml_file < <(find "/tmp/hepc.$$-zip" -type f -name 'codeql-database.yml') || true
    if [[ -z "${yml_file}" ]]; then
        log WARN "No codeql-database.yml found in ${zip_path}"
        rm -fR "/tmp/hepc.$$-zip"
        continue
    fi

    #* For every database, create a metadata record.
    # Information from codeql-database.yml
    primaryLanguage=$(yq '.primaryLanguage' "${yml_file}")
    sha=$(yq '.creationMetadata.sha' "${yml_file}")
    cliVersion=$(yq '.creationMetadata.cliVersion' "${yml_file}")
    creationTime=$(yq '.creationMetadata.creationTime' "${yml_file}")
    sourceLocationPrefix=$(yq '.sourceLocationPrefix' "${yml_file}")
    repo=${sourceLocationPrefix##*/} # keep only last component
    # Get sourceLocationPrefix[-2]
    owner="${sourceLocationPrefix%/*}" # strip last component
    owner="${owner##*/}" # keep only last component

    # cid for repository / db
    cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
              awk '{print substr($1, 1, 6)}')

    # Prepare the metadata record for this DB.
    new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
    result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
    record='
    {
    "git_branch": "HEAD",
    "git_commit_id": "'${sha}'",
    "git_repo": "'${repo}'",
    "ingestion_datetime_utc": "'${creationTime}'",
    "result_url": "'${result_url}'",
    "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
    "tool_name": "codeql-'${primaryLanguage}'",
    "tool_version": "'${cliVersion}'",
    "projname": "'${owner}/${repo}'"
    }
'
    rm -fR "/tmp/hepc.$$-zip"
    echo "$record" >> "${db_collection_dir}/metadata.json"

    #* Link original file path to collection directory for serving. Use name including
    # the cid and field separator ctsj
    # No cd here: the working directory stays put, so relative paths read from
    # the list keep resolving on the following iterations.
    [ -L "${db_collection_dir}/${new_db_fname}" ] || ln -s "${zip_path}" "${db_collection_dir}/${new_db_fname}"

    # Interim cleanup is the rm of the -zip directory above. The path list
    # must survive until the loop ends, and the EXIT trap removes the rest.
done < "/tmp/hepc.$$-paths"
|
||||||
11
client/qldbtools/qldbtools.code-workspace
Normal file
11
client/qldbtools/qldbtools.code-workspace
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"folders": [
|
||||||
|
{
|
||||||
|
"path": "."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"settings": {
|
||||||
|
"git.ignoreLimitWarning": true,
|
||||||
|
"makefile.configureOnOpen": false
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user