119 Commits

Author SHA1 Message Date
Michael Hohn
a5bb232af2 Use full repository path name in place of mrvacommander 2024-12-13 10:54:35 -08:00
Michael Hohn
008708469c add pdf version of structure diagram 2024-12-12 11:42:12 -08:00
Michael Hohn
37d5b1c6c1 * Running the VS Code plugin 2024-12-10 16:10:24 -08:00
Michael Hohn
1302db0b4e Add directory with 23 post-mc-hepc-init dbs and metadata 2024-12-09 14:54:30 -08:00
Michael Hohn
c624925aba Add diagram of current container dependencies 2024-12-06 13:30:03 -08:00
Michael Hohn
e3e91534a0 urls back in diagram 2024-12-06 12:53:24 -08:00
Michael Hohn
af043f3f59 better diagram 2024-12-06 12:10:59 -08:00
Michael Hohn
8ea453f8b0 better diagram 2024-12-06 12:10:18 -08:00
Michael Hohn
3f24fbb07d better diagram 2024-12-06 12:05:44 -08:00
Michael Hohn
de0d1b7434 better diagram 2024-12-06 12:02:54 -08:00
Michael Hohn
be7cc3b0cf better diagram 2024-12-06 11:59:23 -08:00
Michael Hohn
ba66cb9258 starting with container diagram 2024-12-06 11:36:49 -08:00
Michael Hohn
baf20fa7af better diagram 2024-12-06 11:34:39 -08:00
Michael Hohn
6bfcbb33ea better diagram 2024-12-06 11:28:10 -08:00
Michael Hohn
9d6587872c Container Dependencies 2024-12-06 11:21:41 -08:00
Michael Hohn
f809917c2e structure 1 2024-12-06 11:17:12 -08:00
Michael Hohn
a22d8d77f2 Indent data extraction / assembly 2024-11-27 23:06:17 -08:00
Michael Hohn
92a22f55d1 bump dependency versions 2024-11-27 22:58:49 -08:00
Michael Hohn
3db629e2ca A go version of hepc-serve 2024-11-27 22:56:47 -08:00
Michael Hohn
95d2638546 Add hepc-{init,serve} to provide codeql database access via http 2024-11-27 13:52:59 -08:00
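For orientation, a minimal Go sketch of an hepc-serve-style endpoint: it exposes a database collection directory (metadata.json plus the db zip files its result_url fields point at) over plain HTTP. The directory name and port are assumptions, not the actual implementation.

    package main

    import (
        "log"
        "net/http"
    )

    func main() {
        // Serve db-collection/metadata.json and the *.zip CodeQL
        // databases it references as static files.
        log.Println("serving db-collection on :8070")
        log.Fatal(http.ListenAndServe(":8070",
            http.FileServer(http.Dir("db-collection"))))
    }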
Michael Hohn
ff96b34f5e minor doc update 2024-11-23 00:30:06 -08:00
Michael Hohn
537ebdf19d Change hepc-init to Python and add debugging configuration
This heavily uses plumbum to retain a shell-script style while adding the data
structures that will be needed
2024-11-20 11:02:38 -08:00
Michael Hohn
d486b6b4db Add plumbum dependency 2024-11-20 10:03:35 -08:00
Michael Hohn
b61fbf8896 Small documentation update 2024-11-19 15:25:35 -08:00
Michael Hohn
dd776e312a Add type information 2024-11-19 15:24:41 -08:00
Michael Hohn
18333bfdb1 Start hepc-init: the data collector for DBs on the file system 2024-11-19 15:23:20 -08:00
Michael Hohn
e335b6c843 Add ignore rules 2024-11-18 13:11:47 -08:00
Michael Hohn
4d52176c5a Add Publisher Confirms and Consumer Acknowledgements to rabbitmq channels
Also updated the end-to-end workflow

The confirmation channel size is intentionally very large to prevent
blocking the server or agents.
2024-11-14 12:04:18 -08:00
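A minimal sketch of publisher confirms with a deliberately large confirmation buffer, assuming the amqp091-go client; the queue name and buffer size are illustrative, not the project's actual code. The connection values follow .env.container below.

    package main

    import (
        "context"
        "log"

        amqp "github.com/rabbitmq/amqp091-go"
    )

    func main() {
        conn, err := amqp.Dial("amqp://user:password@rabbitmq:5672/")
        if err != nil {
            log.Fatal(err)
        }
        defer conn.Close()

        ch, err := conn.Channel()
        if err != nil {
            log.Fatal(err)
        }
        // Put the channel into confirm mode.
        if err := ch.Confirm(false); err != nil {
            log.Fatal(err)
        }
        // Intentionally very large buffer so a burst of unconfirmed
        // messages does not block the server or agents.
        confirms := ch.NotifyPublish(make(chan amqp.Confirmation, 100000))

        err = ch.PublishWithContext(context.Background(),
            "", "jobs", false, false, // "jobs" queue name is illustrative
            amqp.Publishing{ContentType: "application/json", Body: []byte(`{}`)})
        if err != nil {
            log.Fatal(err)
        }
        if c := <-confirms; !c.Ack {
            log.Printf("broker nacked delivery %d", c.DeliveryTag)
        }
    }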
Michael Hohn
dd58a64ef7 Add summary of vs code plugin build procedure 2024-11-06 12:43:56 -08:00
Michael Hohn
4e93929943 Code generalization: cleanup 2024-10-30 11:10:28 -07:00
Michael Hohn
e7d32861e5 Code generalization: request db info from other source: remove unused constants 2024-10-28 18:45:21 -07:00
Michael Hohn
52aafd6fc9 Code generalization: request db info from other source: remove unnecessary types 2024-10-28 14:34:07 -07:00
Michael Hohn
77ce997fbb Major changes to support cli-end-to-end demonstration. See full log
* notes/cli-end-to-end-demo.org (Database Acquisition):
  Starting description for the end-to-end demonstration workflow.
  Very simplified version of notes/cli-end-to-end.org

* docker-compose-demo.yml (services):
  Make the pre-populated ql database storage an explicit container
  to get persistent data and straightforward mount semantics.

* docker-compose-demo-build.yml (services):
  Add a docker-compose configuration for *building* the demo environment.

* demo/containers/dbsdata/Dockerfile:
  Add dbsdata Docker image to hold initialized minio database file tree

* client/containers/vscode/README.org
  Update vscode container to use custom plugin for later mrva redirection
2024-10-15 10:18:42 -07:00
Michael Hohn
187c49688e Add temporary files to .gitignore 2024-10-11 13:38:28 -07:00
Michael Hohn
d5bcb8b981 Moved file content 2024-09-30 11:55:46 -07:00
Michael Hohn
ec0799696e Remove quotes -- they became part of the names 2024-09-30 10:01:50 -07:00
Michael Hohn
9ccea8ac80 Minor cleanup 2024-09-26 13:38:50 -07:00
Michael Hohn
080c311516 Redirect localhost via network_mode: "service:server" 2024-09-26 13:38:17 -07:00
Michael Hohn
faeb13efb1 7. [X] Download the sarif files, optionally also get databases. For the current 2024-09-26 13:33:29 -07:00
Michael Hohn
0378c4cb7f 6. [X] Check the status 2024-09-26 13:32:00 -07:00
Michael Hohn
7de3ee59ce Fixed dependencies, run 'Submit the mrva job' 2024-09-26 13:28:28 -07:00
Michael Hohn
7ae6e9a1cb Add codeql to gh-mrva container 2024-09-26 12:50:20 -07:00
Michael Hohn
2d92ad51c3 Migrate entries from global Makefile to local 2024-09-26 12:36:31 -07:00
Michael Hohn
bef8a6dc97 4. [X] Provide the CodeQL query 2024-09-24 16:28:05 -07:00
Michael Hohn
d08e32dc42 3. [X] Provide the specification files 2024-09-24 16:22:23 -07:00
Michael Hohn
64b77c5d70 Add 'Run MRVA from command line, set up the configuration' 2024-09-24 14:20:23 -07:00
Michael Hohn
71ce8c0823 Add notes on docker-compose -f docker-compose-demo.yml up -d 2024-09-24 13:23:55 -07:00
Michael Hohn
067e477f61 fix 2024-09-24 12:58:11 -07:00
Michael Hohn
8f807e0e42 4. Starting the server
4.1. Optional: Inspect the Backing Store
4.2. Optional: Inspect the MinIO DB
2024-09-24 12:51:21 -07:00
Michael Hohn
195dda9fd7 Add 'Repository Selection' 2024-09-19 11:11:38 -07:00
Michael Hohn
f60b55f181 Storage container simplification
Only one container really needs large storage: the dbstore container.  The demo
containers can carry their own data -- it's small, and the containers are made
for demonstration anyway.
2024-09-13 12:04:30 -07:00
Michael Hohn
727381dc5a Fix: use explicit file names instead of $@ 2024-09-13 11:55:02 -07:00
Michael Hohn
a35fc619e6 Use mk. prefix for Makefile time stamps and make git ignore them 2024-09-13 09:44:08 -07:00
Michael Hohn
8dd6c94918 Set up and push fully configured vs code container 2024-09-12 14:05:59 -07:00
Michael Hohn
34958e4cf4 WIP: Working individual containers and docker compose demo 2024-09-12 09:49:25 -07:00
Michael Hohn
259bac55fb Add container mapping diagram 2024-09-12 09:46:28 -07:00
Michael Hohn
41f6db5de0 Add Makefile to push mrva agent container image 2024-09-06 14:42:31 -07:00
Michael Hohn
19330c3a0f Add Makefile to push mrva server container image 2024-09-06 11:40:13 -07:00
Michael Hohn
1e2df515e3 Set up and push Docker containers for demonstration purposes
These containers take the place of a desktop install
2024-09-04 15:52:18 -07:00
Michael Hohn
681fcdab8c Add new containers to streamline setup 2024-08-29 13:22:59 -07:00
Michael Hohn
5021fc824b Fix: include minio in requirements.txt 2024-08-23 08:17:27 -07:00
Michael Hohn
7d27b910cd Fix: include CID when filtering in mc-rows-from-mrva-list 2024-08-22 13:58:07 -07:00
Michael Hohn
0d3f4c5e40 Updated requirements for container 2024-08-21 16:25:13 -07:00
Michael Hohn
a86f955aab Clarify notes/cli-end-to-end.org -> notes/cli-end-to-end-detailed.org 2024-08-21 11:19:00 -07:00
Michael Hohn
c556605e44 Run containers without mitmweb proxy 2024-08-21 11:17:20 -07:00
Michael Hohn
7b06484b29 get github to render cli-end-to-end.org 2024-08-16 15:06:10 -07:00
Michael Hohn
fc751ae08f Add full walkthrough description in notes/cli-end-to-end.org 2024-08-16 14:39:44 -07:00
Michael Hohn
d956f47db3 Fix: Produce complete SARIF output in agent
The problem was missing fields in the SARIF output.  After the debugging
below, the cause was the JSON -> Go -> JSON round trip; the Go -> JSON
conversion only outputs the fields defined in the Go struct.

Because SARIF has so many optional fields, no attempt is made to enforce a
statically defined structure.  Instead, the JSON -> Go conversion now targets
a fully dynamic structure; unused fields are simply passed through

Debugging:

       Comparing two SARIF files shows

           {
             "$schema" : "https://json.schemastore.org/sarif-2.1.0.json",
             "version" : "2.1.0",
             "runs" : [ {...
             } ]
           }

       and

           {
             "runs": [...
             ]
           }

       so there are missing fields.

    The Problem
     1. Problem origin

          // Modify the sarif: start by extracting
          var sarif Sarif
          if err := json.Unmarshal(sarifData, &sarif); err != nil {
              return nil, fmt.Errorf("failed to unmarshal SARIF: %v", err)
          }
          ...
          // now inject version control info
          ...
          // and write it back
          sarifBytes, err := json.Marshal(sarif)
          if err != nil {
              return nil, fmt.Errorf("failed to marshal SARIF: %v", err)
          }

     2. But the struct only has one of the needed fields

          type Sarif struct {
              Runs []SarifRun `json:"runs"`
          }

     3. From the docs:

          // To unmarshal JSON into a struct, Unmarshal matches incoming object
          // keys to the keys used by [Marshal] (either the struct field name or its tag),
          // preferring an exact match but also accepting a case-insensitive match. By
          // default, object keys which don't have a corresponding struct field are
          // ignored (see [Decoder.DisallowUnknownFields] for an alternative).
2024-08-16 14:27:46 -07:00
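A minimal sketch of the dynamic pass-through described above: unmarshal into map[string]any instead of a fixed struct, so fields the code does not know about survive the round trip. The injected key and repository URI are illustrative.

    package main

    import (
        "encoding/json"
        "fmt"
        "log"
    )

    func main() {
        sarifData := []byte(`{
          "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
          "version": "2.1.0",
          "runs": [{}]
        }`)

        // Fully dynamic structure: unknown fields are preserved.
        var sarif map[string]any
        if err := json.Unmarshal(sarifData, &sarif); err != nil {
            log.Fatalf("failed to unmarshal SARIF: %v", err)
        }

        // Inject version control info (key and value are illustrative).
        if runs, ok := sarif["runs"].([]any); ok && len(runs) > 0 {
            if run, ok := runs[0].(map[string]any); ok {
                run["versionControlProvenance"] = []any{
                    map[string]any{"repositoryUri": "https://github.com/owner/repo"},
                }
            }
        }

        // $schema and version are still present in the output.
        out, err := json.Marshal(sarif)
        if err != nil {
            log.Fatalf("failed to marshal SARIF: %v", err)
        }
        fmt.Println(string(out))
    }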
Michael Hohn
0a52b729cd Expand the codeql db download response
End-to-end testing contained an unhandled CodeQL database download
request.  The handlers are added in this patch.  Debugging info is below
for reference.

The mrvacommander *server* fails with the following.  The source code is
: func setupEndpoints(c CommanderAPI)
See mrvacommander/pkg/server/server.go, endpoints for getting a URL to download artifacts.

  Original

          Downloading artifacts for tdlib_telegram-bot-apictsj8529d9_2
          ...
          Downloading database tdlib/telegram-bot-apictsj8529d9 cpp mirva-session-1400 tdlib_telegram-bot-apictsj8529d9_2
          ...
          2024/08/13 12:31:38 >> GET http://localhost:8080/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp
          ...
          2024/08/13 12:31:38 << 404 http://localhost:8080/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp
          ...
          -rwxr-xr-x@  1 hohn  staff  169488 Aug 13 12:29 tdlib_telegram-bot-apictsj8529d9_2.sarif*
          -rwxr-xr-x@  1 hohn  staff      10 Aug 13 12:31 tdlib_telegram-bot-apictsj8529d9_2_db.zip*

  Server log

          server         | 2024/08/13 19:31:38 ERROR Unhandled endpoint method=GET uri=/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp

  Try a manual download from the server

          8:$ wget http://localhost:8080/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp
          --2024-08-13 12:56:05--  http://localhost:8080/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp
          Resolving localhost (localhost)... ::1, 127.0.0.1
          Connecting to localhost (localhost)|::1|:8080... connected.
          HTTP request sent, awaiting response... 404 Not Found
          2024-08-13 12:56:05 ERROR 404: Not Found.

          server         | 2024/08/13 19:56:05 ERROR Unhandled endpoint method=GET uri=/repos/tdlib/telegram-bot-apictsj8529d9/code-scanning/codeql/databases/cpp

  The full info for the DB
          tdlib,telegram-bot-api,8529d9,2.17.0,2024-05-09 08:02:49.545174+00:00,cpp,f95d406da67adb8ac13d9c562291aa57c65398e0,306106.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/tdlib/telegram-bot-api/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1244.0,306106.0,2024-05-13T15:54:54.749093,cpp,True,3375,373477635

The gh-mrva *client* sends the following.  The source is
gh-mrva/utils/utils.go,
    client.Get(fmt.Sprintf("http://localhost:8080/repos/%s/code-scanning/codeql/databases/%s", task.Nwo, task.Language))

We have
  cd /Users/hohn/work-gh/mrva/gh-mrva
  0:$ rg 'repos/.*/code-scanning/codeql/databases'

          ...
          utils/utils.go
          625:	// resp, err := client.Get(fmt.Sprintf("https://api.github.com/repos/%s/code-scanning/codeql/databases/%s", task.Nwo, task.Language))
          626:	resp, err := client.Get(fmt.Sprintf("http://localhost:8080/repos/%s/code-scanning/codeql/databases/%s", task.Nwo, task.Language))

  And
          resp, err := client.Get(fmt.Sprintf("http://localhost:8080/repos/%s/code-scanning/codeql/databases/%s", task.Nwo, task.Language))

The original DB upload was
  cd ~/work-gh/mrva/mrvacommander/client/qldbtools && \
      ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv

  ...
  2024-08-14 09:29:19 [INFO] Uploaded /Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/tdlib/telegram-bot-api/code-scanning/codeql/databases/cpp/db.zip as tdlib$telegram-bot-apictsj8529d9.zip to bucket qldb
  ...
2024-08-14 13:01:15 -07:00
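A sketch of registering the previously unhandled download route, using Go 1.22 net/http pattern syntax; the actual server's router and handler body may differ, and the response here is a placeholder rather than the real MinIO-backed download.

    package main

    import (
        "fmt"
        "log"
        "net/http"
    )

    func main() {
        mux := http.NewServeMux()
        // The endpoint the gh-mrva client requests:
        //   GET /repos/{owner}/{repo}/code-scanning/codeql/databases/{language}
        mux.HandleFunc("GET /repos/{owner}/{repo}/code-scanning/codeql/databases/{language}",
            func(w http.ResponseWriter, r *http.Request) {
                owner := r.PathValue("owner")
                repo := r.PathValue("repo")
                lang := r.PathValue("language")
                // A real handler would stream the db.zip from the
                // dbstore (MinIO) bucket; here we just acknowledge.
                fmt.Fprintf(w, "would serve %s/%s %s database\n", owner, repo, lang)
            })
        log.Fatal(http.ListenAndServe(":8080", mux))
    }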
Michael Hohn
6bebf4abfc Remove interactive debug statements 2024-08-13 09:27:13 -07:00
Michael Hohn
9d60489908 wip: Handle varying CodeQL DB formats. This code contains debugging features
This patch fixes the following

     - [X] Wrong db metadata path.  Fixed via
       : globRecursively(databasePath, "codeql-database.yml")

       The log output for reference:

                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/ce523549-a217-4b54-a118-7224ce444870/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG Received signal: %s "user defined signal 1"=<nil>
                 agent          | 2024/08/09 21:16:40 DEBUG XX:getDataBaseMetadata databasePath=/tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>

            From

                 func getDatabaseMetadata(databasePath string) (*DatabaseMetadata, error) {
                 data, err := os.ReadFile(filepath.Join(databasePath, "codeql-database.yml"))
                 ...}

            And some inspection:

                 root@3fa4b8013336:~# find /tmp |grep ql-datab
                 /tmp/27f09b9f-254f-4ef5-abf5-9a1a2927906b/db/cpp/codeql-database.yml
                 /tmp/d7e14cd4-8789-4176-81bc-2ac1957ed9fd/db/codeql_db/codeql-database.yml
                 /tmp/41fcf5cc-e151-4a11-bccc-481d599aa426/db/codeql_db/codeql-database.yml
                 /tmp/bc24fe72-b520-4e72-9634-a98d630cb75e/db/codeql_db/codeql-database.yml
                 /tmp/ce523549-a217-4b54-a118-7224ce444870/db/codeql_db/codeql-database.yml

     - [X] Wrong db path.  Fixed via
       : findDBDir(databasePath)

       The log output for reference:

                 agent          | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/91c61e0b-dfd9-4dd3-a3ad-cb77dbc1cbfd/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/09 21:51:09 INFO Running analysis job job="{Spec:{SessionID:1 NameWithOwner:{Owner:USCiLab Repo:cerealctsj264953}} QueryPackLocation:{Key:1 Bucket:packs} QueryLanguage:cpp}"
                 agent          | 2024/08/09 21:51:09 ERROR Failed to run analysis job error="failed to run analysis: failed to run queries: exit status 2\nOutput: A fatal error occurred: /tmp/1b8ffeba-8ad1-465e-8ec7-36cda449a5f5/db is not a recognized CodeQL database.\n"
                 ...

            This is easily confirmed:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade  /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/
                 A fatal error occurred: /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2 is not a recognized CodeQL database.

            Another try:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade  /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip
                 A fatal error occurred: Database root /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/database.zip is not a directory.

             This one is correct:

                 root@171b5417e05f:~# /opt/codeql/codeql database upgrade /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db
                 /tmp/7ed27578-d7ea-42e0-902a-effbc4df05f2/db/codeql_db/db-cpp is up to date.

     - [X] Wrong database source prefix.  Also fixed via
       : findDBDir(databasePath)

       Similar log entries:

                 agent          | 2024/08/13 15:40:14 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/da420844-a284-4d82-9470-fa189a5b4ee6/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:40:14 INFO Worker stopping due to reduction in worker count
                 agent          | 2024/08/13 15:40:18 ERROR Failed to run analysis job error="failed to run analysis: failed to get source location prefix: failed to resolve database: exit status 2\nOutput: A fatal error occurred: /tmp/eebfc52c-3ecf-490d-bbf4-23c305d6ba18/db is not a recognized CodeQL database.\n"

            and
                 agent          | 2024/08/13 15:49:33 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:49:33 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/b5c4941a-5692-4640-aa79-9810bcab39f4/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>
                 agent          | 2024/08/13 15:49:35 INFO Modifying worker count current=3 new=2
                 agent          | 2024/08/13 15:49:35 ERROR Failed to resolve database err="exit status 2" output="A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.\n"
                 agent          | 2024/08/13 15:49:35 DEBUG XX: RunQuery failed to get source location prefixdatabasePath=/tmp/eda30582-81a3-4582-8897-65f8904e8501/db "Waiting for SIGUSR1 or SIGUSR2..."=<nil>

            And this fails

                 root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/
                 A fatal error occurred: /tmp/eda30582-81a3-4582-8897-65f8904e8501/db is not a recognized CodeQL database.

            But this works:

                 root@51464985499f:~# /opt/codeql/codeql resolve database /tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/
                 {
                   "sourceLocationPrefix" : "/home/runner/work/bulk-builder/bulk-builder",
                   "columnKind" : "utf8",
                   "unicodeNewlines" : false,
                   "sourceArchiveZip" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src.zip",
                   "sourceArchiveRoot" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/src",
                   "datasetFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/db-cpp",
                   "logsFolder" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/log",
                   "languages" : [
                     "cpp"
                   ],
                   "scratchDir" : "/tmp/eda30582-81a3-4582-8897-65f8904e8501/db/codeql_db/working"
                }
2024-08-13 09:22:24 -07:00
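A sketch of the two fixes named above -- recursive search for codeql-database.yml and deriving the real database root from its location (e.g. <tmp>/db/codeql_db rather than <tmp>/db). The function names follow the commit; the signatures are assumptions.

    package main

    import (
        "fmt"
        "io/fs"
        "log"
        "path/filepath"
    )

    // globRecursively returns all paths under root whose base name matches name.
    func globRecursively(root, name string) ([]string, error) {
        var matches []string
        err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
            if err != nil {
                return err
            }
            if !d.IsDir() && d.Name() == name {
                matches = append(matches, path)
            }
            return nil
        })
        return matches, err
    }

    // findDBDir locates the directory that is the actual CodeQL database
    // root: the one containing codeql-database.yml.
    func findDBDir(databasePath string) (string, error) {
        matches, err := globRecursively(databasePath, "codeql-database.yml")
        if err != nil {
            return "", err
        }
        if len(matches) == 0 {
            return "", fmt.Errorf("no codeql-database.yml under %s", databasePath)
        }
        return filepath.Dir(matches[0]), nil
    }

    func main() {
        dir, err := findDBDir("/tmp/example/db")
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println("database root:", dir)
    }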
Michael Hohn
35100f89a7 Add html generation/view targets 2024-08-13 00:03:16 -07:00
Michael Hohn
742b059a49 Add script to list full details for a mrva-list file 2024-08-09 08:37:31 -07:00
Michael Hohn
d1f56ae196 Add explicit language selection 2024-08-09 08:36:48 -07:00
Michael Hohn
6262197c8d Introduce distinct doc/ and notes/ directories 2024-08-05 18:44:16 -07:00
Michael Hohn
781571044d fix minor doc typo 2024-08-05 18:43:17 -07:00
Michael Hohn
b183cee78d Reformat / rearrange comments 2024-08-02 14:10:54 -07:00
Michael Hohn
5a95f0ea08 Add module comment 2024-08-02 14:04:06 -07:00
Michael Hohn
349d758c14 Move session scripts to separate directory 2024-08-02 13:56:47 -07:00
Michael Hohn
582d933130 Improve example data layout and README 2024-08-01 14:30:40 -07:00
Michael Hohn
b7b4839fe0 Enforce CID uniqueness and save raw refined info immediately
Previously, the refined info was collected and the CID computed before saving.
This was a major development time sink, so the CID is now computed in the
following step (bin/mc-db-unique).

The columns previously chosen for the CID are not enough.  If these columns are
empty for any reason, the CID repeats.  Just including the owner/name won't help,
because those are duplicates.

Some possibilities considered and rejected:
1. Could use a random number for missing columns.  But this makes
   the CID nondeterministic.
2. Switch to the file system ctime?  Not unique across owner/repo pairs,
   but unique within one.  Also, this could be changed externally and cause
   *very* subtle bugs.
3. Use the file system path?  It has to be unique at ingestion time, but
   repo collections can move.

Instead, this patch
4. Drops rows that don't have the
   | cliVersion   |
   | creationTime |
   | language     |
   | sha          |
   columns.  There are very few (16 out of 6000) and their DBs are
   questionable.
2024-08-01 11:09:04 -07:00
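A Go sketch of the CID idea described here: hash the four distinguishing fields in a fixed order, drop rows missing any of them, and truncate. The real implementation is the Python utils.cid_hash() in qldbtools; the separator and truncation length are assumptions (chosen to match the six-hex-character CIDs in the sample table below).

    package main

    import (
        "crypto/sha256"
        "fmt"
    )

    // cidHash returns a short deterministic id over the four fields,
    // or an error when a field is empty (such rows are dropped rather
    // than given a nondeterministic CID).
    func cidHash(cliVersion, creationTime, language, sha string) (string, error) {
        for _, f := range []string{cliVersion, creationTime, language, sha} {
            if f == "" {
                return "", fmt.Errorf("missing CID field")
            }
        }
        h := sha256.Sum256([]byte(cliVersion + "\x00" + creationTime + "\x00" +
            language + "\x00" + sha))
        return fmt.Sprintf("%x", h[:3]), nil
    }

    func main() {
        cid, err := cidHash("2.16.1", "2024-02-08 14:18:20.983830+00:00",
            "python", "c94dd024b1f5410ef160ff82a8423141e2bbb6b4")
        if err != nil {
            panic(err)
        }
        fmt.Println(cid) // six hex characters, "1f8d99"-style
    }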
Michael Hohn
06dcf50728 Sort utils.cid_hash() entries for legibility 2024-07-31 15:20:43 -07:00
Michael Hohn
8f151ab002 Comment update 2024-07-30 16:08:05 -07:00
Michael Hohn
65cdf9a883 Merge branch 'hohn-0.1.21-static-type-afstore' into hohn-0.1.20-codeql-db-selector 2024-07-30 12:36:36 -07:00
Michael Hohn
1e1daf9330 Include custom id (CID) to distinguish CodeQL databases
The current api (<2024-07-26 Fri>) is set up only for (owner,name).  This is
insufficient for distinguishing CodeQL databases.

Other differences must be considered;  this patch combines the fields
    | cliVersion   |
    | creationTime |
    | language     |
    | sha          |
into one called CID.  The CID field is a hash of these others and therefore can be
changed in the future without affecting workflows or the server.

The CID is combined with the owner/name to form one
identifier.  This requires no changes to server or client -- the db
selection's interface is separate from VS Code and gh-mrva in any case.

To test this, this version imports multiple versions of the same owner/repo pairs from multiple directories.  In this case, from
    ~/work-gh/mrva/mrva-open-source-download/repos
and
    ~/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/
The unique database count increases from 3000 to 5360 -- see README.md,
    ./bin/mc-db-view-info < db-info-3.csv &

Other code modifications:
    - Push (owner,repo,cid) names to minio
    - Generate databases.json for use in the VS Code extension
    - Generate list-databases.json for use by the gh-mrva client
2024-07-30 10:47:29 -07:00
Michael Hohn
b4f1a2b8a6 Minor comment fix 2024-07-29 13:53:12 -07:00
Michael Hohn
f652a6719c Comment fix 2024-07-29 13:41:15 -07:00
Michael Hohn
81c44ab14a Add mc-db-unique as default single-(owner,repo) selector 2024-07-26 14:18:14 -07:00
Michael Hohn
92ca709458 Add mc-db-view-info to view available DBs 2024-07-26 08:40:41 -07:00
Michael Hohn
242ba3fc1e Add script to populate minio using dataframe previously chosen 2024-07-25 15:14:37 -07:00
Michael Hohn
26dd69c976 minor doc update 2024-07-23 15:18:32 -07:00
Michael Hohn
731b44b187 Add scripts for automatic codeql db data and metadata collection
- updated instructions
- cli scripts mirror the interactive session*.py files
2024-07-23 15:05:03 -07:00
Michael Hohn
aaeafa9e88 Automate metadata collection for all DBs
Several errors are handled; on extraction
    ExtractNotZipfile:
    ExtractNoCQLDB:

On detail extraction
    DetailsMissing:
2024-07-22 19:12:12 -07:00
Michael Hohn
129b8cc302 interim: collect metadata from one DB zip file 2024-07-22 12:54:57 -07:00
Michael Hohn
d64522d168 Collect CodeQL database information from the file system and save as CSV
This collection already provides significant meta-information

    ctime : str = '2024-05-13T12:04:01.593586'
    language : str = 'cpp'
    name : str = 'nanobind'
    owner : str = 'wjakob'
    path : Path = Path('/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/wjakob/nanobind/code-scanning/codeql/databases/cpp/db.zip')
    size : int = 63083064

There is more metadata inside the db.zip files, to be added later
2024-07-22 11:07:00 -07:00
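A sketch of that collection step: walk a download tree for db.zip files and record owner, name, language, path, size, and timestamp as CSV. The path layout follows the example above (.../repos/<owner>/<name>/code-scanning/codeql/databases/<lang>/db.zip); everything else is an assumption, not the qldbtools implementation, and ModTime stands in for ctime since Go's portable API does not expose ctime.

    package main

    import (
        "encoding/csv"
        "fmt"
        "io/fs"
        "log"
        "os"
        "path/filepath"
        "strings"
    )

    func main() {
        if len(os.Args) < 2 {
            log.Fatal("usage: collect <root>")
        }
        w := csv.NewWriter(os.Stdout)
        defer w.Flush()
        w.Write([]string{"owner", "name", "language", "path", "size", "ctime"})

        filepath.WalkDir(os.Args[1], func(path string, d fs.DirEntry, err error) error {
            if err != nil || d.IsDir() || d.Name() != "db.zip" {
                return err
            }
            parts := strings.Split(filepath.ToSlash(path), "/")
            if len(parts) < 7 {
                return nil
            }
            // .../<owner>/<name>/code-scanning/codeql/databases/<lang>/db.zip
            owner, name, lang := parts[len(parts)-7], parts[len(parts)-6], parts[len(parts)-2]
            info, err := d.Info()
            if err != nil {
                return err
            }
            return w.Write([]string{owner, name, lang, path,
                fmt.Sprint(info.Size()),
                info.ModTime().Format("2006-01-02T15:04:05")})
        })
    }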
Michael Hohn
6b4e753e69 Experiment with formats for saving/loading the database index
The .csv.gz format is the simplest and most universal.  It's also the smallest
on disk.
A comparison of the saved and reloaded dataframe shows no difference.
The ctime_raw column caused serialization problems, so only ctime (in
iso-8601 format) is used.
2024-07-12 14:41:05 -07:00
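A sketch of the .csv.gz round trip discussed above, using only the standard library; the file name and columns are illustrative.

    package main

    import (
        "compress/gzip"
        "encoding/csv"
        "log"
        "os"
    )

    func main() {
        rows := [][]string{
            {"owner", "name", "ctime"},
            {"wjakob", "nanobind", "2024-05-13T12:04:01.593586"}, // iso-8601 ctime
        }

        // Write gzip-compressed CSV.
        f, err := os.Create("db-index.csv.gz")
        if err != nil {
            log.Fatal(err)
        }
        zw := gzip.NewWriter(f)
        cw := csv.NewWriter(zw)
        if err := cw.WriteAll(rows); err != nil {
            log.Fatal(err)
        }
        zw.Close()
        f.Close()

        // Read back and compare row counts.
        f, err = os.Open("db-index.csv.gz")
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()
        zr, err := gzip.NewReader(f)
        if err != nil {
            log.Fatal(err)
        }
        back, err := csv.NewReader(zr).ReadAll()
        if err != nil {
            log.Fatal(err)
        }
        if len(back) != len(rows) {
            log.Fatal("reloaded index differs")
        }
        log.Println("saved and reloaded index match in row count")
    }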
Michael Hohn
3df1cac5ae Clean up package info 2024-07-10 15:38:59 -07:00
Michael Hohn
dcc32ea8ab Add documentation style sheet and Makefile entry 2024-07-10 15:27:09 -07:00
Michael Hohn
3c8db9cbe4 Put the DB code into a package 2024-07-10 15:04:09 -07:00
Michael Hohn
be1304bdd9 Add to .dockerignore to reduce build time 2024-07-10 13:23:08 -07:00
Michael Hohn
8965725e42 Replaced the dynamic table type ArtifactLocation with struct keys
The original is present in comment form for reference
2024-07-10 13:08:40 -07:00
Michael Hohn
2df48b9f98 Collect DB information from file system and render it 2024-07-10 09:11:21 -07:00
Michael Hohn
8d80272922 Add client/ setup and plan 2024-07-09 10:37:41 -07:00
Michael Hohn
e3f4d9f012 Use QL_DB_BUCKET_NAME in shell and go 2024-07-08 14:23:25 -07:00
Michael Hohn
3566f5169e Type checking fix: Restrict the keys / values for ArtifactLocation and centralize the common ones 2024-07-08 12:07:46 -07:00
Michael Hohn
b3cf7a4f65 Introduce explicit type QueryLanguage = string and update code to clarify
Previously:
- There is confusion between nameWithOwner and queryLanguage.  Both are strings.
  Between

        runResult, err := codeql.RunQuery(databasePath, job.QueryLanguage, queryPackPath, tempDir)
    (agent.go l205)

  and

        func RunQuery(database string, nwo string, queryPackPath string, tempDir string) (*RunQueryResult, error)

  QueryLanguage is suddenly name with owner in the code.

  Adding some debugging shows the value is the query language in the two places it is used:

        server         | 2024/07/03 18:30:15 DEBUG Processed request info location="{Data:map[bucket:packs key:1]}" language=cpp
        ...
        agent          | 2024/07/03 18:30:15 DEBUG XX: is nwo a name/owner, or the original callers' queryLanguage? nwo=cpp
        ...
        agent          | 2024/07/03 18:30:19 DEBUG XX: 2: is nwo a name/owner, or the original callers' queryLanguage? nwo=cpp

Changes:
- Introduce explicit type QueryLanguage = string and update code to clarify
- inline trivial function
2024-07-03 13:30:02 -07:00
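For reference, a sketch of the distinction: the commit's alias (type QueryLanguage = string) documents intent but still interchanges freely with string, while defined types (no '=') would turn the nwo/language mix-up described above into a compile error. The defined types here are illustrative, not the project's actual choice.

    package main

    import "fmt"

    // The commit's alias: still just a string.
    type QueryLanguage = string

    // Defined types: the compiler rejects mixing them up.
    type NameWithOwner string
    type Language string

    func runQuery(database string, lang Language) {
        fmt.Printf("running %s queries against %s\n", lang, database)
    }

    func main() {
        var lang Language = "cpp"
        runQuery("/tmp/db/codeql_db", lang)

        var nwo NameWithOwner = "tdlib/telegram-bot-api"
        _ = nwo
        // runQuery("/tmp/db/codeql_db", nwo) // compile error with defined types

        var q QueryLanguage = "cpp" // alias: interchangeable with string
        _ = q
    }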
Michael Hohn
07f93f3d27 Tested zero-length repository list
by submitting a non-existent target repository.
2024-07-03 08:55:27 -07:00
Michael Hohn
7413e23bab Add reverse proxy and configuration for replay capability 2024-07-01 12:03:24 -07:00
Michael Hohn
380e90135a Add the submitEmptyStatusResponse special case 2024-07-01 10:54:46 -07:00
Michael Hohn
1642894ccf Added note about querypackurl 2024-06-27 14:53:52 -07:00
Michael Hohn
c54bda8432 fix regression from 0cffb3c8 2024-06-27 14:22:52 -07:00
Michael Hohn
17bf9049e4 Add note about docker-compose force rebuild 2024-06-27 11:59:34 -07:00
Michael Hohn
62a7b227f0 Add 'revive' linter setup 2024-06-26 10:35:10 -07:00
Michael Hohn
b543cebfac Add golangci linter setup 2024-06-26 10:21:52 -07:00
Michael Hohn
d145731c4b WIP: marked special case of 0 jobs 2024-06-26 09:27:27 -07:00
Michael Hohn
0cffb3c849 Simplify struct SessionInfo and adjoining code 2024-06-25 18:57:27 -07:00
Michael Hohn
9d1a891c72 Add to README, add Makefile with development targets 2024-06-25 12:04:46 -07:00
Nicolas Will
b4d9833da3 Resolve status logic error and refactor server.go 2024-06-24 22:31:19 -04:00
Nicolas Will
e0cbc01d21 Fully implement local and container MRVA 2024-06-24 01:31:28 -04:00
135 changed files with 8330 additions and 1213 deletions

.dockerignore Normal file

@@ -0,0 +1,9 @@
# Excludes
/dbstore-data
/qpstore-data
/test-data
/venv
/client
/cmd/server/var
/.git

.env.container Normal file

@@ -0,0 +1,12 @@
MRVA_RABBITMQ_HOST=rabbitmq
MRVA_RABBITMQ_PORT=5672
MRVA_RABBITMQ_USER=user
MRVA_RABBITMQ_PASSWORD=password
MINIO_ROOT_USER=user
MINIO_ROOT_PASSWORD=mmusty8432
ARTIFACT_MINIO_ENDPOINT=artifactstore:9000
ARTIFACT_MINIO_ID=${MINIO_ROOT_USER}
ARTIFACT_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
QLDB_MINIO_ENDPOINT=dbstore:9000
QLDB_MINIO_ID=${MINIO_ROOT_USER}
QLDB_MINIO_SECRET=${MINIO_ROOT_PASSWORD}

.gitignore vendored

@@ -4,6 +4,9 @@ cmd/server/var/
# vscode project dir
.vscode/
# idea project dir
.idea/
# Compiled binary
cmd/server/server
cmd/agent/agent
@@ -41,3 +44,16 @@ go.work.sum
# env file
.env
/artifactstore-data/.minio.sys
/qldbminio/qldb
.ipynb_checkpoints/
venv/
venv-*/
*.egg-info
__pycache__
README.html
ChangeLog
notes/*.html
# Make timestamp files
mk.*

.golangci.yml Normal file

@@ -0,0 +1,29 @@
linters:
enable:
- staticcheck
- unused
- decorder
- errchkjson
- exhaustruct
- gochecknoinits
- gochecksumtype
- goconst
- gocritic
- godox
- lll
- loggercheck
- revive
- sloglint
- tagalign
- unparam
linters-settings:
revive:
config: .revive.toml
staticcheck:
checks:
- "SA"
issues:
format: "format: {{.FromLinter}}: {{.Text}}"

.revive.toml Normal file

@@ -0,0 +1,13 @@
ignoreGeneratedHeader = true
[rule.blank-imports]
Arguments = [true]
[[rule]]
name = "max-public-identifier-length"
arguments = [15] # Maximum length for public identifiers
[[rule]]
name = "max-private-identifier-length"
arguments = [15] # Maximum length for private identifiers

Makefile Normal file

@@ -0,0 +1,55 @@
all: server agent
.PHONY: view
view: README.html
open $<
html: README.html
%.html: %.md
pandoc --toc=true --standalone $< --out $@
# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
cd client/containers/qldbtools && \
docker build -t client-qldbtools-container:0.1.24 .
touch $@
# Run a shell in the container with the qldbtools
dbt-run: mk.client-qldbtools-container
docker run --rm -it client-qldbtools-container:0.1.24 /bin/bash
# Run one of the scripts in the container as check
dbt-check: mk.client-qldbtools-container
docker run --rm -it client-qldbtools-container:0.1.24 mc-db-initial-info
dbt-push: mk.dbt-push
mk.dbt-push: mk.client-qldbtools-container
docker tag client-qldbtools-container:0.1.24 ghcr.io/hohn/client-qldbtools-container:0.1.24
docker push ghcr.io/hohn/client-qldbtools-container:0.1.24
touch $@
server:
cd cmd/server && GOOS=linux GOARCH=arm64 go build
agent:
cd cmd/agent && GOOS=linux GOARCH=arm64 go build
fullbuild:
cd cmd/server && GOOS=linux GOARCH=arm64 go build -a
sendsubmit:
cd tools && sh ./submit-request.curl
# Requires
# go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
lint:
golangci-lint run cmd/... pkg/...
deps:
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/server | dot -Tpdf > deps-server.pdf && open deps-server.pdf
depa:
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/agent | dot -Tpdf > deps-agent.pdf && open deps-agent.pdf


@@ -6,6 +6,52 @@ TODO Style notes
- NO package init() functions
- Dynamic behaviour must be explicit
## Client CodeQL Database Selector
Separate from the server's downloading of databases, a client-side interface is needed to generate the `databases.json` file. This
1. must be usable from the shell
2. must be interactive (Python, Jupyter)
3. is session based to allow iterations on selection / narrowing
4. must be queryable. There is no need to reinvent sql / dataframes
Python with dataframes is ideal for this; the project is in `client/`.
## Reverse proxy
For testing, replay flows using mitmweb. This is faster and simpler than using
gh-mrva or the VS Code plugin.
- Set up the virtual environment and install tools
python3.11 -m venv venv
source venv/bin/activate
pip install mitmproxy
For intercepting requests:
1. Start mitmproxy to listen on port 8080 and forward requests to port 8081, with
web interface
mitmweb --mode reverse:http://localhost:8081 -p 8080
1. Change `server` ports in `docker-compose.yml` to
ports:
- "8081:8080" # host:container
1. Start the containers.
1. Submit requests.
1. Save the flows for later replay.
One such session is in `tools/mitmweb-flows`; it can be loaded to replay the
requests:
1. start `mitmweb --mode reverse:http://localhost:8081 -p 8080`
2. `file` > `open` > `tools/mitmweb-flows`
3. replay at least the submit, status, and download requests
## Cross-compile server on host, run it in container
These are simple steps using a single container.
@@ -31,7 +77,10 @@ These are simple steps using a single container.
cd /mrva/mrvacommander/cmd/server/ && ./server
## Using docker-compose
### Steps to build and run the server
Steps to build and run the server in a multi-container environment set up by
docker-compose.
1. Built the server-image, above
@@ -53,6 +102,28 @@ These are simple steps using a single container.
cd /mrva/mrvacommander/cmd/server/
./server -loglevel=debug -mode=container
1. Test server from the host via
cd ~/work-gh/mrva/mrvacommander/tools
sh ./request_16-Jun-2024_11-33-16.curl
1. Follow server logging via
cd ~/work-gh/mrva/mrvacommander
docker-compose up -d
docker-compose logs -f server
1. Completely rebuild all containers. Useful when running into docker errors
cd ~/work-gh/mrva/mrvacommander
docker-compose up --build
1. Start the server containers and the desktop/demo containers
cd ~/work-gh/mrva/mrvacommander/
docker-compose down --remove-orphans
docker-compose -f docker-compose-demo.yml up -d
1. Test server via remote client by following the steps in [gh-mrva](https://github.com/hohn/gh-mrva/blob/connection-redirect/README.org#compacted-edit-run-debug-cycle)
### Some general docker-compose commands

client/Plan.ipynb Normal file

File diff suppressed because one or more lines are too long

@@ -0,0 +1,64 @@
# ######################
# Use an official Golang image as the base image
FROM golang:1.22 AS builder
# Set the working directory inside the container
WORKDIR /work-gh/mrva/gh-mrva
# Clone the repository
RUN git clone https://github.com/hohn/gh-mrva.git . &&\
git checkout hohn-0.1.24-demo
# Download dependencies
RUN go mod download
# Build the Go binary
RUN go build .
# ######################
# Provide codeql and java
#
FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
unzip \
curl \
ca-certificates \
default-jdk
# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
fi && \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr
# ######################
# Set the working directory inside the final image
WORKDIR /app
# Copy the binary from the builder stage
COPY --from=builder /work-gh/mrva/gh-mrva/gh-mrva /usr/local/bin/gh-mrva
# Put CodeQL on the PATH
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/codeql
# Run forever
CMD ["tail", "-f", "/dev/null"]


@@ -0,0 +1,13 @@
ghm: mk.client-ghmrva-container
mk.client-ghmrva-container:
docker build -t client-ghmrva-container:0.1.24 .
touch $@
ghm-push: mk.ghm-push
mk.ghm-push: mk.client-ghmrva-container
docker tag client-ghmrva-container:0.1.24 ghcr.io/hohn/client-ghmrva-container:0.1.24
docker push ghcr.io/hohn/client-ghmrva-container:0.1.24
touch $@
ghm-run:
docker run --rm -it ghcr.io/hohn/client-ghmrva-container:0.1.24 /bin/bash


@@ -0,0 +1,16 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Build
cd ~/work-gh/mrva/mrvacommander/client/containers/ghmrva/
make ghm
# Run
docker run -ti client-ghmrva-container:0.1.24 /bin/bash
# In the container
gh-mrva -h
codeql -h
# Push
make ghm-push
#+END_SRC


@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim
# Install git
RUN apt-get update && apt-get install -y git
# Create the required directory structure
RUN mkdir -p /work-gh/mrva/
# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
cd mrvacommander && \
git checkout hohn-0.1.24-demo
# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/
# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip
# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Install qldbtools
RUN pip install .
# Run forever
CMD ["tail", "-f", "/dev/null"]

Binary files not shown.

@@ -0,0 +1,23 @@
{"git_branch": "HEAD", "git_commit_id": "2b41915dac8966e95f9e63638d30769b0d69ad68", "git_repo": "aircrack-ng", "ingestion_datetime_utc": "2024-06-07 16:57:47.683012+00:00", "result_url": "http://hepc/db-collection-py/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.4", "projname": "aircrack-ng/aircrack-ng"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "9a9308fd5477d2a44f4e491d5a712546d4a2b3e4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-22 13:30:21.681180+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0189aa.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "34412555665923bc07d43ce970e9d81be3795de7", "git_repo": "UEFITool", "ingestion_datetime_utc": "2024-07-04 19:00:38.543297+00:00", "result_url": "http://hepc/db-collection-py/UEFITool-UEFITool-ctsj-ee2d3c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.6", "projname": "UEFITool/UEFITool"}
{"git_branch": "HEAD", "git_commit_id": "00aa56f5257060304d41f09651c6ab58ee6104d6", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-18 14:12:52.904410+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0c6575.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "e4bffa0a7450e1abd9f4df9565728ae18d86cfd2", "git_repo": "attrs", "ingestion_datetime_utc": "2024-07-18 22:34:57.795427+00:00", "result_url": "http://hepc/db-collection-py/attrs-attrs-ctsj-e2c939.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "attrs/attrs"}
{"git_branch": "HEAD", "git_commit_id": "9620901afce56f720e856aca600951c9b61a9460", "git_repo": "apprise", "ingestion_datetime_utc": "2024-07-22 22:26:48.720348+00:00", "result_url": "http://hepc/db-collection-py/apprise-apprise-ctsj-3f4a4e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "apprise/apprise"}
{"git_branch": "HEAD", "git_commit_id": "c38e6c8cfba28980aea8f895c71b376e8a5155d5", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2022-04-16T12:45:56.739003883Z", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d6cf6.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.8.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "18f6be580b12dc406ef356b2cd65f47c24fce63e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 05:46:23.392157+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d667f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a587921bac074b1bd1b0a0a5536587660a9b954e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 16:13:39.094478+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a6352.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9b361c7ff497d57651856650667aece8230fab6d", "git_repo": "BentoML", "ingestion_datetime_utc": "2024-07-24 02:17:07.095690+00:00", "result_url": "http://hepc/db-collection-py/BentoML-BentoML-ctsj-d6963d.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "BentoML/BentoML"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "53ad2da1a8e6e79e0986ddfa3a45e1db6fdd491c", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-14 02:24:19.208812+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-01864e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "db8f1a7930c6b5826357646746337dafc983f953", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2023-11-22 01:18:25.079473+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-099796.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.2", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "f8df9dd749a549dec20aa286a7639ba04190faab", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-12 16:39:28.854142+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d7b69.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "b5274976cb0a792d05d541a749c0adcd9d20062d", "git_repo": "behave", "ingestion_datetime_utc": "2024-05-11 19:20:51.916333+00:00", "result_url": "http://hepc/db-collection-py/behave-behave-ctsj-b297b5.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "behave/behave"}
{"git_branch": "HEAD", "git_commit_id": "4c825c198df470506b0f84da0b25b3b385150dcb", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-04-25 03:26:03.986270+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-035849.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a8b8ff0acc6fcc629d08a3a9952f83be56a9a3c3", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-03 13:30:48.829134+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-051a5c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9ef05731e7c6cbad2e897faa7c526558eed3ceaa", "git_repo": "aws-sam-cli", "ingestion_datetime_utc": "2024-05-14 01:03:18.130142+00:00", "result_url": "http://hepc/db-collection-py/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sam-cli/aws-sam-cli"}
{"git_branch": "HEAD", "git_commit_id": "16865390a653ceaeabe354df1b37e4a775161a70", "git_repo": "aws-sdk-pandas", "ingestion_datetime_utc": "2024-05-13 15:13:31.853042+00:00", "result_url": "http://hepc/db-collection-py/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sdk-pandas/aws-sdk-pandas"}
{"git_branch": "HEAD", "git_commit_id": "093856995af0811d3ebbe8c179b8febf4ae706f0", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-03-20 14:18:02.500590+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-103a8a.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.16.4", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "0573e6f96637f08fb4cb85e0552f0622d36827d4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-01-24 09:21:05.977294+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0cdf2f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "93314995a5ee2217d58c3d9cbcbdef5df6c34566", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-09 05:29:25.243273+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a35a1.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}


@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim
# Install git
RUN apt-get update && apt-get install -y git
# Create the required directory structure
RUN mkdir -p /work-gh/mrva/
# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
cd mrvacommander && \
git checkout hohn-0.1.24-demo
# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/
# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip
# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Install qldbtools
RUN pip install .
# Run forever
CMD ["tail", "-f", "/dev/null"]


@@ -0,0 +1,25 @@
DBT_TARGET := client-qldbtools-container:0.1.24
# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
docker build -t ${DBT_TARGET} .
touch $@
# Run a shell in the container with the qldbtools
dbt-run: dbt
docker run --rm -it ${DBT_TARGET} /bin/bash
# Run one of the scripts in the container as check. Should exit with error.
dbt-check: dbt
docker run --rm -it ${DBT_TARGET} mc-db-initial-info
dbt-push: mk.dbt-push
mk.dbt-push: dbt
docker tag ${DBT_TARGET} ghcr.io/hohn/${DBT_TARGET}
docker push ghcr.io/hohn/${DBT_TARGET}
touch $@
dbt-test:
docker pull ghcr.io/hohn/${DBT_TARGET}
docker run --rm -it --name test-dbt-server ghcr.io/hohn/${DBT_TARGET} sh


@@ -0,0 +1,13 @@
* MRVA python tools container
Set up Docker image with python 3.11 and pip and the qldbtools. The targets are
in the =Makefile=; most important are
#+BEGIN_SRC sh
# Build
make dbt
# Check
make dbt-check
#+END_SRC


@@ -0,0 +1,67 @@
FROM codercom/code-server:4.92.2-debian
# ======================
# Pre-install a custom JDK for this platform and redirect CodeQL to it
USER root
ENV DEBIAN_FRONTEND=noninteractive
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
ca-certificates \
curl \
default-jdk \
git \
libcurl4-openssl-dev \
libssl-dev \
python3 \
python3-dev \
unzip
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest
# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
fi && \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
# ======================
# Install code-server
USER coder
# Set environment variables
ENV PASSWORD mrva
# Install VS Code extensions as user root -- globally
RUN code-server --install-extension ms-python.python \
&& code-server --install-extension esbenp.prettier-vscode \
&& code-server --install-extension GitHub.vscode-codeql
# Expose the port that Code Server runs on
EXPOSE 9080
# Point CodeQL to the java binary for this platform
ENV CODEQL_JAVA_HOME=/usr
# Add
# codeQl.cli.executablePath
# to user settings.
# This is in addition to the environment variable CODEQL_JAVA_HOME which has no
# effect on the plugin
USER root
COPY ./settings.json /home/coder/.local/share/code-server/User/
RUN chown -R coder:coder /home/coder/.local/share/code-server/
# Start Code Server
ENTRYPOINT ["dumb-init", "code-server", "--bind-addr", "0.0.0.0:9080", "."]
# Run as the coder user
USER coder


@@ -0,0 +1,119 @@
* MRVA VS Code server container
On the host:
#+BEGIN_SRC sh
# Build the container via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker build -t code-server-initialized:0.1.24 .
# Run the container in standalone mode via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \
-d -p 9080:9080 code-server-initialized:0.1.24
#+END_SRC
- Connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.
Inside the container:
- Setup inside the container
#+BEGIN_SRC shell
cd
export PATH=/opt/codeql:$PATH
codeql pack init qldemo
cd qldemo
codeql pack add codeql/python-all@1.0.6
#+END_SRC
- Create a new file =qldemo/simple.ql= with this query. Open it in VS Code.
The plugin will download the CodeQL binaries (but never use them -- the
configuration redirects)
#+BEGIN_SRC sh
cd
cat > qldemo/simple.ql <<eof
import python
select 42
eof
#+END_SRC
- Create database.
#+BEGIN_SRC sh
cd ~/qldemo
cat > short.py <<EOF
print('hello world')
EOF
export PATH=/opt/codeql:$PATH
codeql database create --language=python -s . -v short-db
#+END_SRC
- Set the database as default and run the query =simple.ql=
- Add the customized VS Code plugin
On the host
#+BEGIN_SRC sh
cd ~/work-gh/mrva/vscode-codeql
git checkout mrva-standalone
# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install correct node version
cd ./extensions/ql-vscode
nvm install
# Build the extension
cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
npm install
npm run build
#+END_SRC
In the container
#+BEGIN_SRC sh
# Install extension
cd /work-gh/mrva/vscode-codeql/dist
/bin/code-server --force --install-extension vscode-codeql-*.vsix
#+END_SRC
- Capture the state of this container and create a new image from it
#+BEGIN_SRC sh
docker ps
# Check id column. Use it below.
docker commit 2df5732c1850 code-server-initialized:0.1.24
# Keep the sha
# sha256:87c8260146e28aed25b094d023a30a015a958f829c09e66cb50ccca2c4a2a000
docker kill 2df5732c1850
# Make sure the image tag matches the sha
docker inspect code-server-initialized:0.1.24 |grep Id
# Run the image and check
docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
code-server-initialized:0.1.24
#+END_SRC
Again connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.
- Push this container
#+BEGIN_SRC sh
# Common
export CSI_TARGET=code-server-initialized:0.1.24
# Push container
docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET}
docker push ghcr.io/hohn/${CSI_TARGET}
#+END_SRC
- Test the registry image
#+BEGIN_SRC sh
# Test pushed container
docker pull ghcr.io/hohn/${CSI_TARGET}
docker run --rm -d -p 9080:9080 --name test-code-server-codeql\
ghcr.io/hohn/${CSI_TARGET}
#+END_SRC
In the container, inside the running vs code:
- Check the plugin version number via the command
: codeql: copy version information


@@ -0,0 +1,4 @@
{
"codeQL.runningQueries.numberOfThreads": 2,
"codeQL.cli.executablePath": "/opt/codeql/codeql"
}

client/qldbtools/.vscode/launch.json vendored Normal file

@@ -0,0 +1,24 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File with Arguments",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"args": [
"--db_collection_dir",
"db-collection-py",
"--starting_path",
"$HOME/work-gh/mrva/mrva-open-source-download"
],
"justMyCode": true,
"stopOnEntry": false
}
]
}


@@ -0,0 +1,2 @@
doc:
pandoc -s --css=./gfm.css README.md > foo.html && open foo.html

client/qldbtools/README.org Normal file

@@ -0,0 +1,171 @@
* Introduction to hepc -- HTTP End Point for CodeQL
#+BEGIN_SRC sh
1:$ ./bin/hepc-init --db_collection_dir db-collection --starting_path ~/work-gh/mrva/mrva-open-source-download
[2024-11-19 14:12:06] [INFO] searching for db.zip files
[2024-11-19 14:12:08] [INFO] collecting information from db.zip files
[2024-11-19 14:12:08] [INFO] Extracting from /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/aircrack-ng/aircrack-ng/code-scanning/codeql/databases/cpp/db.zip
[2024-11-19 14:12:08] [INFO] Adding record to db-collection/metadata.json
#+END_SRC
* Introduction to qldbtools
=qldbtools= is a Python package for selecting sets of CodeQL databases
to work on. It uses a (pandas) dataframe in the implementation, but all
results sets are available as CSV files to provide flexibility in the
tools you want to work with.
The rationale is simple: When working with larger collections of CodeQL
databases, spread over time, languages, etc., many criteria can be used
to select the subset of interest. This package addresses that aspect of
MRVA (multi repository variant analysis).
For example, consider this scenario from an enterprise. We have 10,000
repositories in C/C++, 5,000 in Python. We build CodeQL databases weekly
and keep the last 2 years worth. This means for the last 2 years there
are
#+begin_example
(10000 + 5000) * 52 * 2 = 1560000
#+end_example
databases to select from for a single MRVA run. 1.5 million rows are
readily handled by a pandas (or R) dataframe.
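As a sketch, such a selection is an ordinary dataframe filter (the CSV
path here is hypothetical; the column names are those listed below):
#+BEGIN_SRC python
import pandas as pd

df = pd.read_csv("scratch/db-info-2.csv")
# creationTime strings come in mixed formats; normalize before comparing
created = pd.to_datetime(df["creationTime"], utc=True, format="mixed")
recent_cpp = df[(df["language"] == "cpp") &
                (created >= pd.Timestamp("2024-01-01", tz="UTC"))]
recent_cpp.to_csv("scratch/cpp-recent.csv", index=False)
#+END_SRC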
The full list of criteria currently encoded via the columns is
- owner
- name
- CID
- cliVersion
- creationTime
- language
- sha -- git commit sha of the code the CodeQL database is built against
- baselineLinesOfCode
- path
- db_lang
- db_lang_displayName
- db_lang_file_count
- db_lang_linesOfCode
- ctime
- primaryLanguage
- finalised
- left_index
- size
The minimal criteria needed to distinguish databases in the above
scenario are
- cliVersion
- creationTime
- language
- sha
These are encoded in the single custom id column 'CID'.
Thus, a database can be fully specified by an (owner, name, CID) tuple,
and this is encoded in the names used by the MRVA server and clients.
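A sketch of the encoding using the =qldbtools.utils= helpers (the field
values are taken from the sample table below; the CID is the 6-hex-digit
hash shown there):
#+BEGIN_SRC python
import qldbtools.utils as utils

cid = utils.cid_hash(("2.16.1",                                    # cliVersion
                      "2024-02-08 14:18:20.983830+00:00",          # creationTime
                      "python",                                    # language
                      "c94dd024b1f5410ef160ff82a8423141e2bbb6b4")) # sha
print(utils.form_db_req_name("1adrianb", "face-alignment", cid))
# -> '1adrianb/face-alignment' with the 'ctsj' marker and CID appended
#+END_SRC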
The selection of databases can of course be done using the whole table.
For an example of the workflow, see [[#command-line-use][section
'command line use']].
A small sample of a full table:
| | owner | name | CID | cliVersion | creationTime | language | sha | baselineLinesOfCode | path | db_lang | db_lang_displayName | db_lang_file_count | db_lang_linesOfCode | ctime | primaryLanguage | finalised | left_index | size |
|---+----------+----------------+--------+------------+----------------------------------+----------+------------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+-------------+---------------------+--------------------+---------------------+----------------------------+-----------------+-----------+------------+----------|
| 0 | 1adrianb | face-alignment | 1f8d99 | 2.16.1 | 2024-02-08 14:18:20.983830+00:00 | python | c94dd024b1f5410ef160ff82a8423141e2bbb6b4 | 1839 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/1adrianb/face-alignment/code-scanning/codeql/databases/python/db.zip | python | Python | 25 | 1839 | 2024-07-24T14:09:02.187201 | python | 1 | 1454 | 24075001 |
| 1 | 2shou | TextGrocery | 9ab87a | 2.12.1 | 2023-02-17T11:32:30.863093193Z | cpp | 8a4e41349a9b0175d9a73bc32a6b2eb6bfb51430 | 3939 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/2shou/TextGrocery/code-scanning/codeql/databases/cpp/db.zip | no-language | no-language | 0 | -1 | 2024-07-24T06:25:55.347568 | cpp | nan | 1403 | 3612535 |
| 2 | 3b1b | manim | 76fdc7 | 2.17.5 | 2024-06-27 17:37:20.587627+00:00 | python | 88c7e9d2c96be1ea729b089c06cabb1bd3b2c187 | 19905 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/3b1b/manim/code-scanning/codeql/databases/python/db.zip | python | Python | 94 | 19905 | 2024-07-24T13:23:04.716286 | python | 1 | 1647 | 26407541 |
** Installation
- Set up the virtual environment and install tools
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
python3.11 -m venv venv
source venv/bin/activate
pip install --upgrade pip
# From requirements.txt
pip install -r requirements.txt
# Or explicitly
pip install jupyterlab pandas ipython
pip install lckr-jupyterlab-variableinspector
#+end_example
- Local development
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
source venv/bin/activate
pip install --editable .
#+end_example
The =--editable= install *should* symlink all scripts; use =./bin/*= to be sure.
- Full installation
#+begin_example
pip install qldbtools
#+end_example
** Use as library
The best way to examine the code is to start from the high-level scripts
in =bin/=; a minimal session follows.
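For instance, mirroring =bin/mc-db-initial-info= (the download directory
is an example path):
#+BEGIN_SRC python
import pandas as pd
import qldbtools.utils as utils

# Walk the tree and collect one DBInfo record per db.zip found
dbs = list(utils.collect_dbs("~/work-gh/mrva/mrva-open-source-download"))
df = pd.DataFrame([d.__dict__ for d in dbs])
print(df[["owner", "name", "language", "size"]].head())
#+END_SRC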
** Command line use
Initial information collection requires a unique file path so it can be
run repeatedly over DB collections with the same (owner,name) but other
differences -- namely, in one or more of
- creationTime
- sha
- cliVersion
- language
Those fields are collected in =bin/mc-db-refine-info=.
An example workflow with commands grouped by data files follows.
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
./bin/mc-db-view-info < scratch/db-info-2.csv &
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-view-info < scratch/db-info-3.csv &
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+end_example
To see the full information for a selection, use
=mc-rows-from-mrva-list=:
#+begin_example
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
scratch/db-info-3.csv > scratch/selection-full-info
#+end_example
To check, e.g., the =language= column:
#+begin_example
csvcut -c language scratch/selection-full-info
#+end_example
** Notes
The =preview-data= plugin for VS Code has a bug: it displays =0= instead
of =0e3379= for the following entries. Other entries show the same
malfunction.
#+begin_example
CleverRaven,Cataclysm-DDA,0e3379,2.17.0,2024-05-08 12:13:10.038007+00:00,cpp,5ca7f4e59c2d7b0a93fb801a31138477f7b4a761,578098.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1228.0,578098.0,2024-05-13T12:14:54.650648,cpp,True,4245,563435469
CleverRaven,Cataclysm-DDA,3231f7,2.18.0,2024-07-18 11:13:01.673231+00:00,cpp,db3435138781937e9e0e999abbaa53f1d3afb5b7,579532.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1239.0,579532.0,2024-07-24T02:33:23.900885,cpp,True,1245,573213726
#+end_example

client/qldbtools/bin/hepc-init.sh Executable file

@@ -0,0 +1,144 @@
#!/bin/bash
#* Utility functions
log() {
local level="$1"
shift
local color_reset="\033[0m"
local color_info="\033[1;34m"
local color_warn="\033[1;33m"
local color_error="\033[1;31m"
local color
case "$level" in
INFO) color="$color_info" ;;
WARN) color="$color_warn" ;;
ERROR) color="$color_error" ;;
*) color="$color_reset" ;;
esac
echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
}
usage() {
echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
echo
echo "Options:"
echo " --db_collection_dir <directory> Specify the database collection directory."
echo " --starting_path <path> Specify the starting path."
echo " -h Show this help message."
exit 1
}
#* Initialize and parse arguments
set -euo pipefail # exit on error, unset var, pipefail
trap 'rm -fR /tmp/hepc.$$-*' EXIT
starting_dir=$(pwd)
db_collection_dir=""
starting_path=""
# Parse arguments
while [[ $# -gt 0 ]]; do
case "$1" in
--db_collection_dir)
shift
if [[ -z "$1" || "$1" == -* ]]; then
echo "Error: --db_collection_dir requires a directory as an argument."
usage
fi
db_collection_dir="$1"
;;
--starting_path)
shift
if [[ -z "$1" || "$1" == -* ]]; then
echo "Error: --starting_path requires a path as an argument."
usage
fi
starting_path="$1"
;;
-h)
usage
;;
*)
echo "Error: Unknown option '$1'."
usage
;;
esac
shift
done
# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
echo "Error: --db_collection_dir is required."
usage
fi
if [[ -z "$starting_path" ]]; then
echo "Error: --starting_path is required."
usage
fi
#* Find all DBs
log INFO "searching for db.zip files"
find "${starting_path}" -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths
#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p $db_collection_dir
cat /tmp/hepc.$$-paths | while read -r zip_path
do
log INFO "Extracting from ${zip_path}"
zip_dir=$(dirname ${zip_path})
zip_file=$(basename ${zip_path})
unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
# The content may be LANGUAGE/codeql-database.yml
#* For every database, create a metadata record.
mkdir -p /tmp/hepc.$$-zip
cd /tmp/hepc.$$-zip/*
# Information from codeql-database.yml
primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
sha=$(yq '.creationMetadata.sha' codeql-database.yml)
cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
repo=${sourceLocationPrefix##*/} # keep only last component
# Get sourceLocationPrefix[-2]
owner="${sourceLocationPrefix%/*}" # strip last component
owner="${owner##*/}" # keep only last component
# cid for repository / db
cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
awk '{print substr($1, 1, 6)}')
# Prepare the metadata record for this DB.
new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
record='
{
"git_branch": "HEAD",
"git_commit_id": "'${sha}'",
"git_repo": "'${repo}'",
"ingestion_datetime_utc": "'${creationTime}'",
"result_url": "'${result_url}'",
"tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
"tool_name": "codeql-'${primaryLanguage}'",
"tool_version": "'${cliVersion}'",
"projname": "'${owner}/${repo}'"
}
'
cd "$starting_dir"
rm -fR /tmp/hepc.$$-zip
echo "$record" >> $db_collection_dir/metadata.json
#* Link original file path to collection directory for serving. Use name including
# the cid and field separator ctsj
cd ${db_collection_dir}
[ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}
# Interim cleanup; the quoted glob would match nothing, and the paths file
# must survive until the loop finishes
rm -fR /tmp/hepc.$$-zip
done


@@ -0,0 +1,104 @@
/*
dependencies
go get -u golang.org/x/exp/slog
on-the-fly
go run bin/hepc-serve.go --codeql-db-dir db-collection-py-1
compiled
cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
go build -o ./bin/hepc-serve.bin ./bin/hepc-serve.go
test
curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
curl $(head -1 foo | jq -r ".result_url" |sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip
*/
package main
import (
"flag"
"fmt"
"net/http"
"os"
"path/filepath"
"golang.org/x/exp/slog"
)
var dbDir string
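// serveFile handles /db/<path>: the path is taken relative to the server's
// working directory, and symlinks (as created by hepc-init) are resolved
// before the file is served.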
func serveFile(w http.ResponseWriter, r *http.Request) {
fullPath := r.URL.Path[len("/db/"):]
resolvedPath, err := filepath.EvalSymlinks(fullPath)
if err != nil {
slog.Warn("failed to resolve symlink", slog.String("fullPath", fullPath),
slog.String("error", err.Error()))
http.Error(w, "File not found", http.StatusNotFound)
return
}
if fileInfo, err := os.Stat(resolvedPath); err != nil || fileInfo.IsDir() {
slog.Warn("file not found or is a directory", slog.String("resolvedPath", resolvedPath))
http.Error(w, "File not found", http.StatusNotFound)
return
}
slog.Info("serving file", slog.String("resolvedPath", resolvedPath))
http.ServeFile(w, r, resolvedPath)
}
func serveMetadata(w http.ResponseWriter, r *http.Request) {
metadataPath := filepath.Join(dbDir, "metadata.json")
if fileInfo, err := os.Stat(metadataPath); err != nil || fileInfo.IsDir() {
slog.Warn("metadata.json not found", slog.String("metadataPath", metadataPath))
http.Error(w, "metadata.json not found", http.StatusNotFound)
return
}
slog.Info("serving metadata.json", slog.String("metadataPath", metadataPath))
http.ServeFile(w, r, metadataPath)
}
func logMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
slog.Info("incoming request", slog.String("method", r.Method), slog.String("url", r.URL.Path))
next.ServeHTTP(w, r)
})
}
func main() {
var host string
var port int
flag.StringVar(&dbDir, "codeql-db-dir", "", "Directory containing CodeQL database files (required)")
flag.StringVar(&host, "host", "127.0.0.1", "Host address for the HTTP server")
flag.IntVar(&port, "port", 8080, "Port for the HTTP server")
flag.Parse()
if dbDir == "" {
slog.Error("missing required flag", slog.String("flag", "--codeql-db-dir"))
os.Exit(1)
}
if _, err := os.Stat(dbDir); os.IsNotExist(err) {
slog.Error("invalid directory", slog.String("dbDir", dbDir))
os.Exit(1)
}
slog.Info("starting server", slog.String("host", host), slog.Int("port", port), slog.String("dbDir", dbDir))
mux := http.NewServeMux()
mux.HandleFunc("/db/", serveFile)
mux.HandleFunc("/index", serveMetadata)
mux.HandleFunc("/api/v1/latest_results/codeql-all", serveMetadata)
loggedHandler := logMiddleware(mux)
addr := fmt.Sprintf("%s:%d", host, port)
slog.Info("server listening", slog.String("address", addr))
if err := http.ListenAndServe(addr, loggedHandler); err != nil {
slog.Error("server error", slog.String("error", err.Error()))
}
}


@@ -0,0 +1,108 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
"""
import argparse
import logging
from argparse import Namespace
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import numpy as np
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
""",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('vscode_selection', type=str,
help='VS Code selection file to generate')
parser.add_argument('gh_mrva_selection', type=str,
help='gh-mrva cli selection file to generate')
parser.add_argument('-n', '--num-entries', type=int,
help='Only use N entries',
default=None)
parser.add_argument('-s', '--seed', type=int,
help='Random number seed',
default=4242)
parser.add_argument('-l', '--list-name', type=str,
help='Name of the repository list',
default='mirva-list')
args: Namespace = parser.parse_args()
#
#* Load the information
#
import pandas as pd
import sys
df0: DataFrame = pd.read_csv(sys.stdin)
if args.num_entries is None:
# Use all entries
df1: DataFrame = df0
else:
# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=args.num_entries,
random_state=np.random.RandomState(args.seed))
#
#* Form and save structures
#
repos: list[str] = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
repos.append(utils.form_db_req_name(owner, name, CID))
repo_list_name: str = args.list_name
vsc = {
"version": 1,
"databases": {
"variantAnalysis": {
"repositoryLists": [
{
"name": repo_list_name,
"repositories": repos,
}
],
"owners": [],
"repositories": []
}
},
"selected": {
"kind": "variantAnalysisUserDefinedList",
"listName": repo_list_name
}
}
gh = {
repo_list_name: repos
}
import json
with open(args.vscode_selection, "w") as fc:
json.dump(vsc, fc, indent=4)
with open(args.gh_mrva_selection, "w") as fc:
json.dump(gh, fc, indent=4)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,48 @@
#!/usr/bin/env python
""" Collect information about CodeQL databases from the file system and write out
a table in CSV format.
"""
from argparse import ArgumentParser
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import argparse
import logging
import sys
import pandas as pd
from qldbtools.utils import DBInfo
#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')
#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
description="""Find all CodeQL DBs in and below starting_dir and export a CSV
file with relevant data.""")
parser.add_argument('starting_dir', type=str,
help='The starting directory to search for codeql.')
args = parser.parse_args()
#
#* Collect info
#
# Get the db information in list of DBInfo form
db_base: str = args.starting_dir
dbs: list[DBInfo] = list(utils.collect_dbs(db_base))
dbdf: DataFrame = pd.DataFrame([d.__dict__ for d in dbs])
#
#
#* Write info out
#
dbdf.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,86 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information (like those produced by
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB.
"""
import argparse
import qldbtools.utils as utils
import logging
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information (like those produced by
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB. """)
parser.add_argument('-n', '--num-entries', type=int,
help='Only use N entries',
default=None)
parser.add_argument('-s', '--seed', type=int,
help='Random number seed',
default=4242)
args = parser.parse_args()
#
#* Collect the information and select subset
#
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
# Use all entries
entries = df
else:
# Use num_entries, chosen via pseudo-random numbers
entries = df.sample(n=args.num_entries,
random_state=np.random.RandomState(args.seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"
# Initialize MinIO client
client = Minio(
MINIO_URL.replace("http://", "").replace("https://", ""),
access_key=MINIO_ROOT_USER,
secret_key=MINIO_ROOT_PASSWORD,
secure=False
)
# Create the bucket if it doesn't exist
try:
if not client.bucket_exists(QL_DB_BUCKET_NAME):
client.make_bucket(QL_DB_BUCKET_NAME)
else:
logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
logging.error(f"Error creating bucket: {err}")
# Get info from dataframe and push the files
for index, row in entries[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
new_name = utils.form_db_bucket_name(owner, name, CID)
try:
client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
except S3Error as err:
logging.error(f"Error uploading file {local_path}: {err}")
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,60 @@
#!/usr/bin/env python
""" Read an initial table of CodeQL DB information, produced by
mc-db-initial-info, and collect more detailed information from the database
files. Write out an extended table in CSV format.
"""
from argparse import ArgumentParser
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import argparse
import logging
import pandas as pd
import sys
#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')
#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
description="""Read an initial table of CodeQL DB information, produced by
mc-db-initial-info, and collect more detailed information from the database
files. Write out an extended table in CSV format. """)
args = parser.parse_args()
#
#* Collect the information
# This step is time-intensive so we save the results right after.
d: DataFrame = pd.read_csv(sys.stdin)
joiners: list[DataFrame] = []
for left_index in range(len(d)):  # include the last row
try:
metac: object
cqlc: object
cqlc, metac = utils.extract_metadata(d.path[left_index])
except utils.ExtractNotZipfile:
continue
except utils.ExtractNoCQLDB:
continue
try:
detail_df: DataFrame = utils.metadata_details(left_index, cqlc, metac)
except utils.DetailsMissing:
continue
joiners.append(detail_df)
joiners_df: DataFrame = pd.concat(joiners, axis=0)
full_df: DataFrame = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')
#
#* Save results
#
full_df.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

client/qldbtools/bin/mc-db-unique Executable file

@@ -0,0 +1,122 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and produce a table with unique entries
adding the Cumulative ID (CID) column.
To make this happen:
- Group entries by (owner,name,CID),
sort each group by creationTime,
and keep only the top (newest) element.
- Drop rows that don't have the
| cliVersion |
| creationTime |
| language |
| sha |
columns. There are very few such rows (16 out of 6000 in recent tests)
and their DBs are questionable.
"""
import argparse
import logging
from argparse import Namespace
from typing import Any
from pandas import DataFrame, Series
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information,
narrow to <language>,
group entries by (owner,name), sort each group by
creationTime and keep only the top (newest) element.
""")
parser.add_argument('language', type=str,
help='The language to be analyzed.')
args: Namespace = parser.parse_args()
#
#* Collect the information and select subset
#
import pandas as pd
import sys
import qldbtools.utils as utils
df2: DataFrame = pd.read_csv(sys.stdin)
#
#* Add single uniqueness field -- CID (Cumulative ID)
#
df2['CID'] = df2.apply(lambda row:
utils.cid_hash((
row['cliVersion'],
row['creationTime'],
row['language'],
row['sha'],
)), axis=1)
#
#* Re-order the dataframe columns by importance
# - Much of the data
# 1. Is only conditionally present
# 2. Is extra info, not for the DB proper
# 3. May have various names
#
# - The essential columns are
# | owner |
# | name |
# | language |
# | size |
# | cliVersion |
# | creationTime |
# | sha |
# | baselineLinesOfCode |
# | path |
#
# - The rest are useful; put them last
# | db_lang |
# | db_lang_displayName |
# | db_lang_file_count |
# | db_lang_linesOfCode |
# | left_index |
# | ctime |
# | primaryLanguage |
# | finalised |
df3: DataFrame = df2.reindex( columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID',
'baselineLinesOfCode', 'path', 'db_lang',
'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime',
'primaryLanguage', 'finalised', 'left_index',
'size'])
# Identify rows missing specific entries
rows = ( df3['cliVersion'].isna() |
df3['creationTime'].isna() |
df3['language'].isna() |
df3['sha'].isna() )
df4: DataFrame = df3[~rows]
# Limit to one language
df5 = df4[df4['language'] == args.language]
# Sort and group
df_sorted: DataFrame = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique: DataFrame = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
# Write output
df_unique.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,35 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and display it using pandasui
"""
import argparse
import logging
import sys
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description="Read a table of CodeQL DB information and display it using pandasui")
args = parser.parse_args()
#
#* Collect the information display
#
import pandas as pd
df = pd.read_csv(sys.stdin)
import os
os.environ['APPDATA'] = "needed-for-pandasgui"
from pandasgui import show
show(df)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

client/qldbtools/bin/mc-hepc-init Executable file

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env
# Logging function
def log(level, message):
colors = {
"INFO": "\033[1;34m",
"WARN": "\033[1;33m",
"ERROR": "\033[1;31m",
"RESET": "\033[0m",
}
timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)
# Generate a CID (cumulative id)
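# Note: this Python port hashes with sha256, while hepc-init.sh uses b2sum
# (BLAKE2); both keep only the first 6 hex digits.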
def generate_cid(cli_version, creation_time, primary_language, sha):
hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
return hashlib.sha256(hash_input).hexdigest()[:6]
# Expand environment variables in paths
def expand_path(path):
return local.env.expand(path)
# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
temp_dir = mktemp("-d").strip()
try:
unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)
# Locate the YAML file regardless of its depth
yaml_files = list(local.path(temp_dir).walk(
filter=lambda p: p.name == "codeql-database.yml"))
if not yaml_files:
log("WARN", f"No codeql-database.yml found in {zip_path}")
return
yaml_path = yaml_files[0]
with yaml_path.open("r") as f:
yaml_data = yaml.safe_load(f)
primary_language = yaml_data["primaryLanguage"]
creation_metadata = yaml_data["creationMetadata"]
sha = creation_metadata["sha"]
cli_version = creation_metadata["cliVersion"]
creation_time = creation_metadata["creationTime"]
source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
repo = source_location_prefix.name
owner = source_location_prefix.parent.name
cid = generate_cid(cli_version, creation_time, primary_language, sha)
new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"
metadata = {
"git_branch" : "HEAD",
"git_commit_id" : sha,
"git_repo" : repo,
"ingestion_datetime_utc" : str(creation_time),
"result_url" : result_url,
"tool_id" : "9f2f9642-febb-4435-9204-fb50bbd43de4",
"tool_name" : f"codeql-{primary_language}",
"tool_version" : cli_version,
"projname" : f"{owner}/{repo}",
}
metadata_file = local.path(db_collection_dir) / "metadata.json"
with metadata_file.open("a") as f:
json.dump(metadata, f)
f.write("\n")
link_path = local.path(db_collection_dir) / new_db_fname
if not link_path.exists():
ln("-sf", zip_path, link_path)
except Exception as e:
log("WARN", f"Error processing {zip_path}: {e}")
finally:
rm("-rf", temp_dir)
# Main application class
class DBProcessor(cli.Application):
"""
DBProcessor processes db.zip files found in a starting directory,
symlinks updated names in a collection directory,
and adds a metadata information file "metadata.json" to the directory.
"""
db_collection_dir = cli.SwitchAttr(
"--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
)
starting_path = cli.SwitchAttr(
"--starting_path", str, mandatory=True, help="Specify the starting path"
)
def main(self):
db_collection_dir = expand_path(self.db_collection_dir)
starting_path = expand_path(self.starting_path)
mkdir("-p", db_collection_dir)
log("INFO", f"Searching for db.zip files in {starting_path}")
db_files = find(starting_path, "-type", "f", "-name", "db.zip",
"-size", "+0c").splitlines()
if not db_files:
log("WARN", "No db.zip files found in the specified starting path.")
return
for zip_path in db_files:
process_db_file(zip_path, db_collection_dir)
log("INFO", "Processing completed.")
if __name__ == "__main__":
DBProcessor.run()


@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
from plumbum import cli
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import uvicorn
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
# FastAPI application
app = FastAPI()
db_dir = None # This will be set by the CLI application
@app.get("/db/{file_path:path}")
def serve_file(file_path: str):
"""
Serve files from the database directory, such as .zip files or metadata.json.
"""
logger.info(f"Requested file: {file_path}")
# Resolve symlinks; with strict=True a missing target raises FileNotFoundError
try:
    resolved_path = Path(file_path).resolve(strict=True)
except FileNotFoundError:
    logger.error(f"File not found: {file_path}")
    raise HTTPException(status_code=404, detail=f"{file_path} not found")
logger.info(f"file resolved to: {resolved_path}")
return FileResponse(resolved_path)
@app.get("/index")
@app.get("/api/v1/latest_results/codeql-all")
def serve_metadata_json():
"""
Serve the metadata.json file for multiple routes.
"""
metadata_path = Path(db_dir) / "metadata.json"
logger.info(f"Requested metadata.json at: {metadata_path}")
if not metadata_path.exists():
logger.error("metadata.json not found.")
raise HTTPException(status_code=404, detail="metadata.json not found")
logger.info(f"Serving metadata.json from: {metadata_path}")
return FileResponse(metadata_path)
@app.middleware("http")
async def log_request(request, call_next):
logger.info(f"Incoming request: {request.method} {request.url}")
response = await call_next(request)
return response
class DBService(cli.Application):
"""
DBService serves:
1. CodeQL database .zip files symlinked in the --codeql-db-dir
2. Metadata for those zip files, contained in metadata.json in the same
directory.
The HTTP endpoints are:
1. /db/{filename}
2. /index
3. /api/v1/latest_results/codeql-all
"""
codeql_db_dir = cli.SwitchAttr("--codeql-db-dir", str, mandatory=True,
help="Directory containing CodeQL database files")
host = cli.SwitchAttr("--host", str, default="127.0.0.1",
help="Host address for the HTTP server")
port = cli.SwitchAttr("--port", int, default=8080, help="Port for the HTTP server")
def main(self):
global db_dir
db_dir = Path(self.codeql_db_dir)
if not db_dir.is_dir():
logger.error(f"Invalid directory: {db_dir}")
return 1
logger.info(f"Starting server at {self.host}:{self.port}")
logger.info(f"Serving files from directory: {db_dir}")
# Run the FastAPI server using Uvicorn
uvicorn.run(app, host=self.host, port=self.port)
if __name__ == "__main__":
DBService.run()


@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Script to list full details for a mrva-list file
1. reads files containing
{
"mirva-list": [
"NLPchina/elasticsearch-sqlctsj168cc4",
"LMAX-Exchange/disruptorctsj3e75ec",
"justauth/JustAuthctsj8a6177",
"FasterXML/jackson-modules-basectsj2fe248",
"ionic-team/capacitor-pluginsctsj38d457",
"PaddlePaddle/PaddleOCRctsj60e555",
"elastic/apm-agent-pythonctsj21dc64",
"flipkart-incubator/zjsonpatchctsjc4db35",
"stephane/libmodbusctsj54237e",
"wso2/carbon-kernelctsj5a8a6e",
"apache/servicecomb-packctsj4d98f5"
]
}
2. reads a pandas dataframe stored in a csv file
3. selects all rows from 2. that
- contain the 'owner' column matching the string before the slash from 1. and
- the 'name' column matching the string between the slash and the marker
'ctsj' and
- the 'CID' column matching the string after the marker 'ctsj'
"""
import argparse
import json
import sys
#
#* Process command line
#
parser = argparse.ArgumentParser(
description="""Script to list full details for a mrva-list file""")
parser.add_argument('mrva_list', type=str,
help='The JSON file containing the mrva-list')
parser.add_argument('info_csv', type=str,
help='The CSV file containing the full information')
args = parser.parse_args()
#* Step 1: Read the JSON file containing the "mirva-list"
with open(args.mrva_list, 'r') as f:
data = json.load(f)
# Extract and parse the "mirva-list"
mirva_list = data['mirva-list']
parsed_mirva_list = []
for item in mirva_list:
owner_name = item.split('/')[0]
repo_name = item.split('/')[1].split('ctsj')[0]
cid = item.split('/')[1].split('ctsj')[1]
parsed_mirva_list.append((owner_name, repo_name, cid))
#* Step 2: Read the CSV file into a pandas dataframe
import pandas as pd
df = pd.read_csv(args.info_csv)
#* Step 3: Filter the dataframe based on the parsed "mirva-list"
filtered_df = df[
df.apply(lambda row:
(row['owner'], row['name'], row['CID']) in parsed_mirva_list, axis=1)]
# Write the filtered dataframe to stdout in CSV format
filtered_df.to_csv(sys.stdout, index=False)

client/qldbtools/gfm.css Normal file

File diff suppressed because it is too large

@@ -0,0 +1,11 @@
{
"folders": [
{
"path": "."
}
],
"settings": {
"git.ignoreLimitWarning": true,
"makefile.configureOnOpen": false
}
}


@@ -0,0 +1,2 @@
from . import utils


@@ -0,0 +1,205 @@
""" This module supports the selection of CodeQL databases based on various
criteria.
"""
#* Imports
from dataclasses import dataclass
from pathlib import Path
import datetime
import json
import logging
import os
from typing import List, Dict, Any
import pandas as pd
import time
import yaml
import zipfile
from pandas import DataFrame
#* Setup
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s [%(levelname)s] %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
#* Utility functions
def log_and_raise(message):
logging.error(message)
raise Exception(message)
def log_and_raise_e(message, exception):
logging.error(message)
raise exception(message)
def traverse_tree(root: str) -> Path:
root_path = Path(os.path.expanduser(root))
if not root_path.exists() or not root_path.is_dir():
log_and_raise(f"The specified root path '{root}' does not exist or "
"is not a directory.")
for path in root_path.rglob('*'):
if path.is_file():
yield path
elif path.is_dir():
pass
@dataclass
class DBInfo:
ctime : str = '2024-05-13T12:04:01.593586'
language : str = 'cpp'
name : str = 'nanobind'
owner : str = 'wjakob'
path : Path = Path('/Users/.../db.zip')
size : int = 63083064
def collect_dbs(db_base: str) -> DBInfo:
for path in traverse_tree(db_base):
if path.name == "db.zip":
# For the current repository, we have
# In [292]: len(path.parts)
# Out[292]: 14
# and can work from the end to get relevant info from the file path.
db = DBInfo()
(*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
db.path = path
s = path.stat()
db.size = s.st_size
# db.ctime_raw = s.st_ctime
# db.ctime = time.ctime(s.st_ctime)
db.ctime = datetime.datetime.fromtimestamp(s.st_ctime).isoformat()
yield db
def extract_metadata(zipfile_path: str) -> tuple[object,object]:
"""
extract_metadata(zipfile)
Unzip zipfile into memory and return the contents of the files
codeql-database.yml and baseline-info.json that it contains in a tuple
"""
codeql_content = None
meta_content = None
try:
with zipfile.ZipFile(zipfile_path, 'r') as z:
for file_info in z.infolist():
# Filenames seen
# java/codeql-database.yml
# codeql_db/codeql-database.yml
if file_info.filename.endswith('codeql-database.yml'):
with z.open(file_info) as f:
codeql_content = yaml.safe_load(f)
# And
# java/baseline-info.json
# codeql_db/baseline-info.json
elif file_info.filename.endswith('baseline-info.json'):
with z.open(file_info) as f:
meta_content = json.load(f)
except zipfile.BadZipFile:
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNotZipfile)
# The baseline-info is only available in more recent CodeQL versions
if not meta_content:
meta_content = {'languages':
{'no-language': {'displayName': 'no-language',
'files': [],
'linesOfCode': -1,
'name': 'nolang'},
}}
if not codeql_content:
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNoCQLDB)
return codeql_content, meta_content
class ExtractNotZipfile(Exception): pass
class ExtractNoCQLDB(Exception): pass
def metadata_details(left_index: int, codeql_content: object, meta_content: object) -> pd.DataFrame:
"""
metadata_details(codeql_content, meta_content)
Extract the details from metadata that will be used in DB selection and return a
dataframe with the information. Example, cropped to fit:
full_df.T
Out[535]:
0 1
left_index 0 0
baselineLinesOfCode 17990 17990
primaryLanguage cpp cpp
sha 288920efc079766f4 282c20efc079766f4
cliVersion 2.17.0 2.17.0
creationTime .325253+00:00 51.325253+00:00
finalised True True
db_lang cpp python
db_lang_displayName C/C++ Python
db_lang_file_count 102 27
db_lang_linesOfCode 17990 5586
"""
cqlc, metac = codeql_content, meta_content
d = {'left_index': left_index,
'baselineLinesOfCode': cqlc['baselineLinesOfCode'],
'primaryLanguage': cqlc['primaryLanguage'],
'sha': cqlc['creationMetadata'].get('sha', 'abcde0123'),
'cliVersion': cqlc['creationMetadata']['cliVersion'],
'creationTime': cqlc['creationMetadata']['creationTime'],
'finalised': cqlc.get('finalised', pd.NA),
}
f = pd.DataFrame(d, index=[0])
joiners: list[dict[str, int | Any]] = []
if 'languages' not in metac:
log_and_raise_e("Missing 'languages' in metadata", DetailsMissing)
for lang, lang_cont in metac['languages'].items():
d1: dict[str, int | Any] = { 'left_index' : left_index,
'db_lang': lang }
for prop, val in lang_cont.items():
if prop == 'files':
d1['db_lang_file_count'] = len(val)
elif prop == 'linesOfCode':
d1['db_lang_linesOfCode'] = val
elif prop == 'displayName':
d1['db_lang_displayName'] = val
joiners.append(d1)
fj: DataFrame = pd.DataFrame(joiners)
full_df: DataFrame = pd.merge(f, fj, on='left_index', how='outer')
return full_df
class DetailsMissing(Exception): pass
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Take a bytes object and return hash as hex string
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
def form_db_bucket_name(owner, name, CID):
"""
form_db_bucket_name(owner, name, CID)
Return the name to use in minio storage; this function is trivial and used to
enforce consistent naming.
The 'ctsj' marker is a random, unique string separating the name from the CID.
"""
return f'{owner}${name}ctsj{CID}.zip'
def form_db_req_name(owner: str, name: str, CID: str) -> str:
"""
form_db_req_name(owner, name, CID)
Return the name to use in mrva requests; this function is trivial and used to
enforce consistent naming.
The 'ctsj' marker is a random, unique string separating the name from the CID.
"""
return f'{owner}/{name}ctsj{CID}'
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,109 @@
annotated-types==0.7.0
anyio==4.4.0
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==24.2.0
babel==2.16.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.9.0
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastapi==0.115.5
fastjsonschema==2.20.0
Flask==3.1.0
fqdn==1.5.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipykernel==6.29.5
ipython==8.26.0
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
minio==7.2.8
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.1.0
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.2
plumbum==1.9.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.1.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.20.0
Send2Trash==1.8.3
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.41.3
terminado==0.18.1
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20240821
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.32.1
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3


@@ -0,0 +1,61 @@
""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
"""
#
#* Collect the information and write files
#
import pandas as pd
import sys
import qldbtools.utils as utils
import numpy as np
import importlib
importlib.reload(utils)
df0 = pd.read_csv('scratch/db-info-3.csv')
# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))
repos = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
repos.append(utils.form_db_req_name(owner, name, CID))
repo_list_name = "mirva-list"
vsc = {
"version": 1,
"databases": {
"variantAnalysis": {
"repositoryLists": [
{
"name": repo_list_name,
"repositories": repos,
}
],
"owners": [],
"repositories": []
}
},
"selected": {
"kind": "variantAnalysisUserDefinedList",
"listName": repo_list_name
}
}
gh = {
repo_list_name: repos
}
# write the files
import json
with open("tmp-selection-vsc.json", "w") as fc:
json.dump(vsc, fc, indent=4)
with open("tmp-selection-gh.json", "w") as fc:
json.dump(gh, fc, indent=4)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,59 @@
#* Experimental work with utils.py, to be merged into it.
# The rest of this interactive script is available as cli script in
# mc-db-initial-info
from utils import *
#* Data collection
# Get the db information in list of DBInfo form
db_base = "~/work-gh/mrva/mrva-open-source-download/"
dbs = list(collect_dbs(db_base))
# Inspect:
from pprint import pprint
pprint(["len", len(dbs)])
pprint(["dbs[0]", dbs[0].__dict__])
pprint(["dbs[-1]", dbs[-1].__dict__])
#
# Get a dataframe
dbdf = pd.DataFrame([d.__dict__ for d in dbs])
#
#* Experiments with on-disk format
# Continue use of raw information in separate session.
#
# PosixPath is a problem for json and parquet
#
dbdf['path'] = dbdf['path'].astype(str)
#
dbdf.to_csv('dbdf.csv')
#
dbdf.to_csv('dbdf.csv.gz', compression='gzip', index=False)
#
dbdf.to_json('dbdf.json')
#
# dbdf.to_hdf('dbdf.h5', key='dbdf', mode='w')
#
# fast, binary
dbdf.to_parquet('dbdf.parquet')
#
# fast
import sqlite3
conn = sqlite3.connect('dbdf.db')
dbdf.to_sql('qldbs', conn, if_exists='replace', index=False)
conn.close()
#
# Sizes:
# ls -laSr dbdf.*
# -rw-r--r--@ 1 hohn staff 101390 Jul 12 14:17 dbdf.csv.gz
# -rw-r--r--@ 1 hohn staff 202712 Jul 12 14:17 dbdf.parquet
# -rw-r--r--@ 1 hohn staff 560623 Jul 12 14:17 dbdf.csv
# -rw-r--r--@ 1 hohn staff 610304 Jul 12 14:17 dbdf.db
# -rw-r--r--@ 1 hohn staff 735097 Jul 12 14:17 dbdf.json
#
# parquet has many libraries, including go: xitongsys/parquet-go
# https://parquet.apache.org/
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,65 @@
import qldbtools.utils as utils
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Collect the information and select subset
#
df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
# Use all entries
entries = df
else:
# Use num_entries, chosen via pseudo-random numbers
entries = df.sample(n=3,
random_state=np.random.RandomState(seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"
# Initialize MinIO client
client = Minio(
MINIO_URL.replace("http://", "").replace("https://", ""),
access_key=MINIO_ROOT_USER,
secret_key=MINIO_ROOT_PASSWORD,
secure=False
)
# Create the bucket if it doesn't exist
try:
if not client.bucket_exists(QL_DB_BUCKET_NAME):
client.make_bucket(QL_DB_BUCKET_NAME)
else:
print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
print(f"Error creating bucket: {err}")
# (test) File paths and new names
files_to_upload = {
"cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
"cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}
# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
try:
client.fput_object(QL_DB_BUCKET_NAME, new_name, prefix / Path(local_path))
print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
except S3Error as err:
print(f"Error uploading file {local_path}: {err}")
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd
#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")
# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Take a bytes object and return hash as hex string
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
row['sha'],
row['cliVersion'],
row['language'])
), axis=1)
df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index', 'size'])
df1['CID']
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:


@@ -0,0 +1,118 @@
# Experimental work be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint
#* Reload gzipped CSV file to continue work
dbdf_1 = pd.read_csv('dbdf.csv.gz', compression='gzip')
#
# (old) Consistency check:
# dbdf_1.columns == dbdf.columns
# dbmask = (dbdf_1 != dbdf)
# dbdf_1[dbmask]
# dbdf_1[dbmask].dropna(how='all')
# ctime_raw is different in places, so don't use it.
#
#* Interact with/visualize the dataframe
# Using pandasgui -- qt
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(dbdf_1)
# Using dtale -- web
import dtale
dtale.show(dbdf_1)
#
#
#* Collect metadata from DB zip files
#
#** A manual sample
#
d = dbdf_1
left_index = 0
d.path[0]
cqlc, metac = extract_metadata(d.path[0])
cqlc['baselineLinesOfCode']
cqlc['primaryLanguage']
cqlc['creationMetadata']['sha']
cqlc['creationMetadata']['cliVersion']
cqlc['creationMetadata']['creationTime'].isoformat()
cqlc['finalised']
for lang, lang_cont in metac['languages'].items():
print(lang)
indent = " "
for prop, val in lang_cont.items():
if prop == 'files':
print("%sfiles count %d" % (indent, len(val)))
elif prop == 'linesOfCode':
print("%slinesOfCode %d" % (indent, val))
elif prop == 'displayName':
print("%sdisplayName %s" % (indent, val))
#** Automated for all entries
# The rest of this interactive script is available as cli script in
# mc-db-refine-info
d = dbdf_1
joiners = []
for left_index in range(len(d)):  # include the last row
try:
cqlc, metac = extract_metadata(d.path[left_index])
except ExtractNotZipfile:
continue
except ExtractNoCQLDB:
continue
try:
detail_df = metadata_details(left_index, cqlc, metac)
except DetailsMissing:
continue
joiners.append(detail_df)
joiners_df = pd.concat(joiners, axis=0)
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')
#** View the full dataframe with metadata
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(full_df)
#** Re-order the dataframe columns by importance
# - Much of the data
# 1. Is only conditionally present
# 2. Is extra info, not for the DB proper
# 3. May have various names
# - The essential columns are
# | owner |
# | name |
# | language |
# | size |
# | cliVersion |
# | creationTime |
# | sha |
# | baselineLinesOfCode |
# | path |
# - The rest are useful; put them last
# | db_lang |
# | db_lang_displayName |
# | db_lang_file_count |
# | db_lang_linesOfCode |
# | left_index |
# | ctime |
# | primaryLanguage |
# | finalised |
final_df = full_df.reindex(columns=['owner', 'name', 'language', 'size', 'cliVersion',
'creationTime', 'sha', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index'])
final_df.to_csv('all-info-table.csv.gz', compression='gzip', index=False)
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#


@@ -0,0 +1,41 @@
# Experimental work for ../bin/mc-db-unique, to be merged into it.
import qldbtools.utils as utils
from pprint import pprint
import pandas as pd
# cd ../
#* Reload CSV file to continue work
df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')
# Identify rows missing specific entries
rows = ( df2['cliVersion'].isna() |
df2['creationTime'].isna() |
df2['language'].isna() |
df2['sha'].isna() )
df2[rows]
df3 = df2[~rows]
df3
#* post-save work
df4 = pd.read_csv('scratch/db-info-3.csv')
# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
# Find duplicates
df_dups = df_unique[df_unique['CID'].duplicated(keep=False)]
len(df_dups)
df_dups['CID']
# Set display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 140)
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#


@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd
#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")
# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Take a bytes object and return hash as hex string
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
row['sha'],
row['cliVersion'],
row['language'])
), axis=1)
df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index', 'size'])
df1['CID']
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

client/qldbtools/setup.py Normal file

@@ -0,0 +1,13 @@
from setuptools import setup, find_packages
import glob
setup(
name='qldbtools',
version='0.1.0',
description='A Python package for working with CodeQL databases',
author='Michael Hohn',
author_email='hohn@github.com',
packages=['qldbtools'],
install_requires=[],
scripts=glob.glob("bin/mc-*"),
)


@@ -23,7 +23,8 @@ ARG CODEQL_VERSION=latest
 RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
     unzip \
     curl \
-    ca-certificates
+    ca-certificates \
+    default-jdk

 # If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
 RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
@@ -32,18 +33,19 @@ RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
     echo "Using CodeQL version $CODEQL_VERSION" && \
     curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
     unzip /tmp/codeql.zip -d /opt && \
-    rm /tmp/codeql.zip
+    rm /tmp/codeql.zip && \
+    chmod -R +x /opt/codeql

 # Set environment variables for CodeQL
-ENV CODEQL_CLI_PATH=/opt/codeql
+ENV CODEQL_CLI_PATH=/opt/codeql/codeql

 # Set environment variable for CodeQL for `codeql database analyze` support on ARM
 # This env var has no functional effect on CodeQL when running on x86_64 linux
-ENV CODEQL_JAVA_HOME=/usr/
+ENV CODEQL_JAVA_HOME=/usr

 # Copy built agent binary from the builder stage
 WORKDIR /app
 COPY --from=builder /bin/mrva_agent ./mrva_agent

 # Run the agent
 ENTRYPOINT ["./mrva_agent"]

cmd/agent/Makefile Normal file

@@ -0,0 +1,23 @@
all: mrva-agent

MAI_TARGET := mrva-agent:0.1.24

mai: mk.mrva-agent
mrva-agent: mk.mrva-agent
mk.mrva-agent:
	cd ../../ && docker build -t ${MAI_TARGET} -f cmd/agent/Dockerfile .
	touch $@

mai-serve: mai
	docker run --rm -it ${MAI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MAI_TARGET}
	-rm -f mk.mrva-agent mk.mai-push

mai-push: mk.mai-push
mk.mai-push: mai
	docker tag ${MAI_TARGET} ghcr.io/hohn/${MAI_TARGET}
	docker push ghcr.io/hohn/${MAI_TARGET}
	touch $@


@@ -3,170 +3,71 @@ package main
 import (
 	"context"
 	"flag"
+	"log"
+	"log/slog"
 	"os"
 	"os/signal"
-	"runtime"
-	"strconv"
 	"sync"
 	"syscall"
-	"time"

-	"github.com/elastic/go-sysinfo"
-	"golang.org/x/exp/slog"
-
-	"mrvacommander/pkg/agent"
-	"mrvacommander/pkg/queue"
+	"github.com/hohn/mrvacommander/pkg/agent"
+	"github.com/hohn/mrvacommander/pkg/deploy"
 )

-const (
-	workerMemoryMB     = 2048 // 2 GB
-	monitorIntervalSec = 10   // Monitor every 10 seconds
-)
-
-func calculateWorkers() int {
-	host, err := sysinfo.Host()
-	if err != nil {
-		slog.Error("failed to get host info", "error", err)
-		os.Exit(1)
-	}
-	memInfo, err := host.Memory()
-	if err != nil {
-		slog.Error("failed to get memory info", "error", err)
-		os.Exit(1)
-	}
-	// Get available memory in MB
-	totalMemoryMB := memInfo.Available / (1024 * 1024)
-	// Ensure we have at least one worker
-	workers := int(totalMemoryMB / workerMemoryMB)
-	if workers < 1 {
-		workers = 1
-	}
-	// Limit the number of workers to the number of CPUs
-	cpuCount := runtime.NumCPU()
-	if workers > cpuCount {
-		workers = max(cpuCount, 1)
-	}
-	return workers
-}
-
-func startAndMonitorWorkers(ctx context.Context, queue queue.Queue, desiredWorkerCount int, wg *sync.WaitGroup) {
-	currentWorkerCount := 0
-	stopChans := make([]chan struct{}, 0)
-	if desiredWorkerCount != 0 {
-		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
-		for i := 0; i < desiredWorkerCount; i++ {
-			stopChan := make(chan struct{})
-			stopChans = append(stopChans, stopChan)
-			wg.Add(1)
-			go agent.RunWorker(ctx, stopChan, queue, wg)
-		}
-		return
-	}
-	slog.Info("Worker count not specified, managing based on available memory and CPU")
-	for {
-		select {
-		case <-ctx.Done():
-			// signal all workers to stop
-			for _, stopChan := range stopChans {
-				close(stopChan)
-			}
-			return
-		default:
-			newWorkerCount := calculateWorkers()
-			if newWorkerCount != currentWorkerCount {
-				slog.Info(
-					"Modifying worker count",
-					slog.Int("current", currentWorkerCount),
-					slog.Int("new", newWorkerCount))
-			}
-			if newWorkerCount > currentWorkerCount {
-				for i := currentWorkerCount; i < newWorkerCount; i++ {
-					stopChan := make(chan struct{})
-					stopChans = append(stopChans, stopChan)
-					wg.Add(1)
-					go agent.RunWorker(ctx, stopChan, queue, wg)
-				}
-			} else if newWorkerCount < currentWorkerCount {
-				for i := newWorkerCount; i < currentWorkerCount; i++ {
-					close(stopChans[i])
-				}
-				stopChans = stopChans[:newWorkerCount]
-			}
-			currentWorkerCount = newWorkerCount
-			time.Sleep(monitorIntervalSec * time.Second)
-		}
-	}
-}
-
 func main() {
 	slog.Info("Starting agent")
 	workerCount := flag.Int("workers", 0, "number of workers")
+	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
 	flag.Parse()

-	requiredEnvVars := []string{
-		"MRVA_RABBITMQ_HOST",
-		"MRVA_RABBITMQ_PORT",
-		"MRVA_RABBITMQ_USER",
-		"MRVA_RABBITMQ_PASSWORD",
-		"CODEQL_JAVA_HOME",
-		"CODEQL_CLI_PATH",
-	}
-
-	for _, envVar := range requiredEnvVars {
-		if _, ok := os.LookupEnv(envVar); !ok {
-			slog.Error("Missing required environment variable", "key", envVar)
-			os.Exit(1)
-		}
-	}
-
-	rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
-	rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
-	rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
-	rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")
-
-	rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
-	if err != nil {
-		slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
-		os.Exit(1)
-	}
-
-	slog.Info("Initializing RabbitMQ queue")
-	rabbitMQQueue, err := queue.InitializeRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
+	// Apply 'loglevel' flag
+	switch *logLevel {
+	case "debug":
+		slog.SetLogLoggerLevel(slog.LevelDebug)
+	case "info":
+		slog.SetLogLoggerLevel(slog.LevelInfo)
+	case "warn":
+		slog.SetLogLoggerLevel(slog.LevelWarn)
+	case "error":
+		slog.SetLogLoggerLevel(slog.LevelError)
+	default:
+		log.Printf("Invalid logging verbosity level: %s", *logLevel)
+		os.Exit(1)
+	}
+
+	isAgent := true
+	rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
 	if err != nil {
-		slog.Error("failed to initialize RabbitMQ", slog.Any("error", err))
+		slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
 		os.Exit(1)
 	}
 	defer rabbitMQQueue.Close()

+	artifacts, err := deploy.InitMinIOArtifactStore()
+	if err != nil {
+		slog.Error("Failed to initialize artifact store", slog.Any("error", err))
+		os.Exit(1)
+	}
+
+	databases, err := deploy.InitMinIOCodeQLDatabaseStore()
+	if err != nil {
+		slog.Error("Failed to initialize database store", slog.Any("error", err))
+		os.Exit(1)
+	}
+
 	var wg sync.WaitGroup
 	ctx, cancel := context.WithCancel(context.Background())

-	go startAndMonitorWorkers(ctx, rabbitMQQueue, *workerCount, &wg)
+	go agent.StartAndMonitorWorkers(ctx, artifacts, databases, rabbitMQQueue, *workerCount, &wg)

 	slog.Info("Agent started")

 	// Gracefully exit on SIGINT/SIGTERM
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
 	<-sigChan

-	// TODO: fix this to gracefully terminate agent workers during jobs
 	slog.Info("Shutting down agent")
 	cancel()
 	wg.Wait()
 	slog.Info("Agent shutdown complete")
 }
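The worker-scaling loop removed here moved into pkg/agent as
agent.StartAndMonitorWorkers. A minimal, self-contained sketch of the
stop-channel pattern both versions are built on, with a placeholder worker in
place of the real agent.RunWorker:

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    // worker runs until its stop channel is closed; it stands in for agent.RunWorker.
    func worker(id int, stop <-chan struct{}, wg *sync.WaitGroup) {
        defer wg.Done()
        for {
            select {
            case <-stop:
                fmt.Println("worker", id, "stopping")
                return
            default:
                time.Sleep(100 * time.Millisecond) // a real worker would poll the queue here
            }
        }
    }

    func main() {
        var wg sync.WaitGroup
        var stops []chan struct{}
        // Scale up to 3 workers.
        for i := 0; i < 3; i++ {
            stop := make(chan struct{})
            stops = append(stops, stop)
            wg.Add(1)
            go worker(i, stop, &wg)
        }
        // Scale down by closing stop channels, then wait for all workers.
        for _, stop := range stops {
            close(stop)
        }
        wg.Wait()
    }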

cmd/server/Dockerfile

@@ -1,38 +1,56 @@
-# Use the ubuntu 22.04 base image
-FROM ubuntu:24.10
-
-# Set architecture to arm64
-ARG ARCH=arm64
-ARG AARCH=aarch64
-
-# Set environment variables
+FROM golang:1.22 AS builder
+
+# Copy the entire project
+WORKDIR /app
+COPY . .
+
+# Download dependencies
+RUN go mod download
+
+# Set the working directory to the cmd/server subproject
+WORKDIR /app/cmd/server
+
+# Build the server
+RUN go build -o /bin/mrva_server ./main.go
+
+FROM ubuntu:24.10 as runner
+
 ENV DEBIAN_FRONTEND=noninteractive
-ENV CODEQL_VERSION=codeql-bundle-v2.17.5
-ENV CODEQL_DOWNLOAD_URL=https://github.com/github/codeql-action/releases/download/${CODEQL_VERSION}/codeql-bundle-linux64.tar.gz
-ENV JDK_VERSION=22.0.1
-ENV JDK_DOWNLOAD_URL=https://download.oracle.com/java/21/latest/jdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
-ENV JDK_DOWNLOAD_URL=https://download.java.net/java/GA/jdk${JDK_VERSION}/c7ec1332f7bb44aeba2eb341ae18aca4/8/GPL/openjdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
-ENV CODEQL_JAVA_HOME=/usr/local/jdk-${JDK_VERSION}
+
+# Build argument for CodeQL version, defaulting to the latest release
+ARG CODEQL_VERSION=latest

-# Install necessary tools
-RUN apt-get update && \
-    apt-get install -y curl tar && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+# Install packages
+RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
+    unzip \
+    curl \
+    ca-certificates \
+    default-jdk

-# Add and extract the CodeQL bundle
-RUN curl -L $CODEQL_DOWNLOAD_URL -o /tmp/${CODEQL_VERSION}.tar.gz && \
-    tar -xzf /tmp/${CODEQL_VERSION}.tar.gz -C /opt && \
-    rm /tmp/${CODEQL_VERSION}.tar.gz
+# If the version is 'latest', get the latest release version from GitHub,
+# unzip the bundle into /opt, and delete the archive
+RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
+        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
+    fi && \
+    echo "Using CodeQL version $CODEQL_VERSION" && \
+    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
+    unzip /tmp/codeql.zip -d /opt && \
+    rm /tmp/codeql.zip && \
+    chmod -R +x /opt/codeql

-# Add and extract the JDK
-RUN curl -L $JDK_DOWNLOAD_URL -o /tmp/jdk-${JDK_VERSION}.tar.gz && \
-    tar -xzf /tmp/jdk-${JDK_VERSION}.tar.gz -C /usr/local && \
-    rm /tmp/jdk-${JDK_VERSION}.tar.gz
+# Set environment variables for CodeQL
+ENV CODEQL_CLI_PATH=/opt/codeql/codeql

-# Set PATH
-ENV PATH=/opt/codeql:"$PATH"
+# Set environment variable for CodeQL for `codeql database analyze` support on ARM
+# This env var has no functional effect on CodeQL when running on x86_64 linux
+ENV CODEQL_JAVA_HOME=/usr

-# Prepare host mount point
-RUN mkdir /mrva
+# Set working directory to /app
+WORKDIR /app
+
+# Copy built server binary from the builder stage
+COPY --from=builder /bin/mrva_server ./mrva_server
+
+# Copy the CodeQL database directory from the builder stage (for standalone mode)
+COPY --from=builder /app/cmd/server/codeql ./codeql
+
+# Run the server with the default mode set to container
+ENTRYPOINT ["./mrva_server"]
+CMD ["--mode=container"]
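The curl | grep | sed pipeline above extracts tag_name from the GitHub releases
API. For illustration only, the same lookup in Go:

    package main

    import (
        "encoding/json"
        "fmt"
        "net/http"
    )

    func main() {
        // Ask the GitHub API for the latest codeql-cli-binaries release.
        resp, err := http.Get("https://api.github.com/repos/github/codeql-cli-binaries/releases/latest")
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        var release struct {
            TagName string `json:"tag_name"`
        }
        if err := json.NewDecoder(resp.Body).Decode(&release); err != nil {
            panic(err)
        }
        fmt.Println("Using CodeQL version", release.TagName)
    }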

cmd/server/Makefile (new file)

@@ -0,0 +1,26 @@
all: mrva-server

MSI_TARGET := mrva-server:0.1.24

msi: mk.mrva-server
mrva-server: mk.mrva-server
mk.mrva-server:
	cd ../../ && docker build -t mrva-server:0.1.24 -f cmd/server/Dockerfile .
	touch $@

msi-serve: msi
	docker run --rm -it ${MSI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MSI_TARGET}
	-rm mrva-server

msi-push: mk.msi-push
mk.msi-push: mk.mrva-server
	docker tag ${MSI_TARGET} ghcr.io/hohn/${MSI_TARGET}
	docker push ghcr.io/hohn/${MSI_TARGET}
	touch $@

msi-test:
	docker pull ghcr.io/hohn/${MSI_TARGET}
	docker run --rm -it --name test-mrva-server-codeql ghcr.io/hohn/${MSI_TARGET} sh

cmd/server/main.go

@@ -4,20 +4,25 @@
 package main

 import (
+	"context"
 	"flag"
 	"log"
 	"log/slog"
 	"os"
+	"os/signal"
+	"path/filepath"
+	"sync"
+	"syscall"

-	"mrvacommander/config/mcc"
-	"mrvacommander/pkg/agent"
-	"mrvacommander/pkg/logger"
-	"mrvacommander/pkg/qldbstore"
-	"mrvacommander/pkg/qpstore"
-	"mrvacommander/pkg/queue"
-	"mrvacommander/pkg/server"
-	"mrvacommander/pkg/storage"
+	"github.com/hohn/mrvacommander/config/mcc"
+	"github.com/hohn/mrvacommander/pkg/agent"
+	"github.com/hohn/mrvacommander/pkg/artifactstore"
+	"github.com/hohn/mrvacommander/pkg/deploy"
+	"github.com/hohn/mrvacommander/pkg/qldbstore"
+	"github.com/hohn/mrvacommander/pkg/queue"
+	"github.com/hohn/mrvacommander/pkg/server"
+	"github.com/hohn/mrvacommander/pkg/state"
 )

 func main() {
@@ -25,13 +30,14 @@ func main() {
 	helpFlag := flag.Bool("help", false, "Display help message")
 	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
 	mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")
+	dbPathRoot := flag.String("dbpath", "", "Set the root path for the database store if using standalone mode.")

 	// Custom usage function for the help flag
 	flag.Usage = func() {
 		log.Printf("Usage of %s:\n", os.Args[0])
 		flag.PrintDefaults()
 		log.Println("\nExamples:")
-		log.Println("  go run main.go --loglevel=debug --mode=container")
+		log.Println("go run main.go --loglevel=debug --mode=container --dbpath=/path/to/db_dir")
 	}

 	// Parse the flags
@@ -58,6 +64,20 @@ func main() {
 		os.Exit(1)
 	}

+	// Process database root if standalone and not provided
+	if *mode == "standalone" && *dbPathRoot == "" {
+		slog.Warn("No database root path provided.")
+		// Default to the codeql directory next to the executable,
+		// resolved via os.Executable()
+		execPath, err := os.Executable()
+		if err != nil {
+			slog.Error("Failed to get executable path", slog.Any("error", err))
+			os.Exit(1)
+		}
+		*dbPathRoot = filepath.Dir(execPath) + "/codeql/dbs/"
+		slog.Info("Using default database root path", "dbPathRoot", *dbPathRoot)
+	}

 	// Read configuration
 	config := mcc.LoadConfig("mcconfig.toml")
@@ -66,91 +86,73 @@ func main() {
log.Printf("Log Level: %s\n", *logLevel) log.Printf("Log Level: %s\n", *logLevel)
log.Printf("Mode: %s\n", *mode) log.Printf("Mode: %s\n", *mode)
// Handle signals
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
// Apply 'mode' flag // Apply 'mode' flag
switch *mode { switch *mode {
case "standalone": case "standalone":
// Assemble single-process version // Assemble single-process version
sq := queue.NewQueueSingle(2)
sl := logger.NewLoggerSingle(&logger.Visibles{}) ss := state.NewLocalState(config.Storage.StartingID)
as := artifactstore.NewInMemoryArtifactStore()
// FIXME take value from configuration ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore(*dbPathRoot)
sq := queue.NewQueueSingle(2, &queue.Visibles{
Logger: sl,
})
ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})
qp, err := qpstore.NewStore(&qpstore.Visibles{})
if err != nil {
slog.Error("Unable to initialize query pack storage")
os.Exit(1)
}
ql, err := qldbstore.NewStore(&qldbstore.Visibles{})
if err != nil {
slog.Error("Unable to initialize ql database storage")
os.Exit(1)
}
server.NewCommanderSingle(&server.Visibles{ server.NewCommanderSingle(&server.Visibles{
Logger: sl, Queue: sq,
Queue: sq, State: ss,
ServerStore: ss, Artifacts: as,
QueryPackStore: qp, CodeQLDBStore: ql,
QLDBStore: ql,
}) })
// FIXME take value from configuration var wg sync.WaitGroup
agent.NewAgentSingle(2, &agent.Visibles{ ctx, cancel := context.WithCancel(context.Background())
Logger: sl,
Queue: sq, go agent.StartAndMonitorWorkers(ctx, as, ql, sq, 2, &wg)
QueryPackStore: qp,
QLDBStore: ql, slog.Info("Started server and standalone agent")
}) <-sigChan
slog.Info("Shutting down...")
cancel()
wg.Wait()
slog.Info("Agent shutdown complete")
case "container": case "container":
// Assemble container version isAgent := false
sl := logger.NewLoggerSingle(&logger.Visibles{})
// FIXME take value from configuration rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
sq := queue.NewQueueSingle(2, &queue.Visibles{
Logger: sl,
})
ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})
qp, err := qpstore.NewStore(&qpstore.Visibles{})
if err != nil { if err != nil {
slog.Error("Unable to initialize query pack storage") slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
os.Exit(1)
}
defer rabbitMQQueue.Close()
artifacts, err := deploy.InitMinIOArtifactStore()
if err != nil {
slog.Error("Failed to initialize artifact store", slog.Any("error", err))
os.Exit(1) os.Exit(1)
} }
ql, err := qldbstore.NewStore(&qldbstore.Visibles{}) databases, err := deploy.InitMinIOCodeQLDatabaseStore()
if err != nil { if err != nil {
slog.Error("Unable to initialize ql database storage") slog.Error("Failed to initialize database store", slog.Any("error", err))
os.Exit(1) os.Exit(1)
} }
agent.NewAgentSingle(2, &agent.Visibles{
Logger: sl,
Queue: sq,
QueryPackStore: qp,
QLDBStore: ql,
})
server.NewCommanderSingle(&server.Visibles{ server.NewCommanderSingle(&server.Visibles{
Logger: sl, Queue: rabbitMQQueue,
Queue: sq, State: state.NewLocalState(config.Storage.StartingID),
ServerStore: ss, Artifacts: artifacts,
QueryPackStore: qp, CodeQLDBStore: databases,
QLDBStore: ql,
}) })
case "cluster": slog.Info("Started server in container mode.")
// Assemble cluster version <-sigChan
default: default:
slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster\n") slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster")
os.Exit(1) os.Exit(1)
} }
slog.Info("Server shutdown complete")
} }
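The standalone default shown above resolves the database root relative to the
server binary. In isolation, that path computation is just:

    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    func main() {
        // Default the database root to <dir of executable>/codeql/dbs/
        execPath, err := os.Executable()
        if err != nil {
            panic(err)
        }
        fmt.Println(filepath.Join(filepath.Dir(execPath), "codeql", "dbs"))
    }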

config/mcc (configuration loader)

@@ -17,15 +17,15 @@ type System struct {
 func LoadConfig(fname string) *System {
 	if _, err := os.Stat(fname); err != nil {
-		slog.Error("Configuration file %s not found", fname)
-		os.Exit(1)
+		slog.Warn("Configuration file not found", "name", fname)
+		return &System{}
 	}

 	var config System
 	_, err := toml.DecodeFile(fname, &config)
 	if err != nil {
-		slog.Error("", err)
+		slog.Error("Error decoding configuration file", err)
 		os.Exit(1)
 	}

demo/containers/dbsdata/Dockerfile (new file)

@@ -0,0 +1,7 @@
# Use a minimal base image
FROM busybox
ADD dbsdata_backup.tar /
# Just run sh if this container is ever started
CMD ["sh"]


@@ -0,0 +1,70 @@
* MRVA CLI tools container
Set up / run:
#+BEGIN_SRC sh
# Run the raw container assembly
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo-build.yml up -d
# Use the following commands to populate the mrvacommander database storage
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
mkdir -p scratch
source venv/bin/activate
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
# Several seconds start-up time; fast db population
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
# While the containers are running, this will show minio's storage. The zip files
# are split into part.* and xl.meta by minio. Use the web interface to see real
# names.
docker exec dbstore ls -R /data/mrvacommander/
# Open browser to see the file listing
open http://localhost:9001/browser/qldb
# list the volumes
docker volume ls |grep dbs
docker volume inspect mrvacommander_dbsdata
# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata
# Get the data as tar file from the image
docker run --rm \
-v mrvacommander_dbsdata:/data \
-v $(pwd):/backup \
busybox sh -c "tar cvf /backup/dbsdata_backup.tar ."
# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .
docker image ls | grep dbs
# check container contents
docker run -it dbsdata-container:0.1.24 /bin/sh
docker run -it dbsdata-container:0.1.24 ls data/qldb
# Tag the dbstore backing container
docker inspect dbsdata-container:0.1.24 |grep Id
docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24
# Push the pre-populated image
docker push ghcr.io/hohn/dbsdata-container:0.1.24
# Check the tagged image
docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \
ls data/qldb
# Shut down the container assembly
docker-compose -f docker-compose-demo-build.yml down
#+END_SRC

doc/README.md (new file)

@@ -0,0 +1,11 @@
## The doc/ directory
The `doc/` directory serves as the home for documentation. This is the place to
put refined documentation after it has gone through `notes/`. The contents of
this directory should be accessible to a broad audience including prospective
users, active users, and developers. Highly technical material that is only
meaningful to

1. the note authors and
2. developers of the project

belongs in `notes/` instead; it need not be meaningful to casual users.

docker-compose-demo-build.yml (new file)

@@ -0,0 +1,129 @@
# This is the compose configuration used to build / prepopulate the containers for
# a demo.
services:
  dbssvc:
    ## image: ghcr.io/hohn/dbsdata-container:0.1.24
    build:
      context: .
      dockerfile: ./demo/containers/dbsdata/Dockerfile
    container_name: dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    ## image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    build:
      context: .
      dockerfile: ./client/containers/ghmrva/Dockerfile
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    ## image: ghcr.io/hohn/code-server-initialized:0.1.24
    build:
      context: ./client/containers/vscode
      dockerfile: Dockerfile
    ports:
      - "9080:9080"
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    build:
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    ports:
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    ## image: ghcr.io/hohn/mrva-agent:0.1.24
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:

docker-compose-demo.yml (new file)

@@ -0,0 +1,116 @@
services:
  dbssvc:
    # dbsdata-container:0.1.24
    image: ghcr.io/hohn/dbsdata-container:0.1.24
    command: tail -f /dev/null # Keep the container running
    # volumes:
    #   - /qldb # Directory inside the container that contains the data
    volumes:
      - dbsdata:/data
    container_name: dbssvc
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    # volumes_from:
    #   - dbsdata # Use the volumes from dbsdata container
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    image: ghcr.io/hohn/code-server-initialized:0.1.24
    ports:
      - "9080:9080"
    # XX: Include codeql binary in code-server (if it's not there already)
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    image: ghcr.io/hohn/mrva-server:0.1.24
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    image: ghcr.io/hohn/mrva-agent:0.1.24
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:

docker-compose.yml

@@ -7,37 +7,36 @@ services:
     volumes:
       - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
       - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
-    expose:
-      - "5672"
-      - "15672"
     ports:
       - "5672:5672"
       - "15672:15672"
     networks:
       - backend
     healthcheck:
-      test: [ "CMD", "nc", "-z", "localhost", "5672" ]
-      interval: 5s
-      timeout: 15s
-      retries: 1
+      test: rabbitmq-diagnostics check_port_connectivity
+      interval: 30s
+      timeout: 30s
+      retries: 10

   server:
     build:
-      context: ./cmd/server
-      dockerfile: Dockerfile
+      context: .
+      dockerfile: ./cmd/server/Dockerfile
+    command: [ '--mode=container', '--loglevel=debug' ]
     container_name: server
-    stop_grace_period: 1s # Reduce the timeout period for testing
-    environment:
-      - MRVA_SERVER_ROOT=/mrva/mrvacommander/cmd/server
-    command: sh -c "tail -f /dev/null"
+    stop_grace_period: 1s
     ports:
-      - "8080:8080"
-    volumes:
-      - ./:/mrva/mrvacommander
+      # - "8081:8080" # host:container for proxy
+      - "8080:8080" # host:container
     depends_on:
       - rabbitmq
+      - dbstore
+      - artifactstore
     networks:
       - backend
+    env_file:
+      - path: ./.env.container
+        required: true

   dbstore:
     image: minio/minio:RELEASE.2024-06-11T03-13-30Z
@@ -45,52 +44,46 @@ services:
     ports:
       - "9000:9000"
       - "9001:9001"
-    environment:
-      MINIO_ROOT_USER: user
-      MINIO_ROOT_PASSWORD: mmusty8432
+    env_file:
+      - path: .env.container
+        required: true
     command: server /data --console-address ":9001"
     volumes:
       - ./dbstore-data:/data
+    networks:
+      - backend

-  qpstore:
+  artifactstore:
     image: minio/minio:RELEASE.2024-06-11T03-13-30Z
-    container_name: qpstore
+    container_name: artifactstore
     ports:
       - "19000:9000" # host:container
       - "19001:9001"
-    environment:
-      MINIO_ROOT_USER: user
-      MINIO_ROOT_PASSWORD: mmusty8432
+    env_file:
+      - path: ./.env.container
+        required: true
     command: server /data --console-address ":9001"
     volumes:
       - ./qpstore-data:/data
+    networks:
+      - backend

   agent:
     build:
       context: .
       dockerfile: ./cmd/agent/Dockerfile
+    command: [ '--loglevel=debug' ]
     container_name: agent
     depends_on:
       - rabbitmq
-      - minio
-    environment:
-      MRVA_RABBITMQ_HOST: rabbitmq
-      MRVA_RABBITMQ_PORT: 5672
-      MRVA_RABBITMQ_USER: user
-      MRVA_RABBITMQ_PASSWORD: password
+      - dbstore
+      - artifactstore
+    env_file:
+      - path: ./.env.container
+        required: true
     networks:
       - backend

 networks:
   backend:
     driver: bridge
-
-# Remove named volumes to use bind mounts
-# volumes:
-#   minio-data:

go.mod

@@ -1,4 +1,4 @@
-module mrvacommander
+module github.com/hohn/mrvacommander

 go 1.22.0
@@ -9,7 +9,7 @@ require (
 	github.com/gorilla/mux v1.8.1
 	github.com/minio/minio-go/v7 v7.0.71
 	github.com/rabbitmq/amqp091-go v1.10.0
-	golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
+	golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
 	gopkg.in/yaml.v3 v3.0.1
 	gorm.io/driver/postgres v1.5.9
 	gorm.io/gorm v1.25.10
@@ -35,7 +35,7 @@ require (
 	github.com/rs/xid v1.5.0 // indirect
 	golang.org/x/crypto v0.24.0 // indirect
 	golang.org/x/net v0.23.0 // indirect
-	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/sync v0.9.0 // indirect
 	golang.org/x/sys v0.21.0 // indirect
 	golang.org/x/text v0.16.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect

go.sum

@@ -66,12 +66,12 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
 golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
-golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
-golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
+golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
+golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
 golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
 golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
+golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=

VS Code workspace settings

@@ -6,6 +6,8 @@
 	],
 	"settings": {
 		"sarif-viewer.connectToGithubCodeScanning": "off",
-		"codeQL.githubDatabase.download": "never"
+		"codeQL.githubDatabase.download": "never",
+		"makefile.configureOnOpen": false,
+		"git.ignoreLimitWarning": true
 	}
 }

notes/Makefile (new file)

@@ -0,0 +1,5 @@
view: docker-demo-container-deps.pdf
	open $<

docker-demo-container-deps.pdf: docker-demo-container-deps.dot
	dot -Tpdf $< > $@

notes/README.md (new file)

@@ -0,0 +1,9 @@
## The notes/ directory
The `notes/` directory serves as a staging directory for documentation. This is
the place to develop documentation and short notes. The contents of this
directory should be accessible to
1. The note authors and
2. Developers of the project
It need not be meaningful to casual users.


@@ -0,0 +1,471 @@
# -*- coding: utf-8 -*-
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil
* End-to-end example of CLI use
This document describes the build steps for the demo containers.
* Database Acquisition
For this demo, the data is preloaded via a container. To inspect it:
#+BEGIN_SRC sh
# On host, run
docker exec -it dbstore /bin/bash
# In the container
ls -la /data/dbstore-data/
ls /data/dbstore-data/qldb/ | wc -l
#+END_SRC
Here we use a small sample of open-source repositories, 23 in all.
* Repository Selection
When using the full MRVA system, you select a subset of the repositories made
available in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small
collection -- 23 repositories -- and here we further narrow the selection to 11.
The full list:
#+BEGIN_SRC text
ls -1 /data/dbstore-data/qldb/
'BoomingTech$Piccoloctsj6d7177.zip'
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
'StanfordLegion$legionctsj39cbe4.zip'
'USCiLab$cerealctsj264953.zip'
'WinMerge$winmergectsj101305.zip'
'draios$sysdigctsj12c02d.zip'
'gildor2$UEViewerctsjfefdd8.zip'
'git-for-windows$gitctsjb7c2bd.zip'
'google$orbitctsj9bbeaf.zip'
'libfuse$libfusectsj7a66a4.zip'
'luigirizzo$netmapctsj6417fa.zip'
'mawww$kakounectsjc54fab.zip'
'microsoft$node-native-keymapctsj4cc9a2.zip'
'nem0$LumixEnginectsjfab756.zip'
'pocoproject$pococtsj26b932.zip'
'quickfix$quickfixctsjebfd13.zip'
'rui314$moldctsjfec16a.zip'
'swig$swigctsj78bcd3.zip'
'tdlib$telegram-bot-apictsj8529d9.zip'
'timescale$timescaledbctsjf617cf.zip'
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
'xrootd$xrootdctsje4b745.zip'
#+END_SRC
The selection of 11 repositories, from an initial collection of 6000, was made
using a collection of Python/pandas scripts written for this purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
package. The resulting selection, in the format expected by the VS Code
extension, follows.
#+BEGIN_SRC text
cat /data/qldbtools/scratch/vscode-selection.json
{
  "version": 1,
  "databases": {
    "variantAnalysis": {
      "repositoryLists": [
        {
          "name": "mirva-list",
          "repositories": [
            "xoreaxeaxeax/movfuscatorctsj8f7e5b",
            "microsoft/node-native-keymapctsj4cc9a2",
            "BoomingTech/Piccoloctsj6d7177",
            "USCiLab/cerealctsj264953",
            "KhronosGroup/OpenXR-SDKctsj984ee6",
            "tdlib/telegram-bot-apictsj8529d9",
            "WinMerge/winmergectsj101305",
            "timescale/timescaledbctsjf617cf",
            "pocoproject/pococtsj26b932",
            "quickfix/quickfixctsjebfd13",
            "libfuse/libfusectsj7a66a4"
          ]
        }
      ],
      "owners": [],
      "repositories": []
    }
  },
  "selected": {
    "kind": "variantAnalysisUserDefinedList",
    "listName": "mirva-list"
  }
}
#+END_SRC
This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository
Selection]] in the detailed version of this document.
** Optional: The meaning of the names
The repository names all end with =ctsj= followed by 6 hex digits like
=ctsj4cc9a2=.
The information critical for selecting databases is in the columns
1. owner
2. name
3. language
4. "sha"
5. "cliVersion"
6. "creationTime"
There are others that may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
created from. Required to distinguish query results over the evolution of
a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
very handy) for following the evolution of query results over time.
There is a computed column, CID. The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing. Together with (owner, repo) it
provides a unique index for every DB.
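As an illustration only -- the actual =qldbtools= hashing is a Python
implementation not shown here -- a CID-style tag can be derived by hashing the
four columns and keeping the first 6 hex digits:
#+BEGIN_SRC go
// Hypothetical sketch: combine the metadata columns into a 6-hex-digit tag.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

func cid(cliVersion, creationTime, language, sha string) string {
	h := sha256.Sum256([]byte(cliVersion + "\x00" + creationTime + "\x00" + language + "\x00" + sha))
	return hex.EncodeToString(h[:])[:6]
}

func main() {
	fmt.Println(cid("2.17.5", "2024-04-29T12:00:00Z", "cpp", "b7c2bd"))
}
#+END_SRC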
For this document, we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+END_SRC
Note that these scripts use seeded pseudo-random numbers, so the selection is in
fact deterministic.
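As a sketch of why a fixed seed makes the choice repeatable (the actual
=mc-db-generate-selection= script is Python and may seed differently):
#+BEGIN_SRC go
// A fixed-seed PRNG yields the same "random" selection on every run.
package main

import (
	"fmt"
	"math/rand"
)

func main() {
	r := rand.New(rand.NewSource(42)) // fixed seed
	dbs := []string{"db1", "db2", "db3", "db4", "db5"}
	r.Shuffle(len(dbs), func(i, j int) { dbs[i], dbs[j] = dbs[j], dbs[i] })
	fmt.Println(dbs[:3]) // identical output on every run
}
#+END_SRC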
* Starting the server
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
#+END_SRC
Make sure Docker is installed and running.
With docker-compose set up and this repository cloned, we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml up -d
#+END_SRC
and wait until the log output no longer changes.
The output should look like
#+BEGIN_SRC text
docker-compose -f docker-compose-demo.yml up -d
[+] Running 27/6
✔ dbstore Pulled 1.1s
✔ artifactstore Pulled 1.1s
✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s
✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s
✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s
✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s
[+] Running 9/9
✔ Container mrvadata Started 0.3s
✔ Container mrvacommander-client-qldbtools-1 Started 0.3s
✔ Container mrvacommander-client-ghmrva-1 Running 0.0s
✔ Container mrvacommander-code-server-1 Running 0.0s
✔ Container artifactstore Running 0.0s
✔ Container rabbitmq Running 0.0s
✔ Container dbstore Started 0.4s
✔ Container agent Started 0.5s
✔ Container server Started 0.5s
#+END_SRC
The content is prepopulated in the =dbstore= container.
** Optional: Inspect the Backing Store
As a completely optional step, you can inspect the backing store:
#+BEGIN_SRC sh
docker exec -it dbstore /bin/bash
ls /data/qldb/
# 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip'
# 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip'
# ...
#+END_SRC
** Optional: Inspect the MinIO DB
As another completely optional step, you can inspect the MinIO DB contents if you
have the MinIO client (=mc=) installed:
#+BEGIN_SRC sh
# Configuration
MINIO_ALIAS="qldbminio"
MINIO_URL="http://localhost:9000"
MINIO_ROOT_USER="user"
MINIO_ROOT_PASSWORD="mmusty8432"
QL_DB_BUCKET_NAME="qldb"
# Check for MinIO client
if ! command -v mc &> /dev/null
then
echo "MinIO client (mc) not found."
fi
# Configure MinIO client
mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
# Show contents
mc ls qldbminio/qldb
#+END_SRC
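Since the server already depends on =minio-go= (see =go.mod=), the same listing
can also be done programmatically; a sketch using the demo endpoint and
credentials shown above:
#+BEGIN_SRC go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
	client, err := minio.New("localhost:9000", &minio.Options{
		Creds:  credentials.NewStaticV4("user", "mmusty8432", ""),
		Secure: false,
	})
	if err != nil {
		log.Fatal(err)
	}
	// List every object in the qldb bucket, like `mc ls qldbminio/qldb`.
	for obj := range client.ListObjects(context.Background(), "qldb",
		minio.ListObjectsOptions{Recursive: true}) {
		if obj.Err != nil {
			log.Fatal(obj.Err)
		}
		fmt.Println(obj.Key, obj.Size)
	}
}
#+END_SRC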
* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
# From ~/work-gh/mrva/gh-mrva
1. Check mrva cli
#+BEGIN_SRC sh
docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h
#+END_SRC
2. Set up the configuration
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/.config/gh-mrva/'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/.config/gh-mrva/config.yml' <<eof
codeql_path: not-used/$HOME/work-gh
controller: not-used/mirva-controller
list_file: /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
# check:
docker exec -i mrvacommander-client-ghmrva-1 ls /root/.config/gh-mrva/config.yml
docker exec -i mrvacommander-client-ghmrva-1 cat /root/.config/gh-mrva/config.yml
#+END_SRC
3. Provide the repository list file
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/work-gh/mrva/gh-mrva'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' <<eof
{
"mirva-list": [
"xoreaxeaxeax/movfuscatorctsj8f7e5b",
"microsoft/node-native-keymapctsj4cc9a2",
"BoomingTech/Piccoloctsj6d7177",
"USCiLab/cerealctsj264953",
"KhronosGroup/OpenXR-SDKctsj984ee6",
"tdlib/telegram-bot-apictsj8529d9",
"WinMerge/winmergectsj101305",
"timescale/timescaledbctsjf617cf",
"pocoproject/pococtsj26b932",
"quickfix/quickfixctsjebfd13",
"libfuse/libfusectsj7a66a4"
]
}
eof
#+END_SRC
4. Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' <<eof
/**
,* @name pickfun
,* @description pick function from FlatBuffers
,* @kind problem
,* @id cpp-flatbuffer-func
,* @problem.severity warning
,*/
import cpp
from Function f
where
f.getName() = "MakeBinaryRegion" or
f.getName() = "microprotocols_add"
select f, "definition of MakeBinaryRegion"
eof
#+END_SRC
5. Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1360 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC
6. Check the status
#+BEGIN_SRC sh
# Check the status
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1360
#+END_SRC
7. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1360 \
--download-dbs \
--output-dir mirva-session-1360
#+END_SRC
** TODO Write query that has some results
In this case, we use the trivial =alu_mul= query, which finds calls to =alu_mul= in
https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c
#+BEGIN_SRC java
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
#+END_SRC
Repeat the submit steps with this query
1. [X] --
2. [X] --
3. [ ] Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <<eof
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
eof
#+END_SRC
4. [-] Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1490 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/Alu_Mul.ql
#+END_SRC
- [X] XX: server log output:
  #+BEGIN_SRC text
  server | 2024/09/27 20:03:16 DEBUG Processed request info location="{Key:3 Bucket:packs}" language=cpp
  server | 2024/09/27 20:03:16 WARN No repositories found for analysis
  server | 2024/09/27 20:03:16 DEBUG Queueing analysis jobs count=0
  server | 2024/09/27 20:03:16 DEBUG Forming and sending response for submitted analysis job id=3
  #+END_SRC
  NO: debugging in the server container:
#+BEGIN_SRC sh
docker exec -it server /bin/bash
apt-get update
apt-get install delve
# then replace
#   ENTRYPOINT ["./mrva_server"]
#   CMD ["--mode=container"]
#+END_SRC
- [ ] XX:
  The dbstore is empty -- see http://localhost:9001/browser.
  It must be populated properly, then the image saved.
5. [ ] Check the status
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1490
#+END_SRC
This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-1490
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 7
Total findings: 618
Repositories with findings:
quickfix/quickfixctsjebfd13 (cpp-fprintf-call): 5
libfuse/libfusectsj7a66a4 (cpp-fprintf-call): 146
xoreaxeaxeax/movfuscatorctsj8f7e5b (cpp-fprintf-call): 80
pocoproject/pococtsj26b932 (cpp-fprintf-call): 17
BoomingTech/Piccoloctsj6d7177 (cpp-fprintf-call): 10
tdlib/telegram-bot-apictsj8529d9 (cpp-fprintf-call): 247
WinMerge/winmergectsj101305 (cpp-fprintf-call): 113
#+END_SRC
6. [ ] Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1490 \
--download-dbs \
--output-dir mirva-session-1490
# And list them:
\ls -la *1490*
#+END_SRC
7. [ ] Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.
Prepare the source directory so the viewer can be pointed at it
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-1490
unzip -qd BoomingTech_Piccoloctsj6d7177_1_db BoomingTech_Piccoloctsj6d7177_1_db.zip
cd BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/
unzip -qd src src.zip
#+END_SRC
Use the viewer
#+BEGIN_SRC sh
code BoomingTech_Piccoloctsj6d7177_1.sarif
# For lauxlib.c, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder -name lauxlib.c
# Here: ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder/engine/3rdparty/lua-5.4.4/lauxlib.c
#+END_SRC
8. [ ] (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see [[https://github.com/hohn/sarif-cli/][sarif-cli]].
* Running the CodeQL VS Code plugin
- [ ] XX: include the *custom* codeql plugin in the container.
* Ending the session
Shut down docker via
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml down
#+END_SRC
* Footnotes
[fn:1] The =csvkit= tools can be installed into the same Python virtual environment
as the =qldbtools=.


@@ -0,0 +1,493 @@
# -*- coding: utf-8 -*-
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil
* End-to-end example of CLI use
This document describes a complete cycle of the MRVA workflow, but using
pre-populated data. The steps included are
1. acquiring CodeQL databases
2. selection of databases
3. configuration and use of the command-line client
4. server startup
5. submission of the jobs
6. retrieval of the results
7. examination of the results
* Start the containers
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo.yml down --volumes --remove-orphans
docker-compose -f docker-compose-demo.yml up --build
#+END_SRC
* Database Acquisition
General database acquisition is beyond the scope of this document, as it is very
specific to an organization's environment.
For this demo, the data is preloaded via a container. To inspect it:
#+BEGIN_SRC sh
# On host, run
docker exec -it dbstore /bin/bash
# In the container
ls -la /data/mrvacommander/dbstore-data/qldb
# Or in one step
docker exec -it dbstore ls -la /data/mrvacommander/dbstore-data/qldb
#+END_SRC
Here we use a small sample of open-source repositories, 23 in all.
* Repository Selection
When using the full MRVA system, you select a subset of the repositories made
available in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small
collection -- 23 repositories -- and here we further narrow the selection to 11.
The full list:
#+BEGIN_SRC text
ls -1 /data/dbstore-data/qldb/
'BoomingTech$Piccoloctsj6d7177.zip'
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
'StanfordLegion$legionctsj39cbe4.zip'
'USCiLab$cerealctsj264953.zip'
'WinMerge$winmergectsj101305.zip'
'draios$sysdigctsj12c02d.zip'
'gildor2$UEViewerctsjfefdd8.zip'
'git-for-windows$gitctsjb7c2bd.zip'
'google$orbitctsj9bbeaf.zip'
'libfuse$libfusectsj7a66a4.zip'
'luigirizzo$netmapctsj6417fa.zip'
'mawww$kakounectsjc54fab.zip'
'microsoft$node-native-keymapctsj4cc9a2.zip'
'nem0$LumixEnginectsjfab756.zip'
'pocoproject$pococtsj26b932.zip'
'quickfix$quickfixctsjebfd13.zip'
'rui314$moldctsjfec16a.zip'
'swig$swigctsj78bcd3.zip'
'tdlib$telegram-bot-apictsj8529d9.zip'
'timescale$timescaledbctsjf617cf.zip'
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
'xrootd$xrootdctsje4b745.zip'
#+END_SRC
The selection of 11 repositories, from an initial collection of 6000, was made
using a collection of Python/pandas scripts written for this purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
package. The resulting selection, in the format expected by the VS Code
extension, follows.
#+BEGIN_SRC text
cat /data/qldbtools/scratch/vscode-selection.json
{
  "version": 1,
  "databases": {
    "variantAnalysis": {
      "repositoryLists": [
        {
          "name": "mirva-list",
          "repositories": [
            "xoreaxeaxeax/movfuscatorctsj8f7e5b",
            "microsoft/node-native-keymapctsj4cc9a2",
            "BoomingTech/Piccoloctsj6d7177",
            "USCiLab/cerealctsj264953",
            "KhronosGroup/OpenXR-SDKctsj984ee6",
            "tdlib/telegram-bot-apictsj8529d9",
            "WinMerge/winmergectsj101305",
            "timescale/timescaledbctsjf617cf",
            "pocoproject/pococtsj26b932",
            "quickfix/quickfixctsjebfd13",
            "libfuse/libfusectsj7a66a4"
          ]
        }
      ],
      "owners": [],
      "repositories": []
    }
  },
  "selected": {
    "kind": "variantAnalysisUserDefinedList",
    "listName": "mirva-list"
  }
}
#+END_SRC
This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository
Selection]] in the detailed version of this document.
** Optional: The meaning of the names
The repository names all end with =ctsj= followed by 6 hex digits like
=ctsj4cc9a2=.
The information critical for selecting databases is in the columns
1. owner
2. name
3. language
4. "sha"
5. "cliVersion"
6. "creationTime"
There are others that may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
created from. Required to distinguish query results over the evolution of
a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
very handy) for following the evolution of query results over time.
There is a computed column, CID. The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing. Together with (owner, repo) it
provides a unique index for every DB.
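For illustration -- a hypothetical helper, not part of =qldbtools= -- the
display names can be split back into (owner, name, CID):
#+BEGIN_SRC go
// Split "owner/name" + "ctsj" + 6 hex digits into its parts.
package main

import (
	"fmt"
	"strings"
)

func splitName(full string) (owner, repo, cid string) {
	owner, rest, _ := strings.Cut(full, "/")
	i := strings.LastIndex(rest, "ctsj")
	return owner, rest[:i], rest[i+len("ctsj"):]
}

func main() {
	fmt.Println(splitName("microsoft/node-native-keymapctsj4cc9a2"))
}
#+END_SRC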
For this document, we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+END_SRC
Note that these scripts use seeded pseudo-random numbers, so the selection is in
fact deterministic.
* Starting the server
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
#+END_SRC
Make sure Docker is installed and running.
With docker-compose set up and this repository cloned, we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml up -d
#+END_SRC
and wait until the log output no longer changes.
The output should look like
#+BEGIN_SRC text
docker-compose -f docker-compose-demo.yml up -d
[+] Running 27/6
✔ dbstore Pulled 1.1s
✔ artifactstore Pulled 1.1s
✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s
✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s
✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s
✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s
[+] Running 9/9
✔ Container mrvadata Started 0.3s
✔ Container mrvacommander-client-qldbtools-1 Started 0.3s
✔ Container mrvacommander-client-ghmrva-1 Running 0.0s
✔ Container mrvacommander-code-server-1 Running 0.0s
✔ Container artifactstore Running 0.0s
✔ Container rabbitmq Running 0.0s
✔ Container dbstore Started 0.4s
✔ Container agent Started 0.5s
✔ Container server Started 0.5s
#+END_SRC
The content is prepopulated in the =dbstore= container.
** Optional: Inspect the Backing Store
As a completely optional step, you can inspect the backing store:
#+BEGIN_SRC sh
docker exec -it dbstore /bin/bash
ls /data/qldb/
# 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip'
# 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip'
# ...
#+END_SRC
** Optional: Inspect the MinIO DB
As another completely optional step, you can inspect the MinIO DB contents if you
have the MinIO client (=mc=) installed:
#+BEGIN_SRC sh
# Configuration
MINIO_ALIAS="qldbminio"
MINIO_URL="http://localhost:9000"
MINIO_ROOT_USER="user"
MINIO_ROOT_PASSWORD="mmusty8432"
QL_DB_BUCKET_NAME="qldb"
# Check for MinIO client
if ! command -v mc &> /dev/null
then
echo "MinIO client (mc) not found."
fi
# Configure MinIO client
mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
# Show contents
mc ls qldbminio/qldb
#+END_SRC
* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
# From ~/work-gh/mrva/gh-mrva
1. Check mrva cli
#+BEGIN_SRC sh
docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h
#+END_SRC
2. Set up the configuration
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/.config/gh-mrva/'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/.config/gh-mrva/config.yml' <<eof
codeql_path: not-used/$HOME/work-gh
controller: not-used/mirva-controller
list_file: /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
# check:
docker exec -i mrvacommander-client-ghmrva-1 ls /root/.config/gh-mrva/config.yml
docker exec -i mrvacommander-client-ghmrva-1 cat /root/.config/gh-mrva/config.yml
#+END_SRC
3. Provide the repository list file
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/work-gh/mrva/gh-mrva'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' <<eof
{
"mirva-list": [
"xoreaxeaxeax/movfuscatorctsj8f7e5b",
"microsoft/node-native-keymapctsj4cc9a2",
"BoomingTech/Piccoloctsj6d7177",
"USCiLab/cerealctsj264953",
"KhronosGroup/OpenXR-SDKctsj984ee6",
"tdlib/telegram-bot-apictsj8529d9",
"WinMerge/winmergectsj101305",
"timescale/timescaledbctsjf617cf",
"pocoproject/pococtsj26b932",
"quickfix/quickfixctsjebfd13",
"libfuse/libfusectsj7a66a4"
]
}
eof
#+END_SRC
4. Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' <<eof
/**
,* @name pickfun
,* @description pick function from FlatBuffers
,* @kind problem
,* @id cpp-flatbuffer-func
,* @problem.severity warning
,*/
import cpp
from Function f
where
f.getName() = "MakeBinaryRegion" or
f.getName() = "microprotocols_add"
select f, "definition of MakeBinaryRegion"
eof
#+END_SRC
5. Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1360 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC
6. Check the status
#+BEGIN_SRC sh
# Check the status
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1360
#+END_SRC
7. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1360 \
--download-dbs \
--output-dir mirva-session-1360
#+END_SRC
** TODO Write query that has some results
In this case, we use the trivial =alu_mul= query, which finds calls to =alu_mul= in
https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c
#+BEGIN_SRC java
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
#+END_SRC
Repeat the submit steps with this query
1. [X] --
2. [X] --
3. [ ] Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <<eof
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
eof
#+END_SRC
4. [-] Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1490 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/Alu_Mul.ql
#+END_SRC
- [X] XX: server log output:
  #+BEGIN_SRC text
  server | 2024/09/27 20:03:16 DEBUG Processed request info location="{Key:3 Bucket:packs}" language=cpp
  server | 2024/09/27 20:03:16 WARN No repositories found for analysis
  server | 2024/09/27 20:03:16 DEBUG Queueing analysis jobs count=0
  server | 2024/09/27 20:03:16 DEBUG Forming and sending response for submitted analysis job id=3
  #+END_SRC
  NO: debugging in the server container:
#+BEGIN_SRC sh
docker exec -it server /bin/bash
apt-get update
apt-get install delve
# then replace
#   ENTRYPOINT ["./mrva_server"]
#   CMD ["--mode=container"]
#+END_SRC
- [ ] XX:
  The dbstore is empty -- see http://localhost:9001/browser.
  It must be populated properly, then the image saved.
5. [ ] Check the status
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1490
#+END_SRC
This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-1490
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 7
Total findings: 618
Repositories with findings:
quickfix/quickfixctsjebfd13 (cpp-fprintf-call): 5
libfuse/libfusectsj7a66a4 (cpp-fprintf-call): 146
xoreaxeaxeax/movfuscatorctsj8f7e5b (cpp-fprintf-call): 80
pocoproject/pococtsj26b932 (cpp-fprintf-call): 17
BoomingTech/Piccoloctsj6d7177 (cpp-fprintf-call): 10
tdlib/telegram-bot-apictsj8529d9 (cpp-fprintf-call): 247
WinMerge/winmergectsj101305 (cpp-fprintf-call): 113
#+END_SRC
6. [ ] Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1490 \
--download-dbs \
--output-dir mirva-session-1490
# And list them:
\ls -la *1490*
#+END_SRC
7. [ ] Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.
Prepare the source directory so the viewer can be pointed at it
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-1490
unzip -qd BoomingTech_Piccoloctsj6d7177_1_db BoomingTech_Piccoloctsj6d7177_1_db.zip
cd BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/
unzip -qd src src.zip
#+END_SRC
Use the viewer
#+BEGIN_SRC sh
code BoomingTech_Piccoloctsj6d7177_1.sarif
# For lauxlib.c, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder -name lauxlib.c
# Here: ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder/engine/3rdparty/lua-5.4.4/lauxlib.c
#+END_SRC
8. [ ] (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see [[https://github.com/hohn/sarif-cli/][sarif-cli]].
* Running the CodeQL VS Code plugin
- [ ] XX: include the *custom* codeql plugin in the container.
* Ending the session
Shut down docker via
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml down
#+END_SRC
* Footnotes
[fn:1] The =csvkit= package can be installed into the same Python virtual environment as
=qldbtools=.

@@ -0,0 +1,524 @@
# -*- coding: utf-8 -*-
* End-to-end example of CLI use
This document describes a complete cycle of the MRVA workflow. The steps
included are
1. acquiring CodeQL databases
2. selection of databases
3. configuration and use of the command-line client
4. server startup
5. submission of the jobs
6. retrieval of the results
7. examination of the results
* Database Acquisition
General database acquisition is beyond the scope of this document, as it is very specific
to an organization's environment. Here we use an example for open-source
repositories, [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]], which downloads the top 1000 databases for each of
C/C++, Java, Python -- 3000 CodeQL DBs in all.
The scripts in [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]] were run on two distinct dates,
resulting in close to 6000 databases to choose from. The DBs were saved
directly to the file system, resulting in paths like
: .../mrva-open-source-download/repos-2024-04-29/google/re2/code-scanning/codeql/databases/cpp/db.zip
and
: .../mrva-open-source-download/repos/google/re2/code-scanning/codeql/databases/cpp/db.zip
Note that the only information in these paths is (owner, repository, download
date). The databases themselves contain more information, which is used in the [[*Repository Selection][Repository
Selection]] section.
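Since the layout is regular, this information can be recovered from a path
alone; a minimal sketch, assuming the directory layout shown above:
#+BEGIN_SRC python
from pathlib import Path

def parse_db_path(path: str) -> dict:
    """Recover (owner, name, language) from a download path."""
    parts = Path(path).parts
    # Anchor on the fixed '<owner>/<name>/code-scanning/codeql/databases/<lang>/db.zip' tail
    return {"owner": parts[-7], "name": parts[-6], "language": parts[-2]}

print(parse_db_path(
    "repos-2024-04-29/google/re2/code-scanning/codeql/databases/cpp/db.zip"))
# {'owner': 'google', 'name': 're2', 'language': 'cpp'}
#+END_SRC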
To get a collection of databases, follow the [[https://github.com/hohn/mrva-open-source-download?tab=readme-ov-file#mrva-download][instructions]].
* Repository Selection
Here we select a small subset of those repositories using a collection of
scripts made for the purpose: the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]] package.
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
#+END_SRC
After performing the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][installation]] steps, we can follow the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#command-line-use][command line]] use
instructions to collect all the database information from the file system into a
single table:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
source venv/bin/activate
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
#+END_SRC
The [[https://csvkit.readthedocs.io/en/latest/scripts/csvstat.html][=csvstat=]] tool gives a good overview[fn:1]; here is a pruned version of the
output
#+BEGIN_SRC text
csvstat scratch/db-info-1.csv
1. "ctime"
Type of data: DateTime
...
2. "language"
Type of data: Text
Non-null values: 6000
Unique values: 3
Longest value: 6 characters
Most common values: cpp (2000x)
java (2000x)
python (2000x)
3. "name"
...
4. "owner"
Type of data: Text
Non-null values: 6000
Unique values: 2189
Longest value: 29 characters
Most common values: apache (258x)
google (86x)
microsoft (64x)
spring-projects (56x)
alibaba (42x)
5. "path"
...
6. "size"
Type of data: Number
Non-null values: 6000
Unique values: 5354
Smallest value: 0
Largest value: 1,885,008,701
Sum: 284,766,326,993
...
Row count: 6000
#+END_SRC
The columns critical for selection are
1. owner
2. name
3. language
The size column is interesting: a smallest value of 0 indicates some error,
while our largest DB is 1.88 GB in size.
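For instance, the suspect zero-size entries can be isolated directly from the
table; a quick sketch with pandas, using the columns shown above:
#+BEGIN_SRC python
import pandas as pd

df = pd.read_csv("scratch/db-info-1.csv")
# size 0 indicates a failed or empty download; list those for re-downloading
bad = df[df["size"] == 0][["owner", "name", "language", "path"]]
print(bad.to_string(index=False))
#+END_SRC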
This information is not sufficient, so we collect more. The following script
extracts information from every database on disk and takes more time accordingly
-- about 30 seconds on my laptop.
#+BEGIN_SRC sh
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
#+END_SRC
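The additional columns come from metadata stored inside each database archive;
the following sketch shows the kind of extraction involved, assuming the
standard =codeql-database.yml= layout inside each =db.zip= (field names vary
across CodeQL versions):
#+BEGIN_SRC python
import io, zipfile, yaml

def db_metadata(db_zip: str) -> dict:
    """Pull refine-info style metadata out of one db.zip."""
    with zipfile.ZipFile(db_zip) as zf:
        name = next(n for n in zf.namelist()
                    if n.endswith("codeql-database.yml"))
        meta = yaml.safe_load(io.TextIOWrapper(zf.open(name), encoding="utf-8"))
    creation = meta.get("creationMetadata", {}) or {}
    return {
        "primaryLanguage":     meta.get("primaryLanguage"),
        "baselineLinesOfCode": meta.get("baselineLinesOfCode"),
        "finalised":           meta.get("finalised"),
        "sha":                 creation.get("sha"),
        "cliVersion":          creation.get("cliVersion"),
        "creationTime":        creation.get("creationTime"),
    }
#+END_SRC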
This new table is a merge of all the available meta-information with the
previous table, which causes the increase in the number of rows. The following
columns are now present:
#+BEGIN_SRC text
0:$ csvstat scratch/db-info-2.csv
1. "ctime"
2. "language"
3. "name"
4. "owner"
5. "path"
6. "size"
7. "left_index"
8. "baselineLinesOfCode"
Type of data: Number
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 4708
Smallest value: 0
Largest value: 22,028,732
Sum: 3,454,019,142
Mean: 289,766.707
Median: 54,870.5
9. "primaryLanguage"
10. "sha"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 4928
11. "cliVersion"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 59
Longest value: 6 characters
Most common values: 2.17.0 (3850x)
2.18.0 (3622x)
2.17.2 (1097x)
2.17.6 (703x)
2.16.3 (378x)
12. "creationTime"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 5345
Longest value: 32 characters
Most common values: None (19x)
2024-03-19 01:40:14.507823+00:00 (16x)
2024-02-29 19:12:59.785147+00:00 (16x)
2024-01-30 22:24:17.411939+00:00 (14x)
2024-04-05 09:34:03.774619+00:00 (14x)
13. "finalised"
Type of data: Boolean
Contains null values: True (excluded from calculations)
Non-null values: 11617
Unique values: 2
Most common values: True (11617x)
None (322x)
14. "db_lang"
15. "db_lang_displayName"
16. "db_lang_file_count"
17. "db_lang_linesOfCode"
Row count: 11939
#+END_SRC
There are several columns that are critical, namely
1. "sha"
2. "cliVersion"
3. "creationTime"
The others may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
created from. Required to distinguish query results over the evolution of
a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
very handy) for following the evolution of query results over time.
This leaves us with a row count of 11939.
To start reducing that count, run
#+BEGIN_SRC sh
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
#+END_SRC
and get a reduced count and a new column:
#+BEGIN_SRC text
csvstat scratch/db-info-3.csv
3. "CID"
Type of data: Text
Contains null values: False
Non-null values: 5344
Unique values: 5344
Longest value: 6 characters
Most common values: 1f8d99 (1x)
9ab87a (1x)
76fdc7 (1x)
b21305 (1x)
4ae79b (1x)
#+END_SRC
From the docs: 'Read a table of CodeQL DB information and produce a table with unique entries
adding the Cumulative ID (CID) column.'
The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing; together with (owner, repo), this
provides a unique index for every DB.
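A minimal sketch of such a hash, assuming SHA-256 truncated to 6 hex characters
(hypothetical; the exact scheme is defined in =qldbtools=):
#+BEGIN_SRC python
import hashlib

def cid(cli_version: str, creation_time: str, language: str, sha: str) -> str:
    """Combine the four distinguishing fields into one 6-character id."""
    key = "|".join([cli_version, creation_time, language, sha])
    return hashlib.sha256(key.encode("utf-8")).hexdigest()[:6]

# Example: produces a short stable id like the CID values listed above
print(cid("2.17.0", "2024-03-19 01:40:14.507823+00:00", "cpp", "0123abcd"))
#+END_SRC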
We still have too many rows. The tables are all in CSV format, so you can use
your favorite tool to narrow the selection for your needs. For this document,
we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+END_SRC
Note that these use pseudo-random numbers with a fixed seed, so the selection is
in fact deterministic; a sketch of the idea follows the listing below. The
selected databases in =gh-mrva-selection.json=, to be used in section
[[*Running the gh-mrva command-line client][Running the gh-mrva command-line client]], are the following:
#+begin_src javascript
{
"mirva-list": [
"NLPchina/elasticsearch-sqlctsj168cc4",
"LMAX-Exchange/disruptorctsj3e75ec",
"justauth/JustAuthctsj8a6177",
"FasterXML/jackson-modules-basectsj2fe248",
"ionic-team/capacitor-pluginsctsj38d457",
"PaddlePaddle/PaddleOCRctsj60e555",
"elastic/apm-agent-pythonctsj21dc64",
"flipkart-incubator/zjsonpatchctsjc4db35",
"stephane/libmodbusctsj54237e",
"wso2/carbon-kernelctsj5a8a6e",
"apache/servicecomb-packctsj4d98f5"
]
}
#+end_src
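Determinism here simply means the sampler is seeded; a sketch of the idea with
pandas (the actual =mc-db-generate-selection= may seed and sample differently):
#+BEGIN_SRC python
import pandas as pd

df = pd.read_csv("scratch/db-info-3.csv")
# A fixed random_state makes the 'random' pick reproducible across runs
picks = df.sample(n=11, random_state=0)
print(picks[["owner", "name", "CID"]].to_string(index=False))
#+END_SRC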
* Starting the server
The full instructions for building and running the server are in [[../README.md]] under
'Steps to build and run the server'.
With docker-compose set up and this repository cloned as previously described,
we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose up --build
#+END_SRC
and wait until the log output no longer changes.
Then, use the following command to populate the mrvacommander database storage:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && \
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
#+END_SRC
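Under the hood this amounts to uploading each selected =db.zip= to the MinIO
dbstore; a hedged sketch with the =minio= Python client, where the endpoint,
credentials, and bucket name are illustrative rather than the tool's actual
values:
#+BEGIN_SRC python
from minio import Minio

# Illustrative endpoint and credentials; match your docker-compose settings
client = Minio("localhost:9000", access_key="minioadmin",
               secret_key="minioadmin", secure=False)
bucket = "qldb"  # illustrative bucket name
if not client.bucket_exists(bucket):
    client.make_bucket(bucket)
# One upload per selected database row
client.fput_object(bucket, "google_re2_cpp.zip",
                   "repos/google/re2/code-scanning/codeql/databases/cpp/db.zip")
#+END_SRC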
* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
1. Install mrva cli
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva && cd ~/work-gh/mrva
git clone https://github.com/hohn/gh-mrva.git
cd ~/work-gh/mrva/gh-mrva && git checkout mrvacommander-end-to-end
# Build it
go mod edit -replace="github.com/GitHubSecurityLab/gh-mrva=$HOME/work-gh/mrva/gh-mrva"
go build .
# Sanity check
./gh-mrva -h
#+END_SRC
2. Set up the configuration
#+BEGIN_SRC sh
mkdir -p ~/.config/gh-mrva
cat > ~/.config/gh-mrva/config.yml <<eof
# The following options are supported
# codeql_path: Path to CodeQL distribution (checkout of codeql repo)
# controller: NWO of the MRVA controller to use. Not used here.
# list_file: Path to the JSON file containing the target repos
# XX:
codeql_path: $HOME/work-gh/not-used
controller: not-used/mirva-controller
list_file: $HOME/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
#+END_SRC
3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
~/work-gh/mrva/gh-mrva/gh-mrva-selection.json
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-4160 \
--list mirva-list \
--query ~/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC
4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Check the status
./gh-mrva status --session mirva-session-4160
#+END_SRC
5. Download the SARIF files; optionally also get the databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-4160 \
--output-dir mirva-session-4160
# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-4160 \
--download-dbs \
--output-dir mirva-session-4160
#+END_SRC
** Write a query that has some results
First, get the list of paths corresponding to the previously selected
databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
. venv/bin/activate
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
scratch/db-info-3.csv > scratch/selection-full-info
csvcut -c path scratch/selection-full-info
#+END_SRC
Use one of these databases to write the query; the query need not produce
results on that particular database.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code gh-mrva.code-workspace
#+END_SRC
In this case, we use the trivial =findPrintf= query, in the file =Fprintf.ql=:
#+BEGIN_SRC java
/**
,* @name findPrintf
,* @description find calls to plain fprintf
,* @kind problem
,* @id cpp-fprintf-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "fprintf"
select fc, "call of fprintf"
#+END_SRC
Repeat the submit steps with this query
1. --
2. --
3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
~/work-gh/mrva/gh-mrva/gh-mrva-selection.json
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-3660 \
--list mirva-list \
--query ~/work-gh/mrva/gh-mrva/Fprintf.ql
#+END_SRC
4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva status --session mirva-session-3660
#+END_SRC
This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-3660
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 8
Total findings: 7055
Repositories with findings:
lz4/lz4ctsj2479c5 (cpp-fprintf-call): 307
Mbed-TLS/mbedtlsctsj17ef85 (cpp-fprintf-call): 6464
tsl0922/ttydctsj2e3faa (cpp-fprintf-call): 11
medooze/media-server-nodectsj5e30b3 (cpp-fprintf-call): 105
ampl/gslctsj4b270e (cpp-fprintf-call): 102
baidu/sofa-pbrpcctsjba3501 (cpp-fprintf-call): 24
dlundquist/sniproxyctsj3d83e7 (cpp-fprintf-call): 34
hyprwm/Hyprlandctsjc2425f (cpp-fprintf-call): 8
#+END_SRC
5. Download the SARIF files; optionally also get the databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-3660 \
--output-dir mirva-session-3660
# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-3660 \
--download-dbs \
--output-dir mirva-session-3660
#+END_SRC
#+BEGIN_SRC sh
# And list them:
\ls -la *3660*
drwxr-xr-x@ 18 hohn staff 576 Nov 14 11:54 .
drwxrwxr-x@ 56 hohn staff 1792 Nov 14 11:54 ..
-rwxr-xr-x@ 1 hohn staff 9035554 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1.sarif
-rwxr-xr-x@ 1 hohn staff 57714273 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1_db.zip
-rwxr-xr-x@ 1 hohn staff 132484 Nov 14 11:54 ampl_gslctsj4b270e_1.sarif
-rwxr-xr-x@ 1 hohn staff 99234414 Nov 14 11:54 ampl_gslctsj4b270e_1_db.zip
-rwxr-xr-x@ 1 hohn staff 34419 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1.sarif
-rwxr-xr-x@ 1 hohn staff 55177796 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1_db.zip
-rwxr-xr-x@ 1 hohn staff 80744 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1.sarif
-rwxr-xr-x@ 1 hohn staff 2183836 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1_db.zip
-rwxr-xr-x@ 1 hohn staff 169079 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1.sarif
-rwxr-xr-x@ 1 hohn staff 21383303 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1_db.zip
-rwxr-xr-x@ 1 hohn staff 489064 Nov 14 11:54 lz4_lz4ctsj2479c5_1.sarif
-rwxr-xr-x@ 1 hohn staff 2991310 Nov 14 11:54 lz4_lz4ctsj2479c5_1_db.zip
-rwxr-xr-x@ 1 hohn staff 141336 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1.sarif
-rwxr-xr-x@ 1 hohn staff 38217703 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1_db.zip
-rwxr-xr-x@ 1 hohn staff 33861 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1.sarif
-rwxr-xr-x@ 1 hohn staff 5140183 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1_db.zip
#+END_SRC
6. Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.
Prepare the source directory so the viewer can be pointed at it:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660
unzip -qd ampl_gslctsj4b270e_1_db ampl_gslctsj4b270e_1_db.zip
cd ampl_gslctsj4b270e_1_db/codeql_db
unzip -qd src src.zip
#+END_SRC
Use the viewer in VS Code
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660
code ampl_gslctsj4b270e_1.sarif
# For the file vegas.c, when asked, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src/\
-name vegas.c
# Here: ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src//home/runner/work/bulk-builder/bulk-builder/monte/vegas.c
#+END_SRC
7. (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see
[[https://github.com/hohn/sarif-cli/][sarif-cli]] and the sketch below.
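The conversion itself is mechanical; here is a minimal sketch that flattens one
SARIF file's results into CSV rows using only the standard library (=sarif-cli=
does this far more thoroughly, so treat the field paths as illustrative):
#+BEGIN_SRC python
import csv, json, sys

with open(sys.argv[1]) as f:
    sarif = json.load(f)

w = csv.writer(sys.stdout)
w.writerow(["ruleId", "file", "line", "message"])
for run in sarif["runs"]:
    for r in run.get("results", []):
        loc = r["locations"][0]["physicalLocation"]
        w.writerow([r.get("ruleId"),
                    loc["artifactLocation"]["uri"],
                    loc["region"]["startLine"],
                    r["message"]["text"]])
#+END_SRC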
* Running the VS Code plugin
** Compile and Load the Extension
#+BEGIN_SRC sh
cd ~/work-gh/mrva/vscode-codeql
git checkout mrva-standalone
# Install nvm
brew install nvm
[ -s "/opt/homebrew/opt/nvm/nvm.sh" ] && \. "/opt/homebrew/opt/nvm/nvm.sh"
# or
# curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install correct node version
cd ./extensions/ql-vscode
nvm install
# Build the extension
cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
npm install
npm run build
# Install extension
cd ~/work-gh/mrva/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix
# Extension 'vscode-codeql-1.13.2-dev.2024.12.10.23.51.57.vsix' was successfully installed.
#+END_SRC
** Continue the CLI Sample using the Extension
Start VS Code
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code .
#+END_SRC
Set up 'variant analysis repositories', continuing from the
=scratch/vscode-selection.json= file formed previously:
1. Select '{}' and open the db selection file
2. Paste
: ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/vscode-selection.json
3. Open =Fprintf.ql=
4. Right click => 'run variant analysis'
The extension will assemble the pack, send it to the server, and display
results as they arrive.
* Footnotes
[fn:1] The =csvkit= package can be installed into the same Python virtual environment as
=qldbtools=.

@@ -0,0 +1,24 @@
digraph G {
// Define nodes
mrvadata [label="mrvadata" shape=box];
client_qldbtools [label="client-qldbtools" shape=box];
client_ghmrva [label="client-ghmrva" shape=box];
code_server [label="code-server" shape=box];
rabbitmq [label="rabbitmq" shape=box];
server [label="server" shape=box];
dbstore [label="dbstore" shape=box];
artifactstore [label="artifactstore" shape=box];
agent [label="agent" shape=box];
// Define edges (dependencies)
server -> rabbitmq;
server -> dbstore;
server -> artifactstore;
dbstore -> mrvadata;
agent -> rabbitmq;
agent -> dbstore;
agent -> artifactstore;
// Define styling
edge [arrowhead=normal];
}


notes/l3style.css Normal file

@@ -0,0 +1,170 @@
/* The sum of width and margin percentages must not exceed 100.*/
div#toc {
/* Use a moving table of contents (scrolled away for long contents) */
/*
* float: left;
*/
/* OR */
/* use a fixed-position toc */
position: fixed;
top: 80px;
left: 0px;
/* match toc, org-content, postamble */
width: 26%;
margin-right: 1%;
margin-left: 1%;
}
div#org-content {
float: right;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%;
}
div#postamble {
float: right;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%;
}
p.author {
clear: both;
font-size: 1em;
margin-left: 25%;
}
p.date {
clear: both;
font-size: 1em;
margin-left: 25%;
}
#toc * {
font-size:1em;
}
#toc h3 {
font-weight:normal;
margin:1em 0 0 0;
padding: 4px 0;
border-bottom:1px solid #666;
text-transform:uppercase;
}
#toc ul, #toc li {
margin:0;
padding:0;
list-style:none;
}
#toc li {
display:inline;
}
#toc ul li a {
text-decoration:none;
display:block;
margin:0;
padding:4px 6px;
color:#990000;
border-bottom:1px solid #aaa;
}
#toc ul ul li a {
padding-left:18px;
color:#666;
}
#toc ul li a:hover {
background-color:#F6F6F6;
}
/* Description lists. */
dt {
font-weight: bold;
background-color:#F6F6F6;
}
/* From org-mode page. */
body {
font-family: avenir, Lao Sangam MN, Myanmar Sangam MN, Songti SC, Kohinoor Devanagari, Menlo, avenir, helvetica, verdana, sans-serif;
font-size: 100%;
margin-top: 5%;
margin-bottom: 8%;
background: white; color: black;
margin-left: 3% !important; margin-right: 3% !important;
}
h1 {
font-size: 2em;
color: #cc8c00;
/* padding-top: 5px; */
border-bottom: 2px solid #aaa;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%; /* Align with div#content */
}
h2 {
font-size: 1.5em;
padding-top: 1em;
border-bottom: 1px solid #ccc;
}
h3 {
font-size: 1.2em;
padding-top: 0.5em;
border-bottom: 1px solid #eee;
}
.todo, .deadline { color: red; font-style: italic }
.done { color: green; font-style: italic }
.timestamp { color: grey }
.timestamp-kwd { color: CadetBlue; }
.tag { background-color:lightblue; font-weight:normal; }
.target { background-color: lavender; }
.menu {
color: #666;
}
.menu a:link {
color: #888;
}
.menu a:active {
color: #888;
}
.menu a:visited {
color: #888;
}
img { align: center; }
pre {
padding: 5pt;
font-family: andale mono, vera sans mono, monospace, courier ;
font-size: 0.8em;
background-color: #f0f0f0;
}
code {
font-family: andale mono, vera sans mono, monospace, courier ;
font-size: 0.8em;
background-color: #f0f0f0;
}
table { border-collapse: collapse; }
td, th {
vertical-align: top;
border: 1pt solid #ADB9CC;
}

notes/system-structure.dot Normal file

@@ -0,0 +1,127 @@
digraph DockerComposeDemo {
rankdir=LR; // Left-to-right layout
node [shape=plaintext fontname="Helvetica"];
edge [arrowsize=0.5];
// Title
label="Container Dependencies for Demo";
labelloc=top;
fontsize=20;
fontname="Helvetica";
// Define nodes with clickable Dockerfile references
dbssvc [
href="../demo/containers/dbsdata/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>dbssvc</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./demo/containers/dbsdata/Dockerfile</font></td></tr>
</table>
>
];
dbstore [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>dbstore</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
</table>
>
];
client_ghmrva [
href="../client/containers/ghmrva/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>client-ghmrva</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/ghmrva/Dockerfile</font></td></tr>
<tr><td port="slot1"></td></tr>
<tr><td port="slot2"></td></tr>
<tr><td port="slot3"></td></tr>
</table>
>
];
code_server [
href="../client/containers/vscode/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>code-server</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/vscode/Dockerfile</font></td></tr>
</table>
>
];
rabbitmq [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>rabbitmq</b></td></tr>
<tr><td align="left"><font point-size="10">Image: rabbitmq:3-management</font></td></tr>
</table>
>
];
artifactstore [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>artifactstore</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
</table>
>
];
agent [
href="../cmd/agent/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>agent</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./cmd/agent/Dockerfile</font></td></tr>
</table>
>
];
// Expanded 'server' node with handler names and Dockerfile reference
server [
href="../cmd/server/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>server</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./cmd/server/Dockerfile</font></td></tr>
<tr><td port="c_MRVARequest" align="left"><font point-size="10">c.MRVARequest</font></td></tr>
<tr><td port="c_MRVAStatus" align="left"><font point-size="10">c.MRVAStatus</font></td></tr>
<tr><td port="c_MRVADownloadArtifact" align="left"><font point-size="10">c.MRVADownloadArtifact</font></td></tr>
<tr><td align="left"><font point-size="10">c.MRVARequestID</font></td></tr>
<tr><td align="left"><font point-size="10">c.MRVADownloadQLDB</font></td></tr>
<tr><td align="left"><font point-size="10"><i>Not Found</i></font></td></tr>
</table>
>
];
// Define edges (dependencies)
dbstore -> dbssvc;
server -> dbstore;
server -> rabbitmq;
server -> artifactstore;
agent -> dbstore;
agent -> artifactstore;
agent -> rabbitmq;
// Message links
client_ghmrva:slot1 -> server:c_MRVARequest [label="message" style=dashed penwidth=2 fontsize=8];
client_ghmrva:slot2 -> server:c_MRVAStatus [label="message" style=dashed penwidth=2 fontsize=8];
client_ghmrva:slot3 -> server:c_MRVADownloadArtifact [label="message" style=dashed penwidth=2 fontsize=8];
}

notes/system-structure.pdf Normal file

Binary file not shown.

notes/system-structure.svg Normal file

@@ -0,0 +1,162 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 10.0.1 (20240210.2158)
-->
<!-- Title: DockerComposeDemo Pages: 1 -->
<svg width="1057pt" height="280pt"
viewBox="0.00 0.00 1056.75 280.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 276)">
<title>DockerComposeDemo</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-276 1052.75,-276 1052.75,4 -4,4"/>
<text text-anchor="middle" x="524.38" y="-249" font-family="Helvetica,sans-Serif" font-size="20.00">Container Dependencies for Demo</text>
<!-- dbssvc -->
<g id="node1" class="node">
<title>dbssvc</title>
<g id="a_node1"><a xlink:href="../demo/containers/dbsdata/Dockerfile" xlink:title="&lt;TABLE&gt;" target="_blank">
<polygon fill="lightblue" stroke="none" points="818.75,-145 818.75,-168 1039.75,-168 1039.75,-145 818.75,-145"/>
<text text-anchor="start" x="906" y="-151.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">dbssvc</text>
<text text-anchor="start" x="822.75" y="-131.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./demo/containers/dbsdata/Dockerfile</text>
<polygon fill="none" stroke="black" points="817.75,-124.75 817.75,-169 1040.75,-169 1040.75,-124.75 817.75,-124.75"/>
</a>
</g>
</g>
<!-- dbstore -->
<g id="node2" class="node">
<title>dbstore</title>
<polygon fill="lightblue" stroke="none" points="523.25,-145 523.25,-168 763.75,-168 763.75,-145 523.25,-145"/>
<text text-anchor="start" x="618.75" y="-151.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">dbstore</text>
<text text-anchor="start" x="527.25" y="-131.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024&#45;06&#45;11T03&#45;13&#45;30Z</text>
<polygon fill="none" stroke="black" points="522.25,-124.75 522.25,-169 764.75,-169 764.75,-124.75 522.25,-124.75"/>
</g>
<!-- dbstore&#45;&gt;dbssvc -->
<g id="edge1" class="edge">
<title>dbstore&#45;&gt;dbssvc</title>
<path fill="none" stroke="black" d="M772.68,-146.88C783.02,-146.88 793.45,-146.88 803.76,-146.88"/>
<polygon fill="black" stroke="black" points="803.65,-148.63 808.65,-146.88 803.65,-145.13 803.65,-148.63"/>
</g>
<!-- client_ghmrva -->
<g id="node3" class="node">
<title>client_ghmrva</title>
<g id="a_node3"><a xlink:href="../client/containers/ghmrva/Dockerfile" xlink:title="&lt;TABLE&gt;" target="_blank">
<polygon fill="lightblue" stroke="none" points="9,-123 9,-146 227,-146 227,-123 9,-123"/>
<text text-anchor="start" x="73.38" y="-129.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">client&#45;ghmrva</text>
<text text-anchor="start" x="13" y="-109.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./client/containers/ghmrva/Dockerfile</text>
<polygon fill="none" stroke="black" points="8,-78.75 8,-147 228,-147 228,-78.75 8,-78.75"/>
</a>
</g>
</g>
<!-- server -->
<g id="node8" class="node">
<title>server</title>
<g id="a_node8"><a xlink:href="../cmd/server/Dockerfile" xlink:title="&lt;TABLE&gt;" target="_blank">
<polygon fill="lightblue" stroke="none" points="308,-139.75 308,-162.75 468.25,-162.75 468.25,-139.75 308,-139.75"/>
<text text-anchor="start" x="367.88" y="-146.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">server</text>
<text text-anchor="start" x="312" y="-126.25" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./cmd/server/Dockerfile</text>
<text text-anchor="start" x="312" y="-107" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVARequest</text>
<text text-anchor="start" x="312" y="-87.75" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVAStatus</text>
<text text-anchor="start" x="312" y="-68.5" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVADownloadArtifact</text>
<text text-anchor="start" x="312" y="-49.25" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVARequestID</text>
<text text-anchor="start" x="312" y="-30" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVADownloadQLDB</text>
<text text-anchor="start" x="312" y="-11.75" font-family="Helvetica,sans-Serif" font-style="italic" font-size="10.00">Not Found</text>
<polygon fill="none" stroke="black" points="307,-4 307,-163.75 469.25,-163.75 469.25,-4 307,-4"/>
</a>
</g>
</g>
<!-- client_ghmrva&#45;&gt;server -->
<g id="edge8" class="edge">
<title>client_ghmrva:slot1&#45;&gt;server:c_MRVARequest</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-99.88C243.53,-99.88 278.87,-108.3 299.36,-110.4"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.01,-112.13 304.1,-110.69 299.22,-108.64 299.01,-112.13"/>
<text text-anchor="middle" x="267.5" y="-109.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- client_ghmrva&#45;&gt;server -->
<g id="edge9" class="edge">
<title>client_ghmrva:slot2&#45;&gt;server:c_MRVAStatus</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-91.88C260.42,-91.88 270.42,-91.88 299.34,-91.88"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.1,-93.63 304.1,-91.88 299.1,-90.13 299.1,-93.63"/>
<text text-anchor="middle" x="267.5" y="-94.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- client_ghmrva&#45;&gt;server -->
<g id="edge10" class="edge">
<title>client_ghmrva:slot3&#45;&gt;server:c_MRVADownloadArtifact</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-83.88C239.83,-83.88 242.33,-80.1 254,-78.12 274.69,-74.63 281.73,-72.43 299.45,-71.97"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.12,-73.72 304.1,-71.91 299.08,-70.22 299.12,-73.72"/>
<text text-anchor="middle" x="267.5" y="-80.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- code_server -->
<g id="node4" class="node">
<title>code_server</title>
<g id="a_node4"><a xlink:href="../client/containers/vscode/Dockerfile" xlink:title="&lt;TABLE&gt;" target="_blank">
<polygon fill="lightblue" stroke="none" points="9.38,-193 9.38,-216 226.62,-216 226.62,-193 9.38,-193"/>
<text text-anchor="start" x="79.75" y="-199.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">code&#45;server</text>
<text text-anchor="start" x="13.38" y="-179.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./client/containers/vscode/Dockerfile</text>
<polygon fill="none" stroke="black" points="8.38,-172.75 8.38,-217 227.62,-217 227.62,-172.75 8.38,-172.75"/>
</a>
</g>
</g>
<!-- rabbitmq -->
<g id="node5" class="node">
<title>rabbitmq</title>
<polygon fill="lightblue" stroke="none" points="570.5,-215 570.5,-238 716.5,-238 716.5,-215 570.5,-215"/>
<text text-anchor="start" x="614.25" y="-221.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">rabbitmq</text>
<text text-anchor="start" x="574.5" y="-201.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: rabbitmq:3&#45;management</text>
<polygon fill="none" stroke="black" points="569.5,-194.75 569.5,-239 717.5,-239 717.5,-194.75 569.5,-194.75"/>
</g>
<!-- artifactstore -->
<g id="node6" class="node">
<title>artifactstore</title>
<polygon fill="lightblue" stroke="none" points="523.25,-75 523.25,-98 763.75,-98 763.75,-75 523.25,-75"/>
<text text-anchor="start" x="604.5" y="-81.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">artifactstore</text>
<text text-anchor="start" x="527.25" y="-61.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024&#45;06&#45;11T03&#45;13&#45;30Z</text>
<polygon fill="none" stroke="black" points="522.25,-54.75 522.25,-99 764.75,-99 764.75,-54.75 522.25,-54.75"/>
</g>
<!-- agent -->
<g id="node7" class="node">
<title>agent</title>
<g id="a_node7"><a xlink:href="../cmd/agent/Dockerfile" xlink:title="&lt;TABLE&gt;" target="_blank">
<polygon fill="lightblue" stroke="none" points="309.5,-210 309.5,-233 466.75,-233 466.75,-210 309.5,-210"/>
<text text-anchor="start" x="370.12" y="-216.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">agent</text>
<text text-anchor="start" x="313.5" y="-196.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./cmd/agent/Dockerfile</text>
<polygon fill="none" stroke="black" points="308.5,-189.75 308.5,-234 467.75,-234 467.75,-189.75 308.5,-189.75"/>
</a>
</g>
</g>
<!-- agent&#45;&gt;dbstore -->
<g id="edge5" class="edge">
<title>agent&#45;&gt;dbstore</title>
<path fill="none" stroke="black" d="M475.61,-189.69C494.58,-184.82 514.89,-179.61 534.58,-174.56"/>
<polygon fill="black" stroke="black" points="534.8,-176.31 539.21,-173.37 533.93,-172.92 534.8,-176.31"/>
</g>
<!-- agent&#45;&gt;rabbitmq -->
<g id="edge7" class="edge">
<title>agent&#45;&gt;rabbitmq</title>
<path fill="none" stroke="black" d="M475.61,-213.58C501.25,-214.09 529.34,-214.64 555.04,-215.15"/>
<polygon fill="black" stroke="black" points="554.98,-216.9 560.01,-215.25 555.04,-213.4 554.98,-216.9"/>
</g>
<!-- agent&#45;&gt;artifactstore -->
<g id="edge6" class="edge">
<title>agent&#45;&gt;artifactstore</title>
<path fill="none" stroke="black" d="M465.19,-185.78C469.46,-183.08 473.52,-180.12 477.25,-176.88 502.34,-155.06 487.99,-132.25 514.25,-111.88 516.87,-109.84 519.6,-107.93 522.42,-106.12"/>
<polygon fill="black" stroke="black" points="522.93,-107.86 526.3,-103.78 521.11,-104.87 522.93,-107.86"/>
</g>
<!-- server&#45;&gt;dbstore -->
<g id="edge2" class="edge">
<title>server&#45;&gt;dbstore</title>
<path fill="none" stroke="black" d="M477.03,-105.73C494.63,-110.11 513.32,-114.76 531.58,-119.29"/>
<polygon fill="black" stroke="black" points="531,-120.96 536.28,-120.46 531.85,-117.56 531,-120.96"/>
</g>
<!-- server&#45;&gt;rabbitmq -->
<g id="edge3" class="edge">
<title>server&#45;&gt;rabbitmq</title>
<path fill="none" stroke="black" d="M477.14,-159.61C489.14,-167.87 501.67,-175.56 514.25,-181.88 527.15,-188.36 541.41,-193.76 555.59,-198.22"/>
<polygon fill="black" stroke="black" points="554.91,-199.85 560.21,-199.64 555.94,-196.5 554.91,-199.85"/>
</g>
<!-- server&#45;&gt;artifactstore -->
<g id="edge4" class="edge">
<title>server&#45;&gt;artifactstore</title>
<path fill="none" stroke="black" d="M477.03,-81.45C487.19,-81.17 497.71,-80.88 508.31,-80.58"/>
<polygon fill="black" stroke="black" points="508.09,-82.34 513.04,-80.45 507.99,-78.84 508.09,-82.34"/>
</g>
</g>
</svg>


notes/vscode-plugin.org Normal file

@@ -0,0 +1,26 @@
* Building the plugin
#+BEGIN_SRC sh
# Clone hohn's fork of Nick's fork
cd /tmp
git clone git@github.com:hohn/vscode-codeql.git
cd /tmp/vscode-codeql
git checkout mrva-standalone
# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install correct node version
cd /tmp/vscode-codeql/extensions/ql-vscode
nvm install
# Build the extension
cd /tmp/vscode-codeql/extensions/ql-vscode
npm install
npm run build
# Install extension
cd /tmp/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix
#+END_SRC


@@ -4,26 +4,29 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"mrvacommander/pkg/codeql"
-	"mrvacommander/pkg/common"
-	"mrvacommander/pkg/logger"
-	"mrvacommander/pkg/qldbstore"
-	"mrvacommander/pkg/qpstore"
-	"mrvacommander/pkg/queue"
-	"mrvacommander/utils"
+	"github.com/hohn/mrvacommander/pkg/artifactstore"
+	"github.com/hohn/mrvacommander/pkg/codeql"
+	"github.com/hohn/mrvacommander/pkg/common"
+	"github.com/hohn/mrvacommander/pkg/qldbstore"
+	"github.com/hohn/mrvacommander/pkg/queue"
+	"github.com/hohn/mrvacommander/utils"
 	"os"
 	"path/filepath"
+	"runtime"
 	"sync"
+	"time"

+	"github.com/elastic/go-sysinfo"
 	"github.com/google/uuid"
 )

+/*
 type RunnerSingle struct {
 	queue queue.Queue
 }

-func NewAgentSingle(numWorkers int, av *Visibles) *RunnerSingle {
-	r := RunnerSingle{queue: av.Queue}
+func NewAgentSingle(numWorkers int, v *Visibles) *RunnerSingle {
+	r := RunnerSingle{queue: v.Queue}

 	for id := 1; id <= numWorkers; id++ {
 		go r.worker(id)
@@ -31,72 +34,169 @@ func NewAgentSingle(numWorkers int, av *Visibles) *RunnerSingle {
 	return &r
 }

-type Visibles struct {
-	Logger logger.Logger
-	Queue  queue.Queue
-	// TODO extra package for query pack storage
-	QueryPackStore qpstore.Storage
-	// TODO extra package for ql db storage
-	QLDBStore qldbstore.Storage
-}
-
-func (r *RunnerSingle) worker(wid int) {
-	// TODO: reimplement this later
-	/*
-		var job common.AnalyzeJob
-
-		for {
-			job = <-r.queue.Jobs()
-			slog.Debug("Picking up job", "job", job, "worker", wid)
-			slog.Debug("Analysis: running", "job", job)
-			storage.SetStatus(job.QueryPackId, job.NWO, common.StatusQueued)
-			resultFile, err := RunAnalysis(job)
-			if err != nil {
-				continue
-			}
-			slog.Debug("Analysis run finished", "job", job)
-			// TODO: FIX THIS
-			res := common.AnalyzeResult{
-				RunAnalysisSARIF: resultFile,
-				RunAnalysisBQRS:  "", // FIXME ?
-			}
-			r.queue.Results() <- res
-			storage.SetStatus(job.QueryPackId, job.NWO, common.StatusSuccess)
-			storage.SetResult(job.QueryPackId, job.NWO, res)
-		}
-	*/
-}
+func (r *RunnerSingle) worker(wid int) {
+	var job common.AnalyzeJob
+
+	for {
+		job = <-r.queue.Jobs()
+		result, err := RunAnalysisJob(job)
+		if err != nil {
+			slog.Error("Failed to run analysis job", slog.Any("error", err))
+			continue
+		}
+		r.queue.Results() <- result
+	}
+}
+*/
+
+const (
+	workerMemoryMB     = 2048 // 2 GB
+	monitorIntervalSec = 10   // Monitor every 10 seconds
+)
+
+func calculateWorkers() int {
+	host, err := sysinfo.Host()
+	if err != nil {
+		slog.Error("failed to get host info", "error", err)
+		os.Exit(1)
+	}
+
+	memInfo, err := host.Memory()
+	if err != nil {
+		slog.Error("failed to get memory info", "error", err)
+		os.Exit(1)
+	}
+
+	// Get available memory in MB
+	totalMemoryMB := memInfo.Available / (1024 * 1024)
+
+	// Ensure we have at least one worker
+	workers := int(totalMemoryMB / workerMemoryMB)
+	if workers < 1 {
+		workers = 1
+	}
+
+	// Limit the number of workers to the number of CPUs
+	cpuCount := runtime.NumCPU()
+	if workers > cpuCount {
+		workers = max(cpuCount, 1)
+	}
+
+	return workers
+}
+
+func StartAndMonitorWorkers(ctx context.Context,
+	artifacts artifactstore.Store,
+	databases qldbstore.Store,
+	queue queue.Queue,
+	desiredWorkerCount int,
+	wg *sync.WaitGroup) {
+	currentWorkerCount := 0
+	stopChans := make([]chan struct{}, 0)
+
+	if desiredWorkerCount != 0 {
+		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
+		for i := 0; i < desiredWorkerCount; i++ {
+			stopChan := make(chan struct{})
+			stopChans = append(stopChans, stopChan)
+			wg.Add(1)
+			go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
+		}
+		return
+	}
+
+	slog.Info("Worker count not specified, managing based on available memory and CPU")
+	for {
+		select {
+		case <-ctx.Done():
+			// signal all workers to stop
+			for _, stopChan := range stopChans {
+				close(stopChan)
+			}
+			return
+		default:
+			newWorkerCount := calculateWorkers()
+			if newWorkerCount != currentWorkerCount {
+				slog.Info(
+					"Modifying worker count",
+					slog.Int("current", currentWorkerCount),
+					slog.Int("new", newWorkerCount))
+			}
+			if newWorkerCount > currentWorkerCount {
+				for i := currentWorkerCount; i < newWorkerCount; i++ {
+					stopChan := make(chan struct{})
+					stopChans = append(stopChans, stopChan)
+					wg.Add(1)
+					go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
+				}
+			} else if newWorkerCount < currentWorkerCount {
+				for i := newWorkerCount; i < currentWorkerCount; i++ {
+					close(stopChans[i])
+				}
+				stopChans = stopChans[:newWorkerCount]
+			}
+			currentWorkerCount = newWorkerCount
+			time.Sleep(monitorIntervalSec * time.Second)
+		}
+	}
+}

 // RunAnalysisJob runs a CodeQL analysis job (AnalyzeJob) returning an AnalyzeResult
-func RunAnalysisJob(job common.AnalyzeJob) (common.AnalyzeResult, error) {
-	var result = common.AnalyzeResult{
-		RequestId:        job.RequestId,
-		ResultCount:      0,
-		ResultArchiveURL: "",
-		Status:           common.StatusError,
+func RunAnalysisJob(
+	job queue.AnalyzeJob, artifacts artifactstore.Store, dbs qldbstore.Store) (queue.AnalyzeResult, error) {
+	var result = queue.AnalyzeResult{
+		Spec:           job.Spec,
+		ResultCount:    0,
+		ResultLocation: artifactstore.ArtifactLocation{},
+		Status:         common.StatusError,
 	}

 	// Create a temporary directory
 	tempDir := filepath.Join(os.TempDir(), uuid.New().String())
-	if err := os.MkdirAll(tempDir, 0755); err != nil {
+	if err := os.MkdirAll(tempDir, 0600); err != nil {
 		return result, fmt.Errorf("failed to create temporary directory: %v", err)
 	}
 	defer os.RemoveAll(tempDir)

-	// Extract the query pack
-	// TODO: download from the 'job' query pack URL
-	// utils.downloadFile
-	queryPackPath := filepath.Join(tempDir, "qp-54674")
-	utils.UntarGz("qp-54674.tgz", queryPackPath)
+	// Download the query pack as a byte slice
+	queryPackData, err := artifacts.GetQueryPack(job.QueryPackLocation)
+	if err != nil {
+		return result, fmt.Errorf("failed to download query pack: %w", err)
+	}
+
+	// Write the query pack data to the filesystem
+	queryPackArchivePath := filepath.Join(tempDir, "query-pack.tar.gz")
+	if err := os.WriteFile(queryPackArchivePath, queryPackData, 0600); err != nil {
+		return result, fmt.Errorf("failed to write query pack archive to disk: %w", err)
+	}
+
+	// Make a directory and extract the query pack
+	queryPackPath := filepath.Join(tempDir, "pack")
+	if err := os.Mkdir(queryPackPath, 0600); err != nil {
+		return result, fmt.Errorf("failed to create query pack directory: %w", err)
+	}
+	if err := utils.UntarGz(queryPackArchivePath, queryPackPath); err != nil {
+		return result, fmt.Errorf("failed to extract query pack: %w", err)
+	}
+
+	databaseData, err := dbs.GetDatabase(job.Spec.NameWithOwner)
+	if err != nil {
+		return result, fmt.Errorf("failed to get database: %w", err)
+	}
+
+	// Write the CodeQL database data to the filesystem
+	databasePath := filepath.Join(tempDir, "database.zip")
+	if err := os.WriteFile(databasePath, databaseData, 0600); err != nil {
+		return result, fmt.Errorf("failed to write CodeQL database to disk: %w", err)
+	}

 	// Perform the CodeQL analysis
-	runResult, err := codeql.RunQuery("google_flatbuffers_db.zip", "cpp", queryPackPath, tempDir)
+	runResult, err := codeql.RunQuery(databasePath, job.QueryLanguage, queryPackPath, tempDir)
 	if err != nil {
 		return result, fmt.Errorf("failed to run analysis: %w", err)
 	}
@@ -107,21 +207,32 @@ func RunAnalysisJob(job common.AnalyzeJob) (common.AnalyzeResult, error) {
 		return result, fmt.Errorf("failed to generate results archive: %w", err)
 	}

-	// TODO: Upload the archive to storage
+	// Upload the archive to storage
 	slog.Debug("Results archive size", slog.Int("size", len(resultsArchive)))
+	resultsLocation, err := artifacts.SaveResult(job.Spec, resultsArchive)
+	if err != nil {
+		return result, fmt.Errorf("failed to save results archive: %w", err)
+	}

-	result = common.AnalyzeResult{
-		RequestId:        job.RequestId,
-		ResultCount:      runResult.ResultCount,
-		ResultArchiveURL: "REPLACE_THIS_WITH_STORED_RESULTS_ARCHIVE", // TODO
-		Status:           common.StatusSuccess,
+	result = queue.AnalyzeResult{
+		Spec:                 job.Spec,
+		ResultCount:          runResult.ResultCount,
+		ResultLocation:       resultsLocation,
+		Status:               common.StatusSuccess,
+		SourceLocationPrefix: runResult.SourceLocationPrefix,
+		DatabaseSHA:          runResult.DatabaseSHA,
 	}
 	return result, nil
 }

 // RunWorker runs a worker that processes jobs from queue
-func RunWorker(ctx context.Context, stopChan chan struct{}, queue queue.Queue, wg *sync.WaitGroup) {
+func RunWorker(ctx context.Context,
+	artifacts artifactstore.Store,
+	databases qldbstore.Store,
+	queue queue.Queue,
+	stopChan chan struct{},
+	wg *sync.WaitGroup) {
 	const (
 		WORKER_COUNT_STOP_MESSAGE   = "Worker stopping due to reduction in worker count"
 		WORKER_CONTEXT_STOP_MESSAGE = "Worker stopping due to context cancellation"
@@ -144,7 +255,7 @@ func RunWorker(ctx context.Context, stopChan chan struct{}, queue queue.Queue, w
 			return
 		}
 		slog.Info("Running analysis job", slog.Any("job", job))
-		result, err := RunAnalysisJob(job)
+		result, err := RunAnalysisJob(job, artifacts, databases)
 		if err != nil {
 			slog.Error("Failed to run analysis job", slog.Any("error", err))
 			continue


@@ -1,4 +1,13 @@
 package agent

-type Runner interface {
+import (
+	"github.com/hohn/mrvacommander/pkg/artifactstore"
+	"github.com/hohn/mrvacommander/pkg/qldbstore"
+	"github.com/hohn/mrvacommander/pkg/queue"
+)
+
+type Visibles struct {
+	Queue         queue.Queue
+	Artifacts     artifactstore.Store
+	CodeQLDBStore qldbstore.Store
 }


@@ -0,0 +1,28 @@
package artifactstore

import (
	"fmt"
	"github.com/hohn/mrvacommander/pkg/common"
)

// Restrict the keys / values for ArtifactLocation and centralize the common ones
// here
const (
	AF_BUCKETNAME_RESULTS = "results"
	AF_BUCKETNAME_PACKS   = "packs"
)

type ArtifactLocation struct {
	Key    string // location in bucket OR full location for file paths
	Bucket string // which bucket: packs or results
}

// deriveKeyFromSessionId generates a key for a query pack based on the job ID
func deriveKeyFromSessionId(sessionId int) string {
	return fmt.Sprintf("%d", sessionId)
}

// deriveKeyFromJobSpec generates a key for a result based on the JobSpec
func deriveKeyFromJobSpec(jobSpec common.JobSpec) string {
	return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
}


@@ -0,0 +1,20 @@
package artifactstore

import "github.com/hohn/mrvacommander/pkg/common"

type Store interface {
	// GetQueryPack retrieves the query pack from the specified location.
	GetQueryPack(location ArtifactLocation) ([]byte, error)

	// SaveQueryPack saves the query pack using the session ID and returns the artifact location.
	SaveQueryPack(sessionId int, data []byte) (ArtifactLocation, error)

	// GetResult retrieves the result from the specified location.
	GetResult(location ArtifactLocation) ([]byte, error)

	// GetResultSize retrieves the size of the result from the specified location.
	GetResultSize(location ArtifactLocation) (int, error)

	// SaveResult saves the result using the JobSpec and returns the artifact location.
	SaveResult(jobSpec common.JobSpec, data []byte) (ArtifactLocation, error)
}

Some files were not shown because too many files have changed in this diff.