53 Commits
rm ... master

Author SHA1 Message Date
47de30a56e Add uv support with pyproject.toml
- Create pyproject.toml with all dependencies from requirements.txt
- Configure for Python >=3.11
- Include standalone scripts via script-files
- Set README.org content-type for proper packaging
- Enable uv sync and uv run workflows
2025-11-25 10:26:21 -08:00
Michael Hohn
d2c7b98d1c add status fix code; to be tested 2025-08-28 15:11:13 -07:00
fb5adf1b5f add updated container dependency diagram 2025-08-12 09:37:06 -07:00
Michael Hohn
750187fb12 Fix: get the correct repo_id from postgres, use the mapping in responses 2025-08-04 16:13:12 -07:00
Michael Hohn
807d5f3d45 Fix: AddJob was completely wrong. The job_repo_id is a bijection; updated all code 2025-08-04 13:30:43 -07:00
Michael Hohn
1377d4cec9 start repoID counter at 1, not 0 2025-07-29 11:15:36 -07:00
ec8bb0cc63 fix repo id handling and PostgreSQL Query Ordering
Patch 1: PostgreSQL Query Ordering
Fixed GetJobList() to return jobs ordered by job_repo_id
Added JOIN with job_repo_map table to ensure proper ordering
This ensures slice indices match the stored repository IDs

Patch 2: Updated Comments
Removed the TODO comment about hacky job IDing
Added explanation that ordering is now consistent

Patch 3: Added Validation
Added runtime validation to catch ID mismatches
Logs warnings/errors if slice index doesn't match expected job_repo_id
Helps debug issues in different state implementations
2025-07-25 16:31:34 -07:00
8d7aa780ed fix: use postgres to generate gap/concurrency/race-safe job_repo_id
This takes the place of the mutex used by the in-memory version
2025-07-25 15:51:11 -07:00
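A minimal sketch of the approach described in the two commits above (ec8bb0cc63 and 8d7aa780ed), assuming an illustrative schema: a job_repo_map table with an identity column starting at 1, and an analyze_jobs table joined against it. Table, column, and method names here are assumptions for illustration, not the actual mrvacommander code; []string stands in for the real job type.

// PostgreSQL assigns job_repo_id (gap-, concurrency-, and race-safe, replacing
// the in-memory mutex), and job lists are read back ordered by that id so the
// slice index always matches the stored repository id.
package state

import (
	"context"
	"database/sql"
	"log/slog"
)

type PGState struct{ db *sql.DB }

// AddJob lets Postgres generate the per-session repository id via an
// identity/sequence column (starting at 1) and returns it.
func (s *PGState) AddJob(ctx context.Context, sessionID int, nameWithOwner string) (int, error) {
	var jobRepoID int
	err := s.db.QueryRowContext(ctx,
		`INSERT INTO job_repo_map (session_id, name_with_owner)
		 VALUES ($1, $2) RETURNING job_repo_id`,
		sessionID, nameWithOwner).Scan(&jobRepoID)
	return jobRepoID, err
}

// GetJobList joins against job_repo_map and orders by job_repo_id, with the
// runtime validation mentioned in the ordering commit.
func (s *PGState) GetJobList(ctx context.Context, sessionID int) ([]string, error) {
	rows, err := s.db.QueryContext(ctx,
		`SELECT m.job_repo_id, j.name_with_owner
		   FROM analyze_jobs j
		   JOIN job_repo_map m USING (session_id, name_with_owner)
		  WHERE j.session_id = $1
		  ORDER BY m.job_repo_id`, sessionID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var repos []string
	for i := 1; rows.Next(); i++ {
		var id int
		var name string
		if err := rows.Scan(&id, &name); err != nil {
			return nil, err
		}
		if id != i {
			slog.Warn("job_repo_id does not match slice position", "job_repo_id", id, "index", i)
		}
		repos = append(repos, name)
	}
	return repos, rows.Err()
}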
Michael Hohn
43a7143e27 fix: code logic updates: make sessionID consistent with original 2025-07-17 10:28:31 -07:00
0d6e31713f Create a new PGState, reading connection info from env vars 2025-07-14 13:26:23 -07:00
a2cfe0676c wip: update state_postgres, update dependent modules 2025-07-11 15:34:07 -07:00
f920a799d3 wip: postgres state: update SetupSchemas, add GetJobSpecByRepoId 2025-07-11 15:24:48 -07:00
41146f5aaf wip: postgres state: update SetupSchemas, finish ServerState interface 2025-07-11 14:47:30 -07:00
173a61e3fa wip: postgres state: add SetupSchemas, SetResult, GetResult 2025-07-11 14:26:07 -07:00
e294fcdf4f wip: first NewPGState 2025-07-11 14:07:51 -07:00
Michael Hohn
9fe6aed357 fix: add polling reconnect 2025-06-17 15:59:24 -07:00
Michael Hohn
3762654ef2 switch to polling rabbitmq; update images 2025-06-16 16:07:37 -07:00
d94f69be09 comment more code with math notation 2025-06-13 22:59:51 -07:00
1fd220416c start commenting code with math notation 2025-06-13 22:36:41 -07:00
df97e6ef10 mrva-interconnect with graph extraction description 2025-05-24 12:29:24 -07:00
2e99bdfedf \section{Graph Extraction from Log Table} 2025-05-24 12:26:49 -07:00
a507797eff mrva-interconnect: existing summary 2025-05-24 12:06:27 -07:00
Michael Hohn
0115e74d07 Add {packs,results} to minio store artifact names 2025-05-21 19:25:48 -07:00
Michael Hohn
8577e1775a Merge branch 'master' into mrva-missing-sarif 2025-05-14 10:46:29 -07:00
Michael Hohn
8590bd6de7 tuple hashing functions across languages 2025-05-14 10:38:17 -07:00
Michael Hohn
cf37b474e4 inline trivial functions 2025-05-13 19:01:30 -07:00
Michael Hohn
5bdbd60cc5 update s3 endpoint handling
virtual host extracts bucket name from endpoint environment variable;
path uses fixed bucket name
2025-05-13 10:47:54 -07:00
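A small sketch of the distinction made in commit 5bdbd60cc5: virtual-host style derives the bucket from the first label of the endpoint host (bucket.minio.store), while path style keeps the endpoint and uses a fixed bucket. The environment variable names and the fixed bucket below are assumptions for this illustration, not the actual configuration keys.

package qldbstore

import (
	"fmt"
	"os"
	"strings"
)

const fixedBucket = "qldb" // placeholder for the path-style bucket name

// bucketAndEndpoint returns the bucket to use and the endpoint to connect to.
func bucketAndEndpoint() (bucket, endpoint string, err error) {
	host := os.Getenv("CODEQL_DB_ENDPOINT")        // hypothetical variable name
	if os.Getenv("CODEQL_DB_VIRTUAL_HOST") != "" { // hypothetical style toggle
		// Virtual-host style: the first host label is the bucket name.
		label, rest, ok := strings.Cut(host, ".")
		if !ok {
			return "", "", fmt.Errorf("endpoint %q has no bucket label", host)
		}
		return label, rest, nil
	}
	// Path style: keep the endpoint as-is and use the fixed bucket name.
	return fixedBucket, host, nil
}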
Michael Hohn
bde8ac2db7 Drop dynamic worker count; set default to 1 2025-05-12 14:59:59 -07:00
Michael Hohn
75e57dc0a8 Fix codeql db extraction logic when examining possibly nested archive 2025-05-12 12:45:22 -07:00
Michael Hohn
c32ff755ef add more ignore patterns 2025-05-12 11:12:44 -07:00
Michael Hohn
19a936087f Remove REFROOT; simplify external hepc / spigot-cli call 2025-05-07 10:55:38 -07:00
Michael Hohn
bb6189322a Add sighelp*.go usage plan 2025-05-06 11:27:57 -07:00
Michael Hohn
f7dc5318e4 Start adding sighelp*.go files for gpt assist 2025-05-06 11:16:01 -07:00
Michael Hohn
70c06e4fae Add mrva dot overview 2025-04-30 10:50:54 -07:00
Michael Hohn
a2be014b2f Add mrva.man, a brief overview 2025-04-30 10:49:08 -07:00
Michael Hohn
58f4fe1ca7 Add nested zip file handling to agent 2025-04-10 10:47:24 -07:00
Michael Hohn
14d6057248 Add option MRVA_HEPC_DATAVIACLI, get hepc data using cli tool 2025-04-09 19:20:10 -07:00
Michael Hohn
01ddf38069 improve list formatting 2025-04-09 18:23:20 -07:00
Michael Hohn
47a021d84a updated to bucket.minio.store virtual host style 2025-03-26 09:02:08 -07:00
Michael Hohn
8d4c766e8c MRVA for CodeQL: A Business View 2025-03-17 12:36:29 -07:00
Michael Hohn
2409728960 fix: reconciled status names between server/agent/vscode-codeql 2025-03-14 12:45:36 -07:00
Michael Hohn
f066c767e2 section{Execution Loop in Pseudo-Code, hybrid} 2025-03-01 15:37:06 -08:00
Michael Hohn
397b86c735 latex positioning adjustments 2025-03-01 13:37:39 -08:00
Michael Hohn
511c544f6e latex positioning adjustments 2025-03-01 13:13:05 -08:00
Michael Hohn
bd74ed646f section{Execution Loop in Pseudo-Code, declarative} 2025-03-01 11:54:38 -08:00
Michael Hohn
45e40abf5d section{Execution Loop in Pseudo-Code, declarative} 2025-03-01 11:53:55 -08:00
Michael Hohn
a3593cbba2 Add technical report 2025-02-28 13:46:21 -08:00
Michael Hohn
a0185df9d5 workspace for all mrva 2025-02-25 14:59:21 -08:00
Michael Hohn
23e3ea9367 Add hepc integration 2025-01-30 14:40:46 -08:00
Michael Hohn
4140eaafc4 wip: hepc db store 2025-01-21 21:48:25 -08:00
Michael Hohn
3e47bd4adb fix typo 2024-12-17 10:28:12 -08:00
Michael Hohn
f92dfc89a2 Resolve bootstrap problem 2024-12-16 11:26:36 -08:00
Michael Hohn
a5bb232af2 Use full repository path name in place of mrvacommander 2024-12-13 10:54:35 -08:00
55 changed files with 5296 additions and 617 deletions

.gitattributes (vendored, 1 line changed)

@@ -1,2 +1,3 @@
*.zip filter=lfs diff=lfs merge=lfs -text
*.blob filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text

.gitignore (vendored, 10 lines changed)

@@ -57,3 +57,13 @@ notes/*.html
# Make timestamp files
mk.*
demo/containers/dbsdata/data/
demo/containers/dbsdata/tmp.dbsdata_backup.tar
client/qldbtools/db-collection-py-1/
mrva-overview.aux
mrva-overview.log
mrva-overview.synctex.gz
mrva-overview.toc
auto/

View File

@@ -3,6 +3,13 @@
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB.
"""
# /// script
# dependencies = [
# "pandas",
# "numpy",
# "minio",
# ]
# ///
import argparse
import qldbtools.utils as utils
import logging

View File

@@ -0,0 +1,138 @@
[project]
name = "qldbtools"
version = "0.1.0"
description = "A Python package for selecting sets of CodeQL databases to work on"
authors = [
{name = "Michael Hohn", email = "hohn@github.com"}
]
readme = {file = "README.org", content-type = "text/plain"}
requires-python = ">=3.11"
dependencies = [
"annotated-types>=0.7.0",
"anyio>=4.4.0",
"appnope>=0.1.4",
"argon2-cffi>=23.1.0",
"argon2-cffi-bindings>=21.2.0",
"arrow>=1.3.0",
"asttokens>=2.4.1",
"async-lru>=2.0.4",
"attrs>=24.2.0",
"babel>=2.16.0",
"beautifulsoup4>=4.12.3",
"bleach>=6.1.0",
"blinker>=1.9.0",
"certifi>=2024.7.4",
"cffi>=1.17.0",
"charset-normalizer>=3.3.2",
"click>=8.1.7",
"comm>=0.2.2",
"debugpy>=1.8.5",
"decorator>=5.1.1",
"defusedxml>=0.7.1",
"executing>=2.0.1",
"fastapi>=0.115.5",
"fastjsonschema>=2.20.0",
"flask>=3.1.0",
"fqdn>=1.5.1",
"h11>=0.14.0",
"httpcore>=1.0.5",
"httpx>=0.27.0",
"idna>=3.7",
"ipykernel>=6.29.5",
"ipython>=8.26.0",
"isoduration>=20.11.0",
"itsdangerous>=2.2.0",
"jedi>=0.19.1",
"jinja2>=3.1.4",
"json5>=0.9.25",
"jsonpointer>=3.0.0",
"jsonschema>=4.23.0",
"jsonschema-specifications>=2023.12.1",
"jupyter-events>=0.10.0",
"jupyter-lsp>=2.2.5",
"jupyter-client>=8.6.2",
"jupyter-core>=5.7.2",
"jupyter-server>=2.14.2",
"jupyter-server-terminals>=0.5.3",
"jupyterlab>=4.2.4",
"jupyterlab-pygments>=0.3.0",
"jupyterlab-server>=2.27.3",
"lckr-jupyterlab-variableinspector",
"markupsafe>=2.1.5",
"matplotlib-inline>=0.1.7",
"minio==7.2.8",
"mistune>=3.0.2",
"nbclient>=0.10.0",
"nbconvert>=7.16.4",
"nbformat>=5.10.4",
"nest-asyncio>=1.6.0",
"notebook-shim>=0.2.4",
"numpy>=2.1.0",
"overrides>=7.7.0",
"packaging>=24.1",
"pandas>=2.2.2",
"pandocfilters>=1.5.1",
"parso>=0.8.4",
"pexpect>=4.9.0",
"platformdirs>=4.2.2",
"plumbum>=1.9.0",
"prometheus-client>=0.20.0",
"prompt-toolkit>=3.0.47",
"psutil>=6.0.0",
"ptyprocess>=0.7.0",
"pure-eval>=0.2.3",
"pycparser>=2.22",
"pycryptodome>=3.20.0",
"pydantic>=2.10.2",
"pydantic-core>=2.27.1",
"pygments>=2.18.0",
"python-dateutil>=2.9.0.post0",
"python-json-logger>=2.0.7",
"pytz>=2024.1",
"pyyaml>=6.0.2",
"pyzmq>=26.1.1",
"referencing>=0.35.1",
"requests>=2.32.3",
"rfc3339-validator>=0.1.4",
"rfc3986-validator>=0.1.1",
"rpds-py>=0.20.0",
"send2trash>=1.8.3",
"six>=1.16.0",
"sniffio>=1.3.1",
"soupsieve>=2.6",
"stack-data>=0.6.3",
"starlette>=0.41.3",
"terminado>=0.18.1",
"tinycss2>=1.3.0",
"tornado>=6.4.1",
"traitlets>=5.14.3",
"types-python-dateutil>=2.9.0.20240821",
"typing-extensions>=4.12.2",
"tzdata>=2024.1",
"uri-template>=1.3.0",
"urllib3>=2.2.2",
"uvicorn>=0.32.1",
"wcwidth>=0.2.13",
"webcolors>=24.8.0",
"webencodings>=0.5.1",
"websocket-client>=1.8.0",
"werkzeug>=3.1.3",
]
[build-system]
requires = ["setuptools>=75.5.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools]
packages = ["qldbtools"]
script-files = [
"bin/mc-db-generate-selection",
"bin/mc-db-initial-info",
"bin/mc-db-populate-minio",
"bin/mc-db-refine-info",
"bin/mc-db-unique",
"bin/mc-db-view-info",
"bin/mc-hepc-init",
"bin/mc-hepc-serve",
"bin/mc-rows-from-mrva-list",
]

client/qldbtools/uv.lock (generated, new file, 2278 lines)

File diff suppressed because it is too large.

View File

@@ -1,73 +0,0 @@
package main

import (
	"context"
	"flag"
	"log"
	"log/slog"
	"os"
	"os/signal"
	"sync"
	"syscall"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/deploy"
)

func main() {
	slog.Info("Starting agent")

	workerCount := flag.Int("workers", 0, "number of workers")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	flag.Parse()

	// Apply 'loglevel' flag
	switch *logLevel {
	case "debug":
		slog.SetLogLoggerLevel(slog.LevelDebug)
	case "info":
		slog.SetLogLoggerLevel(slog.LevelInfo)
	case "warn":
		slog.SetLogLoggerLevel(slog.LevelWarn)
	case "error":
		slog.SetLogLoggerLevel(slog.LevelError)
	default:
		log.Printf("Invalid logging verbosity level: %s", *logLevel)
		os.Exit(1)
	}

	isAgent := true
	rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
	if err != nil {
		slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
		os.Exit(1)
	}
	defer rabbitMQQueue.Close()

	artifacts, err := deploy.InitMinIOArtifactStore()
	if err != nil {
		slog.Error("Failed to initialize artifact store", slog.Any("error", err))
		os.Exit(1)
	}

	databases, err := deploy.InitMinIOCodeQLDatabaseStore()
	if err != nil {
		slog.Error("Failed to initialize database store", slog.Any("error", err))
		os.Exit(1)
	}

	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())
	go agent.StartAndMonitorWorkers(ctx, artifacts, databases, rabbitMQQueue, *workerCount, &wg)
	slog.Info("Agent started")

	// Gracefully exit on SIGINT/SIGTERM
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	<-sigChan

	slog.Info("Shutting down agent")
	cancel()
	wg.Wait()
	slog.Info("Agent shutdown complete")
}

View File

@@ -1,158 +0,0 @@
// Copyright © 2024 github
// Licensed under the Apache License, Version 2.0 (the "License").

package main

import (
	"context"
	"flag"
	"log"
	"log/slog"
	"os"
	"os/signal"
	"path/filepath"
	"sync"
	"syscall"

	"mrvacommander/config/mcc"
	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/artifactstore"
	"mrvacommander/pkg/deploy"
	"mrvacommander/pkg/qldbstore"
	"mrvacommander/pkg/queue"
	"mrvacommander/pkg/server"
	"mrvacommander/pkg/state"
)

func main() {
	// Define flags
	helpFlag := flag.Bool("help", false, "Display help message")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")
	dbPathRoot := flag.String("dbpath", "", "Set the root path for the database store if using standalone mode.")

	// Custom usage function for the help flag
	flag.Usage = func() {
		log.Printf("Usage of %s:\n", os.Args[0])
		flag.PrintDefaults()
		log.Println("\nExamples:")
		log.Println("go run main.go --loglevel=debug --mode=container --dbpath=/path/to/db_dir")
	}

	// Parse the flags
	flag.Parse()

	// Handle the help flag
	if *helpFlag {
		flag.Usage()
		return
	}

	// Apply 'loglevel' flag
	switch *logLevel {
	case "debug":
		slog.SetLogLoggerLevel(slog.LevelDebug)
	case "info":
		slog.SetLogLoggerLevel(slog.LevelInfo)
	case "warn":
		slog.SetLogLoggerLevel(slog.LevelWarn)
	case "error":
		slog.SetLogLoggerLevel(slog.LevelError)
	default:
		log.Printf("Invalid logging verbosity level: %s", *logLevel)
		os.Exit(1)
	}

	// Process database root if standalone and not provided
	if *mode == "standalone" && *dbPathRoot == "" {
		slog.Warn("No database root path provided.")
		// Current directory of the Executable has a codeql directory. There.
		// Resolve the absolute directory based on os.Executable()
		execPath, err := os.Executable()
		if err != nil {
			slog.Error("Failed to get executable path", slog.Any("error", err))
			os.Exit(1)
		}
		*dbPathRoot = filepath.Dir(execPath) + "/codeql/dbs/"
		slog.Info("Using default database root path", "dbPathRoot", *dbPathRoot)
	}

	// Read configuration
	config := mcc.LoadConfig("mcconfig.toml")

	// Output configuration summary
	log.Printf("Help: %t\n", *helpFlag)
	log.Printf("Log Level: %s\n", *logLevel)
	log.Printf("Mode: %s\n", *mode)

	// Handle signals
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	// Apply 'mode' flag
	switch *mode {
	case "standalone":
		// Assemble single-process version
		sq := queue.NewQueueSingle(2)
		ss := state.NewLocalState(config.Storage.StartingID)
		as := artifactstore.NewInMemoryArtifactStore()
		ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore(*dbPathRoot)
		server.NewCommanderSingle(&server.Visibles{
			Queue:         sq,
			State:         ss,
			Artifacts:     as,
			CodeQLDBStore: ql,
		})

		var wg sync.WaitGroup
		ctx, cancel := context.WithCancel(context.Background())
		go agent.StartAndMonitorWorkers(ctx, as, ql, sq, 2, &wg)
		slog.Info("Started server and standalone agent")

		<-sigChan
		slog.Info("Shutting down...")
		cancel()
		wg.Wait()
		slog.Info("Agent shutdown complete")

	case "container":
		isAgent := false
		rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
		if err != nil {
			slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
			os.Exit(1)
		}
		defer rabbitMQQueue.Close()

		artifacts, err := deploy.InitMinIOArtifactStore()
		if err != nil {
			slog.Error("Failed to initialize artifact store", slog.Any("error", err))
			os.Exit(1)
		}

		databases, err := deploy.InitMinIOCodeQLDatabaseStore()
		if err != nil {
			slog.Error("Failed to initialize database store", slog.Any("error", err))
			os.Exit(1)
		}

		server.NewCommanderSingle(&server.Visibles{
			Queue:         rabbitMQQueue,
			State:         state.NewLocalState(config.Storage.StartingID),
			Artifacts:     artifacts,
			CodeQLDBStore: databases,
		})
		slog.Info("Started server in container mode.")

		<-sigChan

	default:
		slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster")
		os.Exit(1)
	}

	slog.Info("Server shutdown complete")
}

View File

@@ -38,12 +38,19 @@
# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata
# Get the data as tar file from the image
# Use mrvacommander_dbsdata to access the compose cluster
# EITHER
# Get the data as tar file from the image using container
rm -f dbsdata_backup.tar
docker run --rm \
-v mrvacommander_dbsdata:/data \
-v $(pwd):/backup \
busybox sh -c "tar cvf /backup/dbsdata_backup.tar ."
busybox sh -c "tar cf /backup/dbsdata_backup.tar /data"
# OR
# Use gnu tar on host. The macos tar adds extended attributes
# brew install gnu-tar
rm -f dbsdata_backup.tar && gtar cf dbsdata_backup.tar data/
# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .

demo/containers/dbsdata/dbsdata_backup.tar (binary, stored with Git LFS, new file; not shown)

doc/mrva-business.org (new file, 101 lines)

@@ -0,0 +1,101 @@
* MRVA for CodeQL: A Business View
** Introduction
The companion documents in this directory are mostly technical. The purpose of
this document is to explain, from a business perspective, what MRVA is and why
it matters.
To illustrate its impact, consider two real-world cases:
*** Case 1: Preventing Costly Security Failures
One of our customers faced a significant lawsuit due to inadequate security.
The root cause? Unaddressed technical risks in their code. The work we do
directly prevents similar vulnerabilities from reaching this stage.
While lawsuits of this scale are rare, security failures are not. More common
consequences include:
- Compliance violations (e.g., GDPR, SOC2 penalties)
- Security breaches leading to reputation damage
- Productivity loss from disruptive technical failures
Lawsuits may be exceptional, but code security failures occur daily. Our role
isn't just about preventing catastrophic losses; it's about avoiding the small,
accumulating failures that erode security, compliance, and trust over time.
*** Case 2: Identifying Hidden Risks at Scale
Another customer manages a massive software portfolio of 120,000+ distinct
codebases—a scale at which traditional security tools and manual review
processes become impractical.
- A few known vulnerabilities had already been identified and patched.
- Our analysis uncovered 30 additional high-risk instances, previously undetected.
These findings were critical because:
- Traditional security tools break down at scale. Most solutions work well for
isolated codebases but lack the capability to analyze patterns across
120,000 repositories.
- Complexity hides risk. Identifying these vulnerabilities required specialized
techniques beyond simple scanning—capable of handling variations,
context, and subtle exploit paths.
- Existing security processes failed to detect these vulnerabilities. Without
proactive intervention, these risks would have remained undetected until
a potential breach occurred.
This case highlights a critical gap in standard security practices. By leveraging
advanced, scalable analysis, we identified and mitigated risks that would have
otherwise gone unnoticed—demonstrating the value of proactive security
at scale.
** Why This Matters
These examples, along with others, reinforce the importance of proactive
security, especially in the context of MRVA. Security risks don't just exist
in theory; they have tangible business consequences.
MRVA provides a scalable, systematic approach to identifying and addressing
risks before they escalate—ensuring that security is a strategic advantage, not
just a cost.
** What is MRVA?
MRVA stands for /Multi-Repository Variant Analysis/. The concept is straightforward:
1. A /problem/ is identified in one codebase.
2. Variations of this problem (/variants/) can be defined.
3. The organization manages many code repositories (/multi-repository/).
4. A systematic /analysis/ is required to detect these variants across all repositories.
In practice:
- Steps 1 & 2: Defined through CodeQL queries, often custom-written for this purpose.
- Steps 3 & 4: Can be done manually but come with significant challenges.
*** Challenges of Manual Execution
Manually searching for these variants across multiple repositories is possible
but inefficient and error-prone due to:
- /High bookkeeping overhead/: Tracking thousands of repositories is
  cumbersome.
- /Heavy scripting requirements/: Expert /Unix scripting skills/ are
  necessary.
- /Scaling limitations/: Analyzing /thousands of repositories sequentially/
  is slow, and manual parallelization is impractical.
- /Cumbersome review process/: Results are stored as /raw text files/,
  requiring multiple processing steps for meaningful analysis.
*** MRVA: A Streamlined, Integrated Solution
Instead of relying on manual effort, MRVA is designed to /automate and
integrate/ the process.
- The system is designed to be /machine-driven/ and integrated into an
automated pipeline.
- Once incorporated, MRVA leverages the /CodeQL VS Code plugin/ to provide a
/seamless user experience/.
- How it works:
- Users submit queries through the UI.
- Results are retrieved and displayed dynamically as they become available.
- The entire workflow is automated, scalable, and significantly more
efficient than manual methods.
By eliminating manual inefficiencies, MRVA enables organizations to identify
and resolve security issues across massive codebases at scale, ensuring both
accuracy and speed in vulnerability detection.

doc/mrva-interconnect.ltx (new file, 331 lines)

@@ -0,0 +1,331 @@
\documentclass[11pt]{article}
% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}
% increase nesting depth
\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}
% Load CM Bright for math
\usepackage{amsmath} % Standard math package
\usepackage{amssymb} % Additional math symbols
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans
\usepackage{fourier}
% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default
% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
language=Python,
basicstyle=\ttfamily\small, % Monospaced font
commentstyle=\itshape\color{gray}, % Italic and gray for comments
keywordstyle=\color{blue}, % Keywords in blue
stringstyle=\color{red}, % Strings in red
mathescape=true, % Enable math in comments
breaklines=true, % Break long lines
numbers=left, % Add line numbers
numberstyle=\tiny\color{gray}, % Style for line numbers
frame=single, % Add a frame around the code
}
\usepackage{newfloat} % Allows creating custom float types
% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
fileext=lol,
listname=List of Listings,
name=Listing
]{listing}
% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}
% Title, Author, and Date (or Report Number)
\title{MRVA component interconnections}
\author{Michael Hohn}
\date{Technical Report 20250524}
\hypersetup{
pdfauthor={Michael Hohn},
pdftitle={MRVA component interconnections},
pdfkeywords={},
pdfsubject={},
pdfcreator={Emacs 29.1},
pdflang={English}}
\begin{document}
\maketitle
\tableofcontents
\section{Overview}
\label{sec:overview}
The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:
\begin{itemize}
\item {\textbf{Server}}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}
The execution process follows a structured workflow:
\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}
This full round-trip can be expressed as:
\begin{equation}
\text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
\xrightarrow{\text{enqueue}}
\text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
\xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
\text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}
\section{Symbols and Notation}
\label{sec:orgb695d5a}
We define the following symbols for entities in the system:
\begin{center}
\begin{tabular}{lll}
Concept & Symbol & Description \\[0pt]
\hline
Client & \(C\) & The source of the query submission \\[0pt]
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
Agent Set & \(A\) & The set of all available agents \\[0pt]
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
\end{tabular}
\end{center}
\section{Full Round-Trip Representation}
\label{sec:full-round-trip}
The full round-trip execution, from query submission to result delivery, can be summarized as:
\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
\xrightarrow{\text{poll}}
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
\]
\begin{itemize}
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
\end{itemize}
\section{Result Representation}
For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]
where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query
\(\mathcal{Q}_j\)
on repository \(\mathcal{R}_i\).
\end{itemize}
An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
\[
r_{i,j,k}
\]
\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
\]
Each result can be further indexed to track multiple repositories and result sets.
\section{Graph Extraction from Log Table}
Assume we have a structured event log represented as a set of tuples.
\subsection*{Event Log Structure}
Let
\[
\mathcal{T} = \{ t_1, t_2, \dots, t_n \}
\]
be the set of all events, where each event
\[
t_i = (\mathit{id}_i, \tau_i, a_i, e_i, q_i, r_i, c_i)
\]
consists of:
\begin{itemize}
\item \(\mathit{id}_i\): unique event ID
\item \(\tau_i\): timestamp
\item \(a_i\): actor (e.g., ``agent\_alpha1'')
\item \(e_i\): event type (e.g., ``enqueue'', ``execute'')
\item \(q_i\): query ID
\item \(r_i\): repository ID
\item \(c_i\): result count (may be \(\bot\) if not applicable)
\end{itemize}
Let
\[
\mathcal{G} = (V, E)
\]
be a directed graph constructed from \(\mathcal{T}\), with vertices \(V\) and edges \(E\).
\subsection*{Graph Definition}
\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &\subseteq V \times V
\end{align*}
Edges capture temporal or semantic relationships between events.
\subsection*{Construction Steps}
\paragraph{1. Partition by Job Identity}
Define the set of job identifiers:
\[
J = \{ (q, r) \mid \exists i: q_i = q \land r_i = r \}
\]
Then for each \((q, r) \in J\), define:
\[
\mathcal{T}_{q,r} = \{ t_i \in \mathcal{T} \mid q_i = q \land r_i = r \}
\]
\paragraph{2. Sort by Time}
Order each \(\mathcal{T}_{q,r}\) as a list:
\[
\mathcal{T}_{q,r} = [ t_{i_1}, t_{i_2}, \dots, t_{i_k} ]
\quad \text{such that } \tau_{i_j} < \tau_{i_{j+1}}
\]
\paragraph{3. Causal Edges}
Define within-job edges:
\[
E_{q,r} = \{ (\mathit{id}_{i_j}, \mathit{id}_{i_{j+1}}) \mid 1 \leq j < k \}
\]
\paragraph{4. Global Causal Graph}
Take the union:
\[
E_{\text{causal}} = \bigcup_{(q, r) \in J} E_{q,r}
\]
\paragraph{5. Semantic Edges (Optional)}
Define semantic predicates such as:
\[
\mathsf{pulls}(i, j) \iff e_i = \text{enqueue} \land e_j = \text{pull} \land
q_i = q_j \land r_i = r_j \land \tau_i < \tau_j \land a_i = \text{server} \land a_j = \text{agent}
\]
Then:
\[
E_{\text{semantic}} = \{ (\mathit{id}_i, \mathit{id}_j) \mid \mathsf{pulls}(i, j) \}
\]
\subsection*{Final Graph}
\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &= E_{\text{causal}} \cup E_{\text{semantic}}
\end{align*}
\subsection*{Notes}
\begin{itemize}
\item This construction is generic: the log store \(\mathcal{T}\) may come from a database, file, or tuple-indexed dictionary.
\item Each semantic edge rule corresponds to a logical filter/join over \(\mathcal{T}\).
\item The construction is schema-free on the graph side and can be recomputed on demand with different edge logic.
\end{itemize}
\end{document}
%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: nil
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
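As a companion to the graph-extraction section of mrva-interconnect.ltx above, here is a small Go sketch of the same construction. The Event fields mirror the tuple components (id, tau, a, e, q, r, c); the struct, field, and function names are chosen for this illustration only and are not taken from the mrvacommander sources.

package main

import (
	"fmt"
	"sort"
	"strings"
	"time"
)

type Event struct {
	ID      int
	Time    time.Time
	Actor   string // e.g. "server", "agent_alpha1"
	Type    string // e.g. "enqueue", "pull", "execute"
	QueryID string
	RepoID  string
	Count   int // result count; ignored when not applicable
}

type Edge struct{ From, To int }

// causalEdges partitions events by (query, repo), sorts each partition by
// timestamp, and links consecutive events: E_{q,r} = {(id_j, id_{j+1})}.
func causalEdges(events []Event) []Edge {
	parts := map[[2]string][]Event{}
	for _, e := range events {
		k := [2]string{e.QueryID, e.RepoID}
		parts[k] = append(parts[k], e)
	}
	var edges []Edge
	for _, evs := range parts {
		sort.Slice(evs, func(i, j int) bool { return evs[i].Time.Before(evs[j].Time) })
		for i := 0; i+1 < len(evs); i++ {
			edges = append(edges, Edge{evs[i].ID, evs[i+1].ID})
		}
	}
	return edges
}

// pullsEdges implements the example semantic rule: a server "enqueue" event is
// linked to a strictly later agent "pull" event for the same (query, repo).
func pullsEdges(events []Event) []Edge {
	var edges []Edge
	for _, a := range events {
		if a.Type != "enqueue" || a.Actor != "server" {
			continue
		}
		for _, b := range events {
			if b.Type == "pull" && strings.HasPrefix(b.Actor, "agent") &&
				a.QueryID == b.QueryID && a.RepoID == b.RepoID && a.Time.Before(b.Time) {
				edges = append(edges, Edge{a.ID, b.ID})
			}
		}
	}
	return edges
}

func main() {
	t0 := time.Now()
	events := []Event{
		{ID: 1, Time: t0, Actor: "server", Type: "enqueue", QueryID: "q1", RepoID: "r1"},
		{ID: 2, Time: t0.Add(time.Second), Actor: "agent_alpha1", Type: "pull", QueryID: "q1", RepoID: "r1"},
		{ID: 3, Time: t0.Add(2 * time.Second), Actor: "agent_alpha1", Type: "execute", QueryID: "q1", RepoID: "r1", Count: 7},
	}
	// causal edges: (1,2), (2,3); semantic edge: pulls(1,2)
	fmt.Println(append(causalEdges(events), pullsEdges(events)...))
}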

doc/mrva-interconnect.pdf (binary, new file; not shown)

doc/mrva-overview.pdf (binary, new file; not shown)

doc/mrva-overview.tex (new file, 605 lines)

@@ -0,0 +1,605 @@
\documentclass[11pt]{article}
% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}
% increase nesting depth
\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}
% Load CM Bright for math
\usepackage{amsmath} % Standard math package
\usepackage{amssymb} % Additional math symbols
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans
\usepackage{fourier}
% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default
% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
language=Python,
basicstyle=\ttfamily\small, % Monospaced font
commentstyle=\itshape\color{gray}, % Italic and gray for comments
keywordstyle=\color{blue}, % Keywords in blue
stringstyle=\color{red}, % Strings in red
mathescape=true, % Enable math in comments
breaklines=true, % Break long lines
numbers=left, % Add line numbers
numberstyle=\tiny\color{gray}, % Style for line numbers
frame=single, % Add a frame around the code
}
\usepackage{newfloat} % Allows creating custom float types
% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
fileext=lol,
listname=List of Listings,
name=Listing
]{listing}
% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}
% Title, Author, and Date (or Report Number)
\title{MRVA for CodeQL}
\author{Michael Hohn}
\date{Technical Report 20250224}
\hypersetup{
pdfauthor={Michael Hohn},
pdftitle={MRVA for CodeQL},
pdfkeywords={},
pdfsubject={},
pdfcreator={Emacs 29.1},
pdflang={English}}
\begin{document}
\maketitle
\tableofcontents
\section{MRVA System Architecture Summary}
The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:
\begin{itemize}
\item {\textbf{Server}}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}
On the client side, users can interact with the system in two ways:
\begin{itemize}
\item {\textbf{VSCode-CodeQL}}: A graphical interface integrated with Visual Studio Code.
\item \textbf{gh-mrva CLI}: A command-line interface that connects to the server in a similar way.
\end{itemize}
This architecture enables a robust and flexible workflow for code analysis, combining a containerized back-end with both graphical and CLI front-end tools.
The full system details can be seen in the source code. This document provides an
overview.
\section{Distributed Query Execution in MRVA}
\subsection{Execution Overview}
The \textit{MRVA system} is a distributed platform for executing \textit{CodeQL
queries} across multiple repositories using a set of worker agents. The system is
{containerized} and built around a set of core services:
\begin{itemize}
\item \textbf{Server}: Coordinates job distribution and result aggregation.
\item \textbf{Agents}: Execute queries independently and return results.
\item \textbf{RabbitMQ}: Handles messaging between system components.
\item \textbf{MinIO}: Stores query inputs and execution results.
\item \textbf{HEPC}: Serves CodeQL databases over HTTP.
\end{itemize}
Clients interact with MRVA via \texttt{VSCode-CodeQL} (a graphical interface) or
\texttt{gh-mrva CLI} (a command-line tool), both of which submit queries to the
server.
The execution process follows a structured workflow:
\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}
This full round-trip can be expressed as:
\begin{equation}
\text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
\xrightarrow{\text{enqueue}}
\text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
\xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
\text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}
where the Client submits queries to the Server, which enqueues jobs in the
Queue. Agents execute the queries, returning results $\mathcal{Q}(\mathcal{R}_i)$
to the Server and ultimately back to the Client.
A more rigorous description of this is in section \ref{sec:full-round-trip}.
\subsection{System Structure Overview}
This design allows for scalable and efficient query execution across multiple
repositories, whether on a single machine or a distributed cluster. The key idea
is that both setups follow the same structural approach:
\begin{itemize}
\item \textbf{Single machine setup:}
\begin{itemize}
\item Uses \textit{at least 5 Docker containers} to manage different
components of the system.
\item The number of \textit{agent containers} (responsible for executing
queries) is constrained by the available \textit{RAM and CPU cores}.
\end{itemize}
\item \textbf{Cluster setup:}
\begin{itemize}
\item Uses \textit{at least 5 virtual machines (VMs) and / or Docker containers}.
\item The number of \textit{agent VMs} is limited by \textit{network bandwidth
and available resources} (e.g., distributed storage and inter-node communication
overhead).
\end{itemize}
\end{itemize}
Thus:
\begin{itemize}
\item The {functional architecture is identical} between the single-machine and cluster setups.
\item The {primary difference} is in \textit{scale}:
\begin{itemize}
\item A single machine is limited by \textit{local CPU and RAM}.
\item A cluster is constrained by \textit{network and inter-node coordination overhead} but allows for higher overall compute capacity.
\end{itemize}
\end{itemize}
\subsection{Messages and their Types}
\label{sec:msg-types}
The following table enumerates the types (messages) passed from Client to Server.
\begin{longtable}{|p{5cm}|p{5cm}|p{5cm}|}
\hline
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
\hline
\endfirsthead
\hline
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
\hline
\endhead
\hline
\endfoot
\hline
\endlastfoot
ServerState & NextID & () $\rightarrow$ int \\
& GetResult & JobSpec $\rightarrow$ IO (Either Error AnalyzeResult) \\
& GetJobSpecByRepoId & (int, int) $\rightarrow$ IO (Either Error JobSpec) \\
& SetResult & (JobSpec, AnalyzeResult) $\rightarrow$ IO () \\
& GetJobList & int $\rightarrow$ IO (Either Error \textbf{[AnalyzeJob]}) \\
& GetJobInfo & JobSpec $\rightarrow$ IO (Either Error JobInfo) \\
& SetJobInfo & (JobSpec, JobInfo) $\rightarrow$ IO () \\
& GetStatus & JobSpec $\rightarrow$ IO (Either Error Status) \\
& SetStatus & (JobSpec, Status) $\rightarrow$ IO () \\
& AddJob & AnalyzeJob $\rightarrow$ IO () \\
\hline
JobSpec & sessionID & int \\
& nameWithOwner & string \\
\hline
AnalyzeResult & spec & JobSpec \\
& status & Status \\
& resultCount & int \\
& resultLocation & ArtifactLocation \\
& sourceLocationPrefix & string \\
& databaseSHA & string \\
\hline
ArtifactLocation & Key & string \\
& Bucket & string \\
\hline
AnalyzeJob & Spec & JobSpec \\
& QueryPackLocation & ArtifactLocation \\
& QueryLanguage & QueryLanguage \\
\hline
QueryLanguage & & string \\
\hline
JobInfo & QueryLanguage & string \\
& CreatedAt & string \\
& UpdatedAt & string \\
& SkippedRepositories & SkippedRepositories \\
\hline
SkippedRepositories & AccessMismatchRepos & AccessMismatchRepos \\
& NotFoundRepos & NotFoundRepos \\
& NoCodeqlDBRepos & NoCodeqlDBRepos \\
& OverLimitRepos & OverLimitRepos \\
\hline
AccessMismatchRepos & RepositoryCount & int \\
& Repositories & \textbf{[Repository]} \\
\hline
NotFoundRepos & RepositoryCount & int \\
& RepositoryFullNames & \textbf{[string]} \\
\hline
Repository & ID & int \\
& Name & string \\
& FullName & string \\
& Private & bool \\
& StargazersCount & int \\
& UpdatedAt & string \\
\end{longtable}
\section{Symbols and Notation}
\label{sec:orgb695d5a}
We define the following symbols for entities in the system:
\begin{center}
\begin{tabular}{lll}
Concept & Symbol & Description \\[0pt]
\hline
\href{vscode://file//Users/hohn/work-gh/mrva/gh-mrva/README.org:39:1}{Client} & \(C\) & The source of the query submission \\[0pt]
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
Agent Set & \(A\) & The set of all available agents \\[0pt]
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
\end{tabular}
\end{center}
\section{Full Round-Trip Representation}
\label{sec:full-round-trip}
The full round-trip execution, from query submission to result delivery, can be summarized as:
\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
\xrightarrow{\text{poll}}
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
\]
\begin{itemize}
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
\end{itemize}
\section{Result Representation}
For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]
where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query
\(\mathcal{Q}_j\)
on repository \(\mathcal{R}_i\).
\end{itemize}
An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
\[
r_{i,j,k}
\]
\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
\]
Each result can be further indexed to track multiple repositories and result sets.
\section{Execution Loop in Pseudo-Code}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm}
\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results
# Initialization
$\mathcal{R}$ = set() # Repository list
$Q$ = [] # Job queue
$A$ = set() # Set of agents
$\mathcal{R}_i^{\mathcal{Q}}$ = {} # Result storage for each repository
# Initialize result sets for each repository
for $R_i$ in $\mathcal{R}$:
$\mathcal{R}_i^{\mathcal{Q}} = \{\}$ # Initialize empty result set
# Enqueue the entire query suite for all repositories
for $R_i$ in $\mathcal{R}$:
$Q$.append(($\mathcal{Q}$, $R_i$)) # Enqueue $(\mathcal{Q}, \mathcal{R}_i)$ pair
# Processing loop while there are jobs in the queue
while $Q \neq \emptyset$:
# Agents autonomously poll the queue
for $\alpha$ in $A$:
if $\alpha$.is_available():
$(\mathcal{Q}, \mathcal{R}_i)$ = $Q$.pop(0) # Agent polls a job
# Agent execution begins
$\mathcal{R}_i^{\mathcal{Q}} = \{\}$ # Initialize results for repository $R_i$
for $\mathcal{Q}_j$ in $\mathcal{Q}$:
# Execute query $\mathcal{Q}_j$ on repository $\mathcal{R}_i$
$r_{i,j,1}, \dots, r_{i,j,k_{i,j}}$ = $\alpha$.execute($\mathcal{Q}_j$, $R_i$)
# Store results for query $j$
$\mathcal{R}_i^{\mathcal{Q}_j} = \{r_{i,j,1}, \dots, r_{i,j,k_{i,j}}\}$
# Accumulate results
$\mathcal{R}_i^{\mathcal{Q}} = \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}$
# Send all accumulated results back to the server
$\alpha$.send_results($S$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
# Server sends results for $(\mathcal{Q}, \mathcal{R}_i)$ back to the client
$S$.send_results_to_client($C$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
\end{lstlisting}
\end{listing}
\FloatBarrier
\section{Execution Loop in Pseudo-Code, declarative}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm}
\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results
# Define initial state
$\mathcal{R}$: set # Set of repositories
$\mathcal{Q}$: set # Set of queries
A: set # Set of agents
Q: list # Queue of $(\mathcal{Q}, \mathcal{R}_i)$ pairs
$\mathcal{R}_{\text{results}}$: dict = {} # Mapping of repositories to their accumulated query results
# Initialize result sets for each repository
$\mathcal{R}_{\text{results}}$ = {$\mathcal{R}_i$: set() for $\mathcal{R}_i$ in $\mathcal{R}$}
# Define job queue as an immutable mapping
Q = [($\mathcal{Q}$, $\mathcal{R}_i$) for $\mathcal{R}_i$ in $\mathcal{R}$]
# Processing as a declarative iteration over the job queue
def execute_queries(agents, job_queue, repository_results):
def available_agents():
return {$\alpha$ for $\alpha$ in agents if $\alpha$.is_available()}
def process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$):
results = {$\mathcal{Q}_j$: $\alpha$.execute($\mathcal{Q}_j$, $\mathcal{R}_i$) for $\mathcal{Q}_j$ in $\mathcal{Q}$}
return $\mathcal{R}_i$, results
def accumulate_results($\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$, query_results):
return {**$\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$: $\mathcal{R}_{\text{results}}$[$\mathcal{R}_i$] | set().union(*query_results.values())}
while job_queue:
active_agents = available_agents()
for $\alpha$ in active_agents:
$\mathcal{Q}$, $\mathcal{R}_i$ = job_queue[0] # Peek at the first job
_, query_results = process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$)
repository_results = accumulate_results(repository_results, $\mathcal{R}_i$, query_results)
$\alpha$.send_results(S, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
S.send_results_to_client(C, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
job_queue = job_queue[1:] # Move to the next job
return repository_results
# Execute the distributed query process
$\mathcal{R}_{\text{results}}$ = execute_queries(A, Q, $\mathcal{R}_{\text{results}}$)
\end{lstlisting}
\end{listing}
\FloatBarrier
\newpage{}
\section{Execution Loop in Pseudo-Code, algorithmic}
\begin{algorithm}
\caption{Distribute a set of queries $\mathcal{Q}$ across repositories
$\mathcal{R}$ using agents $A$}
\begin{algorithmic}[1] % Line numbering enabled
\Procedure{DistributedQueryExecution}{$\mathcal{Q}, \mathcal{R}, A$}
\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
\Comment{Initialize result sets for each repository and query}
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$
\EndFor
\State $Q \gets \left\{ \, \right\}$ \Comment{Initialize empty job queue}
\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
\Comment{Enqueue the entire query suite across all repositories}
\State $S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q$
\EndFor
\While{$Q \neq \emptyset$}
\Comment{Agents poll the queue for available jobs}
\ForAll{$\alpha \in A$ \textbf{where} $\alpha$ \text{is available}}
\State $\alpha \xleftarrow{\text{poll}(Q)}$ \Comment{Agent autonomously retrieves a job}
% --- Begin Agent Execution Block ---
\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Begins}
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$ \Comment{Initialize result set for this repository}
\ForAll{$\mathcal{Q}_j \in \mathcal{Q}$}
\State $\mathcal{R}_i^{\mathcal{Q}_j} \gets \left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}$
\Comment{Collect results for query $j$ on repository $i$}
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}}
\cup \mathcal{R}_i^{\mathcal{Q}_j}$
\Comment{Accumulate results}
\EndFor
\State $\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S$
\Comment{Agent sends all accumulated results back to server}
\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent
Execution Ends}
% --- End Agent Execution Block ---
\State $S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C$
\Comment{Server sends results for repository $i$ back to the client}
\EndFor
\EndWhile
\EndProcedure
\end{algorithmic}
\end{algorithm}
\FloatBarrier
\section{Execution Loop in Pseudo-Code, hybrid}
\label{sec:orgb767ab2}
{\textbf{Algorithm:} Distribute a set of queries \(\mathcal{Q}\) across repositories \(\mathcal{R}\) using agents \(A\)}
\begin{enumerate}
\item \textbf{\textbf{Initialization}}
\begin{itemize}
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
\begin{itemize}
\item Initialize result sets: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
\end{itemize}
\item Initialize an empty job queue: \(Q \gets \{\}\).
\end{itemize}
\item \textbf{\textbf{Enqueue Queries}}
\begin{itemize}
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
\begin{itemize}
\item Enqueue the entire query suite: \(S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q\).
\end{itemize}
\end{itemize}
\item \textbf{\textbf{Execution Loop}}
\begin{itemize}
\item While \(Q \neq \emptyset\): (agents poll the queue for available jobs)
\begin{itemize}
\item For each available agent \(\alpha \in A\):
\begin{itemize}
\item Agent autonomously retrieves a job: \(\alpha \xleftarrow{\text{poll}(Q)}\).
\item \textbf{\textbf{Agent Execution Block}}
\begin{itemize}
\item Initialize result set for this repository: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
\item For each query \(\mathcal{Q}_j \in \mathcal{Q}\):
\begin{itemize}
\item Collect results:
\(\mathcal{R}_i^{\mathcal{Q}_j} \gets \{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \}\).
\item Accumulate results:
\(\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}\).
\end{itemize}
\item Agent sends all accumulated results back to the server:
\(\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S\).
\end{itemize}
\end{itemize}
\end{itemize}
\end{itemize}
\item \textbf{\textbf{Agent Sends Results}}
\begin{itemize}
\item Server sends results for repository \(i\) back to the client:
\(S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C\).
\end{itemize}
\end{enumerate}
\end{document}
%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: t
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
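For readers who prefer code to the type table, here is an approximate Go rendering of the ServerState messages listed in the "Messages and their Types" table of doc/mrva-overview.tex above. The method set and field names come from that table; the exact signatures in the mrvacommander sources may differ, and IO (Either Error T) is rendered here as Go's (T, error) pair.

package state

type Status string
type QueryLanguage string

type JobSpec struct {
	SessionID     int
	NameWithOwner string
}

type ArtifactLocation struct {
	Key    string
	Bucket string
}

type AnalyzeJob struct {
	Spec              JobSpec
	QueryPackLocation ArtifactLocation
	QueryLanguage     QueryLanguage
}

type AnalyzeResult struct {
	Spec                 JobSpec
	Status               Status
	ResultCount          int
	ResultLocation       ArtifactLocation
	SourceLocationPrefix string
	DatabaseSHA          string
}

type ServerState interface {
	NextID() int
	AddJob(job AnalyzeJob) error
	GetJobList(sessionID int) ([]AnalyzeJob, error)
	GetJobSpecByRepoId(sessionID, repoID int) (JobSpec, error)
	GetResult(spec JobSpec) (AnalyzeResult, error)
	SetResult(spec JobSpec, result AnalyzeResult) error
	GetStatus(spec JobSpec) (Status, error)
	SetStatus(spec JobSpec, status Status) error
	// GetJobInfo / SetJobInfo (JobInfo, SkippedRepositories, ...) omitted here.
}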

doc/mrva.dot (new file, 56 lines)

@@ -0,0 +1,56 @@
digraph mrvacommander {
    rankdir=LR;
    node [shape=box style=filled fillcolor=lightgrey fontname="monospace"];

    // Entry points
    cmd_server [label="cmd/server\nmain()", fillcolor=lightblue];
    cmd_agent [label="cmd/agent\nmain()", fillcolor=lightblue];

    // Config
    config [label="config/mcc\nparseEnv()", shape=ellipse, fillcolor=lightyellow];

    // Server-side
    server [label="pkg/server\nServer.Run()"];
    deploy [label="pkg/deploy\nInit()"];
    qldbstore [label="pkg/qldbstore\nQLDB Store"];
    artifactstore [label="pkg/artifactstore\nArtifact Store"];
    queue [label="pkg/queue\nQueue Interface"];

    // Agent-side
    agent [label="pkg/agent\nAgent.Run()"];
    state [label="pkg/state\nState"];
    codeql [label="pkg/codeql\nrunCodeQL()"];

    // Common
    common [label="pkg/common\nTypes, MinIO, Jobs"];
    utils [label="utils\nDownload, Archive"];

    // Edges: config used by both
    cmd_server -> config;
    cmd_agent -> config;

    // Server wiring
    cmd_server -> server;
    server -> queue;
    server -> artifactstore;
    server -> qldbstore;

    // Agent wiring
    cmd_agent -> agent;
    agent -> queue;
    agent -> codeql;
    agent -> artifactstore;
    agent -> state;

    // Shared deps
    server -> common;
    agent -> common;
    codeql -> common;
    qldbstore -> common;
    artifactstore -> common;

    // Utils used by backends
    qldbstore -> utils;
    artifactstore -> utils;
    codeql -> utils;
}

doc/mrva.man (new file, 84 lines)

@@ -0,0 +1,84 @@
.TH MRVACOMMANDER 7 "April 2025" "MRVA Project" "System Overview"
.SH NAME
mrvacommander \- distributed CodeQL task queue and execution system
.SH SYNOPSIS
.B server
.RI [ environment ]
.br
.B agent
.RI [ environment ]
.SH DESCRIPTION
mrvacommander coordinates analysis jobs over multiple worker nodes using queues, pluggable storage, and CodeQL execution. It consists of multiple interacting packages and entry points.
.SH STRUCTURE
.TP
.B cmd/server
Entry point. Loads configuration, initializes dependencies, runs queue subscriber with a dispatcher.
.TP
.B cmd/agent
Entry point. Loads configuration, runs a processing loop: receive job, execute query, save result, update state.
.SH CONFIGURATION
.TP
.B config/mcc
Parses environment variables into structured configuration. Modules include:
.IR queue ,
.IR storage ,
.IR logger ,
.IR commander .
.SH SERVER SIDE MODULES
.TP
.B pkg/server
Initializes:
queue backend
QLDB store
artifact store
Subscribes to queue and dispatches jobs to handler.
.TP
.B pkg/deploy
Deployment helpers: validate environment variables, bootstrap key services.
.SH AGENT SIDE MODULES
.TP
.B pkg/agent
Receives jobs, executes CodeQL queries, stores outputs, marks completion.
.TP
.B pkg/state
Tracks which jobs have been completed. Local file-backed.
.SH SHARED MODULES
.TP
.B pkg/common
Core types: Job, JobOutput, NameWithOwner, Query.
Includes MinIO wrappers, external API access, and job spec parsing.
.TP
.B pkg/codeql
Defines query structure and executes CodeQL against a database.
.TP
.B pkg/qldbstore
Provides read-only access to CodeQL databases via:
- MinIO (S3)
- HTTP (hepc)
- Filesystem
.TP
.B pkg/artifactstore
Persists job results. Implementations:
- MinIO
- Memory
.TP
.B pkg/queue
Job queue interface. Implementations:
- RabbitMQ
- In-memory single-node
.TP
.B utils
Generic helpers:
- HTTP download
- tar.gz extraction
.SH SEE ALSO
.BR codeql (1),
.BR rabbitmq-server (1),
.BR minio (1)
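The man page above describes the agent only in prose (receive job, execute query, save result, update state). Below is a minimal Go sketch of such a loop, with stand-in interfaces for pkg/queue, pkg/codeql, pkg/artifactstore and pkg/state; all names, signatures, and status strings here are assumptions for illustration, not the actual package APIs.

package agent

import (
	"context"
	"log/slog"
)

type Job struct {
	ID        int
	Repo      string
	QueryPack string
}

type Queue interface{ Receive(ctx context.Context) (Job, error) }
type Runner interface{ Run(ctx context.Context, j Job) ([]byte, error) }
type Artifacts interface{ Save(jobID int, sarif []byte) error }
type State interface{ SetStatus(jobID int, status string) error }

func runLoop(ctx context.Context, q Queue, r Runner, a Artifacts, s State) {
	for {
		job, err := q.Receive(ctx) // poll/block until a job arrives or ctx is done
		if err != nil {
			if ctx.Err() != nil {
				return // shutting down
			}
			slog.Error("receive failed", "error", err)
			continue
		}
		out, err := r.Run(ctx, job) // run the CodeQL query pack on the repo's database
		if err != nil {
			slog.Error("query failed", "job", job.ID, "error", err)
			_ = s.SetStatus(job.ID, "failed")
			continue
		}
		if err := a.Save(job.ID, out); err != nil { // persist results (e.g. SARIF)
			slog.Error("save failed", "job", job.ID, "error", err)
			_ = s.SetStatus(job.ID, "failed")
			continue
		}
		_ = s.SetStatus(job.ID, "succeeded")
	}
}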

doc/mrva.pdf (binary, new file; not shown)

View File

@@ -4,8 +4,8 @@ services:
   dbssvc:
     ## image: ghcr.io/hohn/dbsdata-container:0.1.24
     build:
-      context: .
-      dockerfile: ./demo/containers/dbsdata/Dockerfile
+      context: ./demo/containers/dbsdata
+      dockerfile: Dockerfile
     container_name: dbssvc
     volumes:
       - dbsdata:/data/mrvacommander/dbstore-data

View File

@@ -0,0 +1,22 @@
* tuple hashing functions across languages
There are three parallel implementations of a hash function over each entry of a
tuple list. The functions produce identical results across the three languages
and can be used across agent / server / client.
#+BEGIN_SRC sh
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ node tuple-hash.js
[
'91b80a9933218ff5bc62df8ff71f1252',
'b0934b29293e91aefaac73c99fc75e94'
]
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ python3 tuple-hash.py
['91b80a9933218ff5bc62df8ff71f1252', 'b0934b29293e91aefaac73c99fc75e94']
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ go run tuple-hash.go
[91b80a9933218ff5bc62df8ff71f1252 b0934b29293e91aefaac73c99fc75e94]
#+END_SRC

View File

@@ -0,0 +1,28 @@
package main

import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
)

func main() {
	atl_L := [][2]interface{}{
		{1, "s1"},
		{2, "str"},
	}
	var sl_hash []string
	for _, item := range atl_L {
		jsonBytes, err := json.Marshal(item)
		if err != nil {
			panic(err)
		}
		sum := md5.Sum(jsonBytes)
		sl_hash = append(sl_hash, hex.EncodeToString(sum[:]))
	}
	fmt.Println(sl_hash)
}


@@ -0,0 +1,9 @@
const crypto = require("crypto");

const atl_L = [[1, "s1"], [2, "str"]];
const sl_hash = atl_L.map(item => {
  const json = JSON.stringify(item);
  return crypto.createHash("md5").update(json).digest("hex");
});
console.log(sl_hash);


@@ -0,0 +1,12 @@
import hashlib
import json

atl_L = [(1, "s1"), (2, "str")]
sl_hash = []
for item in atl_L:
    encoded = json.dumps(item, separators=(',', ':')).encode("utf-8")
    md5sum = hashlib.md5(encoded).hexdigest()
    sl_hash.append(md5sum)
print(sl_hash)

go.mod

@@ -1,36 +1,28 @@
module mrvacommander
module github.com/hohn/mrvacommander
go 1.22.0
require (
github.com/BurntSushi/toml v1.4.0
github.com/elastic/go-sysinfo v1.14.0
github.com/google/uuid v1.6.0
github.com/gorilla/mux v1.8.1
github.com/jackc/pgx/v5 v5.6.0
github.com/minio/minio-go/v7 v7.0.71
github.com/rabbitmq/amqp091-go v1.10.0
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.5.9
gorm.io/gorm v1.25.10
)
require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/elastic/go-windows v1.0.1 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/pgx/v5 v5.6.0 // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/klauspost/compress v1.17.6 // indirect
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rogpeppe/go-internal v1.12.0 // indirect
github.com/rs/xid v1.5.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
@@ -39,5 +31,4 @@ require (
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
howett.net/plist v1.0.1 // indirect
)

go.sum

@@ -6,14 +6,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/go-sysinfo v1.14.0 h1:dQRtiqLycoOOla7IflZg3aN213vqJmP0lpVpKQ9lUEY=
github.com/elastic/go-sysinfo v1.14.0/go.mod h1:FKUXnZWhnYI0ueO7jhsGV3uQJ5hiz8OqM5b3oGyaRr8=
github.com/elastic/go-windows v1.0.1 h1:AlYZOldA+UJ0/2nBuqWdo90GFCgG9xuyw9SYzGUtJm0=
github.com/elastic/go-windows v1.0.1/go.mod h1:FoVvqWSun28vaDQPbj2Elfc0JahhPB7WQEGa3c814Ss=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
@@ -26,11 +20,6 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
@@ -44,13 +33,8 @@ github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.71 h1:No9XfOKTYi6i0GnBj+WZwD8WP5GZfL7n7GOjRqCdAjA=
github.com/minio/minio-go/v7 v7.0.71/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw=
github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
@@ -66,17 +50,12 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
@@ -87,13 +66,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/driver/postgres v1.5.9 h1:DkegyItji119OlcaLjqN11kHoUgZ/j13E0jkJZgD6A8=
gorm.io/driver/postgres v1.5.9/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI=
gorm.io/gorm v1.25.10 h1:dQpO+33KalOA+aFYGlK+EfxcI5MbO7EP2yYygwh9h+s=
gorm.io/gorm v1.25.10/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM=
howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=


@@ -1,7 +1,16 @@
{
"folders": [
{
"name": "mrvaagent",
"path": "../mrvaagent"
},
{
"name": "mrvacommander",
"path": "."
},
{
"name": "mrvaserver",
"path": "../mrvaserver"
}
],
"settings": {
@@ -10,4 +19,4 @@
"makefile.configureOnOpen": false,
"git.ignoreLimitWarning": true
}
}
}


@@ -1,9 +0,0 @@
## The notes/ directory
The `notes/` directory serves as staging directory for documentation. This is
the place to develop documentation and short notes. The contents of this
directory should be accessible to
1. The note authors and
2. Developers of the project
It need not be meaningful to casual users.

notes/README.org (new file, 9 lines)

@@ -0,0 +1,9 @@
* The notes/ directory
The =notes/= directory serves as a staging directory for documentation.
This is the place to develop documentation and short notes.
The contents of this directory should be accessible to:
1. The note authors
2. Developers of the project
It need not be meaningful to casual users.


@@ -508,7 +508,7 @@
code .
#+END_SRC
Set up 'variant analysis repositories', continuin from the
Set up 'variant analysis repositories', continuing from the
=scratch/vscode-selection.json= file formed previously:
1. Select '{}' and open db selection file
2. paste

notes/dwg-r1.png (new binary file, 48 KiB, not shown)

notes/dwg-r2.png (new binary file, 58 KiB, not shown)

notes/dwg-r2.svg (new file, 146 lines)

@@ -0,0 +1,146 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 12.2.1 (20241206.2353)
-->
<!-- Title: MRVA_Containers Pages: 1 -->
<svg width="659pt" height="315pt"
viewBox="0.00 0.00 659.00 315.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 311)">
<title>MRVA_Containers</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-311 655,-311 655,4 -4,4"/>
<text text-anchor="middle" x="325.5" y="-284" font-family="Helvetica,sans-Serif" font-size="20.00">Container Dependencies for MRVA</text>
<!-- mrvastore_init -->
<g id="node1" class="node">
<title>mrvastore_init</title>
<polygon fill="lightblue" stroke="none" points="239.38,-247 239.38,-270.75 338.12,-270.75 338.12,-247 239.38,-247"/>
<text text-anchor="start" x="243.38" y="-254.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">mrvastore&#45;init</text>
<text text-anchor="start" x="243.38" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/mc</text>
<polygon fill="none" stroke="black" points="238.38,-226 238.38,-271.75 339.12,-271.75 339.12,-226 238.38,-226"/>
</g>
<!-- mrvastore -->
<g id="node2" class="node">
<title>mrvastore</title>
<polygon fill="lightblue" stroke="none" points="401.5,-241 401.5,-264.75 642,-264.75 642,-241 401.5,-241"/>
<text text-anchor="start" x="488.75" y="-248.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">mrvastore</text>
<text text-anchor="start" x="405.5" y="-227.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024&#45;06&#45;11T03&#45;13&#45;30Z</text>
<polygon fill="none" stroke="black" points="400.5,-220 400.5,-265.75 643,-265.75 643,-220 400.5,-220"/>
</g>
<!-- mrvastore_init&#45;&gt;mrvastore -->
<g id="edge1" class="edge">
<title>mrvastore_init&#45;&gt;mrvastore</title>
<path fill="none" stroke="black" d="M346.85,-247.39C359.13,-247.07 372.61,-246.72 386.46,-246.36"/>
<polygon fill="black" stroke="black" points="386.38,-248.12 391.33,-246.24 386.28,-244.62 386.38,-248.12"/>
</g>
<!-- client_ghmrva -->
<g id="node3" class="node">
<title>client_ghmrva</title>
<polygon fill="lightblue" stroke="none" points="9,-127 9,-150.75 176,-150.75 176,-127 9,-127"/>
<text text-anchor="start" x="47.88" y="-134.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">client&#45;ghmrva</text>
<text text-anchor="start" x="13" y="-113.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: client&#45;ghmrva&#45;container:0.4.0</text>
<polygon fill="none" stroke="black" points="8,-106 8,-151.75 177,-151.75 177,-106 8,-106"/>
</g>
<!-- server -->
<g id="node7" class="node">
<title>server</title>
<polygon fill="lightblue" stroke="none" points="230,-103 230,-126.75 347.5,-126.75 347.5,-103 230,-103"/>
<text text-anchor="start" x="268.5" y="-110.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">server</text>
<text text-anchor="start" x="234" y="-89.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva&#45;server:0.4.0</text>
<polygon fill="none" stroke="black" points="229,-58 229,-127.75 348.5,-127.75 348.5,-58 229,-58"/>
</g>
<!-- client_ghmrva&#45;&gt;server -->
<g id="edge10" class="edge">
<title>client_ghmrva&#45;&gt;server</title>
<path fill="none" stroke="black" d="M184.94,-111.93C194.95,-110.07 205.04,-108.2 214.78,-106.4"/>
<polygon fill="black" stroke="black" points="215.04,-108.13 219.64,-105.5 214.41,-104.69 215.04,-108.13"/>
</g>
<!-- code_server -->
<g id="node4" class="node">
<title>code_server</title>
<polygon fill="lightblue" stroke="none" points="12.38,-55 12.38,-78.75 172.62,-78.75 172.62,-55 12.38,-55"/>
<text text-anchor="start" x="54.25" y="-62.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">code&#45;server</text>
<text text-anchor="start" x="16.38" y="-41.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: code&#45;server&#45;initialized:0.4.0</text>
<polygon fill="none" stroke="black" points="11.38,-34 11.38,-79.75 173.62,-79.75 173.62,-34 11.38,-34"/>
</g>
<!-- code_server&#45;&gt;server -->
<g id="edge9" class="edge">
<title>code_server&#45;&gt;server</title>
<path fill="none" stroke="black" d="M181.53,-73.19C192.69,-75.26 204.01,-77.36 214.9,-79.37"/>
<polygon fill="black" stroke="black" points="214.53,-81.09 219.77,-80.28 215.17,-77.64 214.53,-81.09"/>
</g>
<!-- hepc -->
<g id="node5" class="node">
<title>hepc</title>
<polygon fill="lightblue" stroke="none" points="444.62,-169 444.62,-192.75 598.88,-192.75 598.88,-169 444.62,-169"/>
<text text-anchor="start" x="506" y="-176.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">hepc</text>
<text text-anchor="start" x="448.62" y="-155.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva&#45;hepc&#45;container:0.4.0</text>
<polygon fill="none" stroke="black" points="443.62,-148 443.62,-193.75 599.88,-193.75 599.88,-148 443.62,-148"/>
</g>
<!-- rabbitmq -->
<g id="node6" class="node">
<title>rabbitmq</title>
<polygon fill="lightblue" stroke="none" points="448.75,-97 448.75,-120.75 594.75,-120.75 594.75,-97 448.75,-97"/>
<text text-anchor="start" x="492.5" y="-104.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">rabbitmq</text>
<text text-anchor="start" x="452.75" y="-83.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: rabbitmq:3&#45;management</text>
<polygon fill="none" stroke="black" points="447.75,-76 447.75,-121.75 595.75,-121.75 595.75,-76 447.75,-76"/>
</g>
<!-- server&#45;&gt;mrvastore -->
<g id="edge3" class="edge">
<title>server&#45;&gt;mrvastore</title>
<path fill="none" stroke="black" d="M347.34,-131.55C350.58,-134.55 353.67,-137.67 356.5,-140.88 378.62,-165.92 366.36,-186.06 392.5,-206.88 395.16,-208.99 397.93,-210.98 400.8,-212.86"/>
<polygon fill="black" stroke="black" points="399.6,-214.17 404.78,-215.31 401.44,-211.2 399.6,-214.17"/>
</g>
<!-- server&#45;&gt;hepc -->
<g id="edge5" class="edge">
<title>server&#45;&gt;hepc</title>
<path fill="none" stroke="black" d="M356.48,-121.44C368.39,-126.17 380.74,-130.84 392.5,-134.88 404.44,-138.97 417.11,-142.96 429.67,-146.7"/>
<polygon fill="black" stroke="black" points="429.07,-148.35 434.37,-148.08 430.06,-144.99 429.07,-148.35"/>
</g>
<!-- server&#45;&gt;rabbitmq -->
<g id="edge2" class="edge">
<title>server&#45;&gt;rabbitmq</title>
<path fill="none" stroke="black" d="M356.25,-94.6C380.34,-95.23 407.91,-95.94 433.51,-96.61"/>
<polygon fill="black" stroke="black" points="433.41,-98.36 438.46,-96.74 433.5,-94.86 433.41,-98.36"/>
</g>
<!-- postgres -->
<g id="node8" class="node">
<title>postgres</title>
<polygon fill="lightblue" stroke="none" points="475.75,-25 475.75,-48.75 567.75,-48.75 567.75,-25 475.75,-25"/>
<text text-anchor="start" x="493.25" y="-32.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">postgres</text>
<text text-anchor="start" x="479.75" y="-11.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: postgres:15</text>
<polygon fill="none" stroke="black" points="474.75,-4 474.75,-49.75 568.75,-49.75 568.75,-4 474.75,-4"/>
</g>
<!-- server&#45;&gt;postgres -->
<g id="edge4" class="edge">
<title>server&#45;&gt;postgres</title>
<path fill="none" stroke="black" d="M356.32,-73.33C368.36,-69.83 380.8,-66.23 392.5,-62.88 414.85,-56.47 439.43,-49.55 461.04,-43.5"/>
<polygon fill="black" stroke="black" points="461.25,-45.26 465.59,-42.22 460.31,-41.88 461.25,-45.26"/>
</g>
<!-- agent -->
<g id="node9" class="node">
<title>agent</title>
<polygon fill="lightblue" stroke="none" points="231.5,-175 231.5,-198.75 346,-198.75 346,-175 231.5,-175"/>
<text text-anchor="start" x="270.75" y="-182.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">agent</text>
<text text-anchor="start" x="235.5" y="-161.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva&#45;agent:0.4.0</text>
<polygon fill="none" stroke="black" points="230.5,-154 230.5,-199.75 347,-199.75 347,-154 230.5,-154"/>
</g>
<!-- agent&#45;&gt;mrvastore -->
<g id="edge7" class="edge">
<title>agent&#45;&gt;mrvastore</title>
<path fill="none" stroke="black" d="M354.95,-196.02C367.41,-199.65 380.35,-203.4 392.5,-206.88 400.99,-209.31 409.8,-211.81 418.65,-214.32"/>
<polygon fill="black" stroke="black" points="418.03,-215.96 423.32,-215.64 418.98,-212.59 418.03,-215.96"/>
</g>
<!-- agent&#45;&gt;hepc -->
<g id="edge8" class="edge">
<title>agent&#45;&gt;hepc</title>
<path fill="none" stroke="black" d="M354.71,-175.19C377.96,-174.58 404.62,-173.89 429.67,-173.24"/>
<polygon fill="black" stroke="black" points="429.53,-175 434.48,-173.12 429.44,-171.5 429.53,-175"/>
</g>
<!-- agent&#45;&gt;rabbitmq -->
<g id="edge6" class="edge">
<title>agent&#45;&gt;rabbitmq</title>
<path fill="none" stroke="black" d="M352.08,-150.07C365.32,-144.75 379.29,-139.41 392.5,-134.88 405.8,-130.31 420.02,-125.88 433.97,-121.78"/>
<polygon fill="black" stroke="black" points="434.16,-123.54 438.48,-120.47 433.19,-120.18 434.16,-123.54"/>
</g>
</g>
</svg>


notes/system-structure.org (new file, 128 lines)

@@ -0,0 +1,128 @@
* system structure
#+BEGIN_SRC dot :file dwg-r2.svg :cmdline -Kdot -Tsvg
digraph MRVA_Containers {
rankdir=LR;
node [shape=plaintext fontname="Helvetica"];
edge [arrowsize=0.5];
// Title
label="Container Dependencies for MRVA";
labelloc=top;
fontsize=20;
fontname="Helvetica";
// mrvastore-init
mrvastore_init [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>mrvastore-init</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/mc</font></td></tr>
</table>
>
];
// mrvastore
mrvastore [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>mrvastore</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
</table>
>
];
// client-ghmrva
client_ghmrva [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>client-ghmrva</b></td></tr>
<tr><td align="left"><font point-size="10">Image: client-ghmrva-container:0.4.0</font></td></tr>
</table>
>
];
// code-server
code_server [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>code-server</b></td></tr>
<tr><td align="left"><font point-size="10">Image: code-server-initialized:0.4.0</font></td></tr>
</table>
>
];
// hepc
hepc [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>hepc</b></td></tr>
<tr><td align="left"><font point-size="10">Image: mrva-hepc-container:0.4.0</font></td></tr>
</table>
>
];
// rabbitmq
rabbitmq [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>rabbitmq</b></td></tr>
<tr><td align="left"><font point-size="10">Image: rabbitmq:3-management</font></td></tr>
</table>
>
];
// server
server [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>server</b></td></tr>
<tr><td align="left"><font point-size="10">Image: mrva-server:0.4.0</font></td></tr>
<tr><td port="slot1"></td></tr>
<tr><td port="slot2"></td></tr>
<tr><td port="slot3"></td></tr>
</table>
>
];
// postgres
postgres [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>postgres</b></td></tr>
<tr><td align="left"><font point-size="10">Image: postgres:15</font></td></tr>
</table>
>
];
// agent
agent [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><b>agent</b></td></tr>
<tr><td align="left"><font point-size="10">Image: mrva-agent:0.4.0</font></td></tr>
</table>
>
];
// Edges (dependencies)
mrvastore_init -> mrvastore;
server -> rabbitmq;
server -> mrvastore;
server -> postgres;
server -> hepc;
agent -> rabbitmq;
agent -> mrvastore;
agent -> hepc;
code_server -> server;
client_ghmrva -> server;
}
#+END_SRC

Binary file not shown.


@@ -4,19 +4,17 @@ import (
"context"
"fmt"
"log/slog"
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/codeql"
"mrvacommander/pkg/common"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"mrvacommander/utils"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/elastic/go-sysinfo"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/codeql"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/qldbstore"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/utils"
"github.com/google/uuid"
)
@@ -50,41 +48,9 @@ func (r *RunnerSingle) worker(wid int) {
*/
const (
workerMemoryMB = 2048 // 2 GB
monitorIntervalSec = 10 // Monitor every 10 seconds
workerMemoryMB = 2048 // 2 GB
)
func calculateWorkers() int {
host, err := sysinfo.Host()
if err != nil {
slog.Error("failed to get host info", "error", err)
os.Exit(1)
}
memInfo, err := host.Memory()
if err != nil {
slog.Error("failed to get memory info", "error", err)
os.Exit(1)
}
// Get available memory in MB
totalMemoryMB := memInfo.Available / (1024 * 1024)
// Ensure we have at least one worker
workers := int(totalMemoryMB / workerMemoryMB)
if workers < 1 {
workers = 1
}
// Limit the number of workers to the number of CPUs
cpuCount := runtime.NumCPU()
if workers > cpuCount {
workers = max(cpuCount, 1)
}
return workers
}
func StartAndMonitorWorkers(ctx context.Context,
artifacts artifactstore.Store,
databases qldbstore.Store,
@@ -92,57 +58,29 @@ func StartAndMonitorWorkers(ctx context.Context,
desiredWorkerCount int,
wg *sync.WaitGroup) {
currentWorkerCount := 0
stopChans := make([]chan struct{}, 0)
if desiredWorkerCount != 0 {
slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
for i := 0; i < desiredWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
}
return
var workerCount int
if desiredWorkerCount > 0 {
workerCount = desiredWorkerCount
slog.Info("Starting fixed number of workers", slog.Int("count", workerCount))
} else {
workerCount = 1
slog.Info("Starting preset number of workers", slog.Int("count", workerCount))
}
slog.Info("Worker count not specified, managing based on available memory and CPU")
stopChans := make([]chan struct{}, workerCount)
for {
select {
case <-ctx.Done():
// signal all workers to stop
for _, stopChan := range stopChans {
close(stopChan)
}
return
default:
newWorkerCount := calculateWorkers()
for i := 0; i < workerCount; i++ {
stopChan := make(chan struct{})
stopChans[i] = stopChan
wg.Add(1)
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
}
if newWorkerCount != currentWorkerCount {
slog.Info(
"Modifying worker count",
slog.Int("current", currentWorkerCount),
slog.Int("new", newWorkerCount))
}
// Wait for context cancellation
<-ctx.Done()
if newWorkerCount > currentWorkerCount {
for i := currentWorkerCount; i < newWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
}
} else if newWorkerCount < currentWorkerCount {
for i := newWorkerCount; i < currentWorkerCount; i++ {
close(stopChans[i])
}
stopChans = stopChans[:newWorkerCount]
}
currentWorkerCount = newWorkerCount
time.Sleep(monitorIntervalSec * time.Second)
}
for _, stopChan := range stopChans {
close(stopChan)
}
}
@@ -153,7 +91,7 @@ func RunAnalysisJob(
Spec: job.Spec,
ResultCount: 0,
ResultLocation: artifactstore.ArtifactLocation{},
Status: common.StatusError,
Status: common.StatusFailed,
}
// Create a temporary directory
@@ -186,7 +124,15 @@ func RunAnalysisJob(
databaseData, err := dbs.GetDatabase(job.Spec.NameWithOwner)
if err != nil {
return result, fmt.Errorf("failed to get database: %w", err)
slog.Error("Failed to get database",
slog.String("owner", job.Spec.Owner),
slog.String("repo", job.Spec.Repo),
slog.Int("session_id", job.Spec.SessionID),
slog.String("operation", "GetDatabase"),
slog.Any("error", err),
)
return result, fmt.Errorf("failed to get database for %s/%s: %w",
job.Spec.Owner, job.Spec.Repo, err)
}
// Write the CodeQL database data to the filesystem
@@ -218,7 +164,7 @@ func RunAnalysisJob(
Spec: job.Spec,
ResultCount: runResult.ResultCount,
ResultLocation: resultsLocation,
Status: common.StatusSuccess,
Status: common.StatusSucceeded,
SourceLocationPrefix: runResult.SourceLocationPrefix,
DatabaseSHA: runResult.DatabaseSHA,
}


@@ -1,9 +1,9 @@
package agent
import (
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/qldbstore"
"github.com/hohn/mrvacommander/pkg/queue"
)
type Visibles struct {


@@ -1,28 +1,13 @@
package artifactstore
import (
"fmt"
"mrvacommander/pkg/common"
)
// Restrict the keys / values for ArtifactLocation and centralize the common ones
// here
const (
AF_BUCKETNAME_RESULTS = "results"
AF_BUCKETNAME_PACKS = "packs"
var (
AF_BUCKETNAME_RESULTS = "mrvabucket"
AF_BUCKETNAME_PACKS = "mrvabucket"
)
type ArtifactLocation struct {
Key string // location in bucket OR full location for file paths
Bucket string // which bucket: packs or results
}
// deriveKeyFromSessionId generates a key for a query pack based on the job ID
func deriveKeyFromSessionId(sessionId int) string {
return fmt.Sprintf("%d", sessionId)
}
// deriveKeyFromJobSpec generates a key for a result based on the JobSpec
func deriveKeyFromJobSpec(jobSpec common.JobSpec) string {
return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
}


@@ -1,6 +1,6 @@
package artifactstore
import "mrvacommander/pkg/common"
import "github.com/hohn/mrvacommander/pkg/common"
type Store interface {
// GetQueryPack retrieves the query pack from the specified location.


@@ -2,8 +2,9 @@ package artifactstore
import (
"fmt"
"mrvacommander/pkg/common"
"sync"
"github.com/hohn/mrvacommander/pkg/common"
)
// InMemoryArtifactStore is an in-memory implementation of the ArtifactStore interface
@@ -38,7 +39,7 @@ func (store *InMemoryArtifactStore) SaveQueryPack(sessionId int, data []byte) (A
store.mu.Lock()
defer store.mu.Unlock()
key := deriveKeyFromSessionId(sessionId)
key := fmt.Sprintf("%d-packs", sessionId)
store.packs[key] = data
location := ArtifactLocation{
@@ -79,7 +80,7 @@ func (store *InMemoryArtifactStore) SaveResult(jobSpec common.JobSpec, data []by
store.mu.Lock()
defer store.mu.Unlock()
key := deriveKeyFromJobSpec(jobSpec)
key := fmt.Sprintf("%d-results-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
store.results[key] = data
location := ArtifactLocation{


@@ -7,7 +7,8 @@ import (
"io"
"log/slog"
"math"
"mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
@@ -17,10 +18,11 @@ type MinIOArtifactStore struct {
client *minio.Client
}
func NewMinIOArtifactStore(endpoint, id, secret string) (*MinIOArtifactStore, error) {
func NewMinIOArtifactStore(endpoint, id, secret string, lookup minio.BucketLookupType) (*MinIOArtifactStore, error) {
minioClient, err := minio.New(endpoint, &minio.Options{
Creds: credentials.NewStaticV4(id, secret, ""),
Secure: false,
Creds: credentials.NewStaticV4(id, secret, ""),
Secure: false,
BucketLookup: lookup,
})
if err != nil {
return nil, err
@@ -28,16 +30,6 @@ func NewMinIOArtifactStore(endpoint, id, secret string) (*MinIOArtifactStore, er
slog.Info("Connected to MinIO artifact store server")
// Create "results" bucket
if err := common.CreateMinIOBucketIfNotExists(minioClient, AF_BUCKETNAME_RESULTS); err != nil {
return nil, fmt.Errorf("could not create results bucket: %v", err)
}
// Create "packs" bucket
if err := common.CreateMinIOBucketIfNotExists(minioClient, AF_BUCKETNAME_PACKS); err != nil {
return nil, fmt.Errorf("could not create packs bucket: %v", err)
}
return &MinIOArtifactStore{
client: minioClient,
}, nil
@@ -48,7 +40,8 @@ func (store *MinIOArtifactStore) GetQueryPack(location ArtifactLocation) ([]byte
}
func (store *MinIOArtifactStore) SaveQueryPack(jobId int, data []byte) (ArtifactLocation, error) {
return store.saveArtifact(AF_BUCKETNAME_PACKS, deriveKeyFromSessionId(jobId), data, "application/gzip")
key := fmt.Sprintf("%d-packs", jobId)
return store.saveArtifact(AF_BUCKETNAME_PACKS, key, data, "application/gzip")
}
func (store *MinIOArtifactStore) GetResult(location ArtifactLocation) ([]byte, error) {
@@ -70,9 +63,9 @@ func (store *MinIOArtifactStore) GetResultSize(location ArtifactLocation) (int,
return int(objectInfo.Size), nil
}
func (store *MinIOArtifactStore) SaveResult(jobSpec common.JobSpec, data []byte) (ArtifactLocation, error) {
return store.saveArtifact(AF_BUCKETNAME_RESULTS, deriveKeyFromJobSpec(jobSpec), data, "application/zip")
key := fmt.Sprintf("%d-results-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
return store.saveArtifact(AF_BUCKETNAME_RESULTS, key, data, "application/zip")
}
func (store *MinIOArtifactStore) getArtifact(location ArtifactLocation) ([]byte, error) {
@@ -95,7 +88,12 @@ func (store *MinIOArtifactStore) getArtifact(location ArtifactLocation) ([]byte,
func (store *MinIOArtifactStore) saveArtifact(bucket, key string, data []byte,
contentType string) (ArtifactLocation, error) {
_, err := store.client.PutObject(context.Background(), bucket, key,
exists, err := store.client.BucketExists(context.Background(), bucket)
if err != nil || !exists {
slog.Error("Bucket does not exist", "bucket", bucket)
}
_, err = store.client.PutObject(context.Background(), bucket, key,
bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{
ContentType: contentType,
})


@@ -9,8 +9,8 @@ import (
"io"
"log"
"log/slog"
"mrvacommander/pkg/queue"
"mrvacommander/utils"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/utils"
"os"
"os/exec"
"os/signal"


@@ -16,6 +16,7 @@ func CreateMinIOBucketIfNotExists(client *minio.Client, bucketName string) error
}
if !exists {
// if env.Get("MRVA_S3_PATHSTYLE") == "true" {}
slog.Info("Creating bucket", "name", bucketName)
err = client.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{})
if err != nil {


@@ -10,25 +10,28 @@ type NameWithOwner struct {
type Status int
const (
StatusInProgress = iota
StatusQueued
StatusError
StatusSuccess
StatusPending Status = iota
StatusInProgress
StatusSucceeded
StatusFailed
StatusCanceled
StatusTimedOut
)
func (s Status) ToExternalString() string {
switch s {
case StatusPending:
return "pending"
case StatusInProgress:
return "in_progress"
case StatusQueued:
return "queued"
case StatusError:
return "error"
case StatusSuccess:
return "inProgress"
case StatusSucceeded:
return "succeeded"
case StatusFailed:
return "failed"
case StatusCanceled:
return "canceled"
case StatusTimedOut:
return "timedOut"
default:
return "unknown"
}
@@ -38,3 +41,8 @@ type JobSpec struct {
SessionID int
NameWithOwner
}
type StatusSummary struct {
Overall Status
Counts map[Status]int
}
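The widened status enum and the new StatusSummary type imply an aggregation step when reporting a session. One possible way to derive the overall status from the per-status counts, sketched here as an assumption rather than code from this change:
#+BEGIN_SRC go
package common

// summarize is a hypothetical helper: failures dominate, then timeouts,
// then cancellations, then any still-running work; otherwise succeeded.
func summarize(counts map[Status]int) StatusSummary {
	overall := StatusSucceeded
	switch {
	case counts[StatusFailed] > 0:
		overall = StatusFailed
	case counts[StatusTimedOut] > 0:
		overall = StatusTimedOut
	case counts[StatusCanceled] > 0:
		overall = StatusCanceled
	case counts[StatusInProgress] > 0 || counts[StatusPending] > 0:
		overall = StatusInProgress
	}
	return StatusSummary{Overall: overall, Counts: counts}
}
#+END_SRC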

pkg/deploy/README.org (new file, 87 lines)

@@ -0,0 +1,87 @@
* sighelp.go : GPT-Assistable Semantic Outline
This file provides *non-functional symbolic structure* for the corresponding =.go= file (e.g. =init.go=), optimized for:
- GPT parsing and assistance
- IDE symbol navigation (LSP)
- Type-checking to detect drift
- Readable overview for human developers
Each =sighelp_XXX()= function:
- Mirrors a real function (e.g. =InitRabbitMQ=)
- Calls it with placeholder arguments
- Discards the result to avoid side effects
- Includes structured GPT-readable comments in the form =// gpt:<tag>: …=
This allows both humans and GPT tools to:
- See what functions exist and what they do
- Understand return types and call relations
- Navigate codebases via structure, not prose
**Example**
#+BEGIN_SRC go
// gpt:flowinfo: InitMinIOArtifactStore returns a store configured via env vars
func sighelp_InitMinIOArtifactStore() {
var s artifactstore.Store
var err error
s, err = InitMinIOArtifactStore()
_ = s
_ = err
}
#+END_SRC
**Style Guidelines**
- Always use valid, compilable Go.
- Maintain one =sighelp_= per actual function.
- Add =// gpt:= comments to express intent or relationships.
- Avoid runtime logic — this file is for *structure*, not execution.
* GPT-Assisted Spec → Code Change Workflow
To reduce time spent mapping high-level spec changes to actual code edits, we use this workflow to integrate GPT into the loop. This allows structured delegation of search, mapping, and edit proposal.
**Flow**
1. You declare a spec change as a structured Org block (see below).
2. GPT uses =sighelp.go= (and optionally the real code) to:
- Identify affected functions
- Propose an edit plan
- Track and validate type-level constraints
3. You confirm the plan or adjust scope.
4. GPT writes candidate diffs or summaries for manual patching.
**Example Change Request**
#+BEGIN_SRC org
,* Change: Make artifact store initialization async with retry
,* Affects: InitMinIOArtifactStore, InitMinIOCodeQLDatabaseStore
,* Required: non-blocking behavior, robust to transient failures
,* Notes: Must be compatible with sighelp stubs and InitX signatures
#+END_SRC
**GPT Responsibilities**
- Match affected symbols from =sighelp_XXX()= stubs
- Generate patch plan as Org list:
#+BEGIN_SRC org
,* deploy/init.go
- InitMinIOArtifactStore: wrap NewMinIOArtifactStore in goroutine, add retry
- InitMinIOCodeQLDatabaseStore: apply same pattern
#+END_SRC
- Output scoped diffs, patch instructions, or replacement code
**Optional Enhancements**
- GPT can update =sighelp.go= alongside implementation changes
- You may keep =change.org= files in the repo to track historical refactor plans
- Each change block can include tags like =:spec:async:init:= for search
* Summary
This structure treats GPT as a symbolic reasoning assistant that uses =sighelp.go= as its internal call graph. It allows high-level human changes to be mapped, tracked, and diffed without manual bottom-up spelunking.
This flow is especially effective when multiple entry points share structural patterns (e.g. InitXXX for services).


@@ -4,11 +4,15 @@ import (
"fmt"
"log"
"log/slog"
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"net/url"
"os"
"strconv"
"strings"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/qldbstore"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/minio/minio-go/v7"
)
func validateEnvVars(requiredEnvVars []string) {
@@ -60,37 +64,57 @@ func InitMinIOArtifactStore() (artifactstore.Store, error) {
"ARTIFACT_MINIO_ENDPOINT",
"ARTIFACT_MINIO_ID",
"ARTIFACT_MINIO_SECRET",
"MRVA_MINIO_VIRTUAL_HOST",
}
validateEnvVars(requiredEnvVars)
endpoint := os.Getenv("ARTIFACT_MINIO_ENDPOINT")
id := os.Getenv("ARTIFACT_MINIO_ID")
secret := os.Getenv("ARTIFACT_MINIO_SECRET")
useVirtual := os.Getenv("MRVA_MINIO_VIRTUAL_HOST") == "1"
store, err := artifactstore.NewMinIOArtifactStore(endpoint, id, secret)
var lookup minio.BucketLookupType
var bucketName string
if useVirtual {
parsedURL, err := url.Parse(endpoint)
if err != nil {
return nil, fmt.Errorf("failed to parse ARTIFACT_MINIO_ENDPOINT: %w", err)
}
hostParts := strings.Split(parsedURL.Hostname(), ".")
if len(hostParts) < 2 {
return nil, fmt.Errorf("unable to extract bucket from host: %s", parsedURL.Hostname())
}
bucketName = hostParts[0]
lookup = minio.BucketLookupDNS
} else {
bucketName = "mrvabucket"
lookup = minio.BucketLookupPath
}
// TODO: unify into one. clean up state handling.
artifactstore.AF_BUCKETNAME_RESULTS = bucketName
artifactstore.AF_BUCKETNAME_PACKS = bucketName
store, err := artifactstore.NewMinIOArtifactStore(endpoint, id, secret, lookup)
if err != nil {
return nil, fmt.Errorf("failed to initialize artifact store: %v", err)
}
return store, nil
}
func InitMinIOCodeQLDatabaseStore() (qldbstore.Store, error) {
}
func InitHEPCDatabaseStore() (qldbstore.Store, error) {
requiredEnvVars := []string{
"QLDB_MINIO_ENDPOINT",
"QLDB_MINIO_ID",
"QLDB_MINIO_SECRET",
"MRVA_HEPC_ENDPOINT",
"MRVA_HEPC_CACHE_DURATION",
"MRVA_HEPC_DATAVIACLI",
"MRVA_HEPC_OUTDIR",
"MRVA_HEPC_TOOL",
}
validateEnvVars(requiredEnvVars)
endpoint := os.Getenv("QLDB_MINIO_ENDPOINT")
id := os.Getenv("QLDB_MINIO_ID")
secret := os.Getenv("QLDB_MINIO_SECRET")
endpoint := os.Getenv("MRVA_HEPC_ENDPOINT")
store, err := qldbstore.NewMinIOCodeQLDatabaseStore(endpoint, id, secret)
if err != nil {
return nil, fmt.Errorf("failed to initialize ql database storage: %v", err)
}
store := qldbstore.NewHepcStore(endpoint)
return store, nil
}
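When MRVA_MINIO_VIRTUAL_HOST=1, the bucket name is taken from the first DNS label of the endpoint host, as the code above does. A standalone illustration of that extraction; the endpoint value is invented for the example:
#+BEGIN_SRC go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// Hypothetical virtual-host style endpoint for illustration.
	endpoint := "http://mrvabucket.minio.example.com:9000"
	u, err := url.Parse(endpoint)
	if err != nil {
		panic(err)
	}
	parts := strings.Split(u.Hostname(), ".")
	fmt.Println(parts[0]) // prints "mrvabucket", used as the bucket name
}
#+END_SRC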


@@ -1,7 +1,7 @@
package qldbstore
import (
"mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/common"
)
type Store interface {


@@ -0,0 +1,476 @@
package qldbstore
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/hohn/mrvacommander/pkg/common"
)
const defaultCacheDurationMinutes = 60
type HepcStore struct {
Endpoint string
metadataCache []HepcResult
cacheLastUpdated time.Time
cacheMutex sync.Mutex
cacheDuration time.Duration
}
type HepcResult struct {
GitBranch string `json:"git_branch"`
GitCommitID string `json:"git_commit_id"`
GitRepo string `json:"git_repo"`
IngestionDatetime string `json:"ingestion_datetime_utc"`
ResultURL string `json:"result_url"`
ToolID string `json:"tool_id"`
ToolName string `json:"tool_name"`
ToolVersion string `json:"tool_version"`
Projname string `json:"projname"`
}
func NewHepcStore(endpoint string) *HepcStore {
cacheDuration := getMetaCacheDuration()
return &HepcStore{
Endpoint: endpoint,
cacheDuration: cacheDuration,
}
}
func getMetaCacheDuration() time.Duration {
/*
Input:
env("MRVA_HEPC_CACHE_DURATION") = s
if s = "" s ∉ int → defaultCacheDurationMinutes × time.Minute
else → int(s) × time.Minute
*/
durationStr := os.Getenv("MRVA_HEPC_CACHE_DURATION")
if durationStr == "" {
return time.Minute * defaultCacheDurationMinutes
}
duration, err := strconv.Atoi(durationStr)
if err != nil {
slog.Warn("Invalid MRVA_HEPC_CACHE_DURATION value. Using default",
durationStr, defaultCacheDurationMinutes,
)
return time.Minute * defaultCacheDurationMinutes
}
return time.Minute * time.Duration(duration)
}
func (h *HepcStore) fetchViaHTTP() ([]HepcResult, error) {
/*
Input:
h.Endpoint = baseURL
url := baseURL + "/index"
Do:
HTTP GET url → resp
Require:
resp.StatusCode = 200
Then:
decode resp.Body as stream of HepcResult
Output:
if success → (results, nil)
if net/http/json error → (nil, error)
*/
url := fmt.Sprintf("%s/index", h.Endpoint)
resp, err := http.Get(url)
if err != nil {
slog.Warn("Error fetching metadata", "err", err)
return nil, fmt.Errorf("error fetching metadata: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
slog.Warn("Non-OK HTTP status", resp.Status)
return nil, fmt.Errorf("non-OK HTTP status: %s", resp.Status)
}
var results []HepcResult
decoder := json.NewDecoder(resp.Body)
for {
var result HepcResult
if err := decoder.Decode(&result); err == io.EOF {
break
} else if err != nil {
slog.Warn("Error decoding JSON", err)
return nil, fmt.Errorf("error decoding JSON: %w", err)
}
results = append(results, result)
}
return results, nil
}
func (h *HepcStore) fetchViaCli() ([]HepcResult, error) {
/*
Inputs:
env("MRVA_HEPC_OUTDIR") = outDir
env("MRVA_HEPC_TOOL") = toolName
Require:
outDir ≠ "" ∧ toolName ≠ ""
(expand ~ in outDir)
mkdir(outDir)
Let:
jsonPath := outDir / "spigot-results.json"
Do:
run:
spigot-cli bulk-download-results
--tool-name toolName
--metadata-only all
> jsonPath
Then:
decode jsonPath as JSON array or stream of HepcResult
Output:
if success → (results, nil)
if env/exec/json error → (nil, error)
*/
outDir := os.Getenv("MRVA_HEPC_OUTDIR")
toolName := os.Getenv("MRVA_HEPC_TOOL")
var missing []string
if outDir == "" {
slog.Error("Missing required environment variable", "var", "MRVA_HEPC_OUTDIR")
missing = append(missing, "MRVA_HEPC_OUTDIR")
}
if toolName == "" {
slog.Error("Missing required environment variable", "var", "MRVA_HEPC_TOOL")
missing = append(missing, "MRVA_HEPC_TOOL")
}
if len(missing) > 0 {
return nil, fmt.Errorf("missing required environment variables: %s", strings.Join(missing, ", "))
}
// Expand ~ in outDir
if strings.HasPrefix(outDir, "~/") {
home, err := os.UserHomeDir()
if err != nil {
slog.Error("Unable to get home directory", "error", err)
return nil, err
}
outDir = filepath.Join(home, outDir[2:])
}
if err := os.MkdirAll(outDir, 0755); err != nil {
slog.Error("Failed to create output directory", "error", err)
return nil, err
}
jsonPath := filepath.Join(outDir, "spigot-results.json")
// ----------------------
// Go version of
// spigot-cli bulk-download-results \
// --tool-name "$TOOL_NAME" \
// --metadata-only all \
// > "$OUT_DIR/spigot-results.json"
// ----------------------
outFile, err := os.Create(jsonPath)
if err != nil {
slog.Error("Failed to create spigot output file", "error", err)
return nil, err
}
defer outFile.Close()
cmd := exec.Command(
"spigot-cli",
"bulk-download-results",
"--tool-name", toolName,
"--metadata-only", "all",
)
cmd.Stdout = outFile
cmd.Stderr = os.Stderr // for error logging
if err := cmd.Run(); err != nil {
slog.Error("spigot-cli failed", "error", err)
return nil, err
}
// ----------------------
// Decode the resulting JSON file
f, err := os.Open(jsonPath)
if err != nil {
slog.Error("Failed to open JSON output", "path", jsonPath, "error", err)
return nil, fmt.Errorf("failed to open result file: %w", err)
}
defer f.Close()
var results []HepcResult
decoder := json.NewDecoder(f)
for {
var result HepcResult
if err := decoder.Decode(&result); err == io.EOF {
break
} else if err != nil {
slog.Warn("Error decoding CLI JSON", "error", err)
return nil, fmt.Errorf("error decoding CLI JSON: %w", err)
}
results = append(results, result)
}
return results, nil
}
func (h *HepcStore) fetchMetadata() ([]HepcResult, error) {
// Get via request or cli?
hepcDataViaCli := os.Getenv("MRVA_HEPC_DATAVIACLI")
if hepcDataViaCli == "1" {
return h.fetchViaCli()
} else {
return h.fetchViaHTTP()
}
}
func (h *HepcStore) FindAvailableDBs(analysisReposRequested []common.NameWithOwner) (
notFoundRepos []common.NameWithOwner,
foundRepos []common.NameWithOwner) {
/*
Input:
analysisReposRequested : List[Repo]
h.metadataCache : List[HepcResult]
h.cacheLastUpdated : Time
h.cacheDuration : Duration
If time.Now() − h.cacheLastUpdated > h.cacheDuration:
h.metadataCache := fetchMetadata() // or return (requested, nil) on error
h.cacheLastUpdated := time.Now()
Let:
repoSet := { r.Projname | r ∈ h.metadataCache }
Partition:
analysisReposRequested into:
foundRepos = { r ∈ requested | r ∈ repoSet }
notFoundRepos = { r ∈ requested | r ∉ repoSet }
Output:
(notFoundRepos, foundRepos)
*/
// Check cache
h.cacheMutex.Lock()
if time.Since(h.cacheLastUpdated) > h.cacheDuration {
// Cache is expired or not set; refresh
results, err := h.fetchMetadata()
if err != nil {
h.cacheMutex.Unlock()
slog.Warn("Error fetching metadata", err)
return analysisReposRequested, nil
}
h.metadataCache = results
h.cacheLastUpdated = time.Now()
}
cachedResults := h.metadataCache
h.cacheMutex.Unlock()
// Compare against requested repos
repoSet := make(map[string]struct{})
for _, result := range cachedResults {
repoSet[result.Projname] = struct{}{}
}
for _, reqRepo := range analysisReposRequested {
repoKey := fmt.Sprintf("%s/%s", reqRepo.Owner, reqRepo.Repo)
if _, exists := repoSet[repoKey]; exists {
foundRepos = append(foundRepos, reqRepo)
} else {
notFoundRepos = append(notFoundRepos, reqRepo)
}
}
return notFoundRepos, foundRepos
}
func extractDatabaseFromTar(tarStream io.Reader) ([]byte, bool, error) {
/*
Input: tarStream ∈ GZIP(TAR(Files))
Find f ∈ Files | name(f) = "artifacts/codeql_database.zip"
if ∃ f → (bytes(f), true, nil)
if ¬∃ f → (nil, false, nil)
if error → (nil, false, error)
*/
gzReader, err := gzip.NewReader(tarStream)
if err != nil {
slog.Error("failed to open gzip stream", "error", err)
return nil, false, fmt.Errorf("failed to open gzip stream: %w", err)
}
defer gzReader.Close()
tarReader := tar.NewReader(gzReader)
for {
hdr, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
slog.Error("failed to read tar entry", "error", err)
return nil, false, fmt.Errorf("failed to read tar entry: %w", err)
}
if hdr.Name == "artifacts/codeql_database.zip" {
var buf bytes.Buffer
if _, err := io.Copy(&buf, tarReader); err != nil {
slog.Error("failed to extract zip from tar", "error", err)
return nil, false, fmt.Errorf("failed to extract zip from tar: %w", err)
}
return buf.Bytes(), true, nil
}
}
return nil, false, nil // not found
}
func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
/*
Input:
location = (owner, repo)
key := owner + "/" + repo
Step 1 — Ensure metadata cache:
if now − h.cacheLastUpdated > h.cacheDuration:
h.metadataCache := fetchMetadata()
h.cacheLastUpdated := now
else:
use h.metadataCache
if fetchMetadata fails → (nil, error)
Step 2 — Lookup URL:
if ∃ r ∈ h.metadataCache | r.Projname = key → resultURL := r.ResultURL
if ¬∃ r → return (nil, "not found")
Step 3 — Download:
GET replaceHepcURL(resultURL) → resp
if status ≠ 200 → (nil, "bad HTTP")
body := ReadAll(resp.Body)
if error → return (nil, error)
Step 4 — Detect + Decode:
if hasGzipHeader(body):
extractDatabaseFromTar(body) → (data, found, err)
if err → (nil, err)
if ¬found → (nil, "zip not found")
→ (data, nil)
else:
→ (body, nil)
*/
h.cacheMutex.Lock()
if time.Since(h.cacheLastUpdated) > h.cacheDuration {
results, err := h.fetchMetadata()
if err != nil {
slog.Error("error refreshing metadata cache", "error", err)
h.cacheMutex.Unlock()
return nil, fmt.Errorf("error refreshing metadata cache: %w", err)
}
h.metadataCache = results
h.cacheLastUpdated = time.Now()
}
cachedResults := h.metadataCache
h.cacheMutex.Unlock()
key := fmt.Sprintf("%s/%s", location.Owner, location.Repo)
var resultURL string
for _, result := range cachedResults {
if result.Projname == key {
resultURL = result.ResultURL
break
}
}
if resultURL == "" {
slog.Error("database not found in metadata", "repo", key)
return nil, fmt.Errorf("database not found for repository: %s", key)
}
resp, err := http.Get(replaceHepcURL(resultURL))
if err != nil {
slog.Error("failed to fetch database", "url", resultURL, "error", err)
return nil, fmt.Errorf("error fetching database: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
slog.Error("non-OK HTTP status", "status", resp.Status, "url", resultURL)
return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status)
}
// Buffer the full stream into RAM
fullBody, err := io.ReadAll(resp.Body)
if err != nil {
slog.Error("error reading full database stream into memory", "error", err)
return nil, fmt.Errorf("error reading database content: %w", err)
}
// The input could be the codeql db as zip, or a tar stream containing the zip;
// If gzip header is found, treat the input as a tar+gz archive
// Check for gzip magic number (0x1F 0x8B)
isGzip := len(fullBody) >= 2 && fullBody[0] == 0x1F && fullBody[1] == 0x8B
if isGzip {
// Extract zip data from tar+gz archive
data, found, err := extractDatabaseFromTar(bytes.NewReader(fullBody))
if err != nil {
slog.Error("error extracting from tar stream", "error", err)
return nil, err
}
if !found {
slog.Warn("tar archive read succeeded, but zip entry not found")
return nil, fmt.Errorf("zip file not found in tar archive")
} else {
return data, nil
}
}
// Treat input as raw zip file content
slog.Info("no gzip header found; assuming raw zip content")
return fullBody, nil
}
// replaceHepcURL replaces the fixed "http://hepc" with the value from
// MRVA_HEPC_ENDPOINT
func replaceHepcURL(originalURL string) string {
hepcEndpoint := os.Getenv("MRVA_HEPC_ENDPOINT")
if hepcEndpoint == "" {
hepcEndpoint = "http://hepc:8070" // Default fallback
}
// Replace "http://hepc" at the beginning of the URL
newURL := strings.Replace(originalURL, "http://hepc", hepcEndpoint, 1)
return newURL
}


@@ -2,7 +2,7 @@ package qldbstore
import (
"fmt"
"mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/common"
"os"
"path/filepath"
)


@@ -1,99 +0,0 @@
package qldbstore
import (
"context"
"fmt"
"io"
"log/slog"
"mrvacommander/pkg/common"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
)
// XX: static types: split by type?
// Restrict the keys / values and centralize the common ones here
const (
QL_DB_BUCKETNAME = "qldb"
)
type MinIOCodeQLDatabaseStore struct {
client *minio.Client
bucketName string
}
func NewMinIOCodeQLDatabaseStore(endpoint, id, secret string) (*MinIOCodeQLDatabaseStore, error) {
minioClient, err := minio.New(endpoint, &minio.Options{
Creds: credentials.NewStaticV4(id, secret, ""),
Secure: false,
})
if err != nil {
return nil, err
}
slog.Info("Connected to MinIO CodeQL database store server")
err = common.CreateMinIOBucketIfNotExists(minioClient, QL_DB_BUCKETNAME)
if err != nil {
return nil, fmt.Errorf("could not create bucket: %v", err)
}
return &MinIOCodeQLDatabaseStore{
client: minioClient,
bucketName: QL_DB_BUCKETNAME,
}, nil
}
func (store *MinIOCodeQLDatabaseStore) FindAvailableDBs(analysisReposRequested []common.NameWithOwner) (
notFoundRepos []common.NameWithOwner,
foundRepos []common.NameWithOwner) {
for _, repo := range analysisReposRequested {
status := store.haveDatabase(repo)
if status {
foundRepos = append(foundRepos, repo)
} else {
notFoundRepos = append(notFoundRepos, repo)
}
}
return notFoundRepos, foundRepos
}
func (store *MinIOCodeQLDatabaseStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
key := fmt.Sprintf("%s$%s.zip", location.Owner, location.Repo)
object, err := store.client.GetObject(context.Background(),
store.bucketName,
key,
minio.GetObjectOptions{})
if err != nil {
return nil, err
}
defer object.Close()
data, err := io.ReadAll(object)
if err != nil {
return nil, err
}
return data, nil
}
func (store *MinIOCodeQLDatabaseStore) haveDatabase(location common.NameWithOwner) bool {
objectName := fmt.Sprintf("%s$%s.zip", location.Owner, location.Repo)
// Check if the object exists
_, err := store.client.StatObject(context.Background(),
store.bucketName,
objectName,
minio.StatObjectOptions{})
if err != nil {
if minio.ToErrorResponse(err).Code == "NoSuchKey" {
slog.Info("No database found for", location)
return false
}
slog.Info("General database error while checking for", location)
return false
}
return true
}


@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
amqp "github.com/rabbitmq/amqp091-go"
@@ -15,6 +16,9 @@ type RabbitMQQueue struct {
results chan AnalyzeResult
conn *amqp.Connection
channel *amqp.Channel
mu sync.Mutex
connString string
}
// NewRabbitMQQueue initializes a RabbitMQ queue.
@@ -89,10 +93,12 @@ func NewRabbitMQQueue(
}
result := RabbitMQQueue{
conn: conn,
channel: ch,
jobs: make(chan AnalyzeJob),
results: make(chan AnalyzeResult),
conn: conn,
channel: ch,
jobs: make(chan AnalyzeJob),
results: make(chan AnalyzeResult),
mu: sync.Mutex{},
connString: rabbitMQURL,
}
if isAgent {
@@ -125,34 +131,96 @@ func (q *RabbitMQQueue) Close() {
q.conn.Close()
}
func (q *RabbitMQQueue) ConsumeJobs(queueName string) {
autoAck := false
msgs, err := q.channel.Consume(queueName, "", autoAck, false, false, false, nil)
func (q *RabbitMQQueue) reconnectIfNeeded() error {
q.mu.Lock()
defer q.mu.Unlock()
if err != nil {
slog.Error("failed to consume from queue", slog.Any("error", err))
if q.conn != nil && !q.conn.IsClosed() && q.channel != nil {
return nil // still valid
}
for msg := range msgs {
// Process message
job := AnalyzeJob{}
err := json.Unmarshal(msg.Body, &job)
if err != nil {
slog.Error("failed to unmarshal job", slog.Any("error", err))
// Recreate everything
conn, err := amqp.Dial(q.connString)
if err != nil {
return fmt.Errorf("failed to reconnect: %w", err)
}
ch, err := conn.Channel()
if err != nil {
conn.Close()
return fmt.Errorf("failed to open channel: %w", err)
}
// Optional: redeclare queues here
// _, _ = ch.QueueDeclare(...)
q.conn = conn
q.channel = ch
return nil
}
func (q *RabbitMQQueue) invalidateConnection() {
q.mu.Lock()
defer q.mu.Unlock()
if q.channel != nil {
_ = q.channel.Close()
}
if q.conn != nil {
_ = q.conn.Close()
}
q.channel = nil
q.conn = nil
}
func (q *RabbitMQQueue) ConsumeJobs(queueName string) {
const pollInterval = 5 * time.Second
// | scenario | result |
// |-------------------+---------------------------------------|
// | Queue is empty | msg = zero, ok = false, err = nil |
// | Queue has message | msg = valid, ok = true, err = nil |
// | Connection lost | msg = zero, ok = false, err = non-nil |
for {
if err := q.reconnectIfNeeded(); err != nil {
slog.Error("failed to reconnect", slog.Any("error", err))
time.Sleep(10 * time.Second)
continue
}
msg, ok, err := q.channel.Get(queueName, false) // false = manual ack
if err != nil {
slog.Error("polling error while getting job", slog.Any("error", err))
q.invalidateConnection()
time.Sleep(pollInterval)
continue
}
if !ok {
// No message in queue
time.Sleep(pollInterval)
continue
}
var job AnalyzeJob
if err := json.Unmarshal(msg.Body, &job); err != nil {
slog.Error("failed to unmarshal job", slog.Any("error", err))
_ = msg.Nack(false, false) // do not requeue
continue
}
// Send job to channel for processing
q.jobs <- job
// Acknowledge the message after successful processing
err = msg.Ack(false)
if err != nil {
slog.Error("Failed to acknowledge job consumption message",
slog.Any("error", err))
// Acknowledge successful processing
if err := msg.Ack(false); err != nil {
slog.Error("failed to ack job message", slog.Any("error", err))
continue
}
}
close(q.jobs)
}
func (q *RabbitMQQueue) PublishResults(queueName string) {
@@ -247,30 +315,31 @@ func (q *RabbitMQQueue) PublishJobs(queueName string) {
}
func (q *RabbitMQQueue) ConsumeResults(queueName string) {
autoAck := false // false = manual ack
sleepFor := 5 // polling interval
for {
msg, ok, err := q.channel.Get(queueName, autoAck)
if err != nil {
slog.Error("poll error", slog.Any("err", err))
time.Sleep(time.Duration(sleepFor) * time.Second)
continue
}
if !ok {
// no message
time.Sleep(time.Duration(sleepFor) * time.Second)
continue
}
var result AnalyzeResult
if err := json.Unmarshal(msg.Body, &result); err != nil {
slog.Error("unmarshal error", slog.Any("err", err))
_ = msg.Nack(false, false) // finish .Get() with nack
continue
}
q.results <- result
_ = msg.Ack(false) // finish .Get() with ack
}
close(q.results)
}
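
A minimal usage sketch of the polling consumer pair above, assuming it lives in the same queue package; the queue names and the analyze callback are placeholders rather than names taken from the codebase, and the real agent's error handling is omitted:

// startAgentLoop is an illustrative sketch: ConsumeJobs fills q.jobs by polling,
// the worker drains it, and PublishResults forwards whatever lands on q.results
// back to RabbitMQ.
func startAgentLoop(q *RabbitMQQueue, analyze func(AnalyzeJob) AnalyzeResult) {
	go q.ConsumeJobs("jobs")       // placeholder queue name
	go q.PublishResults("results") // placeholder queue name
	for job := range q.jobs {      // blocks until a job is polled and delivered
		q.results <- analyze(job)
	}
}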

View File

@@ -1,8 +1,8 @@
package queue
import (
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/common"
)
type QueryLanguage string

View File

@@ -14,10 +14,10 @@ import (
"strings"
"time"
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/common"
"mrvacommander/pkg/queue"
"mrvacommander/utils"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/utils"
"github.com/gorilla/mux"
)
@@ -41,7 +41,7 @@ func (c *CommanderSingle) startAnalyses(
QueryLanguage: queryLanguage,
}
c.v.Queue.Jobs() <- info
c.v.State.SetStatus(jobSpec, common.StatusQueued)
c.v.State.SetStatus(jobSpec, common.StatusPending)
c.v.State.AddJob(info)
}
}
@@ -132,7 +132,7 @@ func (c *CommanderSingle) submitEmptyStatusResponse(w http.ResponseWriter,
scannedRepos := []common.ScannedRepo{}
var jobStatus common.Status
jobStatus = common.StatusSuccess
jobStatus = common.StatusSucceeded
status := common.StatusResponse{
SessionId: jsSessionID,
@@ -176,9 +176,9 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
}
// Loop through all jobs under the same session id
// TODO: as a high priority, fix this hacky job IDing by index
// this may break with other state implementations
for jobRepoId, job := range jobs {
// fix
for _, job := range jobs {
// Get the job status
status, err := c.v.State.GetStatus(job.Spec)
if err != nil {
@@ -191,7 +191,7 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
var artifactSize int
var resultCount int
if status != common.StatusSuccess {
if status != common.StatusSucceeded {
// If the job is not successful, we don't need to get the result
artifactSize = 0
resultCount = 0
@@ -210,6 +210,8 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
}
resultCount = jobResult.ResultCount
}
// Get jobRepoID from (owner,repo)
jobRepoId := c.v.State.GetRepoId(job.Spec.NameWithOwner)
// Append all scanned (complete and incomplete) repos to the response
scannedRepos = append(scannedRepos,
@@ -326,11 +328,13 @@ func (c *CommanderSingle) MRVAStatus(w http.ResponseWriter, r *http.Request) {
}
// Download artifacts
func (c *CommanderSingle) MRVADownloadArtifactCommon(w http.ResponseWriter, r *http.Request, jobRepoId int, jobSpec common.JobSpec) {
func (c *CommanderSingle) MRVADownloadArtifactCommon(w http.ResponseWriter,
r *http.Request, jobRepoId int, jobSpec common.JobSpec) {
slog.Debug("MRVA artifact download",
"codeql_variant_analysis_id", jobSpec.SessionID,
"repo_owner", jobSpec.NameWithOwner.Owner,
"repo_name", jobSpec.NameWithOwner.Repo,
"jobRepoId", jobRepoId,
)
c.sendArtifactDownloadResponse(w, jobRepoId, jobSpec)
@@ -424,7 +428,8 @@ func (c *CommanderSingle) MRVADownloadArtifact(w http.ResponseWriter, r *http.Re
c.MRVADownloadArtifactCommon(w, r, -1, jobSpec)
}
func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter, jobRepoId int, jobSpec common.JobSpec) {
func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter,
jobRepoId int, jobSpec common.JobSpec) {
var response common.DownloadResponse
slog.Debug("Forming download response", "job", jobSpec)
@@ -436,7 +441,7 @@ func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter, jo
return
}
if jobStatus == common.StatusSuccess {
if jobStatus == common.StatusSucceeded {
jobResult, err := c.v.State.GetResult(jobSpec)
if err != nil {
slog.Error(err.Error())
@@ -511,6 +516,8 @@ func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter, jo
return
}
slog.Debug("MRVA: Sending download response", "responseJson", responseJson)
// Send responseJson via ResponseWriter
w.Header().Set("Content-Type", "application/json")
w.Write(responseJson)

View File

@@ -1,11 +1,11 @@
package server
import (
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/common"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"mrvacommander/pkg/state"
"github.com/hohn/mrvacommander/pkg/artifactstore"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/qldbstore"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/pkg/state"
)
type SessionInfo struct {

View File

@@ -1,8 +1,8 @@
package state
import (
"mrvacommander/pkg/common"
"mrvacommander/pkg/queue"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/queue"
)
// StorageInterface defines the methods required for managing storage operations
@@ -18,6 +18,9 @@ type ServerState interface {
// TODO: fix this hacky logic
GetJobSpecByRepoId(sessionId int, jobRepoId int) (common.JobSpec, error)
// The repo id is uniquely determined by NameWithOwner
GetRepoId(owner common.NameWithOwner) int
// SetResult stores the analysis result for the specified session ID and repository.
SetResult(js common.JobSpec, ar queue.AnalyzeResult)
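
A minimal sketch of the contract implied by the two lookup methods above, assuming any ServerState implementation st; the function name and the sample owner/repo pair are placeholders:

// checkRepoIdRoundTrip illustrates the intended invariant: the id returned by
// GetRepoId for a NameWithOwner maps back to the same pair via GetJobSpecByRepoId.
func checkRepoIdRoundTrip(st ServerState, sessionId int) bool {
	nwo := common.NameWithOwner{Owner: "octo-org", Repo: "octo-repo"} // placeholder repo
	id := st.GetRepoId(nwo) // stable id for this (owner, repo)
	spec, err := st.GetJobSpecByRepoId(sessionId, id)
	return err == nil && spec.NameWithOwner == nwo
}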

View File

@@ -3,9 +3,10 @@ package state
import (
"fmt"
"log/slog"
"mrvacommander/pkg/common"
"mrvacommander/pkg/queue"
"sync"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/queue"
)
type LocalState struct {
@@ -97,7 +98,7 @@ func (s *LocalState) GetStatus(js common.JobSpec) (common.Status, error) {
s.mutex.Lock()
defer s.mutex.Unlock()
if _, ok := s.status[js]; !ok {
return common.StatusError, fmt.Errorf("status not found for job spec %v", js)
return common.StatusFailed, fmt.Errorf("status not found for job spec %v", js)
}
return s.status[js], nil
}

465
pkg/state/state_postgres.go Normal file
View File

@@ -0,0 +1,465 @@
package state
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"github.com/hohn/mrvacommander/pkg/common"
"github.com/hohn/mrvacommander/pkg/queue"
"github.com/jackc/pgx/v5/pgxpool"
)
// ----- PGState holds the shared connection pool
type PGState struct {
pool *pgxpool.Pool
}
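// validateEnvVars logs every missing variable and exits the process if any of
// the required environment variables is unset.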
func validateEnvVars(requiredEnvVars []string) {
missing := false
for _, envVar := range requiredEnvVars {
if _, ok := os.LookupEnv(envVar); !ok {
slog.Error("Missing required environment variable", "key", envVar)
missing = true
}
}
if missing {
os.Exit(1)
}
}
func NewPGState() *PGState {
ctx := context.Background()
required := []string{
"POSTGRES_USER",
"POSTGRES_PASSWORD",
"POSTGRES_DB",
// Host & port may be omitted if you rely on Docker DNS, but list
// them here to make the requirement explicit:
"POSTGRES_HOST",
"POSTGRES_PORT",
}
validateEnvVars(required)
// Assemble from vars
user := os.Getenv("POSTGRES_USER")
pass := os.Getenv("POSTGRES_PASSWORD")
host := os.Getenv("POSTGRES_HOST")
port := os.Getenv("POSTGRES_PORT")
db := os.Getenv("POSTGRES_DB")
dbURL := fmt.Sprintf("postgres://%s:%s@%s:%s/%s", user, pass, host, port, db)
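// Note: dbURL embeds the password; the Info log below prints it verbatim.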
slog.Info("Assembled Postgres connection URL from POSTGRES_* variables", "url", dbURL)
config, err := pgxpool.ParseConfig(dbURL)
if err != nil {
slog.Error("Failed to parse connection URL", "url", dbURL, "error", err)
os.Exit(1)
}
config.MaxConns = 10
pool, err := pgxpool.NewWithConfig(ctx, config)
if err != nil {
slog.Error("Failed to create pgx pool", "error", err)
os.Exit(1)
}
slog.Info("Connected to Postgres", "max_conns", config.MaxConns)
SetupSchemas(pool)
return &PGState{pool: pool}
}
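// SetupSchemas creates the tables and the session sequence used by PGState.
// Every statement is guarded by IF NOT EXISTS, so it is safe to run on every startup.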
func SetupSchemas(pool *pgxpool.Pool) {
ctx := context.Background()
schemas := []struct {
name string
sql string
}{
{
name: "job_repo_map",
sql: `
CREATE TABLE IF NOT EXISTS job_repo_map (
job_repo_id SERIAL PRIMARY KEY,
owner TEXT NOT NULL,
repo TEXT NOT NULL,
UNIQUE(owner, repo)
);
`,
},
{
name: "session_id_seq",
sql: `
CREATE SEQUENCE IF NOT EXISTS session_id_seq;
`,
},
{
name: "analyze_results",
sql: `
CREATE TABLE IF NOT EXISTS analyze_results (
session_id INTEGER NOT NULL,
owner TEXT NOT NULL,
repo TEXT NOT NULL,
result JSONB NOT NULL,
PRIMARY KEY (session_id, owner, repo)
);
`,
},
{
name: "analyze_jobs",
sql: `
CREATE TABLE IF NOT EXISTS analyze_jobs (
session_id INTEGER NOT NULL,
owner TEXT NOT NULL,
repo TEXT NOT NULL,
payload JSONB NOT NULL,
PRIMARY KEY (session_id, owner, repo)
);
`,
},
{
name: "job_info",
sql: `
CREATE TABLE IF NOT EXISTS job_info (
session_id INTEGER NOT NULL,
owner TEXT NOT NULL,
repo TEXT NOT NULL,
payload JSONB NOT NULL,
PRIMARY KEY (session_id, owner, repo)
);
`,
},
{
name: "job_status",
sql: `
CREATE TABLE IF NOT EXISTS job_status (
session_id INTEGER NOT NULL,
owner TEXT NOT NULL,
repo TEXT NOT NULL,
status INTEGER NOT NULL,
PRIMARY KEY (session_id, owner, repo)
);
`,
},
}
for _, schema := range schemas {
_, err := pool.Exec(ctx, schema.sql)
if err != nil {
slog.Error("Failed to create table", "table", schema.name, "error", err)
os.Exit(1)
}
slog.Info("Schema initialized", "table", schema.name)
}
}
// ----- Sequence-based NextID (implements ServerState)
func (s *PGState) NextID() int {
ctx := context.Background()
var id int
err := s.pool.QueryRow(ctx, `SELECT nextval('session_id_seq')`).Scan(&id)
if err != nil {
slog.Error("NextID query failed", "error", err)
panic("NextID(): " + err.Error()) // interface doesn't allow returning error
}
slog.Debug("NextID generated", "id", id)
return id
}
func (s *PGState) SetResult(js common.JobSpec, ar queue.AnalyzeResult) {
ctx := context.Background()
ar.Spec = js // ensure internal consistency
jsonBytes, err := json.Marshal(ar)
if err != nil {
slog.Error("SetResult: JSON marshal failed", "job", js, "error", err)
panic("SetResult(): " + err.Error())
}
_, err = s.pool.Exec(ctx, `
INSERT INTO analyze_results (session_id, owner, repo, result)
VALUES ($1, $2, $3, $4)
ON CONFLICT (session_id, owner, repo)
DO UPDATE SET result = EXCLUDED.result
`, js.SessionID, js.Owner, js.Repo, jsonBytes)
if err != nil {
slog.Error("SetResult: insert/update failed", "job", js, "error", err)
panic("SetResult(): " + err.Error())
}
}
func (s *PGState) GetResult(js common.JobSpec) (queue.AnalyzeResult, error) {
ctx := context.Background()
var jsonBytes []byte
err := s.pool.QueryRow(ctx, `
SELECT result FROM analyze_results
WHERE session_id = $1 AND owner = $2 AND repo = $3
`, js.SessionID, js.Owner, js.Repo).Scan(&jsonBytes)
if err != nil {
return queue.AnalyzeResult{}, err
}
var ar queue.AnalyzeResult
if err := json.Unmarshal(jsonBytes, &ar); err != nil {
return queue.AnalyzeResult{}, fmt.Errorf("unmarshal AnalyzeResult: %w", err)
}
return ar, nil
}
func (s *PGState) SetJobInfo(js common.JobSpec, ji common.JobInfo) {
ctx := context.Background()
jiJSON, err := json.Marshal(ji)
if err != nil {
slog.Error("SetJobInfo: marshal failed", "job", js, "error", err)
panic("SetJobInfo(): " + err.Error())
}
_, err = s.pool.Exec(ctx, `
INSERT INTO job_info (session_id, owner, repo, payload)
VALUES ($1, $2, $3, $4)
ON CONFLICT (session_id, owner, repo)
DO UPDATE SET payload = EXCLUDED.payload
`, js.SessionID, js.Owner, js.Repo, jiJSON)
if err != nil {
slog.Error("SetJobInfo: insert/update failed", "job", js, "error", err)
panic("SetJobInfo(): " + err.Error())
}
}
func (s *PGState) GetJobInfo(js common.JobSpec) (common.JobInfo, error) {
ctx := context.Background()
var jsonBytes []byte
err := s.pool.QueryRow(ctx, `
SELECT payload FROM job_info
WHERE session_id = $1 AND owner = $2 AND repo = $3
`, js.SessionID, js.Owner, js.Repo).Scan(&jsonBytes)
if err != nil {
return common.JobInfo{}, err
}
var ji common.JobInfo
if err := json.Unmarshal(jsonBytes, &ji); err != nil {
return common.JobInfo{}, fmt.Errorf("unmarshal JobInfo: %w", err)
}
return ji, nil
}
func (s *PGState) SetStatus(js common.JobSpec, status common.Status) {
ctx := context.Background()
_, err := s.pool.Exec(ctx, `
INSERT INTO job_status (session_id, owner, repo, status)
VALUES ($1, $2, $3, $4)
ON CONFLICT (session_id, owner, repo)
DO UPDATE SET status = EXCLUDED.status
`, js.SessionID, js.Owner, js.Repo, status)
if err != nil {
slog.Error("SetStatus failed", "job", js, "status", status, "error", err)
panic("SetStatus(): " + err.Error())
}
}
func (s *PGState) GetSessionStatus(sessionID int) (common.StatusSummary, error) {
ctx := context.Background()
rows, err := s.pool.Query(ctx, `
SELECT status
FROM job_status
WHERE session_id = $1
`, sessionID)
if err != nil {
return common.StatusSummary{}, err
}
defer rows.Close()
counts := map[common.Status]int{
common.StatusPending: 0,
common.StatusInProgress: 0,
common.StatusSucceeded: 0,
common.StatusFailed: 0,
common.StatusCanceled: 0,
common.StatusTimedOut: 0,
}
total := 0
for rows.Next() {
var st int
if err := rows.Scan(&st); err != nil {
return common.StatusSummary{}, err
}
counts[common.Status(st)]++
total++
}
// apply deterministic rules
var overall common.Status
switch {
case counts[common.StatusSucceeded] == total:
overall = common.StatusSucceeded
case counts[common.StatusFailed] == total:
overall = common.StatusFailed
case counts[common.StatusCanceled] == total:
overall = common.StatusCanceled
case counts[common.StatusTimedOut] == total:
overall = common.StatusFailed
case counts[common.StatusInProgress] > 0:
overall = common.StatusPending
case counts[common.StatusPending] > 0 && counts[common.StatusInProgress] == 0:
overall = common.StatusPending
case counts[common.StatusPending] == 0 && counts[common.StatusInProgress] == 0:
overall = common.StatusSucceeded // covers mixed complete
default:
overall = common.StatusPending
}
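// For example: a session whose jobs are all finished but mixed (say 3 succeeded,
// 1 failed) falls through to the mixed-complete case and yields StatusSucceeded;
// any job still pending or in progress keeps the overall status at StatusPending.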
return common.StatusSummary{Overall: overall, Counts: counts}, nil
}
func (s *PGState) GetStatus(js common.JobSpec) (common.Status, error) {
summary, err := s.GetSessionStatus(js.SessionID)
if err != nil {
return 0, err
}
return summary.Overall, nil
}
// GetRepoId returns a stable unique ID for a given (owner, repo).
// If the pair doesn't exist, it is inserted atomically.
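// The no-op DO UPDATE (owner = EXCLUDED.owner) is there so that RETURNING always
// yields job_repo_id; with DO NOTHING, a conflicting insert would return no row.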
func (s *PGState) GetRepoId(cno common.NameWithOwner) int {
ctx := context.Background()
var jobRepoID int
err := s.pool.QueryRow(ctx, `
INSERT INTO job_repo_map (owner, repo)
VALUES ($1, $2)
ON CONFLICT (owner, repo) DO UPDATE SET owner = EXCLUDED.owner
RETURNING job_repo_id
`, cno.Owner, cno.Repo).Scan(&jobRepoID)
if err != nil {
slog.Error("GetRepoId failed", "NameWithOwner", cno, "error", err)
panic("GetRepoId: " + err.Error())
}
return jobRepoID
}
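// AddJob stores the AnalyzeJob payload for (session, owner, repo) and ensures the
// repository is present in job_repo_map via GetRepoId. GetRepoId runs on the
// shared pool, outside the transaction opened below.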
func (s *PGState) AddJob(job queue.AnalyzeJob) {
ctx := context.Background()
js := job.Spec
// Begin transaction for atomic operation
tx, err := s.pool.Begin(ctx)
if err != nil {
slog.Error("AddJob: failed to begin transaction", "job", js, "error", err)
panic("AddJob(): " + err.Error())
}
defer tx.Rollback(ctx) // Will be ignored if tx.Commit() succeeds
// 1. Store AnalyzeJob payload -------------------------------
jb, err := json.Marshal(job)
if err != nil {
slog.Error("AddJob: marshal failed", "job", js, "error", err)
panic("AddJob(): " + err.Error())
}
_, err = tx.Exec(ctx, `
INSERT INTO analyze_jobs (session_id, owner, repo, payload)
VALUES ($1, $2, $3, $4)
ON CONFLICT DO NOTHING
`, js.SessionID, js.Owner, js.Repo, jb)
if err != nil {
slog.Error("AddJob: insert analyze_jobs failed", "job", js, "error", err)
panic("AddJob(): " + err.Error())
}
// 2. Get job_repo_id
jobRepoID := s.GetRepoId(job.Spec.NameWithOwner)
// Commit the transaction
if err = tx.Commit(ctx); err != nil {
slog.Error("AddJob: failed to commit transaction", "job", js, "error", err)
panic("AddJob(): " + err.Error())
}
slog.Debug("AddJob stored", "session", js.SessionID, "jobRepoId", jobRepoID, "owner", js.Owner, "repo", js.Repo)
}
func (s *PGState) GetJobList(sessionId int) ([]queue.AnalyzeJob, error) {
ctx := context.Background()
rows, err := s.pool.Query(ctx, `
SELECT payload FROM analyze_jobs
WHERE session_id = $1
ORDER BY owner, repo
`, sessionId)
if err != nil {
slog.Error("GetJobList: query failed", "session_id", sessionId, "error", err)
return nil, err
}
defer rows.Close()
var jobs []queue.AnalyzeJob
for rows.Next() {
var jsonBytes []byte
if err := rows.Scan(&jsonBytes); err != nil {
slog.Error("GetJobList: scan failed", "error", err)
return nil, err
}
var job queue.AnalyzeJob
if err := json.Unmarshal(jsonBytes, &job); err != nil {
slog.Error("GetJobList: unmarshal failed", "error", err)
return nil, err
}
jobs = append(jobs, job)
}
if err := rows.Err(); err != nil {
slog.Error("GetJobList: rows iteration failed", "error", err)
return nil, err
}
return jobs, nil
}
func (s *PGState) GetJobSpecByRepoId(sessionId, jobRepoId int) (common.JobSpec, error) {
ctx := context.Background()
var owner, repo string
err := s.pool.QueryRow(ctx, `
SELECT owner, repo
FROM job_repo_map
WHERE job_repo_id = $1
`, jobRepoId).Scan(&owner, &repo)
if err != nil {
return common.JobSpec{}, err
}
return common.JobSpec{
SessionID: sessionId,
NameWithOwner: common.NameWithOwner{
Owner: owner,
Repo: repo,
},
}, nil
}