Compare commits

52 Commits

| SHA1 |
|---|
| 47de30a56e |
| d2c7b98d1c |
| fb5adf1b5f |
| 750187fb12 |
| 807d5f3d45 |
| 1377d4cec9 |
| ec8bb0cc63 |
| 8d7aa780ed |
| 43a7143e27 |
| 0d6e31713f |
| a2cfe0676c |
| f920a799d3 |
| 41146f5aaf |
| 173a61e3fa |
| e294fcdf4f |
| 9fe6aed357 |
| 3762654ef2 |
| d94f69be09 |
| 1fd220416c |
| df97e6ef10 |
| 2e99bdfedf |
| a507797eff |
| 0115e74d07 |
| 8577e1775a |
| 8590bd6de7 |
| cf37b474e4 |
| 5bdbd60cc5 |
| bde8ac2db7 |
| 75e57dc0a8 |
| c32ff755ef |
| 19a936087f |
| bb6189322a |
| f7dc5318e4 |
| 70c06e4fae |
| a2be014b2f |
| 58f4fe1ca7 |
| 14d6057248 |
| 01ddf38069 |
| 47a021d84a |
| 8d4c766e8c |
| 2409728960 |
| f066c767e2 |
| 397b86c735 |
| 511c544f6e |
| bd74ed646f |
| 45e40abf5d |
| a3593cbba2 |
| a0185df9d5 |
| 23e3ea9367 |
| 4140eaafc4 |
| 3e47bd4adb |
| f92dfc89a2 |
.gitattributes (vendored) | 1

@@ -1,2 +1,3 @@
*.zip filter=lfs diff=lfs merge=lfs -text
*.blob filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text

.gitignore (vendored) | 10

@@ -57,3 +57,13 @@ notes/*.html

# Make timestamp files
mk.*
demo/containers/dbsdata/data/
demo/containers/dbsdata/tmp.dbsdata_backup.tar
client/qldbtools/db-collection-py-1/

mrva-overview.aux
mrva-overview.log
mrva-overview.synctex.gz
mrva-overview.toc

auto/

@@ -3,6 +3,13 @@
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB.
"""
# /// script
# dependencies = [
#   "pandas",
#   "numpy",
#   "minio",
# ]
# ///
import argparse
import qldbtools.utils as utils
import logging
client/qldbtools/pyproject.toml (new file) | 138

@@ -0,0 +1,138 @@
[project]
name = "qldbtools"
version = "0.1.0"
description = "A Python package for selecting sets of CodeQL databases to work on"
authors = [
    {name = "Michael Hohn", email = "hohn@github.com"}
]
readme = {file = "README.org", content-type = "text/plain"}
requires-python = ">=3.11"
dependencies = [
    "annotated-types>=0.7.0",
    "anyio>=4.4.0",
    "appnope>=0.1.4",
    "argon2-cffi>=23.1.0",
    "argon2-cffi-bindings>=21.2.0",
    "arrow>=1.3.0",
    "asttokens>=2.4.1",
    "async-lru>=2.0.4",
    "attrs>=24.2.0",
    "babel>=2.16.0",
    "beautifulsoup4>=4.12.3",
    "bleach>=6.1.0",
    "blinker>=1.9.0",
    "certifi>=2024.7.4",
    "cffi>=1.17.0",
    "charset-normalizer>=3.3.2",
    "click>=8.1.7",
    "comm>=0.2.2",
    "debugpy>=1.8.5",
    "decorator>=5.1.1",
    "defusedxml>=0.7.1",
    "executing>=2.0.1",
    "fastapi>=0.115.5",
    "fastjsonschema>=2.20.0",
    "flask>=3.1.0",
    "fqdn>=1.5.1",
    "h11>=0.14.0",
    "httpcore>=1.0.5",
    "httpx>=0.27.0",
    "idna>=3.7",
    "ipykernel>=6.29.5",
    "ipython>=8.26.0",
    "isoduration>=20.11.0",
    "itsdangerous>=2.2.0",
    "jedi>=0.19.1",
    "jinja2>=3.1.4",
    "json5>=0.9.25",
    "jsonpointer>=3.0.0",
    "jsonschema>=4.23.0",
    "jsonschema-specifications>=2023.12.1",
    "jupyter-events>=0.10.0",
    "jupyter-lsp>=2.2.5",
    "jupyter-client>=8.6.2",
    "jupyter-core>=5.7.2",
    "jupyter-server>=2.14.2",
    "jupyter-server-terminals>=0.5.3",
    "jupyterlab>=4.2.4",
    "jupyterlab-pygments>=0.3.0",
    "jupyterlab-server>=2.27.3",
    "lckr-jupyterlab-variableinspector",
    "markupsafe>=2.1.5",
    "matplotlib-inline>=0.1.7",
    "minio==7.2.8",
    "mistune>=3.0.2",
    "nbclient>=0.10.0",
    "nbconvert>=7.16.4",
    "nbformat>=5.10.4",
    "nest-asyncio>=1.6.0",
    "notebook-shim>=0.2.4",
    "numpy>=2.1.0",
    "overrides>=7.7.0",
    "packaging>=24.1",
    "pandas>=2.2.2",
    "pandocfilters>=1.5.1",
    "parso>=0.8.4",
    "pexpect>=4.9.0",
    "platformdirs>=4.2.2",
    "plumbum>=1.9.0",
    "prometheus-client>=0.20.0",
    "prompt-toolkit>=3.0.47",
    "psutil>=6.0.0",
    "ptyprocess>=0.7.0",
    "pure-eval>=0.2.3",
    "pycparser>=2.22",
    "pycryptodome>=3.20.0",
    "pydantic>=2.10.2",
    "pydantic-core>=2.27.1",
    "pygments>=2.18.0",
    "python-dateutil>=2.9.0.post0",
    "python-json-logger>=2.0.7",
    "pytz>=2024.1",
    "pyyaml>=6.0.2",
    "pyzmq>=26.1.1",
    "referencing>=0.35.1",
    "requests>=2.32.3",
    "rfc3339-validator>=0.1.4",
    "rfc3986-validator>=0.1.1",
    "rpds-py>=0.20.0",
    "send2trash>=1.8.3",
    "six>=1.16.0",
    "sniffio>=1.3.1",
    "soupsieve>=2.6",
    "stack-data>=0.6.3",
    "starlette>=0.41.3",
    "terminado>=0.18.1",
    "tinycss2>=1.3.0",
    "tornado>=6.4.1",
    "traitlets>=5.14.3",
    "types-python-dateutil>=2.9.0.20240821",
    "typing-extensions>=4.12.2",
    "tzdata>=2024.1",
    "uri-template>=1.3.0",
    "urllib3>=2.2.2",
    "uvicorn>=0.32.1",
    "wcwidth>=0.2.13",
    "webcolors>=24.8.0",
    "webencodings>=0.5.1",
    "websocket-client>=1.8.0",
    "werkzeug>=3.1.3",
]

[build-system]
requires = ["setuptools>=75.5.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["qldbtools"]
script-files = [
    "bin/mc-db-generate-selection",
    "bin/mc-db-initial-info",
    "bin/mc-db-populate-minio",
    "bin/mc-db-refine-info",
    "bin/mc-db-unique",
    "bin/mc-db-view-info",
    "bin/mc-hepc-init",
    "bin/mc-hepc-serve",
    "bin/mc-rows-from-mrva-list",
]

client/qldbtools/uv.lock (generated, new file) | 2278

File diff suppressed because it is too large
@@ -1,73 +0,0 @@
package main

import (
    "context"
    "flag"
    "log"
    "log/slog"
    "os"
    "os/signal"
    "sync"
    "syscall"

    "github.com/hohn/mrvacommander/pkg/agent"
    "github.com/hohn/mrvacommander/pkg/deploy"
)

func main() {
    slog.Info("Starting agent")
    workerCount := flag.Int("workers", 0, "number of workers")
    logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
    flag.Parse()

    // Apply 'loglevel' flag
    switch *logLevel {
    case "debug":
        slog.SetLogLoggerLevel(slog.LevelDebug)
    case "info":
        slog.SetLogLoggerLevel(slog.LevelInfo)
    case "warn":
        slog.SetLogLoggerLevel(slog.LevelWarn)
    case "error":
        slog.SetLogLoggerLevel(slog.LevelError)
    default:
        log.Printf("Invalid logging verbosity level: %s", *logLevel)
        os.Exit(1)
    }

    isAgent := true

    rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
    if err != nil {
        slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
        os.Exit(1)
    }
    defer rabbitMQQueue.Close()

    artifacts, err := deploy.InitMinIOArtifactStore()
    if err != nil {
        slog.Error("Failed to initialize artifact store", slog.Any("error", err))
        os.Exit(1)
    }

    databases, err := deploy.InitMinIOCodeQLDatabaseStore()
    if err != nil {
        slog.Error("Failed to initialize database store", slog.Any("error", err))
        os.Exit(1)
    }

    var wg sync.WaitGroup
    ctx, cancel := context.WithCancel(context.Background())
    go agent.StartAndMonitorWorkers(ctx, artifacts, databases, rabbitMQQueue, *workerCount, &wg)
    slog.Info("Agent started")

    // Gracefully exit on SIGINT/SIGTERM
    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
    <-sigChan

    slog.Info("Shutting down agent")
    cancel()
    wg.Wait()
    slog.Info("Agent shutdown complete")
}
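The deleted agent entry point above relies on a standard Go shutdown idiom: a cancellable context fans out to the workers while a `sync.WaitGroup` lets `main` block until every worker has drained. The sketch below distills that idiom into a self-contained program; the worker loop, names, and timings are hypothetical stand-ins, not the project's `agent.StartAndMonitorWorkers`.

```go
// Minimal sketch (not the project's code) of the shutdown pattern used above:
// a cancellable context fans out to workers, and a WaitGroup lets main block
// until every worker has drained before reporting completion.
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"
)

func worker(ctx context.Context, id int, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case <-ctx.Done():
			fmt.Printf("worker %d: draining and exiting\n", id)
			return
		case <-time.After(500 * time.Millisecond):
			fmt.Printf("worker %d: polling for a job\n", id) // hypothetical work
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go worker(ctx, i, &wg)
	}

	// Block until SIGINT/SIGTERM, then cancel and wait, mirroring the order
	// in the deleted main(): cancel first, wg.Wait() second.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	<-sigChan

	cancel()
	wg.Wait()
	fmt.Println("shutdown complete")
}
```

Calling `cancel()` before `wg.Wait()` matters: cancellation is the signal the workers are waiting on, and the wait only returns once each worker has observed it.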
@@ -1,158 +0,0 @@
// Copyright © 2024 github
// Licensed under the Apache License, Version 2.0 (the "License").

package main

import (
    "context"
    "flag"
    "log"
    "log/slog"
    "os"
    "os/signal"
    "path/filepath"
    "sync"
    "syscall"

    "github.com/hohn/mrvacommander/config/mcc"

    "github.com/hohn/mrvacommander/pkg/agent"
    "github.com/hohn/mrvacommander/pkg/artifactstore"
    "github.com/hohn/mrvacommander/pkg/deploy"
    "github.com/hohn/mrvacommander/pkg/qldbstore"
    "github.com/hohn/mrvacommander/pkg/queue"
    "github.com/hohn/mrvacommander/pkg/server"
    "github.com/hohn/mrvacommander/pkg/state"
)

func main() {
    // Define flags
    helpFlag := flag.Bool("help", false, "Display help message")
    logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
    mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")
    dbPathRoot := flag.String("dbpath", "", "Set the root path for the database store if using standalone mode.")

    // Custom usage function for the help flag
    flag.Usage = func() {
        log.Printf("Usage of %s:\n", os.Args[0])
        flag.PrintDefaults()
        log.Println("\nExamples:")
        log.Println("go run main.go --loglevel=debug --mode=container --dbpath=/path/to/db_dir")
    }

    // Parse the flags
    flag.Parse()

    // Handle the help flag
    if *helpFlag {
        flag.Usage()
        return
    }

    // Apply 'loglevel' flag
    switch *logLevel {
    case "debug":
        slog.SetLogLoggerLevel(slog.LevelDebug)
    case "info":
        slog.SetLogLoggerLevel(slog.LevelInfo)
    case "warn":
        slog.SetLogLoggerLevel(slog.LevelWarn)
    case "error":
        slog.SetLogLoggerLevel(slog.LevelError)
    default:
        log.Printf("Invalid logging verbosity level: %s", *logLevel)
        os.Exit(1)
    }

    // Process database root if standalone and not provided
    if *mode == "standalone" && *dbPathRoot == "" {
        slog.Warn("No database root path provided.")
        // Current directory of the Executable has a codeql directory. There.
        // Resolve the absolute directory based on os.Executable()
        execPath, err := os.Executable()
        if err != nil {
            slog.Error("Failed to get executable path", slog.Any("error", err))
            os.Exit(1)
        }
        *dbPathRoot = filepath.Dir(execPath) + "/codeql/dbs/"
        slog.Info("Using default database root path", "dbPathRoot", *dbPathRoot)
    }

    // Read configuration
    config := mcc.LoadConfig("mcconfig.toml")

    // Output configuration summary
    log.Printf("Help: %t\n", *helpFlag)
    log.Printf("Log Level: %s\n", *logLevel)
    log.Printf("Mode: %s\n", *mode)

    // Handle signals
    sigChan := make(chan os.Signal, 1)
    signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

    // Apply 'mode' flag
    switch *mode {
    case "standalone":
        // Assemble single-process version
        sq := queue.NewQueueSingle(2)
        ss := state.NewLocalState(config.Storage.StartingID)
        as := artifactstore.NewInMemoryArtifactStore()
        ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore(*dbPathRoot)

        server.NewCommanderSingle(&server.Visibles{
            Queue:         sq,
            State:         ss,
            Artifacts:     as,
            CodeQLDBStore: ql,
        })

        var wg sync.WaitGroup
        ctx, cancel := context.WithCancel(context.Background())

        go agent.StartAndMonitorWorkers(ctx, as, ql, sq, 2, &wg)

        slog.Info("Started server and standalone agent")
        <-sigChan
        slog.Info("Shutting down...")
        cancel()
        wg.Wait()
        slog.Info("Agent shutdown complete")

    case "container":
        isAgent := false

        rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
        if err != nil {
            slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
            os.Exit(1)
        }
        defer rabbitMQQueue.Close()

        artifacts, err := deploy.InitMinIOArtifactStore()
        if err != nil {
            slog.Error("Failed to initialize artifact store", slog.Any("error", err))
            os.Exit(1)
        }

        databases, err := deploy.InitMinIOCodeQLDatabaseStore()
        if err != nil {
            slog.Error("Failed to initialize database store", slog.Any("error", err))
            os.Exit(1)
        }

        server.NewCommanderSingle(&server.Visibles{
            Queue:         rabbitMQQueue,
            State:         state.NewLocalState(config.Storage.StartingID),
            Artifacts:     artifacts,
            CodeQLDBStore: databases,
        })

        slog.Info("Started server in container mode.")
        <-sigChan
    default:
        slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster")
        os.Exit(1)
    }

    slog.Info("Server shutdown complete")
}
@@ -38,12 +38,19 @@

# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata
# Get the data as tar file from the image
# Use mrvacommander_dbsdata to access the compose cluster
# EITHER
# Get the data as tar file from the image using container
rm -f dbsdata_backup.tar
docker run --rm \
    -v mrvacommander_dbsdata:/data \
    -v $(pwd):/backup \
    busybox sh -c "tar cvf /backup/dbsdata_backup.tar ."
    busybox sh -c "tar cf /backup/dbsdata_backup.tar /data"
# OR
# Use gnu tar on host. The macos tar adds extended attributes
# brew install gnu-tar
rm -f dbsdata_backup.tar && gtar cf dbsdata_backup.tar data/

# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .

demo/containers/dbsdata/dbsdata_backup.tar (binary, stored with Git LFS, new file)

Binary file not shown.
doc/mrva-business.org (new file) | 101

@@ -0,0 +1,101 @@
* MRVA for CodeQL: A Business View
** Introduction
The companion documents in this directory are mostly technical. The purpose of
this document is to explain, from a business perspective, what MRVA is and why
it matters.

To illustrate its impact, consider two real-world cases:

*** Case 1: Preventing Costly Security Failures
One of our customers faced a significant lawsuit due to inadequate security.
The root cause? Unaddressed technical risks in their code. The work we do
directly prevents similar vulnerabilities from reaching this stage.

While lawsuits of this scale are rare, security failures are not. More common
consequences include:

- Compliance violations (e.g., GDPR, SOC2 penalties)
- Security breaches leading to reputation damage
- Productivity loss from disruptive technical failures

Lawsuits may be exceptional, but code security failures occur daily. Our role
isn’t just about preventing catastrophic losses—it’s about avoiding the small,
accumulating failures that erode security, compliance, and trust over time.

*** Case 2: Identifying Hidden Risks at Scale
Another customer manages a massive software portfolio of 120,000+ distinct
codebases—a scale at which traditional security tools and manual review
processes become impractical.

- A few known vulnerabilities had already been identified and patched.
- Our analysis uncovered 30 additional high-risk instances, previously undetected.

These findings were critical because:

- Traditional security tools break down at scale. Most solutions work well for
  isolated codebases but lack the capability to analyze patterns across
  120,000 repositories.
- Complexity hides risk. Identifying these vulnerabilities required specialized
  techniques beyond simple scanning—capable of handling variations,
  context, and subtle exploit paths.
- Existing security processes failed to detect these vulnerabilities. Without
  proactive intervention, these risks would have remained undetected until
  a potential breach occurred.

This case highlights a critical gap in standard security practices. By leveraging
advanced, scalable analysis, we identified and mitigated risks that would have
otherwise gone unnoticed—demonstrating the value of proactive security
at scale.

** Why This Matters
These examples, along with others, reinforce the importance of proactive
security—especially in the context of MRVA. Security risks don’t just exist
in theory; they have tangible business consequences.

MRVA provides a scalable, systematic approach to identifying and addressing
risks before they escalate—ensuring that security is a strategic advantage, not
just a cost.

** What is MRVA?
MRVA stands for /Multi-Repository Variant Analysis/. The concept is straightforward:

1. A /problem/ is identified in one codebase.
2. Variations of this problem (/variants/) can be defined.
3. The organization manages many code repositories (/multi-repository/).
4. A systematic /analysis/ is required to detect these variants across all repositories.

In practice:
- Steps 1 & 2: Defined through CodeQL queries, often custom-written for this purpose.
- Steps 3 & 4: Can be done manually but come with significant challenges.

*** Challenges of Manual Execution
Manually searching for these variants across multiple repositories is possible
but inefficient and error-prone due to:

- /High bookkeeping overhead/ – Tracking thousands of repositories is
  cumbersome.
- /Heavy scripting requirements/ – Expert /Unix scripting skills/ are
  necessary.
- /Scaling limitations/ – Analyzing /thousands of repositories sequentially/
  is slow, and manual parallelization is impractical.
- /Cumbersome review process/ – Results are stored as /raw text files/,
  requiring multiple processing steps for meaningful analysis.

*** MRVA: A Streamlined, Integrated Solution
Instead of relying on manual effort, MRVA is designed to /automate and
integrate/ the process.

- The system is designed to be /machine-driven/ and integrated into an
  automated pipeline.
- Once incorporated, MRVA leverages the /CodeQL VS Code plugin/ to provide a
  /seamless user experience/.
- How it works:
  - Users submit queries through the UI.
  - Results are retrieved and displayed dynamically as they become available.
  - The entire workflow is automated, scalable, and significantly more
    efficient than manual methods.

By eliminating manual inefficiencies, MRVA enables organizations to identify
and resolve security issues across massive codebases at scale, ensuring both
accuracy and speed in vulnerability detection.
doc/mrva-interconnect.ltx (new file) | 331

@@ -0,0 +1,331 @@
\documentclass[11pt]{article}

% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}

% increase nesting depth

\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=–}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=›}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}


% Load CM Bright for math
\usepackage{amsmath}   % Standard math package
\usepackage{amssymb}   % Additional math symbols
\usepackage{cmbright}  % Sans-serif math font that complements Fira Sans

\usepackage{fourier}

% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default

% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
  language=Python,
  basicstyle=\ttfamily\small,        % Monospaced font
  commentstyle=\itshape\color{gray}, % Italic and gray for comments
  keywordstyle=\color{blue},         % Keywords in blue
  stringstyle=\color{red},           % Strings in red
  mathescape=true,                   % Enable math in comments
  breaklines=true,                   % Break long lines
  numbers=left,                      % Add line numbers
  numberstyle=\tiny\color{gray},     % Style for line numbers
  frame=single,                      % Add a frame around the code
}

\usepackage{newfloat} % Allows creating custom float types

% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
  fileext=lol,
  listname=List of Listings,
  name=Listing
]{listing}

% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}

% Title, Author, and Date (or Report Number)
\title{MRVA component interconnections}
\author{Michael Hohn}
\date{Technical Report 20250524}

\hypersetup{
  pdfauthor={Michael Hohn},
  pdftitle={MRVA component interconnections},
  pdfkeywords={},
  pdfsubject={},
  pdfcreator={Emacs 29.1},
  pdflang={English}}

\begin{document}

\maketitle
\tableofcontents

\section{Overview}
\label{sec:overview}

The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:

\begin{itemize}
\item \textbf{Server}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}

The execution process follows a structured workflow:

\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
  set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}

This full round-trip can be expressed as:

\begin{equation}
  \text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
  \xrightarrow{\text{enqueue}}
  \text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
  \xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
  \text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}

\section{Symbols and Notation}
\label{sec:orgb695d5a}

We define the following symbols for entities in the system:

\begin{center}
\begin{tabular}{lll}
Concept & Symbol & Description \\[0pt]
\hline
Client & \(C\) & The source of the query submission \\[0pt]
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
Agent Set & \(A\) & The set of all available agents \\[0pt]
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
\end{tabular}
\end{center}


\section{Full Round-Trip Representation}
\label{sec:full-round-trip}
The full round-trip execution, from query submission to result delivery, can be summarized as:

\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
\xrightarrow{\text{poll}}
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
\]

\begin{itemize}
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
\end{itemize}


\section{Result Representation}

For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]

where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query \(\mathcal{Q}_j\)
  on repository \(\mathcal{R}_i\).
\end{itemize}

An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
\[
r_{i,j,k}
\]


\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
\]

Each result can be further indexed to track multiple repositories and result sets.


\section{Graph Extraction from Log Table}

Assume we have a structured event log represented as a set of tuples.

\subsection*{Event Log Structure}

Let
\[
\mathcal{T} = \{ t_1, t_2, \dots, t_n \}
\]
be the set of all events, where each event
\[
t_i = (\mathit{id}_i, \tau_i, a_i, e_i, q_i, r_i, c_i)
\]
consists of:
\begin{itemize}
\item \(\mathit{id}_i\): unique event ID
\item \(\tau_i\): timestamp
\item \(a_i\): actor (e.g., ``agent\_alpha1'')
\item \(e_i\): event type (e.g., ``enqueue'', ``execute'')
\item \(q_i\): query ID
\item \(r_i\): repository ID
\item \(c_i\): result count (may be \(\bot\) if not applicable)
\end{itemize}

Let
\[
\mathcal{G} = (V, E)
\]
be a directed graph constructed from \(\mathcal{T}\), with vertices \(V\) and edges \(E\).

\subsection*{Graph Definition}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &\subseteq V \times V
\end{align*}

Edges capture temporal or semantic relationships between events.

\subsection*{Construction Steps}

\paragraph{1. Partition by Job Identity}
Define the set of job identifiers:
\[
J = \{ (q, r) \mid \exists i: q_i = q \land r_i = r \}
\]
Then for each \((q, r) \in J\), define:
\[
\mathcal{T}_{q,r} = \{ t_i \in \mathcal{T} \mid q_i = q \land r_i = r \}
\]

\paragraph{2. Sort by Time}
Order each \(\mathcal{T}_{q,r}\) as a list:
\[
\mathcal{T}_{q,r} = [ t_{i_1}, t_{i_2}, \dots, t_{i_k} ]
\quad \text{such that } \tau_{i_j} < \tau_{i_{j+1}}
\]

\paragraph{3. Causal Edges}
Define within-job edges:
\[
E_{q,r} = \{ (\mathit{id}_{i_j}, \mathit{id}_{i_{j+1}}) \mid 1 \leq j < k \}
\]

\paragraph{4. Global Causal Graph}
Take the union:
\[
E_{\text{causal}} = \bigcup_{(q, r) \in J} E_{q,r}
\]

\paragraph{5. Semantic Edges (Optional)}
Define semantic predicates such as:
\[
\mathsf{pulls}(i, j) \iff e_i = \text{enqueue} \land e_j = \text{pull} \land
q_i = q_j \land r_i = r_j \land \tau_i < \tau_j \land a_i = \text{server} \land a_j = \text{agent}
\]
Then:
\[
E_{\text{semantic}} = \{ (\mathit{id}_i, \mathit{id}_j) \mid \mathsf{pulls}(i, j) \}
\]

\subsection*{Final Graph}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &= E_{\text{causal}} \cup E_{\text{semantic}}
\end{align*}

\subsection*{Notes}
\begin{itemize}
\item This construction is generic: the log store \(\mathcal{T}\) may come from a database, file, or tuple-indexed dictionary.
\item Each semantic edge rule corresponds to a logical filter/join over \(\mathcal{T}\).
\item The construction is schema-free on the graph side and can be recomputed on demand with different edge logic.
\end{itemize}


\end{document}

%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: nil
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
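The graph construction in doc/mrva-interconnect.ltx above is stated set-theoretically. As a concrete reading of construction steps 1 through 4, here is a small Go sketch: partition events by job identity (query, repo), sort each partition by timestamp, and link consecutive event IDs as causal edges. The Event and Edge types are hypothetical stand-ins for the tuple t_i = (id, tau, a, e, q, r, c); they are not part of the repository.

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

// Event mirrors the tuple t_i = (id, tau, actor, eventType, query, repo, count).
type Event struct {
	ID    int
	Tau   time.Time
	Actor string
	Type  string
	Query string
	Repo  string
	Count int // result count; zero when not applicable
}

type Edge struct{ From, To int }

// causalEdges implements construction steps 1-4: partition by job identity
// (query, repo), sort each partition by timestamp, then connect consecutive
// event IDs within each partition and take the union over all partitions.
func causalEdges(events []Event) []Edge {
	jobs := map[[2]string][]Event{}
	for _, e := range events {
		k := [2]string{e.Query, e.Repo}
		jobs[k] = append(jobs[k], e)
	}
	var edges []Edge
	for _, es := range jobs {
		sort.Slice(es, func(i, j int) bool { return es[i].Tau.Before(es[j].Tau) })
		for i := 0; i+1 < len(es); i++ {
			edges = append(edges, Edge{es[i].ID, es[i+1].ID})
		}
	}
	return edges
}

func main() {
	// Hypothetical three-event log for one (query, repo) job.
	t0 := time.Now()
	log := []Event{
		{ID: 1, Tau: t0, Actor: "server", Type: "enqueue", Query: "q1", Repo: "r1"},
		{ID: 2, Tau: t0.Add(time.Second), Actor: "agent_alpha1", Type: "pull", Query: "q1", Repo: "r1"},
		{ID: 3, Tau: t0.Add(2 * time.Second), Actor: "agent_alpha1", Type: "execute", Query: "q1", Repo: "r1", Count: 3},
	}
	fmt.Println(causalEdges(log)) // [{1 2} {2 3}]
}
```

Semantic edges (step 5) would be one more pass over the same partitions, joining enqueue events from the server with pull events from agents on matching (query, repo) pairs.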
BIN
doc/mrva-interconnect.pdf
Normal file
BIN
doc/mrva-interconnect.pdf
Normal file
Binary file not shown.
BIN
doc/mrva-overview.pdf
Normal file
BIN
doc/mrva-overview.pdf
Normal file
Binary file not shown.
605
doc/mrva-overview.tex
Normal file
605
doc/mrva-overview.tex
Normal file
@@ -0,0 +1,605 @@
|
||||
\documentclass[11pt]{article}
|
||||
|
||||
% Load the geometry package to set margins
|
||||
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}
|
||||
|
||||
% increase nesting depth
|
||||
|
||||
\usepackage{enumitem}
|
||||
\setlistdepth{9}
|
||||
%
|
||||
\renewlist{itemize}{itemize}{9}
|
||||
\setlist[itemize,1]{label=\textbullet}
|
||||
\setlist[itemize,2]{label=--}
|
||||
\setlist[itemize,3]{label=*}
|
||||
\setlist[itemize,4]{label=•}
|
||||
\setlist[itemize,5]{label=–}
|
||||
\setlist[itemize,6]{label=>}
|
||||
\setlist[itemize,7]{label=»}
|
||||
\setlist[itemize,8]{label=›}
|
||||
\setlist[itemize,9]{label=·}
|
||||
%
|
||||
\renewlist{enumerate}{enumerate}{9}
|
||||
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
|
||||
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
|
||||
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
|
||||
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
|
||||
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
|
||||
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
|
||||
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
|
||||
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
|
||||
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}
|
||||
|
||||
|
||||
% Load CM Bright for math
|
||||
\usepackage{amsmath} % Standard math package
|
||||
\usepackage{amssymb} % Additional math symbols
|
||||
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans
|
||||
|
||||
\usepackage{fourier}
|
||||
|
||||
% Font configuration
|
||||
% \usepackage{bera}
|
||||
% or
|
||||
% Load Fira Sans for text
|
||||
\usepackage{fontspec}
|
||||
\setmainfont{Fira Sans} % System-installed Fira Sans
|
||||
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default
|
||||
|
||||
% pseudo-code with math
|
||||
\usepackage{listings}
|
||||
\usepackage{float}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{colortbl}
|
||||
% Set TT font
|
||||
% \usepackage{inconsolata}
|
||||
% or
|
||||
\setmonofont{IBMPlexMono-Light}
|
||||
% Define custom settings for listings
|
||||
\lstset{
|
||||
language=Python,
|
||||
basicstyle=\ttfamily\small, % Monospaced font
|
||||
commentstyle=\itshape\color{gray}, % Italic and gray for comments
|
||||
keywordstyle=\color{blue}, % Keywords in blue
|
||||
stringstyle=\color{red}, % Strings in red
|
||||
mathescape=true, % Enable math in comments
|
||||
breaklines=true, % Break long lines
|
||||
numbers=left, % Add line numbers
|
||||
numberstyle=\tiny\color{gray}, % Style for line numbers
|
||||
frame=single, % Add a frame around the code
|
||||
}
|
||||
|
||||
\usepackage{newfloat} % Allows creating custom float types
|
||||
|
||||
% Define 'listing' as a floating environment
|
||||
\DeclareFloatingEnvironment[
|
||||
fileext=lol,
|
||||
listname=List of Listings,
|
||||
name=Listing
|
||||
]{listing}
|
||||
|
||||
% To prevent floats from moving past a section boundary but still allow some floating:
|
||||
\usepackage{placeins}
|
||||
% used with \FloatBarrier
|
||||
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{longtable}
|
||||
\usepackage{wrapfig}
|
||||
\usepackage{rotating}
|
||||
\usepackage[normalem]{ulem}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{capt-of}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{algorithm}
|
||||
\usepackage{algpseudocode}
|
||||
|
||||
% Title, Author, and Date (or Report Number)
|
||||
\title{MRVA for CodeQL}
|
||||
\author{Michael Hohn}
|
||||
\date{Technical Report 20250224}
|
||||
|
||||
\hypersetup{
|
||||
pdfauthor={Michael Hohn},
|
||||
pdftitle={MRVA for CodeQL},
|
||||
pdfkeywords={},
|
||||
pdfsubject={},
|
||||
pdfcreator={Emacs 29.1},
|
||||
pdflang={English}}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
\tableofcontents
|
||||
|
||||
\section{MRVA System Architecture Summary}
|
||||
|
||||
The MRVA system is organized as a collection of services. On the server side, the
|
||||
system is containerized using Docker and comprises several key components:
|
||||
\begin{itemize}
|
||||
\item {\textbf{Server}}: Acts as the central coordinator.
|
||||
\item \textbf{Agents}: One or more agents that execute tasks.
|
||||
\item \textbf{RabbitMQ}: Handles messaging between components.
|
||||
\item \textbf{MinIO}: Provides storage for both queries and results.
|
||||
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
|
||||
\end{itemize}
|
||||
|
||||
On the client side, users can interact with the system in two ways:
|
||||
\begin{itemize}
|
||||
\item {\textbf{VSCode-CodeQL}}: A graphical interface integrated with Visual Studio Code.
|
||||
\item \textbf{gh-mrva CLI}: A command-line interface that connects to the server in a similar way.
|
||||
\end{itemize}
|
||||
|
||||
This architecture enables a robust and flexible workflow for code analysis, combining a containerized back-end with both graphical and CLI front-end tools.
|
||||
|
||||
The full system details can be seen in the source code. This document provides an
|
||||
overview.
|
||||
|
||||
\section{Distributed Query Execution in MRVA}
|
||||
|
||||
\subsection{Execution Overview}
|
||||
|
||||
The \textit{MRVA system} is a distributed platform for executing \textit{CodeQL
|
||||
queries} across multiple repositories using a set of worker agents. The system is
|
||||
{containerized} and built around a set of core services:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Server}: Coordinates job distribution and result aggregation.
|
||||
\item \textbf{Agents}: Execute queries independently and return results.
|
||||
\item \textbf{RabbitMQ}: Handles messaging between system components.
|
||||
\item \textbf{MinIO}: Stores query inputs and execution results.
|
||||
\item \textbf{HEPC}: Serves CodeQL databases over HTTP.
|
||||
\end{itemize}
|
||||
|
||||
Clients interact with MRVA via \texttt{VSCode-CodeQL} (a graphical interface) or
|
||||
\texttt{gh-mrva CLI} (a command-line tool), both of which submit queries to the
|
||||
server.
|
||||
|
||||
The execution process follows a structured workflow:
|
||||
|
||||
\begin{enumerate}
|
||||
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
|
||||
set $\mathcal{R}$.
|
||||
\item The server enqueues jobs and distributes them to available agents.
|
||||
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
|
||||
\item The agent sends results back to the server, which then forwards them to the client.
|
||||
\end{enumerate}
|
||||
|
||||
This full round-trip can be expressed as:
|
||||
|
||||
\begin{equation}
|
||||
\text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
|
||||
\xrightarrow{\text{enqueue}}
|
||||
\text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
|
||||
\xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
|
||||
\text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i} \text{Client}
|
||||
\end{equation}
|
||||
|
||||
where the Client submits queries to the Server, which enqueues jobs in the
|
||||
Queue. Agents execute the queries, returning results $\mathcal{Q}(\mathcal{R}_i)$
|
||||
to the Server and ultimately back to the Client.
|
||||
|
||||
A more rigorous description of this is in section \ref{sec:full-round-trip}.
|
||||
|
||||
\subsection{System Structure Overview}
|
||||
|
||||
This design allows for scalable and efficient query execution across multiple
|
||||
repositories, whether on a single machine or a distributed cluster. The key idea
|
||||
is that both setups follow the same structural approach:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Single machine setup:}
|
||||
\begin{itemize}
|
||||
\item Uses \textit{at least 5 Docker containers} to manage different
|
||||
components of the system.
|
||||
\item The number of \textit{agent containers} (responsible for executing
|
||||
queries) is constrained by the available \textit{RAM and CPU cores}.
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{Cluster setup:}
|
||||
\begin{itemize}
|
||||
\item Uses \textit{at least 5 virtual machines (VMs) and / or Docker containers}.
|
||||
\item The number of \textit{agent VMs} is limited by \textit{network bandwidth
|
||||
and available resources} (e.g., distributed storage and inter-node communication
|
||||
overhead).
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
Thus:
|
||||
\begin{itemize}
|
||||
\item The {functional architecture is identical} between the single-machine and cluster setups.
|
||||
\item The {primary difference} is in \textit{scale}:
|
||||
\begin{itemize}
|
||||
\item A single machine is limited by \textit{local CPU and RAM}.
|
||||
\item A cluster is constrained by \textit{network and inter-node coordination overhead} but allows for higher overall compute capacity.
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\subsection{Messages and their Types}
|
||||
\label{sec:msg-types}
|
||||
The following table enumerates the types (messages) passed from Client to Server.
|
||||
|
||||
\begin{longtable}{|p{5cm}|p{5cm}|p{5cm}|}
|
||||
\hline
|
||||
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
|
||||
\hline
|
||||
\endfirsthead
|
||||
|
||||
\hline
|
||||
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
|
||||
\hline
|
||||
\endhead
|
||||
|
||||
\hline
|
||||
\endfoot
|
||||
|
||||
\hline
|
||||
\endlastfoot
|
||||
|
||||
ServerState & NextID & () $\rightarrow$ int \\
|
||||
& GetResult & JobSpec $\rightarrow$ IO (Either Error AnalyzeResult) \\
|
||||
& GetJobSpecByRepoId & (int, int) $\rightarrow$ IO (Either Error JobSpec) \\
|
||||
& SetResult & (JobSpec, AnalyzeResult) $\rightarrow$ IO () \\
|
||||
& GetJobList & int $\rightarrow$ IO (Either Error \textbf{[AnalyzeJob]}) \\
|
||||
& GetJobInfo & JobSpec $\rightarrow$ IO (Either Error JobInfo) \\
|
||||
& SetJobInfo & (JobSpec, JobInfo) $\rightarrow$ IO () \\
|
||||
& GetStatus & JobSpec $\rightarrow$ IO (Either Error Status) \\
|
||||
& SetStatus & (JobSpec, Status) $\rightarrow$ IO () \\
|
||||
& AddJob & AnalyzeJob $\rightarrow$ IO () \\
|
||||
|
||||
\hline
|
||||
JobSpec & sessionID & int \\
|
||||
& nameWithOwner & string \\
|
||||
|
||||
\hline
|
||||
AnalyzeResult & spec & JobSpec \\
|
||||
& status & Status \\
|
||||
& resultCount & int \\
|
||||
& resultLocation & ArtifactLocation \\
|
||||
& sourceLocationPrefix & string \\
|
||||
& databaseSHA & string \\
|
||||
|
||||
\hline
|
||||
ArtifactLocation & Key & string \\
|
||||
& Bucket & string \\
|
||||
|
||||
\hline
|
||||
AnalyzeJob & Spec & JobSpec \\
|
||||
& QueryPackLocation & ArtifactLocation \\
|
||||
& QueryLanguage & QueryLanguage \\
|
||||
|
||||
\hline
|
||||
QueryLanguage & & string \\
|
||||
|
||||
\hline
|
||||
JobInfo & QueryLanguage & string \\
|
||||
& CreatedAt & string \\
|
||||
& UpdatedAt & string \\
|
||||
& SkippedRepositories & SkippedRepositories \\
|
||||
|
||||
\hline
|
||||
SkippedRepositories & AccessMismatchRepos & AccessMismatchRepos \\
|
||||
& NotFoundRepos & NotFoundRepos \\
|
||||
& NoCodeqlDBRepos & NoCodeqlDBRepos \\
|
||||
& OverLimitRepos & OverLimitRepos \\
|
||||
|
||||
\hline
|
||||
AccessMismatchRepos & RepositoryCount & int \\
|
||||
& Repositories & \textbf{[Repository]} \\
|
||||
|
||||
\hline
|
||||
NotFoundRepos & RepositoryCount & int \\
|
||||
& RepositoryFullNames & \textbf{[string]} \\
|
||||
|
||||
\hline
|
||||
Repository & ID & int \\
|
||||
& Name & string \\
|
||||
& FullName & string \\
|
||||
& Private & bool \\
|
||||
& StargazersCount & int \\
|
||||
& UpdatedAt & string \\
|
||||
|
||||
\end{longtable}
|
||||
|
||||
|
||||
\section{Symbols and Notation}
|
||||
\label{sec:orgb695d5a}
|
||||
|
||||
We define the following symbols for entities in the system:
|
||||
|
||||
\begin{center}
|
||||
\begin{tabular}{lll}
|
||||
Concept & Symbol & Description \\[0pt]
|
||||
\hline
|
||||
\href{vscode://file//Users/hohn/work-gh/mrva/gh-mrva/README.org:39:1}{Client} & \(C\) & The source of the query submission \\[0pt]
|
||||
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
|
||||
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
|
||||
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
|
||||
Agent Set & \(A\) & The set of all available agents \\[0pt]
|
||||
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
|
||||
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
|
||||
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
|
||||
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
|
||||
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
|
||||
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
|
||||
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
|
||||
\section{Full Round-Trip Representation}
|
||||
\label{sec:full-round-trip}
|
||||
The full round-trip execution, from query submission to result delivery, can be summarized as:
|
||||
|
||||
\[
|
||||
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
|
||||
\xrightarrow{\text{poll}}
|
||||
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
|
||||
\]
|
||||
|
||||
\begin{itemize}
|
||||
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
|
||||
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
|
||||
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
|
||||
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
|
||||
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
|
||||
\end{itemize}
|
||||
|
||||
\section{Result Representation}
|
||||
|
||||
For the complete collection of results across all repositories and queries:
|
||||
\[
|
||||
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
|
||||
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
|
||||
\]
|
||||
|
||||
where:
|
||||
\begin{itemize}
|
||||
\item \(N\) is the total number of repositories.
|
||||
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
|
||||
\item \(k_{i,j}\) is the number of results from executing query
|
||||
\(\mathcal{Q}_j\)
|
||||
on repository \(\mathcal{R}_i\).
|
||||
\end{itemize}
|
||||
|
||||
An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
|
||||
\[
|
||||
r_{i,j,k}
|
||||
\]
|
||||
|
||||
|
||||
|
||||
\[
|
||||
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
|
||||
\]
|
||||
|
||||
Each result can be further indexed to track multiple repositories and result sets.
|
||||
|
||||
\section{Execution Loop in Pseudo-Code}
|
||||
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
|
||||
\caption{Distributed Query Execution Algorithm}
|
||||
|
||||
\begin{lstlisting}[language=Python]
|
||||
# Distributed Query Execution with Agent Polling and Accumulated Results
|
||||
|
||||
# Initialization
|
||||
$\mathcal{R}$ = set() # Repository list
|
||||
$Q$ = [] # Job queue
|
||||
$A$ = set() # Set of agents
|
||||
$\mathcal{R}_i^{\mathcal{Q}}$ = {} # Result storage for each repository
|
||||
|
||||
# Initialize result sets for each repository
|
||||
for $R_i$ in $\mathcal{R}$:
|
||||
$\mathcal{R}_i^{\mathcal{Q}} = \{\}$ # Initialize empty result set
|
||||
|
||||
# Enqueue the entire query suite for all repositories
|
||||
for $R_i$ in $\mathcal{R}$:
|
||||
$Q$.append(($\mathcal{Q}$, $R_i$)) # Enqueue $(\mathcal{Q}, \mathcal{R}_i)$ pair
|
||||
|
||||
# Processing loop while there are jobs in the queue
|
||||
while $Q \neq \emptyset$:
|
||||
# Agents autonomously poll the queue
|
||||
for $\alpha$ in $A$:
|
||||
if $\alpha$.is_available():
|
||||
$(\mathcal{Q}, \mathcal{R}_i)$ = $Q$.pop(0) # Agent polls a job
|
||||
|
||||
# Agent execution begins
|
||||
$\mathcal{R}_i^{\mathcal{Q}} = \{\}$ # Initialize results for repository $R_i$
|
||||
|
||||
for $\mathcal{Q}_j$ in $\mathcal{Q}$:
|
||||
# Execute query $\mathcal{Q}_j$ on repository $\mathcal{R}_i$
|
||||
$r_{i,j,1}, \dots, r_{i,j,k_{i,j}}$ = $\alpha$.execute($\mathcal{Q}_j$, $R_i$)
|
||||
|
||||
# Store results for query $j$
|
||||
$\mathcal{R}_i^{\mathcal{Q}_j} = \{r_{i,j,1}, \dots, r_{i,j,k_{i,j}}\}$
|
||||
|
||||
# Accumulate results
|
||||
$\mathcal{R}_i^{\mathcal{Q}} = \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}$
|
||||
|
||||
# Send all accumulated results back to the server
|
||||
$\alpha$.send_results($S$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
|
||||
|
||||
# Server sends results for $(\mathcal{Q}, \mathcal{R}_i)$ back to the client
|
||||
$S$.send_results_to_client($C$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
|
||||
\end{lstlisting}
|
||||
\end{listing}
|
||||
\FloatBarrier
|
||||
|
||||
\section{Execution Loop in Pseudo-Code, declarative}
|
||||
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
|
||||
\caption{Distributed Query Execution Algorithm}
|
||||
|
||||
\begin{lstlisting}[language=Python]
|
||||
# Distributed Query Execution with Agent Polling and Accumulated Results
|
||||
|
||||
# Define initial state
|
||||
$\mathcal{R}$: set # Set of repositories
|
||||
$\mathcal{Q}$: set # Set of queries
|
||||
A: set # Set of agents
|
||||
Q: list # Queue of $(\mathcal{Q}, \mathcal{R}_i)$ pairs
|
||||
$\mathcal{R}_{\text{results}}$: dict = {} # Mapping of repositories to their accumulated query results
|
||||
|
||||
# Initialize result sets for each repository
|
||||
$\mathcal{R}_{\text{results}}$ = {$\mathcal{R}_i$: set() for $\mathcal{R}_i$ in $\mathcal{R}$}
|
||||
|
||||
# Define job queue as an immutable mapping
|
||||
Q = [($\mathcal{Q}$, $\mathcal{R}_i$) for $\mathcal{R}_i$ in $\mathcal{R}$]
|
||||
|
||||
# Processing as a declarative iteration over the job queue
|
||||
def execute_queries(agents, job_queue, repository_results):
|
||||
def available_agents():
|
||||
return {$\alpha$ for $\alpha$ in agents if $\alpha$.is_available()}
|
||||
|
||||
def process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$):
|
||||
results = {$\mathcal{Q}_j$: $\alpha$.execute($\mathcal{Q}_j$, $\mathcal{R}_i$) for $\mathcal{Q}_j$ in $\mathcal{Q}$}
|
||||
return $\mathcal{R}_i$, results
|
||||
|
||||
def accumulate_results($\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$, query_results):
|
||||
return {**$\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$: $\mathcal{R}_{\text{results}}$[$\mathcal{R}_i$] | set().union(*query_results.values())}
|
||||
|
||||
while job_queue:
|
||||
active_agents = available_agents()
|
||||
for $\alpha$ in active_agents:
|
||||
$\mathcal{Q}$, $\mathcal{R}_i$ = job_queue[0] # Peek at the first job
|
||||
_, query_results = process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$)
|
||||
repository_results = accumulate_results(repository_results, $\mathcal{R}_i$, query_results)
|
||||
|
||||
$\alpha$.send_results(S, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
|
||||
S.send_results_to_client(C, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
|
||||
|
||||
job_queue = job_queue[1:] # Move to the next job
|
||||
|
||||
return repository_results
|
||||
|
||||
# Execute the distributed query process
|
||||
$\mathcal{R}_{\text{results}}$ = execute_queries(A, Q, $\mathcal{R}_{\text{results}}$)
|
||||
\end{lstlisting}
|
||||
\end{listing}
|
||||
\FloatBarrier
|
||||
|
||||
\newpage{}
|
||||
\section{Execution Loop in Pseudo-Code, algorithmic}
|
||||
\begin{algorithm}
|
||||
\caption{Distribute a set of queries $\mathcal{Q}$ across repositories
|
||||
$\mathcal{R}$ using agents $A$}
|
||||
\begin{algorithmic}[1] % Line numbering enabled
|
||||
\Procedure{DistributedQueryExecution}{$\mathcal{Q}, \mathcal{R}, A$}
|
||||
|
||||
\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
|
||||
\Comment{Initialize result sets for each repository and query}
|
||||
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$
|
||||
\EndFor
|
||||
|
||||
\State $Q \gets \left\{ \, \right\}$ \Comment{Initialize empty job queue}
|
||||
|
||||
\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
|
||||
\Comment{Enqueue the entire query suite across all repositories}
|
||||
\State $S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q$
|
||||
\EndFor
|
||||
|
||||
\While{$Q \neq \emptyset$}
|
||||
\Comment{Agents poll the queue for available jobs}
|
||||
|
||||
\ForAll{$\alpha \in A$ \textbf{where} $\alpha$ \text{is available}}
|
||||
\State $\alpha \xleftarrow{\text{poll}(Q)}$ \Comment{Agent autonomously retrieves a job}
|
||||
|
||||
% --- Begin Agent Execution Block ---
|
||||
\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Begins}
|
||||
|
||||
|
||||
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$ \Comment{Initialize result set for this repository}
|
||||
|
||||
\ForAll{$\mathcal{Q}_j \in \mathcal{Q}$}
|
||||
\State $\mathcal{R}_i^{\mathcal{Q}_j} \gets \left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}$
|
||||
\Comment{Collect results for query $j$ on repository $i$}
|
||||
|
||||
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}}
|
||||
\cup \mathcal{R}_i^{\mathcal{Q}_j}$
|
||||
\Comment{Accumulate results}
|
||||
\EndFor
|
||||
|
||||
\State $\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S$
|
||||
\Comment{Agent sends all accumulated results back to server}
|
||||
|
||||
\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent
|
||||
Execution Ends}
|
||||
% --- End Agent Execution Block ---
|
||||
|
||||
\State $S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C$
|
||||
\Comment{Server sends results for repository $i$ back to the client}
|
||||
|
||||
\EndFor
|
||||
|
||||
\EndWhile
|
||||
|
||||
\EndProcedure
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\FloatBarrier
|
||||
|
||||
\section{Execution Loop in Pseudo-Code, hybrid}
|
||||
\label{sec:orgb767ab2}
|
||||
{\textbf{Algorithm:} Distribute a set of queries \(\mathcal{Q}\) across repositories \(\mathcal{R}\) using agents \(A\)}
|
||||
|
||||
\begin{enumerate}
|
||||
\item \textbf{\textbf{Initialization}}
|
||||
\begin{itemize}
|
||||
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
|
||||
\begin{itemize}
|
||||
\item Initialize result sets: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
|
||||
\end{itemize}
|
||||
\item Initialize an empty job queue: \(Q \gets \{\}\).
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{\textbf{Enqueue Queries}}
|
||||
\begin{itemize}
|
||||
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
|
||||
\begin{itemize}
|
||||
\item Enqueue the entire query suite: \(S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q\).
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{\textbf{Execution Loop}}
|
||||
\begin{itemize}
|
||||
\item While \(Q \neq \emptyset\): (agents poll the queue for available jobs)
|
||||
\begin{itemize}
|
||||
\item For each available agent \(\alpha \in A\):
|
||||
\begin{itemize}
|
||||
\item Agent autonomously retrieves a job: \(\alpha \xleftarrow{\text{poll}} Q\).
|
||||
|
||||
\item \textbf{Agent Execution Block}
|
||||
\begin{itemize}
|
||||
\item Initialize result set for this repository: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
|
||||
\item For each query \(\mathcal{Q}_j \in \mathcal{Q}\):
|
||||
\begin{itemize}
|
||||
\item Collect results:
|
||||
\(\mathcal{R}_i^{\mathcal{Q}_j} \gets \{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \}\).
|
||||
\item Accumulate results:
|
||||
\(\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}\).
|
||||
\end{itemize}
|
||||
\item Agent sends all accumulated results back to the server:
|
||||
\(\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S\).
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{Server Sends Results}
|
||||
\begin{itemize}
|
||||
\item Server sends results for repository \(i\) back to the client:
|
||||
\(S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C\).
|
||||
\end{itemize}
|
||||
\end{enumerate}
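As a cross-check of the pseudo-code, a minimal, self-contained Go sketch of
the agent side of this loop follows. The buffered channel standing in for the
job queue $Q$ and the \texttt{runQueries} stub are illustrative assumptions,
not the actual mrvacommander API.

\begin{verbatim}
// Sketch only: a channel stands in for the job queue Q and
// runQueries stands in for real CodeQL execution.
package main

import "fmt"

// Job pairs one repository R_i with the full query suite Q.
type Job struct {
	Repo    string
	Queries []string
}

// runQueries is a placeholder for running each query Q_j against
// the repository's database and collecting its results.
func runQueries(j Job) []string {
	var results []string
	for _, q := range j.Queries {
		results = append(results, q+"@"+j.Repo)
	}
	return results
}

func main() {
	// The server S enqueues the whole suite once per repository.
	queue := make(chan Job, 2)
	for _, repo := range []string{"octo/app", "octo/lib"} {
		queue <- Job{Repo: repo, Queries: []string{"q1", "q2"}}
	}
	close(queue)

	// Each agent polls until the queue is drained, then sends the
	// accumulated results back; printing stands in for the send.
	for job := range queue {
		fmt.Println(job.Repo, runQueries(job))
	}
}
\end{verbatim}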
|
||||
|
||||
\end{document}
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: LaTeX
|
||||
%%% TeX-master: t
|
||||
%%% TeX-engine: luatex
|
||||
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
|
||||
%%% End:
|
||||
56
doc/mrva.dot
Normal file
@@ -0,0 +1,56 @@
|
||||
digraph mrvacommander {
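// Render with Graphviz, e.g.: dot -Tsvg doc/mrva.dot -o mrva.svg (output name illustrative)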
|
||||
rankdir=LR;
|
||||
node [shape=box style=filled fillcolor=lightgrey fontname="monospace"];
|
||||
|
||||
// Entry points
|
||||
cmd_server [label="cmd/server\nmain()", fillcolor=lightblue];
|
||||
cmd_agent [label="cmd/agent\nmain()", fillcolor=lightblue];
|
||||
|
||||
// Config
|
||||
config [label="config/mcc\nparseEnv()", shape=ellipse, fillcolor=lightyellow];
|
||||
|
||||
// Server-side
|
||||
server [label="pkg/server\nServer.Run()"];
|
||||
deploy [label="pkg/deploy\nInit()"];
|
||||
qldbstore [label="pkg/qldbstore\nQLDB Store"];
|
||||
artifactstore [label="pkg/artifactstore\nArtifact Store"];
|
||||
queue [label="pkg/queue\nQueue Interface"];
|
||||
|
||||
// Agent-side
|
||||
agent [label="pkg/agent\nAgent.Run()"];
|
||||
state [label="pkg/state\nState"];
|
||||
codeql [label="pkg/codeql\nrunCodeQL()"];
|
||||
|
||||
// Common
|
||||
common [label="pkg/common\nTypes, MinIO, Jobs"];
|
||||
utils [label="utils\nDownload, Archive"];
|
||||
|
||||
// Edges: config used by both
|
||||
cmd_server -> config;
|
||||
cmd_agent -> config;
|
||||
|
||||
// Server wiring
|
||||
cmd_server -> server;
|
||||
server -> queue;
|
||||
server -> artifactstore;
|
||||
server -> qldbstore;
|
||||
|
||||
// Agent wiring
|
||||
cmd_agent -> agent;
|
||||
agent -> queue;
|
||||
agent -> codeql;
|
||||
agent -> artifactstore;
|
||||
agent -> state;
|
||||
|
||||
// Shared deps
|
||||
server -> common;
|
||||
agent -> common;
|
||||
codeql -> common;
|
||||
qldbstore -> common;
|
||||
artifactstore -> common;
|
||||
|
||||
// Utils used by backends
|
||||
qldbstore -> utils;
|
||||
artifactstore -> utils;
|
||||
codeql -> utils;
|
||||
}
|
||||
84
doc/mrva.man
Normal file
@@ -0,0 +1,84 @@
|
||||
.TH MRVACOMMANDER 7 "April 2025" "MRVA Project" "System Overview"
|
||||
.SH NAME
|
||||
mrvacommander \- distributed CodeQL task queue and execution system
|
||||
.SH SYNOPSIS
|
||||
.B server
|
||||
.RI [ environment ]
|
||||
.br
|
||||
.B agent
|
||||
.RI [ environment ]
|
||||
.SH DESCRIPTION
|
||||
mrvacommander coordinates analysis jobs over multiple worker nodes using queues, pluggable storage, and CodeQL execution. It consists of multiple interacting packages and entry points.
|
||||
|
||||
.SH STRUCTURE
|
||||
.TP
|
||||
.B cmd/server
|
||||
Entry point. Loads configuration, initializes dependencies, runs queue subscriber with a dispatcher.
|
||||
.TP
|
||||
.B cmd/agent
|
||||
Entry point. Loads configuration, runs a processing loop: receive job, execute query, save result, update state.
|
||||
|
||||
.SH CONFIGURATION
|
||||
.TP
|
||||
.B config/mcc
|
||||
Parses environment variables into structured configuration. Modules include:
|
||||
.IR queue ,
|
||||
.IR storage ,
|
||||
.IR logger ,
|
||||
.IR commander .
|
||||
|
||||
.SH SERVER SIDE MODULES
|
||||
.TP
|
||||
.B pkg/server
|
||||
Initializes:
|
||||
queue backend
|
||||
QLDB store
|
||||
artifact store
|
||||
|
||||
Subscribes to queue and dispatches jobs to handler.
|
||||
.TP
|
||||
.B pkg/deploy
|
||||
Deployment helpers: validate environment variables, bootstrap key services.
|
||||
|
||||
.SH AGENT SIDE MODULES
|
||||
.TP
|
||||
.B pkg/agent
|
||||
Receives jobs, executes CodeQL queries, stores outputs, marks completion.
|
||||
.TP
|
||||
.B pkg/state
|
||||
Tracks which jobs have been completed. Backed by a local file.
|
||||
|
||||
.SH SHARED MODULES
|
||||
.TP
|
||||
.B pkg/common
|
||||
Core types: Job, JobOutput, NameWithOwner, Query.
|
||||
Includes MinIO wrappers, external API access, and job spec parsing.
|
||||
.TP
|
||||
.B pkg/codeql
|
||||
Defines query structure and executes CodeQL against a database.
|
||||
.TP
|
||||
.B pkg/qldbstore
|
||||
Provides read-only access to CodeQL databases via:
|
||||
- MinIO (S3)
|
||||
- HTTP (hepc)
|
||||
- Filesystem
|
||||
.TP
|
||||
.B pkg/artifactstore
|
||||
Persists job results. Implementations:
|
||||
- MinIO
|
||||
- Memory
|
||||
.TP
|
||||
.B pkg/queue
|
||||
Job queue interface. Implementations:
|
||||
- RabbitMQ
|
||||
- In-memory single-node
|
||||
.TP
|
||||
.B utils
|
||||
Generic helpers:
|
||||
- HTTP download
|
||||
- tar.gz extraction
|
||||
|
||||
.SH SEE ALSO
|
||||
.BR codeql (1),
|
||||
.BR rabbitmq-server (1),
|
||||
.BR minio (1)
|
||||
BIN
doc/mrva.pdf
Normal file
Binary file not shown.
@@ -4,8 +4,8 @@ services:
|
||||
dbssvc:
|
||||
## image: ghcr.io/hohn/dbsdata-container:0.1.24
|
||||
build:
|
||||
context: .
|
||||
dockerfile: ./demo/containers/dbsdata/Dockerfile
|
||||
context: ./demo/containers/dbsdata
|
||||
dockerfile: Dockerfile
|
||||
container_name: dbssvc
|
||||
volumes:
|
||||
- dbsdata:/data/mrvacommander/dbstore-data
|
||||
|
||||
22
experimental/qldb-specification/readme.org
Normal file
@@ -0,0 +1,22 @@
|
||||
* tuple hashing functions across languages
|
||||
There are three parallel implementations of a hash for every entry of a tuple
list. The implementations produce identical results across the three languages,
so hashes can be compared across agent, server, and client.
|
||||
|
||||
#+BEGIN_SRC sh
|
||||
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
|
||||
0:$ node tuple-hash.js
|
||||
[
|
||||
'91b80a9933218ff5bc62df8ff71f1252',
|
||||
'b0934b29293e91aefaac73c99fc75e94'
|
||||
]
|
||||
|
||||
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
|
||||
0:$ python3 tuple-hash.py
|
||||
['91b80a9933218ff5bc62df8ff71f1252', 'b0934b29293e91aefaac73c99fc75e94']
|
||||
|
||||
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
|
||||
0:$ go run tuple-hash.go
|
||||
[91b80a9933218ff5bc62df8ff71f1252 b0934b29293e91aefaac73c99fc75e94]
|
||||
#+END_SRC
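
All three implementations hash the same byte stream: each tuple is serialized
as compact JSON (no whitespace) and then MD5-hashed. That is why the Python
version calls ~json.dumps(item, separators=(',', ':'))~; Go's =json.Marshal=
and JavaScript's =JSON.stringify= already emit the compact form, and any
divergence in the encoding would break the cross-language equality.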
|
||||
|
||||
28
experimental/qldb-specification/tuple-hash.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func main() {
|
||||
atl_L := [][2]interface{}{
|
||||
{1, "s1"},
|
||||
{2, "str"},
|
||||
}
|
||||
|
||||
var sl_hash []string
|
||||
|
||||
for _, item := range atl_L {
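// json.Marshal emits compact JSON (no spaces), matching JSON.stringify and the Python encoding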
|
||||
jsonBytes, err := json.Marshal(item)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sum := md5.Sum(jsonBytes)
|
||||
sl_hash = append(sl_hash, hex.EncodeToString(sum[:]))
|
||||
}
|
||||
|
||||
fmt.Println(sl_hash)
|
||||
}
|
||||
9
experimental/qldb-specification/tuple-hash.js
Normal file
@@ -0,0 +1,9 @@
|
||||
const crypto = require("crypto");
|
||||
|
||||
const atl_L = [[1, "s1"], [2, "str"]];
|
||||
const sl_hash = atl_L.map(item => {
|
||||
const json = JSON.stringify(item);
|
||||
return crypto.createHash("md5").update(json).digest("hex");
|
||||
});
|
||||
|
||||
console.log(sl_hash);
|
||||
12
experimental/qldb-specification/tuple-hash.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
atl_L = [(1, "s1"), (2, "str")]
|
||||
sl_hash = []
|
||||
|
||||
for item in atl_L:
|
||||
encoded = json.dumps(item, separators=(',', ':')).encode("utf-8")
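    # compact separators make the bytes identical to JSON.stringify (JS) and json.Marshal (Go)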
|
||||
md5sum = hashlib.md5(encoded).hexdigest()
|
||||
sl_hash.append(md5sum)
|
||||
|
||||
print(sl_hash)
|
||||
11
go.mod
@@ -4,33 +4,25 @@ go 1.22.0
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.4.0
|
||||
github.com/elastic/go-sysinfo v1.14.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gorilla/mux v1.8.1
|
||||
github.com/jackc/pgx/v5 v5.6.0
|
||||
github.com/minio/minio-go/v7 v7.0.71
|
||||
github.com/rabbitmq/amqp091-go v1.10.0
|
||||
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
gorm.io/driver/postgres v1.5.9
|
||||
gorm.io/gorm v1.25.10
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/elastic/go-windows v1.0.1 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/pgx/v5 v5.6.0 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.1 // indirect
|
||||
github.com/jinzhu/inflection v1.0.0 // indirect
|
||||
github.com/jinzhu/now v1.1.5 // indirect
|
||||
github.com/klauspost/compress v1.17.6 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/minio/md5-simd v1.1.2 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/rogpeppe/go-internal v1.12.0 // indirect
|
||||
github.com/rs/xid v1.5.0 // indirect
|
||||
golang.org/x/crypto v0.24.0 // indirect
|
||||
@@ -39,5 +31,4 @@ require (
|
||||
golang.org/x/sys v0.21.0 // indirect
|
||||
golang.org/x/text v0.16.0 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
howett.net/plist v1.0.1 // indirect
|
||||
)
|
||||
|
||||
24
go.sum
@@ -6,14 +6,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/elastic/go-sysinfo v1.14.0 h1:dQRtiqLycoOOla7IflZg3aN213vqJmP0lpVpKQ9lUEY=
|
||||
github.com/elastic/go-sysinfo v1.14.0/go.mod h1:FKUXnZWhnYI0ueO7jhsGV3uQJ5hiz8OqM5b3oGyaRr8=
|
||||
github.com/elastic/go-windows v1.0.1 h1:AlYZOldA+UJ0/2nBuqWdo90GFCgG9xuyw9SYzGUtJm0=
|
||||
github.com/elastic/go-windows v1.0.1/go.mod h1:FoVvqWSun28vaDQPbj2Elfc0JahhPB7WQEGa3c814Ss=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
@@ -26,11 +20,6 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
|
||||
github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
|
||||
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
|
||||
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
||||
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
|
||||
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
|
||||
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
@@ -44,13 +33,8 @@ github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
||||
github.com/minio/minio-go/v7 v7.0.71 h1:No9XfOKTYi6i0GnBj+WZwD8WP5GZfL7n7GOjRqCdAjA=
|
||||
github.com/minio/minio-go/v7 v7.0.71/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw=
|
||||
github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
|
||||
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
|
||||
@@ -72,7 +56,6 @@ golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
|
||||
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
|
||||
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
|
||||
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
@@ -83,13 +66,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
||||
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
|
||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gorm.io/driver/postgres v1.5.9 h1:DkegyItji119OlcaLjqN11kHoUgZ/j13E0jkJZgD6A8=
|
||||
gorm.io/driver/postgres v1.5.9/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI=
|
||||
gorm.io/gorm v1.25.10 h1:dQpO+33KalOA+aFYGlK+EfxcI5MbO7EP2yYygwh9h+s=
|
||||
gorm.io/gorm v1.25.10/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
||||
howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM=
|
||||
howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
||||
|
||||
@@ -1,7 +1,16 @@
|
||||
{
|
||||
"folders": [
|
||||
{
|
||||
"name": "mrvaagent",
|
||||
"path": "../mrvaagent"
|
||||
},
|
||||
{
|
||||
"name": "mrvacommander",
|
||||
"path": "."
|
||||
},
|
||||
{
|
||||
"name": "mrvaserver",
|
||||
"path": "../mrvaserver"
|
||||
}
|
||||
],
|
||||
"settings": {
|
||||
@@ -10,4 +19,4 @@
|
||||
"makefile.configureOnOpen": false,
|
||||
"git.ignoreLimitWarning": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
## The notes/ directory
|
||||
The `notes/` directory serves as staging directory for documentation. This is
|
||||
the place to develop documentation and short notes. The contents of this
|
||||
directory should be accessible to
|
||||
1. The note authors and
|
||||
2. Developers of the project
|
||||
|
||||
It need not be meaningful to casual users.
|
||||
|
||||
9
notes/README.org
Normal file
@@ -0,0 +1,9 @@
|
||||
* The notes/ directory
|
||||
The =notes/= directory serves as a staging directory for documentation.
|
||||
This is the place to develop documentation and short notes.
|
||||
|
||||
The contents of this directory should be accessible to:
|
||||
1. The note authors
|
||||
2. Developers of the project
|
||||
|
||||
It need not be meaningful to casual users.
|
||||
@@ -508,7 +508,7 @@
|
||||
code .
|
||||
#+END_SRC
|
||||
|
||||
Set up 'variant analysis repositories', continuin from the
|
||||
Set up 'variant analysis repositories', continuing from the
|
||||
=scratch/vscode-selection.json= file formed previously:
|
||||
1. Select '{}' and open db selection file
|
||||
2. paste
|
||||
|
||||
BIN
notes/dwg-r1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
BIN
notes/dwg-r2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 58 KiB |
146
notes/dwg-r2.svg
Normal file
@@ -0,0 +1,146 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Generated by graphviz version 12.2.1 (20241206.2353)
|
||||
-->
|
||||
<!-- Title: MRVA_Containers Pages: 1 -->
|
||||
<svg width="659pt" height="315pt"
|
||||
viewBox="0.00 0.00 659.00 315.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 311)">
|
||||
<title>MRVA_Containers</title>
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-311 655,-311 655,4 -4,4"/>
|
||||
<text text-anchor="middle" x="325.5" y="-284" font-family="Helvetica,sans-Serif" font-size="20.00">Container Dependencies for MRVA</text>
|
||||
<!-- mrvastore_init -->
|
||||
<g id="node1" class="node">
|
||||
<title>mrvastore_init</title>
|
||||
<polygon fill="lightblue" stroke="none" points="239.38,-247 239.38,-270.75 338.12,-270.75 338.12,-247 239.38,-247"/>
|
||||
<text text-anchor="start" x="243.38" y="-254.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">mrvastore-init</text>
|
||||
<text text-anchor="start" x="243.38" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/mc</text>
|
||||
<polygon fill="none" stroke="black" points="238.38,-226 238.38,-271.75 339.12,-271.75 339.12,-226 238.38,-226"/>
|
||||
</g>
|
||||
<!-- mrvastore -->
|
||||
<g id="node2" class="node">
|
||||
<title>mrvastore</title>
|
||||
<polygon fill="lightblue" stroke="none" points="401.5,-241 401.5,-264.75 642,-264.75 642,-241 401.5,-241"/>
|
||||
<text text-anchor="start" x="488.75" y="-248.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">mrvastore</text>
|
||||
<text text-anchor="start" x="405.5" y="-227.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</text>
|
||||
<polygon fill="none" stroke="black" points="400.5,-220 400.5,-265.75 643,-265.75 643,-220 400.5,-220"/>
|
||||
</g>
|
||||
<!-- mrvastore_init->mrvastore -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>mrvastore_init->mrvastore</title>
|
||||
<path fill="none" stroke="black" d="M346.85,-247.39C359.13,-247.07 372.61,-246.72 386.46,-246.36"/>
|
||||
<polygon fill="black" stroke="black" points="386.38,-248.12 391.33,-246.24 386.28,-244.62 386.38,-248.12"/>
|
||||
</g>
|
||||
<!-- client_ghmrva -->
|
||||
<g id="node3" class="node">
|
||||
<title>client_ghmrva</title>
|
||||
<polygon fill="lightblue" stroke="none" points="9,-127 9,-150.75 176,-150.75 176,-127 9,-127"/>
|
||||
<text text-anchor="start" x="47.88" y="-134.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">client-ghmrva</text>
|
||||
<text text-anchor="start" x="13" y="-113.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: client-ghmrva-container:0.4.0</text>
|
||||
<polygon fill="none" stroke="black" points="8,-106 8,-151.75 177,-151.75 177,-106 8,-106"/>
|
||||
</g>
|
||||
<!-- server -->
|
||||
<g id="node7" class="node">
|
||||
<title>server</title>
|
||||
<polygon fill="lightblue" stroke="none" points="230,-103 230,-126.75 347.5,-126.75 347.5,-103 230,-103"/>
|
||||
<text text-anchor="start" x="268.5" y="-110.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">server</text>
|
||||
<text text-anchor="start" x="234" y="-89.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva-server:0.4.0</text>
|
||||
<polygon fill="none" stroke="black" points="229,-58 229,-127.75 348.5,-127.75 348.5,-58 229,-58"/>
|
||||
</g>
|
||||
<!-- client_ghmrva->server -->
|
||||
<g id="edge10" class="edge">
|
||||
<title>client_ghmrva->server</title>
|
||||
<path fill="none" stroke="black" d="M184.94,-111.93C194.95,-110.07 205.04,-108.2 214.78,-106.4"/>
|
||||
<polygon fill="black" stroke="black" points="215.04,-108.13 219.64,-105.5 214.41,-104.69 215.04,-108.13"/>
|
||||
</g>
|
||||
<!-- code_server -->
|
||||
<g id="node4" class="node">
|
||||
<title>code_server</title>
|
||||
<polygon fill="lightblue" stroke="none" points="12.38,-55 12.38,-78.75 172.62,-78.75 172.62,-55 12.38,-55"/>
|
||||
<text text-anchor="start" x="54.25" y="-62.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">code-server</text>
|
||||
<text text-anchor="start" x="16.38" y="-41.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: code-server-initialized:0.4.0</text>
|
||||
<polygon fill="none" stroke="black" points="11.38,-34 11.38,-79.75 173.62,-79.75 173.62,-34 11.38,-34"/>
|
||||
</g>
|
||||
<!-- code_server->server -->
|
||||
<g id="edge9" class="edge">
|
||||
<title>code_server->server</title>
|
||||
<path fill="none" stroke="black" d="M181.53,-73.19C192.69,-75.26 204.01,-77.36 214.9,-79.37"/>
|
||||
<polygon fill="black" stroke="black" points="214.53,-81.09 219.77,-80.28 215.17,-77.64 214.53,-81.09"/>
|
||||
</g>
|
||||
<!-- hepc -->
|
||||
<g id="node5" class="node">
|
||||
<title>hepc</title>
|
||||
<polygon fill="lightblue" stroke="none" points="444.62,-169 444.62,-192.75 598.88,-192.75 598.88,-169 444.62,-169"/>
|
||||
<text text-anchor="start" x="506" y="-176.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">hepc</text>
|
||||
<text text-anchor="start" x="448.62" y="-155.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva-hepc-container:0.4.0</text>
|
||||
<polygon fill="none" stroke="black" points="443.62,-148 443.62,-193.75 599.88,-193.75 599.88,-148 443.62,-148"/>
|
||||
</g>
|
||||
<!-- rabbitmq -->
|
||||
<g id="node6" class="node">
|
||||
<title>rabbitmq</title>
|
||||
<polygon fill="lightblue" stroke="none" points="448.75,-97 448.75,-120.75 594.75,-120.75 594.75,-97 448.75,-97"/>
|
||||
<text text-anchor="start" x="492.5" y="-104.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">rabbitmq</text>
|
||||
<text text-anchor="start" x="452.75" y="-83.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: rabbitmq:3-management</text>
|
||||
<polygon fill="none" stroke="black" points="447.75,-76 447.75,-121.75 595.75,-121.75 595.75,-76 447.75,-76"/>
|
||||
</g>
|
||||
<!-- server->mrvastore -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>server->mrvastore</title>
|
||||
<path fill="none" stroke="black" d="M347.34,-131.55C350.58,-134.55 353.67,-137.67 356.5,-140.88 378.62,-165.92 366.36,-186.06 392.5,-206.88 395.16,-208.99 397.93,-210.98 400.8,-212.86"/>
|
||||
<polygon fill="black" stroke="black" points="399.6,-214.17 404.78,-215.31 401.44,-211.2 399.6,-214.17"/>
|
||||
</g>
|
||||
<!-- server->hepc -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>server->hepc</title>
|
||||
<path fill="none" stroke="black" d="M356.48,-121.44C368.39,-126.17 380.74,-130.84 392.5,-134.88 404.44,-138.97 417.11,-142.96 429.67,-146.7"/>
|
||||
<polygon fill="black" stroke="black" points="429.07,-148.35 434.37,-148.08 430.06,-144.99 429.07,-148.35"/>
|
||||
</g>
|
||||
<!-- server->rabbitmq -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>server->rabbitmq</title>
|
||||
<path fill="none" stroke="black" d="M356.25,-94.6C380.34,-95.23 407.91,-95.94 433.51,-96.61"/>
|
||||
<polygon fill="black" stroke="black" points="433.41,-98.36 438.46,-96.74 433.5,-94.86 433.41,-98.36"/>
|
||||
</g>
|
||||
<!-- postgres -->
|
||||
<g id="node8" class="node">
|
||||
<title>postgres</title>
|
||||
<polygon fill="lightblue" stroke="none" points="475.75,-25 475.75,-48.75 567.75,-48.75 567.75,-25 475.75,-25"/>
|
||||
<text text-anchor="start" x="493.25" y="-32.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">postgres</text>
|
||||
<text text-anchor="start" x="479.75" y="-11.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: postgres:15</text>
|
||||
<polygon fill="none" stroke="black" points="474.75,-4 474.75,-49.75 568.75,-49.75 568.75,-4 474.75,-4"/>
|
||||
</g>
|
||||
<!-- server->postgres -->
|
||||
<g id="edge4" class="edge">
|
||||
<title>server->postgres</title>
|
||||
<path fill="none" stroke="black" d="M356.32,-73.33C368.36,-69.83 380.8,-66.23 392.5,-62.88 414.85,-56.47 439.43,-49.55 461.04,-43.5"/>
|
||||
<polygon fill="black" stroke="black" points="461.25,-45.26 465.59,-42.22 460.31,-41.88 461.25,-45.26"/>
|
||||
</g>
|
||||
<!-- agent -->
|
||||
<g id="node9" class="node">
|
||||
<title>agent</title>
|
||||
<polygon fill="lightblue" stroke="none" points="231.5,-175 231.5,-198.75 346,-198.75 346,-175 231.5,-175"/>
|
||||
<text text-anchor="start" x="270.75" y="-182.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">agent</text>
|
||||
<text text-anchor="start" x="235.5" y="-161.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: mrva-agent:0.4.0</text>
|
||||
<polygon fill="none" stroke="black" points="230.5,-154 230.5,-199.75 347,-199.75 347,-154 230.5,-154"/>
|
||||
</g>
|
||||
<!-- agent->mrvastore -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>agent->mrvastore</title>
|
||||
<path fill="none" stroke="black" d="M354.95,-196.02C367.41,-199.65 380.35,-203.4 392.5,-206.88 400.99,-209.31 409.8,-211.81 418.65,-214.32"/>
|
||||
<polygon fill="black" stroke="black" points="418.03,-215.96 423.32,-215.64 418.98,-212.59 418.03,-215.96"/>
|
||||
</g>
|
||||
<!-- agent->hepc -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>agent->hepc</title>
|
||||
<path fill="none" stroke="black" d="M354.71,-175.19C377.96,-174.58 404.62,-173.89 429.67,-173.24"/>
|
||||
<polygon fill="black" stroke="black" points="429.53,-175 434.48,-173.12 429.44,-171.5 429.53,-175"/>
|
||||
</g>
|
||||
<!-- agent->rabbitmq -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>agent->rabbitmq</title>
|
||||
<path fill="none" stroke="black" d="M352.08,-150.07C365.32,-144.75 379.29,-139.41 392.5,-134.88 405.8,-130.31 420.02,-125.88 433.97,-121.78"/>
|
||||
<polygon fill="black" stroke="black" points="434.16,-123.54 438.48,-120.47 433.19,-120.18 434.16,-123.54"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 9.0 KiB |
128
notes/system-structure.org
Normal file
@@ -0,0 +1,128 @@
|
||||
* system structure
|
||||
#+BEGIN_SRC dot :file dwg-r2.svg :cmdline -Kdot -Tsvg
|
||||
digraph MRVA_Containers {
|
||||
rankdir=LR;
|
||||
node [shape=plaintext fontname="Helvetica"];
|
||||
edge [arrowsize=0.5];
|
||||
|
||||
// Title
|
||||
label="Container Dependencies for MRVA";
|
||||
labelloc=top;
|
||||
fontsize=20;
|
||||
fontname="Helvetica";
|
||||
|
||||
// mrvastore-init
|
||||
mrvastore_init [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>mrvastore-init</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: minio/mc</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// mrvastore
|
||||
mrvastore [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>mrvastore</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// client-ghmrva
|
||||
client_ghmrva [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>client-ghmrva</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: client-ghmrva-container:0.4.0</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// code-server
|
||||
code_server [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>code-server</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: code-server-initialized:0.4.0</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// hepc
|
||||
hepc [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>hepc</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: mrva-hepc-container:0.4.0</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// rabbitmq
|
||||
rabbitmq [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>rabbitmq</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: rabbitmq:3-management</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// server
|
||||
server [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>server</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: mrva-server:0.4.0</font></td></tr>
|
||||
<tr><td port="slot1"></td></tr>
|
||||
<tr><td port="slot2"></td></tr>
|
||||
<tr><td port="slot3"></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// postgres
|
||||
postgres [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>postgres</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: postgres:15</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// agent
|
||||
agent [
|
||||
shape=plaintext
|
||||
label=<
|
||||
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
|
||||
<tr><td bgcolor="lightblue"><b>agent</b></td></tr>
|
||||
<tr><td align="left"><font point-size="10">Image: mrva-agent:0.4.0</font></td></tr>
|
||||
</table>
|
||||
>
|
||||
];
|
||||
|
||||
// Edges (dependencies)
|
||||
mrvastore_init -> mrvastore;
|
||||
server -> rabbitmq;
|
||||
server -> mrvastore;
|
||||
server -> postgres;
|
||||
server -> hepc;
|
||||
agent -> rabbitmq;
|
||||
agent -> mrvastore;
|
||||
agent -> hepc;
|
||||
code_server -> server;
|
||||
client_ghmrva -> server;
|
||||
}
|
||||
#+END_SRC
|
||||
Binary file not shown.
@@ -4,19 +4,17 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/hohn/mrvacommander/pkg/artifactstore"
|
||||
"github.com/hohn/mrvacommander/pkg/codeql"
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
"github.com/hohn/mrvacommander/pkg/qldbstore"
|
||||
"github.com/hohn/mrvacommander/pkg/queue"
|
||||
"github.com/hohn/mrvacommander/utils"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/elastic/go-sysinfo"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
@@ -50,41 +48,9 @@ func (r *RunnerSingle) worker(wid int) {
|
||||
*/
|
||||
|
||||
const (
|
||||
workerMemoryMB = 2048 // 2 GB
|
||||
monitorIntervalSec = 10 // Monitor every 10 seconds
|
||||
workerMemoryMB = 2048 // 2 GB
|
||||
)
|
||||
|
||||
func calculateWorkers() int {
|
||||
host, err := sysinfo.Host()
|
||||
if err != nil {
|
||||
slog.Error("failed to get host info", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
memInfo, err := host.Memory()
|
||||
if err != nil {
|
||||
slog.Error("failed to get memory info", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Get available memory in MB
|
||||
totalMemoryMB := memInfo.Available / (1024 * 1024)
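// e.g. 16384 MB available / 2048 MB per worker = 8 workers, capped at NumCPU below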
|
||||
|
||||
// Ensure we have at least one worker
|
||||
workers := int(totalMemoryMB / workerMemoryMB)
|
||||
if workers < 1 {
|
||||
workers = 1
|
||||
}
|
||||
|
||||
// Limit the number of workers to the number of CPUs
|
||||
cpuCount := runtime.NumCPU()
|
||||
if workers > cpuCount {
|
||||
workers = max(cpuCount, 1)
|
||||
}
|
||||
|
||||
return workers
|
||||
}
|
||||
|
||||
func StartAndMonitorWorkers(ctx context.Context,
|
||||
artifacts artifactstore.Store,
|
||||
databases qldbstore.Store,
|
||||
@@ -92,57 +58,29 @@ func StartAndMonitorWorkers(ctx context.Context,
|
||||
desiredWorkerCount int,
|
||||
wg *sync.WaitGroup) {
|
||||
|
||||
currentWorkerCount := 0
|
||||
stopChans := make([]chan struct{}, 0)
|
||||
|
||||
if desiredWorkerCount != 0 {
|
||||
slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
|
||||
for i := 0; i < desiredWorkerCount; i++ {
|
||||
stopChan := make(chan struct{})
|
||||
stopChans = append(stopChans, stopChan)
|
||||
wg.Add(1)
|
||||
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
|
||||
}
|
||||
return
|
||||
var workerCount int
|
||||
if desiredWorkerCount > 0 {
|
||||
workerCount = desiredWorkerCount
|
||||
slog.Info("Starting fixed number of workers", slog.Int("count", workerCount))
|
||||
} else {
|
||||
workerCount = 1
|
||||
slog.Info("Starting preset number of workers", slog.Int("count", workerCount))
|
||||
}
|
||||
|
||||
slog.Info("Worker count not specified, managing based on available memory and CPU")
|
||||
stopChans := make([]chan struct{}, workerCount)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// signal all workers to stop
|
||||
for _, stopChan := range stopChans {
|
||||
close(stopChan)
|
||||
}
|
||||
return
|
||||
default:
|
||||
newWorkerCount := calculateWorkers()
|
||||
for i := 0; i < workerCount; i++ {
|
||||
stopChan := make(chan struct{})
|
||||
stopChans[i] = stopChan
|
||||
wg.Add(1)
|
||||
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
|
||||
}
|
||||
|
||||
if newWorkerCount != currentWorkerCount {
|
||||
slog.Info(
|
||||
"Modifying worker count",
|
||||
slog.Int("current", currentWorkerCount),
|
||||
slog.Int("new", newWorkerCount))
|
||||
}
|
||||
// Wait for context cancellation
|
||||
<-ctx.Done()
|
||||
|
||||
if newWorkerCount > currentWorkerCount {
|
||||
for i := currentWorkerCount; i < newWorkerCount; i++ {
|
||||
stopChan := make(chan struct{})
|
||||
stopChans = append(stopChans, stopChan)
|
||||
wg.Add(1)
|
||||
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
|
||||
}
|
||||
} else if newWorkerCount < currentWorkerCount {
|
||||
for i := newWorkerCount; i < currentWorkerCount; i++ {
|
||||
close(stopChans[i])
|
||||
}
|
||||
stopChans = stopChans[:newWorkerCount]
|
||||
}
|
||||
currentWorkerCount = newWorkerCount
|
||||
|
||||
time.Sleep(monitorIntervalSec * time.Second)
|
||||
}
|
||||
for _, stopChan := range stopChans {
|
||||
close(stopChan)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,7 +91,7 @@ func RunAnalysisJob(
|
||||
Spec: job.Spec,
|
||||
ResultCount: 0,
|
||||
ResultLocation: artifactstore.ArtifactLocation{},
|
||||
Status: common.StatusError,
|
||||
Status: common.StatusFailed,
|
||||
}
|
||||
|
||||
// Create a temporary directory
|
||||
@@ -186,7 +124,15 @@ func RunAnalysisJob(
|
||||
|
||||
databaseData, err := dbs.GetDatabase(job.Spec.NameWithOwner)
|
||||
if err != nil {
|
||||
return result, fmt.Errorf("failed to get database: %w", err)
|
||||
slog.Error("Failed to get database",
|
||||
slog.String("owner", job.Spec.Owner),
|
||||
slog.String("repo", job.Spec.Repo),
|
||||
slog.Int("session_id", job.Spec.SessionID),
|
||||
slog.String("operation", "GetDatabase"),
|
||||
slog.Any("error", err),
|
||||
)
|
||||
return result, fmt.Errorf("failed to get database for %s/%s: %w",
|
||||
job.Spec.Owner, job.Spec.Repo, err)
|
||||
}
|
||||
|
||||
// Write the CodeQL database data to the filesystem
|
||||
@@ -218,7 +164,7 @@ func RunAnalysisJob(
|
||||
Spec: job.Spec,
|
||||
ResultCount: runResult.ResultCount,
|
||||
ResultLocation: resultsLocation,
|
||||
Status: common.StatusSuccess,
|
||||
Status: common.StatusSucceeded,
|
||||
SourceLocationPrefix: runResult.SourceLocationPrefix,
|
||||
DatabaseSHA: runResult.DatabaseSHA,
|
||||
}
|
||||
|
||||
@@ -1,28 +1,13 @@
|
||||
package artifactstore
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
)
|
||||
|
||||
// Restrict the keys / values for ArtifactLocation and centralize the common ones
|
||||
// here
|
||||
const (
|
||||
AF_BUCKETNAME_RESULTS = "results"
|
||||
AF_BUCKETNAME_PACKS = "packs"
|
||||
var (
|
||||
AF_BUCKETNAME_RESULTS = "mrvabucket"
|
||||
AF_BUCKETNAME_PACKS = "mrvabucket"
|
||||
)
|
||||
|
||||
type ArtifactLocation struct {
|
||||
Key string // location in bucket OR full location for file paths
|
||||
Bucket string // which bucket: packs or results
|
||||
}
|
||||
|
||||
// deriveKeyFromSessionId generates a key for a query pack based on the job ID
|
||||
func deriveKeyFromSessionId(sessionId int) string {
|
||||
return fmt.Sprintf("%d", sessionId)
|
||||
}
|
||||
|
||||
// deriveKeyFromJobSpec generates a key for a result based on the JobSpec
|
||||
func deriveKeyFromJobSpec(jobSpec common.JobSpec) string {
|
||||
return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
|
||||
}
|
||||
|
||||
@@ -2,8 +2,9 @@ package artifactstore
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
"sync"
|
||||
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
)
|
||||
|
||||
// InMemoryArtifactStore is an in-memory implementation of the ArtifactStore interface
|
||||
@@ -38,7 +39,7 @@ func (store *InMemoryArtifactStore) SaveQueryPack(sessionId int, data []byte) (A
|
||||
store.mu.Lock()
|
||||
defer store.mu.Unlock()
|
||||
|
||||
key := deriveKeyFromSessionId(sessionId)
|
||||
key := fmt.Sprintf("%d-packs", sessionId)
|
||||
store.packs[key] = data
|
||||
|
||||
location := ArtifactLocation{
|
||||
@@ -79,7 +80,7 @@ func (store *InMemoryArtifactStore) SaveResult(jobSpec common.JobSpec, data []by
|
||||
store.mu.Lock()
|
||||
defer store.mu.Unlock()
|
||||
|
||||
key := deriveKeyFromJobSpec(jobSpec)
|
||||
key := fmt.Sprintf("%d-results-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
|
||||
store.results[key] = data
|
||||
|
||||
location := ArtifactLocation{
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"math"
|
||||
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
|
||||
"github.com/minio/minio-go/v7"
|
||||
@@ -17,10 +18,11 @@ type MinIOArtifactStore struct {
|
||||
client *minio.Client
|
||||
}
|
||||
|
||||
func NewMinIOArtifactStore(endpoint, id, secret string) (*MinIOArtifactStore, error) {
|
||||
func NewMinIOArtifactStore(endpoint, id, secret string, lookup minio.BucketLookupType) (*MinIOArtifactStore, error) {
|
||||
minioClient, err := minio.New(endpoint, &minio.Options{
|
||||
Creds: credentials.NewStaticV4(id, secret, ""),
|
||||
Secure: false,
|
||||
Creds: credentials.NewStaticV4(id, secret, ""),
|
||||
Secure: false,
|
||||
BucketLookup: lookup,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -28,16 +30,6 @@ func NewMinIOArtifactStore(endpoint, id, secret string) (*MinIOArtifactStore, er
|
||||
|
||||
slog.Info("Connected to MinIO artifact store server")
|
||||
|
||||
// Create "results" bucket
|
||||
if err := common.CreateMinIOBucketIfNotExists(minioClient, AF_BUCKETNAME_RESULTS); err != nil {
|
||||
return nil, fmt.Errorf("could not create results bucket: %v", err)
|
||||
}
|
||||
|
||||
// Create "packs" bucket
|
||||
if err := common.CreateMinIOBucketIfNotExists(minioClient, AF_BUCKETNAME_PACKS); err != nil {
|
||||
return nil, fmt.Errorf("could not create packs bucket: %v", err)
|
||||
}
|
||||
|
||||
return &MinIOArtifactStore{
|
||||
client: minioClient,
|
||||
}, nil
|
||||
@@ -48,7 +40,8 @@ func (store *MinIOArtifactStore) GetQueryPack(location ArtifactLocation) ([]byte
|
||||
}
|
||||
|
||||
func (store *MinIOArtifactStore) SaveQueryPack(jobId int, data []byte) (ArtifactLocation, error) {
|
||||
return store.saveArtifact(AF_BUCKETNAME_PACKS, deriveKeyFromSessionId(jobId), data, "application/gzip")
|
||||
key := fmt.Sprintf("%d-packs", jobId)
|
||||
return store.saveArtifact(AF_BUCKETNAME_PACKS, key, data, "application/gzip")
|
||||
}
|
||||
|
||||
func (store *MinIOArtifactStore) GetResult(location ArtifactLocation) ([]byte, error) {
|
||||
@@ -70,9 +63,9 @@ func (store *MinIOArtifactStore) GetResultSize(location ArtifactLocation) (int,
|
||||
|
||||
return int(objectInfo.Size), nil
|
||||
}
|
||||
|
||||
func (store *MinIOArtifactStore) SaveResult(jobSpec common.JobSpec, data []byte) (ArtifactLocation, error) {
|
||||
return store.saveArtifact(AF_BUCKETNAME_RESULTS, deriveKeyFromJobSpec(jobSpec), data, "application/zip")
|
||||
key := fmt.Sprintf("%d-results-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
|
||||
return store.saveArtifact(AF_BUCKETNAME_RESULTS, key, data, "application/zip")
|
||||
}
|
||||
|
||||
func (store *MinIOArtifactStore) getArtifact(location ArtifactLocation) ([]byte, error) {
|
||||
@@ -95,7 +88,12 @@ func (store *MinIOArtifactStore) getArtifact(location ArtifactLocation) ([]byte,
|
||||
|
||||
func (store *MinIOArtifactStore) saveArtifact(bucket, key string, data []byte,
|
||||
contentType string) (ArtifactLocation, error) {
|
||||
_, err := store.client.PutObject(context.Background(), bucket, key,
|
||||
exists, err := store.client.BucketExists(context.Background(), bucket)
|
||||
if err != nil || !exists {
|
||||
slog.Error("Bucket does not exist", "bucket", bucket)
|
||||
}
|
||||
|
||||
_, err = store.client.PutObject(context.Background(), bucket, key,
|
||||
bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{
|
||||
ContentType: contentType,
|
||||
})
|
||||
|
||||
@@ -16,6 +16,7 @@ func CreateMinIOBucketIfNotExists(client *minio.Client, bucketName string) error
|
||||
}
|
||||
|
||||
if !exists {
|
||||
// if env.Get("MRVA_S3_PATHSTYLE") == "true" {}
|
||||
slog.Info("Creating bucket", "name", bucketName)
|
||||
err = client.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{})
|
||||
if err != nil {
|
||||
|
||||
@@ -10,25 +10,28 @@ type NameWithOwner struct {
|
||||
type Status int
|
||||
|
||||
const (
|
||||
StatusInProgress = iota
|
||||
StatusQueued
|
||||
StatusError
|
||||
StatusSuccess
|
||||
StatusPending Status = iota
|
||||
StatusInProgress
|
||||
StatusSucceeded
|
||||
StatusFailed
|
||||
StatusCanceled
|
||||
StatusTimedOut
|
||||
)
|
||||
|
||||
func (s Status) ToExternalString() string {
|
||||
switch s {
|
||||
case StatusPending:
|
||||
return "pending"
|
||||
case StatusInProgress:
|
||||
return "in_progress"
|
||||
case StatusQueued:
|
||||
return "queued"
|
||||
case StatusError:
|
||||
return "error"
|
||||
case StatusSuccess:
|
||||
return "inProgress"
|
||||
case StatusSucceeded:
|
||||
return "succeeded"
|
||||
case StatusFailed:
|
||||
return "failed"
|
||||
case StatusCanceled:
|
||||
return "canceled"
|
||||
case StatusTimedOut:
|
||||
return "timedOut"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
@@ -38,3 +41,8 @@ type JobSpec struct {
|
||||
SessionID int
|
||||
NameWithOwner
|
||||
}
|
||||
|
||||
type StatusSummary struct {
|
||||
Overall Status
|
||||
Counts map[Status]int
|
||||
}
|
||||
|
||||
87
pkg/deploy/README.org
Normal file
@@ -0,0 +1,87 @@
|
||||
* sighelp.go : GPT-Assistable Semantic Outline
|
||||
|
||||
This file provides *non-functional symbolic structure* for the corresponding =.go= file (e.g. =init.go=), optimized for:
|
||||
|
||||
- GPT parsing and assistance
|
||||
- IDE symbol navigation (LSP)
|
||||
- Type-checking to detect drift
|
||||
- Readable overview for human developers
|
||||
|
||||
Each =sighelp_XXX()= function:
|
||||
|
||||
- Mirrors a real function (e.g. =InitRabbitMQ=)
|
||||
- Calls it with placeholder arguments
|
||||
- Discards the result to avoid side effects
|
||||
- Includes structured GPT-readable comments in the form =// gpt:<tag>: …=
|
||||
|
||||
This allows both humans and GPT tools to:
|
||||
|
||||
- See what functions exist and what they do
|
||||
- Understand return types and call relations
|
||||
- Navigate codebases via structure, not prose
|
||||
|
||||
*Example*
|
||||
|
||||
#+BEGIN_SRC go
|
||||
// gpt:flowinfo: InitMinIOArtifactStore returns a store configured via env vars
|
||||
func sighelp_InitMinIOArtifactStore() {
|
||||
var s artifactstore.Store
|
||||
var err error
|
||||
s, err = InitMinIOArtifactStore()
|
||||
_ = s
|
||||
_ = err
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
*Style Guidelines*
|
||||
|
||||
- Always use valid, compilable Go.
|
||||
- Maintain one =sighelp_= per actual function.
|
||||
- Add =// gpt:= comments to express intent or relationships.
|
||||
- Avoid runtime logic — this file is for *structure*, not execution.
|
||||
|
||||
* GPT-Assisted Spec → Code Change Workflow
|
||||
|
||||
To reduce time spent mapping high-level spec changes to actual code edits, we use this workflow to integrate GPT into the loop. This allows structured delegation of search, mapping, and edit proposal.
|
||||
|
||||
*Flow*
|
||||
|
||||
1. You declare a spec change as a structured Org block (see below).
|
||||
2. GPT uses =sighelp.go= (and optionally the real code) to:
|
||||
- Identify affected functions
|
||||
- Propose an edit plan
|
||||
- Track and validate type-level constraints
|
||||
3. You confirm the plan or adjust scope.
|
||||
4. GPT writes candidate diffs or summaries for manual patching.
|
||||
|
||||
*Example Change Request*
|
||||
|
||||
#+BEGIN_SRC org
|
||||
,* Change: Make artifact store initialization async with retry
|
||||
,* Affects: InitMinIOArtifactStore, InitMinIOCodeQLDatabaseStore
|
||||
,* Required: non-blocking behavior, robust to transient failures
|
||||
,* Notes: Must be compatible with sighelp stubs and InitX signatures
|
||||
#+END_SRC
|
||||
|
||||
*GPT Responsibilities*
|
||||
|
||||
- Match affected symbols from =sighelp_XXX()= stubs
|
||||
- Generate patch plan as Org list:
|
||||
#+BEGIN_SRC org
|
||||
,* deploy/init.go
|
||||
- InitMinIOArtifactStore: wrap NewMinIOArtifactStore in goroutine, add retry
|
||||
- InitMinIOCodeQLDatabaseStore: apply same pattern
|
||||
#+END_SRC
|
||||
- Output scoped diffs, patch instructions, or replacement code
|
||||
|
||||
*Optional Enhancements*
|
||||
|
||||
- GPT can update =sighelp.go= alongside implementation changes
|
||||
- You may keep =change.org= files in the repo to track historical refactor plans
|
||||
- Each change block can include tags like =:spec:async:init:= for search
|
||||
|
||||
* Summary
|
||||
|
||||
This structure treats GPT as a symbolic reasoning assistant that uses =sighelp.go= as its internal call graph. It allows high-level human changes to be mapped, tracked, and diffed without manual bottom-up spelunking.
|
||||
|
||||
This flow is especially effective when multiple entry points share structural patterns (e.g. InitXXX for services).
|
||||
@@ -4,11 +4,15 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"log/slog"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/hohn/mrvacommander/pkg/artifactstore"
|
||||
"github.com/hohn/mrvacommander/pkg/qldbstore"
|
||||
"github.com/hohn/mrvacommander/pkg/queue"
|
||||
"os"
|
||||
"strconv"
|
||||
"github.com/minio/minio-go/v7"
|
||||
)
|
||||
|
||||
func validateEnvVars(requiredEnvVars []string) {
|
||||
@@ -60,37 +64,57 @@ func InitMinIOArtifactStore() (artifactstore.Store, error) {
|
||||
"ARTIFACT_MINIO_ENDPOINT",
|
||||
"ARTIFACT_MINIO_ID",
|
||||
"ARTIFACT_MINIO_SECRET",
|
||||
"MRVA_MINIO_VIRTUAL_HOST",
|
||||
}
|
||||
validateEnvVars(requiredEnvVars)
|
||||
|
||||
endpoint := os.Getenv("ARTIFACT_MINIO_ENDPOINT")
|
||||
id := os.Getenv("ARTIFACT_MINIO_ID")
|
||||
secret := os.Getenv("ARTIFACT_MINIO_SECRET")
|
||||
useVirtual := os.Getenv("MRVA_MINIO_VIRTUAL_HOST") == "1"
|
||||
|
||||
store, err := artifactstore.NewMinIOArtifactStore(endpoint, id, secret)
|
||||
var lookup minio.BucketLookupType
|
||||
var bucketName string
|
||||
|
||||
if useVirtual {
|
||||
parsedURL, err := url.Parse(endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse ARTIFACT_MINIO_ENDPOINT: %w", err)
|
||||
}
|
||||
hostParts := strings.Split(parsedURL.Hostname(), ".")
|
||||
if len(hostParts) < 2 {
|
||||
return nil, fmt.Errorf("unable to extract bucket from host: %s", parsedURL.Hostname())
|
||||
}
|
||||
bucketName = hostParts[0]
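// e.g. hostname "mrvabucket.minio.local" yields bucket "mrvabucket" (name illustrative)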
|
||||
lookup = minio.BucketLookupDNS
|
||||
} else {
|
||||
bucketName = "mrvabucket"
|
||||
lookup = minio.BucketLookupPath
|
||||
}
|
||||
// TODO: unify into one. clean up state handling.
|
||||
artifactstore.AF_BUCKETNAME_RESULTS = bucketName
|
||||
artifactstore.AF_BUCKETNAME_PACKS = bucketName
|
||||
|
||||
store, err := artifactstore.NewMinIOArtifactStore(endpoint, id, secret, lookup)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize artifact store: %v", err)
|
||||
}
|
||||
|
||||
return store, nil
|
||||
}
|
||||
|
||||
func InitMinIOCodeQLDatabaseStore() (qldbstore.Store, error) {
|
||||
}
|
||||
func InitHEPCDatabaseStore() (qldbstore.Store, error) {
|
||||
requiredEnvVars := []string{
|
||||
"QLDB_MINIO_ENDPOINT",
|
||||
"QLDB_MINIO_ID",
|
||||
"QLDB_MINIO_SECRET",
|
||||
"MRVA_HEPC_ENDPOINT",
|
||||
"MRVA_HEPC_CACHE_DURATION",
|
||||
"MRVA_HEPC_DATAVIACLI",
|
||||
"MRVA_HEPC_OUTDIR",
|
||||
"MRVA_HEPC_TOOL",
|
||||
}
|
||||
validateEnvVars(requiredEnvVars)
|
||||
|
||||
endpoint := os.Getenv("QLDB_MINIO_ENDPOINT")
|
||||
id := os.Getenv("QLDB_MINIO_ID")
|
||||
secret := os.Getenv("QLDB_MINIO_SECRET")
|
||||
endpoint := os.Getenv("MRVA_HEPC_ENDPOINT")
|
||||
|
||||
store, err := qldbstore.NewMinIOCodeQLDatabaseStore(endpoint, id, secret)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize ql database storage: %v", err)
|
||||
}
|
||||
store := qldbstore.NewHepcStore(endpoint)
|
||||
|
||||
return store, nil
|
||||
}
|
||||
|
||||
476
pkg/qldbstore/qldbstore_hepc.go
Normal file
@@ -0,0 +1,476 @@
|
||||
package qldbstore
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hohn/mrvacommander/pkg/common"
|
||||
)
|
||||
|
||||
const defaultCacheDurationMinutes = 60
|
||||
|
||||
type HepcStore struct {
|
||||
Endpoint string
|
||||
metadataCache []HepcResult
|
||||
cacheLastUpdated time.Time
|
||||
cacheMutex sync.Mutex
|
||||
cacheDuration time.Duration
|
||||
}
|
||||
|
||||
type HepcResult struct {
|
||||
GitBranch string `json:"git_branch"`
|
||||
GitCommitID string `json:"git_commit_id"`
|
||||
GitRepo string `json:"git_repo"`
|
||||
IngestionDatetime string `json:"ingestion_datetime_utc"`
|
||||
ResultURL string `json:"result_url"`
|
||||
ToolID string `json:"tool_id"`
|
||||
ToolName string `json:"tool_name"`
|
||||
ToolVersion string `json:"tool_version"`
|
||||
Projname string `json:"projname"`
|
||||
}
|
||||
|
||||
func NewHepcStore(endpoint string) *HepcStore {
|
||||
cacheDuration := getMetaCacheDuration()
|
||||
return &HepcStore{
|
||||
Endpoint: endpoint,
|
||||
cacheDuration: cacheDuration,
|
||||
}
|
||||
}
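
// Usage sketch (wiring illustrative; see InitHEPCDatabaseStore in pkg/deploy):
//
//	store := NewHepcStore(os.Getenv("MRVA_HEPC_ENDPOINT"))
//	notFound, found := store.FindAvailableDBs(repos)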
|
||||
|
||||
func getMetaCacheDuration() time.Duration {
|
||||
/*
|
||||
Input:
|
||||
env("MRVA_HEPC_CACHE_DURATION") = s
|
||||
|
||||
if s = "" ∨ s ∉ int → defaultCacheDurationMinutes × time.Minute
|
||||
|
||||
else → int(s) × time.Minute
|
||||
*/
|
||||
durationStr := os.Getenv("MRVA_HEPC_CACHE_DURATION")
|
||||
if durationStr == "" {
|
||||
return time.Minute * defaultCacheDurationMinutes
|
||||
}
|
||||
duration, err := strconv.Atoi(durationStr)
|
||||
if err != nil {
|
||||
slog.Warn("Invalid MRVA_HEPC_CACHE_DURATION value. Using default",
|
||||
durationStr, defaultCacheDurationMinutes,
|
||||
)
|
||||
return time.Minute * defaultCacheDurationMinutes
|
||||
}
|
||||
return time.Minute * time.Duration(duration)
|
||||
}
|
||||
|
||||
func (h *HepcStore) fetchViaHTTP() ([]HepcResult, error) {
|
||||
/*
|
||||
Input:
|
||||
h.Endpoint = baseURL
|
||||
url := baseURL + "/index"
|
||||
|
||||
Do:
|
||||
HTTP GET url → resp
|
||||
|
||||
Require:
|
||||
resp.StatusCode = 200
|
||||
|
||||
Then:
|
||||
decode resp.Body as stream of HepcResult
|
||||
|
||||
Output:
|
||||
if success → (results, nil)
|
||||
if net/http/json error → (nil, error)
|
||||
*/
|
||||
|
||||
url := fmt.Sprintf("%s/index", h.Endpoint)
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
slog.Warn("Error fetching metadata", "err", err)
|
||||
return nil, fmt.Errorf("error fetching metadata: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
slog.Warn("Non-OK HTTP status", resp.Status)
|
||||
return nil, fmt.Errorf("non-OK HTTP status: %s", resp.Status)
|
||||
}
|
||||
|
||||
var results []HepcResult
|
||||
decoder := json.NewDecoder(resp.Body)
|
||||
for {
|
||||
var result HepcResult
|
||||
if err := decoder.Decode(&result); err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
slog.Warn("Error decoding JSON", err)
|
||||
return nil, fmt.Errorf("error decoding JSON: %w", err)
|
||||
}
|
||||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (h *HepcStore) fetchViaCli() ([]HepcResult, error) {
    /*
       Inputs:
           env("MRVA_HEPC_OUTDIR") = outDir
           env("MRVA_HEPC_TOOL") = toolName

       Require:
           outDir ≠ "" ∧ toolName ≠ ""
           (expand ~ in outDir)
           mkdir(outDir)

       Let:
           jsonPath := outDir / "spigot-results.json"

       Do:
           run:
               spigot-cli bulk-download-results
                   --tool-name toolName
                   --metadata-only all
                   > jsonPath

       Then:
           decode jsonPath as a stream of HepcResult objects

       Output:
           if success → (results, nil)
           if env/exec/json error → (nil, error)
    */
    outDir := os.Getenv("MRVA_HEPC_OUTDIR")
    toolName := os.Getenv("MRVA_HEPC_TOOL")

    var missing []string

    if outDir == "" {
        slog.Error("Missing required environment variable", "var", "MRVA_HEPC_OUTDIR")
        missing = append(missing, "MRVA_HEPC_OUTDIR")
    }
    if toolName == "" {
        slog.Error("Missing required environment variable", "var", "MRVA_HEPC_TOOL")
        missing = append(missing, "MRVA_HEPC_TOOL")
    }

    if len(missing) > 0 {
        return nil, fmt.Errorf("missing required environment variables: %s", strings.Join(missing, ", "))
    }

    // Expand ~ in outDir
    if strings.HasPrefix(outDir, "~/") {
        home, err := os.UserHomeDir()
        if err != nil {
            slog.Error("Unable to get home directory", "error", err)
            return nil, err
        }
        outDir = filepath.Join(home, outDir[2:])
    }

    if err := os.MkdirAll(outDir, 0755); err != nil {
        slog.Error("Failed to create output directory", "error", err)
        return nil, err
    }

    jsonPath := filepath.Join(outDir, "spigot-results.json")

    // ----------------------
    // Go version of
    //   spigot-cli bulk-download-results \
    //     --tool-name "$TOOL_NAME" \
    //     --metadata-only all \
    //     > "$OUT_DIR/spigot-results.json"
    // ----------------------
    outFile, err := os.Create(jsonPath)
    if err != nil {
        slog.Error("Failed to create spigot output file", "error", err)
        return nil, err
    }
    defer outFile.Close()

    cmd := exec.Command(
        "spigot-cli",
        "bulk-download-results",
        "--tool-name", toolName,
        "--metadata-only", "all",
    )
    cmd.Stdout = outFile
    cmd.Stderr = os.Stderr // for error logging

    if err := cmd.Run(); err != nil {
        slog.Error("spigot-cli failed", "error", err)
        return nil, err
    }
    // ----------------------

    // Decode the resulting JSON file
    f, err := os.Open(jsonPath)
    if err != nil {
        slog.Error("Failed to open JSON output", "path", jsonPath, "error", err)
        return nil, fmt.Errorf("failed to open result file: %w", err)
    }
    defer f.Close()

    var results []HepcResult
    decoder := json.NewDecoder(f)
    for {
        var result HepcResult
        if err := decoder.Decode(&result); err == io.EOF {
            break
        } else if err != nil {
            slog.Warn("Error decoding CLI JSON", "error", err)
            return nil, fmt.Errorf("error decoding CLI JSON: %w", err)
        }
        results = append(results, result)
    }

    return results, nil
}

func (h *HepcStore) fetchMetadata() ([]HepcResult, error) {
    // Get via request or cli?
    hepcDataViaCli := os.Getenv("MRVA_HEPC_DATAVIACLI")
    if hepcDataViaCli == "1" {
        return h.fetchViaCli()
    } else {
        return h.fetchViaHTTP()
    }
}

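A hedged sketch of driving the switch above from a test or setup routine (values hypothetical; MRVA_HEPC_OUTDIR and MRVA_HEPC_TOOL are only consulted on the CLI path):

func demoFetchPaths(h *HepcStore) {
    os.Setenv("MRVA_HEPC_DATAVIACLI", "1") // select the spigot-cli path
    os.Setenv("MRVA_HEPC_OUTDIR", "~/hepc-out")
    os.Setenv("MRVA_HEPC_TOOL", "codeql")
    _, _ = h.fetchMetadata() // shells out to spigot-cli

    os.Setenv("MRVA_HEPC_DATAVIACLI", "0") // any value other than "1" selects HTTP
    _, _ = h.fetchMetadata() // issues GET <endpoint>/index
}
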
func (h *HepcStore) FindAvailableDBs(analysisReposRequested []common.NameWithOwner) (
    notFoundRepos []common.NameWithOwner,
    foundRepos []common.NameWithOwner) {
    /*
       Input:
           analysisReposRequested : List[Repo]
           h.metadataCache        : List[HepcResult]
           h.cacheLastUpdated     : Time
           h.cacheDuration        : Duration

       If time.Now() − h.cacheLastUpdated > h.cacheDuration:
           h.metadataCache := fetchMetadata() // or return (requested, nil) on error
           h.cacheLastUpdated := time.Now()

       Let:
           repoSet := { r.Projname | r ∈ h.metadataCache }

       Partition:
           analysisReposRequested into:
               foundRepos    = { r ∈ requested | r ∈ repoSet }
               notFoundRepos = { r ∈ requested | r ∉ repoSet }

       Output:
           (notFoundRepos, foundRepos)
    */

    // Check cache
    h.cacheMutex.Lock()
    if time.Since(h.cacheLastUpdated) > h.cacheDuration {
        // Cache is expired or not set; refresh
        results, err := h.fetchMetadata()
        if err != nil {
            h.cacheMutex.Unlock()
            slog.Warn("Error fetching metadata", "error", err)
            return analysisReposRequested, nil
        }
        h.metadataCache = results
        h.cacheLastUpdated = time.Now()
    }
    cachedResults := h.metadataCache
    h.cacheMutex.Unlock()

    // Compare against requested repos
    repoSet := make(map[string]struct{})
    for _, result := range cachedResults {
        repoSet[result.Projname] = struct{}{}
    }

    for _, reqRepo := range analysisReposRequested {
        repoKey := fmt.Sprintf("%s/%s", reqRepo.Owner, reqRepo.Repo)
        if _, exists := repoSet[repoKey]; exists {
            foundRepos = append(foundRepos, reqRepo)
        } else {
            notFoundRepos = append(notFoundRepos, reqRepo)
        }
    }

    return notFoundRepos, foundRepos
}

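A same-package sketch (owners and repos hypothetical) of the partitioning contract: a requested repo is "found" exactly when its "owner/repo" string appears as a Projname in the cached metadata.

func demoFindAvailableDBs(h *HepcStore) {
    requested := []common.NameWithOwner{
        {Owner: "alice", Repo: "app"},
        {Owner: "bob", Repo: "lib"},
    }
    notFound, found := h.FindAvailableDBs(requested)
    // With only "alice/app" in the metadata cache:
    //   found == [{alice app}], notFound == [{bob lib}]
    _, _ = notFound, found
}
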
func extractDatabaseFromTar(tarStream io.Reader) ([]byte, bool, error) {
    /*
       Input: tarStream ∈ GZIP(TAR(Files))

       Find f ∈ Files | name(f) = "artifacts/codeql_database.zip"

       if ∃ f → (bytes(f), true, nil)
       if ¬∃ f → (nil, false, nil)
       if error → (nil, false, error)
    */
    gzReader, err := gzip.NewReader(tarStream)
    if err != nil {
        slog.Error("failed to open gzip stream", "error", err)
        return nil, false, fmt.Errorf("failed to open gzip stream: %w", err)
    }
    defer gzReader.Close()

    tarReader := tar.NewReader(gzReader)

    for {
        hdr, err := tarReader.Next()
        if err == io.EOF {
            break
        }
        if err != nil {
            slog.Error("failed to read tar entry", "error", err)
            return nil, false, fmt.Errorf("failed to read tar entry: %w", err)
        }

        if hdr.Name == "artifacts/codeql_database.zip" {
            var buf bytes.Buffer
            if _, err := io.Copy(&buf, tarReader); err != nil {
                slog.Error("failed to extract zip from tar", "error", err)
                return nil, false, fmt.Errorf("failed to extract zip from tar: %w", err)
            }
            return buf.Bytes(), true, nil
        }
    }

    return nil, false, nil // not found
}

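For testing, the expected archive layout can be built in memory; a same-package sketch (illustrative, not part of the change set) that round-trips through extractDatabaseFromTar:

func buildArchive(zipBytes []byte) []byte {
    var buf bytes.Buffer
    gw := gzip.NewWriter(&buf)
    tw := tar.NewWriter(gw)
    _ = tw.WriteHeader(&tar.Header{
        Name: "artifacts/codeql_database.zip",
        Mode: 0644,
        Size: int64(len(zipBytes)),
    })
    _, _ = tw.Write(zipBytes)
    _ = tw.Close()
    _ = gw.Close()
    return buf.Bytes()
}

// Usage:
//   data, found, err := extractDatabaseFromTar(bytes.NewReader(buildArchive(zipBytes)))
//   // found == true, data equals zipBytes, err == nil
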
func (h *HepcStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
    /*
       Input:
           location = (owner, repo)
           key := owner + "/" + repo

       Step 1 — Ensure metadata cache:
           if now − h.cacheLastUpdated > h.cacheDuration:
               h.metadataCache := fetchMetadata()
               h.cacheLastUpdated := now
           else:
               use h.metadataCache

           if fetchMetadata fails → (nil, error)

       Step 2 — Lookup URL:
           if ∃ r ∈ h.metadataCache | r.Projname = key → resultURL := r.ResultURL
           if ¬∃ r → return (nil, "not found")

       Step 3 — Download:
           GET replaceHepcURL(resultURL) → resp
           if status ≠ 200 → (nil, "bad HTTP")

           body := ReadAll(resp.Body)
           if error → return (nil, error)

       Step 4 — Detect + Decode:
           if hasGzipHeader(body):
               extractDatabaseFromTar(body) → (data, found, err)
               if err → (nil, err)
               if ¬found → (nil, "zip not found")
               → (data, nil)
           else:
               → (body, nil)
    */

    h.cacheMutex.Lock()
    if time.Since(h.cacheLastUpdated) > h.cacheDuration {
        results, err := h.fetchMetadata()
        if err != nil {
            slog.Error("error refreshing metadata cache", "error", err)
            h.cacheMutex.Unlock()
            return nil, fmt.Errorf("error refreshing metadata cache: %w", err)
        }
        h.metadataCache = results
        h.cacheLastUpdated = time.Now()
    }
    cachedResults := h.metadataCache
    h.cacheMutex.Unlock()

    key := fmt.Sprintf("%s/%s", location.Owner, location.Repo)

    var resultURL string
    for _, result := range cachedResults {
        if result.Projname == key {
            resultURL = result.ResultURL
            break
        }
    }

    if resultURL == "" {
        slog.Error("database not found in metadata", "repo", key)
        return nil, fmt.Errorf("database not found for repository: %s", key)
    }

    resp, err := http.Get(replaceHepcURL(resultURL))
    if err != nil {
        slog.Error("failed to fetch database", "url", resultURL, "error", err)
        return nil, fmt.Errorf("error fetching database: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        slog.Error("non-OK HTTP status", "status", resp.Status, "url", resultURL)
        return nil, fmt.Errorf("non-OK HTTP status for database fetch: %s", resp.Status)
    }

    // Buffer the full stream into RAM
    fullBody, err := io.ReadAll(resp.Body)
    if err != nil {
        slog.Error("error reading full database stream into memory", "error", err)
        return nil, fmt.Errorf("error reading database content: %w", err)
    }

    // The input could be the codeql db as zip, or a tar stream containing the zip;
    // if a gzip header is found, treat the input as a tar+gz archive.

    // Check for gzip magic number (0x1F 0x8B)
    isGzip := len(fullBody) >= 2 && fullBody[0] == 0x1F && fullBody[1] == 0x8B

    if isGzip {
        // Extract zip data from tar+gz archive
        data, found, err := extractDatabaseFromTar(bytes.NewReader(fullBody))
        if err != nil {
            slog.Error("error extracting from tar stream", "error", err)
            return nil, err
        }
        if !found {
            slog.Warn("tar archive read succeeded, but zip entry not found")
            return nil, fmt.Errorf("zip file not found in tar archive")
        }
        return data, nil
    }

    // Treat input as raw zip file content
    slog.Info("no gzip header found; assuming raw zip content")
    return fullBody, nil
}

// replaceHepcURL replaces the fixed "http://hepc" prefix with the value from
// MRVA_HEPC_ENDPOINT.
func replaceHepcURL(originalURL string) string {
    hepcEndpoint := os.Getenv("MRVA_HEPC_ENDPOINT")
    if hepcEndpoint == "" {
        hepcEndpoint = "http://hepc:8070" // default fallback
    }

    // Replace the first occurrence of "http://hepc" in the URL
    return strings.Replace(originalURL, "http://hepc", hepcEndpoint, 1)
}

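A minimal sketch of the rewrite's effect (endpoint value hypothetical):

func ExampleReplaceHepcURL() {
    os.Setenv("MRVA_HEPC_ENDPOINT", "http://localhost:9999")
    fmt.Println(replaceHepcURL("http://hepc/results/42"))
    // Output: http://localhost:9999/results/42
}
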
@@ -1,99 +0,0 @@
package qldbstore

import (
    "context"
    "fmt"
    "io"
    "log/slog"

    "github.com/hohn/mrvacommander/pkg/common"

    "github.com/minio/minio-go/v7"
    "github.com/minio/minio-go/v7/pkg/credentials"
)

// XX: static types: split by type?
// Restrict the keys / values and centralize the common ones here
const (
    QL_DB_BUCKETNAME = "qldb"
)

type MinIOCodeQLDatabaseStore struct {
    client     *minio.Client
    bucketName string
}

func NewMinIOCodeQLDatabaseStore(endpoint, id, secret string) (*MinIOCodeQLDatabaseStore, error) {
    minioClient, err := minio.New(endpoint, &minio.Options{
        Creds:  credentials.NewStaticV4(id, secret, ""),
        Secure: false,
    })
    if err != nil {
        return nil, err
    }

    slog.Info("Connected to MinIO CodeQL database store server")

    err = common.CreateMinIOBucketIfNotExists(minioClient, QL_DB_BUCKETNAME)
    if err != nil {
        return nil, fmt.Errorf("could not create bucket: %v", err)
    }

    return &MinIOCodeQLDatabaseStore{
        client:     minioClient,
        bucketName: QL_DB_BUCKETNAME,
    }, nil
}

func (store *MinIOCodeQLDatabaseStore) FindAvailableDBs(analysisReposRequested []common.NameWithOwner) (
    notFoundRepos []common.NameWithOwner,
    foundRepos []common.NameWithOwner) {

    for _, repo := range analysisReposRequested {
        status := store.haveDatabase(repo)
        if status {
            foundRepos = append(foundRepos, repo)
        } else {
            notFoundRepos = append(notFoundRepos, repo)
        }
    }

    return notFoundRepos, foundRepos
}

func (store *MinIOCodeQLDatabaseStore) GetDatabase(location common.NameWithOwner) ([]byte, error) {
    key := fmt.Sprintf("%s$%s.zip", location.Owner, location.Repo)
    object, err := store.client.GetObject(context.Background(),
        store.bucketName,
        key,
        minio.GetObjectOptions{})
    if err != nil {
        return nil, err
    }
    defer object.Close()

    data, err := io.ReadAll(object)
    if err != nil {
        return nil, err
    }

    return data, nil
}

func (store *MinIOCodeQLDatabaseStore) haveDatabase(location common.NameWithOwner) bool {
    objectName := fmt.Sprintf("%s$%s.zip", location.Owner, location.Repo)

    // Check if the object exists
    _, err := store.client.StatObject(context.Background(),
        store.bucketName,
        objectName,
        minio.StatObjectOptions{})
    if err != nil {
        if minio.ToErrorResponse(err).Code == "NoSuchKey" {
            slog.Info("No database found for", location)
            return false
        }
        slog.Info("General database error while checking for", location)
        return false
    }
    return true
}

@@ -4,6 +4,7 @@ import (
    "context"
    "encoding/json"
    "fmt"
    "sync"
    "time"

    amqp "github.com/rabbitmq/amqp091-go"

@@ -15,6 +16,9 @@ type RabbitMQQueue struct {
    results chan AnalyzeResult
    conn    *amqp.Connection
    channel *amqp.Channel

    mu         sync.Mutex
    connString string
}

// NewRabbitMQQueue initializes a RabbitMQ queue.
@@ -89,10 +93,12 @@ func NewRabbitMQQueue(
    }

    result := RabbitMQQueue{
        conn:       conn,
        channel:    ch,
        jobs:       make(chan AnalyzeJob),
        results:    make(chan AnalyzeResult),
        mu:         sync.Mutex{},
        connString: rabbitMQURL,
    }

    if isAgent {

@@ -125,34 +131,96 @@ func (q *RabbitMQQueue) Close() {
    q.conn.Close()
}

func (q *RabbitMQQueue) reconnectIfNeeded() error {
    q.mu.Lock()
    defer q.mu.Unlock()

    if q.conn != nil && !q.conn.IsClosed() && q.channel != nil {
        return nil // still valid
    }

    // Recreate everything
    conn, err := amqp.Dial(q.connString)
    if err != nil {
        return fmt.Errorf("failed to reconnect: %w", err)
    }

    ch, err := conn.Channel()
    if err != nil {
        conn.Close()
        return fmt.Errorf("failed to open channel: %w", err)
    }

    // Optional: redeclare queues here
    // _, _ = ch.QueueDeclare(...)

    q.conn = conn
    q.channel = ch
    return nil
}

func (q *RabbitMQQueue) invalidateConnection() {
    q.mu.Lock()
    defer q.mu.Unlock()

    if q.channel != nil {
        _ = q.channel.Close()
    }
    if q.conn != nil {
        _ = q.conn.Close()
    }

    q.channel = nil
    q.conn = nil
}

func (q *RabbitMQQueue) ConsumeJobs(queueName string) {
    const pollInterval = 5 * time.Second

    // | scenario          | result                                |
    // |-------------------+---------------------------------------|
    // | Queue is empty    | msg = zero, ok = false, err = nil     |
    // | Queue has message | msg = valid, ok = true, err = nil     |
    // | Connection lost   | msg = zero, ok = false, err = non-nil |

    for {
        if err := q.reconnectIfNeeded(); err != nil {
            slog.Error("failed to reconnect", slog.Any("error", err))
            time.Sleep(10 * time.Second)
            continue
        }

        msg, ok, err := q.channel.Get(queueName, false) // false = manual ack
        if err != nil {
            slog.Error("polling error while getting job", slog.Any("error", err))
            q.invalidateConnection()
            time.Sleep(pollInterval)
            continue
        }

        if !ok {
            // No message in queue
            time.Sleep(pollInterval)
            continue
        }

        var job AnalyzeJob
        if err := json.Unmarshal(msg.Body, &job); err != nil {
            slog.Error("failed to unmarshal job", slog.Any("error", err))
            _ = msg.Nack(false, false) // do not requeue
            continue
        }

        // Send job to channel for processing
        q.jobs <- job

        // Acknowledge successful handoff
        if err := msg.Ack(false); err != nil {
            slog.Error("failed to ack job message", slog.Any("error", err))
            continue
        }
    }
    close(q.jobs)
}

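A hedged same-package sketch of wiring up the polling consumer above (queue name hypothetical). ConsumeJobs blocks on q.jobs <- job, so a goroutine must drain the jobs channel for polling to continue; note that a message is acked once it is handed off to the channel, before the worker finishes processing it.

func runAgentLoop(q *RabbitMQQueue) {
    go q.ConsumeJobs("jobs")

    for job := range q.jobs {
        _ = job // process the job here
    }
}
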
func (q *RabbitMQQueue) PublishResults(queueName string) {
@@ -247,30 +315,31 @@ func (q *RabbitMQQueue) PublishJobs(queueName string) {
}

func (q *RabbitMQQueue) ConsumeResults(queueName string) {
    autoAck := false // false = manual ack
    sleepFor := 5    // polling interval, seconds

    for {
        msg, ok, err := q.channel.Get(queueName, autoAck)
        if err != nil {
            slog.Error("poll error", slog.Any("err", err))
            time.Sleep(time.Duration(sleepFor) * time.Second)
            continue
        }
        if !ok {
            // no message
            time.Sleep(time.Duration(sleepFor) * time.Second)
            continue
        }

        var result AnalyzeResult
        if err := json.Unmarshal(msg.Body, &result); err != nil {
            slog.Error("unmarshal error", slog.Any("err", err))
            _ = msg.Nack(false, false) // finish .Get() with a nack
            continue
        }

        q.results <- result

        _ = msg.Ack(false) // finish .Get() with an ack
    }
    close(q.results)
}

@@ -41,7 +41,7 @@ func (c *CommanderSingle) startAnalyses(
            QueryLanguage: queryLanguage,
        }
        c.v.Queue.Jobs() <- info
        c.v.State.SetStatus(jobSpec, common.StatusPending)
        c.v.State.AddJob(info)
    }
}

@@ -132,7 +132,7 @@ func (c *CommanderSingle) submitEmptyStatusResponse(w http.ResponseWriter,
    scannedRepos := []common.ScannedRepo{}

    var jobStatus common.Status
    jobStatus = common.StatusSucceeded

    status := common.StatusResponse{
        SessionId: jsSessionID,

@@ -176,9 +176,9 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
    }

    // Loop through all jobs under the same session id
    for _, job := range jobs {

        // Get the job status
        status, err := c.v.State.GetStatus(job.Spec)
        if err != nil {
@@ -191,7 +191,7 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
        var artifactSize int
        var resultCount int

        if status != common.StatusSucceeded {
            // If the job is not successful, we don't need to get the result
            artifactSize = 0
            resultCount = 0
@@ -210,6 +210,8 @@ func (c *CommanderSingle) submitStatusResponse(w http.ResponseWriter, js common.
            }
            resultCount = jobResult.ResultCount
        }
        // Get jobRepoId from (owner, repo)
        jobRepoId := c.v.State.GetRepoId(job.Spec.NameWithOwner)

        // Append all scanned (complete and incomplete) repos to the response
        scannedRepos = append(scannedRepos,

@@ -326,11 +328,13 @@ func (c *CommanderSingle) MRVAStatus(w http.ResponseWriter, r *http.Request) {
}

// Download artifacts
func (c *CommanderSingle) MRVADownloadArtifactCommon(w http.ResponseWriter,
    r *http.Request, jobRepoId int, jobSpec common.JobSpec) {
    slog.Debug("MRVA artifact download",
        "codeql_variant_analysis_id", jobSpec.SessionID,
        "repo_owner", jobSpec.NameWithOwner.Owner,
        "repo_name", jobSpec.NameWithOwner.Repo,
        "jobRepoId", jobRepoId,
    )

    c.sendArtifactDownloadResponse(w, jobRepoId, jobSpec)

@@ -424,7 +428,8 @@ func (c *CommanderSingle) MRVADownloadArtifact(w http.ResponseWriter, r *http.Re
    c.MRVADownloadArtifactCommon(w, r, -1, jobSpec)
}

func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter,
    jobRepoId int, jobSpec common.JobSpec) {
    var response common.DownloadResponse

    slog.Debug("Forming download response", "job", jobSpec)
@@ -436,7 +441,7 @@ func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter, jo
        return
    }

    if jobStatus == common.StatusSucceeded {
        jobResult, err := c.v.State.GetResult(jobSpec)
        if err != nil {
            slog.Error(err.Error())
@@ -511,6 +516,8 @@ func (c *CommanderSingle) sendArtifactDownloadResponse(w http.ResponseWriter, jo
        return
    }

    slog.Debug("MRVA: Sending download response", "responseJson", responseJson)

    // Send analysisReposJSON via ResponseWriter
    w.Header().Set("Content-Type", "application/json")
    w.Write(responseJson)

@@ -18,6 +18,9 @@ type ServerState interface {
    // TODO: fix this hacky logic
    GetJobSpecByRepoId(sessionId int, jobRepoId int) (common.JobSpec, error)

    // The repo id is uniquely determined by NameWithOwner
    GetRepoId(owner common.NameWithOwner) int

    // SetResult stores the analysis result for the specified session ID and repository.
    SetResult(js common.JobSpec, ar queue.AnalyzeResult)

@@ -3,9 +3,10 @@ package state
import (
    "fmt"
    "log/slog"
    "sync"

    "github.com/hohn/mrvacommander/pkg/common"
    "github.com/hohn/mrvacommander/pkg/queue"
)

type LocalState struct {
@@ -97,7 +98,7 @@ func (s *LocalState) GetStatus(js common.JobSpec) (common.Status, error) {
    s.mutex.Lock()
    defer s.mutex.Unlock()
    if _, ok := s.status[js]; !ok {
        return common.StatusFailed, fmt.Errorf("status not found for job spec %v", js)
    }
    return s.status[js], nil
}

465
pkg/state/state_postgres.go
Normal file
@@ -0,0 +1,465 @@
package state

import (
    "context"
    "encoding/json"
    "fmt"
    "log/slog"
    "os"

    "github.com/hohn/mrvacommander/pkg/common"
    "github.com/hohn/mrvacommander/pkg/queue"
    "github.com/jackc/pgx/v5/pgxpool"
)

// ----- PGState holds the shared connection pool
type PGState struct {
    pool *pgxpool.Pool
}

func validateEnvVars(requiredEnvVars []string) {
    missing := false

    for _, envVar := range requiredEnvVars {
        if _, ok := os.LookupEnv(envVar); !ok {
            slog.Error("Missing required environment variable", "key", envVar)
            missing = true
        }
    }

    if missing {
        os.Exit(1)
    }
}

func NewPGState() *PGState {
    ctx := context.Background()

    required := []string{
        "POSTGRES_USER",
        "POSTGRES_PASSWORD",
        "POSTGRES_DB",
        // Host & port may be omitted if you rely on Docker DNS, but list
        // them here to make the requirement explicit:
        "POSTGRES_HOST",
        "POSTGRES_PORT",
    }

    validateEnvVars(required)

    // Assemble from vars
    user := os.Getenv("POSTGRES_USER")
    pass := os.Getenv("POSTGRES_PASSWORD")
    host := os.Getenv("POSTGRES_HOST")
    port := os.Getenv("POSTGRES_PORT")
    db := os.Getenv("POSTGRES_DB")

    dbURL := fmt.Sprintf("postgres://%s:%s@%s:%s/%s", user, pass, host, port, db)
    // Avoid logging the credentials embedded in dbURL
    slog.Info("Assembled Postgres connection URL from POSTGRES_* variables",
        "host", host, "port", port, "db", db)

    config, err := pgxpool.ParseConfig(dbURL)
    if err != nil {
        slog.Error("Failed to parse connection URL", "error", err)
        os.Exit(1)
    }

    config.MaxConns = 10

    pool, err := pgxpool.NewWithConfig(ctx, config)
    if err != nil {
        slog.Error("Failed to create pgx pool", "error", err)
        os.Exit(1)
    }

    slog.Info("Connected to Postgres", "max_conns", config.MaxConns)

    SetupSchemas(pool)

    return &PGState{pool: pool}
}

func SetupSchemas(pool *pgxpool.Pool) {
    ctx := context.Background()

    schemas := []struct {
        name string
        sql  string
    }{
        {
            name: "job_repo_map",
            sql: `
                CREATE TABLE IF NOT EXISTS job_repo_map (
                    job_repo_id SERIAL PRIMARY KEY,
                    owner TEXT NOT NULL,
                    repo TEXT NOT NULL,
                    UNIQUE(owner, repo)
                );
            `,
        },
        {
            name: "session_id_seq",
            sql: `
                CREATE SEQUENCE IF NOT EXISTS session_id_seq;
            `,
        },
        {
            name: "analyze_results",
            sql: `
                CREATE TABLE IF NOT EXISTS analyze_results (
                    session_id INTEGER NOT NULL,
                    owner TEXT NOT NULL,
                    repo TEXT NOT NULL,
                    result JSONB NOT NULL,
                    PRIMARY KEY (session_id, owner, repo)
                );
            `,
        },
        {
            name: "analyze_jobs",
            sql: `
                CREATE TABLE IF NOT EXISTS analyze_jobs (
                    session_id INTEGER NOT NULL,
                    owner TEXT NOT NULL,
                    repo TEXT NOT NULL,
                    payload JSONB NOT NULL,
                    PRIMARY KEY (session_id, owner, repo)
                );
            `,
        },
        {
            name: "job_info",
            sql: `
                CREATE TABLE IF NOT EXISTS job_info (
                    session_id INTEGER NOT NULL,
                    owner TEXT NOT NULL,
                    repo TEXT NOT NULL,
                    payload JSONB NOT NULL,
                    PRIMARY KEY (session_id, owner, repo)
                );
            `,
        },
        {
            name: "job_status",
            sql: `
                CREATE TABLE IF NOT EXISTS job_status (
                    session_id INTEGER NOT NULL,
                    owner TEXT NOT NULL,
                    repo TEXT NOT NULL,
                    status INTEGER NOT NULL,
                    PRIMARY KEY (session_id, owner, repo)
                );
            `,
        },
    }

    for _, schema := range schemas {
        _, err := pool.Exec(ctx, schema.sql)
        if err != nil {
            slog.Error("Failed to create table", "table", schema.name, "error", err)
            os.Exit(1)
        }
        slog.Info("Schema initialized", "table", schema.name)
    }
}

// ----- Sequence-based NextID (implements ServerState)
func (s *PGState) NextID() int {
    ctx := context.Background()

    var id int
    err := s.pool.QueryRow(ctx, `SELECT nextval('session_id_seq')`).Scan(&id)
    if err != nil {
        slog.Error("NextID query failed", "error", err)
        panic("NextID(): " + err.Error()) // interface doesn't allow returning an error
    }

    slog.Debug("NextID generated", "id", id)
    return id
}

func (s *PGState) SetResult(js common.JobSpec, ar queue.AnalyzeResult) {
    ctx := context.Background()

    ar.Spec = js // ensure internal consistency

    jsonBytes, err := json.Marshal(ar)
    if err != nil {
        slog.Error("SetResult: JSON marshal failed", "job", js, "error", err)
        panic("SetResult(): " + err.Error())
    }

    _, err = s.pool.Exec(ctx, `
        INSERT INTO analyze_results (session_id, owner, repo, result)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT (session_id, owner, repo)
        DO UPDATE SET result = EXCLUDED.result
    `, js.SessionID, js.Owner, js.Repo, jsonBytes)

    if err != nil {
        slog.Error("SetResult: insert/update failed", "job", js, "error", err)
        panic("SetResult(): " + err.Error())
    }
}

func (s *PGState) GetResult(js common.JobSpec) (queue.AnalyzeResult, error) {
    ctx := context.Background()

    var jsonBytes []byte
    err := s.pool.QueryRow(ctx, `
        SELECT result FROM analyze_results
        WHERE session_id = $1 AND owner = $2 AND repo = $3
    `, js.SessionID, js.Owner, js.Repo).Scan(&jsonBytes)
    if err != nil {
        return queue.AnalyzeResult{}, err
    }

    var ar queue.AnalyzeResult
    if err := json.Unmarshal(jsonBytes, &ar); err != nil {
        return queue.AnalyzeResult{}, fmt.Errorf("unmarshal AnalyzeResult: %w", err)
    }

    return ar, nil
}

func (s *PGState) SetJobInfo(js common.JobSpec, ji common.JobInfo) {
    ctx := context.Background()

    jiJSON, err := json.Marshal(ji)
    if err != nil {
        slog.Error("SetJobInfo: marshal failed", "job", js, "error", err)
        panic("SetJobInfo(): " + err.Error())
    }

    _, err = s.pool.Exec(ctx, `
        INSERT INTO job_info (session_id, owner, repo, payload)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT (session_id, owner, repo)
        DO UPDATE SET payload = EXCLUDED.payload
    `, js.SessionID, js.Owner, js.Repo, jiJSON)

    if err != nil {
        slog.Error("SetJobInfo: insert/update failed", "job", js, "error", err)
        panic("SetJobInfo(): " + err.Error())
    }
}

func (s *PGState) GetJobInfo(js common.JobSpec) (common.JobInfo, error) {
    ctx := context.Background()

    var jsonBytes []byte
    err := s.pool.QueryRow(ctx, `
        SELECT payload FROM job_info
        WHERE session_id = $1 AND owner = $2 AND repo = $3
    `, js.SessionID, js.Owner, js.Repo).Scan(&jsonBytes)
    if err != nil {
        return common.JobInfo{}, err
    }

    var ji common.JobInfo
    if err := json.Unmarshal(jsonBytes, &ji); err != nil {
        return common.JobInfo{}, fmt.Errorf("unmarshal JobInfo: %w", err)
    }

    return ji, nil
}

func (s *PGState) SetStatus(js common.JobSpec, status common.Status) {
    ctx := context.Background()

    _, err := s.pool.Exec(ctx, `
        INSERT INTO job_status (session_id, owner, repo, status)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT (session_id, owner, repo)
        DO UPDATE SET status = EXCLUDED.status
    `, js.SessionID, js.Owner, js.Repo, status)

    if err != nil {
        slog.Error("SetStatus failed", "job", js, "status", status, "error", err)
        panic("SetStatus(): " + err.Error())
    }
}

func (s *PGState) GetSessionStatus(sessionID int) (common.StatusSummary, error) {
    ctx := context.Background()

    rows, err := s.pool.Query(ctx, `
        SELECT status
        FROM job_status
        WHERE session_id = $1
    `, sessionID)
    if err != nil {
        return common.StatusSummary{}, err
    }
    defer rows.Close()

    counts := map[common.Status]int{
        common.StatusPending:    0,
        common.StatusInProgress: 0,
        common.StatusSucceeded:  0,
        common.StatusFailed:     0,
        common.StatusCanceled:   0,
        common.StatusTimedOut:   0,
    }
    total := 0

    for rows.Next() {
        var st int
        if err := rows.Scan(&st); err != nil {
            return common.StatusSummary{}, err
        }
        counts[common.Status(st)]++
        total++
    }

    // Apply deterministic rules
    var overall common.Status
    switch {
    case counts[common.StatusSucceeded] == total:
        overall = common.StatusSucceeded
    case counts[common.StatusFailed] == total:
        overall = common.StatusFailed
    case counts[common.StatusCanceled] == total:
        overall = common.StatusCanceled
    case counts[common.StatusTimedOut] == total:
        overall = common.StatusFailed
    case counts[common.StatusInProgress] > 0:
        overall = common.StatusPending
    case counts[common.StatusPending] > 0 && counts[common.StatusInProgress] == 0:
        overall = common.StatusPending
    case counts[common.StatusPending] == 0 && counts[common.StatusInProgress] == 0:
        overall = common.StatusSucceeded // covers mixed complete
    default:
        overall = common.StatusPending
    }

    return common.StatusSummary{Overall: overall, Counts: counts}, nil
}

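Worked examples of the deterministic rules above (counts hypothetical, total = 5 in each case):

//   all Succeeded                  → StatusSucceeded
//   Succeeded: 3, Failed: 2        → StatusSucceeded (mixed complete)
//   InProgress: 1, Succeeded: 4    → StatusPending
//   Pending: 2, Succeeded: 3       → StatusPending
//   all TimedOut                   → StatusFailed (timeouts map to failure)
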
// GetStatus reports the session-level overall status for the job's session.
func (s *PGState) GetStatus(js common.JobSpec) (common.Status, error) {
    summary, err := s.GetSessionStatus(js.SessionID)
    if err != nil {
        return 0, err
    }
    return summary.Overall, nil
}

// GetRepoId returns a stable unique ID for a given (owner, repo).
// If the pair doesn't exist, it is inserted atomically.
func (s *PGState) GetRepoId(cno common.NameWithOwner) int {
    ctx := context.Background()

    var jobRepoID int

    // The no-op DO UPDATE makes the upsert always RETURN the row's id,
    // whether the row was just inserted or already existed.
    err := s.pool.QueryRow(ctx, `
        INSERT INTO job_repo_map (owner, repo)
        VALUES ($1, $2)
        ON CONFLICT (owner, repo) DO UPDATE SET owner = EXCLUDED.owner
        RETURNING job_repo_id
    `, cno.Owner, cno.Repo).Scan(&jobRepoID)
    if err != nil {
        slog.Error("GetRepoId failed", "NameWithOwner", cno, "error", err)
        panic("GetRepoId: " + err.Error())
    }

    return jobRepoID
}

func (s *PGState) AddJob(job queue.AnalyzeJob) {
    ctx := context.Background()
    js := job.Spec

    // Begin transaction for atomic operation
    tx, err := s.pool.Begin(ctx)
    if err != nil {
        slog.Error("AddJob: failed to begin transaction", "job", js, "error", err)
        panic("AddJob(): " + err.Error())
    }
    defer tx.Rollback(ctx) // Will be ignored if tx.Commit() succeeds

    // 1. Store AnalyzeJob payload -------------------------------
    jb, err := json.Marshal(job)
    if err != nil {
        slog.Error("AddJob: marshal failed", "job", js, "error", err)
        panic("AddJob(): " + err.Error())
    }

    _, err = tx.Exec(ctx, `
        INSERT INTO analyze_jobs (session_id, owner, repo, payload)
        VALUES ($1, $2, $3, $4)
        ON CONFLICT DO NOTHING
    `, js.SessionID, js.Owner, js.Repo, jb)
    if err != nil {
        slog.Error("AddJob: insert analyze_jobs failed", "job", js, "error", err)
        panic("AddJob(): " + err.Error())
    }

    // 2. Ensure a job_repo_id exists (note: runs on the pool, outside this tx)
    jobRepoID := s.GetRepoId(job.Spec.NameWithOwner)

    // Commit the transaction
    if err = tx.Commit(ctx); err != nil {
        slog.Error("AddJob: failed to commit transaction", "job", js, "error", err)
        panic("AddJob(): " + err.Error())
    }

    slog.Debug("AddJob stored", "session", js.SessionID, "jobRepoId", jobRepoID, "owner", js.Owner, "repo", js.Repo)
}

func (s *PGState) GetJobList(sessionId int) ([]queue.AnalyzeJob, error) {
    ctx := context.Background()

    rows, err := s.pool.Query(ctx, `
        SELECT payload FROM analyze_jobs
        WHERE session_id = $1
        ORDER BY owner, repo
    `, sessionId)
    if err != nil {
        slog.Error("GetJobList: query failed", "session_id", sessionId, "error", err)
        return nil, err
    }
    defer rows.Close()

    var jobs []queue.AnalyzeJob
    for rows.Next() {
        var jsonBytes []byte
        if err := rows.Scan(&jsonBytes); err != nil {
            slog.Error("GetJobList: scan failed", "error", err)
            return nil, err
        }
        var job queue.AnalyzeJob
        if err := json.Unmarshal(jsonBytes, &job); err != nil {
            slog.Error("GetJobList: unmarshal failed", "error", err)
            return nil, err
        }
        jobs = append(jobs, job)
    }

    if err := rows.Err(); err != nil {
        slog.Error("GetJobList: rows iteration failed", "error", err)
        return nil, err
    }

    return jobs, nil
}

func (s *PGState) GetJobSpecByRepoId(sessionId, jobRepoId int) (common.JobSpec, error) {
    ctx := context.Background()

    var owner, repo string
    err := s.pool.QueryRow(ctx, `
        SELECT owner, repo
        FROM job_repo_map
        WHERE job_repo_id = $1
    `, jobRepoId).Scan(&owner, &repo)
    if err != nil {
        return common.JobSpec{}, err
    }
    return common.JobSpec{
        SessionID: sessionId,
        NameWithOwner: common.NameWithOwner{
            Owner: owner,
            Repo:  repo,
        },
    }, nil
}

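A minimal end-to-end usage sketch for PGState (values hypothetical; assumes a reachable Postgres with the POSTGRES_* variables set):

func demoPGState() {
    s := NewPGState() // connects, creates tables/sequence if missing

    id := s.NextID() // fresh session id from session_id_seq
    js := common.JobSpec{
        SessionID:     id,
        NameWithOwner: common.NameWithOwner{Owner: "alice", Repo: "app"},
    }

    s.SetStatus(js, common.StatusPending)
    st, err := s.GetStatus(js) // session-level overall status
    _, _ = st, err
}
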