9 Commits

Author        SHA1        Message                                              Date
Michael Hohn  9e44c8dfe1  d                                                    2024-06-18 14:05:07 -07:00
Michael Hohn  1a009ccde0  test lfs                                             2024-06-18 13:55:32 -07:00
Michael Hohn  5dfca00fa5  Add instructions to test server from the host        2024-06-18 13:29:41 -07:00
Michael Hohn  46052cd20f  Fix simple SIGSEV                                    2024-06-18 13:29:14 -07:00
Michael Hohn  8f318c114f  Add CommanderContainer and CommonState; use statically distinct types for each mrvacommander configuration  2024-06-18 12:54:59 -07:00
Michael Hohn  1633245444  wip: make server compile post-merge                  2024-06-18 10:07:47 -07:00
Nicolas Will  02acf3eeaf  Remove storage, add state and store pkgs, refactor   2024-06-18 17:41:28 +02:00
Nicolas Will  30f2d22a71  Format comments in pkg/server/server.go              2024-06-17 15:01:23 +02:00
Nicolas Will  95e42ae85a  Fix docker-compose.yml agent depends_on              2024-06-17 13:16:24 +02:00
131 changed files with 1395 additions and 7905 deletions

View File

@@ -1,9 +0,0 @@
# Excludes
/dbstore-data
/qpstore-data
/test-data
/venv
/client
/cmd/server/var
/.git

View File

@@ -1,12 +0,0 @@
MRVA_RABBITMQ_HOST=rabbitmq
MRVA_RABBITMQ_PORT=5672
MRVA_RABBITMQ_USER=user
MRVA_RABBITMQ_PASSWORD=password
MINIO_ROOT_USER=user
MINIO_ROOT_PASSWORD=mmusty8432
ARTIFACT_MINIO_ENDPOINT=artifactstore:9000
ARTIFACT_MINIO_ID=${MINIO_ROOT_USER}
ARTIFACT_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
QLDB_MINIO_ENDPOINT=dbstore:9000
QLDB_MINIO_ID=${MINIO_ROOT_USER}
QLDB_MINIO_SECRET=${MINIO_ROOT_PASSWORD}

.gitignore
View File

@@ -4,9 +4,6 @@ cmd/server/var/
# vscode project dir
.vscode/
# idea project dir
.idea/
# Compiled binary
cmd/server/server
cmd/agent/agent
@@ -44,16 +41,3 @@ go.work.sum
# env file
.env
/artifactstore-data/.minio.sys
/qldbminio/qldb
.ipynb_checkpoints/
venv/
venv-*/
*.egg-info
__pycache__
README.html
ChangeLog
notes/*.html
# Make timestamp files
mk.*

View File

@@ -1,29 +0,0 @@
linters:
enable:
- staticcheck
- unused
- decorder
- errchkjson
- exhaustruct
- gochecknoinits
- gochecksumtype
- goconst
- gocritic
- godox
- lll
- loggercheck
- revive
- sloglint
- tagalign
- unparam
linters-settings:
revive:
config: .revive.toml
staticcheck:
checks:
- "SA"
issues:
format: "format: {{.FromLinter}}: {{.Text}}"

View File

@@ -1,13 +0,0 @@
ignoreGeneratedHeader = true
[rule.blank-imports]
Arguments = [true]
[[rule]]
name = "max-public-identifier-length"
arguments = [15] # Maximum length for public identifiers
[[rule]]
name = "max-private-identifier-length"
arguments = [15] # Maximum length for private identifiers

View File

@@ -1,55 +0,0 @@
all: server agent
.PHONY: view
view: README.html
open $<
html: README.html
%.html: %.md
pandoc --toc=true --standalone $< --out $@
# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
cd client/containers/qldbtools && \
docker build -t client-qldbtools-container:0.1.24 .
touch $@
# Run a shell in the container with the qldbtools
dbt-run: mk.client-qldbtools-container
docker run --rm -it client-qldbtools-container:0.1.24 /bin/bash
# Run one of the scripts in the container as check
dbt-check: mk.client-qldbtools-container
docker run --rm -it client-qldbtools-container:0.1.24 mc-db-initial-info
dbt-push: mk.dbt-push
mk.dbt-push: mk.client-qldbtools-container
docker tag client-qldbtools-container:0.1.24 ghcr.io/hohn/client-qldbtools-container:0.1.24
docker push ghcr.io/hohn/client-qldbtools-container:0.1.24
touch $@
server:
cd cmd/server && GOOS=linux GOARCH=arm64 go build
agent:
cd cmd/agent && GOOS=linux GOARCH=arm64 go build
fullbuild:
cd cmd/server && GOOS=linux GOARCH=arm64 go build -a
sendsubmit:
cd tools && sh ./submit-request.curl
# Requires
# go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
lint:
golangci-lint run cmd/... pkg/...
deps:
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/server | dot -Tpdf > deps-server.pdf && open deps-server.pdf
depa:
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/agent | dot -Tpdf > deps-agent.pdf && open deps-agent.pdf

View File

@@ -6,52 +6,6 @@ TODO Style notes
- NO package init() functions
- Dynamic behaviour must be explicit
## Client CodeQL Database Selector
Separate from the server's downloading of databases, a client-side interface is needed to generate the `databases.json` file. This
1. must be usable from the shell
2. must be interactive (Python, Jupyter)
3. is session based to allow iterations on selection / narrowing
4. must be queryable. There is no need to reinvent SQL / dataframes
Python with dataframes is ideal for this; the project is in `client/`.
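A sketch of the intended selection flow (column names follow the qldbtools
table documented in `client/qldbtools`; file names here are illustrative, and
the real generator is `bin/mc-db-generate-selection`):

    import json
    import pandas as pd

    # Load the collected DB table (illustrative file name)
    df = pd.read_csv("scratch/db-info-2.csv")

    # Iterate on the selection: narrow by language, then keep the newest
    # database per (owner, name)
    cpp = df[df["language"] == "cpp"]
    newest = (cpp.sort_values("creationTime")
                 .groupby(["owner", "name"]).last().reset_index())

    # Write a VS Code repository list in the databases.json format
    repos = [f"{r.owner}/{r.name}" for r in newest.itertuples()]
    selection = {
        "version": 1,
        "databases": {"variantAnalysis": {
            "repositoryLists": [{"name": "mirva-list", "repositories": repos}],
            "owners": [], "repositories": []}},
        "selected": {"kind": "variantAnalysisUserDefinedList",
                     "listName": "mirva-list"},
    }
    with open("databases.json", "w") as f:
        json.dump(selection, f, indent=4)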
## Reverse proxy
For testing, replay flows using mitmweb. This is faster and simpler than using
gh-mrva or the VS Code plugin.
- Set up the virtual environment and install tools
python3.11 -m venv venv
source venv/bin/activate
pip install mitmproxy
For intercepting requests:
1. Start mitmproxy to listen on port 8080 and forward requests to port 8081, with
web interface
mitmweb --mode reverse:http://localhost:8081 -p 8080
1. Change `server` ports in `docker-compose.yml` to
ports:
- "8081:8080" # host:container
1. Start the containers.
1. Submit requests.
1. Save the flows for later replay.
One such session is in `tools/mitmweb-flows`; it can be loaded to replay the
requests:
1. start `mitmweb --mode reverse:http://localhost:8081 -p 8080`
2. `file` > `open` > `tools/mitmweb-flows`
3. replay at least the submit, status, and download requests
## Cross-compile server on host, run it in container
These are simple steps using a single container.
@@ -77,10 +31,7 @@ These are simple steps using a single container.
cd /mrva/mrvacommander/cmd/server/ && ./server
## Using docker-compose
### Steps to build and run the server in a multi-container environment set up by docker-compose
1. Build the server image, above
@@ -107,23 +58,6 @@ docker-compose.
cd ~/work-gh/mrva/mrvacommander/tools
sh ./request_16-Jun-2024_11-33-16.curl
1. Follow server logging via
cd ~/work-gh/mrva/mrvacommander
docker-compose up -d
docker-compose logs -f server
1. Completely rebuild all containers. Useful when running into docker errors
cd ~/work-gh/mrva/mrvacommander
docker-compose up --build
1. Start the server containers and the desktop/demo containers
cd ~/work-gh/mrva/mrvacommander/
docker-compose down --remove-orphans
docker-compose -f docker-compose-demo.yml up -d
1. Test server via remote client by following the steps in [gh-mrva](https://github.com/hohn/gh-mrva/blob/connection-redirect/README.org#compacted-edit-run-debug-cycle)
### Some general docker-compose commands

File diff suppressed because one or more lines are too long

View File

@@ -1,64 +0,0 @@
# ######################
# Use an official Golang image as the base image
FROM golang:1.22 AS builder
# Set the working directory inside the container
WORKDIR /work-gh/mrva/gh-mrva
# Clone the repository
RUN git clone https://github.com/hohn/gh-mrva.git . &&\
git checkout hohn-0.1.24-demo
# Download dependencies
RUN go mod download
# Build the Go binary
RUN go build .
# ######################
# Provide codeql and java
#
FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
unzip \
curl \
ca-certificates \
default-jdk
# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
fi && \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr
# ######################
# Set the working directory inside the final image
WORKDIR /app
# Copy the binary from the builder stage
COPY --from=builder /work-gh/mrva/gh-mrva/gh-mrva /usr/local/bin/gh-mrva
# Put CodeQL on the PATH
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/codeql
# Run forever
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,13 +0,0 @@
ghm: mk.client-ghmrva-container
mk.client-ghmrva-container:
docker build -t client-ghmrva-container:0.1.24 .
touch $@
ghm-push: mk.ghm-push
mk.ghm-push: mk.client-ghmrva-container
docker tag client-ghmrva-container:0.1.24 ghcr.io/hohn/client-ghmrva-container:0.1.24
docker push ghcr.io/hohn/client-ghmrva-container:0.1.24
touch $@
ghm-run:
docker run --rm -it ghcr.io/hohn/client-ghmrva-container:0.1.24 /bin/bash

View File

@@ -1,16 +0,0 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Build
cd ~/work-gh/mrva/mrvacommander/client/containers/ghmrva/
make ghm
# Run
docker run -ti client-ghmrva-container:0.1.24 /bin/bash
# In the container
gh-mrva -h
codeql -h
# Push
make ghm-push
#+END_SRC

View File

@@ -1,30 +0,0 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim
# Install git
RUN apt-get update && apt-get install -y git
# Create the required directory structure
RUN mkdir -p /work-gh/mrva/
# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
cd mrvacommander && \
git checkout hohn-0.1.24-demo
# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/
# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip
# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Install qldbtools
RUN pip install .
# Run forever
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,23 +0,0 @@
{"git_branch": "HEAD", "git_commit_id": "2b41915dac8966e95f9e63638d30769b0d69ad68", "git_repo": "aircrack-ng", "ingestion_datetime_utc": "2024-06-07 16:57:47.683012+00:00", "result_url": "http://hepc/db-collection-py/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.4", "projname": "aircrack-ng/aircrack-ng"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "9a9308fd5477d2a44f4e491d5a712546d4a2b3e4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-22 13:30:21.681180+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0189aa.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "34412555665923bc07d43ce970e9d81be3795de7", "git_repo": "UEFITool", "ingestion_datetime_utc": "2024-07-04 19:00:38.543297+00:00", "result_url": "http://hepc/db-collection-py/UEFITool-UEFITool-ctsj-ee2d3c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.6", "projname": "UEFITool/UEFITool"}
{"git_branch": "HEAD", "git_commit_id": "00aa56f5257060304d41f09651c6ab58ee6104d6", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-18 14:12:52.904410+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0c6575.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "e4bffa0a7450e1abd9f4df9565728ae18d86cfd2", "git_repo": "attrs", "ingestion_datetime_utc": "2024-07-18 22:34:57.795427+00:00", "result_url": "http://hepc/db-collection-py/attrs-attrs-ctsj-e2c939.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "attrs/attrs"}
{"git_branch": "HEAD", "git_commit_id": "9620901afce56f720e856aca600951c9b61a9460", "git_repo": "apprise", "ingestion_datetime_utc": "2024-07-22 22:26:48.720348+00:00", "result_url": "http://hepc/db-collection-py/apprise-apprise-ctsj-3f4a4e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "apprise/apprise"}
{"git_branch": "HEAD", "git_commit_id": "c38e6c8cfba28980aea8f895c71b376e8a5155d5", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2022-04-16T12:45:56.739003883Z", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d6cf6.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.8.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "18f6be580b12dc406ef356b2cd65f47c24fce63e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 05:46:23.392157+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d667f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a587921bac074b1bd1b0a0a5536587660a9b954e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 16:13:39.094478+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a6352.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9b361c7ff497d57651856650667aece8230fab6d", "git_repo": "BentoML", "ingestion_datetime_utc": "2024-07-24 02:17:07.095690+00:00", "result_url": "http://hepc/db-collection-py/BentoML-BentoML-ctsj-d6963d.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "BentoML/BentoML"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "53ad2da1a8e6e79e0986ddfa3a45e1db6fdd491c", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-14 02:24:19.208812+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-01864e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "db8f1a7930c6b5826357646746337dafc983f953", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2023-11-22 01:18:25.079473+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-099796.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.2", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "f8df9dd749a549dec20aa286a7639ba04190faab", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-12 16:39:28.854142+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d7b69.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "b5274976cb0a792d05d541a749c0adcd9d20062d", "git_repo": "behave", "ingestion_datetime_utc": "2024-05-11 19:20:51.916333+00:00", "result_url": "http://hepc/db-collection-py/behave-behave-ctsj-b297b5.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "behave/behave"}
{"git_branch": "HEAD", "git_commit_id": "4c825c198df470506b0f84da0b25b3b385150dcb", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-04-25 03:26:03.986270+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-035849.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a8b8ff0acc6fcc629d08a3a9952f83be56a9a3c3", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-03 13:30:48.829134+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-051a5c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9ef05731e7c6cbad2e897faa7c526558eed3ceaa", "git_repo": "aws-sam-cli", "ingestion_datetime_utc": "2024-05-14 01:03:18.130142+00:00", "result_url": "http://hepc/db-collection-py/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sam-cli/aws-sam-cli"}
{"git_branch": "HEAD", "git_commit_id": "16865390a653ceaeabe354df1b37e4a775161a70", "git_repo": "aws-sdk-pandas", "ingestion_datetime_utc": "2024-05-13 15:13:31.853042+00:00", "result_url": "http://hepc/db-collection-py/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sdk-pandas/aws-sdk-pandas"}
{"git_branch": "HEAD", "git_commit_id": "093856995af0811d3ebbe8c179b8febf4ae706f0", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-03-20 14:18:02.500590+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-103a8a.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.16.4", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "0573e6f96637f08fb4cb85e0552f0622d36827d4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-01-24 09:21:05.977294+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0cdf2f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "93314995a5ee2217d58c3d9cbcbdef5df6c34566", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-09 05:29:25.243273+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a35a1.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}

View File

@@ -1,30 +0,0 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim
# Install git
RUN apt-get update && apt-get install -y git
# Create the required directory structure
RUN mkdir -p /work-gh/mrva/
# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
cd mrvacommander && \
git checkout hohn-0.1.24-demo
# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/
# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip
# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Install qldbtools
RUN pip install .
# Run forever
CMD ["tail", "-f", "/dev/null"]

View File

@@ -1,25 +0,0 @@
DBT_TARGET := client-qldbtools-container:0.1.24
# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
docker build -t ${DBT_TARGET} .
touch $@
# Run a shell in the container with the qldbtools
dbt-run: dbt
docker run --rm -it ${DBT_TARGET} /bin/bash
# Run one of the scripts in the container as check. Should exit with error.
dbt-check: dbt
docker run --rm -it ${DBT_TARGET} mc-db-initial-info
dbt-push: mk.dbt-push
mk.dbt-push: dbt
docker tag ${DBT_TARGET} ghcr.io/hohn/${DBT_TARGET}
docker push ghcr.io/hohn/${DBT_TARGET}
touch $@
dbt-test:
docker pull ghcr.io/hohn/${DBT_TARGET}
docker run --rm -it --name test-dbt-server ghcr.io/hohn/${DBT_TARGET} sh

View File

@@ -1,13 +0,0 @@
* MRVA python tools container
Set up Docker image with python 3.11 and pip and the qldbtools. The targets are
in the =Makefile=; most important are
#+BEGIN_SRC sh
# Build
make dbt
# Check
make dbt-check
#+END_SRC

View File

@@ -1,67 +0,0 @@
FROM codercom/code-server:4.92.2-debian
# ======================
# Pre-install a custom JDK for this platform and redirect CodeQL to it
USER root
ENV DEBIAN_FRONTEND=noninteractive
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
ca-certificates \
curl \
default-jdk \
git \
libcurl4-openssl-dev \
libssl-dev \
python3 \
python3-dev \
unzip
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest
# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
fi && \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
# ======================
# Install code-server
USER coder
# Set environment variables
ENV PASSWORD mrva
# Install VS Code extensions as user root -- globally
RUN code-server --install-extension ms-python.python \
&& code-server --install-extension esbenp.prettier-vscode \
&& code-server --install-extension GitHub.vscode-codeql
# Expose the port that Code Server runs on
EXPOSE 9080
# Point CodeQL to the java binary for this platform
ENV CODEQL_JAVA_HOME=/usr
# Add
# codeQL.cli.executablePath
# to user settings.
# This is in addition to the environment variable CODEQL_JAVA_HOME which has no
# effect on the plugin
USER root
COPY ./settings.json /home/coder/.local/share/code-server/User/
RUN chown -R coder:coder /home/coder/.local/share/code-server/
# Start Code Server
ENTRYPOINT ["dumb-init", "code-server", "--bind-addr", "0.0.0.0:9080", "."]
# Run as the coder user
USER coder

View File

@@ -1,119 +0,0 @@
* MRVA VS Code server container
On the host:
#+BEGIN_SRC sh
# Build the container via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker build -t code-server-initialized:0.1.24 .
# Run the container in standalone mode via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \
-d -p 9080:9080 code-server-initialized:0.1.24
#+END_SRC
- Connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.
Inside the container:
- Setup inside the container
#+BEGIN_SRC shell
cd
export PATH=/opt/codeql:$PATH
codeql pack init qldemo
cd qldemo
codeql pack add codeql/python-all@1.0.6
#+END_SRC
- Create a new file =qldemo/simple.ql= with this query. Open it in VS Code.
The plugin will download the CodeQL binaries (but never use them -- the
configuration redirects)
#+BEGIN_SRC sh
cd
cat > qldemo/simple.ql <<eof
import python
select 42
eof
#+END_SRC
- Create database.
#+BEGIN_SRC sh
cd ~/qldemo
cat > short.py <<EOF
print('hello world')
EOF
export PATH=/opt/codeql:$PATH
codeql database create --language=python -s . -v short-db
#+END_SRC
- Set the database as default and run the query =simple.ql=
- Add the customized VS Code plugin
On the host
#+BEGIN_SRC sh
cd ~/work-gh/mrva/vscode-codeql
git checkout mrva-standalone
# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install correct node version
cd ./extensions/ql-vscode
nvm install
# Build the extension
cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
npm install
npm run build
#+END_SRC
In the container
#+BEGIN_SRC sh
# Install extension
cd /work-gh/mrva/vscode-codeql/dist
/bin/code-server --force --install-extension vscode-codeql-*.vsix
#+END_SRC
- Capture the state of this container and create a new image from it
#+BEGIN_SRC sh
docker ps
# Check id column. Use it below.
docker commit 2df5732c1850 code-server-initialized:0.1.24
# Keep the sha
# sha256:87c8260146e28aed25b094d023a30a015a958f829c09e66cb50ccca2c4a2a000
docker kill 2df5732c1850
# Make sure the image tag matches the sha
docker inspect code-server-initialized:0.1.24 |grep Id
# Run the image and check
docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
code-server-initialized:0.1.24
#+END_SRC
Again connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.
- Push this container
#+BEGIN_SRC sh
# Common
export CSI_TARGET=code-server-initialized:0.1.24
# Push container
docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET}
docker push ghcr.io/hohn/${CSI_TARGET}
#+END_SRC
- Test the registry image
#+BEGIN_SRC sh
# Test pushed container
docker pull ghcr.io/hohn/${CSI_TARGET}
docker run --rm -d -p 9080:9080 --name test-code-server-codeql\
ghcr.io/hohn/${CSI_TARGET}
#+END_SRC
In the container, inside the running vs code:
- Check the plugin version number via the command
: codeql: copy version information

View File

@@ -1,4 +0,0 @@
{
"codeQL.runningQueries.numberOfThreads": 2,
"codeQL.cli.executablePath": "/opt/codeql/codeql"
}

View File

@@ -1,24 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File with Arguments",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"args": [
"--db_collection_dir",
"db-collection-py",
"--starting_path",
"$HOME/work-gh/mrva/mrva-open-source-download"
],
"justMyCode": true,
"stopOnEntry": false
}
]
}

View File

@@ -1,2 +0,0 @@
doc:
pandoc -s --css=./gfm.css README.md > foo.html && open foo.html

View File

@@ -1,171 +0,0 @@
* Introduction to hepc -- HTTP End Point for CodeQL
#+BEGIN_SRC sh
1:$ ./bin/hepc-init --db_collection_dir db-collection --starting_path ~/work-gh/mrva/mrva-open-source-download
[2024-11-19 14:12:06] [INFO] searching for db.zip files
[2024-11-19 14:12:08] [INFO] collecting information from db.zip files
[2024-11-19 14:12:08] [INFO] Extracting from /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/aircrack-ng/aircrack-ng/code-scanning/codeql/databases/cpp/db.zip
[2024-11-19 14:12:08] [INFO] Adding record to db-collection/metadata.json
#+END_SRC
* Introduction to qldbtools
=qldbtools= is a Python package for selecting sets of CodeQL databases
to work on. It uses a (pandas) dataframe in the implementation, but all
result sets are available as CSV files to provide flexibility in the
tools you want to work with.
The rationale is simple: When working with larger collections of CodeQL
databases, spread over time, languages, etc., many criteria can be used
to select the subset of interest. This package addresses that aspect of
MRVA (multi repository variant analysis).
For example, consider this scenario from an enterprise. We have 10,000
repositories in C/C++, 5,000 in Python. We build CodeQL databases weekly
and keep the last 2 years' worth. This means for the last 2 years there
are
#+begin_example
(10000 + 5000) * 52 * 2 = 1560000
#+end_example
databases to select from for a single MRVA run. 1.5 million rows are
readily handled by a pandas (or R) dataframe.
The full list of criteria currently encoded via the columns is
- owner
- name
- CID
- cliVersion
- creationTime
- language
- sha -- git commit sha of the code the CodeQL database is built against
- baselineLinesOfCode
- path
- db_lang
- db_lang_displayName
- db_lang_file_count
- db_lang_linesOfCode
- ctime
- primaryLanguage
- finalised
- left_index
- size
The minimal criteria needed to distinguish databases in the above
scenario are
- cliVersion
- creationTime
- language
- sha
These are encoded in the single custom id column 'CID'.
Thus, a database can be fully specified using a (owner,name,CID) tuple
and this is encoded in the names used by the MRVA server and clients.
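A sketch of how such a name can be formed (the digest choice -- blake2b
truncated to six hex digits -- mirrors the b2sum-based =hepc-init= script;
the actual =cid_hash= in =qldbtools.utils= may differ in detail):

#+BEGIN_SRC python
import hashlib

# Hash the four minimal criteria and keep a 6-hex-digit prefix;
# 'ctsj' is the marker between the repository name and the CID.
def cid_hash(cli_version, creation_time, language, sha):
    text = f"{cli_version} {creation_time} {language} {sha}"
    return hashlib.blake2b(text.encode()).hexdigest()[:6]

cid = cid_hash("2.16.1", "2024-02-08 14:18:20.983830+00:00", "python",
               "c94dd024b1f5410ef160ff82a8423141e2bbb6b4")
print(f"1adrianb/face-alignmentctsj{cid}")
#+END_SRC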
The selection of databases can of course be done using the whole table.
For an example of the workflow, see [[#command-line-use][section
'command line use']].
A small sample of a full table:
| | owner | name | CID | cliVersion | creationTime | language | sha | baselineLinesOfCode | path | db_lang | db_lang_displayName | db_lang_file_count | db_lang_linesOfCode | ctime | primaryLanguage | finalised | left_index | size |
|---+----------+----------------+--------+------------+----------------------------------+----------+------------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+-------------+---------------------+--------------------+---------------------+----------------------------+-----------------+-----------+------------+----------|
| 0 | 1adrianb | face-alignment | 1f8d99 | 2.16.1 | 2024-02-08 14:18:20.983830+00:00 | python | c94dd024b1f5410ef160ff82a8423141e2bbb6b4 | 1839 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/1adrianb/face-alignment/code-scanning/codeql/databases/python/db.zip | python | Python | 25 | 1839 | 2024-07-24T14:09:02.187201 | python | 1 | 1454 | 24075001 |
| 1 | 2shou | TextGrocery | 9ab87a | 2.12.1 | 2023-02-17T11:32:30.863093193Z | cpp | 8a4e41349a9b0175d9a73bc32a6b2eb6bfb51430 | 3939 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/2shou/TextGrocery/code-scanning/codeql/databases/cpp/db.zip | no-language | no-language | 0 | -1 | 2024-07-24T06:25:55.347568 | cpp | nan | 1403 | 3612535 |
| 2 | 3b1b | manim | 76fdc7 | 2.17.5 | 2024-06-27 17:37:20.587627+00:00 | python | 88c7e9d2c96be1ea729b089c06cabb1bd3b2c187 | 19905 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/3b1b/manim/code-scanning/codeql/databases/python/db.zip | python | Python | 94 | 19905 | 2024-07-24T13:23:04.716286 | python | 1 | 1647 | 26407541 |
** Installation
- Set up the virtual environment and install tools
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
python3.11 -m venv venv
source venv/bin/activate
pip install --upgrade pip
# From requirements.txt
pip install -r requirements.txt
# Or explicitly
pip install jupyterlab pandas ipython
pip install lckr-jupyterlab-variableinspector
#+end_example
- Local development
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
source venv/bin/activate
pip install --editable .
The `--editable` *should* use symlinks for all scripts; use `./bin/*` to be sure.
#+end_example
- Full installation
#+begin_example
pip install qldbtools
#+end_example
** Use as library
The best way to examine the code is starting from the high-level scripts
in =bin/=.
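For instance, the core of =bin/mc-db-initial-info= reduces to a few calls
(a sketch; the starting path is illustrative):

#+BEGIN_SRC python
from pathlib import Path
import pandas as pd
import qldbtools.utils as utils

# Walk the download tree and build the initial table, as
# bin/mc-db-initial-info does
base = Path("~/work-gh/mrva/mrva-open-source-download").expanduser()
dbs = list(utils.collect_dbs(str(base)))
df = pd.DataFrame([d.__dict__ for d in dbs])
df.to_csv("scratch/db-info-1.csv", index=False)
#+END_SRC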
** Command line use
Initial information collection requires a unique file path so it can be
run repeatedly over DB collections with the same (owner,name) but other
differences -- namely, in one or more of
- creationTime
- sha
- cliVersion
- language
Those fields are collected in =bin/mc-db-refine-info=.
An example workflow with commands grouped by data files follows.
#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
./bin/mc-db-view-info < scratch/db-info-2.csv &
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-view-info < scratch/db-info-3.csv &
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+end_example
To see the full information for a selection, use
=mc-rows-from-mrva-list=:
#+begin_example
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
scratch/db-info-3.csv > scratch/selection-full-info
#+end_example
To check, e.g., the =language= column:
#+begin_example
csvcut -c language scratch/selection-full-info
#+end_example
** Notes
The =preview-data= plugin for VS Code has a bug; it displays =0= instead
of =0e3379= for the following rows. Other entries malfunction similarly.
#+begin_example
CleverRaven,Cataclysm-DDA,0e3379,2.17.0,2024-05-08 12:13:10.038007+00:00,cpp,5ca7f4e59c2d7b0a93fb801a31138477f7b4a761,578098.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1228.0,578098.0,2024-05-13T12:14:54.650648,cpp,True,4245,563435469
CleverRaven,Cataclysm-DDA,3231f7,2.18.0,2024-07-18 11:13:01.673231+00:00,cpp,db3435138781937e9e0e999abbaa53f1d3afb5b7,579532.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1239.0,579532.0,2024-07-24T02:33:23.900885,cpp,True,1245,573213726
#+end_example

View File

@@ -1,144 +0,0 @@
#!/bin/bash
#* Utility functions
log() {
local level="$1"
shift
local color_reset="\033[0m"
local color_info="\033[1;34m"
local color_warn="\033[1;33m"
local color_error="\033[1;31m"
local color
case "$level" in
INFO) color="$color_info" ;;
WARN) color="$color_warn" ;;
ERROR) color="$color_error" ;;
*) color="$color_reset" ;;
esac
echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
}
usage() {
echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
echo
echo "Options:"
echo " --db_collection_dir <directory> Specify the database collection directory."
echo " --starting_path <path> Specify the starting path."
echo " -h Show this help message."
exit 1
}
#* Initialize and parse arguments
set -euo pipefail # exit on error, unset var, pipefail
trap 'rm -fR /tmp/hepc.$$-*' EXIT
starting_dir=$(pwd)
db_collection_dir=""
starting_path=""
# Parse arguments
while [[ $# -gt 0 ]]; do
case "$1" in
--db_collection_dir)
shift
if [[ -z "$1" || "$1" == -* ]]; then
echo "Error: --db_collection_dir requires a directory as an argument."
usage
fi
db_collection_dir="$1"
;;
--starting_path)
shift
if [[ -z "$1" || "$1" == -* ]]; then
echo "Error: --starting_path requires a path as an argument."
usage
fi
starting_path="$1"
;;
-h)
usage
;;
*)
echo "Error: Unknown option '$1'."
usage
;;
esac
shift
done
# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
echo "Error: --db_collection_dir is required."
usage
fi
if [[ -z "$starting_path" ]]; then
echo "Error: --starting_path is required."
usage
fi
#* Find all DBs
log INFO "searching for db.zip files"
find ${starting_path} -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths
#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p $db_collection_dir
cat /tmp/hepc.$$-paths | while read -r zip_path
do
log INFO "Extracting from ${zip_path}"
zip_dir=$(dirname ${zip_path})
zip_file=$(basename ${zip_path})
unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
# The content may be LANGUAGE/codeql-database.yml
#* For every database, create a metadata record.
mkdir -p /tmp/hepc.$$-zip
cd /tmp/hepc.$$-zip/*
# Information from codeql-database.yml
primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
sha=$(yq '.creationMetadata.sha' codeql-database.yml)
cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
repo=${sourceLocationPrefix##*/} # keep only last component
# Get sourceLocationPrefix[-2]
owner="${sourceLocationPrefix%/*}" # strip last component
owner="${owner##*/}" # keep only last component
# cid for repository / db
cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
awk '{print substr($1, 1, 6)}')
# Prepare the metadata record for this DB.
new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
record='
{
"git_branch": "HEAD",
"git_commit_id": "'${sha}'",
"git_repo": "'${repo}'",
"ingestion_datetime_utc": "'${creationTime}'",
"result_url": "'${result_url}'",
"tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
"tool_name": "codeql-'${primaryLanguage}'",
"tool_version": "'${cliVersion}'",
"projname": "'${owner}/${repo}'"
}
'
cd "$starting_dir"
rm -fR /tmp/hepc.$$-zip
echo "$record" >> $db_collection_dir/metadata.json
#* Link original file path to collection directory for serving. Use name including
# the cid and field separator ctsj
cd ${db_collection_dir}
[ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}
# Interim cleanup; leave the glob unquoted so it expands, and limit it to
# the per-iteration unzip dir so the paths list survives the loop
rm -fR /tmp/hepc.$$-zip*
done

View File

@@ -1,104 +0,0 @@
/*
dependencies
go get -u golang.org/x/exp/slog
on-the-fly
go run bin/hepc-serve.go --codeql-db-dir db-collection-py-1
compiled
cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
go build -o ./bin/hepc-serve.bin ./bin/hepc-serve.go
test
curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
curl $(head -1 foo | jq -r ".result_url" |sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip
*/
package main
import (
"flag"
"fmt"
"net/http"
"os"
"path/filepath"
"golang.org/x/exp/slog"
)
var dbDir string
func serveFile(w http.ResponseWriter, r *http.Request) {
fullPath := r.URL.Path[len("/db/"):]
resolvedPath, err := filepath.EvalSymlinks(fullPath)
if err != nil {
slog.Warn("failed to resolve symlink", slog.String("fullPath", fullPath),
slog.String("error", err.Error()))
http.Error(w, "File not found", http.StatusNotFound)
return
}
if fileInfo, err := os.Stat(resolvedPath); err != nil || fileInfo.IsDir() {
slog.Warn("file not found or is a directory", slog.String("resolvedPath", resolvedPath))
http.Error(w, "File not found", http.StatusNotFound)
return
}
slog.Info("serving file", slog.String("resolvedPath", resolvedPath))
http.ServeFile(w, r, resolvedPath)
}
func serveMetadata(w http.ResponseWriter, r *http.Request) {
metadataPath := filepath.Join(dbDir, "metadata.json")
if fileInfo, err := os.Stat(metadataPath); err != nil || fileInfo.IsDir() {
slog.Warn("metadata.json not found", slog.String("metadataPath", metadataPath))
http.Error(w, "metadata.json not found", http.StatusNotFound)
return
}
slog.Info("serving metadata.json", slog.String("metadataPath", metadataPath))
http.ServeFile(w, r, metadataPath)
}
func logMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
slog.Info("incoming request", slog.String("method", r.Method), slog.String("url", r.URL.Path))
next.ServeHTTP(w, r)
})
}
func main() {
var host string
var port int
flag.StringVar(&dbDir, "codeql-db-dir", "", "Directory containing CodeQL database files (required)")
flag.StringVar(&host, "host", "127.0.0.1", "Host address for the HTTP server")
flag.IntVar(&port, "port", 8080, "Port for the HTTP server")
flag.Parse()
if dbDir == "" {
slog.Error("missing required flag", slog.String("flag", "--codeql-db-dir"))
os.Exit(1)
}
if _, err := os.Stat(dbDir); os.IsNotExist(err) {
slog.Error("invalid directory", slog.String("dbDir", dbDir))
os.Exit(1)
}
slog.Info("starting server", slog.String("host", host), slog.Int("port", port), slog.String("dbDir", dbDir))
mux := http.NewServeMux()
mux.HandleFunc("/db/", serveFile)
mux.HandleFunc("/index", serveMetadata)
mux.HandleFunc("/api/v1/latest_results/codeql-all", serveMetadata)
loggedHandler := logMiddleware(mux)
addr := fmt.Sprintf("%s:%d", host, port)
slog.Info("server listening", slog.String("address", addr))
if err := http.ListenAndServe(addr, loggedHandler); err != nil {
slog.Error("server error", slog.String("error", err.Error()))
}
}

View File

@@ -1,108 +0,0 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
"""
import argparse
import logging
from argparse import Namespace
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import numpy as np
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
""",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('vscode_selection', type=str,
help='VS Code selection file to generate')
parser.add_argument('gh_mrva_selection', type=str,
help='gh-mrva cli selection file to generate')
parser.add_argument('-n', '--num-entries', type=int,
help='Only use N entries',
default=None)
parser.add_argument('-s', '--seed', type=int,
help='Random number seed',
default=4242)
parser.add_argument('-l', '--list-name', type=str,
help='Name of the repository list',
default='mirva-list')
args: Namespace = parser.parse_args()
#
#* Load the information
#
import pandas as pd
import sys
df0: DataFrame = pd.read_csv(sys.stdin)
if args.num_entries is None:
# Use all entries
df1: DataFrame = df0
else:
# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=args.num_entries,
random_state=np.random.RandomState(args.seed))
#
#* Form and save structures
#
repos: list[str] = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
repos.append(utils.form_db_req_name(owner, name, CID))
repo_list_name: str = args.list_name
vsc = {
"version": 1,
"databases": {
"variantAnalysis": {
"repositoryLists": [
{
"name": repo_list_name,
"repositories": repos,
}
],
"owners": [],
"repositories": []
}
},
"selected": {
"kind": "variantAnalysisUserDefinedList",
"listName": repo_list_name
}
}
gh = {
repo_list_name: repos
}
import json
with open(args.vscode_selection, "w") as fc:
json.dump(vsc, fc, indent=4)
with open(args.gh_mrva_selection, "w") as fc:
json.dump(gh, fc, indent=4)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env python
""" Collect information about CodeQL databases from the file system and write out
a table in CSV format.
"""
from argparse import ArgumentParser
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import argparse
import logging
import sys
import pandas as pd
from qldbtools.utils import DBInfo
#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')
#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
description="""Find all CodeQL DBs in and below starting_dir and export a CSV
file with relevant data.""")
parser.add_argument('starting_dir', type=str,
help='The starting directory to search for codeql.')
args = parser.parse_args()
#
#* Collect info
#
# Get the db information in list of DBInfo form
db_base: str = args.starting_dir
dbs: list[DBInfo] = list(utils.collect_dbs(db_base))
dbdf: DataFrame = pd.DataFrame([d.__dict__ for d in dbs])
#
#
#* Write info out
#
dbdf.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,86 +0,0 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information (like those produced by
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB.
"""
import argparse
import qldbtools.utils as utils
import logging
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information (like those produced by
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB. """)
parser.add_argument('-n', '--num-entries', type=int,
help='Only use N entries',
default=None)
parser.add_argument('-s', '--seed', type=int,
help='Random number seed',
default=4242)
args = parser.parse_args()
#
#* Collect the information and select subset
#
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
# Use all entries
entries = df
else:
# Use num_entries, chosen via pseudo-random numbers
entries = df.sample(n=args.num_entries,
random_state=np.random.RandomState(args.seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"
# Initialize MinIO client
client = Minio(
MINIO_URL.replace("http://", "").replace("https://", ""),
access_key=MINIO_ROOT_USER,
secret_key=MINIO_ROOT_PASSWORD,
secure=False
)
# Create the bucket if it doesn't exist
try:
if not client.bucket_exists(QL_DB_BUCKET_NAME):
client.make_bucket(QL_DB_BUCKET_NAME)
else:
logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
logging.error(f"Error creating bucket: {err}")
# Get info from dataframe and push the files
for index, row in entries[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
new_name = utils.form_db_bucket_name(owner, name, CID)
try:
client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
except S3Error as err:
logging.error(f"Error uploading file {local_path}: {err}")
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,60 +0,0 @@
#!/usr/bin/env python
""" Read an initial table of CodeQL DB information, produced by
mc-db-initial-info, and collect more detailed information from the database
files. Write out an extended table in CSV format.
"""
from argparse import ArgumentParser
from typing import List
from pandas import DataFrame
import qldbtools.utils as utils
import argparse
import logging
import pandas as pd
import sys
#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')
#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
description="""Read an initial table of CodeQL DB information, produced by
mc-db-initial-info, and collect more detailed information from the database
files. Write out an extended table in CSV format. """)
args = parser.parse_args()
#
#* Collect the information
# This step is time-intensive so we save the results right after.
d: DataFrame = pd.read_csv(sys.stdin)
joiners: list[DataFrame] = []
for left_index in range(len(d)):  # iterate over every row, including the last
try:
metac: object
cqlc: object
cqlc, metac = utils.extract_metadata(d.path[left_index])
except utils.ExtractNotZipfile:
continue
except utils.ExtractNoCQLDB:
continue
try:
detail_df: DataFrame = utils.metadata_details(left_index, cqlc, metac)
except utils.DetailsMissing:
continue
joiners.append(detail_df)
joiners_df: DataFrame = pd.concat(joiners, axis=0)
full_df: DataFrame = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')
#
#* Save results
#
full_df.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,122 +0,0 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and produce a table with unique entries
adding the Cumulative ID (CID) column.
To make this happen:
- Group entries by (owner,name,CID),
sort each group by creationTime,
and keep only the top (newest) element.
- Drop rows that don't have the
| cliVersion |
| creationTime |
| language |
| sha |
columns. There are very few (16 out of 6000 on recent tests) and their DBs
are questionable.
"""
import argparse
import logging
from argparse import Namespace
from typing import Any
from pandas import DataFrame, Series
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information,
narrow to <language>,
group entries by (owner,name), sort each group by
creationTime and keep only the top (newest) element.
""")
parser.add_argument('language', type=str,
help='The language to be analyzed.')
args: Namespace = parser.parse_args()
#
#* Collect the information and select subset
#
import pandas as pd
import sys
import qldbtools.utils as utils
df2: DataFrame = pd.read_csv(sys.stdin)
#
#* Add single uniqueness field -- CID (Cumulative ID)
#
df2['CID'] = df2.apply(lambda row:
utils.cid_hash((
row['cliVersion'],
row['creationTime'],
row['language'],
row['sha'],
)), axis=1)
#
#* Re-order the dataframe columns by importance
# - Much of the data
# 1. Is only conditionally present
# 2. Is extra info, not for the DB proper
# 3. May have various names
#
# - The essential columns are
# | owner |
# | name |
# | language |
# | size |
# | cliVersion |
# | creationTime |
# | sha |
# | baselineLinesOfCode |
# | path |
#
# - The rest are useful; put them last
# | db_lang |
# | db_lang_displayName |
# | db_lang_file_count |
# | db_lang_linesOfCode |
# | left_index |
# | ctime |
# | primaryLanguage |
# | finalised |
df3: DataFrame = df2.reindex( columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID',
'baselineLinesOfCode', 'path', 'db_lang',
'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime',
'primaryLanguage', 'finalised', 'left_index',
'size'])
# Identify rows missing specific entries
rows = ( df3['cliVersion'].isna() |
df3['creationTime'].isna() |
df3['language'].isna() |
df3['sha'].isna() )
df4: DataFrame = df3[~rows]
# Limit to one language
df5 = df4[df4['language'] == args.language]
# Sort and group
df_sorted: DataFrame = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique: DataFrame = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
# Write output
df_unique.to_csv(sys.stdout, index=False)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,35 +0,0 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and display it using pandasui
"""
import argparse
import logging
import sys
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
#
#* Process command line
#
parser = argparse.ArgumentParser(
description="Read a table of CodeQL DB information and display it using pandasui")
args = parser.parse_args()
#
#* Collect the information display
#
import pandas as pd
df = pd.read_csv(sys.stdin)
import os
os.environ['APPDATA'] = "needed-for-pandasgui"
from pandasgui import show
show(df)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,120 +0,0 @@
#!/usr/bin/env python3
import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env
# Logging function
def log(level, message):
colors = {
"INFO": "\033[1;34m",
"WARN": "\033[1;33m",
"ERROR": "\033[1;31m",
"RESET": "\033[0m",
}
timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)
# Generate a CID (cumulative id)
def generate_cid(cli_version, creation_time, primary_language, sha):
hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
return hashlib.sha256(hash_input).hexdigest()[:6]
# Expand environment variables in paths
def expand_path(path):
return local.env.expand(path)
# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
temp_dir = mktemp("-d").strip()
try:
unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)
# Locate the YAML file regardless of its depth
yaml_files = list(local.path(temp_dir).walk(
filter=lambda p: p.name == "codeql-database.yml"))
if not yaml_files:
log("WARN", f"No codeql-database.yml found in {zip_path}")
return
yaml_path = yaml_files[0]
with yaml_path.open("r") as f:
yaml_data = yaml.safe_load(f)
primary_language = yaml_data["primaryLanguage"]
creation_metadata = yaml_data["creationMetadata"]
sha = creation_metadata["sha"]
cli_version = creation_metadata["cliVersion"]
creation_time = creation_metadata["creationTime"]
source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
repo = source_location_prefix.name
owner = source_location_prefix.parent.name
cid = generate_cid(cli_version, creation_time, primary_language, sha)
new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"
metadata = {
"git_branch" : "HEAD",
"git_commit_id" : sha,
"git_repo" : repo,
"ingestion_datetime_utc" : str(creation_time),
"result_url" : result_url,
"tool_id" : "9f2f9642-febb-4435-9204-fb50bbd43de4",
"tool_name" : f"codeql-{primary_language}",
"tool_version" : cli_version,
"projname" : f"{owner}/{repo}",
}
metadata_file = local.path(db_collection_dir) / "metadata.json"
with metadata_file.open("a") as f:
json.dump(metadata, f)
f.write("\n")
link_path = local.path(db_collection_dir) / new_db_fname
if not link_path.exists():
ln("-sf", zip_path, link_path)
except Exception as e:
log("WARN", f"Error processing {zip_path}: {e}")
finally:
rm("-rf", temp_dir)
# Main application class
class DBProcessor(cli.Application):
"""
DBProcessor processes db.zip files found in a starting directory,
symlinks updated names in a collection directory,
and adds a metadata information file "metadata.json" to the directory.
"""
db_collection_dir = cli.SwitchAttr(
"--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
)
starting_path = cli.SwitchAttr(
"--starting_path", str, mandatory=True, help="Specify the starting path"
)
def main(self):
db_collection_dir = expand_path(self.db_collection_dir)
starting_path = expand_path(self.starting_path)
mkdir("-p", db_collection_dir)
log("INFO", f"Searching for db.zip files in {starting_path}")
db_files = find(starting_path, "-type", "f", "-name", "db.zip",
"-size", "+0c").splitlines()
if not db_files:
log("WARN", "No db.zip files found in the specified starting path.")
return
for zip_path in db_files:
process_db_file(zip_path, db_collection_dir)
log("INFO", "Processing completed.")
if __name__ == "__main__":
DBProcessor.run()

View File

@@ -1,89 +0,0 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
from plumbum import cli
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import uvicorn
# Logging configuration
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
# FastAPI application
app = FastAPI()
db_dir = None # This will be set by the CLI application
@app.get("/db/{file_path:path}")
def serve_file(file_path: str):
"""
Serve files from the database directory, such as .zip files or metadata.json.
"""
logger.info(f"Requested file: {file_path}")
# Resolve symlinks; with strict=True a missing file raises FileNotFoundError,
# so map that to a 404 instead of an unhandled 500
try:
    resolved_path = Path(file_path).resolve(strict=True)
except FileNotFoundError:
    logger.error(f"File not found: {file_path}")
    raise HTTPException(status_code=404, detail=f"{file_path} not found")
logger.info(f"file resolved to: {resolved_path}")
return FileResponse(resolved_path)
@app.get("/index")
@app.get("/api/v1/latest_results/codeql-all")
def serve_metadata_json():
"""
Serve the metadata.json file for multiple routes.
"""
metadata_path = Path(db_dir) / "metadata.json"
logger.info(f"Requested metadata.json at: {metadata_path}")
if not metadata_path.exists():
logger.error("metadata.json not found.")
raise HTTPException(status_code=404, detail="metadata.json not found")
logger.info(f"Serving metadata.json from: {metadata_path}")
return FileResponse(metadata_path)
@app.middleware("http")
async def log_request(request, call_next):
logger.info(f"Incoming request: {request.method} {request.url}")
response = await call_next(request)
return response
class DBService(cli.Application):
"""
DBService serves:
1. CodeQL database .zip files symlinked in the --codeql-db-dir
2. Metadata for those zip files, contained in metadata.json in the same
directory.
The HTTP endpoints are:
1. /db/{filename}
2. /index
3. /api/v1/latest_results/codeql-all
"""
codeql_db_dir = cli.SwitchAttr("--codeql-db-dir", str, mandatory=True,
help="Directory containing CodeQL database files")
host = cli.SwitchAttr("--host", str, default="127.0.0.1",
help="Host address for the HTTP server")
port = cli.SwitchAttr("--port", int, default=8080, help="Port for the HTTP server")
def main(self):
global db_dir
db_dir = Path(self.codeql_db_dir)
if not db_dir.is_dir():
logger.error(f"Invalid directory: {db_dir}")
return 1
logger.info(f"Starting server at {self.host}:{self.port}")
logger.info(f"Serving files from directory: {db_dir}")
# Run the FastAPI server using Uvicorn
uvicorn.run(app, host=self.host, port=self.port)
if __name__ == "__main__":
DBService.run()
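# A minimal client sketch for the endpoints above (assumes the default
# 127.0.0.1:8080 and the requests package; this is illustrative only and
# not part of the original script):
#
#   import json, requests
#   base = "http://127.0.0.1:8080"
#   meta = requests.get(f"{base}/index").text
#   records = [json.loads(line) for line in meta.splitlines() if line.strip()]
#   fname = records[0]["result_url"].rsplit("/", 1)[-1]
#   db = requests.get(f"{base}/db/{fname}")   # fetch one database zip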

View File

@@ -1,67 +0,0 @@
#!/usr/bin/env python
"""
Script to list full details for a mrva-list file
1. reads files containing
{
"mirva-list": [
"NLPchina/elasticsearch-sqlctsj168cc4",
"LMAX-Exchange/disruptorctsj3e75ec",
"justauth/JustAuthctsj8a6177",
"FasterXML/jackson-modules-basectsj2fe248",
"ionic-team/capacitor-pluginsctsj38d457",
"PaddlePaddle/PaddleOCRctsj60e555",
"elastic/apm-agent-pythonctsj21dc64",
"flipkart-incubator/zjsonpatchctsjc4db35",
"stephane/libmodbusctsj54237e",
"wso2/carbon-kernelctsj5a8a6e",
"apache/servicecomb-packctsj4d98f5"
]
}
2. reads a pandas dataframe stored in a csv file
3. selects all rows from 2. that
- contain the 'owner' column matching the string before the slash from 1. and
- the 'name' column matching the string between the slash and the marker
'ctsj' and
- the 'CID' column matching the string after the marker 'ctsj'
"""
import argparse
import json
import sys
#
#* Process command line
#
parser = argparse.ArgumentParser(
description="""Script to list full details for a mrva-list file""")
parser.add_argument('mrva_list', type=str,
help='The JSON file containing the mrva-list')
parser.add_argument('info_csv', type=str,
help='The CSV file containing the full information')
args = parser.parse_args()
#* Step 1: Read the JSON file containing the "mirva-list"
with open(args.mrva_list, 'r') as f:
data = json.load(f)
# Extract and parse the "mirva-list"
mirva_list = data['mirva-list']
parsed_mirva_list = []
for item in mirva_list:
owner_name = item.split('/')[0]
repo_name = item.split('/')[1].split('ctsj')[0]
cid = item.split('/')[1].split('ctsj')[1]
parsed_mirva_list.append((owner_name, repo_name, cid))
#* Step 2: Read the CSV file into a pandas dataframe
import pandas as pd
df = pd.read_csv(args.info_csv)
#* Step 3: Filter the dataframe based on the parsed "mirva-list"
filtered_df = df[
df.apply(lambda row:
(row['owner'], row['name'], row['CID']) in parsed_mirva_list, axis=1)]
# Optionally, you can save the filtered dataframe to a new CSV file
filtered_df.to_csv(sys.stdout, index=False)

File diff suppressed because it is too large.

View File

@@ -1,11 +0,0 @@
{
"folders": [
{
"path": "."
}
],
"settings": {
"git.ignoreLimitWarning": true,
"makefile.configureOnOpen": false
}
}

View File

@@ -1,2 +0,0 @@
from . import utils

View File

@@ -1,205 +0,0 @@
""" This module supports the selection of CodeQL databases based on various
criteria.
"""
#* Imports
from dataclasses import dataclass
from pathlib import Path
import datetime
import json
import logging
import os
from typing import Any, Dict, Iterator, List
import pandas as pd
import time
import yaml
import zipfile
from pandas import DataFrame
#* Setup
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s [%(levelname)s] %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
#* Utility functions
def log_and_raise(message):
logging.error(message)
raise Exception(message)
def log_and_raise_e(message, exception):
logging.error(message)
raise exception(message)
def traverse_tree(root: str) -> Iterator[Path]:
root_path = Path(os.path.expanduser(root))
if not root_path.exists() or not root_path.is_dir():
log_and_raise(f"The specified root path '{root}' does not exist or "
"is not a directory.")
for path in root_path.rglob('*'):
if path.is_file():
yield path
elif path.is_dir():
pass
@dataclass
class DBInfo:
ctime : str = '2024-05-13T12:04:01.593586'
language : str = 'cpp'
name : str = 'nanobind'
owner : str = 'wjakob'
path : Path = Path('/Users/.../db.zip')
size : int = 63083064
def collect_dbs(db_base: str) -> Iterator[DBInfo]:
for path in traverse_tree(db_base):
if path.name == "db.zip":
# For the current repository, we have
# In [292]: len(path.parts)
# Out[292]: 14
# and can work from the end to get relevant info from the file path.
db = DBInfo()
(*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
db.path = path
s = path.stat()
db.size = s.st_size
# db.ctime_raw = s.st_ctime
# db.ctime = time.ctime(s.st_ctime)
db.ctime = datetime.datetime.fromtimestamp(s.st_ctime).isoformat()
yield db
def extract_metadata(zipfile_path: str) -> tuple[object,object]:
"""
extract_metadata(zipfile)
Unzip zipfile into memory and return the contents of the files
codeql-database.yml and baseline-info.json that it contains in a tuple
"""
codeql_content = None
meta_content = None
try:
with zipfile.ZipFile(zipfile_path, 'r') as z:
for file_info in z.infolist():
# Filenames seen
# java/codeql-database.yml
# codeql_db/codeql-database.yml
if file_info.filename.endswith('codeql-database.yml'):
with z.open(file_info) as f:
codeql_content = yaml.safe_load(f)
# And
# java/baseline-info.json
# codeql_db/baseline-info.json
elif file_info.filename.endswith('baseline-info.json'):
with z.open(file_info) as f:
meta_content = json.load(f)
except zipfile.BadZipFile:
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNotZipfile)
# The baseline-info is only available in more recent CodeQL versions
if not meta_content:
meta_content = {'languages':
{'no-language': {'displayName': 'no-language',
'files': [],
'linesOfCode': -1,
'name': 'nolang'},
}}
if not codeql_content:
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNoCQLDB)
return codeql_content, meta_content
class ExtractNotZipfile(Exception): pass
class ExtractNoCQLDB(Exception): pass
def metadata_details(left_index: int, codeql_content: object, meta_content: object) -> pd.DataFrame:
"""
metadata_details(codeql_content, meta_content)
Extract the details from metadata that will be used in DB selection and return a
dataframe with the information. Example, cropped to fit:
full_df.T
Out[535]:
0 1
left_index 0 0
baselineLinesOfCode 17990 17990
primaryLanguage cpp cpp
sha 288920efc079766f4 282c20efc079766f4
cliVersion 2.17.0 2.17.0
creationTime .325253+00:00 51.325253+00:00
finalised True True
db_lang cpp python
db_lang_displayName C/C++ Python
db_lang_file_count 102 27
db_lang_linesOfCode 17990 5586
"""
cqlc, metac = codeql_content, meta_content
d = {'left_index': left_index,
'baselineLinesOfCode': cqlc['baselineLinesOfCode'],
'primaryLanguage': cqlc['primaryLanguage'],
'sha': cqlc['creationMetadata'].get('sha', 'abcde0123'),
'cliVersion': cqlc['creationMetadata']['cliVersion'],
'creationTime': cqlc['creationMetadata']['creationTime'],
'finalised': cqlc.get('finalised', pd.NA),
}
f = pd.DataFrame(d, index=[0])
joiners: list[dict[str, int | Any]] = []
if 'languages' not in metac:
log_and_raise_e("Missing 'languages' in metadata", DetailsMissing)
for lang, lang_cont in metac['languages'].items():
d1: dict[str, int | Any] = { 'left_index' : left_index,
'db_lang': lang }
for prop, val in lang_cont.items():
if prop == 'files':
d1['db_lang_file_count'] = len(val)
elif prop == 'linesOfCode':
d1['db_lang_linesOfCode'] = val
elif prop == 'displayName':
d1['db_lang_displayName'] = val
joiners.append(d1)
fj: DataFrame = pd.DataFrame(joiners)
full_df: DataFrame = pd.merge(f, fj, on='left_index', how='outer')
return full_df
class DetailsMissing(Exception): pass
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Take a bytes object and return hash as hex string
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
def form_db_bucket_name(owner, name, CID):
"""
form_db_bucket_name(owner, name, CID)
Return the name to use in minio storage; this function is trivial and used to
enforce consistent naming.
The 'ctsj' marker is a fixed, distinctive string separating the name from the CID.
"""
return f'{owner}${name}ctsj{CID}.zip'
def form_db_req_name(owner: str, name: str, CID: str) -> str:
"""
form_db_req_name(owner, name, CID)
Return the name to use in mrva requests; this function is trivial and used to
enforce consistent naming.
The 'ctsj' marker is a fixed, distinctive string separating the name from the CID.
"""
return f'{owner}/{name}ctsj{CID}'
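def split_db_req_name(req_name: str) -> tuple:
    """
    split_db_req_name(req_name)
    Inverse of form_db_req_name: split 'owner/namectsjCID' back into
    (owner, name, CID). Illustrative helper only -- it is not part of the
    original module, but mirrors the parsing done by the mrva-list scripts.
    """
    owner, rest = req_name.split('/', 1)
    name, cid = rest.rsplit('ctsj', 1)
    return owner, name, cid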
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,109 +0,0 @@
annotated-types==0.7.0
anyio==4.4.0
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==24.2.0
babel==2.16.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.9.0
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastapi==0.115.5
fastjsonschema==2.20.0
Flask==3.1.0
fqdn==1.5.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipykernel==6.29.5
ipython==8.26.0
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
minio==7.2.8
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.1.0
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.2
plumbum==1.9.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.1.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.20.0
Send2Trash==1.8.3
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.41.3
terminado==0.18.1
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20240821
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.32.1
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3

View File

@@ -1,61 +0,0 @@
""" Read a table of CodeQL DB information
and generate the selection files for
1. the VS Code CodeQL plugin
2. the gh-mrva command-line client
"""
#
#* Collect the information and write files
#
import pandas as pd
import sys
import qldbtools.utils as utils
import numpy as np
import importlib
importlib.reload(utils)
df0 = pd.read_csv('scratch/db-info-3.csv')
# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))
repos = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
owner, name, CID, path = row
repos.append(utils.form_db_req_name(owner, name, CID))
repo_list_name = "mirva-list"
vsc = {
"version": 1,
"databases": {
"variantAnalysis": {
"repositoryLists": [
{
"name": repo_list_name,
"repositories": repos,
}
],
"owners": [],
"repositories": []
}
},
"selected": {
"kind": "variantAnalysisUserDefinedList",
"listName": repo_list_name
}
}
gh = {
repo_list_name: repos
}
# write the files
import json
with open("tmp-selection-vsc.json", "w") as fc:
json.dump(vsc, fc, indent=4)
with open("tmp-selection-gh.json", "w") as fc:
json.dump(gh, fc, indent=4)
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,59 +0,0 @@
#* Experimental work with utils.py, to be merged into it.
# The rest of this interactive script is available as cli script in
# mc-db-initial-info
from utils import *
#* Data collection
# Get the db information in list of DBInfo form
db_base = "~/work-gh/mrva/mrva-open-source-download/"
dbs = list(collect_dbs(db_base))
# Inspect:
from pprint import pprint
pprint(["len", len(dbs)])
pprint(["dbs[0]", dbs[0].__dict__])
pprint(["dbs[-1]", dbs[-1].__dict__])
#
# Get a dataframe
dbdf = pd.DataFrame([d.__dict__ for d in dbs])
#
#* Experiments with on-disk format
# Continue use of raw information in separate session.
#
# PosixPath is a problem for json and parquet
#
dbdf['path'] = dbdf['path'].astype(str)
#
dbdf.to_csv('dbdf.csv')
#
dbdf.to_csv('dbdf.csv.gz', compression='gzip', index=False)
#
dbdf.to_json('dbdf.json')
#
# dbdf.to_hdf('dbdf.h5', key='dbdf', mode='w')
#
# fast, binary
dbdf.to_parquet('dbdf.parquet')
#
# fast
import sqlite3
conn = sqlite3.connect('dbdf.db')
dbdf.to_sql('qldbs', conn, if_exists='replace', index=False)
conn.close()
#
# Sizes:
# ls -laSr dbdf.*
# -rw-r--r--@ 1 hohn staff 101390 Jul 12 14:17 dbdf.csv.gz
# -rw-r--r--@ 1 hohn staff 202712 Jul 12 14:17 dbdf.parquet
# -rw-r--r--@ 1 hohn staff 560623 Jul 12 14:17 dbdf.csv
# -rw-r--r--@ 1 hohn staff 610304 Jul 12 14:17 dbdf.db
# -rw-r--r--@ 1 hohn staff 735097 Jul 12 14:17 dbdf.json
#
# parquet has many libraries, including go: xitongsys/parquet-go
# https://parquet.apache.org/
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,65 +0,0 @@
import qldbtools.utils as utils
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Collect the information and select subset
#
df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
# Use all entries
entries = df
else:
# Use num_entries, chosen via pseudo-random numbers
entries = df.sample(n=3,
random_state=np.random.RandomState(seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"
# Initialize MinIO client
client = Minio(
MINIO_URL.replace("http://", "").replace("https://", ""),
access_key=MINIO_ROOT_USER,
secret_key=MINIO_ROOT_PASSWORD,
secure=False
)
# Create the bucket if it doesn't exist
try:
if not client.bucket_exists(QL_DB_BUCKET_NAME):
client.make_bucket(QL_DB_BUCKET_NAME)
else:
print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
print(f"Error creating bucket: {err}")
# (test) File paths and new names
files_to_upload = {
"cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
"cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}
# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
try:
client.fput_object(QL_DB_BUCKET_NAME, new_name, prefix / Path(local_path))
print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
except S3Error as err:
print(f"Error uploading file {local_path}: {err}")
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,46 +0,0 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd
#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")
# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Hash the string form of row_tuple and return a short hex digest
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
row['sha'],
row['cliVersion'],
row['language'])
), axis=1)
df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index', 'size'])
df1['CID']
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:

View File

@@ -1,118 +0,0 @@
# Experimental work to be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint
#* Reload gzipped CSV file to continue work
dbdf_1 = pd.read_csv('dbdf.csv.gz', compression='gzip')
#
# (old) Consistency check:
# dbdf_1.columns == dbdf.columns
# dbmask = (dbdf_1 != dbdf)
# dbdf_1[dbmask]
# dbdf_1[dbmask].dropna(how='all')
# ctime_raw is different in places, so don't use it.
#
#* Interact with/visualize the dataframe
# Using pandasgui -- qt
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(dbdf_1)
# Using dtale -- web
import dtale
dtale.show(dbdf_1)
#
#
#* Collect metadata from DB zip files
#
#** A manual sample
#
d = dbdf_1
left_index = 0
d.path[0]
cqlc, metac = extract_metadata(d.path[0])
cqlc['baselineLinesOfCode']
cqlc['primaryLanguage']
cqlc['creationMetadata']['sha']
cqlc['creationMetadata']['cliVersion']
cqlc['creationMetadata']['creationTime'].isoformat()
cqlc['finalised']
for lang, lang_cont in metac['languages'].items():
print(lang)
indent = " "
for prop, val in lang_cont.items():
if prop == 'files':
print("%sfiles count %d" % (indent, len(val)))
elif prop == 'linesOfCode':
print("%slinesOfCode %d" % (indent, val))
elif prop == 'displayName':
print("%sdisplayName %s" % (indent, val))
#** Automated for all entries
# The rest of this interactive script is available as cli script in
# mc-db-refine-info
d = dbdf_1
joiners = []
for left_index in range(len(d)):  # cover every row, including the last
try:
cqlc, metac = extract_metadata(d.path[left_index])
except ExtractNotZipfile:
continue
except ExtractNoCQLDB:
continue
try:
detail_df = metadata_details(left_index, cqlc, metac)
except DetailsMissing:
continue
joiners.append(detail_df)
joiners_df = pd.concat(joiners, axis=0)
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')
#** View the full dataframe with metadata
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(full_df)
#** Re-order the dataframe columns by importance
# - Much of the data
# 1. Is only conditionally present
# 2. Is extra info, not for the DB proper
# 3. May have various names
# - The essential columns are
# | owner |
# | name |
# | language |
# | size |
# | cliVersion |
# | creationTime |
# | sha |
# | baselineLinesOfCode |
# | path |
# - The rest are useful; put them last
# | db_lang |
# | db_lang_displayName |
# | db_lang_file_count |
# | db_lang_linesOfCode |
# | left_index |
# | ctime |
# | primaryLanguage |
# | finalised |
final_df = full_df.reindex(columns=['owner', 'name', 'language', 'size', 'cliVersion',
'creationTime', 'sha', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index'])
final_df.to_csv('all-info-table.csv.gz', compression='gzip', index=False)
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#

View File

@@ -1,41 +0,0 @@
# Experimental work for ../bin/mc-db-unique, to be merged into it.
import qldbtools.utils as utils
from pprint import pprint
import pandas as pd
# cd ../
#* Reload CSV file to continue work
df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')
# Identify rows missing specific entries
rows = ( df2['cliVersion'].isna() |
df2['creationTime'].isna() |
df2['language'].isna() |
df2['sha'].isna() )
df2[rows]
df3 = df2[~rows]
df3
#* post-save work
df4 = pd.read_csv('scratch/db-info-3.csv')
# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
# Find duplicates
df_dups = df_unique[df_unique['CID'].duplicated(keep=False)]
len(df_dups)
df_dups['CID']
# Set display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 140)
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#

View File

@@ -1,13 +0,0 @@
from setuptools import setup, find_packages
import glob
setup(
name='qldbtools',
version='0.1.0',
description='A Python package for working with CodeQL databases',
author='Michael Hohn',
author_email='hohn@github.com',
packages=['qldbtools'],
install_requires=[],
scripts=glob.glob("bin/mc-*"),
)

View File

@@ -23,8 +23,7 @@ ARG CODEQL_VERSION=latest
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
unzip \
curl \
ca-certificates \
default-jdk
ca-certificates
# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
@@ -33,15 +32,14 @@ RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
rm /tmp/codeql.zip
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql
ENV CODEQL_CLI_PATH=/opt/codeql
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr
ENV CODEQL_JAVA_HOME=/usr/
# Copy built agent binary from the builder stage
WORKDIR /app

View File

@@ -1,23 +0,0 @@
all: mrva-agent
MAI_TARGET := mrva-agent:0.1.24
mai: mk.mrva-agent
mrva-agent: mk.mrva-agent
mk.mrva-agent:
cd ../../ && docker build -t mrva-agent:0.1.24 -f cmd/agent/Dockerfile .
touch $@
mai-serve: mai
docker run --rm -it ${MAI_TARGET} /bin/bash
clean:
-docker rmi -f ${MAI_TARGET}
-rm mrva-agent
mai-push: mk.mai-push
mk.mai-push: mai
docker tag ${MAI_TARGET} ghcr.io/hohn/${MAI_TARGET}
docker push ghcr.io/hohn/${MAI_TARGET}
touch $@

View File

@@ -3,71 +3,171 @@ package main
import (
"context"
"flag"
"log"
"log/slog"
"os"
"os/signal"
"runtime"
"strconv"
"sync"
"syscall"
"time"
"github.com/elastic/go-sysinfo"
"golang.org/x/exp/slog"
"mrvacommander/pkg/agent"
"mrvacommander/pkg/deploy"
"mrvacommander/pkg/queue"
)
func main() {
slog.Info("Starting agent")
workerCount := flag.Int("workers", 0, "number of workers")
logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
flag.Parse()
const (
workerMemoryMB = 2048 // 2 GB
monitorIntervalSec = 10 // Monitor every 10 seconds
)
// Apply 'loglevel' flag
switch *logLevel {
case "debug":
slog.SetLogLoggerLevel(slog.LevelDebug)
case "info":
slog.SetLogLoggerLevel(slog.LevelInfo)
case "warn":
slog.SetLogLoggerLevel(slog.LevelWarn)
case "error":
slog.SetLogLoggerLevel(slog.LevelError)
default:
log.Printf("Invalid logging verbosity level: %s", *logLevel)
func calculateWorkers() int {
host, err := sysinfo.Host()
if err != nil {
slog.Error("failed to get host info", "error", err)
os.Exit(1)
}
isAgent := true
rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
memInfo, err := host.Memory()
if err != nil {
slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
slog.Error("failed to get memory info", "error", err)
os.Exit(1)
}
// Get available memory in MB
totalMemoryMB := memInfo.Available / (1024 * 1024)
// Ensure we have at least one worker
workers := int(totalMemoryMB / workerMemoryMB)
if workers < 1 {
workers = 1
}
// Limit the number of workers to the number of CPUs
cpuCount := runtime.NumCPU()
if workers > cpuCount {
workers = max(cpuCount, 1)
}
return workers
}
func startAndMonitorWorkers(ctx context.Context, queue queue.Queue, desiredWorkerCount int, wg *sync.WaitGroup) {
currentWorkerCount := 0
stopChans := make([]chan struct{}, 0)
if desiredWorkerCount != 0 {
slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
for i := 0; i < desiredWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go agent.RunWorker(ctx, stopChan, queue, wg)
}
return
}
slog.Info("Worker count not specified, managing based on available memory and CPU")
for {
select {
case <-ctx.Done():
// signal all workers to stop
for _, stopChan := range stopChans {
close(stopChan)
}
return
default:
newWorkerCount := calculateWorkers()
if newWorkerCount != currentWorkerCount {
slog.Info(
"Modifying worker count",
slog.Int("current", currentWorkerCount),
slog.Int("new", newWorkerCount))
}
if newWorkerCount > currentWorkerCount {
for i := currentWorkerCount; i < newWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go agent.RunWorker(ctx, stopChan, queue, wg)
}
} else if newWorkerCount < currentWorkerCount {
for i := newWorkerCount; i < currentWorkerCount; i++ {
close(stopChans[i])
}
stopChans = stopChans[:newWorkerCount]
}
currentWorkerCount = newWorkerCount
time.Sleep(monitorIntervalSec * time.Second)
}
}
}
func main() {
slog.Info("Starting agent")
workerCount := flag.Int("workers", 0, "number of workers")
flag.Parse()
requiredEnvVars := []string{
"MRVA_RABBITMQ_HOST",
"MRVA_RABBITMQ_PORT",
"MRVA_RABBITMQ_USER",
"MRVA_RABBITMQ_PASSWORD",
"CODEQL_JAVA_HOME",
"CODEQL_CLI_PATH",
}
for _, envVar := range requiredEnvVars {
if _, ok := os.LookupEnv(envVar); !ok {
slog.Error("Missing required environment variable", "key", envVar)
os.Exit(1)
}
}
rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")
rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
if err != nil {
slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
os.Exit(1)
}
slog.Info("Initializing RabbitMQ queue")
rabbitMQQueue, err := queue.NewRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
if err != nil {
slog.Error("failed to initialize RabbitMQ", slog.Any("error", err))
os.Exit(1)
}
defer rabbitMQQueue.Close()
artifacts, err := deploy.InitMinIOArtifactStore()
if err != nil {
slog.Error("Failed to initialize artifact store", slog.Any("error", err))
os.Exit(1)
}
databases, err := deploy.InitMinIOCodeQLDatabaseStore()
if err != nil {
slog.Error("Failed to initialize database store", slog.Any("error", err))
os.Exit(1)
}
var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
go agent.StartAndMonitorWorkers(ctx, artifacts, databases, rabbitMQQueue, *workerCount, &wg)
go startAndMonitorWorkers(ctx, rabbitMQQueue, *workerCount, &wg)
slog.Info("Agent started")
// Gracefully exit on SIGINT/SIGTERM
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
<-sigChan
slog.Info("Shutting down agent")
// TODO: fix this to gracefully terminate agent workers during jobs
cancel()
wg.Wait()
slog.Info("Agent shutdown complete")
}

View File

@@ -1,56 +1,38 @@
FROM golang:1.22 AS builder
# Use the ubuntu 22.04 base image
FROM ubuntu:24.10
# Copy the entire project
WORKDIR /app
COPY . .
# Set architecture to arm64
ARG ARCH=arm64
ARG AARCH=aarch64
# Download dependencies
RUN go mod download
# Set the working directory to the cmd/server subproject
WORKDIR /app/cmd/server
# Build the server
RUN go build -o /bin/mrva_server ./main.go
FROM ubuntu:24.10 as runner
# Set environment variables
ENV DEBIAN_FRONTEND=noninteractive
ENV CODEQL_VERSION=codeql-bundle-v2.17.5
ENV CODEQL_DOWNLOAD_URL=https://github.com/github/codeql-action/releases/download/${CODEQL_VERSION}/codeql-bundle-linux64.tar.gz
ENV JDK_VERSION=22.0.1
ENV JDK_DOWNLOAD_URL=https://download.oracle.com/java/21/latest/jdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
ENV JDK_DOWNLOAD_URL=https://download.java.net/java/GA/jdk${JDK_VERSION}/c7ec1332f7bb44aeba2eb341ae18aca4/8/GPL/openjdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest
ENV CODEQL_JAVA_HOME=/usr/local/jdk-${JDK_VERSION}
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
unzip \
curl \
ca-certificates \
default-jdk
# Install necessary tools
RUN apt-get update && \
apt-get install -y curl tar && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
fi && \
echo "Using CodeQL version $CODEQL_VERSION" && \
curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
unzip /tmp/codeql.zip -d /opt && \
rm /tmp/codeql.zip && \
chmod -R +x /opt/codeql
# Add and extract the CodeQL bundle
RUN curl -L $CODEQL_DOWNLOAD_URL -o /tmp/${CODEQL_VERSION}.tar.gz && \
tar -xzf /tmp/${CODEQL_VERSION}.tar.gz -C /opt && \
rm /tmp/${CODEQL_VERSION}.tar.gz
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql
# Add and extract the JDK
RUN curl -L $JDK_DOWNLOAD_URL -o /tmp/jdk-${JDK_VERSION}.tar.gz && \
tar -xzf /tmp/jdk-${JDK_VERSION}.tar.gz -C /usr/local && \
rm /tmp/jdk-${JDK_VERSION}.tar.gz
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr
# Set PATH
ENV PATH=/opt/codeql:"$PATH"
# Set working directory to /app
# Copy built server binary from the builder stage
COPY --from=builder /bin/mrva_server ./mrva_server
# Copy the CodeQL database directory from the builder stage (for standalone mode)
COPY --from=builder /app/cmd/server/codeql ./codeql
# Run the server with the default mode set to container
ENTRYPOINT ["./mrva_server"]
CMD ["--mode=container"]
# Prepare host mount point
RUN mkdir /mrva

View File

@@ -1,26 +0,0 @@
all: mrva-server
MSI_TARGET := mrva-server:0.1.24
msi: mk.mrva-server
mrva-server: mk.mrva-server
mk.mrva-server:
cd ../../ && docker build -t mrva-server:0.1.24 -f cmd/server/Dockerfile .
touch $@
msi-serve: msi
docker run --rm -it ${MSI_TARGET} /bin/bash
clean:
-docker rmi -f ${MSI_TARGET}
-rm mrva-server
msi-push: mk.msi-push
mk.msi-push: mk.mrva-server
docker tag ${MSI_TARGET} ghcr.io/hohn/${MSI_TARGET}
docker push ghcr.io/hohn/${MSI_TARGET}
touch $@
msi-test:
docker pull ghcr.io/hohn/${MSI_TARGET}
docker run --rm -it --name test-mrva-server-codeql ghcr.io/hohn/${MSI_TARGET} sh

Binary file not shown.

View File

@@ -4,21 +4,16 @@
package main
import (
"context"
"flag"
"log"
"log/slog"
"os"
"os/signal"
"path/filepath"
"sync"
"syscall"
"strconv"
"mrvacommander/config/mcc"
"mrvacommander/pkg/agent"
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/deploy"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"mrvacommander/pkg/server"
@@ -30,14 +25,13 @@ func main() {
helpFlag := flag.Bool("help", false, "Display help message")
logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")
dbPathRoot := flag.String("dbpath", "", "Set the root path for the database store if using standalone mode.")
// Custom usage function for the help flag
flag.Usage = func() {
log.Printf("Usage of %s:\n", os.Args[0])
flag.PrintDefaults()
log.Println("\nExamples:")
log.Println("go run main.go --loglevel=debug --mode=container --dbpath=/path/to/db_dir")
log.Println(" go run main.go --loglevel=debug --mode=container")
}
// Parse the flags
@@ -64,20 +58,6 @@ func main() {
os.Exit(1)
}
// Process database root if standalone and not provided
if *mode == "standalone" && *dbPathRoot == "" {
slog.Warn("No database root path provided.")
// Current directory of the Executable has a codeql directory. There.
// Resolve the absolute directory based on os.Executable()
execPath, err := os.Executable()
if err != nil {
slog.Error("Failed to get executable path", slog.Any("error", err))
os.Exit(1)
}
*dbPathRoot = filepath.Dir(execPath) + "/codeql/dbs/"
slog.Info("Using default database root path", "dbPathRoot", *dbPathRoot)
}
// Read configuration
config := mcc.LoadConfig("mcconfig.toml")
@@ -86,10 +66,6 @@ func main() {
log.Printf("Log Level: %s\n", *logLevel)
log.Printf("Mode: %s\n", *mode)
// Handle signals
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
// Apply 'mode' flag
switch *mode {
case "standalone":
@@ -97,7 +73,7 @@ func main() {
sq := queue.NewQueueSingle(2)
ss := state.NewLocalState(config.Storage.StartingID)
as := artifactstore.NewInMemoryArtifactStore()
ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore(*dbPathRoot)
ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore("")
server.NewCommanderSingle(&server.Visibles{
Queue: sq,
@@ -106,53 +82,59 @@ func main() {
CodeQLDBStore: ql,
})
var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
go agent.StartAndMonitorWorkers(ctx, as, ql, sq, 2, &wg)
slog.Info("Started server and standalone agent")
<-sigChan
slog.Info("Shutting down...")
cancel()
wg.Wait()
slog.Info("Agent shutdown complete")
case "container":
isAgent := false
rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
if err != nil {
slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
os.Exit(1)
}
defer rabbitMQQueue.Close()
artifacts, err := deploy.InitMinIOArtifactStore()
if err != nil {
slog.Error("Failed to initialize artifact store", slog.Any("error", err))
os.Exit(1)
}
databases, err := deploy.InitMinIOCodeQLDatabaseStore()
if err != nil {
slog.Error("Failed to initialize database store", slog.Any("error", err))
os.Exit(1)
}
server.NewCommanderSingle(&server.Visibles{
Queue: rabbitMQQueue,
State: state.NewLocalState(config.Storage.StartingID),
Artifacts: artifacts,
CodeQLDBStore: databases,
// FIXME take value from configuration
agent.NewAgentSingle(2, &agent.Visibles{
Queue: sq,
Artifacts: as,
CodeQLDBStore: ql,
})
slog.Info("Started server in container mode.")
<-sigChan
case "container":
rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")
rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
if err != nil {
slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
os.Exit(1)
}
sq, err := queue.NewRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
if err != nil {
slog.Error("Unable to initialize RabbitMQ queue")
os.Exit(1)
}
ss := state.NewContainerState(config.Storage.StartingID)
// TODO: add arguments
as, err := artifactstore.NewMinIOArtifactStore("", "", "")
if err != nil {
slog.Error("Unable to initialize artifact store")
os.Exit(1)
}
// TODO: add arguments
ql, err := qldbstore.NewMinIOCodeQLDatabaseStore("", "", "", "")
if err != nil {
slog.Error("Unable to initialize ql database storage")
os.Exit(1)
}
server.NewCommanderContainer(&server.Visibles{
Queue: sq,
State: ss,
Artifacts: as,
CodeQLDBStore: ql,
})
case "cluster":
// Assemble cluster version
default:
slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster")
slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster\n")
os.Exit(1)
}
slog.Info("Server shutdown complete")
}

View File

@@ -17,15 +17,15 @@ type System struct {
func LoadConfig(fname string) *System {
if _, err := os.Stat(fname); err != nil {
slog.Warn("Configuration file not found", "name", fname)
return &System{}
slog.Error("Configuration file %s not found", fname)
os.Exit(1)
}
var config System
_, err := toml.DecodeFile(fname, &config)
if err != nil {
slog.Error("Error decoding configuration file", err)
slog.Error("", err)
os.Exit(1)
}

View File

@@ -1,7 +0,0 @@
# Use a minimal base image
FROM busybox
ADD dbsdata_backup.tar /
# Just run sh if this container is ever started
CMD ["sh"]

View File

@@ -1,70 +0,0 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Run the raw container assembly
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo-build.yml up -d
# Use the following commands to populate the mrvacommander database storage
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
mkdir -p scratch
source venv/bin/activate
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
# Several seconds start-up time; fast db population
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
# While the containers are running, this will show minio's storage. The zip files
# are split into part.* and xl.meta by minio. Use the web interface to see real
# names.
docker exec dbstore ls -R /data/mrvacommander/
# Open browser to see the file listing
open http://localhost:9001/browser/qldb
# list the volumes
docker volume ls |grep dbs
docker volume inspect mrvacommander_dbsdata
# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata
# Get the data as tar file from the image
docker run --rm \
-v mrvacommander_dbsdata:/data \
-v $(pwd):/backup \
busybox sh -c "tar cvf /backup/dbsdata_backup.tar ."
# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .
docker image ls | grep dbs
# check container contents
docker run -it dbsdata-container:0.1.24 /bin/sh
docker run -it dbsdata-container:0.1.24 ls data/qldb
# Tag the dbstore backing container
docker inspect dbsdata-container:0.1.24 |grep Id
docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24
# Push the pre-populated image
docker push ghcr.io/hohn/dbsdata-container:0.1.24
# Check the tagged image
docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \
ls data/qldb
# Shut down the container assembly
docker-compose -f docker-compose-demo-build.yml down
#+END_SRC

View File

@@ -1,11 +0,0 @@
## The doc/ directory
The `doc/` directory serves as home for documentation. This is the place to
put refined documentation after it has gone through `notes/`. The contents of
this directory should be accessible to a broad audience including prospective
users, active users, and developers. Highly technical
1. The note authors and
2. Developers of the project
It need not be meaningful to casual users.

View File

@@ -1,129 +0,0 @@
# This is the compose configuration used to build / prepopulate the containers for
# a demo.
services:
dbssvc:
## image: ghcr.io/hohn/dbsdata-container:0.1.24
build:
context: .
dockerfile: ./demo/containers/dbsdata/Dockerfile
container_name: dbssvc
volumes:
- dbsdata:/data/mrvacommander/dbstore-data
networks:
- backend
dbstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
container_name: dbstore
ports:
- "9000:9000"
- "9001:9001"
env_file:
- path: .env.container
required: true
command: server /data/mrvacommander/dbstore-data --console-address ":9001"
depends_on:
- dbssvc
volumes:
- dbsdata:/data/mrvacommander/dbstore-data
networks:
- backend
client-ghmrva:
## image: ghcr.io/hohn/client-ghmrva-container:0.1.24
build:
context: .
dockerfile: ./client/containers/ghmrva/Dockerfile
network_mode: "service:server" # Share the 'server' network namespace
environment:
- SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'
code-server:
## image: ghcr.io/hohn/code-server-initialized:0.1.24
build:
context: ./client/containers/vscode
dockerfile: Dockerfile
ports:
- "9080:9080"
environment:
- PASSWORD=mrva
rabbitmq:
image: rabbitmq:3-management
hostname: rabbitmq
container_name: rabbitmq
volumes:
- ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
- ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
ports:
- "5672:5672"
- "15672:15672"
healthcheck:
test: rabbitmq-diagnostics check_port_connectivity
interval: 30s
timeout: 30s
retries: 10
networks:
- backend
server:
build:
context: .
dockerfile: ./cmd/server/Dockerfile
command: [ '--mode=container', '--loglevel=debug' ]
container_name: server
stop_grace_period: 1s
ports:
# - "8081:8080" # host:container for proxy
- "8080:8080" # host:container
depends_on:
- rabbitmq
- dbstore
- artifactstore
env_file:
- path: ./.env.container
required: true
networks:
- backend
artifactstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
container_name: artifactstore
ports:
- "19000:9000" # host:container
- "19001:9001"
env_file:
- path: ./.env.container
required: true
command: server /data --console-address ":9001"
volumes:
# The artifactstore is only populated at runtime so there is no need
# for Docker storage; a directory is fine.
- ./qpstore-data:/data
networks:
- backend
agent:
## image: ghcr.io/hohn/mrva-agent:0.1.24
build:
context: .
dockerfile: ./cmd/agent/Dockerfile
command: [ '--loglevel=debug' ]
container_name: agent
depends_on:
- rabbitmq
- dbstore
- artifactstore
env_file:
- path: ./.env.container
required: true
networks:
- backend
networks:
backend:
driver: bridge
volumes:
dbsdata:

View File

@@ -1,116 +0,0 @@
services:
dbssvc:
# dbsdata-container:0.1.24
image: ghcr.io/hohn/dbsdata-container:0.1.24
command: tail -f /dev/null # Keep the container running
# volumes:
# - /qldb # Directory inside the container that contains the data
volumes:
- dbsdata:/data
container_name: dbssvc
networks:
- backend
dbstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
container_name: dbstore
ports:
- "9000:9000"
- "9001:9001"
env_file:
- path: .env.container
required: true
command: server /data/mrvacommander/dbstore-data --console-address ":9001"
depends_on:
- dbssvc
# volumes_from:
# - dbsdata # Use the volumes from dbsdata container
volumes:
- dbsdata:/data/mrvacommander/dbstore-data
networks:
- backend
client-ghmrva:
image: ghcr.io/hohn/client-ghmrva-container:0.1.24
network_mode: "service:server" # Share the 'server' network namespace
environment:
- SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'
code-server:
image: ghcr.io/hohn/code-server-initialized:0.1.24
ports:
- "9080:9080"
# XX: Include codeql binary in code-server (if it's not there already)
environment:
- PASSWORD=mrva
rabbitmq:
image: rabbitmq:3-management
hostname: rabbitmq
container_name: rabbitmq
volumes:
- ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
- ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
ports:
- "5672:5672"
- "15672:15672"
healthcheck:
test: rabbitmq-diagnostics check_port_connectivity
interval: 30s
timeout: 30s
retries: 10
networks:
- backend
server:
image: ghcr.io/hohn/mrva-server:0.1.24
command: [ '--mode=container', '--loglevel=debug' ]
container_name: server
stop_grace_period: 1s
depends_on:
- rabbitmq
- dbstore
- artifactstore
env_file:
- path: ./.env.container
required: true
networks:
- backend
artifactstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
container_name: artifactstore
ports:
- "19000:9000" # host:container
- "19001:9001"
env_file:
- path: ./.env.container
required: true
command: server /data --console-address ":9001"
volumes:
# The artifactstore is only populated at runtime so there is no need
# for Docker storage; a directory is fine.
- ./qpstore-data:/data
networks:
- backend
agent:
image: ghcr.io/hohn/mrva-agent:0.1.24
command: [ '--loglevel=debug' ]
container_name: agent
depends_on:
- rabbitmq
- dbstore
- artifactstore
env_file:
- path: ./.env.container
required: true
networks:
- backend
networks:
backend:
driver: bridge
volumes:
dbsdata:

View File

@@ -7,36 +7,37 @@ services:
volumes:
- ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
- ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
expose:
- "5672"
- "15672"
ports:
- "5672:5672"
- "15672:15672"
networks:
- backend
healthcheck:
test: rabbitmq-diagnostics check_port_connectivity
interval: 30s
timeout: 30s
retries: 10
test: [ "CMD", "nc", "-z", "localhost", "5672" ]
interval: 5s
timeout: 15s
retries: 1
server:
build:
context: .
dockerfile: ./cmd/server/Dockerfile
command: [ '--mode=container', '--loglevel=debug' ]
context: ./cmd/server
dockerfile: Dockerfile
container_name: server
stop_grace_period: 1s
stop_grace_period: 1s # Reduce the timeout period for testing
environment:
- MRVA_SERVER_ROOT=/mrva/mrvacommander/cmd/server
command: sh -c "tail -f /dev/null"
ports:
# - "8081:8080" # host:container for proxy
- "8080:8080" # host:container
- "8080:8080"
volumes:
- ./:/mrva/mrvacommander
depends_on:
- rabbitmq
- dbstore
- artifactstore
networks:
- backend
env_file:
- path: ./.env.container
required: true
dbstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
@@ -44,46 +45,49 @@ services:
ports:
- "9000:9000"
- "9001:9001"
env_file:
- path: .env.container
required: true
environment:
MINIO_ROOT_USER: user
MINIO_ROOT_PASSWORD: mmusty8432
command: server /data --console-address ":9001"
volumes:
- ./dbstore-data:/data
networks:
- backend
artifactstore:
qpstore:
image: minio/minio:RELEASE.2024-06-11T03-13-30Z
container_name: artifactstore
container_name: qpstore
ports:
- "19000:9000" # host:container
- "19001:9001"
env_file:
- path: ./.env.container
required: true
environment:
MINIO_ROOT_USER: user
MINIO_ROOT_PASSWORD: mmusty8432
command: server /data --console-address ":9001"
volumes:
- ./qpstore-data:/data
networks:
- backend
agent:
build:
context: .
dockerfile: ./cmd/agent/Dockerfile
command: [ '--loglevel=debug' ]
container_name: agent
depends_on:
- rabbitmq
- dbstore
- artifactstore
env_file:
- path: ./.env.container
required: true
- qpstore
environment:
MRVA_RABBITMQ_HOST: rabbitmq
MRVA_RABBITMQ_PORT: 5672
MRVA_RABBITMQ_USER: user
MRVA_RABBITMQ_PASSWORD: password
networks:
- backend
networks:
backend:
driver: bridge
# Remove named volumes to use bind mounts
# volumes:
# minio-data:

go.mod
View File

@@ -9,7 +9,7 @@ require (
github.com/gorilla/mux v1.8.1
github.com/minio/minio-go/v7 v7.0.71
github.com/rabbitmq/amqp091-go v1.10.0
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.5.9
gorm.io/gorm v1.25.10
@@ -35,7 +35,7 @@ require (
github.com/rs/xid v1.5.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sync v0.9.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect

go.sum
View File

@@ -68,14 +68,10 @@ golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=

View File

@@ -6,8 +6,6 @@
],
"settings": {
"sarif-viewer.connectToGithubCodeScanning": "off",
"codeQL.githubDatabase.download": "never",
"makefile.configureOnOpen": false,
"git.ignoreLimitWarning": true
"codeQL.githubDatabase.download": "never"
}
}

View File

@@ -1,5 +0,0 @@
view: docker-demo-container-deps.pdf
open $<
docker-demo-container-deps.pdf: docker-demo-container-deps.dot
dot -Tpdf $< > $@

View File

@@ -1,9 +0,0 @@
## The notes/ directory
The `notes/` directory serves as a staging directory for documentation. This is
the place to develop documentation and short notes. The contents of this
directory should be accessible to
1. The note authors and
2. Developers of the project
They need not be meaningful to casual users.

View File

@@ -1,471 +0,0 @@
# -*- coding: utf-8 -*-
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil
* End-to-end example of CLI use
This document walks through an end-to-end MRVA run from the command line using
the demo containers.
* Database Acquisition
For this demo, the data is preloaded via a container. To inspect the container:
#+BEGIN_SRC sh
# On host, run
docker exec -it dbstore /bin/bash
# In the container
ls -la /data/dbstore-data/
ls /data/dbstore-data/qldb/ | wc -l
#+END_SRC
Here we use a small sample of databases built from open-source
repositories, 23 in all.
* Repository Selection
When using the full MRVA system, you would select a small subset of the
repositories made available in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small
collection -- 23 repositories -- and here we further narrow the selection to 11.
The full list
#+BEGIN_SRC text
ls -1 /data/dbstore-data/qldb/
'BoomingTech$Piccoloctsj6d7177.zip'
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
'StanfordLegion$legionctsj39cbe4.zip'
'USCiLab$cerealctsj264953.zip'
'WinMerge$winmergectsj101305.zip'
'draios$sysdigctsj12c02d.zip'
'gildor2$UEViewerctsjfefdd8.zip'
'git-for-windows$gitctsjb7c2bd.zip'
'google$orbitctsj9bbeaf.zip'
'libfuse$libfusectsj7a66a4.zip'
'luigirizzo$netmapctsj6417fa.zip'
'mawww$kakounectsjc54fab.zip'
'microsoft$node-native-keymapctsj4cc9a2.zip'
'nem0$LumixEnginectsjfab756.zip'
'pocoproject$pococtsj26b932.zip'
'quickfix$quickfixctsjebfd13.zip'
'rui314$moldctsjfec16a.zip'
'swig$swigctsj78bcd3.zip'
'tdlib$telegram-bot-apictsj8529d9.zip'
'timescale$timescaledbctsjf617cf.zip'
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
'xrootd$xrootdctsje4b745.zip'
#+END_SRC
The selection of 11 repositories, from an initial collection of 6000, was made
using a collection of Python/pandas scripts made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
package. The resulting selection, in the format expected by the VS Code
extension, follows.
#+BEGIN_SRC text
cat /data/qldbtools/scratch/vscode-selection.json
{
"version": 1,
"databases": {
"variantAnalysis": {
"repositoryLists": [
{
"name": "mirva-list",
"repositories": [
"xoreaxeaxeax/movfuscatorctsj8f7e5b",
"microsoft/node-native-keymapctsj4cc9a2",
"BoomingTech/Piccoloctsj6d7177",
"USCiLab/cerealctsj264953",
"KhronosGroup/OpenXR-SDKctsj984ee6",
"tdlib/telegram-bot-apictsj8529d9",
"WinMerge/winmergectsj101305",
"timescale/timescaledbctsjf617cf",
"pocoproject/pococtsj26b932",
"quickfix/quickfixctsjebfd13",
"libfuse/libfusectsj7a66a4"
]
}
],
"owners": [],
"repositories": []
}
},
"selected": {
"kind": "variantAnalysisUserDefinedList",
"listName": "mirva-list"
}
}
#+END_SRC
This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository
Selection]] in the detailed version of this document.
** Optional: The meaning of the names
The repository names all end with =ctsj= followed by 6 hex digits like
=ctsj4cc9a2=.
The information critical for selecting databases is in the columns
1. owner
2. name
3. language
4. "sha"
5. "cliVersion"
6. "creationTime"
There are others that may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
created from. Required to distinguish query results over the evolution of
a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
very handy) for following the evolution of query results over time.
There is a computed column, CID. The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing. Together with (owner, repo) it
provides a unique index for every DB.
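As a concrete sketch (the field values below are hypothetical), the CID is the
6-character blake2b digest of those four fields, matching =cid_hash= in
qldbtools:
#+BEGIN_SRC python
from hashlib import blake2b

def cid_hash(row_tuple: tuple) -> str:
    h = blake2b(digest_size=3)           # 3 bytes -> 6 hex characters
    h.update(str(row_tuple).encode())
    return h.hexdigest()

# Hypothetical (creationTime, sha, cliVersion, language) tuple:
print(cid_hash(("2024-05-13T12:04:01", "288920e", "2.17.0", "cpp")))
#+END_SRC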
For this document, we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+END_SRC
Note that these use pseudo-random numbers, so the selection is in fact
deterministic.
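A sketch of that deterministic sampling, mirroring what
=mc-db-generate-selection= does internally (the CSV path is the one used
throughout this demo):
#+BEGIN_SRC python
import numpy as np
import pandas as pd

df = pd.read_csv("scratch/db-info-3.csv")
# A fixed seed makes the "random" subset reproducible across runs
picked = df.sample(n=11, random_state=np.random.RandomState(4242))
print(picked[["owner", "name", "CID"]])
#+END_SRC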
* Starting the server
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
#+END_SRC
Make sure Docker is installed and running.
With docker-compose set up and this repository cloned, we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml up -d
#+END_SRC
and wait until the log output no longer changes.
The output should look like
#+BEGIN_SRC text
docker-compose -f docker-compose-demo.yml up -d
[+] Running 27/6
✔ dbstore Pulled 1.1s
✔ artifactstore Pulled 1.1s
✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s
✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s
✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s
✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s
[+] Running 9/9
✔ Container mrvadata Started 0.3s
✔ Container mrvacommander-client-qldbtools-1 Started 0.3s
✔ Container mrvacommander-client-ghmrva-1 Running 0.0s
✔ Container mrvacommander-code-server-1 Running 0.0s
✔ Container artifactstore Running 0.0s
✔ Container rabbitmq Running 0.0s
✔ Container dbstore Started 0.4s
✔ Container agent Started 0.5s
✔ Container server Started 0.5s
#+END_SRC
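To watch for that quiescence, following the logs is convenient:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml logs -f --tail=10
# Interrupt with C-c once the output settles.
#+END_SRC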
The content is prepopulated in the =dbstore= container.
** Optional: Inspect the Backing Store
As a completely optional step, you can inspect the backing store:
#+BEGIN_SRC sh
docker exec -it dbstore /bin/bash
ls /data/qldb/
# 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip'
# 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip'
# ...
#+END_SRC
** Optional: Inspect the MinIO DB
As another completely optional step, you can inspect the MinIO DB contents if
you have the MinIO client installed:
#+BEGIN_SRC sh
# Configuration
MINIO_ALIAS="qldbminio"
MINIO_URL="http://localhost:9000"
MINIO_ROOT_USER="user"
MINIO_ROOT_PASSWORD="mmusty8432"
QL_DB_BUCKET_NAME="qldb"
# Check for MinIO client
if ! command -v mc &> /dev/null
then
echo "MinIO client (mc) not found."
exit 1
fi
# Configure MinIO client
mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
# Show contents
mc ls qldbminio/qldb
#+END_SRC
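With the alias in place you can also fetch a single database for local
inspection; note the single quotes, since the object names contain =$=:
#+BEGIN_SRC sh
mc cp 'qldbminio/qldb/BoomingTech$Piccoloctsj6d7177.zip' /tmp/
unzip -l '/tmp/BoomingTech$Piccoloctsj6d7177.zip' | head
#+END_SRC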
* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
# From ~/work-gh/mrva/gh-mrva
1. Check mrva cli
#+BEGIN_SRC sh
docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h
#+END_SRC
2. Set up the configuration
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/.config/gh-mrva/'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/.config/gh-mrva/config.yml' <<eof
codeql_path: not-used/$HOME/work-gh
controller: not-used/mirva-controller
list_file: /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
# check:
docker exec -i mrvacommander-client-ghmrva-1 ls /root/.config/gh-mrva/config.yml
docker exec -i mrvacommander-client-ghmrva-1 cat /root/.config/gh-mrva/config.yml
#+END_SRC
3. Provide the repository list file
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'mkdir -p /root/work-gh/mrva/gh-mrva'
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' <<eof
{
"mirva-list": [
"xoreaxeaxeax/movfuscatorctsj8f7e5b",
"microsoft/node-native-keymapctsj4cc9a2",
"BoomingTech/Piccoloctsj6d7177",
"USCiLab/cerealctsj264953",
"KhronosGroup/OpenXR-SDKctsj984ee6",
"tdlib/telegram-bot-apictsj8529d9",
"WinMerge/winmergectsj101305",
"timescale/timescaledbctsjf617cf",
"pocoproject/pococtsj26b932",
"quickfix/quickfixctsjebfd13",
"libfuse/libfusectsj7a66a4"
]
}
eof
#+END_SRC
4. Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' <<eof
/**
,* @name pickfun
,* @description pick function from FlatBuffers
,* @kind problem
,* @id cpp-flatbuffer-func
,* @problem.severity warning
,*/
import cpp
from Function f
where
f.getName() = "MakeBinaryRegion" or
f.getName() = "microprotocols_add"
select f, "definition of MakeBinaryRegion"
eof
#+END_SRC
5. Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1360 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC
6. Check the status
#+BEGIN_SRC sh
# Check the status
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1360
#+END_SRC
7. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1360 \
--download-dbs \
--output-dir mirva-session-1360
#+END_SRC
** TODO Write query that has some results
XX:
In this case we use a trivial query for =alu_mul=, a function defined in
https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c
#+BEGIN_SRC java
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
#+END_SRC
Repeat the submit steps with this query:
1. [X] --
2. [X] --
3. [ ] Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <<eof
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
eof
#+END_SRC
4. [-] Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
submit --language cpp --session mirva-session-1490 \
--list mirva-list \
--query /root/work-gh/mrva/gh-mrva/Alu_Mul.ql
#+END_SRC
- [X] XX: the first submission found no repositories:
: server | 2024/09/27 20:03:16 DEBUG Processed request info location="{Key:3 Bucket:packs}" language=cpp
: server | 2024/09/27 20:03:16 WARN No repositories found for analysis
: server | 2024/09/27 20:03:16 DEBUG Queueing analysis jobs count=0
: server | 2024/09/27 20:03:16 DEBUG Forming and sending response for submitted analysis job id=3
NO: debugging in the server container did not work out:
#+BEGIN_SRC sh
docker exec -it server /bin/bash
apt-get update
apt-get install delve
# Then, in the Dockerfile, replace:
#   ENTRYPOINT ["./mrva_server"]
#   CMD ["--mode=container"]
#+END_SRC
- [ ] XX:
The dbstore is empty -- see http://localhost:9001/browser.
It must be populated properly, then the image saved.
5. [ ] Check the status
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
status --session mirva-session-1490
#+END_SRC
This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-1490
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 7
Total findings: 618
Repositories with findings:
quickfix/quickfixctsjebfd13 (cpp-fprintf-call): 5
libfuse/libfusectsj7a66a4 (cpp-fprintf-call): 146
xoreaxeaxeax/movfuscatorctsj8f7e5b (cpp-fprintf-call): 80
pocoproject/pococtsj26b932 (cpp-fprintf-call): 17
BoomingTech/Piccoloctsj6d7177 (cpp-fprintf-call): 10
tdlib/telegram-bot-apictsj8529d9 (cpp-fprintf-call): 247
WinMerge/winmergectsj101305 (cpp-fprintf-call): 113
#+END_SRC
6. [ ] Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
download --session mirva-session-1490 \
--download-dbs \
--output-dir mirva-session-1490
# And list them:
\ls -la *1490*
#+END_SRC
7. [ ] Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.
Prepare the source directory so the viewer can be pointed at it:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-1490
unzip -qd BoomingTech_Piccoloctsj6d7177_1_db BoomingTech_Piccoloctsj6d7177_1_db.zip
cd BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/
unzip -qd src src.zip
#+END_SRC
Use the viewer:
#+BEGIN_SRC sh
code BoomingTech_Piccoloctsj6d7177_1.sarif
# For lauxlib.c, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder -name lauxlib.c
# Here: ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder/engine/3rdparty/lua-5.4.4/lauxlib.c
#+END_SRC
8. [ ] (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed (see [[https://github.com/hohn/sarif-cli/][sarif-cli]]), or start with a quick per-file count as sketched below.
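A minimal sketch of that count, assuming =jq= is available wherever the
downloaded SARIF files ended up:
#+BEGIN_SRC sh
for f in mirva-session-1490/*.sarif; do
    printf '%s: ' "$f"
    jq '[.runs[].results | length] | add' "$f"
done
#+END_SRC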
* Running the CodeQL VS Code plugin
- [ ] XX: include the *custom* codeql plugin in the container.
* Ending the session
Shut down the containers via
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml down
#+END_SRC
* Footnotes
[fn:1] The =csvkit= tools can be installed into the same Python virtual
environment as the =qldbtools=.

View File

@@ -1,493 +0,0 @@
# -*- coding: utf-8 -*-
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil
* End-to-end example of CLI use
This document describes a complete cycle of the MRVA workflow, but using
pre-populated data. The steps included are
1. acquiring CodeQL databases
2. selection of databases
3. configuration and use of the command-line client
4. server startup
5. submission of the jobs
6. retrieval of the results
7. examination of the results
* Start the containers
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo.yml down --volumes --remove-orphans
docker-compose -f docker-compose-demo.yml up --build
#+END_SRC
* Database Acquisition
General database acquisition is beyond the scope of this document, as it is
very specific to an organization's environment.
For this demo, the data is preloaded via container. To inspect it:
#+BEGIN_SRC sh
# On host, run
docker exec -it dbstore /bin/bash
# In the container
ls -la /data/mrvacommander/dbstore-data/qldb
# Or in one step
docker exec -it dbstore ls -la /data/mrvacommander/dbstore-data/qldb
#+END_SRC
Here we use a small sample of open-source repositories, 23 in all.
* Repository Selection
When using the full MRVA system, you select a small subset of the repositories
made available in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small
collection -- 23 repositories -- and here we further narrow the selection to 11.
The full list:
#+BEGIN_SRC text
ls -1 /data/dbstore-data/qldb/
'BoomingTech$Piccoloctsj6d7177.zip'
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
'StanfordLegion$legionctsj39cbe4.zip'
'USCiLab$cerealctsj264953.zip'
'WinMerge$winmergectsj101305.zip'
'draios$sysdigctsj12c02d.zip'
'gildor2$UEViewerctsjfefdd8.zip'
'git-for-windows$gitctsjb7c2bd.zip'
'google$orbitctsj9bbeaf.zip'
'libfuse$libfusectsj7a66a4.zip'
'luigirizzo$netmapctsj6417fa.zip'
'mawww$kakounectsjc54fab.zip'
'microsoft$node-native-keymapctsj4cc9a2.zip'
'nem0$LumixEnginectsjfab756.zip'
'pocoproject$pococtsj26b932.zip'
'quickfix$quickfixctsjebfd13.zip'
'rui314$moldctsjfec16a.zip'
'swig$swigctsj78bcd3.zip'
'tdlib$telegram-bot-apictsj8529d9.zip'
'timescale$timescaledbctsjf617cf.zip'
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
'xrootd$xrootdctsje4b745.zip'
#+END_SRC
The selection of 11 repositories from an initial collection of 6000 was made
using a collection of Python/pandas scripts made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
package. The resulting selection files and the remaining demo steps are
identical to those shown in the preceding sections.

View File

@@ -1,524 +0,0 @@
# -*- coding: utf-8 -*-
* End-to-end example of CLI use
This document describes a complete cycle of the MRVA workflow. The steps
included are
1. acquiring CodeQL databases
2. selection of databases
3. configuration and use of the command-line client
4. server startup
5. submission of the jobs
6. retrieval of the results
7. examination of the results
* Database Acquisition
General database acquisition is beyond the scope of this document, as it is very
specific to an organization's environment. Here we use an example for open-source
repositories, [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]], which downloads the top 1000 databases for each of
C/C++, Java, Python -- 3000 CodeQL DBs in all.
The scripts in [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]] were used to download on two distinct dates
resulting in close to 6000 databases to choose from. The DBs were directly
saved to the file system, resulting in paths like
: .../mrva-open-source-download/repos-2024-04-29/google/re2/code-scanning/codeql/databases/cpp/db.zip
and
: .../mrva-open-source-download/repos/google/re2/code-scanning/codeql/databases/cpp/db.zip
Note that the only information in these paths is (owner, repository, download
date). The databases contain more information, which is used in the [[*Repository
Selection]] section.
To get a collection of databases follow the [[https://github.com/hohn/mrva-open-source-download?tab=readme-ov-file#mrva-download][instructions]].
* Repository Selection
Here we select a small subset of those repositories using a collection of
scripts made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]] package.
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
#+END_SRC
After performing the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][installation]] steps, we can follow the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#command-line-use][command line]] use
instructions to collect all the database information from the file system into a
single table:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
source venv/bin/activate
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
#+END_SRC
The [[https://csvkit.readthedocs.io/en/latest/scripts/csvstat.html][=csvstat=]] tool gives a good overview[fn:1]; here is a pruned version of the
output
#+BEGIN_SRC text
csvstat scratch/db-info-1.csv
1. "ctime"
Type of data: DateTime
...
2. "language"
Type of data: Text
Non-null values: 6000
Unique values: 3
Longest value: 6 characters
Most common values: cpp (2000x)
java (2000x)
python (2000x)
3. "name"
...
4. "owner"
Type of data: Text
Non-null values: 6000
Unique values: 2189
Longest value: 29 characters
Most common values: apache (258x)
google (86x)
microsoft (64x)
spring-projects (56x)
alibaba (42x)
5. "path"
...
6. "size"
Type of data: Number
Non-null values: 6000
Unique values: 5354
Smallest value: 0
Largest value: 1,885,008,701
Sum: 284,766,326,993
...
Row count: 6000
#+END_SRC
The information critical for selection is in the columns
1. owner
2. name
3. language
The size column is interesting: a smallest value of 0 indicates some error,
while our largest DB is 1.88 GB in size. The zero-size entries are easy to
list, as sketched below.
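A quick way to list those suspicious entries uses =csvgrep= from the same
=csvkit= mentioned in the footnote:
#+BEGIN_SRC sh
# Show rows whose recorded size is exactly 0.
csvgrep -c size -r '^0$' scratch/db-info-1.csv | csvlook
#+END_SRC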
This information is not sufficient, so we collect more. The following script
extracts information from every database on disk and takes more time accordingly
-- about 30 seconds on my laptop.
#+BEGIN_SRC sh
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
#+END_SRC
This new table is a merge of all the available meta-information with the
previous table, which causes the increase in the number of rows. The following
columns are now present:
0:$ csvstat scratch/db-info-2.csv
1. "ctime"
2. "language"
3. "name"
4. "owner"
5. "path"
6. "size"
7. "left_index"
8. "baselineLinesOfCode"
Type of data: Number
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 4708
Smallest value: 0
Largest value: 22,028,732
Sum: 3,454,019,142
Mean: 289,766.707
Median: 54,870.5
9. "primaryLanguage"
10. "sha"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 4928
11. "cliVersion"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 59
Longest value: 6 characters
Most common values: 2.17.0 (3850x)
2.18.0 (3622x)
2.17.2 (1097x)
2.17.6 (703x)
2.16.3 (378x)
12. "creationTime"
Type of data: Text
Contains null values: True (excluded from calculations)
Non-null values: 11920
Unique values: 5345
Longest value: 32 characters
Most common values: None (19x)
2024-03-19 01:40:14.507823+00:00 (16x)
2024-02-29 19:12:59.785147+00:00 (16x)
2024-01-30 22:24:17.411939+00:00 (14x)
2024-04-05 09:34:03.774619+00:00 (14x)
13. "finalised"
Type of data: Boolean
Contains null values: True (excluded from calculations)
Non-null values: 11617
Unique values: 2
Most common values: True (11617x)
None (322x)
14. "db_lang"
15. "db_lang_displayName"
16. "db_lang_file_count"
17. "db_lang_linesOfCode"
Row count: 11939
#+END_SRC
There are several columns that are critical, namely
1. "sha"
2. "cliVersion"
3. "creationTime"
The others may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
created from. Required to distinguish query results over the evolution of
a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
very handy) for following the evolution of query results over time.
This leaves us with a row count of 11939.
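To eyeball just those critical columns, =csvcut= (also from =csvkit=) works
well:
#+BEGIN_SRC sh
csvcut -c owner,name,language,sha,cliVersion,creationTime scratch/db-info-2.csv \
    | csvlook | head
#+END_SRC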
To start reducing that count, run
#+BEGIN_SRC sh
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
#+END_SRC
and get a reduced count and a new column:
#+BEGIN_SRC text
csvstat scratch/db-info-3.csv
3. "CID"
Type of data: Text
Contains null values: False
Non-null values: 5344
Unique values: 5344
Longest value: 6 characters
Most common values: 1f8d99 (1x)
9ab87a (1x)
76fdc7 (1x)
b21305 (1x)
4ae79b (1x)
#+END_SRC
From the docs: 'Read a table of CodeQL DB information and produce a table with unique entries
adding the Cumulative ID (CID) column.'
The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing; together with (owner, repo) it
provides a unique index for every DB.
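As noted earlier, the exact hashing is internal to =qldbtools=; a hypothetical
shell sketch of the idea, with illustrative field values:
#+BEGIN_SRC sh
# Hypothetical sketch only -- the real field order and hash may differ.
sha='0123456789abcdef0123456789abcdef01234567'
printf '%s|%s|%s|%s' '2.17.0' '2024-03-19 01:40:14.507823+00:00' 'cpp' "$sha" \
    | sha256sum | cut -c1-6
#+END_SRC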
We still have too many rows. The tables are all in CSV format, so you can use
your favorite tool to narrow the selection for your needs. For this document,
we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv
#+END_SRC
Note that these use seeded pseudo-random numbers, so the selection is in fact
deterministic. The selected databases in =gh-mrva-selection.json=, to be used
in section [[*Running the gh-mrva command-line client][Running the gh-mrva command-line client]], are the following:
#+begin_src javascript
{
"mirva-list": [
"NLPchina/elasticsearch-sqlctsj168cc4",
"LMAX-Exchange/disruptorctsj3e75ec",
"justauth/JustAuthctsj8a6177",
"FasterXML/jackson-modules-basectsj2fe248",
"ionic-team/capacitor-pluginsctsj38d457",
"PaddlePaddle/PaddleOCRctsj60e555",
"elastic/apm-agent-pythonctsj21dc64",
"flipkart-incubator/zjsonpatchctsjc4db35",
"stephane/libmodbusctsj54237e",
"wso2/carbon-kernelctsj5a8a6e",
"apache/servicecomb-packctsj4d98f5"
]
}
#+end_src
* Starting the server
The full instructions for building and running the server are in [[../README.md]] under
'Steps to build and run the server'.
With docker-compose set up and this repository cloned as previously described,
we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose up --build
#+END_SRC
and wait until the log output no longer changes.
Then, use the following command to populate the mrvacommander database storage:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && \
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
#+END_SRC
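To verify the upload, list the bucket; this sketch assumes the =qldbminio=
alias configured as shown earlier:
#+BEGIN_SRC sh
mc ls qldbminio/qldb
# Expect one zip file per selected database.
#+END_SRC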
* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
1. Install mrva cli
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva && cd ~/work-gh/mrva
git clone https://github.com/hohn/gh-mrva.git
cd ~/work-gh/mrva/gh-mrva && git checkout mrvacommander-end-to-end
# Build it
go mod edit -replace="github.com/GitHubSecurityLab/gh-mrva=$HOME/work-gh/mrva/gh-mrva"
go build .
# Sanity check
./gh-mrva -h
#+END_SRC
2. Set up the configuration
#+BEGIN_SRC sh
mkdir -p ~/.config/gh-mrva
cat > ~/.config/gh-mrva/config.yml <<eof
# The following options are supported
# codeql_path: Path to CodeQL distribution (checkout of codeql repo)
# controller: NWO of the MRVA controller to use. Not used here.
# list_file: Path to the JSON file containing the target repos
# XX:
codeql_path: $HOME/work-gh/not-used
controller: not-used/mirva-controller
list_file: $HOME/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
#+END_SRC
3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
~/work-gh/mrva/gh-mrva/gh-mrva-selection.json
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-4160 \
--list mirva-list \
--query ~/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC
4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Check the status
./gh-mrva status --session mirva-session-4160
#+END_SRC
5. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-4160 \
--output-dir mirva-session-4160
# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-4160 \
--download-dbs \
--output-dir mirva-session-4160
#+END_SRC
** Write query that has some results
First, get the list of paths corresponding to the previously selected
databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
. venv/bin/activate
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
scratch/db-info-3.csv > scratch/selection-full-info
csvcut -c path scratch/selection-full-info
#+END_SRC
Use one of these databases to write a query. It need not produce results.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code gh-mrva.code-workspace
#+END_SRC
In this case, we use the trivial =findPrintf= query, in the file =Fprintf.ql=:
#+BEGIN_SRC java
/**
,* @name findPrintf
,* @description find calls to plain fprintf
,* @kind problem
,* @id cpp-fprintf-call
,* @problem.severity warning
,*/
import cpp
from FunctionCall fc
where
fc.getTarget().getName() = "fprintf"
select fc, "call of fprintf"
#+END_SRC
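To sanity-check such a query against a single database before submitting it,
the CodeQL CLI can run it locally. This is a sketch under some assumptions:
=codeql= is on the =PATH=, the query file lives in a CodeQL pack whose =cpp=
library dependencies are installed, and the database path is illustrative:
#+BEGIN_SRC sh
# Unzip one downloaded database, then analyze it locally.
DB=/tmp/demo-db
unzip -qd "$DB" ~/work-gh/mrva/mrva-open-source-download/repos/google/re2/code-scanning/codeql/databases/cpp/db.zip
# Depending on packaging, the database root may be "$DB" or "$DB/codeql_db".
codeql database analyze "$DB" ~/work-gh/mrva/gh-mrva/Fprintf.ql \
    --format=sarif-latest --output=/tmp/fprintf-local.sarif
#+END_SRC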
Repeat the submit steps with this query:
1. --
2. --
3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
~/work-gh/mrva/gh-mrva/gh-mrva-selection.json
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-3660 \
--list mirva-list \
--query ~/work-gh/mrva/gh-mrva/Fprintf.ql
#+END_SRC
4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva status --session mirva-session-3660
#+END_SRC
This time we have results:
#+BEGIN_SRC text
...
0:$ Run name: mirva-session-3660
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 8
Total findings: 7055
Repositories with findings:
lz4/lz4ctsj2479c5 (cpp-fprintf-call): 307
Mbed-TLS/mbedtlsctsj17ef85 (cpp-fprintf-call): 6464
tsl0922/ttydctsj2e3faa (cpp-fprintf-call): 11
medooze/media-server-nodectsj5e30b3 (cpp-fprintf-call): 105
ampl/gslctsj4b270e (cpp-fprintf-call): 102
baidu/sofa-pbrpcctsjba3501 (cpp-fprintf-call): 24
dlundquist/sniproxyctsj3d83e7 (cpp-fprintf-call): 34
hyprwm/Hyprlandctsjc2425f (cpp-fprintf-call): 8
#+END_SRC
5. Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-3660 \
--output-dir mirva-session-3660
# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-3660 \
--download-dbs \
--output-dir mirva-session-3660
#+END_SRC
#+BEGIN_SRC sh
# And list them:
\ls -la *3660*
drwxr-xr-x@ 18 hohn staff 576 Nov 14 11:54 .
drwxrwxr-x@ 56 hohn staff 1792 Nov 14 11:54 ..
-rwxr-xr-x@ 1 hohn staff 9035554 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1.sarif
-rwxr-xr-x@ 1 hohn staff 57714273 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1_db.zip
-rwxr-xr-x@ 1 hohn staff 132484 Nov 14 11:54 ampl_gslctsj4b270e_1.sarif
-rwxr-xr-x@ 1 hohn staff 99234414 Nov 14 11:54 ampl_gslctsj4b270e_1_db.zip
-rwxr-xr-x@ 1 hohn staff 34419 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1.sarif
-rwxr-xr-x@ 1 hohn staff 55177796 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1_db.zip
-rwxr-xr-x@ 1 hohn staff 80744 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1.sarif
-rwxr-xr-x@ 1 hohn staff 2183836 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1_db.zip
-rwxr-xr-x@ 1 hohn staff 169079 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1.sarif
-rwxr-xr-x@ 1 hohn staff 21383303 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1_db.zip
-rwxr-xr-x@ 1 hohn staff 489064 Nov 14 11:54 lz4_lz4ctsj2479c5_1.sarif
-rwxr-xr-x@ 1 hohn staff 2991310 Nov 14 11:54 lz4_lz4ctsj2479c5_1_db.zip
-rwxr-xr-x@ 1 hohn staff 141336 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1.sarif
-rwxr-xr-x@ 1 hohn staff 38217703 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1_db.zip
-rwxr-xr-x@ 1 hohn staff 33861 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1.sarif
-rwxr-xr-x@ 1 hohn staff 5140183 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1_db.zip
#+END_SRC
6. Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.
Prepare the source directory so the viewer can be pointed at it
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660
unzip -qd ampl_gslctsj4b270e_1_db ampl_gslctsj4b270e_1_db.zip
cd ampl_gslctsj4b270e_1_db/codeql_db
unzip -qd src src.zip
#+END_SRC
Use the viewer in VS Code
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660
code ampl_gslctsj4b270e_1.sarif
# For the file vegas.c, when asked, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src/\
-name vegas.c
# Here: ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src//home/runner/work/bulk-builder/bulk-builder/monte/vegas.c
#+END_SRC
7. (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed (see [[https://github.com/hohn/sarif-cli/][sarif-cli]]), or start with a quick per-file count as sketched below.
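A minimal sketch of that count, assuming =jq= is installed:
#+BEGIN_SRC sh
for f in mirva-session-3660/*.sarif; do
    printf '%s: ' "$f"
    jq '[.runs[].results | length] | add' "$f"
done
#+END_SRC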
* Running the VS Code plugin
** Compile and Load the Extension
#+BEGIN_SRC sh
cd ~/work-gh/mrva/vscode-codeql
git checkout mrva-standalone
# Install nvm
brew install nvm
[ -s "/opt/homebrew/opt/nvm/nvm.sh" ] && \. "/opt/homebrew/opt/nvm/nvm.sh"
# or
# curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install correct node version
cd ./extensions/ql-vscode
nvm install
# Build the extension
cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
npm install
npm run build
# Install extension
cd ~/work-gh/mrva/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix
# Extension 'vscode-codeql-1.13.2-dev.2024.12.10.23.51.57.vsix' was successfully installed.
#+END_SRC
** Continue the CLI Sample using the Extension
Start VS Code
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code .
#+END_SRC
Set up 'variant analysis repositories', continuing from the
=scratch/vscode-selection.json= file formed previously:
1. Select '{}' and open the DB selection file
2. Paste
: ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/vscode-selection.json
3. Open =Fprintf.ql=
4. Right-click and choose 'run variant analysis'
The extension will assemble the pack, send it to the server, and display
results as they arrive.
* Footnotes
[fn:1] The =csvkit= tools can be installed into the same Python virtual
environment as the =qldbtools=.

View File

@@ -1,24 +0,0 @@
digraph G {
// Define nodes
mrvadata [label="mrvadata" shape=box];
client_qldbtools [label="client-qldbtools" shape=box];
client_ghmrva [label="client-ghmrva" shape=box];
code_server [label="code-server" shape=box];
rabbitmq [label="rabbitmq" shape=box];
server [label="server" shape=box];
dbstore [label="dbstore" shape=box];
artifactstore [label="artifactstore" shape=box];
agent [label="agent" shape=box];
// Define edges (dependencies)
server -> rabbitmq;
server -> dbstore;
server -> artifactstore;
dbstore -> mrvadata;
agent -> rabbitmq;
agent -> dbstore;
agent -> artifactstore;
// Define styling
edge [arrowhead=normal];
}

Binary file not shown.

View File

@@ -1,170 +0,0 @@
/* The sum of width and margin percentages must not exceed 100.*/
div#toc {
/* Use a moving table of contents (scrolled away for long contents) */
/*
* float: left;
*/
/* OR */
/* use a fixed-position toc */
position: fixed;
top: 80px;
left: 0px;
/* match toc, org-content, postamble */
width: 26%;
margin-right: 1%;
margin-left: 1%;
}
div#org-content {
float: right;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%;
}
div#postamble {
float: right;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%;
}
p.author {
clear: both;
font-size: 1em;
margin-left: 25%;
}
p.date {
clear: both;
font-size: 1em;
margin-left: 25%;
}
#toc * {
font-size:1em;
}
#toc h3 {
font-weight:normal;
margin:1em 0 0 0;
padding: 4px 0;
border-bottom:1px solid #666;
text-transform:uppercase;
}
#toc ul, #toc li {
margin:0;
padding:0;
list-style:none;
}
#toc li {
display:inline;
}
#toc ul li a {
text-decoration:none;
display:block;
margin:0;
padding:4px 6px;
color:#990000;
border-bottom:1px solid #aaa;
}
#toc ul ul li a {
padding-left:18px;
color:#666;
}
#toc ul li a:hover {
background-color:#F6F6F6;
}
/* Description lists. */
dt {
font-weight: bold;
background-color:#F6F6F6;
}
/* From org-mode page. */
body {
font-family: avenir, Lao Sangam MN, Myanmar Sangam MN, Songti SC, Kohinoor Devanagari, Menlo, avenir, helvetica, verdana, sans-serif;
font-size: 100%;
margin-top: 5%;
margin-bottom: 8%;
background: white; color: black;
margin-left: 3% !important; margin-right: 3% !important;
}
h1 {
font-size: 2em;
color: #cc8c00;
/* padding-top: 5px; */
border-bottom: 2px solid #aaa;
width: 70%;
/* match toc, org-content, postamble */
margin-left: 28%; /* Align with div#content */
}
h2 {
font-size: 1.5em;
padding-top: 1em;
border-bottom: 1px solid #ccc;
}
h3 {
font-size: 1.2em;
padding-top: 0.5em;
border-bottom: 1px solid #eee;
}
.todo, .deadline { color: red; font-style: italic }
.done { color: green; font-style: italic }
.timestamp { color: grey }
.timestamp-kwd { color: CadetBlue; }
.tag { background-color:lightblue; font-weight:normal; }
.target { background-color: lavender; }
.menu {
color: #666;
}
.menu a:link {
color: #888;
}
.menu a:active {
color: #888;
}
.menu a:visited {
color: #888;
}
img { display: block; margin-left: auto; margin-right: auto; }
pre {
padding: 5pt;
font-family: andale mono, vera sans mono, monospace, courier ;
font-size: 0.8em;
background-color: #f0f0f0;
}
code {
font-family: andale mono, vera sans mono, monospace, courier ;
font-size: 0.8em;
background-color: #f0f0f0;
}
table { border-collapse: collapse; }
td, th {
vertical-align: top;
border: 1pt solid #ADB9CC;
}

View File

@@ -1,127 +0,0 @@
digraph DockerComposeDemo {
rankdir=LR; // Left-to-right layout
node [shape=plaintext fontname="Helvetica"];
edge [arrowsize=0.5];
// Title
label="Container Dependencies for Demo";
labelloc=top;
fontsize=20;
fontname="Helvetica";
// Define nodes with clickable Dockerfile references
dbssvc [
href="../demo/containers/dbsdata/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>dbssvc</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./demo/containers/dbsdata/Dockerfile</font></td></tr>
</table>
>
];
dbstore [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>dbstore</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
</table>
>
];
client_ghmrva [
href="../client/containers/ghmrva/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>client-ghmrva</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/ghmrva/Dockerfile</font></td></tr>
<tr><td port="slot1"></td></tr>
<tr><td port="slot2"></td></tr>
<tr><td port="slot3"></td></tr>
</table>
>
];
code_server [
href="../client/containers/vscode/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>code-server</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/vscode/Dockerfile</font></td></tr>
</table>
>
];
rabbitmq [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>rabbitmq</b></td></tr>
<tr><td align="left"><font point-size="10">Image: rabbitmq:3-management</font></td></tr>
</table>
>
];
artifactstore [
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>artifactstore</b></td></tr>
<tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
</table>
>
];
agent [
href="../cmd/agent/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>agent</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./cmd/agent/Dockerfile</font></td></tr>
</table>
>
];
// Expanded 'server' node with handler names and Dockerfile reference
server [
href="../cmd/server/Dockerfile"
target="_blank"
shape=plaintext
label=<
<table border="1" cellborder="0" cellspacing="0" cellpadding="4">
<tr><td colspan="1" bgcolor="lightblue"><b>server</b></td></tr>
<tr><td align="left"><font point-size="10">Dockerfile: ./cmd/server/Dockerfile</font></td></tr>
<tr><td port="c_MRVARequest" align="left"><font point-size="10">c.MRVARequest</font></td></tr>
<tr><td port="c_MRVAStatus" align="left"><font point-size="10">c.MRVAStatus</font></td></tr>
<tr><td port="c_MRVADownloadArtifact" align="left"><font point-size="10">c.MRVADownloadArtifact</font></td></tr>
<tr><td align="left"><font point-size="10">c.MRVARequestID</font></td></tr>
<tr><td align="left"><font point-size="10">c.MRVADownloadQLDB</font></td></tr>
<tr><td align="left"><font point-size="10"><i>Not Found</i></font></td></tr>
</table>
>
];
// Define edges (dependencies)
dbstore -> dbssvc;
server -> dbstore;
server -> rabbitmq;
server -> artifactstore;
agent -> dbstore;
agent -> artifactstore;
agent -> rabbitmq;
// Message links
client_ghmrva:slot1 -> server:c_MRVARequest [label="message" style=dashed penwidth=2 fontsize=8];
client_ghmrva:slot2 -> server:c_MRVAStatus [label="message" style=dashed penwidth=2 fontsize=8];
client_ghmrva:slot3 -> server:c_MRVADownloadArtifact [label="message" style=dashed penwidth=2 fontsize=8];
}

Binary file not shown.

View File

@@ -1,162 +0,0 @@
<!-- Generated SVG (graphviz 10.0.1) rendering of the DockerComposeDemo
     container-dependency diagram; its content matches the .dot source
     above, so the generated markup is elided. -->
Before

Width:  |  Height:  |  Size: 10 KiB

View File

@@ -1,26 +0,0 @@
* Building the plugin
#+BEGIN_SRC sh
# Clone hohn's fork of Nick's fork
cd /tmp
git clone git@github.com:hohn/vscode-codeql.git
cd /tmp/vscode-codeql
git checkout mrva-standalone
# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
# Install the node version pinned by the repo's .nvmrc
cd /tmp/vscode-codeql/extensions/ql-vscode
nvm install
# Build the extension
cd /tmp/vscode-codeql/extensions/ql-vscode
npm install
npm run build
# Install extension
cd /tmp/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix
#+END_SRC

View File

@@ -7,20 +7,15 @@ import (
"mrvacommander/pkg/artifactstore"
"mrvacommander/pkg/codeql"
"mrvacommander/pkg/common"
"mrvacommander/pkg/qldbstore"
"mrvacommander/pkg/queue"
"mrvacommander/utils"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/elastic/go-sysinfo"
"github.com/google/uuid"
)
/*
type RunnerSingle struct {
queue queue.Queue
}
@@ -35,122 +30,42 @@ func NewAgentSingle(numWorkers int, v *Visibles) *RunnerSingle {
}
func (r *RunnerSingle) worker(wid int) {
var job common.AnalyzeJob
// TODO: reimplement this later
/*
var job common.AnalyzeJob
for {
job = <-r.queue.Jobs()
result, err := RunAnalysisJob(job)
if err != nil {
slog.Error("Failed to run analysis job", slog.Any("error", err))
continue
}
r.queue.Results() <- result
}
}
*/
for {
job = <-r.queue.Jobs()
const (
workerMemoryMB = 2048 // 2 GB
monitorIntervalSec = 10 // Monitor every 10 seconds
)
slog.Debug("Picking up job", "job", job, "worker", wid)
func calculateWorkers() int {
host, err := sysinfo.Host()
if err != nil {
slog.Error("failed to get host info", "error", err)
os.Exit(1)
}
slog.Debug("Analysis: running", "job", job)
storage.SetStatus(job.QueryPackId, job.NWO, common.StatusQueued)
memInfo, err := host.Memory()
if err != nil {
slog.Error("failed to get memory info", "error", err)
os.Exit(1)
}
// Get available memory in MB
totalMemoryMB := memInfo.Available / (1024 * 1024)
// Ensure we have at least one worker
workers := int(totalMemoryMB / workerMemoryMB)
if workers < 1 {
workers = 1
}
// Limit the number of workers to the number of CPUs
cpuCount := runtime.NumCPU()
if workers > cpuCount {
workers = max(cpuCount, 1)
}
return workers
}
func StartAndMonitorWorkers(ctx context.Context,
artifacts artifactstore.Store,
databases qldbstore.Store,
queue queue.Queue,
desiredWorkerCount int,
wg *sync.WaitGroup) {
currentWorkerCount := 0
stopChans := make([]chan struct{}, 0)
if desiredWorkerCount != 0 {
slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
for i := 0; i < desiredWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
}
return
}
slog.Info("Worker count not specified, managing based on available memory and CPU")
for {
select {
case <-ctx.Done():
// signal all workers to stop
for _, stopChan := range stopChans {
close(stopChan)
}
return
default:
newWorkerCount := calculateWorkers()
if newWorkerCount != currentWorkerCount {
slog.Info(
"Modifying worker count",
slog.Int("current", currentWorkerCount),
slog.Int("new", newWorkerCount))
resultFile, err := RunAnalysis(job)
if err != nil {
continue
}
if newWorkerCount > currentWorkerCount {
for i := currentWorkerCount; i < newWorkerCount; i++ {
stopChan := make(chan struct{})
stopChans = append(stopChans, stopChan)
wg.Add(1)
go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
}
} else if newWorkerCount < currentWorkerCount {
for i := newWorkerCount; i < currentWorkerCount; i++ {
close(stopChans[i])
}
stopChans = stopChans[:newWorkerCount]
}
currentWorkerCount = newWorkerCount
slog.Debug("Analysis run finished", "job", job)
// TODO: FIX THIS
res := common.AnalyzeResult{
RunAnalysisSARIF: resultFile,
RunAnalysisBQRS: "", // FIXME ?
}
r.queue.Results() <- res
storage.SetStatus(job.QueryPackId, job.NWO, common.StatusSuccess)
storage.SetResult(job.QueryPackId, job.NWO, res)
time.Sleep(monitorIntervalSec * time.Second)
}
}
*/
}
// RunAnalysisJob runs a CodeQL analysis job (AnalyzeJob) returning an AnalyzeResult
func RunAnalysisJob(
job queue.AnalyzeJob, artifacts artifactstore.Store, dbs qldbstore.Store) (queue.AnalyzeResult, error) {
var result = queue.AnalyzeResult{
Spec: job.Spec,
func RunAnalysisJob(job common.AnalyzeJob) (common.AnalyzeResult, error) {
var result = common.AnalyzeResult{
RequestId: job.RequestId,
ResultCount: 0,
ResultLocation: artifactstore.ArtifactLocation{},
Status: common.StatusError,
@@ -158,45 +73,19 @@ func RunAnalysisJob(
// Create a temporary directory
tempDir := filepath.Join(os.TempDir(), uuid.New().String())
if err := os.MkdirAll(tempDir, 0600); err != nil {
if err := os.MkdirAll(tempDir, 0755); err != nil {
return result, fmt.Errorf("failed to create temporary directory: %v", err)
}
defer os.RemoveAll(tempDir)
// Download the query pack as a byte slice
queryPackData, err := artifacts.GetQueryPack(job.QueryPackLocation)
if err != nil {
return result, fmt.Errorf("failed to download query pack: %w", err)
}
// Write the query pack data to the filesystem
queryPackArchivePath := filepath.Join(tempDir, "query-pack.tar.gz")
if err := os.WriteFile(queryPackArchivePath, queryPackData, 0600); err != nil {
return result, fmt.Errorf("failed to write query pack archive to disk: %w", err)
}
// Make a directory and extract the query pack
queryPackPath := filepath.Join(tempDir, "pack")
if err := os.Mkdir(queryPackPath, 0600); err != nil {
return result, fmt.Errorf("failed to create query pack directory: %w", err)
}
if err := utils.UntarGz(queryPackArchivePath, queryPackPath); err != nil {
return result, fmt.Errorf("failed to extract query pack: %w", err)
}
databaseData, err := dbs.GetDatabase(job.Spec.NameWithOwner)
if err != nil {
return result, fmt.Errorf("failed to get database: %w", err)
}
// Write the CodeQL database data to the filesystem
databasePath := filepath.Join(tempDir, "database.zip")
if err := os.WriteFile(databasePath, databaseData, 0600); err != nil {
return result, fmt.Errorf("failed to write CodeQL database to disk: %w", err)
}
// Extract the query pack
// TODO: download from the 'job' query pack URL
// utils.downloadFile
queryPackPath := filepath.Join(tempDir, "qp-54674")
utils.UntarGz("qp-54674.tgz", queryPackPath)
// Perform the CodeQL analysis
runResult, err := codeql.RunQuery(databasePath, job.QueryLanguage, queryPackPath, tempDir)
runResult, err := codeql.RunQuery("google_flatbuffers_db.zip", "cpp", queryPackPath, tempDir)
if err != nil {
return result, fmt.Errorf("failed to run analysis: %w", err)
}
@@ -207,32 +96,21 @@ func RunAnalysisJob(
return result, fmt.Errorf("failed to generate results archive: %w", err)
}
// Upload the archive to storage
// TODO: Upload the archive to storage
slog.Debug("Results archive size", slog.Int("size", len(resultsArchive)))
resultsLocation, err := artifacts.SaveResult(job.Spec, resultsArchive)
if err != nil {
return result, fmt.Errorf("failed to save results archive: %w", err)
}
result = queue.AnalyzeResult{
Spec: job.Spec,
ResultCount: runResult.ResultCount,
ResultLocation: resultsLocation,
Status: common.StatusSuccess,
SourceLocationPrefix: runResult.SourceLocationPrefix,
DatabaseSHA: runResult.DatabaseSHA,
result = common.AnalyzeResult{
RequestId: job.RequestId,
ResultCount: runResult.ResultCount,
ResultLocation: artifactstore.ArtifactLocation{}, // TODO "REPLACE_THIS_WITH_STORED_RESULTS_ARCHIVE"
Status: common.StatusSuccess,
}
return result, nil
}
// RunWorker runs a worker that processes jobs from queue
func RunWorker(ctx context.Context,
artifacts artifactstore.Store,
databases qldbstore.Store,
queue queue.Queue,
stopChan chan struct{},
wg *sync.WaitGroup) {
func RunWorker(ctx context.Context, stopChan chan struct{}, queue queue.Queue, wg *sync.WaitGroup) {
const (
WORKER_COUNT_STOP_MESSAGE = "Worker stopping due to reduction in worker count"
WORKER_CONTEXT_STOP_MESSAGE = "Worker stopping due to context cancellation"
@@ -255,7 +133,7 @@ func RunWorker(ctx context.Context,
return
}
slog.Info("Running analysis job", slog.Any("job", job))
result, err := RunAnalysisJob(job, artifacts, databases)
result, err := RunAnalysisJob(job)
if err != nil {
slog.Error("Failed to run analysis job", slog.Any("error", err))
continue
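
The hunk above interleaves old and new lines, so the worker-sizing heuristic is hard to follow. Below is a minimal, self-contained restatement of it, assuming only the github.com/elastic/go-sysinfo API that the file already imports; the main function is illustrative and not part of the agent:

package main

import (
	"fmt"
	"log"
	"runtime"

	"github.com/elastic/go-sysinfo"
)

// One worker per 2 GB of available memory, as in the agent code above.
const workerMemoryMB = 2048

// calculateWorkers sizes the pool from available memory, floored at one
// worker and capped at the CPU count.
func calculateWorkers() (int, error) {
	host, err := sysinfo.Host()
	if err != nil {
		return 0, fmt.Errorf("get host info: %w", err)
	}
	memInfo, err := host.Memory()
	if err != nil {
		return 0, fmt.Errorf("get memory info: %w", err)
	}
	// Available bytes -> MB -> workers.
	workers := int(memInfo.Available / (1024 * 1024) / workerMemoryMB)
	if workers < 1 {
		workers = 1
	}
	if cpus := runtime.NumCPU(); workers > cpus {
		workers = cpus
	}
	return workers, nil
}

func main() {
	n, err := calculateWorkers()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("would start %d workers\n", n)
}

StartAndMonitorWorkers then re-evaluates this count every monitorIntervalSec seconds and grows or shrinks the pool by closing per-worker stop channels, as the surrounding hunk shows.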

View File

@@ -8,6 +8,6 @@ import (
type Visibles struct {
Queue queue.Queue
Artifacts artifactstore.Store
CodeQLDBStore qldbstore.Store
Artifacts artifactstore.ArtifactStore
CodeQLDBStore qldbstore.CodeQLDatabaseStore
}

View File

@@ -1,28 +0,0 @@
package artifactstore
import (
"fmt"
"mrvacommander/pkg/common"
)
// Restrict the keys / values for ArtifactLocation and centralize the common ones
// here
const (
AF_BUCKETNAME_RESULTS = "results"
AF_BUCKETNAME_PACKS = "packs"
)
type ArtifactLocation struct {
Key string // location in bucket OR full location for file paths
Bucket string // which bucket: packs or results
}
// deriveKeyFromSessionId generates a key for a query pack based on the job ID
func deriveKeyFromSessionId(sessionId int) string {
return fmt.Sprintf("%d", sessionId)
}
// deriveKeyFromJobSpec generates a key for a result based on the JobSpec
func deriveKeyFromJobSpec(jobSpec common.JobSpec) string {
return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
}
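
The two removed helpers define the artifact key scheme for the packs and results buckets; a worked example makes it concrete. This standalone sketch inlines a JobSpec stand-in with just the SessionID and NameWithOwner fields the helpers read, and the example values echo the qp-54674 pack and flatbuffers database mentioned earlier in the diff:

package main

import "fmt"

// Stand-in for common.JobSpec; only the fields the helpers use.
type JobSpec struct {
	SessionID     int
	NameWithOwner string
}

// Query-pack key: the session id alone.
func deriveKeyFromSessionId(sessionId int) string {
	return fmt.Sprintf("%d", sessionId)
}

// Result key: "<session>-<owner/repo>".
func deriveKeyFromJobSpec(jobSpec JobSpec) string {
	return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
}

func main() {
	spec := JobSpec{SessionID: 54674, NameWithOwner: "google/flatbuffers"}
	fmt.Println(deriveKeyFromSessionId(spec.SessionID)) // 54674
	fmt.Println(deriveKeyFromJobSpec(spec))             // 54674-google/flatbuffers
}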

Some files were not shown because too many files have changed in this diff.