Compare commits

171 Commits

| Author | SHA1 | Date |
|---|---|---|
|  | 47de30a56e |  |
|  | d2c7b98d1c |  |
|  | fb5adf1b5f |  |
|  | 750187fb12 |  |
|  | 807d5f3d45 |  |
|  | 1377d4cec9 |  |
|  | ec8bb0cc63 |  |
|  | 8d7aa780ed |  |
|  | 43a7143e27 |  |
|  | 0d6e31713f |  |
|  | a2cfe0676c |  |
|  | f920a799d3 |  |
|  | 41146f5aaf |  |
|  | 173a61e3fa |  |
|  | e294fcdf4f |  |
|  | 9fe6aed357 |  |
|  | 3762654ef2 |  |
|  | d94f69be09 |  |
|  | 1fd220416c |  |
|  | df97e6ef10 |  |
|  | 2e99bdfedf |  |
|  | a507797eff |  |
|  | 0115e74d07 |  |
|  | 8577e1775a |  |
|  | 8590bd6de7 |  |
|  | cf37b474e4 |  |
|  | 5bdbd60cc5 |  |
|  | bde8ac2db7 |  |
|  | 75e57dc0a8 |  |
|  | c32ff755ef |  |
|  | 19a936087f |  |
|  | bb6189322a |  |
|  | f7dc5318e4 |  |
|  | 70c06e4fae |  |
|  | a2be014b2f |  |
|  | 58f4fe1ca7 |  |
|  | 14d6057248 |  |
|  | 01ddf38069 |  |
|  | 47a021d84a |  |
|  | 8d4c766e8c |  |
|  | 2409728960 |  |
|  | f066c767e2 |  |
|  | 397b86c735 |  |
|  | 511c544f6e |  |
|  | bd74ed646f |  |
|  | 45e40abf5d |  |
|  | a3593cbba2 |  |
|  | a0185df9d5 |  |
|  | 23e3ea9367 |  |
|  | 4140eaafc4 |  |
|  | 3e47bd4adb |  |
|  | f92dfc89a2 |  |
|  | a5bb232af2 |  |
|  | 008708469c |  |
|  | 37d5b1c6c1 |  |
|  | 1302db0b4e |  |
|  | c624925aba |  |
|  | e3e91534a0 |  |
|  | af043f3f59 |  |
|  | 8ea453f8b0 |  |
|  | 3f24fbb07d |  |
|  | de0d1b7434 |  |
|  | be7cc3b0cf |  |
|  | ba66cb9258 |  |
|  | baf20fa7af |  |
|  | 6bfcbb33ea |  |
|  | 9d6587872c |  |
|  | f809917c2e |  |
|  | a22d8d77f2 |  |
|  | 92a22f55d1 |  |
|  | 3db629e2ca |  |
|  | 95d2638546 |  |
|  | ff96b34f5e |  |
|  | 537ebdf19d |  |
|  | d486b6b4db |  |
|  | b61fbf8896 |  |
|  | dd776e312a |  |
|  | 18333bfdb1 |  |
|  | e335b6c843 |  |
|  | 4d52176c5a |  |
|  | dd58a64ef7 |  |
|  | 4e93929943 |  |
|  | e7d32861e5 |  |
|  | 52aafd6fc9 |  |
|  | 77ce997fbb |  |
|  | 187c49688e |  |
|  | d5bcb8b981 |  |
|  | ec0799696e |  |
|  | 9ccea8ac80 |  |
|  | 080c311516 |  |
|  | faeb13efb1 |  |
|  | 0378c4cb7f |  |
|  | 7de3ee59ce |  |
|  | 7ae6e9a1cb |  |
|  | 2d92ad51c3 |  |
|  | bef8a6dc97 |  |
|  | d08e32dc42 |  |
|  | 64b77c5d70 |  |
|  | 71ce8c0823 |  |
|  | 067e477f61 |  |
|  | 8f807e0e42 |  |
|  | 195dda9fd7 |  |
|  | f60b55f181 |  |
|  | 727381dc5a |  |
|  | a35fc619e6 |  |
|  | 8dd6c94918 |  |
|  | 34958e4cf4 |  |
|  | 259bac55fb |  |
|  | 41f6db5de0 |  |
|  | 19330c3a0f |  |
|  | 1e2df515e3 |  |
|  | 681fcdab8c |  |
|  | 5021fc824b |  |
|  | 7d27b910cd |  |
|  | 0d3f4c5e40 |  |
|  | a86f955aab |  |
|  | c556605e44 |  |
|  | 7b06484b29 |  |
|  | fc751ae08f |  |
|  | d956f47db3 |  |
|  | 0a52b729cd |  |
|  | 6bebf4abfc |  |
|  | 9d60489908 |  |
|  | 35100f89a7 |  |
|  | 742b059a49 |  |
|  | d1f56ae196 |  |
|  | 6262197c8d |  |
|  | 781571044d |  |
|  | b183cee78d |  |
|  | 5a95f0ea08 |  |
|  | 349d758c14 |  |
|  | 582d933130 |  |
|  | b7b4839fe0 |  |
|  | 06dcf50728 |  |
|  | 8f151ab002 |  |
|  | 65cdf9a883 |  |
|  | 1e1daf9330 |  |
|  | b4f1a2b8a6 |  |
|  | f652a6719c |  |
|  | 81c44ab14a |  |
|  | 92ca709458 |  |
|  | 242ba3fc1e |  |
|  | 26dd69c976 |  |
|  | 731b44b187 |  |
|  | aaeafa9e88 |  |
|  | 129b8cc302 |  |
|  | d64522d168 |  |
|  | 6b4e753e69 |  |
|  | 3df1cac5ae |  |
|  | dcc32ea8ab |  |
|  | 3c8db9cbe4 |  |
|  | be1304bdd9 |  |
|  | 8965725e42 |  |
|  | 2df48b9f98 |  |
|  | 8d80272922 |  |
|  | e3f4d9f012 |  |
|  | 3566f5169e |  |
|  | b3cf7a4f65 |  |
|  | 07f93f3d27 |  |
|  | 7413e23bab |  |
|  | 380e90135a |  |
|  | 1642894ccf |  |
|  | c54bda8432 |  |
|  | 17bf9049e4 |  |
|  | 62a7b227f0 |  |
|  | b543cebfac |  |
|  | d145731c4b |  |
|  | 0cffb3c849 |  |
|  | 9d1a891c72 |  |
|  | b4d9833da3 |  |
|  | e0cbc01d21 |  |
9 .dockerignore Normal file

```diff
@@ -0,0 +1,9 @@
+# Excludes
+/dbstore-data
+/qpstore-data
+/test-data
+/venv
+/client
+/cmd/server/var
+/.git
```
12 .env.container Normal file

```diff
@@ -0,0 +1,12 @@
+MRVA_RABBITMQ_HOST=rabbitmq
+MRVA_RABBITMQ_PORT=5672
+MRVA_RABBITMQ_USER=user
+MRVA_RABBITMQ_PASSWORD=password
+MINIO_ROOT_USER=user
+MINIO_ROOT_PASSWORD=mmusty8432
+ARTIFACT_MINIO_ENDPOINT=artifactstore:9000
+ARTIFACT_MINIO_ID=${MINIO_ROOT_USER}
+ARTIFACT_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
+QLDB_MINIO_ENDPOINT=dbstore:9000
+QLDB_MINIO_ID=${MINIO_ROOT_USER}
+QLDB_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
```
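The last six entries reference the MinIO root credentials via `${...}`. A minimal sketch for checking the expanded values, assuming the file is sourced with sh semantics (consumers that read `KEY=VALUE` pairs literally will not expand these references):

```sh
# Source the container env file and echo the resolved endpoints/ids.
set -a
. ./.env.container
set +a
echo "artifact store: ${ARTIFACT_MINIO_ENDPOINT} (id: ${ARTIFACT_MINIO_ID})"
echo "qldb store:     ${QLDB_MINIO_ENDPOINT} (id: ${QLDB_MINIO_ID})"
```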
1 .gitattributes vendored

```diff
@@ -1,2 +1,3 @@
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.blob filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
```
26 .gitignore vendored

```diff
@@ -4,6 +4,9 @@ cmd/server/var/
 # vscode project dir
 .vscode/
 
+# idea project dir
+.idea/
+
 # Compiled binary
 cmd/server/server
 cmd/agent/agent
@@ -41,3 +44,26 @@ go.work.sum
 
 # env file
 .env
+/artifactstore-data/.minio.sys
+/qldbminio/qldb
+.ipynb_checkpoints/
+venv/
+venv-*/
+*.egg-info
+__pycache__
+README.html
+ChangeLog
+notes/*.html
+
+# Make timestamp files
+mk.*
+demo/containers/dbsdata/data/
+demo/containers/dbsdata/tmp.dbsdata_backup.tar
+client/qldbtools/db-collection-py-1/
+
+mrva-overview.aux
+mrva-overview.log
+mrva-overview.synctex.gz
+mrva-overview.toc
+
+auto/
```
29 .golangci.yml Normal file

```yaml
# @@ -0,0 +1,29 @@
linters:
  enable:
    - staticcheck
    - unused
    - decorder
    - errchkjson
    - exhaustruct
    - gochecknoinits
    - gochecksumtype
    - goconst
    - gocritic
    - godox
    - lll
    - loggercheck
    - revive
    - sloglint
    - tagalign
    - unparam

linters-settings:
  revive:
    config: .revive.toml
  staticcheck:
    checks:
      - "SA"

issues:
  format: "format: {{.FromLinter}}: {{.Text}}"
```
13 .revive.toml Normal file

```toml
# @@ -0,0 +1,13 @@
ignoreGeneratedHeader = true

[rule.blank-imports]
Arguments = [true]

[[rule]]
name = "max-public-identifier-length"
arguments = [15] # Maximum length for public identifiers

[[rule]]
name = "max-private-identifier-length"
arguments = [15] # Maximum length for private identifiers
```
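The same rules can be exercised outside golangci-lint. A sketch, assuming revive is installed from its upstream module path and pointed at this config with its `-config` flag:

```sh
# Run revive directly with the TOML config above; the two length rules
# should report any identifier longer than 15 characters.
go install github.com/mgechev/revive@latest
revive -config .revive.toml ./cmd/... ./pkg/...
```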
55 Makefile Normal file

```makefile
# @@ -0,0 +1,55 @@
all: server agent

.phony: view

view: README.html
	open $<

html: README.html

%.html: %.md
	pandoc --toc=true --standalone $< --out $@

# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
	cd client/containers/qldbtools && \
	docker build -t client-qldbtools-container:0.1.24 .
	touch $@

# Run a shell in the container with the qldbtools
dbt-run: mk.client-qldbtools-container
	docker run --rm -it client-qldbtools-container:0.1.24 /bin/bash

# Run one of the scripts in the container as check
dbt-check: mk.client-qldbtools-container
	docker run --rm -it client-qldbtools-container:0.1.24 mc-db-initial-info

dbt-push: mk.dbt-push
mk.dbt-push: mk.client-qldbtools-container
	docker tag client-qldbtools-container:0.1.24 ghcr.io/hohn/client-qldbtools-container:0.1.24
	docker push ghcr.io/hohn/client-qldbtools-container:0.1.24
	touch $@

server:
	cd cmd/server && GOOS=linux GOARCH=arm64 go build

agent:
	cd cmd/agent && GOOS=linux GOARCH=arm64 go build

fullbuild:
	cd cmd/server && GOOS=linux GOARCH=arm64 go build -a

sendsubmit:
	cd tools && sh ./submit-request.curl

# Requires
#   go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
lint:
	golangci-lint run cmd/... pkg/...

deps:
	godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/server | dot -Tpdf > deps-server.pdf && open deps-server.pdf

depa:
	godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/agent | dot -Tpdf > deps-agent.pdf && open deps-agent.pdf
```
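The `server` and `agent` targets cross-compile for linux/arm64. A quick host-side sanity check using the standard `file` utility (binary paths taken from the Makefile above):

```sh
make server agent
file cmd/server/server cmd/agent/agent
# expect output mentioning: ELF 64-bit LSB executable, ARM aarch64
```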
68 README.md

```diff
@@ -6,6 +6,52 @@ TODO Style notes
 - NO package init() functions
 - Dynamic behaviour must be explicit
 
+## Client CodeQL Database Selector
+
+Separate from the server's downloading of databases, a client-side interface is
+needed to generate the `databases.json` file. This
+
+1. must be usable from the shell
+2. must be interactive (Python, Jupyter)
+3. is session based to allow iterations on selection / narrowing
+4. must be queryable. There is no need to reinvent sql / dataframes
+
+Python with dataframes is ideal for this; the project is in `client/`.
+
+## Reverse proxy
+
+For testing, replay flows using mitmweb. This is faster and simpler than using
+gh-mrva or the VS Code plugin.
+
+- Set up the virtual environment and install tools
+
+      python3.11 -m venv venv
+      source venv/bin/activate
+      pip install mitmproxy
+
+For intercepting requests:
+
+1. Start mitmproxy to listen on port 8080 and forward requests to port 8081,
+   with web interface
+
+       mitmweb --mode reverse:http://localhost:8081 -p 8080
+
+1. Change `server` ports in `docker-compose.yml` to
+
+       ports:
+         - "8081:8080" # host:container
+
+1. Start the containers.
+
+1. Submit requests.
+
+1. Save the flows for later replay.
+
+One such session is in `tools/mitmweb-flows`; it can be loaded to replay the
+requests:
+
+1. start `mitmweb --mode reverse:http://localhost:8081 -p 8080`
+2. `file` > `open` > `tools/mitmweb-flows`
+3. replay at least the submit, status, and download requests
+
 ## Cross-compile server on host, run it in container
 These are simple steps using a single container.
 
@@ -31,7 +77,10 @@ These are simple steps using a single container.
     cd /mrva/mrvacommander/cmd/server/ && ./server
 
 ## Using docker-compose
-### Steps to build and run the server in a multi-container environment set up by docker-compose.
+### Steps to build and run the server
+
+Steps to build and run the server in a multi-container environment set up by
+docker-compose.
 
 1. Built the server-image, above
 
@@ -58,6 +107,23 @@ These are simple steps using a single container.
     cd ~/work-gh/mrva/mrvacommander/tools
     sh ./request_16-Jun-2024_11-33-16.curl
 
+1. Follow server logging via
+
+       cd ~/work-gh/mrva/mrvacommander
+       docker-compose up -d
+       docker-compose logs -f server
+
+1. Completely rebuild all containers. Useful when running into docker errors
+
+       cd ~/work-gh/mrva/mrvacommander
+       docker-compose up --build
+
+1. Start the server containers and the desktop/demo containers
+
+       cd ~/work-gh/mrva/mrvacommander/
+       docker-compose down --remove-orphans
+       docker-compose -f docker-compose-demo.yml up -d
+
 1. Test server via remote client by following the steps in [gh-mrva](https://github.com/hohn/gh-mrva/blob/connection-redirect/README.org#compacted-edit-run-debug-cycle)
 
 ### Some general docker-compose commands
```
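Before submitting requests against the composed stack, a quick status pass helps; a sketch (the `server` service name comes from the logging step in the README above):

```sh
cd ~/work-gh/mrva/mrvacommander
docker-compose ps                     # all services should show Up
docker-compose logs --tail=20 server  # recent server output
```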
213 client/Plan.ipynb Normal file

File diff suppressed because one or more lines are too long
64 client/containers/ghmrva/Dockerfile Normal file

```dockerfile
# @@ -0,0 +1,64 @@
# ######################
# Use an official Golang image as the base image
FROM golang:1.22 AS builder

# Set the working directory inside the container
WORKDIR /work-gh/mrva/gh-mrva

# Clone the repository
RUN git clone https://github.com/hohn/gh-mrva.git . &&\
    git checkout hohn-0.1.24-demo

# Download dependencies
RUN go mod download

# Build the Go binary
RUN go build .

# ######################
# Provide codeql and java
#
FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates \
    default-jdk

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr

# ######################

# Set the working directory inside the final image
WORKDIR /app

# Copy the binary from the builder stage
COPY --from=builder /work-gh/mrva/gh-mrva/gh-mrva /usr/local/bin/gh-mrva

# Put CodeQL on the PATH
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/codeql

# Run forever
CMD ["tail", "-f", "/dev/null"]
```
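The `CODEQL_VERSION` build argument defaults to `latest`; pinning it makes builds reproducible. A sketch, with `v2.18.0` as an example release tag (the image tag matches the Makefile below):

```sh
docker build --build-arg CODEQL_VERSION=v2.18.0 \
    -t client-ghmrva-container:0.1.24 .
```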
13 client/containers/ghmrva/Makefile Normal file

```makefile
# @@ -0,0 +1,13 @@
ghm: mk.client-ghmrva-container
mk.client-ghmrva-container:
	docker build -t client-ghmrva-container:0.1.24 .
	touch $@

ghm-push: mk.ghm-push
mk.ghm-push: mk.client-ghmrva-container
	docker tag client-ghmrva-container:0.1.24 ghcr.io/hohn/client-ghmrva-container:0.1.24
	docker push ghcr.io/hohn/client-ghmrva-container:0.1.24
	touch $@

ghm-run:
	docker run --rm -it ghcr.io/hohn/client-ghmrva-container:0.1.24 /bin/bash
```
16 client/containers/ghmrva/README.org Normal file

```org
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Build
cd ~/work-gh/mrva/mrvacommander/client/containers/ghmrva/
make ghm
# Run
docker run -ti client-ghmrva-container:0.1.24 /bin/bash

# In the container
gh-mrva -h
codeql -h

# Push
make ghm-push
#+END_SRC
```
30 client/containers/hepc/Dockerfile Normal file

```dockerfile
# @@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
```
BIN — new binary files added under client/containers/hepc/codeql-dbs/db-collection-mixed/ (Stored with Git LFS, Normal file, binary content not shown):

- BentoML-BentoML-ctsj-d6963d.zip
- Serial-Studio-Serial-Studio-ctsj-2b2721.zip
- UEFITool-UEFITool-ctsj-ee2d3c.zip
- aircrack-ng-aircrack-ng-ctsj-41ebbe.zip
- apprise-apprise-ctsj-3f4a4e.zip
- attrs-attrs-ctsj-e2c939.zip
- aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip
- aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip
- behave-behave-ctsj-b297b5.zip
- bulk-builder-bulk-builder-ctsj-01864e.zip
- bulk-builder-bulk-builder-ctsj-0189aa.zip
- bulk-builder-bulk-builder-ctsj-035849.zip
- bulk-builder-bulk-builder-ctsj-051a5c.zip
- bulk-builder-bulk-builder-ctsj-099796.zip
- bulk-builder-bulk-builder-ctsj-0a35a1.zip
- bulk-builder-bulk-builder-ctsj-0a6352.zip
- bulk-builder-bulk-builder-ctsj-0c6575.zip
- bulk-builder-bulk-builder-ctsj-0cdf2f.zip
- bulk-builder-bulk-builder-ctsj-0d667f.zip
- bulk-builder-bulk-builder-ctsj-0d6cf6.zip
- bulk-builder-bulk-builder-ctsj-0d7b69.zip
- bulk-builder-bulk-builder-ctsj-103a8a.zip
```diff
@@ -0,0 +1,23 @@
+{"git_branch": "HEAD", "git_commit_id": "2b41915dac8966e95f9e63638d30769b0d69ad68", "git_repo": "aircrack-ng", "ingestion_datetime_utc": "2024-06-07 16:57:47.683012+00:00", "result_url": "http://hepc/db-collection-py/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.4", "projname": "aircrack-ng/aircrack-ng"}
+{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
+{"git_branch": "HEAD", "git_commit_id": "9a9308fd5477d2a44f4e491d5a712546d4a2b3e4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-22 13:30:21.681180+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0189aa.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "34412555665923bc07d43ce970e9d81be3795de7", "git_repo": "UEFITool", "ingestion_datetime_utc": "2024-07-04 19:00:38.543297+00:00", "result_url": "http://hepc/db-collection-py/UEFITool-UEFITool-ctsj-ee2d3c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.6", "projname": "UEFITool/UEFITool"}
+{"git_branch": "HEAD", "git_commit_id": "00aa56f5257060304d41f09651c6ab58ee6104d6", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-18 14:12:52.904410+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0c6575.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "e4bffa0a7450e1abd9f4df9565728ae18d86cfd2", "git_repo": "attrs", "ingestion_datetime_utc": "2024-07-18 22:34:57.795427+00:00", "result_url": "http://hepc/db-collection-py/attrs-attrs-ctsj-e2c939.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "attrs/attrs"}
+{"git_branch": "HEAD", "git_commit_id": "9620901afce56f720e856aca600951c9b61a9460", "git_repo": "apprise", "ingestion_datetime_utc": "2024-07-22 22:26:48.720348+00:00", "result_url": "http://hepc/db-collection-py/apprise-apprise-ctsj-3f4a4e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "apprise/apprise"}
+{"git_branch": "HEAD", "git_commit_id": "c38e6c8cfba28980aea8f895c71b376e8a5155d5", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2022-04-16T12:45:56.739003883Z", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d6cf6.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.8.5", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "18f6be580b12dc406ef356b2cd65f47c24fce63e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 05:46:23.392157+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d667f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "a587921bac074b1bd1b0a0a5536587660a9b954e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 16:13:39.094478+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a6352.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "9b361c7ff497d57651856650667aece8230fab6d", "git_repo": "BentoML", "ingestion_datetime_utc": "2024-07-24 02:17:07.095690+00:00", "result_url": "http://hepc/db-collection-py/BentoML-BentoML-ctsj-d6963d.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "BentoML/BentoML"}
+{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
+{"git_branch": "HEAD", "git_commit_id": "53ad2da1a8e6e79e0986ddfa3a45e1db6fdd491c", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-14 02:24:19.208812+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-01864e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "db8f1a7930c6b5826357646746337dafc983f953", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2023-11-22 01:18:25.079473+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-099796.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.2", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "f8df9dd749a549dec20aa286a7639ba04190faab", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-12 16:39:28.854142+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d7b69.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "b5274976cb0a792d05d541a749c0adcd9d20062d", "git_repo": "behave", "ingestion_datetime_utc": "2024-05-11 19:20:51.916333+00:00", "result_url": "http://hepc/db-collection-py/behave-behave-ctsj-b297b5.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "behave/behave"}
+{"git_branch": "HEAD", "git_commit_id": "4c825c198df470506b0f84da0b25b3b385150dcb", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-04-25 03:26:03.986270+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-035849.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "a8b8ff0acc6fcc629d08a3a9952f83be56a9a3c3", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-03 13:30:48.829134+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-051a5c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "9ef05731e7c6cbad2e897faa7c526558eed3ceaa", "git_repo": "aws-sam-cli", "ingestion_datetime_utc": "2024-05-14 01:03:18.130142+00:00", "result_url": "http://hepc/db-collection-py/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sam-cli/aws-sam-cli"}
+{"git_branch": "HEAD", "git_commit_id": "16865390a653ceaeabe354df1b37e4a775161a70", "git_repo": "aws-sdk-pandas", "ingestion_datetime_utc": "2024-05-13 15:13:31.853042+00:00", "result_url": "http://hepc/db-collection-py/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sdk-pandas/aws-sdk-pandas"}
+{"git_branch": "HEAD", "git_commit_id": "093856995af0811d3ebbe8c179b8febf4ae706f0", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-03-20 14:18:02.500590+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-103a8a.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.16.4", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "0573e6f96637f08fb4cb85e0552f0622d36827d4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-01-24 09:21:05.977294+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0cdf2f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.5", "projname": "bulk-builder/bulk-builder"}
+{"git_branch": "HEAD", "git_commit_id": "93314995a5ee2217d58c3d9cbcbdef5df6c34566", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-09 05:29:25.243273+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a35a1.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
```
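`bin/hepc-init.sh` (later in this diff) appends records of exactly this shape to `metadata.json` in the collection directory; assuming that is the file shown above, the one-object-per-line stream can be queried with jq directly:

```sh
# List each database's project and download URL.
jq -r '"\(.projname)\t\(.result_url)"' metadata.json

# Only the C/C++ databases.
jq -r 'select(.tool_name == "codeql-cpp") | .result_url' metadata.json
```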
30 client/containers/qldbtools/Dockerfile Normal file

```dockerfile
# @@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
```
25 client/containers/qldbtools/Makefile Normal file

```makefile
# @@ -0,0 +1,25 @@
DBT_TARGET := client-qldbtools-container:0.1.24

# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
	docker build -t ${DBT_TARGET} .
	touch $@

# Run a shell in the container with the qldbtools
dbt-run: dbt
	docker run --rm -it ${DBT_TARGET} /bin/bash

# Run one of the scripts in the container as check. Should exit with error.
dbt-check: dbt
	docker run --rm -it ${DBT_TARGET} mc-db-initial-info

dbt-push: mk.dbt-push
mk.dbt-push: dbt
	docker tag ${DBT_TARGET} ghcr.io/hohn/${DBT_TARGET}
	docker push ghcr.io/hohn/${DBT_TARGET}
	touch $@

dbt-test:
	docker pull ghcr.io/hohn/${DBT_TARGET}
	docker run --rm -it --name test-dbt-server ghcr.io/hohn/${DBT_TARGET} sh
```
13 client/containers/qldbtools/README.org Normal file

```org
* MRVA python tools container
Set up Docker image with python 3.11 and pip and the qldbtools. The targets are
in the =Makefile=; most important are

#+BEGIN_SRC sh
# Build
make dbt

# Check
make dbt-check
#+END_SRC
```
67 client/containers/vscode/Dockerfile Normal file

```dockerfile
# @@ -0,0 +1,67 @@
FROM codercom/code-server:4.92.2-debian

# ======================
# Pre-install a custom JDK for this platform and redirect CodeQL to it

USER root

ENV DEBIAN_FRONTEND=noninteractive

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    ca-certificates \
    curl \
    default-jdk \
    git \
    libcurl4-openssl-dev \
    libssl-dev \
    python3 \
    python3-dev \
    unzip

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# ======================
# Install code-server
USER coder

# Set environment variables
ENV PASSWORD mrva

# Install VS Code extensions as user root -- globally
RUN code-server --install-extension ms-python.python \
    && code-server --install-extension esbenp.prettier-vscode \
    && code-server --install-extension GitHub.vscode-codeql

# Expose the port that Code Server runs on
EXPOSE 9080

# Point CodeQL to the java binary for this platform
ENV CODEQL_JAVA_HOME=/usr

# Add
#   codeQl.cli.executablePath
# to user settings.
# This is in addition to the environment variable CODEQL_JAVA_HOME which has no
# effect on the plugin
USER root
COPY ./settings.json /home/coder/.local/share/code-server/User/
RUN chown -R coder:coder /home/coder/.local/share/code-server/

# Start Code Server
ENTRYPOINT ["dumb-init", "code-server", "--bind-addr", "0.0.0.0:9080", "."]

# Run as the coder user
USER coder
```
119 client/containers/vscode/README.org Normal file

```org
* MRVA VS Code server container
On the host:

#+BEGIN_SRC sh
# Build the container via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker build -t code-server-initialized:0.1.24 .

# Run the container in standalone mode via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \
    -d -p 9080:9080 code-server-initialized:0.1.24
#+END_SRC

- Connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.

Inside the container:

- Setup inside the container
  #+BEGIN_SRC shell
  cd
  export PATH=/opt/codeql:$PATH
  codeql pack init qldemo
  cd qldemo
  codeql pack add codeql/python-all@1.0.6
  #+END_SRC

- Create a new file =qldemo/simple.ql= with this query. Open it in VS Code.
  The plugin will download the CodeQL binaries (but never use them -- the
  configuration redirects)
  #+BEGIN_SRC sh
  cd
  cat > qldemo/simple.ql <<eof
  import python
  select 42
  eof
  #+END_SRC

- Create database.
  #+BEGIN_SRC sh
  cd ~/qldemo

  cat > short.py <<EOF
  print('hello world')
  EOF
  export PATH=/opt/codeql:$PATH
  codeql database create --language=python -s . -v short-db
  #+END_SRC

- Set the database as default and run the query =simple.ql=

- Add the customized VS Code plugin
  On the host
  #+BEGIN_SRC sh
  cd ~/work-gh/mrva/vscode-codeql
  git checkout mrva-standalone

  # Install nvm
  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

  # Install correct node version
  cd ./extensions/ql-vscode
  nvm install

  # Build the extension
  cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
  npm install
  npm run build
  #+END_SRC

  In the container
  #+BEGIN_SRC sh
  # Install extension
  cd /work-gh/mrva/vscode-codeql/dist

  /bin/code-server --force --install-extension vscode-codeql-*.vsix
  #+END_SRC

- Capture the state of this container and create a new image from it
  #+BEGIN_SRC sh
  docker ps
  # Check id column. Use it below.
  docker commit 2df5732c1850 code-server-initialized:0.1.24
  # Keep the sha
  # sha256:87c8260146e28aed25b094d023a30a015a958f829c09e66cb50ccca2c4a2a000
  docker kill 2df5732c1850

  # Make sure the image tag matches the sha
  docker inspect code-server-initialized:0.1.24 |grep Id

  # Run the image and check
  docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
      code-server-initialized:0.1.24
  #+END_SRC
  Again connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.

- Push this container
  #+BEGIN_SRC sh
  # Common
  export CSI_TARGET=code-server-initialized:0.1.24

  # Push container
  docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET}
  docker push ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC

- Test the registry image
  #+BEGIN_SRC sh
  # Test pushed container
  docker pull ghcr.io/hohn/${CSI_TARGET}
  docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
      ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC

In the container, inside the running vs code:
- Check the plugin version number via the command
  : codeql: copy version information
```
4 client/containers/vscode/settings.json Normal file

```json
{
    "codeQL.runningQueries.numberOfThreads": 2,
    "codeQL.cli.executablePath": "/opt/codeql/codeql"
}
```
24 client/qldbtools/.vscode/launch.json vendored Normal file

```json
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python Debugger: Current File with Arguments",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "args": [
                "--db_collection_dir",
                "db-collection-py",
                "--starting_path",
                "$HOME/work-gh/mrva/mrva-open-source-download"
            ],
            "justMyCode": true,
            "stopOnEntry": false
        }
    ]
}
```
2 client/qldbtools/Makefile Normal file

```makefile
doc:
	pandoc -s --css=./gfm.css README.md > foo.html && open foo.html
```
171 client/qldbtools/README.org Normal file

```org
* Introduction to hepc -- HTTP End Point for CodeQL
#+BEGIN_SRC sh
1:$ ./bin/hepc-init --db_collection_dir db-collection --starting_path ~/work-gh/mrva/mrva-open-source-download
[2024-11-19 14:12:06] [INFO] searching for db.zip files
[2024-11-19 14:12:08] [INFO] collecting information from db.zip files
[2024-11-19 14:12:08] [INFO] Extracting from /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/aircrack-ng/aircrack-ng/code-scanning/codeql/databases/cpp/db.zip
[2024-11-19 14:12:08] [INFO] Adding record to db-collection/metadata.json
#+END_SRC

* Introduction to qldbtools
=qldbtools= is a Python package for selecting sets of CodeQL databases
to work on. It uses a (pandas) dataframe in the implementation, but all
result sets are available as CSV files to provide flexibility in the
tools you want to work with.

The rationale is simple: when working with larger collections of CodeQL
databases, spread over time, languages, etc., many criteria can be used
to select the subset of interest. This package addresses that aspect of
MRVA (multi-repository variant analysis).

For example, consider this scenario from an enterprise. We have 10,000
repositories in C/C++ and 5,000 in Python. We build CodeQL databases weekly
and keep the last 2 years' worth. This means for the last 2 years there
are

#+begin_example
(10000 + 5000) * 52 * 2 = 1560000
#+end_example

databases to select from for a single MRVA run. 1.5 million rows are
readily handled by a pandas (or R) dataframe.

The full list of criteria currently encoded via the columns is

- owner
- name
- CID
- cliVersion
- creationTime
- language
- sha -- git commit sha of the code the CodeQL database is built against
- baselineLinesOfCode
- path
- db_lang
- db_lang_displayName
- db_lang_file_count
- db_lang_linesOfCode
- ctime
- primaryLanguage
- finalised
- left_index
- size

The minimal criteria needed to distinguish databases in the above
scenario are

- cliVersion
- creationTime
- language
- sha

These are encoded in the single custom id column 'CID'.

Thus, a database can be fully specified using a (owner,name,CID) tuple
and this is encoded in the names used by the MRVA server and clients.
The selection of databases can of course be done using the whole table.
```
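The CID derivation itself appears in `bin/hepc-init.sh` at the end of this diff: hash the four distinguishing fields with b2sum and keep the first six hex digits. A sketch using values from the sample table row below; the exact digest depends on the precise field formatting, so treat the output as illustrative:

```sh
# Mirror the cid pipeline from bin/hepc-init.sh:
# "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum | first 6 chars
echo "2.16.1 2024-02-08 14:18:20.983830+00:00 python c94dd024b1f5410ef160ff82a8423141e2bbb6b4" \
    | b2sum | awk '{print substr($1, 1, 6)}'
```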
```org
For an example of the workflow, see [[#command-line-use][section 'command line use']].

A small sample of a full table:

|   | owner | name | CID | cliVersion | creationTime | language | sha | baselineLinesOfCode | path | db_lang | db_lang_displayName | db_lang_file_count | db_lang_linesOfCode | ctime | primaryLanguage | finalised | left_index | size |
|---+-------+------+-----+------------+--------------+----------+-----+---------------------+------+---------+---------------------+--------------------+---------------------+-------+-----------------+-----------+------------+------|
| 0 | 1adrianb | face-alignment | 1f8d99 | 2.16.1 | 2024-02-08 14:18:20.983830+00:00 | python | c94dd024b1f5410ef160ff82a8423141e2bbb6b4 | 1839 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/1adrianb/face-alignment/code-scanning/codeql/databases/python/db.zip | python | Python | 25 | 1839 | 2024-07-24T14:09:02.187201 | python | 1 | 1454 | 24075001 |
| 1 | 2shou | TextGrocery | 9ab87a | 2.12.1 | 2023-02-17T11:32:30.863093193Z | cpp | 8a4e41349a9b0175d9a73bc32a6b2eb6bfb51430 | 3939 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/2shou/TextGrocery/code-scanning/codeql/databases/cpp/db.zip | no-language | no-language | 0 | -1 | 2024-07-24T06:25:55.347568 | cpp | nan | 1403 | 3612535 |
| 2 | 3b1b | manim | 76fdc7 | 2.17.5 | 2024-06-27 17:37:20.587627+00:00 | python | 88c7e9d2c96be1ea729b089c06cabb1bd3b2c187 | 19905 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/3b1b/manim/code-scanning/codeql/databases/python/db.zip | python | Python | 94 | 19905 | 2024-07-24T13:23:04.716286 | python | 1 | 1647 | 26407541 |

** Installation
- Set up the virtual environment and install tools

#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
python3.11 -m venv venv
source venv/bin/activate
pip install --upgrade pip

# From requirements.txt
pip install -r requirements.txt
# Or explicitly
pip install jupyterlab pandas ipython
pip install lckr-jupyterlab-variableinspector
#+end_example

- Local development

#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
source venv/bin/activate
pip install --editable .

The --editable *should* use symlinks for all scripts; use ./bin/* to be sure.
#+end_example

- Full installation

#+begin_example
pip install qldbtools
#+end_example

** Use as library
The best way to examine the code is starting from the high-level scripts
in =bin/=.

** Command line use
Initial information collection requires a unique file path so it can be
run repeatedly over DB collections with the same (owner,name) but other
differences -- namely, in one or more of

- creationTime
- sha
- cliVersion
- language

Those fields are collected in =bin/mc-db-refine-info=.

An example workflow with commands grouped by data files follows.

#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

./bin/mc-db-view-info < scratch/db-info-2.csv &
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-view-info < scratch/db-info-3.csv &

./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv
#+end_example

To see the full information for a selection, use =mc-rows-from-mrva-list=:

#+begin_example
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
    scratch/db-info-3.csv > scratch/selection-full-info
#+end_example

To check, e.g., the =language= column:

#+begin_example
csvcut -c language scratch/selection-full-info
#+end_example
```
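Other csvkit tools compose the same way as the csvcut check above; a sketch filtering the refined table by language, assuming csvkit is installed and the columns shown in the sample table:

```sh
# Keep only C/C++ databases and show the identifying triple.
csvgrep -c language -m cpp scratch/db-info-2.csv | csvcut -c owner,name,CID
```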
```org
** Notes
The =preview-data= plugin for VS Code has a bug; it displays =0= instead
of =0e3379= for the following. There are other entries with similar
malfunction.

#+begin_example
CleverRaven,Cataclysm-DDA,0e3379,2.17.0,2024-05-08 12:13:10.038007+00:00,cpp,5ca7f4e59c2d7b0a93fb801a31138477f7b4a761,578098.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1228.0,578098.0,2024-05-13T12:14:54.650648,cpp,True,4245,563435469
CleverRaven,Cataclysm-DDA,3231f7,2.18.0,2024-07-18 11:13:01.673231+00:00,cpp,db3435138781937e9e0e999abbaa53f1d3afb5b7,579532.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1239.0,579532.0,2024-07-24T02:33:23.900885,cpp,True,1245,573213726
#+end_example
```
144
client/qldbtools/bin/hepc-init.sh
Executable file
144
client/qldbtools/bin/hepc-init.sh
Executable file
@@ -0,0 +1,144 @@
#!/bin/bash

#* Utility functions
log() {
    local level="$1"
    shift
    local color_reset="\033[0m"
    local color_info="\033[1;34m"
    local color_warn="\033[1;33m"
    local color_error="\033[1;31m"

    local color
    case "$level" in
        INFO) color="$color_info" ;;
        WARN) color="$color_warn" ;;
        ERROR) color="$color_error" ;;
        *) color="$color_reset" ;;
    esac

    echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
}
usage() {
    echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
    echo
    echo "Options:"
    echo "  --db_collection_dir <directory>  Specify the database collection directory."
    echo "  --starting_path <path>           Specify the starting path."
    echo "  -h                               Show this help message."
    exit 1
}

#* Initialize and parse arguments
set -euo pipefail               # exit on error, unset var, pipefail
trap 'rm -fR /tmp/hepc.$$-*' EXIT

starting_dir=$(pwd)
db_collection_dir=""
starting_path=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --db_collection_dir)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --db_collection_dir requires a directory as an argument."
                usage
            fi
            db_collection_dir="$1"
            ;;
        --starting_path)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --starting_path requires a path as an argument."
                usage
            fi
            starting_path="$1"
            ;;
        -h)
            usage
            ;;
        *)
            echo "Error: Unknown option '$1'."
            usage
            ;;
    esac
    shift
done

# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
    echo "Error: --db_collection_dir is required."
    usage
fi

if [[ -z "$starting_path" ]]; then
    echo "Error: --starting_path is required."
    usage
fi

#* Find all DBs
log INFO "searching for db.zip files"
find "${starting_path}" -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths

#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p "$db_collection_dir"
cat /tmp/hepc.$$-paths | while read -r zip_path
do
    log INFO "Extracting from ${zip_path}"
    zip_dir=$(dirname "${zip_path}")
    zip_file=$(basename "${zip_path}")
    mkdir -p /tmp/hepc.$$-zip
    unzip -o -q "${zip_path}" '*codeql-database.yml' -d /tmp/hepc.$$-zip
    # The content may be LANGUAGE/codeql-database.yml

    #* For every database, create a metadata record.
    cd /tmp/hepc.$$-zip/*

    # Information from codeql-database.yml
    primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
    sha=$(yq '.creationMetadata.sha' codeql-database.yml)
    cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
    creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
    sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
    repo=${sourceLocationPrefix##*/}    # keep only last component
    # Get sourceLocationPrefix[-2]
    owner="${sourceLocationPrefix%/*}"  # strip last component
    owner="${owner##*/}"                # keep only last component

    # cid for repository / db
    cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
              awk '{print substr($1, 1, 6)}')

    # Prepare the metadata record for this DB.
    new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
    result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
    record='
    {
      "git_branch": "HEAD",
      "git_commit_id": "'${sha}'",
      "git_repo": "'${repo}'",
      "ingestion_datetime_utc": "'${creationTime}'",
      "result_url": "'${result_url}'",
      "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
      "tool_name": "codeql-'${primaryLanguage}'",
      "tool_version": "'${cliVersion}'",
      "projname": "'${owner}/${repo}'"
    }
    '
    cd "$starting_dir"
    rm -fR /tmp/hepc.$$-zip
    echo "$record" >> "$db_collection_dir"/metadata.json

    #* Link original file path to collection directory for serving. Use a name
    # including the cid and the field separator ctsj.
    cd "${db_collection_dir}"
    [ -L "${new_db_fname}" ] || ln -s "${zip_path}" "${new_db_fname}"
    cd "$starting_dir"          # keep relative paths valid for the next iteration

    # Interim cleanup; the glob must stay unquoted so it expands
    rm -fR /tmp/hepc.$$-*
done
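A minimal invocation sketch for the script above; the directory names are
illustrative, not taken from the repository:

#+begin_example
./client/qldbtools/bin/hepc-init.sh \
    --db_collection_dir db-collection-1 \
    --starting_path ~/work-gh/mrva/mrva-open-source-download
#+end_example

Each discovered db.zip is symlinked into the collection directory as
OWNER-REPO-ctsj-CID.zip, and one JSON record per database is appended to
metadata.json there.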
104 client/qldbtools/bin/hepc-serve.go (Executable file)
@@ -0,0 +1,104 @@
/*
dependencies
    go get -u golang.org/x/exp/slog

on-the-fly
    go run bin/hepc-serve.go --codeql-db-dir db-collection-py-1

compiled
    cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
    go build -o ./bin/hepc-serve.bin ./bin/hepc-serve.go

test
    curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
    curl $(head -1 foo | jq -r ".result_url" |sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip

*/
package main

import (
    "flag"
    "fmt"
    "net/http"
    "os"
    "path/filepath"

    "golang.org/x/exp/slog"
)

var dbDir string

func serveFile(w http.ResponseWriter, r *http.Request) {
    fullPath := r.URL.Path[len("/db/"):]

    resolvedPath, err := filepath.EvalSymlinks(fullPath)
    if err != nil {
        slog.Warn("failed to resolve symlink", slog.String("fullPath", fullPath),
            slog.String("error", err.Error()))
        http.Error(w, "File not found", http.StatusNotFound)
        return
    }

    if fileInfo, err := os.Stat(resolvedPath); err != nil || fileInfo.IsDir() {
        slog.Warn("file not found or is a directory", slog.String("resolvedPath", resolvedPath))
        http.Error(w, "File not found", http.StatusNotFound)
        return
    }

    slog.Info("serving file", slog.String("resolvedPath", resolvedPath))
    http.ServeFile(w, r, resolvedPath)
}

func serveMetadata(w http.ResponseWriter, r *http.Request) {
    metadataPath := filepath.Join(dbDir, "metadata.json")
    if fileInfo, err := os.Stat(metadataPath); err != nil || fileInfo.IsDir() {
        slog.Warn("metadata.json not found", slog.String("metadataPath", metadataPath))
        http.Error(w, "metadata.json not found", http.StatusNotFound)
        return
    }

    slog.Info("serving metadata.json", slog.String("metadataPath", metadataPath))
    http.ServeFile(w, r, metadataPath)
}

func logMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        slog.Info("incoming request", slog.String("method", r.Method), slog.String("url", r.URL.Path))
        next.ServeHTTP(w, r)
    })
}

func main() {
    var host string
    var port int

    flag.StringVar(&dbDir, "codeql-db-dir", "", "Directory containing CodeQL database files (required)")
    flag.StringVar(&host, "host", "127.0.0.1", "Host address for the HTTP server")
    flag.IntVar(&port, "port", 8080, "Port for the HTTP server")
    flag.Parse()

    if dbDir == "" {
        slog.Error("missing required flag", slog.String("flag", "--codeql-db-dir"))
        os.Exit(1)
    }

    if _, err := os.Stat(dbDir); os.IsNotExist(err) {
        slog.Error("invalid directory", slog.String("dbDir", dbDir))
        os.Exit(1)
    }

    slog.Info("starting server", slog.String("host", host), slog.Int("port", port), slog.String("dbDir", dbDir))

    mux := http.NewServeMux()
    mux.HandleFunc("/db/", serveFile)
    mux.HandleFunc("/index", serveMetadata)
    mux.HandleFunc("/api/v1/latest_results/codeql-all", serveMetadata)

    loggedHandler := logMiddleware(mux)

    addr := fmt.Sprintf("%s:%d", host, port)
    slog.Info("server listening", slog.String("address", addr))
    if err := http.ListenAndServe(addr, loggedHandler); err != nil {
        slog.Error("server error", slog.String("error", err.Error()))
    }
}
108 client/qldbtools/bin/mc-db-generate-selection (Executable file)
@@ -0,0 +1,108 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
"""
import argparse
import logging
from argparse import Namespace
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import numpy as np

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
    """,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('vscode_selection', type=str,
                    help='VS Code selection file to generate')
parser.add_argument('gh_mrva_selection', type=str,
                    help='gh-mrva cli selection file to generate')
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
parser.add_argument('-l', '--list-name', type=str,
                    help='Name of the repository list',
                    default='mirva-list')

args: Namespace = parser.parse_args()
#
#* Load the information
#
import pandas as pd
import sys

df0: DataFrame = pd.read_csv(sys.stdin)

if args.num_entries is None:
    # Use all entries
    df1: DataFrame = df0
else:
    # Use num_entries, chosen via pseudo-random numbers
    df1 = df0.sample(n=args.num_entries,
                     random_state=np.random.RandomState(args.seed))

#
#* Form and save structures
#
repos: list[str] = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    repos.append(utils.form_db_req_name(owner, name, CID))

repo_list_name: str = args.list_name
vsc = {
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                {
                    "name": repo_list_name,
                    "repositories": repos,
                }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": repo_list_name
    }
}

gh = {
    repo_list_name: repos
}

import json
with open(args.vscode_selection, "w") as fc:
    json.dump(vsc, fc, indent=4)

with open(args.gh_mrva_selection, "w") as fc:
    json.dump(gh, fc, indent=4)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
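For a two-entry run such as

#+begin_example
./bin/mc-db-generate-selection -n 2 \
    scratch/vscode-selection.json scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv
#+end_example

the VS Code selection file has the shape sketched below; the repository
entries are illustrative, the structure follows the =vsc= dictionary above:

#+begin_example
{
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                { "name": "mirva-list",
                  "repositories": [
                      "CleverRaven/Cataclysm-DDActsj0e3379",
                      "wjakob/nanobindctsj1a2b3c"
                  ] }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": "mirva-list"
    }
}
#+end_example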
48 client/qldbtools/bin/mc-db-initial-info (Executable file)
@@ -0,0 +1,48 @@
#!/usr/bin/env python
""" Collect information about CodeQL databases from the file system and write out
    a table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import sys
import pandas as pd

from qldbtools.utils import DBInfo

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Find all CodeQL DBs in and below starting_dir and export a CSV
    file with relevant data.""")
parser.add_argument('starting_dir', type=str,
                    help='The starting directory to search for codeql.')
args = parser.parse_args()

#
#* Collect info
#
# Get the db information in list of DBInfo form
db_base: str = args.starting_dir
dbs: list[DBInfo] = list(utils.collect_dbs(db_base))
dbdf: DataFrame = pd.DataFrame([d.__dict__ for d in dbs])
#
#
#* Write info out
#
dbdf.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
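A usage sketch; the download directory matches the one used in the session
scripts below, and the output file name is illustrative:

#+begin_example
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download \
    > scratch/db-info-1.csv
#+end_example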
93 client/qldbtools/bin/mc-db-populate-minio (Executable file)
@@ -0,0 +1,93 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB.
"""
# /// script
# dependencies = [
#   "pandas",
#   "numpy",
#   "minio",
# ]
# ///
import argparse
import qldbtools.utils as utils
import logging
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB. """)
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
args = parser.parse_args()
#
#* Collect the information and select subset
#
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers
    entries = df.sample(n=args.num_entries,
                        random_state=np.random.RandomState(args.seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    logging.error(f"Error creating bucket: {err}")

# Get info from dataframe and push the files
for index, row in entries[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    new_name = utils.form_db_bucket_name(owner, name, CID)
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
        logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        logging.error(f"Error uploading file {path}: {err}")

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
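To confirm what landed in the bucket, a short check with the same client
settings as above is enough; a sketch:

#+begin_example
# List the uploaded CodeQL DB objects in the 'qldb' bucket.
from minio import Minio

client = Minio("localhost:9000", access_key="user",
               secret_key="mmusty8432", secure=False)
for obj in client.list_objects("qldb", recursive=True):
    print(obj.object_name)   # e.g. owner$namectsjCID.zip
#+end_example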
60 client/qldbtools/bin/mc-db-refine-info (Executable file)
@@ -0,0 +1,60 @@
#!/usr/bin/env python
""" Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files.  Write out an extended table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import pandas as pd
import sys

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files.  Write out an extended table in CSV format. """)
args = parser.parse_args()

#
#* Collect the information
# This step is time-intensive so we save the results right after.
d: DataFrame = pd.read_csv(sys.stdin)
joiners: list[DataFrame] = []
for left_index in range(len(d)):        # range(len(d)-1) would skip the last row
    try:
        metac: object
        cqlc: object
        cqlc, metac = utils.extract_metadata(d.path[left_index])
    except utils.ExtractNotZipfile:
        continue
    except utils.ExtractNoCQLDB:
        continue
    try:
        detail_df: DataFrame = utils.metadata_details(left_index, cqlc, metac)
    except utils.DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df: DataFrame = pd.concat(joiners, axis=0)
full_df: DataFrame = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#
#* Save results
#
full_df.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
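The two collection steps chain naturally, since both write CSV to stdout
and mc-db-refine-info reads it from stdin; a sketch producing the
=db-info-2.csv= used in the README pipeline:

#+begin_example
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download \
    | ./bin/mc-db-refine-info > scratch/db-info-2.csv
#+end_example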
122 client/qldbtools/bin/mc-db-unique (Executable file)
@@ -0,0 +1,122 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and produce a table with unique entries,
    adding the Cumulative ID (CID) column.

    To make this happen:
    - Group entries by (owner,name,CID),
      sort each group by creationTime,
      and keep only the top (newest) element.

    - Drop rows that don't have the
      | cliVersion   |
      | creationTime |
      | language     |
      | sha          |
      columns.  There are very few (16 out of 6000 on recent tests) and their DBs
      are questionable.

"""
import argparse
import logging
from argparse import Namespace
from typing import Any

from pandas import DataFrame, Series

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information,
    narrow to <language>,
    group entries by (owner,name), sort each group by
    creationTime and keep only the top (newest) element.
    """)
parser.add_argument('language', type=str,
                    help='The language to be analyzed.')

args: Namespace = parser.parse_args()
#
#* Collect the information and select subset
#
import pandas as pd
import sys
import qldbtools.utils as utils

df2: DataFrame = pd.read_csv(sys.stdin)

#
#* Add single uniqueness field -- CID (Cumulative ID)
#
df2['CID'] = df2.apply(lambda row:
                       utils.cid_hash((
                           row['cliVersion'],
                           row['creationTime'],
                           row['language'],
                           row['sha'],
                       )), axis=1)

#
#* Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names
#
# - The essential columns are
#   | owner               |
#   | name                |
#   | language            |
#   | size                |
#   | cliVersion          |
#   | creationTime        |
#   | sha                 |
#   | baselineLinesOfCode |
#   | path                |
#
# - The rest are useful; put them last
#   | db_lang             |
#   | db_lang_displayName |
#   | db_lang_file_count  |
#   | db_lang_linesOfCode |
#   | left_index          |
#   | ctime               |
#   | primaryLanguage     |
#   | finalised           |

df3: DataFrame = df2.reindex( columns=['owner', 'name', 'cliVersion', 'creationTime',
                                       'language', 'sha', 'CID',
                                       'baselineLinesOfCode', 'path', 'db_lang',
                                       'db_lang_displayName', 'db_lang_file_count',
                                       'db_lang_linesOfCode', 'ctime',
                                       'primaryLanguage', 'finalised', 'left_index',
                                       'size'])

# Identify rows missing specific entries
rows = ( df3['cliVersion'].isna() |
         df3['creationTime'].isna() |
         df3['language'].isna() |
         df3['sha'].isna() )
df4: DataFrame = df3[~rows]

# Limit to one language
df5 = df4[df4['language'] == args.language]

# Sort and group
df_sorted: DataFrame = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique: DataFrame = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Write output
df_unique.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
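The CID added above is a six-hex-digit blake2b digest of the identifying
tuple, computed by =utils.cid_hash=; a minimal sketch of the computation,
with tuple values taken from the README's first Notes row:

#+begin_example
# Sketch of the CID computation; the tuple values are illustrative.
from hashlib import blake2b

row = ('2.17.0', '2024-05-08 12:13:10.038007+00:00', 'cpp',
       '5ca7f4e59c2d7b0a93fb801a31138477f7b4a761')
h = blake2b(digest_size=3)      # 3 bytes -> 6 hex digits, e.g. '0e3379'
h.update(str(row).encode())
print(h.hexdigest())
#+end_example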
35 client/qldbtools/bin/mc-db-view-info (Executable file)
@@ -0,0 +1,35 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and display it using pandasgui
"""
import argparse
import logging
import sys
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="Read a table of CodeQL DB information and display it using pandasgui")
args = parser.parse_args()
#
#* Collect the information and display it
#
import pandas as pd

df = pd.read_csv(sys.stdin)

import os
os.environ['APPDATA'] = "needed-for-pandasgui"
from pandasgui import show
show(df)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
120 client/qldbtools/bin/mc-hepc-init (Executable file)
@@ -0,0 +1,120 @@
#!/usr/bin/env python3

import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env

# Logging function
def log(level, message):
    colors = {
        "INFO": "\033[1;34m",
        "WARN": "\033[1;33m",
        "ERROR": "\033[1;31m",
        "RESET": "\033[0m",
    }
    timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
    print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)

# Generate a CID (cumulative id)
# Note: this uses sha256, while hepc-init.sh pipes through b2sum and
# qldbtools.utils.cid_hash uses blake2b, so the CIDs the three produce for
# the same database differ.
def generate_cid(cli_version, creation_time, primary_language, sha):
    hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
    return hashlib.sha256(hash_input).hexdigest()[:6]

# Expand environment variables in paths
def expand_path(path):
    return local.env.expand(path)

# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
    temp_dir = mktemp("-d").strip()
    try:
        unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return

        yaml_path = yaml_files[0]
        with yaml_path.open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        creation_time = creation_metadata["creationTime"]
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name
        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"

        metadata = {
            "git_branch"             : "HEAD",
            "git_commit_id"          : sha,
            "git_repo"               : repo,
            "ingestion_datetime_utc" : str(creation_time),
            "result_url"             : result_url,
            "tool_id"                : "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name"              : f"codeql-{primary_language}",
            "tool_version"           : cli_version,
            "projname"               : f"{owner}/{repo}",
        }

        metadata_file = local.path(db_collection_dir) / "metadata.json"
        with metadata_file.open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = local.path(db_collection_dir) / new_db_fname
        if not link_path.exists():
            ln("-sf", zip_path, link_path)

    except Exception as e:
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)

# Main application class
class DBProcessor(cli.Application):
    """
    DBProcessor processes db.zip files found in a starting directory,
    symlinks updated names in a collection directory,
    and adds a metadata information file "metadata.json" to the directory.
    """

    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
    )
    starting_path = cli.SwitchAttr(
        "--starting_path", str, mandatory=True, help="Specify the starting path"
    )

    def main(self):
        db_collection_dir = expand_path(self.db_collection_dir)
        starting_path = expand_path(self.starting_path)

        mkdir("-p", db_collection_dir)
        log("INFO", f"Searching for db.zip files in {starting_path}")

        db_files = find(starting_path, "-type", "f", "-name", "db.zip",
                        "-size", "+0c").splitlines()

        if not db_files:
            log("WARN", "No db.zip files found in the specified starting path.")
            return

        for zip_path in db_files:
            process_db_file(zip_path, db_collection_dir)

        log("INFO", "Processing completed.")

if __name__ == "__main__":
    DBProcessor.run()
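A usage sketch for this plumbum port, mirroring hepc-init.sh; the
collection directory name is the one used in hepc-serve.go's comments,
the starting path is illustrative:

#+begin_example
./bin/mc-hepc-init --db_collection_dir db-collection-py-1 \
    --starting_path ~/work-gh/mrva/mrva-open-source-download
#+end_example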
89 client/qldbtools/bin/mc-hepc-serve (Executable file)
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
from plumbum import cli
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import uvicorn

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

# FastAPI application
app = FastAPI()
db_dir = None  # This will be set by the CLI application

@app.get("/db/{file_path:path}")
def serve_file(file_path: str):
    """
    Serve files from the database directory, such as .zip files or metadata.json.
    """
    logger.info(f"Requested file: {file_path}")
    # Resolve symlink; with strict=True a missing file would raise before the
    # 404 below is reached, so resolve non-strictly and check existence here.
    resolved_path = Path(file_path).resolve(strict=False)
    logger.info(f"file resolved to: {resolved_path}")
    if not resolved_path.exists():
        logger.error(f"File not found: {resolved_path}")
        raise HTTPException(status_code=404, detail=f"{resolved_path} not found")
    return FileResponse(resolved_path)


@app.get("/index")
@app.get("/api/v1/latest_results/codeql-all")
def serve_metadata_json():
    """
    Serve the metadata.json file for multiple routes.
    """
    metadata_path = Path(db_dir) / "metadata.json"
    logger.info(f"Requested metadata.json at: {metadata_path}")
    if not metadata_path.exists():
        logger.error("metadata.json not found.")
        raise HTTPException(status_code=404, detail="metadata.json not found")
    logger.info(f"Serving metadata.json from: {metadata_path}")
    return FileResponse(metadata_path)

@app.middleware("http")
async def log_request(request, call_next):
    logger.info(f"Incoming request: {request.method} {request.url}")
    response = await call_next(request)
    return response

class DBService(cli.Application):
    """
    DBService serves:
    1. CodeQL database .zip files symlinked in the --codeql-db-dir
    2. Metadata for those zip files, contained in metadata.json in the same
       directory.
    The HTTP endpoints are:
    1. /db/{filename}
    2. /index
    3. /api/v1/latest_results/codeql-all
    """

    codeql_db_dir = cli.SwitchAttr("--codeql-db-dir", str, mandatory=True,
                                   help="Directory containing CodeQL database files")
    host = cli.SwitchAttr("--host", str, default="127.0.0.1",
                          help="Host address for the HTTP server")
    port = cli.SwitchAttr("--port", int, default=8080, help="Port for the HTTP server")

    def main(self):
        global db_dir
        db_dir = Path(self.codeql_db_dir)
        if not db_dir.is_dir():
            logger.error(f"Invalid directory: {db_dir}")
            return 1

        logger.info(f"Starting server at {self.host}:{self.port}")
        logger.info(f"Serving files from directory: {db_dir}")

        # Run the FastAPI server using Uvicorn
        uvicorn.run(app, host=self.host, port=self.port)


if __name__ == "__main__":
    DBService.run()
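A usage sketch, reusing the test requests from hepc-serve.go's header
comment:

#+begin_example
./bin/mc-hepc-serve --codeql-db-dir db-collection-py-1 &
curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
curl $(head -1 foo | jq -r ".result_url" | sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip
#+end_example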
67 client/qldbtools/bin/mc-rows-from-mrva-list (Executable file)
@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Script to list full details for a mrva-list file

1. reads a file containing
    {
        "mirva-list": [
            "NLPchina/elasticsearch-sqlctsj168cc4",
            "LMAX-Exchange/disruptorctsj3e75ec",
            "justauth/JustAuthctsj8a6177",
            "FasterXML/jackson-modules-basectsj2fe248",
            "ionic-team/capacitor-pluginsctsj38d457",
            "PaddlePaddle/PaddleOCRctsj60e555",
            "elastic/apm-agent-pythonctsj21dc64",
            "flipkart-incubator/zjsonpatchctsjc4db35",
            "stephane/libmodbusctsj54237e",
            "wso2/carbon-kernelctsj5a8a6e",
            "apache/servicecomb-packctsj4d98f5"
        ]
    }
2. reads a pandas dataframe stored in a csv file
3. selects all rows from 2. that
   - contain the 'owner' column matching the string before the slash from 1. and
   - the 'name' column matching the string between the slash and the marker
     'ctsj' and
   - the 'CID' column matching the string after the marker 'ctsj'

"""
import argparse
import json
import sys

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="""Script to list full details for a mrva-list file""")
parser.add_argument('mrva_list', type=str,
                    help='The JSON file containing the mrva-list')
parser.add_argument('info_csv', type=str,
                    help='The CSV file containing the full information')
args = parser.parse_args()

#* Step 1: Read the JSON file containing the "mirva-list"
with open(args.mrva_list, 'r') as f:
    data = json.load(f)

# Extract and parse the "mirva-list"
mirva_list = data['mirva-list']
parsed_mirva_list = []
for item in mirva_list:
    owner_name = item.split('/')[0]
    repo_name = item.split('/')[1].split('ctsj')[0]
    cid = item.split('/')[1].split('ctsj')[1]
    parsed_mirva_list.append((owner_name, repo_name, cid))

#* Step 2: Read the CSV file into a pandas dataframe
import pandas as pd
df = pd.read_csv(args.info_csv)

#* Step 3: Filter the dataframe based on the parsed "mirva-list"
filtered_df = df[
    df.apply(lambda row:
             (row['owner'], row['name'], row['CID']) in parsed_mirva_list, axis=1)]

# Write the filtered dataframe as CSV to stdout
filtered_df.to_csv(sys.stdout, index=False)
1021 client/qldbtools/gfm.css (Normal file)
File diff suppressed because it is too large.
138 client/qldbtools/pyproject.toml (Normal file)
@@ -0,0 +1,138 @@
[project]
name = "qldbtools"
version = "0.1.0"
description = "A Python package for selecting sets of CodeQL databases to work on"
authors = [
    {name = "Michael Hohn", email = "hohn@github.com"}
]
readme = {file = "README.org", content-type = "text/plain"}
requires-python = ">=3.11"
dependencies = [
    "annotated-types>=0.7.0",
    "anyio>=4.4.0",
    "appnope>=0.1.4",
    "argon2-cffi>=23.1.0",
    "argon2-cffi-bindings>=21.2.0",
    "arrow>=1.3.0",
    "asttokens>=2.4.1",
    "async-lru>=2.0.4",
    "attrs>=24.2.0",
    "babel>=2.16.0",
    "beautifulsoup4>=4.12.3",
    "bleach>=6.1.0",
    "blinker>=1.9.0",
    "certifi>=2024.7.4",
    "cffi>=1.17.0",
    "charset-normalizer>=3.3.2",
    "click>=8.1.7",
    "comm>=0.2.2",
    "debugpy>=1.8.5",
    "decorator>=5.1.1",
    "defusedxml>=0.7.1",
    "executing>=2.0.1",
    "fastapi>=0.115.5",
    "fastjsonschema>=2.20.0",
    "flask>=3.1.0",
    "fqdn>=1.5.1",
    "h11>=0.14.0",
    "httpcore>=1.0.5",
    "httpx>=0.27.0",
    "idna>=3.7",
    "ipykernel>=6.29.5",
    "ipython>=8.26.0",
    "isoduration>=20.11.0",
    "itsdangerous>=2.2.0",
    "jedi>=0.19.1",
    "jinja2>=3.1.4",
    "json5>=0.9.25",
    "jsonpointer>=3.0.0",
    "jsonschema>=4.23.0",
    "jsonschema-specifications>=2023.12.1",
    "jupyter-events>=0.10.0",
    "jupyter-lsp>=2.2.5",
    "jupyter-client>=8.6.2",
    "jupyter-core>=5.7.2",
    "jupyter-server>=2.14.2",
    "jupyter-server-terminals>=0.5.3",
    "jupyterlab>=4.2.4",
    "jupyterlab-pygments>=0.3.0",
    "jupyterlab-server>=2.27.3",
    "lckr-jupyterlab-variableinspector",
    "markupsafe>=2.1.5",
    "matplotlib-inline>=0.1.7",
    "minio==7.2.8",
    "mistune>=3.0.2",
    "nbclient>=0.10.0",
    "nbconvert>=7.16.4",
    "nbformat>=5.10.4",
    "nest-asyncio>=1.6.0",
    "notebook-shim>=0.2.4",
    "numpy>=2.1.0",
    "overrides>=7.7.0",
    "packaging>=24.1",
    "pandas>=2.2.2",
    "pandocfilters>=1.5.1",
    "parso>=0.8.4",
    "pexpect>=4.9.0",
    "platformdirs>=4.2.2",
    "plumbum>=1.9.0",
    "prometheus-client>=0.20.0",
    "prompt-toolkit>=3.0.47",
    "psutil>=6.0.0",
    "ptyprocess>=0.7.0",
    "pure-eval>=0.2.3",
    "pycparser>=2.22",
    "pycryptodome>=3.20.0",
    "pydantic>=2.10.2",
    "pydantic-core>=2.27.1",
    "pygments>=2.18.0",
    "python-dateutil>=2.9.0.post0",
    "python-json-logger>=2.0.7",
    "pytz>=2024.1",
    "pyyaml>=6.0.2",
    "pyzmq>=26.1.1",
    "referencing>=0.35.1",
    "requests>=2.32.3",
    "rfc3339-validator>=0.1.4",
    "rfc3986-validator>=0.1.1",
    "rpds-py>=0.20.0",
    "send2trash>=1.8.3",
    "six>=1.16.0",
    "sniffio>=1.3.1",
    "soupsieve>=2.6",
    "stack-data>=0.6.3",
    "starlette>=0.41.3",
    "terminado>=0.18.1",
    "tinycss2>=1.3.0",
    "tornado>=6.4.1",
    "traitlets>=5.14.3",
    "types-python-dateutil>=2.9.0.20240821",
    "typing-extensions>=4.12.2",
    "tzdata>=2024.1",
    "uri-template>=1.3.0",
    "urllib3>=2.2.2",
    "uvicorn>=0.32.1",
    "wcwidth>=0.2.13",
    "webcolors>=24.8.0",
    "webencodings>=0.5.1",
    "websocket-client>=1.8.0",
    "werkzeug>=3.1.3",
]

[build-system]
requires = ["setuptools>=75.5.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["qldbtools"]
script-files = [
    "bin/mc-db-generate-selection",
    "bin/mc-db-initial-info",
    "bin/mc-db-populate-minio",
    "bin/mc-db-refine-info",
    "bin/mc-db-unique",
    "bin/mc-db-view-info",
    "bin/mc-hepc-init",
    "bin/mc-hepc-serve",
    "bin/mc-rows-from-mrva-list",
]
client/qldbtools/qldbtools.code-workspace
Normal file
11
client/qldbtools/qldbtools.code-workspace
Normal file
@@ -0,0 +1,11 @@
|
{
    "folders": [
        {
            "path": "."
        }
    ],
    "settings": {
        "git.ignoreLimitWarning": true,
        "makefile.configureOnOpen": false
    }
}
2 client/qldbtools/qldbtools/__init__.py (Normal file)
@@ -0,0 +1,2 @@
from . import utils

205 client/qldbtools/qldbtools/utils.py (Normal file)
@@ -0,0 +1,205 @@
""" This module supports the selection of CodeQL databases based on various
    criteria.
"""
#* Imports
from dataclasses import dataclass
from pathlib import Path

import datetime
import json
import logging
import os
from typing import List, Dict, Any, Iterator

import pandas as pd
import time
import yaml
import zipfile

from pandas import DataFrame

#* Setup
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

#* Utility functions
def log_and_raise(message):
    logging.error(message)
    raise Exception(message)

def log_and_raise_e(message, exception):
    logging.error(message)
    raise exception(message)

def traverse_tree(root: str) -> Iterator[Path]:
    root_path = Path(os.path.expanduser(root))
    if not root_path.exists() or not root_path.is_dir():
        log_and_raise(f"The specified root path '{root}' does not exist or "
                      "is not a directory.")
    for path in root_path.rglob('*'):
        if path.is_file():
            yield path
        elif path.is_dir():
            pass

@dataclass
class DBInfo:
    ctime    : str = '2024-05-13T12:04:01.593586'
    language : str = 'cpp'
    name     : str = 'nanobind'
    owner    : str = 'wjakob'
    path     : Path = Path('/Users/.../db.zip')
    size     : int = 63083064


def collect_dbs(db_base: str) -> Iterator[DBInfo]:
    for path in traverse_tree(db_base):
        if path.name == "db.zip":
            # For the current repository, we have
            #   In [292]: len(path.parts)
            #   Out[292]: 14
            # and can work from the end to get relevant info from the file path.
            db = DBInfo()
            (*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
            db.path = path
            s = path.stat()
            db.size = s.st_size
            # db.ctime_raw = s.st_ctime
            # db.ctime = time.ctime(s.st_ctime)
            db.ctime = datetime.datetime.fromtimestamp(s.st_ctime).isoformat()
            yield db


def extract_metadata(zipfile_path: str) -> tuple[object, object]:
    """
    extract_metadata(zipfile)

    Unzip zipfile into memory and return the contents of the files
    codeql-database.yml and baseline-info.json that it contains in a tuple
    """
    codeql_content = None
    meta_content = None
    try:
        with zipfile.ZipFile(zipfile_path, 'r') as z:
            for file_info in z.infolist():
                # Filenames seen
                #   java/codeql-database.yml
                #   codeql_db/codeql-database.yml
                if file_info.filename.endswith('codeql-database.yml'):
                    with z.open(file_info) as f:
                        codeql_content = yaml.safe_load(f)
                # And
                #   java/baseline-info.json
                #   codeql_db/baseline-info.json
                elif file_info.filename.endswith('baseline-info.json'):
                    with z.open(file_info) as f:
                        meta_content = json.load(f)
    except zipfile.BadZipFile:
        log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNotZipfile)
    # The baseline-info is only available in more recent CodeQL versions
    if not meta_content:
        meta_content = {'languages':
                        {'no-language': {'displayName': 'no-language',
                                         'files': [],
                                         'linesOfCode': -1,
                                         'name': 'nolang'},
                         }}

    if not codeql_content:
        log_and_raise_e(f"No codeql-database.yml in: '{zipfile_path}'", ExtractNoCQLDB)
    return codeql_content, meta_content


class ExtractNotZipfile(Exception): pass
class ExtractNoCQLDB(Exception): pass

def metadata_details(left_index: int, codeql_content: object, meta_content: object) -> pd.DataFrame:
    """
    metadata_details(codeql_content, meta_content)

    Extract the details from metadata that will be used in DB selection and return a
    dataframe with the information.  Example, cropped to fit:

    full_df.T
    Out[535]:
                                         0                  1
    left_index                           0                  0
    baselineLinesOfCode              17990              17990
    primaryLanguage                    cpp                cpp
    sha                  288920efc079766f4  282c20efc079766f4
    cliVersion                      2.17.0             2.17.0
    creationTime             .325253+00:00    51.325253+00:00
    finalised                         True               True
    db_lang                            cpp             python
    db_lang_displayName              C/C++             Python
    db_lang_file_count                 102                 27
    db_lang_linesOfCode              17990               5586
    """
    cqlc, metac = codeql_content, meta_content
    d = {'left_index': left_index,
         'baselineLinesOfCode': cqlc['baselineLinesOfCode'],
         'primaryLanguage': cqlc['primaryLanguage'],
         'sha': cqlc['creationMetadata'].get('sha', 'abcde0123'),
         'cliVersion': cqlc['creationMetadata']['cliVersion'],
         'creationTime': cqlc['creationMetadata']['creationTime'],
         'finalised': cqlc.get('finalised', pd.NA),
         }
    f = pd.DataFrame(d, index=[0])
    joiners: list[dict[str, int | Any]] = []
    if not ('languages' in metac):
        log_and_raise_e("Missing 'languages' in metadata", DetailsMissing)
    for lang, lang_cont in metac['languages'].items():
        d1: dict[str, int | Any] = { 'left_index' : left_index,
                                     'db_lang': lang }
        for prop, val in lang_cont.items():
            if prop == 'files':
                d1['db_lang_file_count'] = len(val)
            elif prop == 'linesOfCode':
                d1['db_lang_linesOfCode'] = val
            elif prop == 'displayName':
                d1['db_lang_displayName'] = val
        joiners.append(d1)
    fj: DataFrame = pd.DataFrame(joiners)
    full_df: DataFrame = pd.merge(f, fj, on='left_index', how='outer')
    return full_df

class DetailsMissing(Exception): pass

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Take a tuple, hash its string form, and return the hash as a hex string
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

def form_db_bucket_name(owner, name, CID):
    """
    form_db_bucket_name(owner, name, CID)
    Return the name to use in minio storage; this function is trivial and used to
    enforce consistent naming.

    The 'ctsj' marker is a random, unique key to identify the information.
    """
    return f'{owner}${name}ctsj{CID}.zip'

def form_db_req_name(owner: str, name: str, CID: str) -> str:
    """
    form_db_req_name(owner, name, CID)
    Return the name to use in mrva requests; this function is trivial and used to
    enforce consistent naming.

    The 'ctsj' marker is a random, unique key to identify the information.
    """
    return f'{owner}/{name}ctsj{CID}'


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
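A quick sketch of the two naming helpers, applied to the owner, name, and
CID of the first entry in the README's Notes section:

#+begin_example
from qldbtools.utils import form_db_bucket_name, form_db_req_name

# Request form, used in the generated selection files:
print(form_db_req_name("CleverRaven", "Cataclysm-DDA", "0e3379"))
# CleverRaven/Cataclysm-DDActsj0e3379

# Minio object name, used by mc-db-populate-minio:
print(form_db_bucket_name("CleverRaven", "Cataclysm-DDA", "0e3379"))
# CleverRaven$Cataclysm-DDActsj0e3379.zip
#+end_example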
109 client/qldbtools/requirements.txt (Normal file)
@@ -0,0 +1,109 @@
annotated-types==0.7.0
anyio==4.4.0
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==24.2.0
babel==2.16.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.9.0
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastapi==0.115.5
fastjsonschema==2.20.0
Flask==3.1.0
fqdn==1.5.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipykernel==6.29.5
ipython==8.26.0
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
minio==7.2.8
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.1.0
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.2
plumbum==1.9.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.1.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.20.0
Send2Trash==1.8.3
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.41.3
terminado==0.18.1
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20240821
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.32.1
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3
61 client/qldbtools/session/db-generate-selection.py (Normal file)
@@ -0,0 +1,61 @@
""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
"""
#
#* Collect the information and write files
#
import pandas as pd
import sys
import qldbtools.utils as utils
import numpy as np
import importlib
importlib.reload(utils)

df0 = pd.read_csv('scratch/db-info-3.csv')

# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))

repos = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    repos.append(utils.form_db_req_name(owner, name, CID))

repo_list_name = "mirva-list"
vsc = {
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                {
                    "name": repo_list_name,
                    "repositories": repos,
                }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": repo_list_name
    }
}

gh = {
    repo_list_name: repos
}


# write the files
import json
with open("tmp-selection-vsc.json", "w") as fc:
    json.dump(vsc, fc, indent=4)
with open("tmp-selection-gh.json", "w") as fc:
    json.dump(gh, fc, indent=4)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
||||||
59
client/qldbtools/session/db-initial-info.py
Normal file
59
client/qldbtools/session/db-initial-info.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
#* Experimental work with utils.py, to be merged into it.
|
||||||
|
# The rest of this interactive script is available as cli script in
|
||||||
|
# mc-db-initial-info
|
||||||
|
from utils import *
|
||||||
|
|
||||||
|
#* Data collection
|
||||||
|
# Get the db information in list of DBInfo form
|
||||||
|
db_base = "~/work-gh/mrva/mrva-open-source-download/"
|
||||||
|
dbs = list(collect_dbs(db_base))
|
||||||
|
|
||||||
|
# Inspect:
|
||||||
|
from pprint import pprint
|
||||||
|
pprint(["len", len(dbs)])
|
||||||
|
pprint(["dbs[0]", dbs[0].__dict__])
|
||||||
|
pprint(["dbs[-1]", dbs[-1].__dict__])
|
||||||
|
#
|
||||||
|
# Get a dataframe
|
||||||
|
dbdf = pd.DataFrame([d.__dict__ for d in dbs])
|
||||||
|
#
|
||||||
|
#* Experiments with on-disk format
|
||||||
|
# Continue use of raw information in separate session.
|
||||||
|
#
|
||||||
|
# PosixPath is a problem for json and parquet
|
||||||
|
#
|
||||||
|
dbdf['path'] = dbdf['path'].astype(str)
|
||||||
|
#
|
||||||
|
dbdf.to_csv('dbdf.csv')
|
||||||
|
#
|
||||||
|
dbdf.to_csv('dbdf.csv.gz', compression='gzip', index=False)
|
||||||
|
#
|
||||||
|
dbdf.to_json('dbdf.json')
|
||||||
|
#
|
||||||
|
# dbdf.to_hdf('dbdf.h5', key='dbdf', mode='w')
|
||||||
|
#
|
||||||
|
# fast, binary
|
||||||
|
dbdf.to_parquet('dbdf.parquet')
|
||||||
|
#
|
||||||
|
# fast
|
||||||
|
import sqlite3
|
||||||
|
conn = sqlite3.connect('dbdf.db')
|
||||||
|
dbdf.to_sql('qldbs', conn, if_exists='replace', index=False)
|
||||||
|
conn.close()
|
||||||
|
#
|
||||||
|
# Sizes:
|
||||||
|
# ls -laSr dbdf.*
|
||||||
|
# -rw-r--r--@ 1 hohn staff 101390 Jul 12 14:17 dbdf.csv.gz
|
||||||
|
# -rw-r--r--@ 1 hohn staff 202712 Jul 12 14:17 dbdf.parquet
|
||||||
|
# -rw-r--r--@ 1 hohn staff 560623 Jul 12 14:17 dbdf.csv
|
||||||
|
# -rw-r--r--@ 1 hohn staff 610304 Jul 12 14:17 dbdf.db
|
||||||
|
# -rw-r--r--@ 1 hohn staff 735097 Jul 12 14:17 dbdf.json
|
||||||
|
#
|
||||||
|
# parquet has many libraries, including go: xitongsys/parquet-go
|
||||||
|
# https://parquet.apache.org/
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||||
|
# End:
|
||||||
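A minimal sketch of reloading these artifacts in a later session, assuming the
files written above exist (parquet additionally needs an engine such as
pyarrow); db-refine-info.py below does this for the gzipped CSV:

    import pandas as pd
    import sqlite3

    # Reload the compressed CSV, as db-refine-info.py does
    dbdf = pd.read_csv('dbdf.csv.gz', compression='gzip')
    # ... or one of the binary formats
    dbdf_pq = pd.read_parquet('dbdf.parquet')
    conn = sqlite3.connect('dbdf.db')
    dbdf_sql = pd.read_sql('SELECT * FROM qldbs', conn)
    conn.close()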
65 client/qldbtools/session/db-populate-minio.py Normal file
@@ -0,0 +1,65 @@
import qldbtools.utils as utils
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path

#
#* Collect the information and select a subset
#
df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
    # Use all entries
    entries = df
else:
    # Use a fixed-size sample, chosen via seeded pseudo-random numbers
    entries = df.sample(n=3,
                        random_state=np.random.RandomState(seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    print(f"Error creating bucket: {err}")

# (test) File paths and new names
files_to_upload = {
    "cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
    "cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}

# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, str(prefix / Path(local_path)))
        print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        print(f"Error uploading file {local_path}: {err}")

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
46 client/qldbtools/session/db-post-refine-info.py Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add a single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Take a tuple of row values and return its hash as a hex string
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
                                              row['sha'],
                                              row['cliVersion'],
                                              row['language'])
                                            ), axis=1)

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha', 'CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']   # inspect the new column

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
118 client/qldbtools/session/db-refine-info.py Normal file
@@ -0,0 +1,118 @@
# Experimental work to be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint

#* Reload gzipped CSV file to continue work
dbdf_1 = pd.read_csv('dbdf.csv.gz', compression='gzip')
#
# (old) Consistency check:
# dbdf_1.columns == dbdf.columns
# dbmask = (dbdf_1 != dbdf)
# dbdf_1[dbmask]
# dbdf_1[dbmask].dropna(how='all')
# ctime_raw is different in places, so don't use it.

#
#* Interact with/visualize the dataframe
# Using pandasgui -- qt
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(dbdf_1)
# Using dtale -- web
import dtale
dtale.show(dbdf_1)
#

#
#* Collect metadata from DB zip files
#
#** A manual sample
#
d = dbdf_1
left_index = 0
d.path[0]
cqlc, metac = extract_metadata(d.path[0])

cqlc['baselineLinesOfCode']
cqlc['primaryLanguage']
cqlc['creationMetadata']['sha']
cqlc['creationMetadata']['cliVersion']
cqlc['creationMetadata']['creationTime'].isoformat()
cqlc['finalised']

for lang, lang_cont in metac['languages'].items():
    print(lang)
    indent = "    "
    for prop, val in lang_cont.items():
        if prop == 'files':
            print("%sfiles count %d" % (indent, len(val)))
        elif prop == 'linesOfCode':
            print("%slinesOfCode %d" % (indent, val))
        elif prop == 'displayName':
            print("%sdisplayName %s" % (indent, val))

#** Automated for all entries
# The rest of this interactive script is available as a cli script in
# mc-db-refine-info
d = dbdf_1
joiners = []
for left_index in range(0, len(d)):   # all rows; range(0, len(d)-1) would skip the last one
    try:
        cqlc, metac = extract_metadata(d.path[left_index])
    except ExtractNotZipfile:
        continue
    except ExtractNoCQLDB:
        continue
    try:
        detail_df = metadata_details(left_index, cqlc, metac)
    except DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df = pd.concat(joiners, axis=0)
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#** View the full dataframe with metadata
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(full_df)

#** Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names

# - The essential columns are
#   | owner               |
#   | name                |
#   | language            |
#   | size                |
#   | cliVersion          |
#   | creationTime        |
#   | sha                 |
#   | baselineLinesOfCode |
#   | path                |

# - The rest are useful; put them last
#   | db_lang             |
#   | db_lang_displayName |
#   | db_lang_file_count  |
#   | db_lang_linesOfCode |
#   | left_index          |
#   | ctime               |
#   | primaryLanguage     |
#   | finalised           |

final_df = full_df.reindex(columns=['owner', 'name', 'language', 'size', 'cliVersion',
                                    'creationTime', 'sha', 'baselineLinesOfCode', 'path',
                                    'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                                    'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                                    'finalised', 'left_index'])

final_df.to_csv('all-info-table.csv.gz', compression='gzip', index=False)

#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
41 client/qldbtools/session/db-unique-1.py Normal file
@@ -0,0 +1,41 @@
# Experimental work for ../bin/mc-db-unique, to be merged into it.
import qldbtools.utils as utils
from pprint import pprint
import pandas as pd
# cd ../

#* Reload CSV file to continue work
df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')

# Identify rows missing specific entries
rows = ( df2['cliVersion'].isna() |
         df2['creationTime'].isna() |
         df2['language'].isna() |
         df2['sha'].isna() )
df2[rows]
df3 = df2[~rows]
df3

#* post-save work
df4 = pd.read_csv('scratch/db-info-3.csv')

# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Find duplicates
df_dups = df_unique[df_unique['CID'].duplicated(keep=False)]
len(df_dups)
df_dups['CID']

# Set display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 140)

#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
46 client/qldbtools/session/db-unique.py Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add a single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Take a tuple of row values and return its hash as a hex string
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
                                              row['sha'],
                                              row['cliVersion'],
                                              row['language'])
                                            ), axis=1)

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha', 'CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']   # inspect the new column

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
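A minimal usage sketch for cid_hash as defined above (the tuple values here are
hypothetical): the CID is deterministic in the four fields, and the 3-byte
digest keeps IDs short at the cost of a small collision risk over very large
DB collections.

    from hashlib import blake2b

    def cid_hash(row_tuple: tuple):
        h = blake2b(digest_size=3)
        h.update(str(row_tuple).encode())
        return h.hexdigest()

    # The same field tuple always yields the same 6-hex-digit CID
    t = ("2024-07-12T14:17:00", "0d6e31713f", "2.17.5", "cpp")
    assert cid_hash(t) == cid_hash(t)
    print(cid_hash(t))  # a 6-character hex string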
13 client/qldbtools/setup.py Normal file
@@ -0,0 +1,13 @@
from setuptools import setup, find_packages
import glob

setup(
    name='qldbtools',
    version='0.1.0',
    description='A Python package for working with CodeQL databases',
    author='Michael Hohn',
    author_email='hohn@github.com',
    packages=['qldbtools'],
    install_requires=[],
    scripts=glob.glob("bin/mc-*"),
)
2278 client/qldbtools/uv.lock (generated) Normal file
File diff suppressed because it is too large
@@ -23,7 +23,8 @@ ARG CODEQL_VERSION=latest
 RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
     unzip \
     curl \
-    ca-certificates
+    ca-certificates \
+    default-jdk

 # If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
 RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
@@ -32,18 +33,19 @@ RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
     echo "Using CodeQL version $CODEQL_VERSION" && \
     curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
     unzip /tmp/codeql.zip -d /opt && \
-    rm /tmp/codeql.zip
+    rm /tmp/codeql.zip && \
+    chmod -R +x /opt/codeql

 # Set environment variables for CodeQL
-ENV CODEQL_CLI_PATH=/opt/codeql
+ENV CODEQL_CLI_PATH=/opt/codeql/codeql

 # Set environment variable for CodeQL for `codeql database analyze` support on ARM
 # This env var has no functional effect on CodeQL when running on x86_64 linux
-ENV CODEQL_JAVA_HOME=/usr/
+ENV CODEQL_JAVA_HOME=/usr

 # Copy built agent binary from the builder stage
 WORKDIR /app
 COPY --from=builder /bin/mrva_agent ./mrva_agent

 # Run the agent
 ENTRYPOINT ["./mrva_agent"]
23 cmd/agent/Makefile Normal file
@@ -0,0 +1,23 @@
all: mrva-agent

MAI_TARGET := mrva-agent:0.1.24
mai: mk.mrva-agent
mrva-agent: mk.mrva-agent
mk.mrva-agent:
	cd ../../ && docker build -t mrva-agent:0.1.24 -f cmd/agent/Dockerfile .
	touch $@

mai-serve: mai
	docker run --rm -it ${MAI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MAI_TARGET}
	-rm mrva-agent

mai-push: mk.mai-push
mk.mai-push: mai
	docker tag ${MAI_TARGET} ghcr.io/hohn/${MAI_TARGET}
	docker push ghcr.io/hohn/${MAI_TARGET}
	touch $@
@@ -1,173 +0,0 @@
package main

import (
	"context"
	"flag"
	"os"
	"os/signal"
	"runtime"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/elastic/go-sysinfo"
	"golang.org/x/exp/slog"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/queue"
)

const (
	workerMemoryMB     = 2048 // 2 GB
	monitorIntervalSec = 10   // Monitor every 10 seconds
)

func calculateWorkers() int {
	host, err := sysinfo.Host()
	if err != nil {
		slog.Error("failed to get host info", "error", err)
		os.Exit(1)
	}

	memInfo, err := host.Memory()
	if err != nil {
		slog.Error("failed to get memory info", "error", err)
		os.Exit(1)
	}

	// Get available memory in MB
	totalMemoryMB := memInfo.Available / (1024 * 1024)

	// Ensure we have at least one worker
	workers := int(totalMemoryMB / workerMemoryMB)
	if workers < 1 {
		workers = 1
	}

	// Limit the number of workers to the number of CPUs
	cpuCount := runtime.NumCPU()
	if workers > cpuCount {
		workers = max(cpuCount, 1)
	}

	return workers
}

func startAndMonitorWorkers(ctx context.Context, queue queue.Queue, desiredWorkerCount int, wg *sync.WaitGroup) {
	currentWorkerCount := 0
	stopChans := make([]chan struct{}, 0)

	if desiredWorkerCount != 0 {
		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
		for i := 0; i < desiredWorkerCount; i++ {
			stopChan := make(chan struct{})
			stopChans = append(stopChans, stopChan)
			wg.Add(1)
			go agent.RunWorker(ctx, stopChan, queue, wg)
		}
		return
	}

	slog.Info("Worker count not specified, managing based on available memory and CPU")

	for {
		select {
		case <-ctx.Done():
			// signal all workers to stop
			for _, stopChan := range stopChans {
				close(stopChan)
			}
			return
		default:
			newWorkerCount := calculateWorkers()

			if newWorkerCount != currentWorkerCount {
				slog.Info(
					"Modifying worker count",
					slog.Int("current", currentWorkerCount),
					slog.Int("new", newWorkerCount))
			}

			if newWorkerCount > currentWorkerCount {
				for i := currentWorkerCount; i < newWorkerCount; i++ {
					stopChan := make(chan struct{})
					stopChans = append(stopChans, stopChan)
					wg.Add(1)
					go agent.RunWorker(ctx, stopChan, queue, wg)
				}
			} else if newWorkerCount < currentWorkerCount {
				for i := newWorkerCount; i < currentWorkerCount; i++ {
					close(stopChans[i])
				}
				stopChans = stopChans[:newWorkerCount]
			}
			currentWorkerCount = newWorkerCount

			time.Sleep(monitorIntervalSec * time.Second)
		}
	}
}

func main() {
	slog.Info("Starting agent")

	workerCount := flag.Int("workers", 0, "number of workers")

	flag.Parse()

	requiredEnvVars := []string{
		"MRVA_RABBITMQ_HOST",
		"MRVA_RABBITMQ_PORT",
		"MRVA_RABBITMQ_USER",
		"MRVA_RABBITMQ_PASSWORD",
		"CODEQL_JAVA_HOME",
		"CODEQL_CLI_PATH",
	}

	for _, envVar := range requiredEnvVars {
		if _, ok := os.LookupEnv(envVar); !ok {
			slog.Error("Missing required environment variable", "key", envVar)
			os.Exit(1)
		}
	}

	rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
	rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
	rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
	rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")

	rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
	if err != nil {
		slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
		os.Exit(1)
	}

	slog.Info("Initializing RabbitMQ queue")

	rabbitMQQueue, err := queue.NewRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
	if err != nil {
		slog.Error("failed to initialize RabbitMQ", slog.Any("error", err))
		os.Exit(1)
	}
	defer rabbitMQQueue.Close()

	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())

	go startAndMonitorWorkers(ctx, rabbitMQQueue, *workerCount, &wg)

	slog.Info("Agent started")

	// Gracefully exit on SIGINT/SIGTERM
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	<-sigChan
	slog.Info("Shutting down agent")

	// TODO: fix this to gracefully terminate agent workers during jobs
	cancel()
	wg.Wait()

	slog.Info("Agent shutdown complete")
}
@@ -1,38 +1,56 @@
-# Use the ubuntu 22.04 base image
-FROM ubuntu:24.10
-
-# Set architecture to arm64
-ARG ARCH=arm64
-ARG AARCH=aarch64
-
-# Set environment variables
-ENV DEBIAN_FRONTEND=noninteractive
-ENV CODEQL_VERSION=codeql-bundle-v2.17.5
-ENV CODEQL_DOWNLOAD_URL=https://github.com/github/codeql-action/releases/download/${CODEQL_VERSION}/codeql-bundle-linux64.tar.gz
-ENV JDK_VERSION=22.0.1
-ENV JDK_DOWNLOAD_URL=https://download.oracle.com/java/21/latest/jdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
-ENV JDK_DOWNLOAD_URL=https://download.java.net/java/GA/jdk${JDK_VERSION}/c7ec1332f7bb44aeba2eb341ae18aca4/8/GPL/openjdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
-
-ENV CODEQL_JAVA_HOME=/usr/local/jdk-${JDK_VERSION}
-
-# Install necessary tools
-RUN apt-get update && \
-    apt-get install -y curl tar && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# Add and extract the CodeQL bundle
-RUN curl -L $CODEQL_DOWNLOAD_URL -o /tmp/${CODEQL_VERSION}.tar.gz && \
-    tar -xzf /tmp/${CODEQL_VERSION}.tar.gz -C /opt && \
-    rm /tmp/${CODEQL_VERSION}.tar.gz
-
-# Add and extract the JDK
-RUN curl -L $JDK_DOWNLOAD_URL -o /tmp/jdk-${JDK_VERSION}.tar.gz && \
-    tar -xzf /tmp/jdk-${JDK_VERSION}.tar.gz -C /usr/local && \
-    rm /tmp/jdk-${JDK_VERSION}.tar.gz
-
-# Set PATH
-ENV PATH=/opt/codeql:"$PATH"
-
-# Prepare host mount point
-RUN mkdir /mrva
+FROM golang:1.22 AS builder
+
+# Copy the entire project
+WORKDIR /app
+COPY . .
+
+# Download dependencies
+RUN go mod download
+
+# Set the working directory to the cmd/server subproject
+WORKDIR /app/cmd/server
+
+# Build the server
+RUN go build -o /bin/mrva_server ./main.go
+
+FROM ubuntu:24.10 as runner
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Build argument for CodeQL version, defaulting to the latest release
+ARG CODEQL_VERSION=latest
+
+# Install packages
+RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
+    unzip \
+    curl \
+    ca-certificates \
+    default-jdk
+
+# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
+RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
+    CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
+    fi && \
+    echo "Using CodeQL version $CODEQL_VERSION" && \
+    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
+    unzip /tmp/codeql.zip -d /opt && \
+    rm /tmp/codeql.zip && \
+    chmod -R +x /opt/codeql
+
+# Set environment variables for CodeQL
+ENV CODEQL_CLI_PATH=/opt/codeql/codeql
+
+# Set environment variable for CodeQL for `codeql database analyze` support on ARM
+# This env var has no functional effect on CodeQL when running on x86_64 linux
+ENV CODEQL_JAVA_HOME=/usr
+
+# Set working directory to /app
+
+# Copy built server binary from the builder stage
+COPY --from=builder /bin/mrva_server ./mrva_server
+
+# Copy the CodeQL database directory from the builder stage (for standalone mode)
+COPY --from=builder /app/cmd/server/codeql ./codeql
+
+# Run the server with the default mode set to container
+ENTRYPOINT ["./mrva_server"]
+CMD ["--mode=container"]
26 cmd/server/Makefile Normal file
@@ -0,0 +1,26 @@
all: mrva-server

MSI_TARGET := mrva-server:0.1.24
msi: mk.mrva-server
mrva-server: mk.mrva-server
mk.mrva-server:
	cd ../../ && docker build -t mrva-server:0.1.24 -f cmd/server/Dockerfile .
	touch $@

msi-serve: msi
	docker run --rm -it ${MSI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MSI_TARGET}
	-rm mrva-server

msi-push: mk.msi-push
mk.msi-push: mk.mrva-server
	docker tag ${MSI_TARGET} ghcr.io/hohn/${MSI_TARGET}
	docker push ghcr.io/hohn/${MSI_TARGET}
	touch $@

msi-test:
	docker pull ghcr.io/hohn/${MSI_TARGET}
	docker run --rm -it --name test-mrva-server-codeql ghcr.io/hohn/${MSI_TARGET} sh
@@ -1,140 +0,0 @@
// Copyright © 2024 github
// Licensed under the Apache License, Version 2.0 (the "License").

package main

import (
	"flag"
	"log"
	"log/slog"
	"os"
	"strconv"

	"mrvacommander/config/mcc"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/artifactstore"
	"mrvacommander/pkg/qldbstore"
	"mrvacommander/pkg/queue"
	"mrvacommander/pkg/server"
	"mrvacommander/pkg/state"
)

func main() {
	// Define flags
	helpFlag := flag.Bool("help", false, "Display help message")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")

	// Custom usage function for the help flag
	flag.Usage = func() {
		log.Printf("Usage of %s:\n", os.Args[0])
		flag.PrintDefaults()
		log.Println("\nExamples:")
		log.Println("  go run main.go --loglevel=debug --mode=container")
	}

	// Parse the flags
	flag.Parse()

	// Handle the help flag
	if *helpFlag {
		flag.Usage()
		return
	}

	// Apply 'loglevel' flag
	switch *logLevel {
	case "debug":
		slog.SetLogLoggerLevel(slog.LevelDebug)
	case "info":
		slog.SetLogLoggerLevel(slog.LevelInfo)
	case "warn":
		slog.SetLogLoggerLevel(slog.LevelWarn)
	case "error":
		slog.SetLogLoggerLevel(slog.LevelError)
	default:
		log.Printf("Invalid logging verbosity level: %s", *logLevel)
		os.Exit(1)
	}

	// Read configuration
	config := mcc.LoadConfig("mcconfig.toml")

	// Output configuration summary
	log.Printf("Help: %t\n", *helpFlag)
	log.Printf("Log Level: %s\n", *logLevel)
	log.Printf("Mode: %s\n", *mode)

	// Apply 'mode' flag
	switch *mode {
	case "standalone":
		// Assemble single-process version
		sq := queue.NewQueueSingle(2)
		ss := state.NewLocalState(config.Storage.StartingID)
		as := artifactstore.NewInMemoryArtifactStore()
		ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore("")

		server.NewCommanderSingle(&server.Visibles{
			Queue:         sq,
			State:         ss,
			Artifacts:     as,
			CodeQLDBStore: ql,
		})

		// FIXME take value from configuration
		agent.NewAgentSingle(2, &agent.Visibles{
			Queue:         sq,
			Artifacts:     as,
			CodeQLDBStore: ql,
		})

	case "container":
		rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
		rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
		rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
		rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")

		rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
		if err != nil {
			slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
			os.Exit(1)
		}

		sq, err := queue.NewRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
		if err != nil {
			slog.Error("Unable to initialize RabbitMQ queue")
			os.Exit(1)
		}

		ss := state.NewContainerState(config.Storage.StartingID)

		// TODO: add arguments
		as, err := artifactstore.NewMinIOArtifactStore("", "", "")
		if err != nil {
			slog.Error("Unable to initialize artifact store")
			os.Exit(1)
		}

		// TODO: add arguments
		ql, err := qldbstore.NewMinIOCodeQLDatabaseStore("", "", "", "")
		if err != nil {
			slog.Error("Unable to initialize ql database storage")
			os.Exit(1)
		}

		server.NewCommanderContainer(&server.Visibles{
			Queue:         sq,
			State:         ss,
			Artifacts:     as,
			CodeQLDBStore: ql,
		})

	case "cluster":
		// Assemble cluster version
	default:
		slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster\n")
		os.Exit(1)
	}

}
@@ -17,15 +17,15 @@ type System struct {
 func LoadConfig(fname string) *System {
 	if _, err := os.Stat(fname); err != nil {
-		slog.Error("Configuration file %s not found", fname)
-		os.Exit(1)
+		slog.Warn("Configuration file not found", "name", fname)
+		return &System{}
 	}

 	var config System

 	_, err := toml.DecodeFile(fname, &config)
 	if err != nil {
-		slog.Error("", err)
+		slog.Error("Error decoding configuration file", err)
 		os.Exit(1)
 	}

7 demo/containers/dbsdata/Dockerfile Normal file
@@ -0,0 +1,7 @@
# Use a minimal base image
FROM busybox

ADD dbsdata_backup.tar /

# Just run sh if this container is ever started
CMD ["sh"]
77 demo/containers/dbsdata/README.org Normal file
@@ -0,0 +1,77 @@
* MRVA dbsdata container
  Set up / run:
  #+BEGIN_SRC sh
    # Run the raw container assembly
    cd ~/work-gh/mrva/mrvacommander/
    docker-compose -f docker-compose-demo-build.yml up -d

    # Use the following commands to populate the mrvacommander database storage
    cd ~/work-gh/mrva/mrvacommander/client/qldbtools
    mkdir -p scratch
    source venv/bin/activate

    ./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv

    ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

    ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv

    ./bin/mc-db-generate-selection -n 11 \
        scratch/vscode-selection.json \
        scratch/gh-mrva-selection.json \
        < scratch/db-info-3.csv

    # Several seconds start-up time; fast db population
    ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv

    # While the containers are running, this will show minio's storage. The zip files
    # are split into part.* and xl.meta by minio. Use the web interface to see real
    # names.
    docker exec dbstore ls -R /data/mrvacommander/

    # Open browser to see the file listing
    open http://localhost:9001/browser/qldb

    # list the volumes
    docker volume ls | grep dbs
    docker volume inspect mrvacommander_dbsdata

    # Persist the volume using a container
    cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
    # Use mrvacommander_dbsdata to access the compose cluster
    # EITHER
    # Get the data as a tar file from the volume using a container
    rm -f dbsdata_backup.tar
    docker run --rm \
        -v mrvacommander_dbsdata:/data \
        -v $(pwd):/backup \
        busybox sh -c "tar cf /backup/dbsdata_backup.tar /data"
    # OR
    # Use gnu tar on the host. The macos tar adds extended attributes
    # brew install gnu-tar
    rm -f dbsdata_backup.tar && gtar cf dbsdata_backup.tar data/

    # Build the container with the tarball
    cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
    docker build -t dbsdata-container:0.1.24 .
    docker image ls | grep dbs

    # check container contents
    docker run -it dbsdata-container:0.1.24 /bin/sh
    docker run -it dbsdata-container:0.1.24 ls data/qldb

    # Tag the dbstore backing container
    docker inspect dbsdata-container:0.1.24 | grep Id
    docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24

    # Push the pre-populated image
    docker push ghcr.io/hohn/dbsdata-container:0.1.24

    # Check the tagged image
    docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \
        ls data/qldb

    # Shut down the container assembly
    docker-compose -f docker-compose-demo-build.yml down
  #+END_SRC
BIN demo/containers/dbsdata/dbsdata_backup.tar (Stored with Git LFS) Normal file
Binary file not shown.
11 doc/README.md Normal file
@@ -0,0 +1,11 @@
## The doc/ directory

The `doc/` directory serves as home for documentation. This is the place to
put refined documentation after it has gone through `notes/`. The contents of
this directory should be accessible to a broad audience including prospective
users, active users, and developers. Highly technical material stays in
`notes/`, which is primarily addressed to

1. The note authors and
2. Developers of the project

It need not be meaningful to casual users.
101 doc/mrva-business.org Normal file
@@ -0,0 +1,101 @@
* MRVA for CodeQL: A Business View
** Introduction
   The companion documents in this directory are mostly technical. The purpose of
   this document is to explain, from a business perspective, what MRVA is and why
   it matters.

   To illustrate its impact, consider two real-world cases:

*** Case 1: Preventing Costly Security Failures
    One of our customers faced a significant lawsuit due to inadequate security.
    The root cause? Unaddressed technical risks in their code. The work we do
    directly prevents similar vulnerabilities from reaching this stage.

    While lawsuits of this scale are rare, security failures are not. More common
    consequences include:

    - Compliance violations (e.g., GDPR, SOC2 penalties)
    - Security breaches leading to reputation damage
    - Productivity loss from disruptive technical failures

    Lawsuits may be exceptional, but code security failures occur daily. Our role
    isn’t just about preventing catastrophic losses—it’s about avoiding the small,
    accumulating failures that erode security, compliance, and trust over time.

*** Case 2: Identifying Hidden Risks at Scale
    Another customer manages a massive software portfolio of 120,000+ distinct
    codebases—a scale at which traditional security tools and manual review
    processes become impractical.

    - A few known vulnerabilities had already been identified and patched.
    - Our analysis uncovered 30 additional high-risk instances, previously undetected.

    These findings were critical because:

    - Traditional security tools break down at scale. Most solutions work well for
      isolated codebases but lack the capability to analyze patterns across
      120,000 repositories.
    - Complexity hides risk. Identifying these vulnerabilities required specialized
      techniques beyond simple scanning—capable of handling variations,
      context, and subtle exploit paths.
    - Existing security processes failed to detect these vulnerabilities. Without
      proactive intervention, these risks would have remained undetected until
      a potential breach occurred.

    This case highlights a critical gap in standard security practices. By leveraging
    advanced, scalable analysis, we identified and mitigated risks that would have
    otherwise gone unnoticed—demonstrating the value of proactive security
    at scale.

** Why This Matters
   These examples, along with others, reinforce the importance of proactive
   security—especially in the context of MRVA. Security risks don’t just exist
   in theory; they have tangible business consequences.

   MRVA provides a scalable, systematic approach to identifying and addressing
   risks before they escalate—ensuring that security is a strategic advantage, not
   just a cost.

** What is MRVA?
   MRVA stands for /Multi-Repository Variant Analysis/. The concept is straightforward:

   1. A /problem/ is identified in one codebase.
   2. Variations of this problem (/variants/) can be defined.
   3. The organization manages many code repositories (/multi-repository/).
   4. A systematic /analysis/ is required to detect these variants across all repositories.

   In practice:
   - Steps 1 & 2: Defined through CodeQL queries, often custom-written for this purpose.
   - Steps 3 & 4: Can be done manually but come with significant challenges.

*** Challenges of Manual Execution
    Manually searching for these variants across multiple repositories is possible
    but inefficient and error-prone due to:

    - /High bookkeeping overhead/ – Tracking thousands of repositories is
      cumbersome.
    - /Heavy scripting requirements/ – Expert /Unix scripting skills/ are
      necessary.
    - /Scaling limitations/ – Analyzing /thousands of repositories sequentially/
      is slow, and manual parallelization is impractical.
    - /Cumbersome review process/ – Results are stored as /raw text files/,
      requiring multiple processing steps for meaningful analysis.

*** MRVA: A Streamlined, Integrated Solution
    Instead of relying on manual effort, MRVA is designed to /automate and
    integrate/ the process.

    - The system is designed to be /machine-driven/ and integrated into an
      automated pipeline.
    - Once incorporated, MRVA leverages the /CodeQL VS Code plugin/ to provide a
      /seamless user experience/.
    - How it works:
      - Users submit queries through the UI.
      - Results are retrieved and displayed dynamically as they become available.
      - The entire workflow is automated, scalable, and significantly more
        efficient than manual methods.

    By eliminating manual inefficiencies, MRVA enables organizations to identify
    and resolve security issues across massive codebases at scale, ensuring both
    accuracy and speed in vulnerability detection.
331 doc/mrva-interconnect.ltx Normal file
@@ -0,0 +1,331 @@
\documentclass[11pt]{article}

% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}

% increase nesting depth

\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=–}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=›}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}


% Load CM Bright for math
\usepackage{amsmath}   % Standard math package
\usepackage{amssymb}   % Additional math symbols
\usepackage{cmbright}  % Sans-serif math font that complements Fira Sans

\usepackage{fourier}

% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans}  % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault}  % Set sans-serif as default

% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
  language=Python,
  basicstyle=\ttfamily\small,         % Monospaced font
  commentstyle=\itshape\color{gray},  % Italic and gray for comments
  keywordstyle=\color{blue},          % Keywords in blue
  stringstyle=\color{red},            % Strings in red
  mathescape=true,                    % Enable math in comments
  breaklines=true,                    % Break long lines
  numbers=left,                       % Add line numbers
  numberstyle=\tiny\color{gray},      % Style for line numbers
  frame=single,                       % Add a frame around the code
}

\usepackage{newfloat}  % Allows creating custom float types

% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
  fileext=lol,
  listname=List of Listings,
  name=Listing
]{listing}

% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}

% Title, Author, and Date (or Report Number)
\title{MRVA component interconnections}
\author{Michael Hohn}
\date{Technical Report 20250524}

\hypersetup{
  pdfauthor={Michael Hohn},
  pdftitle={MRVA component interconnections},
  pdfkeywords={},
  pdfsubject={},
  pdfcreator={Emacs 29.1},
  pdflang={English}}

\begin{document}

\maketitle
\tableofcontents

\section{Overview}
\label{sec:overview}

The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:

\begin{itemize}
\item \textbf{Server}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}

The execution process follows a structured workflow:

\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
  set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}

This full round-trip can be expressed as:

\begin{equation}
  \text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
  \xrightarrow{\text{enqueue}}
  \text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
  \xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
  \text{Server} \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} \text{Client}
\end{equation}
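
As a minimal, illustrative sketch of this round trip (an in-process
approximation, not the actual service code; \lstinline|Job| and
\lstinline|run_queries| are placeholder names):

\begin{lstlisting}
from queue import Queue
from dataclasses import dataclass

@dataclass
class Job:                  # one (Q, R_i) pair: query suite + repository
    queries: list
    repo: str

def run_queries(job: Job):  # stands in for the agent's CodeQL execution
    return [(job.repo, q, "result") for q in job.queries]

jobs, results = Queue(), Queue()

# Client -> Server: submit suite Q for repository list R
Q, R = ["q1.ql", "q2.ql"], ["repo_a", "repo_b"]
for r in R:                 # Server enqueues one job per repository
    jobs.put(Job(Q, r))

while not jobs.empty():     # Agent polls the queue and executes
    results.put(run_queries(jobs.get()))

while not results.empty():  # Server forwards results to the client
    print(results.get())
\end{lstlisting}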
|
||||||
|
|
||||||
|
\section{Symbols and Notation}
|
||||||
|
\label{sec:orgb695d5a}
|
||||||
|
|
||||||
|
We define the following symbols for entities in the system:
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\begin{tabular}{lll}
|
||||||
|
Concept & Symbol & Description \\[0pt]
|
||||||
|
\hline
|
||||||
|
Client & \(C\) & The source of the query submission \\[0pt]
|
||||||
|
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
|
||||||
|
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
|
||||||
|
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
|
||||||
|
Agent Set & \(A\) & The set of all available agents \\[0pt]
|
||||||
|
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
|
||||||
|
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
|
||||||
|
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
|
||||||
|
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
|
||||||
|
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
|
||||||
|
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
|
||||||
|
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Full Round-Trip Representation}
|
||||||
|
\label{sec:full-round-trip}
|
||||||
|
The full round-trip execution, from query submission to result delivery, can be summarized as:
|
||||||
|
|
||||||
|
\[
|
||||||
|
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
|
||||||
|
\xrightarrow{\text{poll}}
|
||||||
|
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
|
||||||
|
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
|
||||||
|
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
|
||||||
|
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
|
||||||
|
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Result Representation}
|
||||||
|
|
||||||
|
For the complete collection of results across all repositories and queries:
|
||||||
|
\[
|
||||||
|
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
|
||||||
|
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
|
||||||
|
\]
|
||||||
|
|
||||||
|
where:
|
||||||
|
\begin{itemize}
|
||||||
|
\item \(N\) is the total number of repositories.
|
||||||
|
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
|
||||||
|
\item \(k_{i,j}\) is the number of results from executing query
|
||||||
|
\(\mathcal{Q}_j\)
|
||||||
|
on repository \(\mathcal{R}_i\).
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
|
||||||
|
\[
|
||||||
|
r_{i,j,k}
|
||||||
|
\]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\[
|
||||||
|
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
|
||||||
|
\]
|
||||||
|
|
||||||
|
Each result can be further indexed to track multiple repositories and result sets.
|
||||||
|
|
||||||
|
|
||||||
|
\section{Graph Extraction from Log Table}

Assume we have a structured event log represented as a set of tuples.

\subsection*{Event Log Structure}

Let
\[
\mathcal{T} = \{ t_1, t_2, \dots, t_n \}
\]
be the set of all events, where each event
\[
t_i = (\mathit{id}_i, \tau_i, a_i, e_i, q_i, r_i, c_i)
\]
consists of:
\begin{itemize}
\item \(\mathit{id}_i\): unique event ID
\item \(\tau_i\): timestamp
\item \(a_i\): actor (e.g., ``agent\_alpha1'')
\item \(e_i\): event type (e.g., ``enqueue'', ``execute'')
\item \(q_i\): query ID
\item \(r_i\): repository ID
\item \(c_i\): result count (may be \(\bot\) if not applicable)
\end{itemize}

Let
\[
\mathcal{G} = (V, E)
\]
be a directed graph constructed from \(\mathcal{T}\), with vertices \(V\) and edges \(E\).

\subsection*{Graph Definition}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &\subseteq V \times V
\end{align*}

Edges capture temporal or semantic relationships between events.

\subsection*{Construction Steps}

\paragraph{1. Partition by Job Identity}
Define the set of job identifiers:
\[
J = \{ (q, r) \mid \exists i: q_i = q \land r_i = r \}
\]
Then for each \((q, r) \in J\), define:
\[
\mathcal{T}_{q,r} = \{ t_i \in \mathcal{T} \mid q_i = q \land r_i = r \}
\]

\paragraph{2. Sort by Time}
Order each \(\mathcal{T}_{q,r}\) as a list:
\[
\mathcal{T}_{q,r} = [ t_{i_1}, t_{i_2}, \dots, t_{i_k} ]
\quad \text{such that } \tau_{i_j} < \tau_{i_{j+1}}
\]

\paragraph{3. Causal Edges}
Define within-job edges:
\[
E_{q,r} = \{ (\mathit{id}_{i_j}, \mathit{id}_{i_{j+1}}) \mid 1 \leq j < k \}
\]

\paragraph{4. Global Causal Graph}
Take the union:
\[
E_{\text{causal}} = \bigcup_{(q, r) \in J} E_{q,r}
\]

\paragraph{5. Semantic Edges (Optional)}
Define semantic predicates such as:
\[
\mathsf{pulls}(i, j) \iff e_i = \text{enqueue} \land e_j = \text{pull} \land
q_i = q_j \land r_i = r_j \land \tau_i < \tau_j \land a_i = \text{server} \land a_j = \text{agent}
\]
Then:
\[
E_{\text{semantic}} = \{ (\mathit{id}_i, \mathit{id}_j) \mid \mathsf{pulls}(i, j) \}
\]

\subsection*{Final Graph}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &= E_{\text{causal}} \cup E_{\text{semantic}}
\end{align*}

\subsection*{Notes}
\begin{itemize}
\item This construction is generic: the log store \(\mathcal{T}\) may come from a database, file, or tuple-indexed dictionary.
\item Each semantic edge rule corresponds to a logical filter/join over \(\mathcal{T}\).
\item The construction is schema-free on the graph side and can be recomputed on demand with different edge logic.
\end{itemize}
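
As a concrete illustration, the causal-edge construction translates directly
into code. The following Go sketch is illustrative only: the event layout and
the names (\texttt{Event}, \texttt{causalEdges}) are assumptions for this
example, not part of the system's source.

\begin{verbatim}
package main

import (
    "fmt"
    "sort"
)

// Event mirrors the tuple (id, tau, actor, eventType, queryID, repoID).
type Event struct {
    ID        int
    Tau       float64
    Actor     string
    EventType string
    QueryID   string
    RepoID    string
}

// Edge is a directed edge between event IDs.
type Edge struct{ From, To int }

// causalEdges partitions events by job identity (queryID, repoID),
// sorts each partition by timestamp, and links consecutive events.
func causalEdges(events []Event) []Edge {
    type jobKey struct{ Q, R string }
    jobs := map[jobKey][]Event{}
    for _, e := range events {
        k := jobKey{e.QueryID, e.RepoID}
        jobs[k] = append(jobs[k], e)
    }
    var edges []Edge
    for _, group := range jobs {
        sort.Slice(group, func(i, j int) bool { return group[i].Tau < group[j].Tau })
        for i := 0; i+1 < len(group); i++ {
            edges = append(edges, Edge{group[i].ID, group[i+1].ID})
        }
    }
    return edges
}

func main() {
    events := []Event{
        {1, 10, "server", "enqueue", "q1", "r1"},
        {2, 11, "agent", "pull", "q1", "r1"},
        {3, 12, "agent", "execute", "q1", "r1"},
    }
    fmt.Println(causalEdges(events)) // [{1 2} {2 3}]
}
\end{verbatim}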

\end{document}

%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: nil
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
BIN  doc/mrva-interconnect.pdf  (new file; binary file not shown)
BIN  doc/mrva-overview.pdf  (new file; binary file not shown)
605  doc/mrva-overview.tex  (new file)
@@ -0,0 +1,605 @@
\documentclass[11pt]{article}

% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}

% increase nesting depth
\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=–}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=›}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}

% Load CM Bright for math
\usepackage{amsmath} % Standard math package
\usepackage{amssymb} % Additional math symbols
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans

\usepackage{fourier}

% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default

% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
  language=Python,
  basicstyle=\ttfamily\small,        % Monospaced font
  commentstyle=\itshape\color{gray}, % Italic and gray for comments
  keywordstyle=\color{blue},         % Keywords in blue
  stringstyle=\color{red},           % Strings in red
  mathescape=true,                   % Enable math in comments
  breaklines=true,                   % Break long lines
  numbers=left,                      % Add line numbers
  numberstyle=\tiny\color{gray},     % Style for line numbers
  frame=single,                      % Add a frame around the code
}

\usepackage{newfloat} % Allows creating custom float types

% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
  fileext=lol,
  listname=List of Listings,
  name=Listing
]{listing}

% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}

% Title, Author, and Date (or Report Number)
\title{MRVA for CodeQL}
\author{Michael Hohn}
\date{Technical Report 20250224}

\hypersetup{
  pdfauthor={Michael Hohn},
  pdftitle={MRVA for CodeQL},
  pdfkeywords={},
  pdfsubject={},
  pdfcreator={Emacs 29.1},
  pdflang={English}}

\begin{document}

\maketitle
\tableofcontents
\section{MRVA System Architecture Summary}

The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:
\begin{itemize}
\item \textbf{Server}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}

On the client side, users can interact with the system in two ways:
\begin{itemize}
\item \textbf{VSCode-CodeQL}: A graphical interface integrated with Visual Studio Code.
\item \textbf{gh-mrva CLI}: A command-line interface that connects to the server in a similar way.
\end{itemize}

This architecture enables a robust and flexible workflow for code analysis, combining a containerized back-end with both graphical and CLI front-end tools.

The full system details can be seen in the source code. This document provides an
overview.
\section{Distributed Query Execution in MRVA}

\subsection{Execution Overview}

The \textit{MRVA system} is a distributed platform for executing \textit{CodeQL
queries} across multiple repositories using a set of worker agents. The system is
containerized and built around a set of core services:

\begin{itemize}
\item \textbf{Server}: Coordinates job distribution and result aggregation.
\item \textbf{Agents}: Execute queries independently and return results.
\item \textbf{RabbitMQ}: Handles messaging between system components.
\item \textbf{MinIO}: Stores query inputs and execution results.
\item \textbf{HEPC}: Serves CodeQL databases over HTTP.
\end{itemize}

Clients interact with MRVA via \texttt{VSCode-CodeQL} (a graphical interface) or
\texttt{gh-mrva CLI} (a command-line tool), both of which submit queries to the
server.

The execution process follows a structured workflow:

\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
  set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}

This full round-trip can be expressed as:

\begin{equation}
\text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
\xrightarrow{\text{enqueue}}
\text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
\xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
\text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}

where the Client submits queries to the Server, which enqueues jobs in the
Queue. Agents execute the queries, returning results $\mathcal{Q}(\mathcal{R}_i)$
to the Server and ultimately back to the Client.

A more rigorous description of this is in section \ref{sec:full-round-trip}.
\subsection{System Structure Overview}

This design allows for scalable and efficient query execution across multiple
repositories, whether on a single machine or a distributed cluster. The key idea
is that both setups follow the same structural approach:

\begin{itemize}
\item \textbf{Single machine setup:}
  \begin{itemize}
  \item Uses \textit{at least 5 Docker containers} to manage different
    components of the system.
  \item The number of \textit{agent containers} (responsible for executing
    queries) is constrained by the available \textit{RAM and CPU cores}.
  \end{itemize}

\item \textbf{Cluster setup:}
  \begin{itemize}
  \item Uses \textit{at least 5 virtual machines (VMs) and/or Docker containers}.
  \item The number of \textit{agent VMs} is limited by \textit{network bandwidth
    and available resources} (e.g., distributed storage and inter-node communication
    overhead).
  \end{itemize}
\end{itemize}

Thus:
\begin{itemize}
\item The functional architecture is identical between the single-machine and cluster setups.
\item The primary difference is in \textit{scale}:
  \begin{itemize}
  \item A single machine is limited by \textit{local CPU and RAM}.
  \item A cluster is constrained by \textit{network and inter-node coordination overhead} but allows for higher overall compute capacity.
  \end{itemize}
\end{itemize}
\subsection{Messages and their Types}
\label{sec:msg-types}
The following table enumerates the types (messages) passed from Client to Server.

\begin{longtable}{|p{5cm}|p{5cm}|p{5cm}|}
\hline
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
\hline
\endfirsthead

\hline
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
\hline
\endhead

\hline
\endfoot

\hline
\endlastfoot

ServerState & NextID & () $\rightarrow$ int \\
 & GetResult & JobSpec $\rightarrow$ IO (Either Error AnalyzeResult) \\
 & GetJobSpecByRepoId & (int, int) $\rightarrow$ IO (Either Error JobSpec) \\
 & SetResult & (JobSpec, AnalyzeResult) $\rightarrow$ IO () \\
 & GetJobList & int $\rightarrow$ IO (Either Error \textbf{[AnalyzeJob]}) \\
 & GetJobInfo & JobSpec $\rightarrow$ IO (Either Error JobInfo) \\
 & SetJobInfo & (JobSpec, JobInfo) $\rightarrow$ IO () \\
 & GetStatus & JobSpec $\rightarrow$ IO (Either Error Status) \\
 & SetStatus & (JobSpec, Status) $\rightarrow$ IO () \\
 & AddJob & AnalyzeJob $\rightarrow$ IO () \\
\hline
JobSpec & sessionID & int \\
 & nameWithOwner & string \\
\hline
AnalyzeResult & spec & JobSpec \\
 & status & Status \\
 & resultCount & int \\
 & resultLocation & ArtifactLocation \\
 & sourceLocationPrefix & string \\
 & databaseSHA & string \\
\hline
ArtifactLocation & Key & string \\
 & Bucket & string \\
\hline
AnalyzeJob & Spec & JobSpec \\
 & QueryPackLocation & ArtifactLocation \\
 & QueryLanguage & QueryLanguage \\
\hline
QueryLanguage & & string \\
\hline
JobInfo & QueryLanguage & string \\
 & CreatedAt & string \\
 & UpdatedAt & string \\
 & SkippedRepositories & SkippedRepositories \\
\hline
SkippedRepositories & AccessMismatchRepos & AccessMismatchRepos \\
 & NotFoundRepos & NotFoundRepos \\
 & NoCodeqlDBRepos & NoCodeqlDBRepos \\
 & OverLimitRepos & OverLimitRepos \\
\hline
AccessMismatchRepos & RepositoryCount & int \\
 & Repositories & \textbf{[Repository]} \\
\hline
NotFoundRepos & RepositoryCount & int \\
 & RepositoryFullNames & \textbf{[string]} \\
\hline
Repository & ID & int \\
 & Name & string \\
 & FullName & string \\
 & Private & bool \\
 & StargazersCount & int \\
 & UpdatedAt & string \\
\end{longtable}
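
For orientation, these record types map directly onto Go struct definitions.
The sketch below is reconstructed from the table only; the authoritative
definitions live in the source tree (e.g., \texttt{pkg/common}) and may differ
in detail. \texttt{Status} is left abstract here.

\begin{lstlisting}[language=Go]
package common

// Status is a placeholder; its concrete representation is not
// shown in the table above.
type Status int

type JobSpec struct {
    SessionID     int
    NameWithOwner string
}

type ArtifactLocation struct {
    Key    string
    Bucket string
}

type AnalyzeResult struct {
    Spec                 JobSpec
    Status               Status
    ResultCount          int
    ResultLocation       ArtifactLocation
    SourceLocationPrefix string
    DatabaseSHA          string
}

type QueryLanguage string

type AnalyzeJob struct {
    Spec              JobSpec
    QueryPackLocation ArtifactLocation
    QueryLanguage     QueryLanguage
}
\end{lstlisting}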

\section{Symbols and Notation}
\label{sec:orgb695d5a}

We define the following symbols for entities in the system:

\begin{center}
\begin{tabular}{lll}
Concept & Symbol & Description \\[0pt]
\hline
Client & \(C\) & The source of the query submission \\[0pt]
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
Agent Set & \(A\) & The set of all available agents \\[0pt]
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
\end{tabular}
\end{center}
\section{Full Round-Trip Representation}
\label{sec:full-round-trip}
The full round-trip execution, from query submission to result delivery, can be summarized as:

\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
\xrightarrow{\text{poll}}
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
\]

\begin{itemize}
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
\end{itemize}

\section{Result Representation}

For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]

where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query \(\mathcal{Q}_j\)
  on repository \(\mathcal{R}_i\).
\end{itemize}

An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
\[
r_{i,j,k}
\]

\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
\]

Each result can be further indexed to track multiple repositories and result sets.
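
This indexing scheme is easy to mirror in code. The following Go fragment is a
minimal sketch (the names are invented for illustration) of the nested result
storage, keyed first by repository and then by query:

\begin{lstlisting}[language=Go]
// results[repo][query] holds the k results of one query on one repository.
type ResultSet map[string]map[string][]string

func accumulate(all ResultSet, repo, query string, rs []string) {
    if all[repo] == nil {
        all[repo] = map[string][]string{}
    }
    all[repo][query] = append(all[repo][query], rs...)
}
\end{lstlisting}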
\section{Execution Loop in Pseudo-Code}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm}

\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results

# Initialization
$\mathcal{R}$ = set()  # Repository list
$Q$ = []               # Job queue
$A$ = set()            # Set of agents
$\mathcal{R}_i^{\mathcal{Q}}$ = {}  # Result storage for each repository

# Initialize result sets for each repository
for $R_i$ in $\mathcal{R}$:
    $\mathcal{R}_i^{\mathcal{Q}} = \{\}$  # Initialize empty result set

# Enqueue the entire query suite for all repositories
for $R_i$ in $\mathcal{R}$:
    $Q$.append(($\mathcal{Q}$, $R_i$))  # Enqueue $(\mathcal{Q}, \mathcal{R}_i)$ pair

# Processing loop while there are jobs in the queue
while $Q \neq \emptyset$:
    # Agents autonomously poll the queue
    for $\alpha$ in $A$:
        if $\alpha$.is_available() and $Q \neq \emptyset$:
            $(\mathcal{Q}, \mathcal{R}_i)$ = $Q$.pop(0)  # Agent polls a job

            # Agent execution begins
            $\mathcal{R}_i^{\mathcal{Q}} = \{\}$  # Initialize results for repository $R_i$

            for $\mathcal{Q}_j$ in $\mathcal{Q}$:
                # Execute query $\mathcal{Q}_j$ on repository $\mathcal{R}_i$
                $r_{i,j,1}, \dots, r_{i,j,k_{i,j}}$ = $\alpha$.execute($\mathcal{Q}_j$, $R_i$)

                # Store results for query $j$
                $\mathcal{R}_i^{\mathcal{Q}_j} = \{r_{i,j,1}, \dots, r_{i,j,k_{i,j}}\}$

                # Accumulate results
                $\mathcal{R}_i^{\mathcal{Q}} = \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}$

            # Send all accumulated results back to the server
            $\alpha$.send_results($S$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))

            # Server sends results for $(\mathcal{Q}, \mathcal{R}_i)$ back to the client
            $S$.send_results_to_client($C$, ($\mathcal{Q}$, $R_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
\end{lstlisting}
\end{listing}
\FloatBarrier

\section{Execution Loop in Pseudo-Code, declarative}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm}

\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results

# Define initial state
$\mathcal{R}$: set   # Set of repositories
$\mathcal{Q}$: set   # Set of queries
A: set               # Set of agents
Q: list              # Queue of $(\mathcal{Q}, \mathcal{R}_i)$ pairs
$\mathcal{R}_{\text{results}}$: dict = {}  # Mapping of repositories to their accumulated query results

# Initialize result sets for each repository
$\mathcal{R}_{\text{results}}$ = {$\mathcal{R}_i$: set() for $\mathcal{R}_i$ in $\mathcal{R}$}

# Define job queue as an immutable mapping
Q = [($\mathcal{Q}$, $\mathcal{R}_i$) for $\mathcal{R}_i$ in $\mathcal{R}$]

# Processing as a declarative iteration over the job queue
def execute_queries(agents, job_queue, repository_results):
    def available_agents():
        return {$\alpha$ for $\alpha$ in agents if $\alpha$.is_available()}

    def process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$):
        results = {$\mathcal{Q}_j$: $\alpha$.execute($\mathcal{Q}_j$, $\mathcal{R}_i$) for $\mathcal{Q}_j$ in $\mathcal{Q}$}
        return $\mathcal{R}_i$, results

    def accumulate_results($\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$, query_results):
        return {**$\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$: $\mathcal{R}_{\text{results}}$[$\mathcal{R}_i$] | set().union(*query_results.values())}

    while job_queue:
        for $\alpha$ in available_agents():
            if not job_queue:
                break
            $\mathcal{Q}$, $\mathcal{R}_i$ = job_queue[0]  # Take the first job
            _, query_results = process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$)
            repository_results = accumulate_results(repository_results, $\mathcal{R}_i$, query_results)

            $\alpha$.send_results(S, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
            S.send_results_to_client(C, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))

            job_queue = job_queue[1:]  # Move to the next job

    return repository_results

# Execute the distributed query process
$\mathcal{R}_{\text{results}}$ = execute_queries(A, Q, $\mathcal{R}_{\text{results}}$)
\end{lstlisting}
\end{listing}
\FloatBarrier

\newpage{}
\section{Execution Loop in Pseudo-Code, algorithmic}
\begin{algorithm}
\caption{Distribute a set of queries $\mathcal{Q}$ across repositories
  $\mathcal{R}$ using agents $A$}
\begin{algorithmic}[1] % Line numbering enabled
\Procedure{DistributedQueryExecution}{$\mathcal{Q}, \mathcal{R}, A$}

\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
  \Comment{Initialize result sets for each repository and query}
  \State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$
\EndFor

\State $Q \gets \left\{ \, \right\}$ \Comment{Initialize empty job queue}

\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
  \Comment{Enqueue the entire query suite across all repositories}
  \State $S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q$
\EndFor

\While{$Q \neq \emptyset$}
  \Comment{Agents poll the queue for available jobs}

  \ForAll{$\alpha \in A$ \textbf{where} $\alpha$ \text{is available}}
    \State $\alpha \xleftarrow{\text{poll}(Q)}$ \Comment{Agent autonomously retrieves a job}

    % --- Begin Agent Execution Block ---
    \State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Begins}

    \State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$ \Comment{Initialize result set for this repository}

    \ForAll{$\mathcal{Q}_j \in \mathcal{Q}$}
      \State $\mathcal{R}_i^{\mathcal{Q}_j} \gets \left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}$
      \Comment{Collect results for query $j$ on repository $i$}

      \State $\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}$
      \Comment{Accumulate results}
    \EndFor

    \State $\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S$
    \Comment{Agent sends all accumulated results back to server}

    \State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Ends}
    % --- End Agent Execution Block ---

    \State $S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C$
    \Comment{Server sends results for repository $i$ back to the client}

  \EndFor

\EndWhile

\EndProcedure
\end{algorithmic}
\end{algorithm}

\FloatBarrier

\section{Execution Loop in Pseudo-Code, hybrid}
\label{sec:orgb767ab2}
\textbf{Algorithm:} Distribute a set of queries \(\mathcal{Q}\) across repositories \(\mathcal{R}\) using agents \(A\)

\begin{enumerate}
\item \textbf{Initialization}
  \begin{itemize}
  \item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
    \begin{itemize}
    \item Initialize result sets: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
    \end{itemize}
  \item Initialize an empty job queue: \(Q \gets \{\}\).
  \end{itemize}

\item \textbf{Enqueue Queries}
  \begin{itemize}
  \item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
    \begin{itemize}
    \item Enqueue the entire query suite: \(S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q\).
    \end{itemize}
  \end{itemize}

\item \textbf{Execution Loop}
  \begin{itemize}
  \item While \(Q \neq \emptyset\): (agents poll the queue for available jobs)
    \begin{itemize}
    \item For each available agent \(\alpha \in A\):
      \begin{itemize}
      \item Agent autonomously retrieves a job: \(\alpha \xleftarrow{\text{poll}(Q)}\).

      \item \textbf{Agent Execution Block}
        \begin{itemize}
        \item Initialize result set for this repository: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
        \item For each query \(\mathcal{Q}_j \in \mathcal{Q}\):
          \begin{itemize}
          \item Collect results:
            \(\mathcal{R}_i^{\mathcal{Q}_j} \gets \{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \}\).
          \item Accumulate results:
            \(\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}\).
          \end{itemize}
        \item Agent sends all accumulated results back to the server:
          \(\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S\).
        \end{itemize}
      \end{itemize}
    \end{itemize}
  \end{itemize}

\item \textbf{Server Sends Results}
  \begin{itemize}
  \item Server sends results for repository \(i\) back to the client:
    \(S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C\).
  \end{itemize}
\end{enumerate}
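
Finally, the loop above can be condensed into a small runnable sketch. The
following Go program illustrates the control flow only; all names are invented
for this example and none of the actual MRVA packages are used. A channel
plays the role of the queue \(Q\) and goroutines play the role of the agents.

\begin{lstlisting}[language=Go]
package main

import (
    "fmt"
    "sync"
)

type Job struct{ Repo string }

// execute stands in for running the full query suite on one repository.
func execute(agent int, queries []string, job Job) []string {
    var results []string
    for _, q := range queries {
        results = append(results, fmt.Sprintf("%s:%s@agent%d", job.Repo, q, agent))
    }
    return results
}

func main() {
    queries := []string{"q1", "q2"}
    repos := []string{"r1", "r2", "r3"}

    jobs := make(chan Job, len(repos)) // the queue
    for _, r := range repos {
        jobs <- Job{Repo: r} // server enqueues one job per repository
    }
    close(jobs)

    resultSets := make(chan []string, len(repos))
    var wg sync.WaitGroup
    for a := 1; a <= 2; a++ { // two agents poll the queue
        wg.Add(1)
        go func(agent int) {
            defer wg.Done()
            for job := range jobs { // poll
                resultSets <- execute(agent, queries, job) // return to server
            }
        }(a)
    }
    wg.Wait()
    close(resultSets)

    for rs := range resultSets { // server forwards to client
        fmt.Println(rs)
    }
}
\end{lstlisting}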

\end{document}

%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: t
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
56  doc/mrva.dot  (new file)
@@ -0,0 +1,56 @@
digraph mrvacommander {
    rankdir=LR;
    node [shape=box style=filled fillcolor=lightgrey fontname="monospace"];

    // Entry points
    cmd_server [label="cmd/server\nmain()", fillcolor=lightblue];
    cmd_agent [label="cmd/agent\nmain()", fillcolor=lightblue];

    // Config
    config [label="config/mcc\nparseEnv()", shape=ellipse, fillcolor=lightyellow];

    // Server-side
    server [label="pkg/server\nServer.Run()"];
    deploy [label="pkg/deploy\nInit()"];
    qldbstore [label="pkg/qldbstore\nQLDB Store"];
    artifactstore [label="pkg/artifactstore\nArtifact Store"];
    queue [label="pkg/queue\nQueue Interface"];

    // Agent-side
    agent [label="pkg/agent\nAgent.Run()"];
    state [label="pkg/state\nState"];
    codeql [label="pkg/codeql\nrunCodeQL()"];

    // Common
    common [label="pkg/common\nTypes, MinIO, Jobs"];
    utils [label="utils\nDownload, Archive"];

    // Edges: config used by both
    cmd_server -> config;
    cmd_agent -> config;

    // Server wiring
    cmd_server -> server;
    server -> queue;
    server -> artifactstore;
    server -> qldbstore;

    // Agent wiring
    cmd_agent -> agent;
    agent -> queue;
    agent -> codeql;
    agent -> artifactstore;
    agent -> state;

    // Shared deps
    server -> common;
    agent -> common;
    codeql -> common;
    qldbstore -> common;
    artifactstore -> common;

    // Utils used by backends
    qldbstore -> utils;
    artifactstore -> utils;
    codeql -> utils;
}
84  doc/mrva.man  (new file)
@@ -0,0 +1,84 @@
.TH MRVACOMMANDER 7 "April 2025" "MRVA Project" "System Overview"
.SH NAME
mrvacommander \- distributed CodeQL task queue and execution system
.SH SYNOPSIS
.B server
.RI [ environment ]
.br
.B agent
.RI [ environment ]
.SH DESCRIPTION
mrvacommander coordinates analysis jobs over multiple worker nodes using queues, pluggable storage, and CodeQL execution. It consists of multiple interacting packages and entry points.

.SH STRUCTURE
.TP
.B cmd/server
Entry point. Loads configuration, initializes dependencies, and runs the queue subscriber with a dispatcher.
.TP
.B cmd/agent
Entry point. Loads configuration and runs a processing loop: receive job, execute query, save result, update state.

.SH CONFIGURATION
.TP
.B config/mcc
Parses environment variables into structured configuration. Modules include:
.IR queue ,
.IR storage ,
.IR logger ,
.IR commander .

.SH SERVER SIDE MODULES
.TP
.B pkg/server
Initializes the queue backend, the QLDB store, and the artifact store.
Subscribes to the queue and dispatches jobs to a handler.
.TP
.B pkg/deploy
Deployment helpers: validate environment variables, bootstrap key services.

.SH AGENT SIDE MODULES
.TP
.B pkg/agent
Receives jobs, executes CodeQL queries, stores outputs, marks completion.
.TP
.B pkg/state
Tracks which jobs have been completed. Local file-backed.

.SH SHARED MODULES
.TP
.B pkg/common
Core types: Job, JobOutput, NameWithOwner, Query.
Includes MinIO wrappers, external API access, and job spec parsing.
.TP
.B pkg/codeql
Defines the query structure and executes CodeQL against a database.
.TP
.B pkg/qldbstore
Provides read-only access to CodeQL databases via:
- MinIO (S3)
- HTTP (hepc)
- Filesystem
.TP
.B pkg/artifactstore
Persists job results. Implementations:
- MinIO
- Memory
.TP
.B pkg/queue
Job queue interface. Implementations:
- RabbitMQ
- In-memory single-node
.TP
.B utils
Generic helpers:
- HTTP download
- tar.gz extraction
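.SH EXAMPLES
.\" The flags below are taken from the demo compose files in this
.\" repository; they are illustrative, not a complete flag reference.
Run the server and one agent with debug logging, as in the demo compose files:
.PP
.nf
server --mode=container --loglevel=debug
agent --loglevel=debug
.fi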
.SH SEE ALSO
.BR codeql (1),
.BR rabbitmq-server (1),
.BR minio (1)
BIN  doc/mrva.pdf  (new file; binary file not shown)
129  docker-compose-demo-build.yml  (new file)
@@ -0,0 +1,129 @@
# This is the compose configuration used to build / prepopulate the containers
# for a demo.
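#
# Typical usage (assumed invocation; adjust to your setup):
#   docker compose -f docker-compose-demo-build.yml build
#   docker compose -f docker-compose-demo-build.yml up -d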
services:
  dbssvc:
    ## image: ghcr.io/hohn/dbsdata-container:0.1.24
    build:
      context: ./demo/containers/dbsdata
      dockerfile: Dockerfile
    container_name: dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    ## image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    build:
      context: .
      dockerfile: ./client/containers/ghmrva/Dockerfile
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    ## image: ghcr.io/hohn/code-server-initialized:0.1.24
    build:
      context: ./client/containers/vscode
      dockerfile: Dockerfile
    ports:
      - "9080:9080"
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    build:
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    ports:
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    ## image: ghcr.io/hohn/mrva-agent:0.1.24
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
116  docker-compose-demo.yml  (new file)
@@ -0,0 +1,116 @@
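# Demo runtime configuration using the prebuilt images referenced below.
# Typical usage (assumed invocation): docker compose -f docker-compose-demo.yml up -d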
services:
  dbssvc:
    # dbsdata-container:0.1.24
    image: ghcr.io/hohn/dbsdata-container:0.1.24
    command: tail -f /dev/null # Keep the container running
    # volumes:
    #   - /qldb # Directory inside the container that contains the data
    volumes:
      - dbsdata:/data
    container_name: dbssvc
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    # volumes_from:
    #   - dbsdata # Use the volumes from dbsdata container
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    image: ghcr.io/hohn/code-server-initialized:0.1.24
    ports:
      - "9080:9080"
    # XX: Include codeql binary in code-server (if it's not there already)
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    image: ghcr.io/hohn/mrva-server:0.1.24
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    image: ghcr.io/hohn/mrva-agent:0.1.24
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
@@ -7,37 +7,36 @@ services:
     volumes:
       - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
       - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
-    expose:
-      - "5672"
-      - "15672"
     ports:
       - "5672:5672"
       - "15672:15672"
     networks:
       - backend
     healthcheck:
-      test: [ "CMD", "nc", "-z", "localhost", "5672" ]
-      interval: 5s
-      timeout: 15s
-      retries: 1
+      test: rabbitmq-diagnostics check_port_connectivity
+      interval: 30s
+      timeout: 30s
+      retries: 10

   server:
     build:
-      context: ./cmd/server
-      dockerfile: Dockerfile
+      context: .
+      dockerfile: ./cmd/server/Dockerfile
+    command: [ '--mode=container', '--loglevel=debug' ]
     container_name: server
-    stop_grace_period: 1s # Reduce the timeout period for testing
-    environment:
-      - MRVA_SERVER_ROOT=/mrva/mrvacommander/cmd/server
-    command: sh -c "tail -f /dev/null"
+    stop_grace_period: 1s
     ports:
-      - "8080:8080"
-    volumes:
-      - ./:/mrva/mrvacommander
+      # - "8081:8080" # host:container for proxy
+      - "8080:8080" # host:container
     depends_on:
       - rabbitmq
+      - dbstore
+      - artifactstore
     networks:
       - backend
+    env_file:
+      - path: ./.env.container
+        required: true

   dbstore:
     image: minio/minio:RELEASE.2024-06-11T03-13-30Z
@@ -45,49 +44,46 @@ services:
     ports:
       - "9000:9000"
       - "9001:9001"
-    environment:
-      MINIO_ROOT_USER: user
-      MINIO_ROOT_PASSWORD: mmusty8432
+    env_file:
+      - path: .env.container
+        required: true
     command: server /data --console-address ":9001"
     volumes:
       - ./dbstore-data:/data
+    networks:
+      - backend

-  qpstore:
+  artifactstore:
     image: minio/minio:RELEASE.2024-06-11T03-13-30Z
-    container_name: qpstore
+    container_name: artifactstore
     ports:
       - "19000:9000" # host:container
       - "19001:9001"
-    environment:
-      MINIO_ROOT_USER: user
-      MINIO_ROOT_PASSWORD: mmusty8432
+    env_file:
+      - path: ./.env.container
+        required: true
     command: server /data --console-address ":9001"
     volumes:
       - ./qpstore-data:/data
+    networks:
+      - backend

   agent:
     build:
       context: .
       dockerfile: ./cmd/agent/Dockerfile
+    command: [ '--loglevel=debug' ]
     container_name: agent
     depends_on:
       - rabbitmq
       - dbstore
-      - qpstore
-    environment:
-      MRVA_RABBITMQ_HOST: rabbitmq
-      MRVA_RABBITMQ_PORT: 5672
-      MRVA_RABBITMQ_USER: user
-      MRVA_RABBITMQ_PASSWORD: password
+      - artifactstore
+    env_file:
+      - path: ./.env.container
+        required: true
     networks:
       - backend

 networks:
   backend:
     driver: bridge

-# Remove named volumes to use bind mounts
-# volumes:
-#   minio-data:
22  experimental/qldb-specification/readme.org  (new file)
@@ -0,0 +1,22 @@
* tuple hashing functions across languages
There are three parallel implementations of a hash for every entry of a tuple
list. The functions produce identical results across 3 languages and can be
used across agent / server / client.

#+BEGIN_SRC sh
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ node tuple-hash.js
[
  '91b80a9933218ff5bc62df8ff71f1252',
  'b0934b29293e91aefaac73c99fc75e94'
]

hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ python3 tuple-hash.py
['91b80a9933218ff5bc62df8ff71f1252', 'b0934b29293e91aefaac73c99fc75e94']

hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ go run tuple-hash.go
[91b80a9933218ff5bc62df8ff71f1252 b0934b29293e91aefaac73c99fc75e94]
#+END_SRC
28  experimental/qldb-specification/tuple-hash.go  (new file)
@@ -0,0 +1,28 @@
package main

import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
)

func main() {
	atl_L := [][2]interface{}{
		{1, "s1"},
		{2, "str"},
	}

	var sl_hash []string

	for _, item := range atl_L {
		jsonBytes, err := json.Marshal(item)
		if err != nil {
			panic(err)
		}
		sum := md5.Sum(jsonBytes)
		sl_hash = append(sl_hash, hex.EncodeToString(sum[:]))
	}

	fmt.Println(sl_hash)
}
9  experimental/qldb-specification/tuple-hash.js  (new file)
@@ -0,0 +1,9 @@
const crypto = require("crypto");

const atl_L = [[1, "s1"], [2, "str"]];
const sl_hash = atl_L.map(item => {
  const json = JSON.stringify(item);
  return crypto.createHash("md5").update(json).digest("hex");
});

console.log(sl_hash);
12  experimental/qldb-specification/tuple-hash.py  (new file)
@@ -0,0 +1,12 @@
import hashlib
import json

atl_L = [(1, "s1"), (2, "str")]
sl_hash = []

for item in atl_L:
    # Compact separators match JSON.stringify and Go's json.Marshal output.
    encoded = json.dumps(item, separators=(',', ':')).encode("utf-8")
    md5sum = hashlib.md5(encoded).hexdigest()
    sl_hash.append(md5sum)

print(sl_hash)
17  go.mod
@@ -1,43 +1,34 @@
-module mrvacommander
+module github.com/hohn/mrvacommander

 go 1.22.0

 require (
 	github.com/BurntSushi/toml v1.4.0
-	github.com/elastic/go-sysinfo v1.14.0
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/mux v1.8.1
+	github.com/jackc/pgx/v5 v5.6.0
 	github.com/minio/minio-go/v7 v7.0.71
 	github.com/rabbitmq/amqp091-go v1.10.0
-	golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
+	golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
 	gopkg.in/yaml.v3 v3.0.1
-	gorm.io/driver/postgres v1.5.9
-	gorm.io/gorm v1.25.10
 )

 require (
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/elastic/go-windows v1.0.1 // indirect
 	github.com/goccy/go-json v0.10.2 // indirect
 	github.com/jackc/pgpassfile v1.0.0 // indirect
 	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.6.0 // indirect
 	github.com/jackc/puddle/v2 v2.2.1 // indirect
-	github.com/jinzhu/inflection v1.0.0 // indirect
-	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/klauspost/compress v1.17.6 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.6 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/minio/md5-simd v1.1.2 // indirect
-	github.com/pkg/errors v0.9.1 // indirect
-	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/rogpeppe/go-internal v1.12.0 // indirect
 	github.com/rs/xid v1.5.0 // indirect
 	golang.org/x/crypto v0.24.0 // indirect
 	golang.org/x/net v0.23.0 // indirect
-	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/sync v0.9.0 // indirect
 	golang.org/x/sys v0.21.0 // indirect
 	golang.org/x/text v0.16.0 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
-	howett.net/plist v1.0.1 // indirect
 )
32  go.sum
@@ -6,14 +6,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/elastic/go-sysinfo v1.14.0 h1:dQRtiqLycoOOla7IflZg3aN213vqJmP0lpVpKQ9lUEY=
-github.com/elastic/go-sysinfo v1.14.0/go.mod h1:FKUXnZWhnYI0ueO7jhsGV3uQJ5hiz8OqM5b3oGyaRr8=
-github.com/elastic/go-windows v1.0.1 h1:AlYZOldA+UJ0/2nBuqWdo90GFCgG9xuyw9SYzGUtJm0=
-github.com/elastic/go-windows v1.0.1/go.mod h1:FoVvqWSun28vaDQPbj2Elfc0JahhPB7WQEGa3c814Ss=
 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
-github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
-github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
@@ -26,11 +20,6 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
 github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
 github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
 github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
-github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
-github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
-github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
-github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
-github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
 github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
 github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
@@ -44,13 +33,8 @@ github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
 github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
 github.com/minio/minio-go/v7 v7.0.71 h1:No9XfOKTYi6i0GnBj+WZwD8WP5GZfL7n7GOjRqCdAjA=
 github.com/minio/minio-go/v7 v7.0.71/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo=
-github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
-github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw=
 github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
 github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
@@ -66,13 +50,12 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
 golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
-golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
-golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
+golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
+golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
 golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
 golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
+golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
 golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
@@ -83,13 +66,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
|
||||||
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
|
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
|
||||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gorm.io/driver/postgres v1.5.9 h1:DkegyItji119OlcaLjqN11kHoUgZ/j13E0jkJZgD6A8=
|
|
||||||
gorm.io/driver/postgres v1.5.9/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI=
|
|
||||||
gorm.io/gorm v1.25.10 h1:dQpO+33KalOA+aFYGlK+EfxcI5MbO7EP2yYygwh9h+s=
|
|
||||||
gorm.io/gorm v1.25.10/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
|
|
||||||
howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM=
|
|
||||||
howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
|
||||||

@@ -1,11 +1,22 @@
 {
     "folders": [
         {
+            "name": "mrvaagent",
+            "path": "../mrvaagent"
+        },
+        {
+            "name": "mrvacommander",
             "path": "."
+        },
+        {
+            "name": "mrvaserver",
+            "path": "../mrvaserver"
         }
     ],
     "settings": {
         "sarif-viewer.connectToGithubCodeScanning": "off",
-        "codeQL.githubDatabase.download": "never"
+        "codeQL.githubDatabase.download": "never",
+        "makefile.configureOnOpen": false,
+        "git.ignoreLimitWarning": true
     }
 }
Some files were not shown because too many files have changed in this diff