Compare commits

171 Commits

| SHA1 |
|---|
| 47de30a56e |
| d2c7b98d1c |
| fb5adf1b5f |
| 750187fb12 |
| 807d5f3d45 |
| 1377d4cec9 |
| ec8bb0cc63 |
| 8d7aa780ed |
| 43a7143e27 |
| 0d6e31713f |
| a2cfe0676c |
| f920a799d3 |
| 41146f5aaf |
| 173a61e3fa |
| e294fcdf4f |
| 9fe6aed357 |
| 3762654ef2 |
| d94f69be09 |
| 1fd220416c |
| df97e6ef10 |
| 2e99bdfedf |
| a507797eff |
| 0115e74d07 |
| 8577e1775a |
| 8590bd6de7 |
| cf37b474e4 |
| 5bdbd60cc5 |
| bde8ac2db7 |
| 75e57dc0a8 |
| c32ff755ef |
| 19a936087f |
| bb6189322a |
| f7dc5318e4 |
| 70c06e4fae |
| a2be014b2f |
| 58f4fe1ca7 |
| 14d6057248 |
| 01ddf38069 |
| 47a021d84a |
| 8d4c766e8c |
| 2409728960 |
| f066c767e2 |
| 397b86c735 |
| 511c544f6e |
| bd74ed646f |
| 45e40abf5d |
| a3593cbba2 |
| a0185df9d5 |
| 23e3ea9367 |
| 4140eaafc4 |
| 3e47bd4adb |
| f92dfc89a2 |
| a5bb232af2 |
| 008708469c |
| 37d5b1c6c1 |
| 1302db0b4e |
| c624925aba |
| e3e91534a0 |
| af043f3f59 |
| 8ea453f8b0 |
| 3f24fbb07d |
| de0d1b7434 |
| be7cc3b0cf |
| ba66cb9258 |
| baf20fa7af |
| 6bfcbb33ea |
| 9d6587872c |
| f809917c2e |
| a22d8d77f2 |
| 92a22f55d1 |
| 3db629e2ca |
| 95d2638546 |
| ff96b34f5e |
| 537ebdf19d |
| d486b6b4db |
| b61fbf8896 |
| dd776e312a |
| 18333bfdb1 |
| e335b6c843 |
| 4d52176c5a |
| dd58a64ef7 |
| 4e93929943 |
| e7d32861e5 |
| 52aafd6fc9 |
| 77ce997fbb |
| 187c49688e |
| d5bcb8b981 |
| ec0799696e |
| 9ccea8ac80 |
| 080c311516 |
| faeb13efb1 |
| 0378c4cb7f |
| 7de3ee59ce |
| 7ae6e9a1cb |
| 2d92ad51c3 |
| bef8a6dc97 |
| d08e32dc42 |
| 64b77c5d70 |
| 71ce8c0823 |
| 067e477f61 |
| 8f807e0e42 |
| 195dda9fd7 |
| f60b55f181 |
| 727381dc5a |
| a35fc619e6 |
| 8dd6c94918 |
| 34958e4cf4 |
| 259bac55fb |
| 41f6db5de0 |
| 19330c3a0f |
| 1e2df515e3 |
| 681fcdab8c |
| 5021fc824b |
| 7d27b910cd |
| 0d3f4c5e40 |
| a86f955aab |
| c556605e44 |
| 7b06484b29 |
| fc751ae08f |
| d956f47db3 |
| 0a52b729cd |
| 6bebf4abfc |
| 9d60489908 |
| 35100f89a7 |
| 742b059a49 |
| d1f56ae196 |
| 6262197c8d |
| 781571044d |
| b183cee78d |
| 5a95f0ea08 |
| 349d758c14 |
| 582d933130 |
| b7b4839fe0 |
| 06dcf50728 |
| 8f151ab002 |
| 65cdf9a883 |
| 1e1daf9330 |
| b4f1a2b8a6 |
| f652a6719c |
| 81c44ab14a |
| 92ca709458 |
| 242ba3fc1e |
| 26dd69c976 |
| 731b44b187 |
| aaeafa9e88 |
| 129b8cc302 |
| d64522d168 |
| 6b4e753e69 |
| 3df1cac5ae |
| dcc32ea8ab |
| 3c8db9cbe4 |
| be1304bdd9 |
| 8965725e42 |
| 2df48b9f98 |
| 8d80272922 |
| e3f4d9f012 |
| 3566f5169e |
| b3cf7a4f65 |
| 07f93f3d27 |
| 7413e23bab |
| 380e90135a |
| 1642894ccf |
| c54bda8432 |
| 17bf9049e4 |
| 62a7b227f0 |
| b543cebfac |
| d145731c4b |
| 0cffb3c849 |
| 9d1a891c72 |
| b4d9833da3 |
| e0cbc01d21 |
9  .dockerignore  Normal file
@@ -0,0 +1,9 @@
# Excludes

/dbstore-data
/qpstore-data
/test-data
/venv
/client
/cmd/server/var
/.git
12  .env.container  Normal file
@@ -0,0 +1,12 @@
MRVA_RABBITMQ_HOST=rabbitmq
MRVA_RABBITMQ_PORT=5672
MRVA_RABBITMQ_USER=user
MRVA_RABBITMQ_PASSWORD=password
MINIO_ROOT_USER=user
MINIO_ROOT_PASSWORD=mmusty8432
ARTIFACT_MINIO_ENDPOINT=artifactstore:9000
ARTIFACT_MINIO_ID=${MINIO_ROOT_USER}
ARTIFACT_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
QLDB_MINIO_ENDPOINT=dbstore:9000
QLDB_MINIO_ID=${MINIO_ROOT_USER}
QLDB_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
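
These settings are consumed by the compose services at startup. As a rough sketch of how a Go service could read them (the helper name mustGetenv and the AMQP URL shape are illustrative assumptions, not taken from the mrvacommander sources):

    package main

    import (
        "fmt"
        "log"
        "os"
    )

    // mustGetenv is a hypothetical helper: fail fast when a required
    // variable from .env.container is missing.
    func mustGetenv(key string) string {
        v, ok := os.LookupEnv(key)
        if !ok {
            log.Fatalf("required environment variable %s is not set", key)
        }
        return v
    }

    func main() {
        // Assemble the RabbitMQ endpoint from the MRVA_RABBITMQ_* settings.
        amqpURL := fmt.Sprintf("amqp://%s:%s@%s:%s/",
            mustGetenv("MRVA_RABBITMQ_USER"),
            mustGetenv("MRVA_RABBITMQ_PASSWORD"),
            mustGetenv("MRVA_RABBITMQ_HOST"),
            mustGetenv("MRVA_RABBITMQ_PORT"))
        log.Printf("queue endpoint: %s", amqpURL)

        // The two MinIO stores share the root credentials.
        log.Printf("artifact store: %s", mustGetenv("ARTIFACT_MINIO_ENDPOINT"))
        log.Printf("qldb store: %s", mustGetenv("QLDB_MINIO_ENDPOINT"))
    }
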
1  .gitattributes  vendored
@@ -1,2 +1,3 @@
*.zip filter=lfs diff=lfs merge=lfs -text
*.blob filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
26  .gitignore  vendored
@@ -4,6 +4,9 @@ cmd/server/var/
# vscode project dir
.vscode/

# idea project dir
.idea/

# Compiled binary
cmd/server/server
cmd/agent/agent
@@ -41,3 +44,26 @@ go.work.sum

# env file
.env
/artifactstore-data/.minio.sys
/qldbminio/qldb
.ipynb_checkpoints/
venv/
venv-*/
*.egg-info
__pycache__
README.html
ChangeLog
notes/*.html

# Make timestamp files
mk.*
demo/containers/dbsdata/data/
demo/containers/dbsdata/tmp.dbsdata_backup.tar
client/qldbtools/db-collection-py-1/

mrva-overview.aux
mrva-overview.log
mrva-overview.synctex.gz
mrva-overview.toc

auto/
29  .golangci.yml  Normal file
@@ -0,0 +1,29 @@
linters:
  enable:
    - staticcheck
    - unused
    - decorder
    - errchkjson
    - exhaustruct
    - gochecknoinits
    - gochecksumtype
    - goconst
    - gocritic
    - godox
    - lll
    - loggercheck
    - revive
    - sloglint
    - tagalign
    - unparam

linters-settings:
  revive:
    config: .revive.toml
  staticcheck:
    checks:
      - "SA"

issues:
  format: "format: {{.FromLinter}}: {{.Text}}"
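
The enabled linters back the style notes in README.md; gochecknoinits, for instance, enforces the "NO package init() functions" rule. A minimal sketch of the pattern this configuration rejects and the explicit alternative (the registry shape is illustrative, not the project's actual code):

    package main

    import "fmt"

    var registry = map[string]func() string{}

    // Rejected by gochecknoinits -- registration hidden in package load order:
    //
    //     func init() { registry["codeql"] = newCodeQLRunner }
    //
    // Preferred: dynamic behaviour made explicit, per the style notes.
    func registerDefaults() {
        registry["codeql"] = newCodeQLRunner
    }

    func newCodeQLRunner() string { return "codeql runner" }

    func main() {
        registerDefaults() // the caller opts in; nothing happens implicitly
        fmt.Println(registry["codeql"]())
    }
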
13  .revive.toml  Normal file
@@ -0,0 +1,13 @@
ignoreGeneratedHeader = true

[rule.blank-imports]
Arguments = [true]

[[rule]]
name = "max-public-identifier-length"
arguments = [15] # Maximum length for public identifiers

[[rule]]
name = "max-private-identifier-length"
arguments = [15] # Maximum length for private identifiers
55  Makefile  Normal file
@@ -0,0 +1,55 @@
all: server agent

.PHONY: view

view: README.html
	open $<

html: README.html

%.html: %.md
	pandoc --toc=true --standalone $< --out $@

# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
	cd client/containers/qldbtools && \
	docker build -t client-qldbtools-container:0.1.24 .
	touch $@

# Run a shell in the container with the qldbtools
dbt-run: mk.client-qldbtools-container
	docker run --rm -it client-qldbtools-container:0.1.24 /bin/bash

# Run one of the scripts in the container as check
dbt-check: mk.client-qldbtools-container
	docker run --rm -it client-qldbtools-container:0.1.24 mc-db-initial-info

dbt-push: mk.dbt-push
mk.dbt-push: mk.client-qldbtools-container
	docker tag client-qldbtools-container:0.1.24 ghcr.io/hohn/client-qldbtools-container:0.1.24
	docker push ghcr.io/hohn/client-qldbtools-container:0.1.24
	touch $@

server:
	cd cmd/server && GOOS=linux GOARCH=arm64 go build

agent:
	cd cmd/agent && GOOS=linux GOARCH=arm64 go build

fullbuild:
	cd cmd/server && GOOS=linux GOARCH=arm64 go build -a

sendsubmit:
	cd tools && sh ./submit-request.curl

# Requires
#   go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
lint:
	golangci-lint run cmd/... pkg/...

deps:
	godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/server | dot -Tpdf > deps-server.pdf && open deps-server.pdf

depa:
	godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/agent | dot -Tpdf > deps-agent.pdf && open deps-agent.pdf
73  README.md
@@ -6,6 +6,52 @@ TODO Style notes
- NO package init() functions
- Dynamic behaviour must be explicit


## Client CodeQL Database Selector
Separate from the server's downloading of databases, a client-side interface is
needed to generate the `databases.json` file. This interface:

1. must be usable from the shell
2. must be interactive (Python, Jupyter)
3. is session based to allow iteration on selection / narrowing
4. must be queryable. There is no need to reinvent SQL / dataframes

Python with dataframes is ideal for this; the project is in `client/`.

## Reverse proxy
For testing, replay flows using mitmweb. This is faster and simpler than using
gh-mrva or the VS Code plugin.

- Set up the virtual environment and install tools

        python3.11 -m venv venv
        source venv/bin/activate
        pip install mitmproxy

For intercepting requests:

1. Start mitmweb to listen on port 8080 and forward requests to port 8081, with
   a web interface

        mitmweb --mode reverse:http://localhost:8081 -p 8080

1. Change the `server` ports in `docker-compose.yml` to

        ports:
            - "8081:8080" # host:container

1. Start the containers.

1. Submit requests.

1. Save the flows for later replay.

One such session is in `tools/mitmweb-flows`; it can be loaded to replay the
requests:

1. start `mitmweb --mode reverse:http://localhost:8081 -p 8080`
2. `file` > `open` > `tools/mitmweb-flows`
3. replay at least the submit, status, and download requests

## Cross-compile server on host, run it in container
These are simple steps using a single container.

@@ -31,7 +77,10 @@ These are simple steps using a single container.
        cd /mrva/mrvacommander/cmd/server/ && ./server

## Using docker-compose
### Steps to build and run the server in a multi-container environment set up by docker-compose.
### Steps to build and run the server

Steps to build and run the server in a multi-container environment set up by
docker-compose.

1. Build the server image, as above

@@ -53,6 +102,28 @@ These are simple steps using a single container.
        cd /mrva/mrvacommander/cmd/server/
        ./server -loglevel=debug -mode=container

1. Test server from the host via

        cd ~/work-gh/mrva/mrvacommander/tools
        sh ./request_16-Jun-2024_11-33-16.curl

1. Follow server logging via

        cd ~/work-gh/mrva/mrvacommander
        docker-compose up -d
        docker-compose logs -f server

1. Completely rebuild all containers. Useful when running into docker errors

        cd ~/work-gh/mrva/mrvacommander
        docker-compose up --build

1. Start the server containers and the desktop/demo containers

        cd ~/work-gh/mrva/mrvacommander/
        docker-compose down --remove-orphans
        docker-compose -f docker-compose-demo.yml up -d

1. Test server via remote client by following the steps in [gh-mrva](https://github.com/hohn/gh-mrva/blob/connection-redirect/README.org#compacted-edit-run-debug-cycle)

### Some general docker-compose commands
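
The mitmweb reverse mode in the README hunk above is, at its core, an HTTP reverse proxy with flow capture. For readers who want the same forwarding shape without mitmproxy, a minimal sketch in Go (ports follow the README; logging stands in for mitmweb's flow UI, and the saved-flow replay feature is not reproduced here):

    package main

    import (
        "log"
        "net/http"
        "net/http/httputil"
        "net/url"
    )

    func main() {
        // Forward :8080 to the server on :8081, as in the mitmweb setup.
        target, err := url.Parse("http://localhost:8081")
        if err != nil {
            log.Fatal(err)
        }
        proxy := httputil.NewSingleHostReverseProxy(target)

        // Log each request before forwarding; mitmweb additionally records
        // the full flows for later replay, which this sketch does not.
        handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            log.Printf("%s %s", r.Method, r.URL.Path)
            proxy.ServeHTTP(w, r)
        })
        log.Fatal(http.ListenAndServe(":8080", handler))
    }
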
213  client/Plan.ipynb  Normal file
File diff suppressed because one or more lines are too long
64  client/containers/ghmrva/Dockerfile  Normal file
@@ -0,0 +1,64 @@
# ######################
# Use an official Golang image as the base image
FROM golang:1.22 AS builder

# Set the working directory inside the container
WORKDIR /work-gh/mrva/gh-mrva

# Clone the repository
RUN git clone https://github.com/hohn/gh-mrva.git . &&\
    git checkout hohn-0.1.24-demo

# Download dependencies
RUN go mod download

# Build the Go binary
RUN go build .

# ######################
# Provide codeql and java
#
FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates \
    default-jdk

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr

# ######################

# Set the working directory inside the final image
WORKDIR /app

# Copy the binary from the builder stage
COPY --from=builder /work-gh/mrva/gh-mrva/gh-mrva /usr/local/bin/gh-mrva

# Put CodeQL on the PATH
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/codeql

# Run forever
CMD ["tail", "-f", "/dev/null"]
13  client/containers/ghmrva/Makefile  Normal file
@@ -0,0 +1,13 @@
ghm: mk.client-ghmrva-container
mk.client-ghmrva-container:
	docker build -t client-ghmrva-container:0.1.24 .
	touch $@

ghm-push: mk.ghm-push
mk.ghm-push: mk.client-ghmrva-container
	docker tag client-ghmrva-container:0.1.24 ghcr.io/hohn/client-ghmrva-container:0.1.24
	docker push ghcr.io/hohn/client-ghmrva-container:0.1.24
	touch $@

ghm-run:
	docker run --rm -it ghcr.io/hohn/client-ghmrva-container:0.1.24 /bin/bash
16  client/containers/ghmrva/README.org  Normal file
@@ -0,0 +1,16 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Build
cd ~/work-gh/mrva/mrvacommander/client/containers/ghmrva/
make ghm
# Run
docker run -ti client-ghmrva-container:0.1.24 /bin/bash

# In the container
gh-mrva -h
codeql -h

# Push
make ghm-push
#+END_SRC
30  client/containers/hepc/Dockerfile  Normal file
@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/BentoML-BentoML-ctsj-d6963d.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/Serial-Studio-Serial-Studio-ctsj-2b2721.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/UEFITool-UEFITool-ctsj-ee2d3c.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/apprise-apprise-ctsj-3f4a4e.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/attrs-attrs-ctsj-e2c939.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/behave-behave-ctsj-b297b5.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-01864e.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0189aa.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-035849.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-051a5c.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-099796.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0a35a1.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0a6352.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0c6575.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0cdf2f.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0d667f.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0d6cf6.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-0d7b69.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
BIN  client/containers/hepc/codeql-dbs/db-collection-mixed/bulk-builder-bulk-builder-ctsj-103a8a.zip  (Stored with Git LFS)  Normal file. Binary file not shown.
@@ -0,0 +1,23 @@
{"git_branch": "HEAD", "git_commit_id": "2b41915dac8966e95f9e63638d30769b0d69ad68", "git_repo": "aircrack-ng", "ingestion_datetime_utc": "2024-06-07 16:57:47.683012+00:00", "result_url": "http://hepc/db-collection-py/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.4", "projname": "aircrack-ng/aircrack-ng"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "9a9308fd5477d2a44f4e491d5a712546d4a2b3e4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-22 13:30:21.681180+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0189aa.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "34412555665923bc07d43ce970e9d81be3795de7", "git_repo": "UEFITool", "ingestion_datetime_utc": "2024-07-04 19:00:38.543297+00:00", "result_url": "http://hepc/db-collection-py/UEFITool-UEFITool-ctsj-ee2d3c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.6", "projname": "UEFITool/UEFITool"}
{"git_branch": "HEAD", "git_commit_id": "00aa56f5257060304d41f09651c6ab58ee6104d6", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-18 14:12:52.904410+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0c6575.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "e4bffa0a7450e1abd9f4df9565728ae18d86cfd2", "git_repo": "attrs", "ingestion_datetime_utc": "2024-07-18 22:34:57.795427+00:00", "result_url": "http://hepc/db-collection-py/attrs-attrs-ctsj-e2c939.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "attrs/attrs"}
{"git_branch": "HEAD", "git_commit_id": "9620901afce56f720e856aca600951c9b61a9460", "git_repo": "apprise", "ingestion_datetime_utc": "2024-07-22 22:26:48.720348+00:00", "result_url": "http://hepc/db-collection-py/apprise-apprise-ctsj-3f4a4e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "apprise/apprise"}
{"git_branch": "HEAD", "git_commit_id": "c38e6c8cfba28980aea8f895c71b376e8a5155d5", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2022-04-16T12:45:56.739003883Z", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d6cf6.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.8.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "18f6be580b12dc406ef356b2cd65f47c24fce63e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 05:46:23.392157+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d667f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a587921bac074b1bd1b0a0a5536587660a9b954e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 16:13:39.094478+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a6352.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9b361c7ff497d57651856650667aece8230fab6d", "git_repo": "BentoML", "ingestion_datetime_utc": "2024-07-24 02:17:07.095690+00:00", "result_url": "http://hepc/db-collection-py/BentoML-BentoML-ctsj-d6963d.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "BentoML/BentoML"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "53ad2da1a8e6e79e0986ddfa3a45e1db6fdd491c", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-14 02:24:19.208812+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-01864e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "db8f1a7930c6b5826357646746337dafc983f953", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2023-11-22 01:18:25.079473+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-099796.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.2", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "f8df9dd749a549dec20aa286a7639ba04190faab", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-12 16:39:28.854142+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d7b69.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "b5274976cb0a792d05d541a749c0adcd9d20062d", "git_repo": "behave", "ingestion_datetime_utc": "2024-05-11 19:20:51.916333+00:00", "result_url": "http://hepc/db-collection-py/behave-behave-ctsj-b297b5.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "behave/behave"}
{"git_branch": "HEAD", "git_commit_id": "4c825c198df470506b0f84da0b25b3b385150dcb", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-04-25 03:26:03.986270+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-035849.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a8b8ff0acc6fcc629d08a3a9952f83be56a9a3c3", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-03 13:30:48.829134+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-051a5c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9ef05731e7c6cbad2e897faa7c526558eed3ceaa", "git_repo": "aws-sam-cli", "ingestion_datetime_utc": "2024-05-14 01:03:18.130142+00:00", "result_url": "http://hepc/db-collection-py/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sam-cli/aws-sam-cli"}
{"git_branch": "HEAD", "git_commit_id": "16865390a653ceaeabe354df1b37e4a775161a70", "git_repo": "aws-sdk-pandas", "ingestion_datetime_utc": "2024-05-13 15:13:31.853042+00:00", "result_url": "http://hepc/db-collection-py/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sdk-pandas/aws-sdk-pandas"}
{"git_branch": "HEAD", "git_commit_id": "093856995af0811d3ebbe8c179b8febf4ae706f0", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-03-20 14:18:02.500590+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-103a8a.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.16.4", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "0573e6f96637f08fb4cb85e0552f0622d36827d4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-01-24 09:21:05.977294+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0cdf2f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "93314995a5ee2217d58c3d9cbcbdef5df6c34566", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-09 05:29:25.243273+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a35a1.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
30  client/containers/qldbtools/Dockerfile  Normal file
@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
25  client/containers/qldbtools/Makefile  Normal file
@@ -0,0 +1,25 @@
DBT_TARGET := client-qldbtools-container:0.1.24

# Build the qldbtools container image
dbt: mk.client-qldbtools-container
mk.client-qldbtools-container:
	docker build -t ${DBT_TARGET} .
	touch $@

# Run a shell in the container with the qldbtools
dbt-run: dbt
	docker run --rm -it ${DBT_TARGET} /bin/bash

# Run one of the scripts in the container as check. Should exit with error.
dbt-check: dbt
	docker run --rm -it ${DBT_TARGET} mc-db-initial-info

dbt-push: mk.dbt-push
mk.dbt-push: dbt
	docker tag ${DBT_TARGET} ghcr.io/hohn/${DBT_TARGET}
	docker push ghcr.io/hohn/${DBT_TARGET}
	touch $@

dbt-test:
	docker pull ghcr.io/hohn/${DBT_TARGET}
	docker run --rm -it --name test-dbt-server ghcr.io/hohn/${DBT_TARGET} sh
13  client/containers/qldbtools/README.org  Normal file
@@ -0,0 +1,13 @@
* MRVA python tools container
Set up a Docker image with python 3.11, pip, and the qldbtools. The targets are
in the =Makefile=; the most important are

#+BEGIN_SRC sh
# Build
make dbt

# Check
make dbt-check

#+END_SRC
67  client/containers/vscode/Dockerfile  Normal file
@@ -0,0 +1,67 @@
FROM codercom/code-server:4.92.2-debian

# ======================
# Pre-install a custom JDK for this platform and redirect CodeQL to it

USER root

ENV DEBIAN_FRONTEND=noninteractive

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    ca-certificates \
    curl \
    default-jdk \
    git \
    libcurl4-openssl-dev \
    libssl-dev \
    python3 \
    python3-dev \
    unzip

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# ======================
# Install code-server
USER coder

# Set environment variables
ENV PASSWORD mrva

# Install VS Code extensions as user root -- globally
RUN code-server --install-extension ms-python.python \
    && code-server --install-extension esbenp.prettier-vscode \
    && code-server --install-extension GitHub.vscode-codeql

# Expose the port that Code Server runs on
EXPOSE 9080

# Point CodeQL to the java binary for this platform
ENV CODEQL_JAVA_HOME=/usr

# Add
#     codeQl.cli.executablePath
# to user settings.
# This is in addition to the environment variable CODEQL_JAVA_HOME which has no
# effect on the plugin
USER root
COPY ./settings.json /home/coder/.local/share/code-server/User/
RUN chown -R coder:coder /home/coder/.local/share/code-server/

# Start Code Server
ENTRYPOINT ["dumb-init", "code-server", "--bind-addr", "0.0.0.0:9080", "."]

# Run as the coder user
USER coder
119  client/containers/vscode/README.org  Normal file
@@ -0,0 +1,119 @@
* MRVA VS Code server container
On the host:

#+BEGIN_SRC sh
# Build the container via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker build -t code-server-initialized:0.1.24 .

# Run the container in standalone mode via
cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \
    -d -p 9080:9080 code-server-initialized:0.1.24
#+END_SRC

- Connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.

Inside the container:

- Setup inside the container
  #+BEGIN_SRC shell
  cd
  export PATH=/opt/codeql:$PATH
  codeql pack init qldemo
  cd qldemo
  codeql pack add codeql/python-all@1.0.6
  #+END_SRC

- Create a new file =qldemo/simple.ql= with this query. Open it in VS Code.
  The plugin will download the CodeQL binaries (but never use them -- the
  configuration redirects)
  #+BEGIN_SRC sh
  cd
  cat > qldemo/simple.ql <<eof
  import python
  select 42
  eof
  #+END_SRC

- Create database.
  #+BEGIN_SRC sh
  cd ~/qldemo

  cat > short.py <<EOF
  print('hello world')
  EOF
  export PATH=/opt/codeql:$PATH
  codeql database create --language=python -s . -v short-db
  #+END_SRC

- Set the database as default and run the query =simple.ql=

- Add the customized VS Code plugin
  On the host
  #+BEGIN_SRC sh
  cd ~/work-gh/mrva/vscode-codeql
  git checkout mrva-standalone

  # Install nvm
  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

  # Install correct node version
  cd ./extensions/ql-vscode
  nvm install

  # Build the extension
  cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
  npm install
  npm run build
  #+END_SRC

  In the container
  #+BEGIN_SRC sh
  # Install extension
  cd /work-gh/mrva/vscode-codeql/dist

  /bin/code-server --force --install-extension vscode-codeql-*.vsix
  #+END_SRC

- Capture the state of this container and create a new image from it
  #+BEGIN_SRC sh
  docker ps
  # Check id column. Use it below.
  docker commit 2df5732c1850 code-server-initialized:0.1.24
  # Keep the sha
  # sha256:87c8260146e28aed25b094d023a30a015a958f829c09e66cb50ccca2c4a2a000
  docker kill 2df5732c1850

  # Make sure the image tag matches the sha
  docker inspect code-server-initialized:0.1.24 | grep Id

  # Run the image and check
  docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
      code-server-initialized:0.1.24
  #+END_SRC
  Again connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=.

- Push this container
  #+BEGIN_SRC sh
  # Common
  export CSI_TARGET=code-server-initialized:0.1.24

  # Push container
  docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET}
  docker push ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC

- Test the registry image
  #+BEGIN_SRC sh
  # Test pushed container
  docker pull ghcr.io/hohn/${CSI_TARGET}
  docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
      ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC
  In the container, inside the running VS Code:
  - Check the plugin version number via the command
    : codeql: copy version information
4  client/containers/vscode/settings.json  Normal file
@@ -0,0 +1,4 @@
{
    "codeQL.runningQueries.numberOfThreads": 2,
    "codeQL.cli.executablePath": "/opt/codeql/codeql"
}
24  client/qldbtools/.vscode/launch.json  vendored  Normal file
@@ -0,0 +1,24 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [

        {
            "name": "Python Debugger: Current File with Arguments",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "args": [
                "--db_collection_dir",
                "db-collection-py",
                "--starting_path",
                "$HOME/work-gh/mrva/mrva-open-source-download"
            ],
            "justMyCode": true,
            "stopOnEntry": false
        }
    ]
}
2  client/qldbtools/Makefile  Normal file
@@ -0,0 +1,2 @@
doc:
	pandoc -s --css=./gfm.css README.md > foo.html && open foo.html
171  client/qldbtools/README.org  Normal file
@@ -0,0 +1,171 @@
* Introduction to hepc -- HTTP End Point for CodeQL
#+BEGIN_SRC sh
1:$ ./bin/hepc-init --db_collection_dir db-collection --starting_path ~/work-gh/mrva/mrva-open-source-download
[2024-11-19 14:12:06] [INFO] searching for db.zip files
[2024-11-19 14:12:08] [INFO] collecting information from db.zip files
[2024-11-19 14:12:08] [INFO] Extracting from /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/aircrack-ng/aircrack-ng/code-scanning/codeql/databases/cpp/db.zip
[2024-11-19 14:12:08] [INFO] Adding record to db-collection/metadata.json
#+END_SRC

* Introduction to qldbtools
=qldbtools= is a Python package for selecting sets of CodeQL databases
to work on. It uses a (pandas) dataframe in the implementation, but all
result sets are available as CSV files to provide flexibility in the
tools you want to work with.

The rationale is simple: when working with larger collections of CodeQL
databases, spread over time, languages, etc., many criteria can be used
to select the subset of interest. This package addresses that aspect of
MRVA (multi-repository variant analysis).

For example, consider this scenario from an enterprise. We have 10,000
repositories in C/C++ and 5,000 in Python. We build CodeQL databases weekly
and keep the last 2 years' worth. This means for the last 2 years there
are

#+begin_example
(10000 + 5000) * 52 * 2 = 1560000
#+end_example

databases to select from for a single MRVA run. 1.56 million rows are
readily handled by a pandas (or R) dataframe.

The full list of criteria currently encoded via the columns is

- owner
- name
- CID
- cliVersion
- creationTime
- language
- sha -- git commit sha of the code the CodeQL database is built against
- baselineLinesOfCode
- path
- db_lang
- db_lang_displayName
- db_lang_file_count
- db_lang_linesOfCode
- ctime
- primaryLanguage
- finalised
- left_index
- size

The minimal criteria needed to distinguish databases in the above
scenario are

- cliVersion
- creationTime
- language
- sha

These are encoded in the single custom id column 'CID'.

Thus, a database can be fully specified using an (owner, name, CID) tuple,
and this is encoded in the names used by the MRVA server and clients.
The selection of databases can of course be done using the whole table.

For an example of the workflow, see [[#command-line-use][section
'command line use']].

A small sample of a full table:

| | owner | name | CID | cliVersion | creationTime | language | sha | baselineLinesOfCode | path | db_lang | db_lang_displayName | db_lang_file_count | db_lang_linesOfCode | ctime | primaryLanguage | finalised | left_index | size |
|---+----------+----------------+--------+------------+----------------------------------+----------+------------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+-------------+---------------------+--------------------+---------------------+----------------------------+-----------------+-----------+------------+----------|
| 0 | 1adrianb | face-alignment | 1f8d99 | 2.16.1 | 2024-02-08 14:18:20.983830+00:00 | python | c94dd024b1f5410ef160ff82a8423141e2bbb6b4 | 1839 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/1adrianb/face-alignment/code-scanning/codeql/databases/python/db.zip | python | Python | 25 | 1839 | 2024-07-24T14:09:02.187201 | python | 1 | 1454 | 24075001 |
| 1 | 2shou | TextGrocery | 9ab87a | 2.12.1 | 2023-02-17T11:32:30.863093193Z | cpp | 8a4e41349a9b0175d9a73bc32a6b2eb6bfb51430 | 3939 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/2shou/TextGrocery/code-scanning/codeql/databases/cpp/db.zip | no-language | no-language | 0 | -1 | 2024-07-24T06:25:55.347568 | cpp | nan | 1403 | 3612535 |
| 2 | 3b1b | manim | 76fdc7 | 2.17.5 | 2024-06-27 17:37:20.587627+00:00 | python | 88c7e9d2c96be1ea729b089c06cabb1bd3b2c187 | 19905 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/3b1b/manim/code-scanning/codeql/databases/python/db.zip | python | Python | 94 | 19905 | 2024-07-24T13:23:04.716286 | python | 1 | 1647 | 26407541 |

** Installation
- Set up the virtual environment and install tools

  #+begin_example
  cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
  python3.11 -m venv venv
  source venv/bin/activate
  pip install --upgrade pip

  # From requirements.txt
  pip install -r requirements.txt
  # Or explicitly
  pip install jupyterlab pandas ipython
  pip install lckr-jupyterlab-variableinspector
  #+end_example

- Local development

  #+begin_example
  cd ~/work-gh/mrva/mrvacommander/client/qldbtools
  source venv/bin/activate
  pip install --editable .
  #+end_example

  The =--editable= install *should* use symlinks for all scripts; use =./bin/*= to be sure.

- Full installation

  #+begin_example
  pip install qldbtools
  #+end_example

** Use as library
The best way to examine the code is starting from the high-level scripts
in =bin/=.

** Command line use
Initial information collection requires a unique file path so it can be
run repeatedly over DB collections with the same (owner, name) but other
differences -- namely, in one or more of

- creationTime
- sha
- cliVersion
- language

Those fields are collected in =bin/mc-db-refine-info=.

An example workflow with commands grouped by data files follows.

#+begin_example
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

./bin/mc-db-view-info < scratch/db-info-2.csv &
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-view-info < scratch/db-info-3.csv &

./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv
#+end_example

To see the full information for a selection, use
=mc-rows-from-mrva-list=:

#+begin_example
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
    scratch/db-info-3.csv > scratch/selection-full-info
#+end_example

To check, e.g., the =language= column:

#+begin_example
csvcut -c language scratch/selection-full-info
#+end_example

** Notes
The =preview-data= plugin for VS Code has a bug; it displays =0= instead
of =0e3379= for the following. There are other entries with a similar
malfunction.

#+begin_example
CleverRaven,Cataclysm-DDA,0e3379,2.17.0,2024-05-08 12:13:10.038007+00:00,cpp,5ca7f4e59c2d7b0a93fb801a31138477f7b4a761,578098.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1228.0,578098.0,2024-05-13T12:14:54.650648,cpp,True,4245,563435469
CleverRaven,Cataclysm-DDA,3231f7,2.18.0,2024-07-18 11:13:01.673231+00:00,cpp,db3435138781937e9e0e999abbaa53f1d3afb5b7,579532.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1239.0,579532.0,2024-07-24T02:33:23.900885,cpp,True,1245,573213726
#+end_example
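
The CID is derived by hashing the four distinguishing fields; =bin/hepc-init.sh= below does this with b2sum. An equivalent sketch in Go, assuming golang.org/x/crypto/blake2b (b2sum's default is BLAKE2b-512) and reproducing the trailing newline that the shell echo adds; exact byte-for-byte agreement still depends on how yq renders each field:

    package main

    import (
        "encoding/hex"
        "fmt"

        "golang.org/x/crypto/blake2b"
    )

    // cid mirrors the shell pipeline
    //     echo "$cliVersion $creationTime $language $sha" | b2sum
    // keeping the first 6 hex characters, as hepc-init.sh does.
    func cid(cliVersion, creationTime, language, sha string) string {
        input := fmt.Sprintf("%s %s %s %s\n", cliVersion, creationTime, language, sha)
        sum := blake2b.Sum512([]byte(input))
        return hex.EncodeToString(sum[:])[:6]
    }

    func main() {
        fmt.Println(cid("2.17.4", "2024-06-07 16:57:47.683012+00:00",
            "cpp", "2b41915dac8966e95f9e63638d30769b0d69ad68"))
    }
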
144  client/qldbtools/bin/hepc-init.sh  Executable file
@@ -0,0 +1,144 @@
#!/bin/bash

#* Utility functions
log() {
    local level="$1"
    shift
    local color_reset="\033[0m"
    local color_info="\033[1;34m"
    local color_warn="\033[1;33m"
    local color_error="\033[1;31m"

    local color
    case "$level" in
        INFO)  color="$color_info" ;;
        WARN)  color="$color_warn" ;;
        ERROR) color="$color_error" ;;
        *)     color="$color_reset" ;;
    esac

    echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
}
usage() {
    echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
    echo
    echo "Options:"
    echo "  --db_collection_dir <directory>  Specify the database collection directory."
    echo "  --starting_path <path>           Specify the starting path."
    echo "  -h                               Show this help message."
    exit 1
}


#* Initialize and parse arguments
set -euo pipefail               # exit on error, unset var, pipefail
trap 'rm -fR /tmp/hepc.$$-*' EXIT

starting_dir=$(pwd)
db_collection_dir=""
starting_path=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --db_collection_dir)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --db_collection_dir requires a directory as an argument."
                usage
            fi
            db_collection_dir="$1"
            ;;
        --starting_path)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --starting_path requires a path as an argument."
                usage
            fi
            starting_path="$1"
            ;;
        -h)
            usage
            ;;
        *)
            echo "Error: Unknown option '$1'."
            usage
            ;;
    esac
    shift
done

# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
    echo "Error: --db_collection_dir is required."
    usage
fi

if [[ -z "$starting_path" ]]; then
    echo "Error: --starting_path is required."
    usage
fi

#* Find all DBs
log INFO "searching for db.zip files"
find ${starting_path} -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths

#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p $db_collection_dir
cat /tmp/hepc.$$-paths | while read -r zip_path
do
    log INFO "Extracting from ${zip_path}"
    zip_dir=$(dirname ${zip_path})
    zip_file=$(basename ${zip_path})
    unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
    # The content may be LANGUAGE/codeql-database.yml

    #* For every database, create a metadata record.
    mkdir -p /tmp/hepc.$$-zip
    cd /tmp/hepc.$$-zip/*

    # Information from codeql-database.yml
    primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
    sha=$(yq '.creationMetadata.sha' codeql-database.yml)
    cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
    creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
    sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
    repo=${sourceLocationPrefix##*/}    # keep only last component
    # Get sourceLocationPrefix[-2]
    owner="${sourceLocationPrefix%/*}"  # strip last component
    owner="${owner##*/}"                # keep only last component

    # cid for repository / db
    cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
              awk '{print substr($1, 1, 6)}')

    # Prepare the metadata record for this DB.
    new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
    result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
    record='
    {
        "git_branch": "HEAD",
        "git_commit_id": "'${sha}'",
        "git_repo": "'${repo}'",
        "ingestion_datetime_utc": "'${creationTime}'",
        "result_url": "'${result_url}'",
        "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
        "tool_name": "codeql-'${primaryLanguage}'",
        "tool_version": "'${cliVersion}'",
        "projname": "'${owner}/${repo}'"
    }
    '
    cd "$starting_dir"
    rm -fR /tmp/hepc.$$-zip
    echo "$record" >> $db_collection_dir/metadata.json

    #* Link original file path to collection directory for serving. Use name including
    # the cid and field separator ctsj
    cd ${db_collection_dir}
    [ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}

    # Interim cleanup
    rm -fR "/tmp/hepc.$$-*"
done
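
The script shells out to yq after unzipping each archive to disk. For context, a sketch of the same extraction done in-process in Go, assuming gopkg.in/yaml.v3 and a local db.zip; the struct fields follow the yq paths used above:

    package main

    import (
        "archive/zip"
        "fmt"
        "log"
        "strings"

        "gopkg.in/yaml.v3"
    )

    // dbMeta holds the codeql-database.yml fields hepc-init.sh extracts.
    type dbMeta struct {
        PrimaryLanguage      string `yaml:"primaryLanguage"`
        SourceLocationPrefix string `yaml:"sourceLocationPrefix"`
        CreationMetadata     struct {
            SHA          string `yaml:"sha"`
            CLIVersion   string `yaml:"cliVersion"`
            CreationTime string `yaml:"creationTime"`
        } `yaml:"creationMetadata"`
    }

    func main() {
        r, err := zip.OpenReader("db.zip")
        if err != nil {
            log.Fatal(err)
        }
        defer r.Close()
        for _, f := range r.File {
            // The file may sit at LANGUAGE/codeql-database.yml.
            if !strings.HasSuffix(f.Name, "codeql-database.yml") {
                continue
            }
            rc, err := f.Open()
            if err != nil {
                log.Fatal(err)
            }
            var m dbMeta
            if err := yaml.NewDecoder(rc).Decode(&m); err != nil {
                log.Fatal(err)
            }
            rc.Close()
            fmt.Printf("%+v\n", m)
        }
    }
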
104
client/qldbtools/bin/hepc-serve.go
Executable file
104
client/qldbtools/bin/hepc-serve.go
Executable file
@@ -0,0 +1,104 @@
/*
dependencies
    go get -u golang.org/x/exp/slog

on-the-fly
    go run bin/hepc-serve.go --codeql-db-dir db-collection-py-1

compiled
    cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
    go build -o ./bin/hepc-serve.bin ./bin/hepc-serve.go

test
    curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
    curl $(head -1 foo | jq -r ".result_url" |sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip

*/
package main

import (
	"flag"
	"fmt"
	"net/http"
	"os"
	"path/filepath"

	"golang.org/x/exp/slog"
)

var dbDir string

func serveFile(w http.ResponseWriter, r *http.Request) {
	fullPath := r.URL.Path[len("/db/"):]

	resolvedPath, err := filepath.EvalSymlinks(fullPath)
	if err != nil {
		slog.Warn("failed to resolve symlink", slog.String("fullPath", fullPath),
			slog.String("error", err.Error()))
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}

	if fileInfo, err := os.Stat(resolvedPath); err != nil || fileInfo.IsDir() {
		slog.Warn("file not found or is a directory", slog.String("resolvedPath", resolvedPath))
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}

	slog.Info("serving file", slog.String("resolvedPath", resolvedPath))
	http.ServeFile(w, r, resolvedPath)
}

func serveMetadata(w http.ResponseWriter, r *http.Request) {
	metadataPath := filepath.Join(dbDir, "metadata.json")
	if fileInfo, err := os.Stat(metadataPath); err != nil || fileInfo.IsDir() {
		slog.Warn("metadata.json not found", slog.String("metadataPath", metadataPath))
		http.Error(w, "metadata.json not found", http.StatusNotFound)
		return
	}

	slog.Info("serving metadata.json", slog.String("metadataPath", metadataPath))
	http.ServeFile(w, r, metadataPath)
}

func logMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		slog.Info("incoming request", slog.String("method", r.Method), slog.String("url", r.URL.Path))
		next.ServeHTTP(w, r)
	})
}

func main() {
	var host string
	var port int

	flag.StringVar(&dbDir, "codeql-db-dir", "", "Directory containing CodeQL database files (required)")
	flag.StringVar(&host, "host", "127.0.0.1", "Host address for the HTTP server")
	flag.IntVar(&port, "port", 8080, "Port for the HTTP server")
	flag.Parse()

	if dbDir == "" {
		slog.Error("missing required flag", slog.String("flag", "--codeql-db-dir"))
		os.Exit(1)
	}

	if _, err := os.Stat(dbDir); os.IsNotExist(err) {
		slog.Error("invalid directory", slog.String("dbDir", dbDir))
		os.Exit(1)
	}

	slog.Info("starting server", slog.String("host", host), slog.Int("port", port), slog.String("dbDir", dbDir))

	mux := http.NewServeMux()
	mux.HandleFunc("/db/", serveFile)
	mux.HandleFunc("/index", serveMetadata)
	mux.HandleFunc("/api/v1/latest_results/codeql-all", serveMetadata)

	loggedHandler := logMiddleware(mux)

	addr := fmt.Sprintf("%s:%d", host, port)
	slog.Info("server listening", slog.String("address", addr))
	if err := http.ListenAndServe(addr, loggedHandler); err != nil {
		slog.Error("server error", slog.String("error", err.Error()))
	}
}
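
The two curl commands in the header translate directly to a small Python client. This sketch assumes the server is running at 127.0.0.1:8080 and that metadata.json holds one JSON record per line:

    import json
    import urllib.request

    base = "http://127.0.0.1:8080"
    with urllib.request.urlopen(f"{base}/api/v1/latest_results/codeql-all") as resp:
        first = json.loads(resp.read().decode().splitlines()[0])

    # Rewrite the advertised host, as the sed expression does, then download.
    url = first["result_url"].replace("http://hepc/", f"{base}/db/")
    urllib.request.urlretrieve(url, "foo.zip")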
108
client/qldbtools/bin/mc-db-generate-selection
Executable file
@@ -0,0 +1,108 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
"""
import argparse
import logging
from argparse import Namespace
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import numpy as np

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
    """,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('vscode_selection', type=str,
                    help='VS Code selection file to generate')
parser.add_argument('gh_mrva_selection', type=str,
                    help='gh-mrva cli selection file to generate')
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
parser.add_argument('-l', '--list-name', type=str,
                    help='Name of the repository list',
                    default='mirva-list')

args: Namespace = parser.parse_args()
#
#* Load the information
#
import pandas as pd
import sys

df0: DataFrame = pd.read_csv(sys.stdin)

if args.num_entries is None:
    # Use all entries
    df1: DataFrame = df0
else:
    # Use num_entries, chosen via pseudo-random numbers
    df1 = df0.sample(n=args.num_entries,
                     random_state=np.random.RandomState(args.seed))

#
#* Form and save structures
#
repos: list[str] = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    repos.append(utils.form_db_req_name(owner, name, CID))

repo_list_name: str = args.list_name
vsc = {
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                {
                    "name": repo_list_name,
                    "repositories": repos,
                }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": repo_list_name
    }
}

gh = {
    repo_list_name: repos
}

import json
with open(args.vscode_selection, "w") as fc:
    json.dump(vsc, fc, indent=4)

with open(args.gh_mrva_selection, "w") as fc:
    json.dump(gh, fc, indent=4)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
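
The repository names written to both selection files come from form_db_req_name in qldbtools/utils.py; a quick sketch with hypothetical values:

    import qldbtools.utils as utils

    utils.form_db_req_name("psycopg", "psycopg2", "8a6177")
    # -> 'psycopg/psycopg2ctsj8a6177'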
48
client/qldbtools/bin/mc-db-initial-info
Executable file
@@ -0,0 +1,48 @@
#!/usr/bin/env python
""" Collect information about CodeQL databases from the file system and write out
    a table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import sys
import pandas as pd

from qldbtools.utils import DBInfo

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Find all CodeQL DBs in and below starting_dir and export a CSV
    file with relevant data.""")
parser.add_argument('starting_dir', type=str,
                    help='The starting directory to search for CodeQL databases.')
args = parser.parse_args()

#
#* Collect info
#
# Get the db information in list of DBInfo form
db_base: str = args.starting_dir
dbs: list[DBInfo] = list(utils.collect_dbs(db_base))
dbdf: DataFrame = pd.DataFrame([d.__dict__ for d in dbs])
#
#
#* Write info out
#
dbdf.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
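
The script is a thin wrapper over utils.collect_dbs; the same collection can be done interactively, as in this sketch (the base directory is hypothetical):

    import pandas as pd
    from qldbtools.utils import collect_dbs

    dbs = list(collect_dbs("~/work-gh/mrva/mrva-open-source-download/"))
    dbdf = pd.DataFrame([d.__dict__ for d in dbs])
    print(dbdf[['owner', 'name', 'language', 'size']].head())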
93
client/qldbtools/bin/mc-db-populate-minio
Executable file
@@ -0,0 +1,93 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB.
"""
# /// script
# dependencies = [
#   "pandas",
#   "numpy",
#   "minio",
# ]
# ///
import argparse
import qldbtools.utils as utils
import logging
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB. """)
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
args = parser.parse_args()
#
#* Collect the information and select subset
#
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers
    entries = df.sample(n=args.num_entries,
                        random_state=np.random.RandomState(args.seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    logging.error(f"Error creating bucket: {err}")

# Get info from dataframe and push the files
for index, row in entries[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    new_name = utils.form_db_bucket_name(owner, name, CID)
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
        logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        logging.error(f"Error uploading file {path}: {err}")

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
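
After a run, the uploads can be checked against the bucket with the same connection settings; a minimal sketch:

    from minio import Minio

    client = Minio("localhost:9000", access_key="user",
                   secret_key="mmusty8432", secure=False)
    for obj in client.list_objects("qldb"):
        print(obj.object_name, obj.size)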
60
client/qldbtools/bin/mc-db-refine-info
Executable file
@@ -0,0 +1,60 @@
#!/usr/bin/env python
""" Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files. Write out an extended table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import pandas as pd
import sys

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files. Write out an extended table in CSV format. """)
args = parser.parse_args()

#
#* Collect the information
#  This step is time-intensive so we save the results right after.
d: DataFrame = pd.read_csv(sys.stdin)
joiners: list[DataFrame] = []
for left_index in range(len(d)):   # cover every row, including the last
    try:
        metac: object
        cqlc: object
        cqlc, metac = utils.extract_metadata(d.path[left_index])
    except utils.ExtractNotZipfile:
        continue
    except utils.ExtractNoCQLDB:
        continue
    try:
        detail_df: DataFrame = utils.metadata_details(left_index, cqlc, metac)
    except utils.DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df: DataFrame = pd.concat(joiners, axis=0)
full_df: DataFrame = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#
#* Save results
#
full_df.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
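
The left_index column is what lets one input row fan out to several detail rows (one per language) and still join back to its source row; a toy illustration of the merge shape used by metadata_details:

    import pandas as pd

    base = pd.DataFrame({'left_index': [0], 'primaryLanguage': ['cpp']})
    langs = pd.DataFrame({'left_index': [0, 0], 'db_lang': ['cpp', 'python']})
    full = pd.merge(base, langs, on='left_index', how='outer')
    # two rows, both carrying primaryLanguage 'cpp'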
122
client/qldbtools/bin/mc-db-unique
Executable file
@@ -0,0 +1,122 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and produce a table with unique entries
    adding the Cumulative ID (CID) column.

    To make this happen:
    - Group entries by (owner,name,CID),
      sort each group by creationTime,
      and keep only the top (newest) element.

    - Drop rows that don't have the
      | cliVersion   |
      | creationTime |
      | language     |
      | sha          |
      columns. There are very few (16 out of 6000 on recent tests) and their DBs
      are questionable.

"""
import argparse
import logging
from argparse import Namespace
from typing import Any

from pandas import DataFrame, Series

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information,
    narrow to <language>,
    group entries by (owner,name), sort each group by
    creationTime and keep only the top (newest) element.
    """)
parser.add_argument('language', type=str,
                    help='The language to be analyzed.')

args: Namespace = parser.parse_args()
#
#* Collect the information and select subset
#
import pandas as pd
import sys
import qldbtools.utils as utils

df2: DataFrame = pd.read_csv(sys.stdin)

#
#* Add single uniqueness field -- CID (Cumulative ID)
#
df2['CID'] = df2.apply(lambda row:
                       utils.cid_hash((
                           row['cliVersion'],
                           row['creationTime'],
                           row['language'],
                           row['sha'],
                       )), axis=1)

#
#* Re-order the dataframe columns by importance
#  - Much of the data
#    1. Is only conditionally present
#    2. Is extra info, not for the DB proper
#    3. May have various names
#
#  - The essential columns are
#    | owner               |
#    | name                |
#    | language            |
#    | size                |
#    | cliVersion          |
#    | creationTime        |
#    | sha                 |
#    | baselineLinesOfCode |
#    | path                |
#
#  - The rest are useful; put them last
#    | db_lang             |
#    | db_lang_displayName |
#    | db_lang_file_count  |
#    | db_lang_linesOfCode |
#    | left_index          |
#    | ctime               |
#    | primaryLanguage     |
#    | finalised           |

df3: DataFrame = df2.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                                      'language', 'sha', 'CID',
                                      'baselineLinesOfCode', 'path', 'db_lang',
                                      'db_lang_displayName', 'db_lang_file_count',
                                      'db_lang_linesOfCode', 'ctime',
                                      'primaryLanguage', 'finalised', 'left_index',
                                      'size'])

# Identify rows missing specific entries
rows = ( df3['cliVersion'].isna() |
         df3['creationTime'].isna() |
         df3['language'].isna() |
         df3['sha'].isna() )
df4: DataFrame = df3[~rows]

# Limit to one language
df5 = df4[df4['language'] == args.language]

# Sort and group
df_sorted: DataFrame = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique: DataFrame = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Write output
df_unique.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
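
A toy run of the sort-and-group step: within each (owner, name, CID) group only the first row after sorting survives (in practice creationTime is constant within a group, since CID hashes it):

    import pandas as pd

    df = pd.DataFrame({'owner': ['a', 'a'], 'name': ['r', 'r'],
                       'CID': ['aaaaaa', 'aaaaaa'],
                       'creationTime': ['2024-01-01', '2024-01-02']})
    out = (df.sort_values(['owner', 'name', 'CID', 'creationTime'])
             .groupby(['owner', 'name', 'CID']).first().reset_index())
    # one row remains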
35
client/qldbtools/bin/mc-db-view-info
Executable file
@@ -0,0 +1,35 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and display it using pandasgui
"""
import argparse
import logging
import sys
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="Read a table of CodeQL DB information and display it using pandasgui")
args = parser.parse_args()
#
#* Collect the information and display it
#
import pandas as pd

df = pd.read_csv(sys.stdin)

import os
os.environ['APPDATA'] = "needed-for-pandasgui"
from pandasgui import show
show(df)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
120
client/qldbtools/bin/mc-hepc-init
Executable file
@@ -0,0 +1,120 @@
#!/usr/bin/env python3

import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date

# Logging function
def log(level, message):
    colors = {
        "INFO": "\033[1;34m",
        "WARN": "\033[1;33m",
        "ERROR": "\033[1;31m",
        "RESET": "\033[0m",
    }
    timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
    print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)

# Generate a CID (cumulative id)
def generate_cid(cli_version, creation_time, primary_language, sha):
    hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
    return hashlib.sha256(hash_input).hexdigest()[:6]

# Expand environment variables in paths
def expand_path(path):
    return local.env.expand(path)

# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
    temp_dir = mktemp("-d").strip()
    try:
        unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return

        yaml_path = yaml_files[0]
        with yaml_path.open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        creation_time = creation_metadata["creationTime"]
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name
        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"

        metadata = {
            "git_branch"             : "HEAD",
            "git_commit_id"          : sha,
            "git_repo"               : repo,
            "ingestion_datetime_utc" : str(creation_time),
            "result_url"             : result_url,
            "tool_id"                : "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name"              : f"codeql-{primary_language}",
            "tool_version"           : cli_version,
            "projname"               : f"{owner}/{repo}",
        }

        metadata_file = local.path(db_collection_dir) / "metadata.json"
        with metadata_file.open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = local.path(db_collection_dir) / new_db_fname
        if not link_path.exists():
            ln("-sf", zip_path, link_path)

    except Exception as e:
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)

# Main application class
class DBProcessor(cli.Application):
    """
    DBProcessor processes db.zip files found in a starting directory,
    symlinks updated names in a collection directory,
    and adds a metadata information file "metadata.json" to the directory.
    """

    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
    )
    starting_path = cli.SwitchAttr(
        "--starting_path", str, mandatory=True, help="Specify the starting path"
    )

    def main(self):
        db_collection_dir = expand_path(self.db_collection_dir)
        starting_path = expand_path(self.starting_path)

        mkdir("-p", db_collection_dir)
        log("INFO", f"Searching for db.zip files in {starting_path}")

        db_files = find(starting_path, "-type", "f", "-name", "db.zip",
                        "-size", "+0c").splitlines()

        if not db_files:
            log("WARN", "No db.zip files found in the specified starting path.")
            return

        for zip_path in db_files:
            process_db_file(zip_path, db_collection_dir)

        log("INFO", "Processing completed.")

if __name__ == "__main__":
    DBProcessor.run()
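
Note that generate_cid above uses SHA-256, while the shell pipeline uses b2sum (BLAKE2b) and qldbtools.utils.cid_hash uses blake2b over the tuple's repr; for the same inputs the three schemes yield different CIDs, so collections built by different tools should not be mixed. A sketch of the comparison, with hypothetical field values:

    import hashlib

    fields = ("2.17.0", "2024-05-13T12:04:01", "cpp", "288920e")
    line = " ".join(fields) + "\n"  # echo ... | b2sum includes the trailing newline
    print(hashlib.blake2b(line.encode()).hexdigest()[:6])              # shell script
    print(hashlib.sha256(" ".join(fields).encode()).hexdigest()[:6])   # generate_cid
    print(hashlib.blake2b(str(fields).encode(), digest_size=3).hexdigest())  # utils.cid_hash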
89
client/qldbtools/bin/mc-hepc-serve
Executable file
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
from plumbum import cli
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import uvicorn

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

# FastAPI application
app = FastAPI()
db_dir = None  # This will be set by the CLI application

@app.get("/db/{file_path:path}")
def serve_file(file_path: str):
    """
    Serve files from the database directory, such as .zip files or metadata.json.
    """
    logger.info(f"Requested file: {file_path}")
    # Resolve symlink; strict=False so a missing file yields the 404 below
    # instead of an unhandled FileNotFoundError.
    resolved_path = Path(file_path).resolve(strict=False)
    logger.info(f"file resolved to: {resolved_path}")
    if not resolved_path.exists():
        logger.error(f"File not found: {resolved_path}")
        raise HTTPException(status_code=404, detail=f"{resolved_path} not found")
    return FileResponse(resolved_path)


@app.get("/index")
@app.get("/api/v1/latest_results/codeql-all")
def serve_metadata_json():
    """
    Serve the metadata.json file for multiple routes.
    """
    metadata_path = Path(db_dir) / "metadata.json"
    logger.info(f"Requested metadata.json at: {metadata_path}")
    if not metadata_path.exists():
        logger.error("metadata.json not found.")
        raise HTTPException(status_code=404, detail="metadata.json not found")
    logger.info(f"Serving metadata.json from: {metadata_path}")
    return FileResponse(metadata_path)

@app.middleware("http")
async def log_request(request, call_next):
    logger.info(f"Incoming request: {request.method} {request.url}")
    response = await call_next(request)
    return response

class DBService(cli.Application):
    """
    DBService serves:
    1. CodeQL database .zip files symlinked in the --codeql-db-dir
    2. Metadata for those zip files, contained in metadata.json in the same
       directory.
    The HTTP endpoints are:
    1. /db/{filename}
    2. /index
    3. /api/v1/latest_results/codeql-all
    """

    codeql_db_dir = cli.SwitchAttr("--codeql-db-dir", str, mandatory=True,
                                   help="Directory containing CodeQL database files")
    host = cli.SwitchAttr("--host", str, default="127.0.0.1",
                          help="Host address for the HTTP server")
    port = cli.SwitchAttr("--port", int, default=8080, help="Port for the HTTP server")

    def main(self):
        global db_dir
        db_dir = Path(self.codeql_db_dir)
        if not db_dir.is_dir():
            logger.error(f"Invalid directory: {db_dir}")
            return 1

        logger.info(f"Starting server at {self.host}:{self.port}")
        logger.info(f"Serving files from directory: {db_dir}")

        # Run the FastAPI server using Uvicorn
        uvicorn.run(app, host=self.host, port=self.port)


if __name__ == "__main__":
    DBService.run()
67
client/qldbtools/bin/mc-rows-from-mrva-list
Executable file
@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Script to list full details for a mrva-list file

1. reads files containing
    {
        "mirva-list": [
            "NLPchina/elasticsearch-sqlctsj168cc4",
            "LMAX-Exchange/disruptorctsj3e75ec",
            "justauth/JustAuthctsj8a6177",
            "FasterXML/jackson-modules-basectsj2fe248",
            "ionic-team/capacitor-pluginsctsj38d457",
            "PaddlePaddle/PaddleOCRctsj60e555",
            "elastic/apm-agent-pythonctsj21dc64",
            "flipkart-incubator/zjsonpatchctsjc4db35",
            "stephane/libmodbusctsj54237e",
            "wso2/carbon-kernelctsj5a8a6e",
            "apache/servicecomb-packctsj4d98f5"
        ]
    }
2. reads a pandas dataframe stored in a csv file
3. selects all rows from 2. that
   - contain the 'owner' column matching the string before the slash from 1. and
   - the 'name' column matching the string between the slash and the marker
     'ctsj' and
   - the 'CID' column matching the string after the marker 'ctsj'

"""
import argparse
import json
import sys

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="""Script to list full details for a mrva-list file""")
parser.add_argument('mrva_list', type=str,
                    help='The JSON file containing the mrva-list')
parser.add_argument('info_csv', type=str,
                    help='The CSV file containing the full information')
args = parser.parse_args()

#* Step 1: Read the JSON file containing the "mirva-list"
with open(args.mrva_list, 'r') as f:
    data = json.load(f)

# Extract and parse the "mirva-list"
mirva_list = data['mirva-list']
parsed_mirva_list = []
for item in mirva_list:
    owner_name = item.split('/')[0]
    repo_name = item.split('/')[1].split('ctsj')[0]
    cid = item.split('/')[1].split('ctsj')[1]
    parsed_mirva_list.append((owner_name, repo_name, cid))

#* Step 2: Read the CSV file into a pandas dataframe
import pandas as pd
df = pd.read_csv(args.info_csv)

#* Step 3: Filter the dataframe based on the parsed "mirva-list"
filtered_df = df[
    df.apply(lambda row:
             (row['owner'], row['name'], row['CID']) in parsed_mirva_list, axis=1)]

# Write the filtered dataframe to stdout as CSV
filtered_df.to_csv(sys.stdout, index=False)
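
The parsing in step 1 relies on the 'ctsj' marker separating repository name from CID; for a single entry from the docstring:

    item = "NLPchina/elasticsearch-sqlctsj168cc4"
    owner, rest = item.split('/')
    name, cid = rest.split('ctsj')
    # ('NLPchina', 'elasticsearch-sql', '168cc4')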
1021
client/qldbtools/gfm.css
Normal file
File diff suppressed because it is too large
138
client/qldbtools/pyproject.toml
Normal file
@@ -0,0 +1,138 @@
[project]
name = "qldbtools"
version = "0.1.0"
description = "A Python package for selecting sets of CodeQL databases to work on"
authors = [
    {name = "Michael Hohn", email = "hohn@github.com"}
]
readme = {file = "README.org", content-type = "text/plain"}
requires-python = ">=3.11"
dependencies = [
    "annotated-types>=0.7.0",
    "anyio>=4.4.0",
    "appnope>=0.1.4",
    "argon2-cffi>=23.1.0",
    "argon2-cffi-bindings>=21.2.0",
    "arrow>=1.3.0",
    "asttokens>=2.4.1",
    "async-lru>=2.0.4",
    "attrs>=24.2.0",
    "babel>=2.16.0",
    "beautifulsoup4>=4.12.3",
    "bleach>=6.1.0",
    "blinker>=1.9.0",
    "certifi>=2024.7.4",
    "cffi>=1.17.0",
    "charset-normalizer>=3.3.2",
    "click>=8.1.7",
    "comm>=0.2.2",
    "debugpy>=1.8.5",
    "decorator>=5.1.1",
    "defusedxml>=0.7.1",
    "executing>=2.0.1",
    "fastapi>=0.115.5",
    "fastjsonschema>=2.20.0",
    "flask>=3.1.0",
    "fqdn>=1.5.1",
    "h11>=0.14.0",
    "httpcore>=1.0.5",
    "httpx>=0.27.0",
    "idna>=3.7",
    "ipykernel>=6.29.5",
    "ipython>=8.26.0",
    "isoduration>=20.11.0",
    "itsdangerous>=2.2.0",
    "jedi>=0.19.1",
    "jinja2>=3.1.4",
    "json5>=0.9.25",
    "jsonpointer>=3.0.0",
    "jsonschema>=4.23.0",
    "jsonschema-specifications>=2023.12.1",
    "jupyter-events>=0.10.0",
    "jupyter-lsp>=2.2.5",
    "jupyter-client>=8.6.2",
    "jupyter-core>=5.7.2",
    "jupyter-server>=2.14.2",
    "jupyter-server-terminals>=0.5.3",
    "jupyterlab>=4.2.4",
    "jupyterlab-pygments>=0.3.0",
    "jupyterlab-server>=2.27.3",
    "lckr-jupyterlab-variableinspector",
    "markupsafe>=2.1.5",
    "matplotlib-inline>=0.1.7",
    "minio==7.2.8",
    "mistune>=3.0.2",
    "nbclient>=0.10.0",
    "nbconvert>=7.16.4",
    "nbformat>=5.10.4",
    "nest-asyncio>=1.6.0",
    "notebook-shim>=0.2.4",
    "numpy>=2.1.0",
    "overrides>=7.7.0",
    "packaging>=24.1",
    "pandas>=2.2.2",
    "pandocfilters>=1.5.1",
    "parso>=0.8.4",
    "pexpect>=4.9.0",
    "platformdirs>=4.2.2",
    "plumbum>=1.9.0",
    "prometheus-client>=0.20.0",
    "prompt-toolkit>=3.0.47",
    "psutil>=6.0.0",
    "ptyprocess>=0.7.0",
    "pure-eval>=0.2.3",
    "pycparser>=2.22",
    "pycryptodome>=3.20.0",
    "pydantic>=2.10.2",
    "pydantic-core>=2.27.1",
    "pygments>=2.18.0",
    "python-dateutil>=2.9.0.post0",
    "python-json-logger>=2.0.7",
    "pytz>=2024.1",
    "pyyaml>=6.0.2",
    "pyzmq>=26.1.1",
    "referencing>=0.35.1",
    "requests>=2.32.3",
    "rfc3339-validator>=0.1.4",
    "rfc3986-validator>=0.1.1",
    "rpds-py>=0.20.0",
    "send2trash>=1.8.3",
    "six>=1.16.0",
    "sniffio>=1.3.1",
    "soupsieve>=2.6",
    "stack-data>=0.6.3",
    "starlette>=0.41.3",
    "terminado>=0.18.1",
    "tinycss2>=1.3.0",
    "tornado>=6.4.1",
    "traitlets>=5.14.3",
    "types-python-dateutil>=2.9.0.20240821",
    "typing-extensions>=4.12.2",
    "tzdata>=2024.1",
    "uri-template>=1.3.0",
    "urllib3>=2.2.2",
    "uvicorn>=0.32.1",
    "wcwidth>=0.2.13",
    "webcolors>=24.8.0",
    "webencodings>=0.5.1",
    "websocket-client>=1.8.0",
    "werkzeug>=3.1.3",
]

[build-system]
requires = ["setuptools>=75.5.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["qldbtools"]
script-files = [
    "bin/mc-db-generate-selection",
    "bin/mc-db-initial-info",
    "bin/mc-db-populate-minio",
    "bin/mc-db-refine-info",
    "bin/mc-db-unique",
    "bin/mc-db-view-info",
    "bin/mc-hepc-init",
    "bin/mc-hepc-serve",
    "bin/mc-rows-from-mrva-list",
]
11
client/qldbtools/qldbtools.code-workspace
Normal file
@@ -0,0 +1,11 @@
{
    "folders": [
        {
            "path": "."
        }
    ],
    "settings": {
        "git.ignoreLimitWarning": true,
        "makefile.configureOnOpen": false
    }
}
2
client/qldbtools/qldbtools/__init__.py
Normal file
@@ -0,0 +1,2 @@
from . import utils
205
client/qldbtools/qldbtools/utils.py
Normal file
@@ -0,0 +1,205 @@
""" This module supports the selection of CodeQL databases based on various
    criteria.
"""
#* Imports
from dataclasses import dataclass
from pathlib import Path

import datetime
import json
import logging
import os
from typing import List, Dict, Any, Iterator

import pandas as pd
import time
import yaml
import zipfile

from pandas import DataFrame

#* Setup
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

#* Utility functions
def log_and_raise(message):
    logging.error(message)
    raise Exception(message)

def log_and_raise_e(message, exception):
    logging.error(message)
    raise exception(message)

def traverse_tree(root: str) -> Iterator[Path]:
    root_path = Path(os.path.expanduser(root))
    if not root_path.exists() or not root_path.is_dir():
        log_and_raise(f"The specified root path '{root}' does not exist or "
                      "is not a directory.")
    for path in root_path.rglob('*'):
        if path.is_file():
            yield path
        elif path.is_dir():
            pass

@dataclass
class DBInfo:
    ctime    : str = '2024-05-13T12:04:01.593586'
    language : str = 'cpp'
    name     : str = 'nanobind'
    owner    : str = 'wjakob'
    path     : Path = Path('/Users/.../db.zip')
    size     : int = 63083064


def collect_dbs(db_base: str) -> Iterator[DBInfo]:
    for path in traverse_tree(db_base):
        if path.name == "db.zip":
            # For the current repository, we have
            #   In [292]: len(path.parts)
            #   Out[292]: 14
            # and can work from the end to get relevant info from the file path.
            db = DBInfo()
            (*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
            db.path = path
            s = path.stat()
            db.size = s.st_size
            # db.ctime_raw = s.st_ctime
            # db.ctime = time.ctime(s.st_ctime)
            db.ctime = datetime.datetime.fromtimestamp(s.st_ctime).isoformat()
            yield db


def extract_metadata(zipfile_path: str) -> tuple[object, object]:
    """
    extract_metadata(zipfile)

    Unzip zipfile into memory and return the contents of the files
    codeql-database.yml and baseline-info.json that it contains in a tuple
    """
    codeql_content = None
    meta_content = None
    try:
        with zipfile.ZipFile(zipfile_path, 'r') as z:
            for file_info in z.infolist():
                # Filenames seen
                #   java/codeql-database.yml
                #   codeql_db/codeql-database.yml
                if file_info.filename.endswith('codeql-database.yml'):
                    with z.open(file_info) as f:
                        codeql_content = yaml.safe_load(f)
                # And
                #   java/baseline-info.json
                #   codeql_db/baseline-info.json
                elif file_info.filename.endswith('baseline-info.json'):
                    with z.open(file_info) as f:
                        meta_content = json.load(f)
    except zipfile.BadZipFile:
        log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNotZipfile)
    # The baseline-info is only available in more recent CodeQL versions
    if not meta_content:
        meta_content = {'languages':
                        {'no-language': {'displayName': 'no-language',
                                         'files': [],
                                         'linesOfCode': -1,
                                         'name': 'nolang'},
                         }}

    if not codeql_content:
        log_and_raise_e(f"No codeql-database.yml in '{zipfile_path}'", ExtractNoCQLDB)
    return codeql_content, meta_content

class ExtractNotZipfile(Exception): pass
class ExtractNoCQLDB(Exception): pass

def metadata_details(left_index: int, codeql_content: object, meta_content: object) -> pd.DataFrame:
    """
    metadata_details(codeql_content, meta_content)

    Extract the details from metadata that will be used in DB selection and return a
    dataframe with the information. Example, cropped to fit:

    full_df.T
    Out[535]:
                                         0                  1
    left_index                           0                  0
    baselineLinesOfCode              17990              17990
    primaryLanguage                    cpp                cpp
    sha                  288920efc079766f4  282c20efc079766f4
    cliVersion                      2.17.0             2.17.0
    creationTime             .325253+00:00    51.325253+00:00
    finalised                         True               True
    db_lang                            cpp             python
    db_lang_displayName              C/C++             Python
    db_lang_file_count                 102                 27
    db_lang_linesOfCode              17990               5586
    """
    cqlc, metac = codeql_content, meta_content
    d = {'left_index': left_index,
         'baselineLinesOfCode': cqlc['baselineLinesOfCode'],
         'primaryLanguage': cqlc['primaryLanguage'],
         'sha': cqlc['creationMetadata'].get('sha', 'abcde0123'),
         'cliVersion': cqlc['creationMetadata']['cliVersion'],
         'creationTime': cqlc['creationMetadata']['creationTime'],
         'finalised': cqlc.get('finalised', pd.NA),
         }
    f = pd.DataFrame(d, index=[0])
    joiners: list[dict[str, int | Any]] = []
    if not ('languages' in metac):
        log_and_raise_e("Missing 'languages' in metadata", DetailsMissing)
    for lang, lang_cont in metac['languages'].items():
        d1: dict[str, int | Any] = { 'left_index' : left_index,
                                     'db_lang': lang }
        for prop, val in lang_cont.items():
            if prop == 'files':
                d1['db_lang_file_count'] = len(val)
            elif prop == 'linesOfCode':
                d1['db_lang_linesOfCode'] = val
            elif prop == 'displayName':
                d1['db_lang_displayName'] = val
        joiners.append(d1)
    fj: DataFrame = pd.DataFrame(joiners)
    full_df: DataFrame = pd.merge(f, fj, on='left_index', how='outer')
    return full_df

class DetailsMissing(Exception): pass

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Take a tuple of row values and return its hash as a hex string
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

def form_db_bucket_name(owner, name, CID):
    """
    form_db_bucket_name(owner, name, CID)
    Return the name to use in minio storage; this function is trivial and used to
    enforce consistent naming.

    The 'ctsj' marker is a random, unique key to identify the information.
    """
    return f'{owner}${name}ctsj{CID}.zip'

def form_db_req_name(owner: str, name: str, CID: str) -> str:
    """
    form_db_req_name(owner, name, CID)
    Return the name to use in mrva requests; this function is trivial and used to
    enforce consistent naming.

    The 'ctsj' marker is a random, unique key to identify the information.
    """
    return f'{owner}/{name}ctsj{CID}'


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
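
A short sketch tying the naming helpers together (values are hypothetical):

    from qldbtools.utils import cid_hash, form_db_bucket_name, form_db_req_name

    cid = cid_hash(("2.17.0", "2024-05-13T12:04:01", "cpp", "288920e"))
    form_db_bucket_name("wjakob", "nanobind", cid)  # 'wjakob$nanobindctsj<cid>.zip'
    form_db_req_name("wjakob", "nanobind", cid)     # 'wjakob/nanobindctsj<cid>'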
109
client/qldbtools/requirements.txt
Normal file
@@ -0,0 +1,109 @@
annotated-types==0.7.0
anyio==4.4.0
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==24.2.0
babel==2.16.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.9.0
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastapi==0.115.5
fastjsonschema==2.20.0
Flask==3.1.0
fqdn==1.5.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipykernel==6.29.5
ipython==8.26.0
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
minio==7.2.8
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.1.0
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.2
plumbum==1.9.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.1.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.20.0
Send2Trash==1.8.3
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.41.3
terminado==0.18.1
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20240821
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.32.1
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3
61
client/qldbtools/session/db-generate-selection.py
Normal file
@@ -0,0 +1,61 @@
""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
"""
#
#* Collect the information and write files
#
import pandas as pd
import sys
import qldbtools.utils as utils
import numpy as np
import importlib
importlib.reload(utils)

df0 = pd.read_csv('scratch/db-info-3.csv')

# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))

repos = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    repos.append(utils.form_db_req_name(owner, name, CID))

repo_list_name = "mirva-list"
vsc = {
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                {
                    "name": repo_list_name,
                    "repositories": repos,
                }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": repo_list_name
    }
}

gh = {
    repo_list_name: repos
}


# write the files
import json
with open("tmp-selection-vsc.json", "w") as fc:
    json.dump(vsc, fc, indent=4)
with open("tmp-selection-gh.json", "w") as fc:
    json.dump(gh, fc, indent=4)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
59
client/qldbtools/session/db-initial-info.py
Normal file
@@ -0,0 +1,59 @@
#* Experimental work with utils.py, to be merged into it.
#  The rest of this interactive script is available as cli script in
#  mc-db-initial-info
from utils import *

#* Data collection
# Get the db information in list of DBInfo form
db_base = "~/work-gh/mrva/mrva-open-source-download/"
dbs = list(collect_dbs(db_base))

# Inspect:
from pprint import pprint
pprint(["len", len(dbs)])
pprint(["dbs[0]", dbs[0].__dict__])
pprint(["dbs[-1]", dbs[-1].__dict__])
#
# Get a dataframe
dbdf = pd.DataFrame([d.__dict__ for d in dbs])
#
#* Experiments with on-disk format
#  Continue use of raw information in separate session.
#
# PosixPath is a problem for json and parquet
#
dbdf['path'] = dbdf['path'].astype(str)
#
dbdf.to_csv('dbdf.csv')
#
dbdf.to_csv('dbdf.csv.gz', compression='gzip', index=False)
#
dbdf.to_json('dbdf.json')
#
# dbdf.to_hdf('dbdf.h5', key='dbdf', mode='w')
#
# fast, binary
dbdf.to_parquet('dbdf.parquet')
#
# fast
import sqlite3
conn = sqlite3.connect('dbdf.db')
dbdf.to_sql('qldbs', conn, if_exists='replace', index=False)
conn.close()
#
# Sizes:
#   ls -laSr dbdf.*
#   -rw-r--r--@ 1 hohn  staff   101390 Jul 12 14:17 dbdf.csv.gz
#   -rw-r--r--@ 1 hohn  staff   202712 Jul 12 14:17 dbdf.parquet
#   -rw-r--r--@ 1 hohn  staff   560623 Jul 12 14:17 dbdf.csv
#   -rw-r--r--@ 1 hohn  staff   610304 Jul 12 14:17 dbdf.db
#   -rw-r--r--@ 1 hohn  staff   735097 Jul 12 14:17 dbdf.json
#
# parquet has many libraries, including go: xitongsys/parquet-go
# https://parquet.apache.org/
#


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
65
client/qldbtools/session/db-populate-minio.py
Normal file
@@ -0,0 +1,65 @@
import qldbtools.utils as utils
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path

#
#* Collect the information and select subset
#
df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers
    entries = df.sample(n=3,
                        random_state=np.random.RandomState(seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    print(f"Error creating bucket: {err}")

# (test) File paths and new names
files_to_upload = {
    "cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
    "cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}

# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, prefix / Path(local_path))
        print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        print(f"Error uploading file {local_path}: {err}")


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
46
client/qldbtools/session/db-post-refine-info.py
Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Take a tuple of row values and return its hash as a hex string
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
                                              row['sha'],
                                              row['cliVersion'],
                                              row['language'])
                                            ), axis=1)

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha', 'CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
118
client/qldbtools/session/db-refine-info.py
Normal file
@@ -0,0 +1,118 @@
# Experimental work to be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint

#* Reload gzipped CSV file to continue work
dbdf_1 = pd.read_csv('dbdf.csv.gz', compression='gzip')
#
# (old) Consistency check:
#   dbdf_1.columns == dbdf.columns
#   dbmask = (dbdf_1 != dbdf)
#   dbdf_1[dbmask]
#   dbdf_1[dbmask].dropna(how='all')
#   ctime_raw is different in places, so don't use it.

#
#* Interact with/visualize the dataframe
# Using pandasgui -- qt
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(dbdf_1)
# Using dtale -- web
import dtale
dtale.show(dbdf_1)
#

#
#* Collect metadata from DB zip files
#
#** A manual sample
#
d = dbdf_1
left_index = 0
d.path[0]
cqlc, metac = extract_metadata(d.path[0])

cqlc['baselineLinesOfCode']
cqlc['primaryLanguage']
cqlc['creationMetadata']['sha']
cqlc['creationMetadata']['cliVersion']
cqlc['creationMetadata']['creationTime'].isoformat()
cqlc['finalised']

for lang, lang_cont in metac['languages'].items():
    print(lang)
    indent = "    "
    for prop, val in lang_cont.items():
        if prop == 'files':
            print("%sfiles count %d" % (indent, len(val)))
        elif prop == 'linesOfCode':
            print("%slinesOfCode %d" % (indent, val))
        elif prop == 'displayName':
            print("%sdisplayName %s" % (indent, val))

#** Automated for all entries
# The rest of this interactive script is available as cli script in
# mc-db-refine-info
d = dbdf_1
joiners = []
for left_index in range(len(d)):   # cover every row, including the last
    try:
        cqlc, metac = extract_metadata(d.path[left_index])
    except ExtractNotZipfile:
        continue
    except ExtractNoCQLDB:
        continue
    try:
        detail_df = metadata_details(left_index, cqlc, metac)
    except DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df = pd.concat(joiners, axis=0)
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#** View the full dataframe with metadata
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(full_df)

#** Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names

# - The essential columns are
#   | owner               |
#   | name                |
#   | language            |
#   | size                |
#   | cliVersion          |
#   | creationTime        |
#   | sha                 |
#   | baselineLinesOfCode |
#   | path                |

# - The rest are useful; put them last
#   | db_lang             |
#   | db_lang_displayName |
#   | db_lang_file_count  |
#   | db_lang_linesOfCode |
#   | left_index          |
#   | ctime               |
#   | primaryLanguage     |
#   | finalised           |

final_df = full_df.reindex(columns=['owner', 'name', 'language', 'size', 'cliVersion',
                                    'creationTime', 'sha', 'baselineLinesOfCode', 'path',
                                    'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                                    'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                                    'finalised', 'left_index'])

final_df.to_csv('all-info-table.csv.gz', compression='gzip', index=False)

#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
41
client/qldbtools/session/db-unique-1.py
Executable file
@@ -0,0 +1,41 @@
# Experimental work for ../bin/mc-db-unique, to be merged into it.
import qldbtools.utils as utils
from pprint import pprint
import pandas as pd
# cd ../

#* Reload CSV file to continue work
df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')

# Identify rows missing specific entries
rows = ( df2['cliVersion'].isna() |
         df2['creationTime'].isna() |
         df2['language'].isna() |
         df2['sha'].isna() )
df2[rows]
df3 = df2[~rows]
df3

#* post-save work
df4 = pd.read_csv('scratch/db-info-3.csv')

# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Find duplicates
df_dups = df_unique[df_unique['CID'].duplicated(keep=False)]
len(df_dups)
df_dups['CID']

# Set display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 140)


#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
46  client/qldbtools/session/db-unique.py  Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Hash the string form of row_tuple and return the digest as a hex string.
    """
    h = blake2b(digest_size = 3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
                                              row['sha'],
                                              row['cliVersion'],
                                              row['language'])
                                             ), axis=1)
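#** Sketch (not in the original session): cid_hash is deterministic over the
# tuple's string form, and digest_size=3 yields a 6-hex-character CID, so
# distinct tuples can collide in principle -- acceptable here for grouping.
# _a = cid_hash(('2024-01-01', 'deadbeef', '2.17.5', 'cpp'))
# _b = cid_hash(('2024-01-01', 'deadbeef', '2.17.5', 'cpp'))
# assert _a == _b and len(_a) == 6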

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha','CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
13  client/qldbtools/setup.py  Normal file
@@ -0,0 +1,13 @@
from setuptools import setup, find_packages
import glob

setup(
    name='qldbtools',
    version='0.1.0',
    description='A Python package for working with CodeQL databases',
    author='Michael Hohn',
    author_email='hohn@github.com',
    packages=['qldbtools'],
    install_requires=[],
    scripts=glob.glob("bin/mc-*"),
)
2278  client/qldbtools/uv.lock  generated  Normal file
File diff suppressed because it is too large
@@ -23,7 +23,8 @@ ARG CODEQL_VERSION=latest
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates
    ca-certificates \
    default-jdk

# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
@@ -32,18 +33,19 @@ RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr/
ENV CODEQL_JAVA_HOME=/usr

# Copy built agent binary from the builder stage
WORKDIR /app
COPY --from=builder /bin/mrva_agent ./mrva_agent

# Run the agent
ENTRYPOINT ["./mrva_agent"]
ENTRYPOINT ["./mrva_agent"]

23  cmd/agent/Makefile  Normal file
@@ -0,0 +1,23 @@
all: mrva-agent

MAI_TARGET := mrva-agent:0.1.24
mai: mk.mrva-agent
mrva-agent: mk.mrva-agent
mk.mrva-agent:
	cd ../../ && docker build -t mrva-agent:0.1.24 -f cmd/agent/Dockerfile .
	touch $@

mai-serve: mai
	docker run --rm -it ${MAI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MAI_TARGET}
	-rm mrva-agent

mai-push: mk.mai-push
mk.mai-push: mai
	docker tag ${MAI_TARGET} ghcr.io/hohn/${MAI_TARGET}
	docker push ghcr.io/hohn/${MAI_TARGET}
	touch $@

@@ -1,172 +0,0 @@
package main

import (
	"context"
	"flag"
	"os"
	"os/signal"
	"runtime"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/elastic/go-sysinfo"
	"golang.org/x/exp/slog"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/queue"
)

const (
	workerMemoryMB     = 2048 // 2 GB
	monitorIntervalSec = 10   // Monitor every 10 seconds
)

func calculateWorkers() int {
	host, err := sysinfo.Host()
	if err != nil {
		slog.Error("failed to get host info", "error", err)
		os.Exit(1)
	}

	memInfo, err := host.Memory()
	if err != nil {
		slog.Error("failed to get memory info", "error", err)
		os.Exit(1)
	}

	// Get available memory in MB
	totalMemoryMB := memInfo.Available / (1024 * 1024)

	// Ensure we have at least one worker
	workers := int(totalMemoryMB / workerMemoryMB)
	if workers < 1 {
		workers = 1
	}

	// Limit the number of workers to the number of CPUs
	cpuCount := runtime.NumCPU()
	if workers > cpuCount {
		workers = max(cpuCount, 1)
	}

	return workers
}

func startAndMonitorWorkers(ctx context.Context, queue queue.Queue, desiredWorkerCount int, wg *sync.WaitGroup) {
	currentWorkerCount := 0
	stopChans := make([]chan struct{}, 0)

	if desiredWorkerCount != 0 {
		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
		for i := 0; i < desiredWorkerCount; i++ {
			stopChan := make(chan struct{})
			stopChans = append(stopChans, stopChan)
			wg.Add(1)
			go agent.RunWorker(ctx, stopChan, queue, wg)
		}
		return
	}

	slog.Info("Worker count not specified, managing based on available memory and CPU")

	for {
		select {
		case <-ctx.Done():
			// signal all workers to stop
			for _, stopChan := range stopChans {
				close(stopChan)
			}
			return
		default:
			newWorkerCount := calculateWorkers()

			if newWorkerCount != currentWorkerCount {
				slog.Info(
					"Modifying worker count",
					slog.Int("current", currentWorkerCount),
					slog.Int("new", newWorkerCount))
			}

			if newWorkerCount > currentWorkerCount {
				for i := currentWorkerCount; i < newWorkerCount; i++ {
					stopChan := make(chan struct{})
					stopChans = append(stopChans, stopChan)
					wg.Add(1)
					go agent.RunWorker(ctx, stopChan, queue, wg)
				}
			} else if newWorkerCount < currentWorkerCount {
				for i := newWorkerCount; i < currentWorkerCount; i++ {
					close(stopChans[i])
				}
				stopChans = stopChans[:newWorkerCount]
			}
			currentWorkerCount = newWorkerCount

			time.Sleep(monitorIntervalSec * time.Second)
		}
	}
}

func main() {
	slog.Info("Starting agent")

	workerCount := flag.Int("workers", 0, "number of workers")
	flag.Parse()

	requiredEnvVars := []string{
		"MRVA_RABBITMQ_HOST",
		"MRVA_RABBITMQ_PORT",
		"MRVA_RABBITMQ_USER",
		"MRVA_RABBITMQ_PASSWORD",
		"CODEQL_JAVA_HOME",
		"CODEQL_CLI_PATH",
	}

	for _, envVar := range requiredEnvVars {
		if _, ok := os.LookupEnv(envVar); !ok {
			slog.Error("Missing required environment variable", "key", envVar)
			os.Exit(1)
		}
	}

	rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
	rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
	rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
	rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")

	rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
	if err != nil {
		slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
		os.Exit(1)
	}

	slog.Info("Initializing RabbitMQ queue")

	rabbitMQQueue, err := queue.InitializeRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
	if err != nil {
		slog.Error("failed to initialize RabbitMQ", slog.Any("error", err))
		os.Exit(1)
	}
	defer rabbitMQQueue.Close()

	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())

	go startAndMonitorWorkers(ctx, rabbitMQQueue, *workerCount, &wg)

	slog.Info("Agent started")

	// Gracefully exit on SIGINT/SIGTERM
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	<-sigChan
	slog.Info("Shutting down agent")

	// TODO: fix this to gracefully terminate agent workers during jobs
	cancel()
	wg.Wait()

	slog.Info("Agent shutdown complete")
}
@@ -1,38 +1,56 @@
# Use the ubuntu 22.04 base image
FROM ubuntu:24.10
FROM golang:1.22 AS builder

# Set architecture to arm64
ARG ARCH=arm64
ARG AARCH=aarch64
# Copy the entire project
WORKDIR /app
COPY . .

# Set environment variables
# Download dependencies
RUN go mod download

# Set the working directory to the cmd/server subproject
WORKDIR /app/cmd/server

# Build the server
RUN go build -o /bin/mrva_server ./main.go

FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive
ENV CODEQL_VERSION=codeql-bundle-v2.17.5
ENV CODEQL_DOWNLOAD_URL=https://github.com/github/codeql-action/releases/download/${CODEQL_VERSION}/codeql-bundle-linux64.tar.gz
ENV JDK_VERSION=22.0.1
ENV JDK_DOWNLOAD_URL=https://download.oracle.com/java/21/latest/jdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
ENV JDK_DOWNLOAD_URL=https://download.java.net/java/GA/jdk${JDK_VERSION}/c7ec1332f7bb44aeba2eb341ae18aca4/8/GPL/openjdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz

ENV CODEQL_JAVA_HOME=/usr/local/jdk-${JDK_VERSION}
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# Install necessary tools
RUN apt-get update && \
    apt-get install -y curl tar && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates \
    default-jdk

# Add and extract the CodeQL bundle
RUN curl -L $CODEQL_DOWNLOAD_URL -o /tmp/${CODEQL_VERSION}.tar.gz && \
    tar -xzf /tmp/${CODEQL_VERSION}.tar.gz -C /opt && \
    rm /tmp/${CODEQL_VERSION}.tar.gz
# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
    CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Add and extract the JDK
RUN curl -L $JDK_DOWNLOAD_URL -o /tmp/jdk-${JDK_VERSION}.tar.gz && \
    tar -xzf /tmp/jdk-${JDK_VERSION}.tar.gz -C /usr/local && \
    rm /tmp/jdk-${JDK_VERSION}.tar.gz
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set PATH
ENV PATH=/opt/codeql:"$PATH"
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr

# Prepare host mount point
RUN mkdir /mrva
# Set working directory to /app

# Copy built server binary from the builder stage
COPY --from=builder /bin/mrva_server ./mrva_server

# Copy the CodeQL database directory from the builder stage (for standalone mode)
COPY --from=builder /app/cmd/server/codeql ./codeql

# Run the server with the default mode set to container
ENTRYPOINT ["./mrva_server"]
CMD ["--mode=container"]
26  cmd/server/Makefile  Normal file
@@ -0,0 +1,26 @@
all: mrva-server

MSI_TARGET := mrva-server:0.1.24
msi: mk.mrva-server
mrva-server: mk.mrva-server
mk.mrva-server:
	cd ../../ && docker build -t mrva-server:0.1.24 -f cmd/server/Dockerfile .
	touch $@

msi-serve: msi
	docker run --rm -it ${MSI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MSI_TARGET}
	-rm mrva-server

msi-push: mk.msi-push
mk.msi-push: mk.mrva-server
	docker tag ${MSI_TARGET} ghcr.io/hohn/${MSI_TARGET}
	docker push ghcr.io/hohn/${MSI_TARGET}
	touch $@

msi-test:
	docker pull ghcr.io/hohn/${MSI_TARGET}
	docker run --rm -it --name test-mrva-server-codeql ghcr.io/hohn/${MSI_TARGET} sh

@@ -1,156 +0,0 @@
// Copyright © 2024 github
// Licensed under the Apache License, Version 2.0 (the "License").

package main

import (
	"flag"
	"log"
	"log/slog"
	"os"

	"mrvacommander/config/mcc"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/logger"
	"mrvacommander/pkg/qldbstore"
	"mrvacommander/pkg/qpstore"
	"mrvacommander/pkg/queue"
	"mrvacommander/pkg/server"
	"mrvacommander/pkg/storage"
)

func main() {
	// Define flags
	helpFlag := flag.Bool("help", false, "Display help message")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")

	// Custom usage function for the help flag
	flag.Usage = func() {
		log.Printf("Usage of %s:\n", os.Args[0])
		flag.PrintDefaults()
		log.Println("\nExamples:")
		log.Println("  go run main.go --loglevel=debug --mode=container")
	}

	// Parse the flags
	flag.Parse()

	// Handle the help flag
	if *helpFlag {
		flag.Usage()
		return
	}

	// Apply 'loglevel' flag
	switch *logLevel {
	case "debug":
		slog.SetLogLoggerLevel(slog.LevelDebug)
	case "info":
		slog.SetLogLoggerLevel(slog.LevelInfo)
	case "warn":
		slog.SetLogLoggerLevel(slog.LevelWarn)
	case "error":
		slog.SetLogLoggerLevel(slog.LevelError)
	default:
		log.Printf("Invalid logging verbosity level: %s", *logLevel)
		os.Exit(1)
	}

	// Read configuration
	config := mcc.LoadConfig("mcconfig.toml")

	// Output configuration summary
	log.Printf("Help: %t\n", *helpFlag)
	log.Printf("Log Level: %s\n", *logLevel)
	log.Printf("Mode: %s\n", *mode)

	// Apply 'mode' flag
	switch *mode {
	case "standalone":
		// Assemble single-process version

		sl := logger.NewLoggerSingle(&logger.Visibles{})

		// FIXME take value from configuration
		sq := queue.NewQueueSingle(2, &queue.Visibles{
			Logger: sl,
		})

		ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})

		qp, err := qpstore.NewStore(&qpstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize query pack storage")
			os.Exit(1)
		}

		ql, err := qldbstore.NewStore(&qldbstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize ql database storage")
			os.Exit(1)
		}

		server.NewCommanderSingle(&server.Visibles{
			Logger:         sl,
			Queue:          sq,
			ServerStore:    ss,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})

		// FIXME take value from configuration
		agent.NewAgentSingle(2, &agent.Visibles{
			Logger:         sl,
			Queue:          sq,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})

	case "container":
		// Assemble container version
		sl := logger.NewLoggerSingle(&logger.Visibles{})

		// FIXME take value from configuration
		sq := queue.NewQueueSingle(2, &queue.Visibles{
			Logger: sl,
		})

		ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})

		qp, err := qpstore.NewStore(&qpstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize query pack storage")
			os.Exit(1)
		}

		ql, err := qldbstore.NewStore(&qldbstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize ql database storage")
			os.Exit(1)
		}

		agent.NewAgentSingle(2, &agent.Visibles{
			Logger:         sl,
			Queue:          sq,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})

		server.NewCommanderSingle(&server.Visibles{
			Logger:         sl,
			Queue:          sq,
			ServerStore:    ss,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})

	case "cluster":
		// Assemble cluster version
	default:
		slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster\n")
		os.Exit(1)
	}

}
@@ -17,15 +17,15 @@ type System struct {

func LoadConfig(fname string) *System {
	if _, err := os.Stat(fname); err != nil {
		slog.Error("Configuration file %s not found", fname)
		os.Exit(1)
		slog.Warn("Configuration file not found", "name", fname)
		return &System{}
	}

	var config System

	_, err := toml.DecodeFile(fname, &config)
	if err != nil {
		slog.Error("", err)
		slog.Error("Error decoding configuration file", err)
		os.Exit(1)
	}

7  demo/containers/dbsdata/Dockerfile  Normal file
@@ -0,0 +1,7 @@
# Use a minimal base image
FROM busybox

ADD dbsdata_backup.tar /

# Just run sh if this container is ever started
CMD ["sh"]
77  demo/containers/dbsdata/README.org  Normal file
@@ -0,0 +1,77 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Run the raw container assembly
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo-build.yml up -d

# Use the following commands to populate the mrvacommander database storage
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
mkdir -p scratch
source venv/bin/activate

./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv

./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv

./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv

# Several seconds start-up time; fast db population
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv

# While the containers are running, this will show minio's storage. The zip files
# are split into part.* and xl.meta by minio. Use the web interface to see real
# names.
docker exec dbstore ls -R /data/mrvacommander/

# Open browser to see the file listing
open http://localhost:9001/browser/qldb

# list the volumes
docker volume ls |grep dbs
docker volume inspect mrvacommander_dbsdata

# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Use mrvacommander_dbsdata to access the compose cluster
# EITHER
# Get the data as tar file from the image using container
rm -f dbsdata_backup.tar
docker run --rm \
    -v mrvacommander_dbsdata:/data \
    -v $(pwd):/backup \
    busybox sh -c "tar cf /backup/dbsdata_backup.tar /data"
# OR
# Use gnu tar on host. The macos tar adds extended attributes
# brew install gnu-tar
rm -f dbsdata_backup.tar && gtar cf dbsdata_backup.tar data/

# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .
docker image ls | grep dbs

# check container contents
docker run -it dbsdata-container:0.1.24 /bin/sh
docker run -it dbsdata-container:0.1.24 ls data/qldb

# Tag the dbstore backing container
docker inspect dbsdata-container:0.1.24 |grep Id
docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24

# Push the pre-populated image
docker push ghcr.io/hohn/dbsdata-container:0.1.24

# Check the tagged image
docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \
    ls data/qldb

# Shut down the container assembly
docker-compose -f docker-compose-demo-build.yml down
#+END_SRC

BIN  demo/containers/dbsdata/dbsdata_backup.tar  (Stored with Git LFS)  Normal file
Binary file not shown.
11  doc/README.md  Normal file
@@ -0,0 +1,11 @@
## The doc/ directory
The `doc/` directory serves as home for documentation. This is the place to
put refined documentation after it has gone through `notes/`. The contents of
this directory should be accessible to a broad audience including prospective
users, active users, and developers. Highly technical content stays in
`notes/`, whose audience is

1. The note authors and
2. Developers of the project

It need not be meaningful to casual users.

101  doc/mrva-business.org  Normal file
@@ -0,0 +1,101 @@
* MRVA for CodeQL: A Business View
** Introduction
The companion documents in this directory are mostly technical. The purpose of
this document is to explain, from a business perspective, what MRVA is and why
it matters.

To illustrate its impact, consider two real-world cases:

*** Case 1: Preventing Costly Security Failures
One of our customers faced a significant lawsuit due to inadequate security.
The root cause? Unaddressed technical risks in their code. The work we do
directly prevents similar vulnerabilities from reaching this stage.

While lawsuits of this scale are rare, security failures are not. More common
consequences include:

- Compliance violations (e.g., GDPR, SOC2 penalties)
- Security breaches leading to reputation damage
- Productivity loss from disruptive technical failures

Lawsuits may be exceptional, but code security failures occur daily. Our role
isn’t just about preventing catastrophic losses—it’s about avoiding the small,
accumulating failures that erode security, compliance, and trust over time.

*** Case 2: Identifying Hidden Risks at Scale
Another customer manages a massive software portfolio of 120,000+ distinct
codebases—a scale at which traditional security tools and manual review
processes become impractical.

- A few known vulnerabilities had already been identified and patched.
- Our analysis uncovered 30 additional high-risk instances, previously undetected.

These findings were critical because:

- Traditional security tools break down at scale. Most solutions work well for
  isolated codebases but lack the capability to analyze patterns across
  120,000 repositories.
- Complexity hides risk. Identifying these vulnerabilities required specialized
  techniques beyond simple scanning—capable of handling variations,
  context, and subtle exploit paths.
- Existing security processes failed to detect these vulnerabilities. Without
  proactive intervention, these risks would have remained undetected until
  a potential breach occurred.

This case highlights a critical gap in standard security practices. By leveraging
advanced, scalable analysis, we identified and mitigated risks that would have
otherwise gone unnoticed—demonstrating the value of proactive security
at scale.

** Why This Matters
These examples, along with others, reinforce the importance of proactive
security—especially in the context of MRVA. Security risks don’t just exist
in theory; they have tangible business consequences.

MRVA provides a scalable, systematic approach to identifying and addressing
risks before they escalate—ensuring that security is a strategic advantage, not
just a cost.

** What is MRVA?
MRVA stands for /Multi-Repository Variant Analysis/. The concept is straightforward:

1. A /problem/ is identified in one codebase.
2. Variations of this problem (/variants/) can be defined.
3. The organization manages many code repositories (/multi-repository/).
4. A systematic /analysis/ is required to detect these variants across all repositories.

In practice:
- Steps 1 & 2: Defined through CodeQL queries, often custom-written for this purpose.
- Steps 3 & 4: Can be done manually but come with significant challenges.

*** Challenges of Manual Execution
Manually searching for these variants across multiple repositories is possible
but inefficient and error-prone due to:

- /High bookkeeping overhead/ – Tracking thousands of repositories is
  cumbersome.
- /Heavy scripting requirements/ – Expert /Unix scripting skills/ are
  necessary.
- /Scaling limitations/ – Analyzing /thousands of repositories sequentially/
  is slow, and manual parallelization is impractical.
- /Cumbersome review process/ – Results are stored as /raw text files/,
  requiring multiple processing steps for meaningful analysis.

*** MRVA: A Streamlined, Integrated Solution
Instead of relying on manual effort, MRVA is designed to /automate and
integrate/ the process.

- The system is designed to be /machine-driven/ and integrated into an
  automated pipeline.
- Once incorporated, MRVA leverages the /CodeQL VS Code plugin/ to provide a
  /seamless user experience/.
- How it works:
  - Users submit queries through the UI.
  - Results are retrieved and displayed dynamically as they become available.
  - The entire workflow is automated, scalable, and significantly more
    efficient than manual methods.

By eliminating manual inefficiencies, MRVA enables organizations to identify
and resolve security issues across massive codebases at scale, ensuring both
accuracy and speed in vulnerability detection.

331  doc/mrva-interconnect.ltx  Normal file
@@ -0,0 +1,331 @@
\documentclass[11pt]{article}

% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}

% increase nesting depth

\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=–}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=›}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}


% Load CM Bright for math
\usepackage{amsmath} % Standard math package
\usepackage{amssymb} % Additional math symbols
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans

\usepackage{fourier}

% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default

% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
  language=Python,
  basicstyle=\ttfamily\small, % Monospaced font
  commentstyle=\itshape\color{gray}, % Italic and gray for comments
  keywordstyle=\color{blue}, % Keywords in blue
  stringstyle=\color{red}, % Strings in red
  mathescape=true, % Enable math in comments
  breaklines=true, % Break long lines
  numbers=left, % Add line numbers
  numberstyle=\tiny\color{gray}, % Style for line numbers
  frame=single, % Add a frame around the code
}

\usepackage{newfloat} % Allows creating custom float types

% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
  fileext=lol,
  listname=List of Listings,
  name=Listing
]{listing}

% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}

% Title, Author, and Date (or Report Number)
\title{MRVA component interconnections}
\author{Michael Hohn}
\date{Technical Report 20250524}

\hypersetup{
 pdfauthor={Michael Hohn},
 pdftitle={MRVA component interconnections},
 pdfkeywords={},
 pdfsubject={},
 pdfcreator={Emacs 29.1},
 pdflang={English}}

\begin{document}

\maketitle
\tableofcontents

\section{Overview}
\label{sec:overview}

The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:


\begin{itemize}
\item {\textbf{Server}}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}

The execution process follows a structured workflow:

\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
  set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}

This full round-trip can be expressed as:

\begin{equation}
  \text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
  \xrightarrow{\text{enqueue}}
  \text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
  \xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
  \text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}

\section{Symbols and Notation}
\label{sec:orgb695d5a}

We define the following symbols for entities in the system:

\begin{center}
\begin{tabular}{lll}
Concept & Symbol & Description \\[0pt]
\hline
Client & \(C\) & The source of the query submission \\[0pt]
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
Agent Set & \(A\) & The set of all available agents \\[0pt]
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
\end{tabular}
\end{center}


\section{Full Round-Trip Representation}
\label{sec:full-round-trip}
The full round-trip execution, from query submission to result delivery, can be summarized as:

\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
\xrightarrow{\text{poll}}
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
\]

\begin{itemize}
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
\end{itemize}


\section{Result Representation}

For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]

where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query
  \(\mathcal{Q}_j\)
  on repository \(\mathcal{R}_i\).
\end{itemize}
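
As a minimal worked instance of this union (an illustration, not from the
original report): with \(N = 2\) repositories, a single query (\(M = 1\)),
and result counts \(k_{1,1} = 2\), \(k_{2,1} = 0\),
\[
\mathcal{R}^{\mathcal{Q}} = \{ r_{1,1,1},\, r_{1,1,2} \},
\]
since the empty result set of \(\mathcal{R}_2\) contributes nothing.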

An individual result from the \(i\)-th repository, \(j\)-th query, and \(k\)-th result is:
\[
r_{i,j,k}
\]


\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j}} C
\]

Each result can be further indexed to track multiple repositories and result sets.


\section{Graph Extraction from Log Table}

Assume we have a structured event log represented as a set of tuples.

\subsection*{Event Log Structure}

Let
\[
\mathcal{T} = \{ t_1, t_2, \dots, t_n \}
\]
be the set of all events, where each event
\[
t_i = (\mathit{id}_i, \tau_i, a_i, e_i, q_i, r_i, c_i)
\]
consists of:
\begin{itemize}
\item \(\mathit{id}_i\): unique event ID
\item \(\tau_i\): timestamp
\item \(a_i\): actor (e.g., ``agent\_alpha1'')
\item \(e_i\): event type (e.g., ``enqueue'', ``execute'')
\item \(q_i\): query ID
\item \(r_i\): repository ID
\item \(c_i\): result count (may be \(\bot\) if not applicable)
\end{itemize}

Let
\[
\mathcal{G} = (V, E)
\]
be a directed graph constructed from \(\mathcal{T}\), with vertices \(V\) and edges \(E\).

\subsection*{Graph Definition}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &\subseteq V \times V
\end{align*}

Edges capture temporal or semantic relationships between events.

\subsection*{Construction Steps}

\paragraph{1. Partition by Job Identity}
Define the set of job identifiers:
\[
J = \{ (q, r) \mid \exists i: q_i = q \land r_i = r \}
\]
Then for each \((q, r) \in J\), define:
\[
\mathcal{T}_{q,r} = \{ t_i \in \mathcal{T} \mid q_i = q \land r_i = r \}
\]

\paragraph{2. Sort by Time}
Order each \(\mathcal{T}_{q,r}\) as a list:
\[
\mathcal{T}_{q,r} = [ t_{i_1}, t_{i_2}, \dots, t_{i_k} ]
\quad \text{such that } \tau_{i_j} < \tau_{i_{j+1}}
\]

\paragraph{3. Causal Edges}
Define within-job edges:
\[
E_{q,r} = \{ (\mathit{id}_{i_j}, \mathit{id}_{i_{j+1}}) \mid 1 \leq j < k \}
\]

\paragraph{4. Global Causal Graph}
Take the union:
\[
E_{\text{causal}} = \bigcup_{(q, r) \in J} E_{q,r}
\]

\paragraph{5. Semantic Edges (Optional)}
Define semantic predicates such as:
\[
\mathsf{pulls}(i, j) \iff e_i = \text{enqueue} \land e_j = \text{pull} \land
q_i = q_j \land r_i = r_j \land \tau_i < \tau_j \land a_i = \text{server} \land a_j = \text{agent}
\]
Then:
\[
E_{\text{semantic}} = \{ (\mathit{id}_i, \mathit{id}_j) \mid \mathsf{pulls}(i, j) \}
\]

\subsection*{Final Graph}

\begin{align*}
V &= \{ \mathit{id}_i \mid t_i \in \mathcal{T} \} \\
E &= E_{\text{causal}} \cup E_{\text{semantic}}
\end{align*}

\subsection*{Notes}
\begin{itemize}
\item This construction is generic: the log store \(\mathcal{T}\) may come from a database, file, or tuple-indexed dictionary.
\item Each semantic edge rule corresponds to a logical filter/join over \(\mathcal{T}\).
\item The construction is schema-free on the graph side and can be recomputed on demand with different edge logic.
\end{itemize}
\end{itemize}
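
The construction is mechanical enough to sketch directly. The following
illustrative Python (a sketch under the tuple layout above, not part of the
MRVA codebase) builds \(V\), \(E_{\text{causal}}\), and one semantic edge
relation from a small hand-made event list:

\begin{lstlisting}[language=Python]
from collections import defaultdict

# Each event: (id, timestamp, actor, event_type, query_id, repo_id, count)
events = [
    (1, 10.0, "server",       "enqueue", "q1", "r1", None),
    (2, 11.0, "agent_alpha1", "pull",    "q1", "r1", None),
    (3, 12.0, "agent_alpha1", "execute", "q1", "r1", 7),
]

# Step 1: partition by job identity (q, r)
jobs = defaultdict(list)
for ev in events:
    jobs[(ev[4], ev[5])].append(ev)

# Steps 2-4: sort each partition by timestamp, link consecutive events
V = {ev[0] for ev in events}
E_causal = set()
for trace in jobs.values():
    trace.sort(key=lambda ev: ev[1])
    for a, b in zip(trace, trace[1:]):
        E_causal.add((a[0], b[0]))

# Step 5: semantic 'pulls' edges (server enqueue -> agent pull, same job)
E_semantic = {
    (a[0], b[0])
    for a in events for b in events
    if a[3] == "enqueue" and b[3] == "pull"
    and a[4:6] == b[4:6] and a[1] < b[1]
    and a[2] == "server" and b[2].startswith("agent")
}

E = E_causal | E_semantic   # here: {(1, 2), (2, 3)}
\end{lstlisting}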


\end{document}

%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: nil
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
BIN  doc/mrva-interconnect.pdf  Normal file
Binary file not shown.
BIN  doc/mrva-overview.pdf  Normal file
Binary file not shown.
605  doc/mrva-overview.tex  Normal file
@@ -0,0 +1,605 @@
\documentclass[11pt]{article}

% Load the geometry package to set margins
\usepackage[lmargin=2cm,rmargin=2cm,tmargin=1.8cm,bmargin=1.8cm]{geometry}

% increase nesting depth

\usepackage{enumitem}
\setlistdepth{9}
%
\renewlist{itemize}{itemize}{9}
\setlist[itemize,1]{label=\textbullet}
\setlist[itemize,2]{label=--}
\setlist[itemize,3]{label=*}
\setlist[itemize,4]{label=•}
\setlist[itemize,5]{label=–}
\setlist[itemize,6]{label=>}
\setlist[itemize,7]{label=»}
\setlist[itemize,8]{label=›}
\setlist[itemize,9]{label=·}
%
\renewlist{enumerate}{enumerate}{9}
\setlist[enumerate,1]{label=\arabic*.,ref=\arabic*}
\setlist[enumerate,2]{label=\alph*.),ref=\theenumi\alph*}
\setlist[enumerate,3]{label=\roman*.),ref=\theenumii\roman*}
\setlist[enumerate,4]{label=\Alph*.),ref=\theenumiii\Alph*}
\setlist[enumerate,5]{label=\Roman*.),ref=\theenumiv\Roman*}
\setlist[enumerate,6]{label=\arabic*),ref=\theenumv\arabic*}
\setlist[enumerate,7]{label=\alph*),ref=\theenumvi\alph*}
\setlist[enumerate,8]{label=\roman*),ref=\theenumvii\roman*}
\setlist[enumerate,9]{label=\Alph*),ref=\theenumviii\Alph*}


% Load CM Bright for math
\usepackage{amsmath} % Standard math package
\usepackage{amssymb} % Additional math symbols
\usepackage{cmbright} % Sans-serif math font that complements Fira Sans

\usepackage{fourier}

% Font configuration
% \usepackage{bera}
% or
% Load Fira Sans for text
\usepackage{fontspec}
\setmainfont{Fira Sans} % System-installed Fira Sans
\renewcommand{\familydefault}{\sfdefault} % Set sans-serif as default

% pseudo-code with math
\usepackage{listings}
\usepackage{float}
\usepackage{xcolor}
\usepackage{colortbl}
% Set TT font
% \usepackage{inconsolata}
% or
\setmonofont{IBMPlexMono-Light}
% Define custom settings for listings
\lstset{
  language=Python,
  basicstyle=\ttfamily\small, % Monospaced font
  commentstyle=\itshape\color{gray}, % Italic and gray for comments
  keywordstyle=\color{blue}, % Keywords in blue
  stringstyle=\color{red}, % Strings in red
  mathescape=true, % Enable math in comments
  breaklines=true, % Break long lines
  numbers=left, % Add line numbers
  numberstyle=\tiny\color{gray}, % Style for line numbers
  frame=single, % Add a frame around the code
}

\usepackage{newfloat} % Allows creating custom float types

% Define 'listing' as a floating environment
\DeclareFloatingEnvironment[
  fileext=lol,
  listname=List of Listings,
  name=Listing
]{listing}

% To prevent floats from moving past a section boundary but still allow some floating:
\usepackage{placeins}
% used with \FloatBarrier

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{algorithm}
\usepackage{algpseudocode}

% Title, Author, and Date (or Report Number)
\title{MRVA for CodeQL}
\author{Michael Hohn}
\date{Technical Report 20250224}

\hypersetup{
 pdfauthor={Michael Hohn},
 pdftitle={MRVA for CodeQL},
 pdfkeywords={},
 pdfsubject={},
 pdfcreator={Emacs 29.1},
 pdflang={English}}

\begin{document}

\maketitle
\tableofcontents

\section{MRVA System Architecture Summary}

The MRVA system is organized as a collection of services. On the server side, the
system is containerized using Docker and comprises several key components:
\begin{itemize}
\item {\textbf{Server}}: Acts as the central coordinator.
\item \textbf{Agents}: One or more agents that execute tasks.
\item \textbf{RabbitMQ}: Handles messaging between components.
\item \textbf{MinIO}: Provides storage for both queries and results.
\item \textbf{HEPC}: An HTTP endpoint that hosts and serves CodeQL databases.
\end{itemize}

On the client side, users can interact with the system in two ways:
\begin{itemize}
\item {\textbf{VSCode-CodeQL}}: A graphical interface integrated with Visual Studio Code.
\item \textbf{gh-mrva CLI}: A command-line interface that connects to the server in a similar way.
\end{itemize}

This architecture enables a robust and flexible workflow for code analysis, combining a containerized back-end with both graphical and CLI front-end tools.

The full system details can be seen in the source code. This document provides an
overview.

\section{Distributed Query Execution in MRVA}

\subsection{Execution Overview}

The \textit{MRVA system} is a distributed platform for executing \textit{CodeQL
queries} across multiple repositories using a set of worker agents. The system is
{containerized} and built around a set of core services:

\begin{itemize}
\item \textbf{Server}: Coordinates job distribution and result aggregation.
\item \textbf{Agents}: Execute queries independently and return results.
\item \textbf{RabbitMQ}: Handles messaging between system components.
\item \textbf{MinIO}: Stores query inputs and execution results.
\item \textbf{HEPC}: Serves CodeQL databases over HTTP.
\end{itemize}

Clients interact with MRVA via \texttt{VSCode-CodeQL} (a graphical interface) or
\texttt{gh-mrva CLI} (a command-line tool), both of which submit queries to the
server.

The execution process follows a structured workflow:

\begin{enumerate}
\item A client submits a set of queries $\mathcal{Q}$ targeting a repository
  set $\mathcal{R}$.
\item The server enqueues jobs and distributes them to available agents.
\item Each agent retrieves a job, executes queries against its assigned repository, and accumulates results.
\item The agent sends results back to the server, which then forwards them to the client.
\end{enumerate}

This full round-trip can be expressed as:

\begin{equation}
  \text{Client} \xrightarrow{\mathcal{Q}} \text{Server}
  \xrightarrow{\text{enqueue}}
  \text{Queue} \xrightarrow{\text{dispatch}} \text{Agent}
  \xrightarrow{\mathcal{Q}(\mathcal{R}_i)}
  \text{Server} \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} \text{Client}
\end{equation}
|
||||
|
||||
where the Client submits queries to the Server, which enqueues jobs in the
|
||||
Queue. Agents execute the queries, returning results $\mathcal{Q}(\mathcal{R}_i)$
|
||||
to the Server and ultimately back to the Client.
|
||||
|
||||
A more rigorous description of this is in section \ref{sec:full-round-trip}.
|
||||
|
||||
\subsection{System Structure Overview}
|
||||
|
||||
This design allows for scalable and efficient query execution across multiple
|
||||
repositories, whether on a single machine or a distributed cluster. The key idea
|
||||
is that both setups follow the same structural approach:
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Single machine setup:}
|
||||
\begin{itemize}
|
||||
\item Uses \textit{at least 5 Docker containers} to manage different
|
||||
components of the system.
|
||||
\item The number of \textit{agent containers} (responsible for executing
|
||||
queries) is constrained by the available \textit{RAM and CPU cores}.
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{Cluster setup:}
|
||||
\begin{itemize}
|
||||
\item Uses \textit{at least 5 virtual machines (VMs) and / or Docker containers}.
|
||||
\item The number of \textit{agent VMs} is limited by \textit{network bandwidth
|
||||
and available resources} (e.g., distributed storage and inter-node communication
|
||||
overhead).
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
Thus:
|
||||
\begin{itemize}
|
||||
\item The {functional architecture is identical} between the single-machine and cluster setups.
|
||||
\item The {primary difference} is in \textit{scale}:
|
||||
\begin{itemize}
|
||||
\item A single machine is limited by \textit{local CPU and RAM}.
|
||||
\item A cluster is constrained by \textit{network and inter-node coordination overhead} but allows for higher overall compute capacity.
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
|
||||
\subsection{Messages and their Types}
|
||||
\label{sec:msg-types}
|
||||
The following table enumerates the types (messages) passed from Client to Server.
|
||||
|
||||
\begin{longtable}{|p{5cm}|p{5cm}|p{5cm}|}
|
||||
\hline
|
||||
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
|
||||
\hline
|
||||
\endfirsthead
|
||||
|
||||
\hline
|
||||
\rowcolor{gray!20} \textbf{Type Name} & \textbf{Field} & \textbf{Type} \\
|
||||
\hline
|
||||
\endhead
|
||||
|
||||
\hline
|
||||
\endfoot
|
||||
|
||||
\hline
|
||||
\endlastfoot
|
||||
|
||||
ServerState & NextID & () $\rightarrow$ int \\
|
||||
& GetResult & JobSpec $\rightarrow$ IO (Either Error AnalyzeResult) \\
|
||||
& GetJobSpecByRepoId & (int, int) $\rightarrow$ IO (Either Error JobSpec) \\
|
||||
& SetResult & (JobSpec, AnalyzeResult) $\rightarrow$ IO () \\
|
||||
& GetJobList & int $\rightarrow$ IO (Either Error \textbf{[AnalyzeJob]}) \\
|
||||
& GetJobInfo & JobSpec $\rightarrow$ IO (Either Error JobInfo) \\
|
||||
& SetJobInfo & (JobSpec, JobInfo) $\rightarrow$ IO () \\
|
||||
& GetStatus & JobSpec $\rightarrow$ IO (Either Error Status) \\
|
||||
& SetStatus & (JobSpec, Status) $\rightarrow$ IO () \\
|
||||
& AddJob & AnalyzeJob $\rightarrow$ IO () \\
|
||||
|
||||
\hline
|
||||
JobSpec & sessionID & int \\
|
||||
& nameWithOwner & string \\
|
||||
|
||||
\hline
|
||||
AnalyzeResult & spec & JobSpec \\
|
||||
& status & Status \\
|
||||
& resultCount & int \\
|
||||
& resultLocation & ArtifactLocation \\
|
||||
& sourceLocationPrefix & string \\
|
||||
& databaseSHA & string \\
|
||||
|
||||
\hline
|
||||
ArtifactLocation & Key & string \\
|
||||
& Bucket & string \\
|
||||
|
||||
\hline
|
||||
AnalyzeJob & Spec & JobSpec \\
|
||||
& QueryPackLocation & ArtifactLocation \\
|
||||
& QueryLanguage & QueryLanguage \\
|
||||
|
||||
\hline
|
||||
QueryLanguage & & string \\
|
||||
|
||||
\hline
|
||||
JobInfo & QueryLanguage & string \\
|
||||
& CreatedAt & string \\
|
||||
& UpdatedAt & string \\
|
||||
& SkippedRepositories & SkippedRepositories \\
|
||||
|
||||
\hline
|
||||
SkippedRepositories & AccessMismatchRepos & AccessMismatchRepos \\
|
||||
& NotFoundRepos & NotFoundRepos \\
|
||||
& NoCodeqlDBRepos & NoCodeqlDBRepos \\
|
||||
& OverLimitRepos & OverLimitRepos \\
|
||||
|
||||
\hline
|
||||
AccessMismatchRepos & RepositoryCount & int \\
|
||||
& Repositories & \textbf{[Repository]} \\
|
||||
|
||||
\hline
|
||||
NotFoundRepos & RepositoryCount & int \\
|
||||
& RepositoryFullNames & \textbf{[string]} \\
|
||||
|
||||
\hline
|
||||
Repository & ID & int \\
|
||||
& Name & string \\
|
||||
& FullName & string \\
|
||||
& Private & bool \\
|
||||
& StargazersCount & int \\
|
||||
& UpdatedAt & string \\
|
||||
|
||||
\end{longtable}
|
||||
|
||||
|
||||
\section{Symbols and Notation}
|
||||
\label{sec:orgb695d5a}
|
||||
|
||||
We define the following symbols for entities in the system:
|
||||
|
||||
\begin{center}
|
||||
\begin{tabular}{lll}
|
||||
Concept & Symbol & Description \\[0pt]
|
||||
\hline
|
||||
\href{vscode://file//Users/hohn/work-gh/mrva/gh-mrva/README.org:39:1}{Client} & \(C\) & The source of the query submission \\[0pt]
|
||||
Server & \(S\) & Manages job queue and communicates results back to the client \\[0pt]
|
||||
Job Queue & \(Q\) & Queue for managing submitted jobs \\[0pt]
|
||||
Agent & \(\alpha\) & Independently polls, executes jobs, and accumulates results \\[0pt]
|
||||
Agent Set & \(A\) & The set of all available agents \\[0pt]
|
||||
Query Suite & \(\mathcal{Q}\) & Collection of queries submitted by the client \\[0pt]
|
||||
Repository List & \(\mathcal{R}\) & Collection of repositories \\[0pt]
|
||||
\(i\)-th Repository & \(\mathcal{R}_i\) & Specific repository indexed by \(i\) \\[0pt]
|
||||
\(j\)-th Query & \(\mathcal{Q}_j\) & Specific query from the suite indexed by \(j\) \\[0pt]
|
||||
Query Result & \(r_{i,j,k_{i,j}}\) & \(k_{i,j}\)-th result from query \(j\) executed on repository \(i\) \\[0pt]
|
||||
Query Result Set & \(\mathcal{R}_i^{\mathcal{Q}_j}\) & Set of all results for query \(j\) on repository \(i\) \\[0pt]
|
||||
Accumulated Results & \(\mathcal{R}_i^{\mathcal{Q}}\) & All results from executing all queries on \(\mathcal{R}_i\) \\[0pt]
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
|
||||
|
||||
\section{Full Round-Trip Representation}
|
||||
\label{sec:full-round-trip}
|
||||
The full round-trip execution, from query submission to result delivery, can be summarized as:
|
||||
|
||||
\[
|
||||
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q
|
||||
\xrightarrow{\text{poll}}
|
||||
\alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{\mathcal{R}_i^{\mathcal{Q}}} C
|
||||
\]
|
||||
|
||||
\begin{itemize}
|
||||
\item \(C \to S\): Client submits a query suite \(\mathcal{Q}\) to the server.
|
||||
\item \(S \to Q\): Server enqueues the query suite \((\mathcal{Q}, \mathcal{R}_i)\) for each repository.
|
||||
\item \(Q \to \alpha\): Agent \(\alpha\) polls the queue and retrieves a job.
|
||||
\item \(\alpha \to S\): Agent executes the queries and returns the accumulated results \(\mathcal{R}_i^{\mathcal{Q}}\) to the server.
|
||||
\item \(S \to C\): Server sends the complete result set \(\mathcal{R}_i^{\mathcal{Q}}\) for each repository back to the client.
|
||||
\end{itemize}
|
||||
|
||||
\section{Result Representation}

For the complete collection of results across all repositories and queries:
\[
\mathcal{R}^{\mathcal{Q}} = \bigcup_{i=1}^{N} \bigcup_{j=1}^{M}
\left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}
\]

where:
\begin{itemize}
\item \(N\) is the total number of repositories.
\item \(M\) is the total number of queries in \(\mathcal{Q}\).
\item \(k_{i,j}\) is the number of results from executing query \(\mathcal{Q}_j\) on repository \(\mathcal{R}_i\).
\end{itemize}

An individual result, the \(k\)-th result of query \(j\) on repository \(i\), is written:
\[
r_{i,j,k}
\]

Seen per result, the round trip of Section~\ref{sec:full-round-trip} becomes:
\[
C \xrightarrow{\mathcal{Q}} S \xrightarrow{\text{enqueue}} Q \xrightarrow{\text{dispatch}} \alpha \xrightarrow{\mathcal{Q}(\mathcal{R}_i)} S \xrightarrow{r_{i,j,k}} C
\]

Each result can be further indexed to track multiple repositories and result sets.

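One way to realize this indexing in memory, again as a hypothetical Go sketch rather than the project's actual types, is a nested map keyed by repository and query, so that both \(\mathcal{R}_i^{\mathcal{Q}_j}\) and the accumulated \(\mathcal{R}_i^{\mathcal{Q}}\) are direct lookups:

\begin{lstlisting}[language=Go]
package main

import "fmt"

// Result is one r_{i,j,k}; the fields are illustrative.
type Result struct {
	Repo, Query string
	Finding     string
}

// ResultSet maps repository -> query -> results, i.e. R_i^{Q_j}.
type ResultSet map[string]map[string][]Result

// Add stores one result, creating inner maps on demand.
func (rs ResultSet) Add(r Result) {
	if rs[r.Repo] == nil {
		rs[r.Repo] = map[string][]Result{}
	}
	rs[r.Repo][r.Query] = append(rs[r.Repo][r.Query], r)
}

// Accumulated flattens R_i^{Q_j} over all j into R_i^{Q}.
func (rs ResultSet) Accumulated(repo string) []Result {
	var all []Result
	for _, results := range rs[repo] {
		all = append(all, results...)
	}
	return all
}

func main() {
	rs := ResultSet{}
	rs.Add(Result{Repo: "repoA", Query: "q1", Finding: "f1"})
	rs.Add(Result{Repo: "repoA", Query: "q2", Finding: "f2"})
	fmt.Println(len(rs.Accumulated("repoA"))) // 2
}
\end{lstlisting}

The set union in the formula corresponds to the append in \texttt{Add} plus the flattening in \texttt{Accumulated}.
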
\section{Execution Loop in Pseudo-Code}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm (imperative)}

\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results

# Initialization
$\mathcal{R}$ = set()  # Repository list
$Q$ = []  # Job queue
$A$ = set()  # Set of agents
$\mathcal{R}_i^{\mathcal{Q}}$ = {}  # Result storage for each repository

# Initialize result sets for each repository
for $\mathcal{R}_i$ in $\mathcal{R}$:
    $\mathcal{R}_i^{\mathcal{Q}} = \{\}$  # Initialize empty result set

# Enqueue the entire query suite for all repositories
for $\mathcal{R}_i$ in $\mathcal{R}$:
    $Q$.append(($\mathcal{Q}$, $\mathcal{R}_i$))  # Enqueue $(\mathcal{Q}, \mathcal{R}_i)$ pair

# Processing loop while there are jobs in the queue
while $Q \neq \emptyset$:
    # Agents autonomously poll the queue
    for $\alpha$ in $A$:
        if $Q = \emptyset$:
            break  # Queue drained while agents were still being polled
        if $\alpha$.is_available():
            $(\mathcal{Q}, \mathcal{R}_i)$ = $Q$.pop(0)  # Agent polls a job

            # Agent execution begins
            $\mathcal{R}_i^{\mathcal{Q}} = \{\}$  # Initialize results for repository $\mathcal{R}_i$

            for $\mathcal{Q}_j$ in $\mathcal{Q}$:
                # Execute query $\mathcal{Q}_j$ on repository $\mathcal{R}_i$
                $r_{i,j,1}, \dots, r_{i,j,k_{i,j}}$ = $\alpha$.execute($\mathcal{Q}_j$, $\mathcal{R}_i$)

                # Store results for query $j$
                $\mathcal{R}_i^{\mathcal{Q}_j} = \{r_{i,j,1}, \dots, r_{i,j,k_{i,j}}\}$

                # Accumulate results
                $\mathcal{R}_i^{\mathcal{Q}} = \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}$

            # Send all accumulated results back to the server
            $\alpha$.send_results($S$, ($\mathcal{Q}$, $\mathcal{R}_i$, $\mathcal{R}_i^{\mathcal{Q}}$))

            # Server sends results for $(\mathcal{Q}, \mathcal{R}_i)$ back to the client
            $S$.send_results_to_client($C$, ($\mathcal{Q}$, $\mathcal{R}_i$, $\mathcal{R}_i^{\mathcal{Q}}$))
\end{lstlisting}
\end{listing}
\FloatBarrier

\section{Execution Loop in Pseudo-Code, declarative}
\begin{listing}[H] % h = here, t = top, b = bottom, p = page of floats
\caption{Distributed Query Execution Algorithm (declarative)}

\begin{lstlisting}[language=Python]
# Distributed Query Execution with Agent Polling and Accumulated Results

# Define initial state
$\mathcal{R}$: set  # Set of repositories
$\mathcal{Q}$: set  # Set of queries
A: set  # Set of agents
Q: list  # Queue of $(\mathcal{Q}, \mathcal{R}_i)$ pairs
$\mathcal{R}_{\text{results}}$: dict = {}  # Mapping of repositories to their accumulated query results

# Initialize result sets for each repository
$\mathcal{R}_{\text{results}}$ = {$\mathcal{R}_i$: set() for $\mathcal{R}_i$ in $\mathcal{R}$}

# Define the job queue as a list of $(\mathcal{Q}, \mathcal{R}_i)$ pairs
Q = [($\mathcal{Q}$, $\mathcal{R}_i$) for $\mathcal{R}_i$ in $\mathcal{R}$]

# Processing as a declarative iteration over the job queue
def execute_queries(agents, job_queue, repository_results):
    def available_agents():
        return {$\alpha$ for $\alpha$ in agents if $\alpha$.is_available()}

    def process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$):
        results = {$\mathcal{Q}_j$: $\alpha$.execute($\mathcal{Q}_j$, $\mathcal{R}_i$) for $\mathcal{Q}_j$ in $\mathcal{Q}$}
        return $\mathcal{R}_i$, results

    def accumulate_results($\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$, query_results):
        return {**$\mathcal{R}_{\text{results}}$, $\mathcal{R}_i$: $\mathcal{R}_{\text{results}}$[$\mathcal{R}_i$] | set().union(*query_results.values())}

    while job_queue:
        active_agents = available_agents()
        for $\alpha$ in active_agents:
            if not job_queue:
                break  # Queue drained before every active agent got a job
            $\mathcal{Q}$, $\mathcal{R}_i$ = job_queue[0]  # Peek at the first job
            _, query_results = process_job($\mathcal{Q}$, $\mathcal{R}_i$, $\alpha$)
            repository_results = accumulate_results(repository_results, $\mathcal{R}_i$, query_results)

            $\alpha$.send_results(S, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))
            S.send_results_to_client(C, ($\mathcal{Q}$, $\mathcal{R}_i$, repository_results[$\mathcal{R}_i$]))

            job_queue = job_queue[1:]  # Drop the job just processed

    return repository_results

# Execute the distributed query process
$\mathcal{R}_{\text{results}}$ = execute_queries(A, Q, $\mathcal{R}_{\text{results}}$)
\end{lstlisting}
\end{listing}
\FloatBarrier

\newpage{}
\section{Execution Loop in Pseudo-Code, algorithmic}
\begin{algorithm}
\caption{Distribute a set of queries $\mathcal{Q}$ across repositories
$\mathcal{R}$ using agents $A$}
\begin{algorithmic}[1] % Line numbering enabled
\Procedure{DistributedQueryExecution}{$\mathcal{Q}, \mathcal{R}, A$}

\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
\Comment{Initialize result sets for each repository and query}
\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$
\EndFor

\State $Q \gets \left\{ \, \right\}$ \Comment{Initialize empty job queue}

\ForAll{$\mathcal{R}_i \in \mathcal{R}$}
\Comment{Enqueue the entire query suite across all repositories}
\State $S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q$
\EndFor

\While{$Q \neq \emptyset$}
\Comment{Agents poll the queue for available jobs}

\ForAll{$\alpha \in A$ \textbf{where} $\alpha$ \text{is available}}
\State $\alpha \xleftarrow{\text{poll}(Q)}$ \Comment{Agent autonomously retrieves a job}

% --- Begin Agent Execution Block ---
\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Begins}

\State $\mathcal{R}_i^{\mathcal{Q}} \gets \left\{ \, \right\}$ \Comment{Initialize result set for this repository}

\ForAll{$\mathcal{Q}_j \in \mathcal{Q}$}
\State $\mathcal{R}_i^{\mathcal{Q}_j} \gets \left\{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \right\}$
\Comment{Collect results for query $j$ on repository $i$}

\State $\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}}
\cup \mathcal{R}_i^{\mathcal{Q}_j}$
\Comment{Accumulate results}
\EndFor

\State $\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S$
\Comment{Agent sends all accumulated results back to server}

\State \textbf{\raisebox{0.5ex}{\rule{25em}{0.7pt}}} \Comment{Agent Execution Ends}
% --- End Agent Execution Block ---

\State $S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C$
\Comment{Server sends results for repository $i$ back to the client}

\EndFor

\EndWhile

\EndProcedure
\end{algorithmic}
\end{algorithm}

\FloatBarrier

\section{Execution Loop in Pseudo-Code, hybrid}
\label{sec:orgb767ab2}
{\textbf{Algorithm:} Distribute a set of queries \(\mathcal{Q}\) across repositories \(\mathcal{R}\) using agents \(A\)}

\begin{enumerate}
\item \textbf{Initialization}
\begin{itemize}
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
\begin{itemize}
\item Initialize result sets: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
\end{itemize}
\item Initialize an empty job queue: \(Q \gets \{\}\).
\end{itemize}

\item \textbf{Enqueue Queries}
\begin{itemize}
\item For each repository \(\mathcal{R}_i \in \mathcal{R}\):
\begin{itemize}
\item Enqueue the entire query suite: \(S \xrightarrow{\text{enqueue}(\mathcal{Q}, \mathcal{R}_i)} Q\).
\end{itemize}
\end{itemize}

\item \textbf{Execution Loop}
\begin{itemize}
\item While \(Q \neq \emptyset\): (agents poll the queue for available jobs)
\begin{itemize}
\item For each available agent \(\alpha \in A\):
\begin{itemize}
\item Agent autonomously retrieves a job: \(\alpha \xleftarrow{\text{poll}(Q)}\).

\item \textbf{Agent Execution Block}
\begin{itemize}
\item Initialize result set for this repository: \(\mathcal{R}_i^{\mathcal{Q}} \gets \{\}\).
\item For each query \(\mathcal{Q}_j \in \mathcal{Q}\):
\begin{itemize}
\item Collect results:
\(\mathcal{R}_i^{\mathcal{Q}_j} \gets \{ r_{i,j,1}, r_{i,j,2}, \dots, r_{i,j,k_{i,j}} \}\).
\item Accumulate results:
\(\mathcal{R}_i^{\mathcal{Q}} \gets \mathcal{R}_i^{\mathcal{Q}} \cup \mathcal{R}_i^{\mathcal{Q}_j}\).
\end{itemize}
\item Agent sends all accumulated results back to the server:
\(\alpha \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} S\).
\end{itemize}
\end{itemize}
\end{itemize}
\end{itemize}

\item \textbf{Server Returns Results}
\begin{itemize}
\item Server sends results for repository \(i\) back to the client:
\(S \xrightarrow{(\mathcal{Q}, \mathcal{R}_i, \mathcal{R}_i^{\mathcal{Q}})} C\).
\end{itemize}
\end{enumerate}

\end{document}

%%% Local Variables:
%%% mode: LaTeX
%%% TeX-master: t
%%% TeX-engine: luatex
%%% TeX-command-extra-options: "-synctex=1 -shell-escape -interaction=nonstopmode"
%%% End:
56 doc/mrva.dot Normal file
@@ -0,0 +1,56 @@
digraph mrvacommander {
    rankdir=LR;
    node [shape=box style=filled fillcolor=lightgrey fontname="monospace"];

    // Entry points
    cmd_server [label="cmd/server\nmain()", fillcolor=lightblue];
    cmd_agent [label="cmd/agent\nmain()", fillcolor=lightblue];

    // Config
    config [label="config/mcc\nparseEnv()", shape=ellipse, fillcolor=lightyellow];

    // Server-side
    server [label="pkg/server\nServer.Run()"];
    deploy [label="pkg/deploy\nInit()"];
    qldbstore [label="pkg/qldbstore\nQLDB Store"];
    artifactstore [label="pkg/artifactstore\nArtifact Store"];
    queue [label="pkg/queue\nQueue Interface"];

    // Agent-side
    agent [label="pkg/agent\nAgent.Run()"];
    state [label="pkg/state\nState"];
    codeql [label="pkg/codeql\nrunCodeQL()"];

    // Common
    common [label="pkg/common\nTypes, MinIO, Jobs"];
    utils [label="utils\nDownload, Archive"];

    // Edges: config used by both
    cmd_server -> config;
    cmd_agent -> config;

    // Server wiring
    cmd_server -> server;
    server -> queue;
    server -> artifactstore;
    server -> qldbstore;

    // Agent wiring
    cmd_agent -> agent;
    agent -> queue;
    agent -> codeql;
    agent -> artifactstore;
    agent -> state;

    // Shared deps
    server -> common;
    agent -> common;
    codeql -> common;
    qldbstore -> common;
    artifactstore -> common;

    // Utils used by backends
    qldbstore -> utils;
    artifactstore -> utils;
    codeql -> utils;
}
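(The graph can be rendered with Graphviz, for example with dot -Tpdf doc/mrva.dot -o doc/mrva.pdf; presumably this is how the doc/mrva.pdf checked in below was produced.)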
84 doc/mrva.man Normal file
@@ -0,0 +1,84 @@
.TH MRVACOMMANDER 7 "April 2025" "MRVA Project" "System Overview"
.SH NAME
mrvacommander \- distributed CodeQL task queue and execution system
.SH SYNOPSIS
.B server
.RI [ environment ]
.br
.B agent
.RI [ environment ]
.SH DESCRIPTION
mrvacommander coordinates analysis jobs over multiple worker nodes using queues, pluggable storage, and CodeQL execution. It consists of multiple interacting packages and entry points.

.SH STRUCTURE
.TP
.B cmd/server
Entry point. Loads configuration, initializes dependencies, runs queue subscriber with a dispatcher.
.TP
.B cmd/agent
Entry point. Loads configuration, runs a processing loop: receive job, execute query, save result, update state.

.SH CONFIGURATION
.TP
.B config/mcc
Parses environment variables into structured configuration. Modules include:
.IR queue ,
.IR storage ,
.IR logger ,
.IR commander .

.SH SERVER SIDE MODULES
.TP
.B pkg/server
Initializes:
queue backend
QLDB store
artifact store

Subscribes to queue and dispatches jobs to handler.
.TP
.B pkg/deploy
Deployment helpers: validate environment variables, bootstrap key services.

.SH AGENT SIDE MODULES
.TP
.B pkg/agent
Receives jobs, executes CodeQL queries, stores outputs, marks completion.
.TP
.B pkg/state
Tracks which jobs have been completed. Local file-backed.

.SH SHARED MODULES
.TP
.B pkg/common
Core types: Job, JobOutput, NameWithOwner, Query.
Includes MinIO wrappers, external API access, and job spec parsing.
.TP
.B pkg/codeql
Defines query structure and executes CodeQL against a database.
.TP
.B pkg/qldbstore
Provides read-only access to CodeQL databases via:
- MinIO (S3)
- HTTP (hepc)
- Filesystem
.TP
.B pkg/artifactstore
Persists job results. Implementations:
- MinIO
- Memory
.TP
.B pkg/queue
Job queue interface. Implementations:
- RabbitMQ
- In-memory single-node
.TP
.B utils
Generic helpers:
- HTTP download
- tar.gz extraction

.SH SEE ALSO
.BR codeql (1),
.BR rabbitmq-server (1),
.BR minio (1)
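The pkg/queue description above implies a small publish/consume contract shared by server and agents. A hypothetical Go sketch of such an interface follows; the names and signatures are illustrative only and are not the actual pkg/queue API:

package queue // illustrative sketch only; not the real pkg/queue

// Job is a minimal stand-in for the Job type described under pkg/common.
type Job struct {
	ID    int
	Repo  string // owner/name of the repository to analyze
	Query string // query pack to run
}

// Queue abstracts the job transport; the man page lists RabbitMQ and an
// in-memory single-node variant as implementations.
type Queue interface {
	Publish(j Job) error // server side: enqueue a job
	Jobs() <-chan Job    // agent side: stream of jobs to execute
	Close() error
}

// MemQueue is a toy single-node implementation backed by a channel.
type MemQueue struct{ ch chan Job }

func NewMemQueue(size int) *MemQueue { return &MemQueue{ch: make(chan Job, size)} }

func (q *MemQueue) Publish(j Job) error { q.ch <- j; return nil }
func (q *MemQueue) Jobs() <-chan Job    { return q.ch }
func (q *MemQueue) Close() error        { close(q.ch); return nil }

A RabbitMQ-backed implementation would satisfy the same interface with a broker connection in place of the in-process channel.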
BIN doc/mrva.pdf Normal file
Binary file not shown.
129 docker-compose-demo-build.yml Normal file
@@ -0,0 +1,129 @@
# This is the compose configuration used to build / prepopulate the containers for
# a demo.
services:
  dbssvc:
    ## image: ghcr.io/hohn/dbsdata-container:0.1.24
    build:
      context: ./demo/containers/dbsdata
      dockerfile: Dockerfile
    container_name: dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    ## image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    build:
      context: .
      dockerfile: ./client/containers/ghmrva/Dockerfile
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    ## image: ghcr.io/hohn/code-server-initialized:0.1.24
    build:
      context: ./client/containers/vscode
      dockerfile: Dockerfile
    ports:
      - "9080:9080"
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    build:
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    ports:
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    ## image: ghcr.io/hohn/mrva-agent:0.1.24
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
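Usage note (an assumption, not stated in the repository): with the Docker Compose v2 CLI this file would typically be exercised as docker compose -f docker-compose-demo-build.yml up --build, which builds the demo images locally; docker-compose-demo.yml below instead pulls the prebuilt 0.1.24 images from ghcr.io.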
116 docker-compose-demo.yml Normal file
@@ -0,0 +1,116 @@
services:
  dbssvc:
    # dbsdata-container:0.1.24
    image: ghcr.io/hohn/dbsdata-container:0.1.24
    command: tail -f /dev/null # Keep the container running
    # volumes:
    #   - /qldb # Directory inside the container that contains the data
    volumes:
      - dbsdata:/data
    container_name: dbssvc
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    # volumes_from:
    #   - dbsdata # Use the volumes from dbsdata container
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    image: ghcr.io/hohn/code-server-initialized:0.1.24
    ports:
      - "9080:9080"
    # XX: Include codeql binary in code-server (if it's not there already)
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    image: ghcr.io/hohn/mrva-server:0.1.24
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    image: ghcr.io/hohn/mrva-agent:0.1.24
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
@@ -7,37 +7,36 @@ services:
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    expose:
      - "5672"
      - "15672"
    ports:
      - "5672:5672"
      - "15672:15672"
    networks:
      - backend
    healthcheck:
      test: [ "CMD", "nc", "-z", "localhost", "5672" ]
      interval: 5s
      timeout: 15s
      retries: 1
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10

  server:
    build:
      context: ./cmd/server
      dockerfile: Dockerfile
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s # Reduce the timeout period for testing
    environment:
      - MRVA_SERVER_ROOT=/mrva/mrvacommander/cmd/server
    command: sh -c "tail -f /dev/null"
    stop_grace_period: 1s
    ports:
      - "8080:8080"
    volumes:
      - ./:/mrva/mrvacommander
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    networks:
      - backend
    env_file:
      - path: ./.env.container
        required: true

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
@@ -45,52 +44,46 @@ services:
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: user
      MINIO_ROOT_PASSWORD: mmusty8432

    env_file:
      - path: .env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      - ./dbstore-data:/data
    networks:
      - backend

  qpstore:
  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: qpstore
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    environment:
      MINIO_ROOT_USER: user
      MINIO_ROOT_PASSWORD: mmusty8432

    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      - ./qpstore-data:/data

    networks:
      - backend

  agent:
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - minio
    environment:
      MRVA_RABBITMQ_HOST: rabbitmq
      MRVA_RABBITMQ_PORT: 5672
      MRVA_RABBITMQ_USER: user
      MRVA_RABBITMQ_PASSWORD: password
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

# Remove named volumes to use bind mounts
# volumes:
#   minio-data:
22 experimental/qldb-specification/readme.org Normal file
@@ -0,0 +1,22 @@
* tuple hashing functions across languages
There are three parallel implementations of a hash for every entry of a tuple
list. The functions produce identical results across all three languages and
can be used across agent / server / client. They agree because each one
serializes a tuple as compact JSON (no whitespace) before MD5-hashing it.

#+BEGIN_SRC sh
hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ node tuple-hash.js
[
  '91b80a9933218ff5bc62df8ff71f1252',
  'b0934b29293e91aefaac73c99fc75e94'
]

hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ python3 tuple-hash.py
['91b80a9933218ff5bc62df8ff71f1252', 'b0934b29293e91aefaac73c99fc75e94']

hohn@ghm3 ~/work-gh/mrva/mrvacommander/experimental/qldb-specification
0:$ go run tuple-hash.go
[91b80a9933218ff5bc62df8ff71f1252 b0934b29293e91aefaac73c99fc75e94]
#+END_SRC
28 experimental/qldb-specification/tuple-hash.go Normal file
@@ -0,0 +1,28 @@
package main

import (
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
)

func main() {
	// Tuple list to hash; matches the inputs in tuple-hash.js and
	// tuple-hash.py so the outputs can be compared across languages.
	atl_L := [][2]interface{}{
		{1, "s1"},
		{2, "str"},
	}

	var sl_hash []string

	for _, item := range atl_L {
		// json.Marshal emits compact JSON (no spaces), matching
		// JSON.stringify in JS and separators=(',', ':') in Python.
		jsonBytes, err := json.Marshal(item)
		if err != nil {
			panic(err)
		}
		sum := md5.Sum(jsonBytes)
		sl_hash = append(sl_hash, hex.EncodeToString(sum[:]))
	}

	fmt.Println(sl_hash)
}
9 experimental/qldb-specification/tuple-hash.js Normal file
@@ -0,0 +1,9 @@
// Hash each tuple by MD5-ing its JSON encoding; JSON.stringify emits
// compact JSON, matching the Go and Python implementations.
const crypto = require("crypto");

const atl_L = [[1, "s1"], [2, "str"]];
const sl_hash = atl_L.map(item => {
  const json = JSON.stringify(item);
  return crypto.createHash("md5").update(json).digest("hex");
});

console.log(sl_hash);
12 experimental/qldb-specification/tuple-hash.py Normal file
@@ -0,0 +1,12 @@
import hashlib
import json

# Tuples to hash; the compact separators make json.dumps byte-identical
# to JSON.stringify (JS) and json.Marshal (Go) for these values.
atl_L = [(1, "s1"), (2, "str")]
sl_hash = []

for item in atl_L:
    encoded = json.dumps(item, separators=(',', ':')).encode("utf-8")
    md5sum = hashlib.md5(encoded).hexdigest()
    sl_hash.append(md5sum)

print(sl_hash)
17 go.mod
@@ -1,43 +1,34 @@
module mrvacommander
module github.com/hohn/mrvacommander

go 1.22.0

require (
    github.com/BurntSushi/toml v1.4.0
    github.com/elastic/go-sysinfo v1.14.0
    github.com/google/uuid v1.6.0
    github.com/gorilla/mux v1.8.1
    github.com/jackc/pgx/v5 v5.6.0
    github.com/minio/minio-go/v7 v7.0.71
    github.com/rabbitmq/amqp091-go v1.10.0
    golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
    golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
    gopkg.in/yaml.v3 v3.0.1
    gorm.io/driver/postgres v1.5.9
    gorm.io/gorm v1.25.10
)

require (
    github.com/dustin/go-humanize v1.0.1 // indirect
    github.com/elastic/go-windows v1.0.1 // indirect
    github.com/goccy/go-json v0.10.2 // indirect
    github.com/jackc/pgpassfile v1.0.0 // indirect
    github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
    github.com/jackc/pgx/v5 v5.6.0 // indirect
    github.com/jackc/puddle/v2 v2.2.1 // indirect
    github.com/jinzhu/inflection v1.0.0 // indirect
    github.com/jinzhu/now v1.1.5 // indirect
    github.com/klauspost/compress v1.17.6 // indirect
    github.com/klauspost/cpuid/v2 v2.2.6 // indirect
    github.com/kr/text v0.2.0 // indirect
    github.com/minio/md5-simd v1.1.2 // indirect
    github.com/pkg/errors v0.9.1 // indirect
    github.com/prometheus/procfs v0.15.1 // indirect
    github.com/rogpeppe/go-internal v1.12.0 // indirect
    github.com/rs/xid v1.5.0 // indirect
    golang.org/x/crypto v0.24.0 // indirect
    golang.org/x/net v0.23.0 // indirect
    golang.org/x/sync v0.7.0 // indirect
    golang.org/x/sync v0.9.0 // indirect
    golang.org/x/sys v0.21.0 // indirect
    golang.org/x/text v0.16.0 // indirect
    gopkg.in/ini.v1 v1.67.0 // indirect
    howett.net/plist v1.0.1 // indirect
)
32 go.sum
@@ -6,14 +6,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/go-sysinfo v1.14.0 h1:dQRtiqLycoOOla7IflZg3aN213vqJmP0lpVpKQ9lUEY=
github.com/elastic/go-sysinfo v1.14.0/go.mod h1:FKUXnZWhnYI0ueO7jhsGV3uQJ5hiz8OqM5b3oGyaRr8=
github.com/elastic/go-windows v1.0.1 h1:AlYZOldA+UJ0/2nBuqWdo90GFCgG9xuyw9SYzGUtJm0=
github.com/elastic/go-windows v1.0.1/go.mod h1:FoVvqWSun28vaDQPbj2Elfc0JahhPB7WQEGa3c814Ss=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
@@ -26,11 +20,6 @@ github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
@@ -44,13 +33,8 @@ github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
github.com/minio/minio-go/v7 v7.0.71 h1:No9XfOKTYi6i0GnBj+WZwD8WP5GZfL7n7GOjRqCdAjA=
github.com/minio/minio-go/v7 v7.0.71/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw=
github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
@@ -66,13 +50,12 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
@@ -83,13 +66,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorm.io/driver/postgres v1.5.9 h1:DkegyItji119OlcaLjqN11kHoUgZ/j13E0jkJZgD6A8=
gorm.io/driver/postgres v1.5.9/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI=
gorm.io/gorm v1.25.10 h1:dQpO+33KalOA+aFYGlK+EfxcI5MbO7EP2yYygwh9h+s=
gorm.io/gorm v1.25.10/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM=
howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
@@ -1,11 +1,22 @@
{
  "folders": [
    {
      "name": "mrvaagent",
      "path": "../mrvaagent"
    },
    {
      "name": "mrvacommander",
      "path": "."
    },
    {
      "name": "mrvaserver",
      "path": "../mrvaserver"
    }
  ],
  "settings": {
    "sarif-viewer.connectToGithubCodeScanning": "off",
    "codeQL.githubDatabase.download": "never"
    "codeQL.githubDatabase.download": "never",
    "makefile.configureOnOpen": false,
    "git.ignoreLimitWarning": true
  }
}
}
Some files were not shown because too many files have changed in this diff.