Compare commits

118 commits; only the abbreviated SHA1 values were captured in this view:

008708469c 37d5b1c6c1 1302db0b4e c624925aba e3e91534a0 af043f3f59 8ea453f8b0
3f24fbb07d de0d1b7434 be7cc3b0cf ba66cb9258 baf20fa7af 6bfcbb33ea 9d6587872c
f809917c2e a22d8d77f2 92a22f55d1 3db629e2ca 95d2638546 ff96b34f5e 537ebdf19d
d486b6b4db b61fbf8896 dd776e312a 18333bfdb1 e335b6c843 4d52176c5a dd58a64ef7
4e93929943 e7d32861e5 52aafd6fc9 77ce997fbb 187c49688e d5bcb8b981 ec0799696e
9ccea8ac80 080c311516 faeb13efb1 0378c4cb7f 7de3ee59ce 7ae6e9a1cb 2d92ad51c3
bef8a6dc97 d08e32dc42 64b77c5d70 71ce8c0823 067e477f61 8f807e0e42 195dda9fd7
f60b55f181 727381dc5a a35fc619e6 8dd6c94918 34958e4cf4 259bac55fb 41f6db5de0
19330c3a0f 1e2df515e3 681fcdab8c 5021fc824b 7d27b910cd 0d3f4c5e40 a86f955aab
c556605e44 7b06484b29 fc751ae08f d956f47db3 0a52b729cd 6bebf4abfc 9d60489908
35100f89a7 742b059a49 d1f56ae196 6262197c8d 781571044d b183cee78d 5a95f0ea08
349d758c14 582d933130 b7b4839fe0 06dcf50728 8f151ab002 65cdf9a883 1e1daf9330
b4f1a2b8a6 f652a6719c 81c44ab14a 92ca709458 242ba3fc1e 26dd69c976 731b44b187
aaeafa9e88 129b8cc302 d64522d168 6b4e753e69 3df1cac5ae dcc32ea8ab 3c8db9cbe4
be1304bdd9 8965725e42 2df48b9f98 8d80272922 e3f4d9f012 3566f5169e b3cf7a4f65
07f93f3d27 7413e23bab 380e90135a 1642894ccf c54bda8432 17bf9049e4 62a7b227f0
b543cebfac d145731c4b 0cffb3c849 9d1a891c72 b4d9833da3 e0cbc01d21
.dockerignore (new file, 9 lines)
@@ -0,0 +1,9 @@
# Excludes

/dbstore-data
/qpstore-data
/test-data
/venv
/client
/cmd/server/var
/.git
.env.container (new file, 12 lines)
@@ -0,0 +1,12 @@
MRVA_RABBITMQ_HOST=rabbitmq
MRVA_RABBITMQ_PORT=5672
MRVA_RABBITMQ_USER=user
MRVA_RABBITMQ_PASSWORD=password
MINIO_ROOT_USER=user
MINIO_ROOT_PASSWORD=mmusty8432
ARTIFACT_MINIO_ENDPOINT=artifactstore:9000
ARTIFACT_MINIO_ID=${MINIO_ROOT_USER}
ARTIFACT_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
QLDB_MINIO_ENDPOINT=dbstore:9000
QLDB_MINIO_ID=${MINIO_ROOT_USER}
QLDB_MINIO_SECRET=${MINIO_ROOT_PASSWORD}
.gitignore (vendored, 16 lines changed)
@@ -4,6 +4,9 @@ cmd/server/var/
# vscode project dir
.vscode/

# idea project dir
.idea/

# Compiled binary
cmd/server/server
cmd/agent/agent
@@ -41,3 +44,16 @@ go.work.sum

# env file
.env
/artifactstore-data/.minio.sys
/qldbminio/qldb
.ipynb_checkpoints/
venv/
venv-*/
*.egg-info
__pycache__
README.html
ChangeLog
notes/*.html

# Make timestamp files
mk.*
.golangci.yml (new file, 29 lines)
@@ -0,0 +1,29 @@
linters:
  enable:
    - staticcheck
    - unused
    - decorder
    - errchkjson
    - exhaustruct
    - gochecknoinits
    - gochecksumtype
    - goconst
    - gocritic
    - godox
    - lll
    - loggercheck
    - revive
    - sloglint
    - tagalign
    - unparam

linters-settings:
  revive:
    config: .revive.toml
  staticcheck:
    checks:
      - "SA"

issues:
  format: "format: {{.FromLinter}}: {{.Text}}"
.revive.toml (new file, 13 lines)
@@ -0,0 +1,13 @@
ignoreGeneratedHeader = true

[rule.blank-imports]
Arguments = [true]

[[rule]]
name = "max-public-identifier-length"
arguments = [15] # Maximum length for public identifiers

[[rule]]
name = "max-private-identifier-length"
arguments = [15] # Maximum length for private identifiers
||||
55
Makefile
Normal file
55
Makefile
Normal file
@@ -0,0 +1,55 @@
|
||||
all: server agent
|
||||
|
||||
.phony: view
|
||||
|
||||
view: README.html
|
||||
open $<
|
||||
|
||||
html: README.html
|
||||
|
||||
%.html: %.md
|
||||
pandoc --toc=true --standalone $< --out $@
|
||||
|
||||
# Build the qldbtools container image
|
||||
dbt: mk.client-qldbtools-container
|
||||
mk.client-qldbtools-container:
|
||||
cd client/containers/qldbtools && \
|
||||
docker build -t client-qldbtools-container:0.1.24 .
|
||||
touch $@
|
||||
|
||||
# Run a shell in the container with the qldbtools
|
||||
dbt-run: mk.client-qldbtools-container
|
||||
docker run --rm -it client-qldbtools-container:0.1.24 /bin/bash
|
||||
|
||||
# Run one of the scripts in the container as check
|
||||
dbt-check: mk.client-qldbtools-container
|
||||
docker run --rm -it client-qldbtools-container:0.1.24 mc-db-initial-info
|
||||
|
||||
dbt-push: mk.dbt-push
|
||||
mk.dbt-push: mk.client-qldbtools-container
|
||||
docker tag client-qldbtools-container:0.1.24 ghcr.io/hohn/client-qldbtools-container:0.1.24
|
||||
docker push ghcr.io/hohn/client-qldbtools-container:0.1.24
|
||||
touch $@
|
||||
|
||||
server:
|
||||
cd cmd/server && GOOS=linux GOARCH=arm64 go build
|
||||
|
||||
agent:
|
||||
cd cmd/agent && GOOS=linux GOARCH=arm64 go build
|
||||
|
||||
fullbuild:
|
||||
cd cmd/server && GOOS=linux GOARCH=arm64 go build -a
|
||||
|
||||
sendsubmit:
|
||||
cd tools && sh ./submit-request.curl
|
||||
|
||||
# Requires
|
||||
# go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
|
||||
lint:
|
||||
golangci-lint run cmd/... pkg/...
|
||||
|
||||
deps:
|
||||
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/server | dot -Tpdf > deps-server.pdf && open deps-server.pdf
|
||||
|
||||
depa:
|
||||
godepgraph -maxlevel 4 -nostdlib -i github.com/minio/minio-go ./cmd/agent | dot -Tpdf > deps-agent.pdf && open deps-agent.pdf
|
||||
README.md (73 lines changed)
@@ -6,6 +6,52 @@ TODO Style notes
- NO package init() functions
- Dynamic behaviour must be explicit


## Client CodeQL Database Selector
Separate from the server's downloading of databases, a client-side interface is
needed to generate the `databases.json` file. This interface

1. must be usable from the shell
2. must be interactive (Python, Jupyter)
3. is session based, to allow iterating on selection / narrowing
4. must be queryable -- there is no need to reinvent SQL / dataframes

Python with dataframes is ideal for this; the project is in `client/`.
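As a minimal sketch (assuming a db-info CSV with the `owner`, `name`, `CID`,
`language`, and `creationTime` columns described in
`client/qldbtools/README.org`; the output shape of `databases.json` here is
only illustrative), such a selection step might look like:

    import json
    import sys

    import pandas as pd

    # Load the collected DB information (CSV on stdin, as the mc-db-* tools emit).
    df = pd.read_csv(sys.stdin)

    # Narrow interactively: e.g. keep only C/C++ databases, newest build per repo.
    cpp = df[df["language"] == "cpp"]
    latest = cpp.sort_values("creationTime").groupby(["owner", "name"]).tail(1)

    # Name each database the way the collection scripts do: owner-name-ctsj-CID
    # (the naming used by bin/hepc-init.sh).
    names = [f"{r.owner}-{r.name}-ctsj-{r.CID}" for r in latest.itertuples()]
    json.dump({"databases": names}, sys.stdout, indent=2)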

## Reverse proxy
For testing, replay flows using mitmweb. This is faster and simpler than using
gh-mrva or the VS Code plugin.

- Set up the virtual environment and install tools

        python3.11 -m venv venv
        source venv/bin/activate
        pip install mitmproxy

For intercepting requests:

1. Start mitmproxy to listen on port 8080 and forward requests to port 8081,
   with web interface

        mitmweb --mode reverse:http://localhost:8081 -p 8080

1. Change `server` ports in `docker-compose.yml` to

        ports:
            - "8081:8080" # host:container

1. Start the containers.

1. Submit requests.

1. Save the flows for later replay.

One such session is in `tools/mitmweb-flows`; it can be loaded to replay the
requests:

1. Start `mitmweb --mode reverse:http://localhost:8081 -p 8080`
2. `file` > `open` > `tools/mitmweb-flows`
3. Replay at least the submit, status, and download requests

## Cross-compile server on host, run it in container
These are simple steps using a single container.

@@ -31,7 +77,10 @@ These are simple steps using a single container.
        cd /mrva/mrvacommander/cmd/server/ && ./server

## Using docker-compose
### Steps to build and run the server

Steps to build and run the server in a multi-container environment set up by
docker-compose.

1. Build the server image, above

@@ -53,6 +102,28 @@ These are simple steps using a single container.
        cd /mrva/mrvacommander/cmd/server/
        ./server -loglevel=debug -mode=container

1. Test server from the host via

        cd ~/work-gh/mrva/mrvacommander/tools
        sh ./request_16-Jun-2024_11-33-16.curl

1. Follow server logging via

        cd ~/work-gh/mrva/mrvacommander
        docker-compose up -d
        docker-compose logs -f server

1. Completely rebuild all containers. Useful when running into docker errors

        cd ~/work-gh/mrva/mrvacommander
        docker-compose up --build

1. Start the server containers and the desktop/demo containers

        cd ~/work-gh/mrva/mrvacommander/
        docker-compose down --remove-orphans
        docker-compose -f docker-compose-demo.yml up -d

1. Test server via remote client by following the steps in [gh-mrva](https://github.com/hohn/gh-mrva/blob/connection-redirect/README.org#compacted-edit-run-debug-cycle)

### Some general docker-compose commands
client/Plan.ipynb (new file, 213 lines)
File diff suppressed because one or more lines are too long.
client/containers/ghmrva/Dockerfile (new file, 64 lines)
@@ -0,0 +1,64 @@
# ######################
# Use an official Golang image as the base image
FROM golang:1.22 AS builder

# Set the working directory inside the container
WORKDIR /work-gh/mrva/gh-mrva

# Clone the repository
RUN git clone https://github.com/hohn/gh-mrva.git . &&\
    git checkout hohn-0.1.24-demo

# Download dependencies
RUN go mod download

# Build the Go binary
RUN go build .

# ######################
# Provide codeql and java
#
FROM ubuntu:24.10 AS runner
ENV DEBIAN_FRONTEND=noninteractive

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates \
    default-jdk

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr

# ######################

# Set the working directory inside the final image
WORKDIR /app

# Copy the binary from the builder stage
COPY --from=builder /work-gh/mrva/gh-mrva/gh-mrva /usr/local/bin/gh-mrva

# Put CodeQL on the PATH
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/codeql

# Run forever
CMD ["tail", "-f", "/dev/null"]
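The latest-release lookup in the `RUN` step above is a single GitHub API call;
a minimal Python sketch of the same resolution (equivalent to the
curl/grep/sed pipeline, shown for clarity rather than for use in the image):

    import json
    import urllib.request

    # Resolve the latest codeql-cli-binaries release tag, as the Dockerfile does.
    api = "https://api.github.com/repos/github/codeql-cli-binaries/releases/latest"
    with urllib.request.urlopen(api) as resp:
        tag = json.load(resp)["tag_name"]

    # Same download URL the Dockerfile fetches with curl.
    bundle = (f"https://github.com/github/codeql-cli-binaries"
              f"/releases/download/{tag}/codeql-linux64.zip")
    print(tag, bundle)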
client/containers/ghmrva/Makefile (new file, 13 lines)
@@ -0,0 +1,13 @@
ghm: mk.client-ghmrva-container
mk.client-ghmrva-container:
	docker build -t client-ghmrva-container:0.1.24 .
	touch $@

ghm-push: mk.ghm-push
mk.ghm-push: mk.client-ghmrva-container
	docker tag client-ghmrva-container:0.1.24 ghcr.io/hohn/client-ghmrva-container:0.1.24
	docker push ghcr.io/hohn/client-ghmrva-container:0.1.24
	touch $@

ghm-run:
	docker run --rm -it ghcr.io/hohn/client-ghmrva-container:0.1.24 /bin/bash
client/containers/ghmrva/README.org (new file, 16 lines)
@@ -0,0 +1,16 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
  # Build
  cd ~/work-gh/mrva/mrvacommander/client/containers/ghmrva/
  make ghm
  # Run
  docker run -ti client-ghmrva-container:0.1.24 /bin/bash

  # In the container
  gh-mrva -h
  codeql -h

  # Push
  make ghm-push
#+END_SRC
client/containers/hepc/Dockerfile (new file, 30 lines)
@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
BIN client/containers/hepc/codeql-dbs/db-collection-mixed/ (new files, stored with Git LFS; binary files not shown):
BentoML-BentoML-ctsj-d6963d.zip
Serial-Studio-Serial-Studio-ctsj-2b2721.zip
UEFITool-UEFITool-ctsj-ee2d3c.zip
aircrack-ng-aircrack-ng-ctsj-41ebbe.zip
apprise-apprise-ctsj-3f4a4e.zip
attrs-attrs-ctsj-e2c939.zip
aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip
aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip
behave-behave-ctsj-b297b5.zip
bulk-builder-bulk-builder-ctsj-01864e.zip
bulk-builder-bulk-builder-ctsj-0189aa.zip
bulk-builder-bulk-builder-ctsj-035849.zip
bulk-builder-bulk-builder-ctsj-051a5c.zip
bulk-builder-bulk-builder-ctsj-099796.zip
bulk-builder-bulk-builder-ctsj-0a35a1.zip
bulk-builder-bulk-builder-ctsj-0a6352.zip
bulk-builder-bulk-builder-ctsj-0c6575.zip
bulk-builder-bulk-builder-ctsj-0cdf2f.zip
bulk-builder-bulk-builder-ctsj-0d667f.zip
bulk-builder-bulk-builder-ctsj-0d6cf6.zip
bulk-builder-bulk-builder-ctsj-0d7b69.zip
bulk-builder-bulk-builder-ctsj-103a8a.zip
@@ -0,0 +1,23 @@
{"git_branch": "HEAD", "git_commit_id": "2b41915dac8966e95f9e63638d30769b0d69ad68", "git_repo": "aircrack-ng", "ingestion_datetime_utc": "2024-06-07 16:57:47.683012+00:00", "result_url": "http://hepc/db-collection-py/aircrack-ng-aircrack-ng-ctsj-41ebbe.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.4", "projname": "aircrack-ng/aircrack-ng"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "9a9308fd5477d2a44f4e491d5a712546d4a2b3e4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-22 13:30:21.681180+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0189aa.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "34412555665923bc07d43ce970e9d81be3795de7", "git_repo": "UEFITool", "ingestion_datetime_utc": "2024-07-04 19:00:38.543297+00:00", "result_url": "http://hepc/db-collection-py/UEFITool-UEFITool-ctsj-ee2d3c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.6", "projname": "UEFITool/UEFITool"}
{"git_branch": "HEAD", "git_commit_id": "00aa56f5257060304d41f09651c6ab58ee6104d6", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-18 14:12:52.904410+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0c6575.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "e4bffa0a7450e1abd9f4df9565728ae18d86cfd2", "git_repo": "attrs", "ingestion_datetime_utc": "2024-07-18 22:34:57.795427+00:00", "result_url": "http://hepc/db-collection-py/attrs-attrs-ctsj-e2c939.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "attrs/attrs"}
{"git_branch": "HEAD", "git_commit_id": "9620901afce56f720e856aca600951c9b61a9460", "git_repo": "apprise", "ingestion_datetime_utc": "2024-07-22 22:26:48.720348+00:00", "result_url": "http://hepc/db-collection-py/apprise-apprise-ctsj-3f4a4e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "apprise/apprise"}
{"git_branch": "HEAD", "git_commit_id": "c38e6c8cfba28980aea8f895c71b376e8a5155d5", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2022-04-16T12:45:56.739003883Z", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d6cf6.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.8.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "18f6be580b12dc406ef356b2cd65f47c24fce63e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 05:46:23.392157+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d667f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a587921bac074b1bd1b0a0a5536587660a9b954e", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-07-19 16:13:39.094478+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a6352.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.18.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9b361c7ff497d57651856650667aece8230fab6d", "git_repo": "BentoML", "ingestion_datetime_utc": "2024-07-24 02:17:07.095690+00:00", "result_url": "http://hepc/db-collection-py/BentoML-BentoML-ctsj-d6963d.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.18.0", "projname": "BentoML/BentoML"}
{"git_branch": "HEAD", "git_commit_id": "8b399e9f51701b34f2f3c9375e637e6fffc642b7", "git_repo": "Serial-Studio", "ingestion_datetime_utc": "2023-10-01T15:18:43.503672671Z", "result_url": "http://hepc/db-collection-py/Serial-Studio-Serial-Studio-ctsj-2b2721.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.12.0", "projname": "Serial-Studio/Serial-Studio"}
{"git_branch": "HEAD", "git_commit_id": "53ad2da1a8e6e79e0986ddfa3a45e1db6fdd491c", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-14 02:24:19.208812+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-01864e.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "db8f1a7930c6b5826357646746337dafc983f953", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2023-11-22 01:18:25.079473+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-099796.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.2", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "f8df9dd749a549dec20aa286a7639ba04190faab", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-12 16:39:28.854142+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0d7b69.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "b5274976cb0a792d05d541a749c0adcd9d20062d", "git_repo": "behave", "ingestion_datetime_utc": "2024-05-11 19:20:51.916333+00:00", "result_url": "http://hepc/db-collection-py/behave-behave-ctsj-b297b5.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "behave/behave"}
{"git_branch": "HEAD", "git_commit_id": "4c825c198df470506b0f84da0b25b3b385150dcb", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-04-25 03:26:03.986270+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-035849.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "a8b8ff0acc6fcc629d08a3a9952f83be56a9a3c3", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-03 13:30:48.829134+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-051a5c.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-java", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "9ef05731e7c6cbad2e897faa7c526558eed3ceaa", "git_repo": "aws-sam-cli", "ingestion_datetime_utc": "2024-05-14 01:03:18.130142+00:00", "result_url": "http://hepc/db-collection-py/aws-sam-cli-aws-sam-cli-ctsj-b7f561.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sam-cli/aws-sam-cli"}
{"git_branch": "HEAD", "git_commit_id": "16865390a653ceaeabe354df1b37e4a775161a70", "git_repo": "aws-sdk-pandas", "ingestion_datetime_utc": "2024-05-13 15:13:31.853042+00:00", "result_url": "http://hepc/db-collection-py/aws-sdk-pandas-aws-sdk-pandas-ctsj-2b7750.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.17.2", "projname": "aws-sdk-pandas/aws-sdk-pandas"}
{"git_branch": "HEAD", "git_commit_id": "093856995af0811d3ebbe8c179b8febf4ae706f0", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-03-20 14:18:02.500590+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-103a8a.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.16.4", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "0573e6f96637f08fb4cb85e0552f0622d36827d4", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-01-24 09:21:05.977294+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0cdf2f.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-python", "tool_version": "2.15.5", "projname": "bulk-builder/bulk-builder"}
{"git_branch": "HEAD", "git_commit_id": "93314995a5ee2217d58c3d9cbcbdef5df6c34566", "git_repo": "bulk-builder", "ingestion_datetime_utc": "2024-05-09 05:29:25.243273+00:00", "result_url": "http://hepc/db-collection-py/bulk-builder-bulk-builder-ctsj-0a35a1.zip", "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4", "tool_name": "codeql-cpp", "tool_version": "2.17.0", "projname": "bulk-builder/bulk-builder"}
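Each record above is one line of JSON; a minimal Python sketch for reading
such an index (assuming a local copy named `metadata.json` with the
one-record-per-line layout shown) is:

    import json

    # One JSON record per non-blank line, as in the hunk above.
    with open("metadata.json") as fh:
        records = [json.loads(line) for line in fh if line.strip()]

    # E.g. list the download URL for every Python database in the collection.
    for rec in records:
        if rec["tool_name"] == "codeql-python":
            print(rec["projname"], rec["result_url"])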
client/containers/qldbtools/Dockerfile (new file, 30 lines)
@@ -0,0 +1,30 @@
# Use a Python 3.11 image as the base
FROM python:3.11-slim

# Install git
RUN apt-get update && apt-get install -y git

# Create the required directory structure
RUN mkdir -p /work-gh/mrva/

# Change to the directory and clone the repository
WORKDIR /work-gh/mrva/
RUN git clone https://github.com/hohn/mrvacommander.git && \
    cd mrvacommander && \
    git checkout hohn-0.1.24-demo

# Change to the client directory
WORKDIR /work-gh/mrva/mrvacommander/client/qldbtools/

# We're in a container, so use pip globally -- no virtual env
RUN pip install --upgrade pip

# Install the required Python packages from requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Install qldbtools
RUN pip install .

# Run forever
CMD ["tail", "-f", "/dev/null"]
||||
25
client/containers/qldbtools/Makefile
Normal file
25
client/containers/qldbtools/Makefile
Normal file
@@ -0,0 +1,25 @@
|
||||
DBT_TARGET := client-qldbtools-container:0.1.24
|
||||
|
||||
# Build the qldbtools container image
|
||||
dbt: mk.client-qldbtools-container
|
||||
mk.client-qldbtools-container:
|
||||
docker build -t ${DBT_TARGET} .
|
||||
touch $@
|
||||
|
||||
# Run a shell in the container with the qldbtools
|
||||
dbt-run: dbt
|
||||
docker run --rm -it ${DBT_TARGET} /bin/bash
|
||||
|
||||
# Run one of the scripts in the container as check. Should exit with error.
|
||||
dbt-check: dbt
|
||||
docker run --rm -it ${DBT_TARGET} mc-db-initial-info
|
||||
|
||||
dbt-push: mk.dbt-push
|
||||
mk.dbt-push: dbt
|
||||
docker tag ${DBT_TARGET} ghcr.io/hohn/${DBT_TARGET}
|
||||
docker push ghcr.io/hohn/${DBT_TARGET}
|
||||
touch $@
|
||||
|
||||
dbt-test:
|
||||
docker pull ghcr.io/hohn/${DBT_TARGET}
|
||||
docker run --rm -it --name test-dbt-server ghcr.io/hohn/${DBT_TARGET} sh
|
||||
client/containers/qldbtools/README.org (new file, 13 lines)
@@ -0,0 +1,13 @@
* MRVA python tools container
Set up a Docker image with Python 3.11, pip, and the qldbtools. The targets are
in the =Makefile=; the most important are

#+BEGIN_SRC sh
  # Build
  make dbt

  # Check
  make dbt-check

#+END_SRC
client/containers/vscode/Dockerfile (new file, 67 lines)
@@ -0,0 +1,67 @@
FROM codercom/code-server:4.92.2-debian

# ======================
# Pre-install a custom JDK for this platform and redirect CodeQL to it

USER root

ENV DEBIAN_FRONTEND=noninteractive

# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    ca-certificates \
    curl \
    default-jdk \
    git \
    libcurl4-openssl-dev \
    libssl-dev \
    python3 \
    python3-dev \
    unzip

# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# If the version is 'latest', get the latest release version from GitHub, unzip
# the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
        CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# ======================
# Configure code-server
USER coder

# Set environment variables
ENV PASSWORD mrva

# Install VS Code extensions as the coder user
RUN code-server --install-extension ms-python.python \
    && code-server --install-extension esbenp.prettier-vscode \
    && code-server --install-extension GitHub.vscode-codeql

# Expose the port that Code Server runs on
EXPOSE 9080

# Point CodeQL to the java binary for this platform
ENV CODEQL_JAVA_HOME=/usr

# Add
#     codeQL.cli.executablePath
# to user settings.
# This is in addition to the environment variable CODEQL_JAVA_HOME, which has no
# effect on the plugin
USER root
COPY ./settings.json /home/coder/.local/share/code-server/User/
RUN chown -R coder:coder /home/coder/.local/share/code-server/

# Start Code Server
ENTRYPOINT ["dumb-init", "code-server", "--bind-addr", "0.0.0.0:9080", "."]

# Run as the coder user
USER coder
client/containers/vscode/README.org (new file, 119 lines)
@@ -0,0 +1,119 @@
* MRVA VS Code server container
On the host:

#+BEGIN_SRC sh
  # Build the container via
  cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
  docker build -t code-server-initialized:0.1.24 .

  # Run the container in standalone mode via
  cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/
  docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \
         -d -p 9080:9080 code-server-initialized:0.1.24
#+END_SRC

- Connect to it at http://localhost:9080/?folder=/home/coder; the password is =mrva=.

Inside the container:

- Setup inside the container
  #+BEGIN_SRC shell
    cd
    export PATH=/opt/codeql:$PATH
    codeql pack init qldemo
    cd qldemo
    codeql pack add codeql/python-all@1.0.6
  #+END_SRC

- Create a new file =qldemo/simple.ql= with this query. Open it in VS Code.
  The plugin will download the CodeQL binaries (but never use them -- the
  configuration redirects)
  #+BEGIN_SRC sh
    cd
    cat > qldemo/simple.ql <<eof
    import python
    select 42
    eof
  #+END_SRC

- Create database.
  #+BEGIN_SRC sh
    cd ~/qldemo

    cat > short.py <<EOF
    print('hello world')
    EOF
    export PATH=/opt/codeql:$PATH
    codeql database create --language=python -s . -v short-db
  #+END_SRC

- Set the database as default and run the query =simple.ql=

- Add the customized VS Code plugin
  On the host
  #+BEGIN_SRC sh
    cd ~/work-gh/mrva/vscode-codeql
    git checkout mrva-standalone

    # Install nvm
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

    # Install correct node version
    cd ./extensions/ql-vscode
    nvm install

    # Build the extension
    cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
    npm install
    npm run build
  #+END_SRC

  In the container
  #+BEGIN_SRC sh
    # Install extension
    cd /work-gh/mrva/vscode-codeql/dist

    /bin/code-server --force --install-extension vscode-codeql-*.vsix
  #+END_SRC

- Capture the state of this container and create a new image from it
  #+BEGIN_SRC sh
    docker ps
    # Check id column. Use it below.
    docker commit 2df5732c1850 code-server-initialized:0.1.24
    # Keep the sha
    # sha256:87c8260146e28aed25b094d023a30a015a958f829c09e66cb50ccca2c4a2a000
    docker kill 2df5732c1850

    # Make sure the image tag matches the sha
    docker inspect code-server-initialized:0.1.24 | grep Id

    # Run the image and check
    docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
           code-server-initialized:0.1.24
  #+END_SRC
  Again connect to it at http://localhost:9080/?folder=/home/coder; the password is =mrva=.

- Push this container
  #+BEGIN_SRC sh
    # Common
    export CSI_TARGET=code-server-initialized:0.1.24

    # Push container
    docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET}
    docker push ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC

- Test the registry image
  #+BEGIN_SRC sh
    # Test pushed container
    docker pull ghcr.io/hohn/${CSI_TARGET}
    docker run --rm -d -p 9080:9080 --name test-code-server-codeql \
           ghcr.io/hohn/${CSI_TARGET}
  #+END_SRC
  In the container, inside the running VS Code:
  - Check the plugin version number via the command
    : codeql: copy version information
client/containers/vscode/settings.json (new file, 4 lines)
@@ -0,0 +1,4 @@
{
    "codeQL.runningQueries.numberOfThreads": 2,
    "codeQL.cli.executablePath": "/opt/codeql/codeql"
}
client/qldbtools/.vscode/launch.json (vendored, new file, 24 lines)
@@ -0,0 +1,24 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [

        {
            "name": "Python Debugger: Current File with Arguments",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "args": [
                "--db_collection_dir",
                "db-collection-py",
                "--starting_path",
                "$HOME/work-gh/mrva/mrva-open-source-download"
            ],
            "justMyCode": true,
            "stopOnEntry": false
        }
    ]
}
client/qldbtools/Makefile (new file, 2 lines)
@@ -0,0 +1,2 @@
doc:
	pandoc -s --css=./gfm.css README.md > foo.html && open foo.html
client/qldbtools/README.org (new file, 171 lines)
@@ -0,0 +1,171 @@
* Introduction to hepc -- HTTP End Point for CodeQL
#+BEGIN_SRC sh
  1:$ ./bin/hepc-init --db_collection_dir db-collection --starting_path ~/work-gh/mrva/mrva-open-source-download
  [2024-11-19 14:12:06] [INFO] searching for db.zip files
  [2024-11-19 14:12:08] [INFO] collecting information from db.zip files
  [2024-11-19 14:12:08] [INFO] Extracting from /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/aircrack-ng/aircrack-ng/code-scanning/codeql/databases/cpp/db.zip
  [2024-11-19 14:12:08] [INFO] Adding record to db-collection/metadata.json
#+END_SRC
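
Once =bin/hepc-serve.go= serves such a collection, clients can consume the
index over HTTP. A minimal Python sketch, assuming a local server on
127.0.0.1:8080 and rewriting the =result_url= host the same way the curl
examples in =bin/hepc-serve.go= do:

#+BEGIN_SRC python
  import json
  import urllib.request

  base = "http://127.0.0.1:8080"

  # Fetch the JSON-lines index served at /api/v1/latest_results/codeql-all.
  with urllib.request.urlopen(f"{base}/api/v1/latest_results/codeql-all") as resp:
      records = [json.loads(line) for line in resp.read().splitlines() if line.strip()]

  # Download the first database, rewriting http://hepc/... to the /db/ route.
  first = records[0]
  url = first["result_url"].replace("http://hepc", f"{base}/db")
  urllib.request.urlretrieve(url, "db.zip")
  print("downloaded", first["projname"])
#+END_SRC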

* Introduction to qldbtools
=qldbtools= is a Python package for selecting sets of CodeQL databases
to work on. It uses a (pandas) dataframe in the implementation, but all
result sets are available as CSV files to provide flexibility in the
tools you want to work with.

The rationale is simple: when working with larger collections of CodeQL
databases, spread over time, languages, etc., many criteria can be used
to select the subset of interest. This package addresses that aspect of
MRVA (multi-repository variant analysis).

For example, consider this scenario from an enterprise. We have 10,000
repositories in C/C++ and 5,000 in Python. We build CodeQL databases weekly
and keep the last 2 years' worth. This means for the last 2 years there
are

#+begin_example
  (10000 + 5000) * 52 * 2 = 1560000
#+end_example

databases to select from for a single MRVA run. Roughly 1.5 million rows are
readily handled by a pandas (or R) dataframe.

The full list of criteria currently encoded via the columns is

- owner
- name
- CID
- cliVersion
- creationTime
- language
- sha -- git commit sha of the code the CodeQL database is built against
- baselineLinesOfCode
- path
- db_lang
- db_lang_displayName
- db_lang_file_count
- db_lang_linesOfCode
- ctime
- primaryLanguage
- finalised
- left_index
- size

The minimal criteria needed to distinguish databases in the above
scenario are

- cliVersion
- creationTime
- language
- sha

These are encoded in the single custom id column 'CID'.

Thus, a database can be fully specified using an (owner, name, CID) tuple,
and this is encoded in the names used by the MRVA server and clients.
The selection of databases can of course be done using the whole table.
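
The CID itself is cheap to derive. A minimal Python sketch, assuming the same
recipe as =bin/hepc-init.sh= (BLAKE2b over the space-joined fields, plus the
trailing newline that =echo | b2sum= includes, keeping the first six hex
digits):

#+BEGIN_SRC python
  import hashlib

  def cid(cli_version: str, creation_time: str, language: str, sha: str) -> str:
      # Mirror: echo "..." | b2sum | awk '{print substr($1, 1, 6)}'
      line = f"{cli_version} {creation_time} {language} {sha}\n"
      return hashlib.blake2b(line.encode()).hexdigest()[:6]

  print(cid("2.16.1", "2024-02-08 14:18:20.983830+00:00", "python",
            "c94dd024b1f5410ef160ff82a8423141e2bbb6b4"))
#+END_SRC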

For an example of the workflow, see [[#command-line-use][section
'command line use']].

A small sample of a full table:

| | owner | name | CID | cliVersion | creationTime | language | sha | baselineLinesOfCode | path | db_lang | db_lang_displayName | db_lang_file_count | db_lang_linesOfCode | ctime | primaryLanguage | finalised | left_index | size |
|---+----------+----------------+--------+------------+----------------------------------+----------+------------------------------------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+-------------+---------------------+--------------------+---------------------+----------------------------+-----------------+-----------+------------+----------|
| 0 | 1adrianb | face-alignment | 1f8d99 | 2.16.1 | 2024-02-08 14:18:20.983830+00:00 | python | c94dd024b1f5410ef160ff82a8423141e2bbb6b4 | 1839 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/1adrianb/face-alignment/code-scanning/codeql/databases/python/db.zip | python | Python | 25 | 1839 | 2024-07-24T14:09:02.187201 | python | 1 | 1454 | 24075001 |
| 1 | 2shou | TextGrocery | 9ab87a | 2.12.1 | 2023-02-17T11:32:30.863093193Z | cpp | 8a4e41349a9b0175d9a73bc32a6b2eb6bfb51430 | 3939 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/2shou/TextGrocery/code-scanning/codeql/databases/cpp/db.zip | no-language | no-language | 0 | -1 | 2024-07-24T06:25:55.347568 | cpp | nan | 1403 | 3612535 |
| 2 | 3b1b | manim | 76fdc7 | 2.17.5 | 2024-06-27 17:37:20.587627+00:00 | python | 88c7e9d2c96be1ea729b089c06cabb1bd3b2c187 | 19905 | /Users/hohn/work-gh/mrva/mrva-open-source-download/repos/3b1b/manim/code-scanning/codeql/databases/python/db.zip | python | Python | 94 | 19905 | 2024-07-24T13:23:04.716286 | python | 1 | 1647 | 26407541 |

** Installation
- Set up the virtual environment and install tools

  #+begin_example
    cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
    python3.11 -m venv venv
    source venv/bin/activate
    pip install --upgrade pip

    # From requirements.txt
    pip install -r requirements.txt
    # Or explicitly
    pip install jupyterlab pandas ipython
    pip install lckr-jupyterlab-variableinspector
  #+end_example

- Local development

  #+begin_example
    cd ~/work-gh/mrva/mrvacommander/client/qldbtools
    source venv/bin/activate
    pip install --editable .
  #+end_example

  The =--editable= install *should* use symlinks for all scripts; use =./bin/*= to be sure.

- Full installation

  #+begin_example
    pip install qldbtools
  #+end_example

** Use as library
The best way to examine the code is starting from the high-level scripts
in =bin/=.

** Command line use
Initial information collection requires a unique file path so it can be
run repeatedly over DB collections with the same (owner,name) but other
differences -- namely, in one or more of

- creationTime
- sha
- cliVersion
- language

Those fields are collected in =bin/mc-db-refine-info=.

An example workflow with commands grouped by data files follows.

#+begin_example
  cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
  ./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
  ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

  ./bin/mc-db-view-info < scratch/db-info-2.csv &
  ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
  ./bin/mc-db-view-info < scratch/db-info-3.csv &

  ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
  ./bin/mc-db-generate-selection -n 11 \
      scratch/vscode-selection.json \
      scratch/gh-mrva-selection.json \
      < scratch/db-info-3.csv
#+end_example

To see the full information for a selection, use
=mc-rows-from-mrva-list=:

#+begin_example
  ./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
      scratch/db-info-3.csv > scratch/selection-full-info
#+end_example

To check, e.g., the =language= column:

#+begin_example
  csvcut -c language scratch/selection-full-info
#+end_example

** Notes
The =preview-data= plugin for VS Code has a bug; it displays =0= instead
of =0e3379= for the following. There are other entries with similar
malfunction.

#+begin_example
  CleverRaven,Cataclysm-DDA,0e3379,2.17.0,2024-05-08 12:13:10.038007+00:00,cpp,5ca7f4e59c2d7b0a93fb801a31138477f7b4a761,578098.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos-2024-04-29/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1228.0,578098.0,2024-05-13T12:14:54.650648,cpp,True,4245,563435469
  CleverRaven,Cataclysm-DDA,3231f7,2.18.0,2024-07-18 11:13:01.673231+00:00,cpp,db3435138781937e9e0e999abbaa53f1d3afb5b7,579532.0,/Users/hohn/work-gh/mrva/mrva-open-source-download/repos/CleverRaven/Cataclysm-DDA/code-scanning/codeql/databases/cpp/db.zip,cpp,C/C++,1239.0,579532.0,2024-07-24T02:33:23.900885,cpp,True,1245,573213726
#+end_example
client/qldbtools/bin/hepc-init.sh (new executable file, 144 lines)
@@ -0,0 +1,144 @@
#!/bin/bash

#* Utility functions
log() {
    local level="$1"
    shift
    local color_reset="\033[0m"
    local color_info="\033[1;34m"
    local color_warn="\033[1;33m"
    local color_error="\033[1;31m"

    local color
    case "$level" in
        INFO)  color="$color_info" ;;
        WARN)  color="$color_warn" ;;
        ERROR) color="$color_error" ;;
        *)     color="$color_reset" ;;
    esac

    echo -e "${color}[$(date +"%Y-%m-%d %H:%M:%S")] [$level] $*${color_reset}" >&2
}
usage() {
    echo "Usage: $0 --db_collection_dir <directory> --starting_path <path> [-h]"
    echo
    echo "Options:"
    echo "  --db_collection_dir <directory>  Specify the database collection directory."
    echo "  --starting_path <path>           Specify the starting path."
    echo "  -h                               Show this help message."
    exit 1
}


#* Initialize and parse arguments
set -euo pipefail  # exit on error, unset var, pipefail
trap 'rm -fR /tmp/hepc.$$-*' EXIT

starting_dir=$(pwd)
db_collection_dir=""
starting_path=""

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --db_collection_dir)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --db_collection_dir requires a directory as an argument."
                usage
            fi
            db_collection_dir="$1"
            ;;
        --starting_path)
            shift
            if [[ -z "$1" || "$1" == -* ]]; then
                echo "Error: --starting_path requires a path as an argument."
                usage
            fi
            starting_path="$1"
            ;;
        -h)
            usage
            ;;
        *)
            echo "Error: Unknown option '$1'."
            usage
            ;;
    esac
    shift
done

# Check if required arguments were provided
if [[ -z "$db_collection_dir" ]]; then
    echo "Error: --db_collection_dir is required."
    usage
fi

if [[ -z "$starting_path" ]]; then
    echo "Error: --starting_path is required."
    usage
fi

#* Find all DBs
log INFO "searching for db.zip files"
find ${starting_path} -type f -name "db.zip" -size +0c > /tmp/hepc.$$-paths

#* Collect detailed information from the database files
# Don't assume they are unique.
log INFO "collecting information from db.zip files"
mkdir -p $db_collection_dir
cat /tmp/hepc.$$-paths | while read -r zip_path
do
    log INFO "Extracting from ${zip_path}"
    zip_dir=$(dirname ${zip_path})
    zip_file=$(basename ${zip_path})
    unzip -o -q ${zip_path} '*codeql-database.yml' -d /tmp/hepc.$$-zip
    # The content may be LANGUAGE/codeql-database.yml

    #* For every database, create a metadata record.
    mkdir -p /tmp/hepc.$$-zip
    cd /tmp/hepc.$$-zip/*

    # Information from codeql-database.yml
    primaryLanguage=$(yq '.primaryLanguage' codeql-database.yml)
    sha=$(yq '.creationMetadata.sha' codeql-database.yml)
    cliVersion=$(yq '.creationMetadata.cliVersion' codeql-database.yml)
    creationTime=$(yq '.creationMetadata.creationTime' codeql-database.yml)
    sourceLocationPrefix=$(yq '.sourceLocationPrefix' codeql-database.yml)
    repo=${sourceLocationPrefix##*/}    # keep only last component
    # Get sourceLocationPrefix[-2]
    owner="${sourceLocationPrefix%/*}"  # strip last component
    owner="${owner##*/}"                # keep only last component

    # cid for repository / db
    cid=$(echo "${cliVersion} ${creationTime} ${primaryLanguage} ${sha}" | b2sum |\
              awk '{print substr($1, 1, 6)}')

    # Prepare the metadata record for this DB.
    new_db_fname="${owner}-${repo}-ctsj-${cid}.zip"
    result_url="http://hepc/${db_collection_dir}/${new_db_fname}"
    record='
    {
        "git_branch": "HEAD",
        "git_commit_id": "'${sha}'",
        "git_repo": "'${repo}'",
        "ingestion_datetime_utc": "'${creationTime}'",
        "result_url": "'${result_url}'",
        "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
        "tool_name": "codeql-'${primaryLanguage}'",
        "tool_version": "'${cliVersion}'",
        "projname": "'${owner}/${repo}'"
    }
    '
    cd "$starting_dir"
    rm -fR /tmp/hepc.$$-zip
    echo "$record" >> $db_collection_dir/metadata.json

    #* Link original file path to collection directory for serving. Use a name
    # including the cid and the field separator ctsj
    cd ${db_collection_dir}
    [ -L ${new_db_fname} ] || ln -s ${zip_path} ${new_db_fname}

    # Interim cleanup
    rm -fR "/tmp/hepc.$$-*"
done
client/qldbtools/bin/hepc-serve.go (new executable file, 104 lines)
@@ -0,0 +1,104 @@
/*
dependencies
	go get -u golang.org/x/exp/slog

on-the-fly
	go run bin/hepc-serve.go --codeql-db-dir db-collection-py-1

compiled
	cd ~/work-gh/mrva/mrvacommander/client/qldbtools/
	go build -o ./bin/hepc-serve.bin ./bin/hepc-serve.go

test
	curl http://127.0.0.1:8080/api/v1/latest_results/codeql-all -o foo
	curl $(head -1 foo | jq -r ".result_url" | sed 's|hepc|127.0.0.1:8080/db|g;') -o foo.zip
*/
package main

import (
	"flag"
	"fmt"
	"net/http"
	"os"
	"path/filepath"

	"golang.org/x/exp/slog"
)

var dbDir string

func serveFile(w http.ResponseWriter, r *http.Request) {
	fullPath := r.URL.Path[len("/db/"):]

	resolvedPath, err := filepath.EvalSymlinks(fullPath)
	if err != nil {
		slog.Warn("failed to resolve symlink", slog.String("fullPath", fullPath),
			slog.String("error", err.Error()))
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}

	if fileInfo, err := os.Stat(resolvedPath); err != nil || fileInfo.IsDir() {
		slog.Warn("file not found or is a directory", slog.String("resolvedPath", resolvedPath))
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}

	slog.Info("serving file", slog.String("resolvedPath", resolvedPath))
	http.ServeFile(w, r, resolvedPath)
}

func serveMetadata(w http.ResponseWriter, r *http.Request) {
	metadataPath := filepath.Join(dbDir, "metadata.json")
	if fileInfo, err := os.Stat(metadataPath); err != nil || fileInfo.IsDir() {
		slog.Warn("metadata.json not found", slog.String("metadataPath", metadataPath))
		http.Error(w, "metadata.json not found", http.StatusNotFound)
		return
	}

	slog.Info("serving metadata.json", slog.String("metadataPath", metadataPath))
	http.ServeFile(w, r, metadataPath)
}

func logMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		slog.Info("incoming request", slog.String("method", r.Method), slog.String("url", r.URL.Path))
		next.ServeHTTP(w, r)
	})
}

func main() {
	var host string
	var port int

	flag.StringVar(&dbDir, "codeql-db-dir", "", "Directory containing CodeQL database files (required)")
	flag.StringVar(&host, "host", "127.0.0.1", "Host address for the HTTP server")
	flag.IntVar(&port, "port", 8080, "Port for the HTTP server")
	flag.Parse()

	if dbDir == "" {
		slog.Error("missing required flag", slog.String("flag", "--codeql-db-dir"))
		os.Exit(1)
	}

	if _, err := os.Stat(dbDir); os.IsNotExist(err) {
		slog.Error("invalid directory", slog.String("dbDir", dbDir))
		os.Exit(1)
	}

	slog.Info("starting server", slog.String("host", host), slog.Int("port", port), slog.String("dbDir", dbDir))

	mux := http.NewServeMux()
	mux.HandleFunc("/db/", serveFile)
	mux.HandleFunc("/index", serveMetadata)
	mux.HandleFunc("/api/v1/latest_results/codeql-all", serveMetadata)

	loggedHandler := logMiddleware(mux)

	addr := fmt.Sprintf("%s:%d", host, port)
	slog.Info("server listening", slog.String("address", addr))
	if err := http.ListenAndServe(addr, loggedHandler); err != nil {
		slog.Error("server error", slog.String("error", err.Error()))
	}
}
108
client/qldbtools/bin/mc-db-generate-selection
Executable file
@@ -0,0 +1,108 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
"""
import argparse
import logging
from argparse import Namespace
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import numpy as np

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information
    and generate the selection files for
    1. the VS Code CodeQL plugin
    2. the gh-mrva command-line client
    """,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('vscode_selection', type=str,
                    help='VS Code selection file to generate')
parser.add_argument('gh_mrva_selection', type=str,
                    help='gh-mrva cli selection file to generate')
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
parser.add_argument('-l', '--list-name', type=str,
                    help='Name of the repository list',
                    default='mirva-list')

args: Namespace = parser.parse_args()
#
#* Load the information
#
import pandas as pd
import sys

df0: DataFrame = pd.read_csv(sys.stdin)

if args.num_entries is None:
    # Use all entries
    df1: DataFrame = df0
else:
    # Use num_entries, chosen via pseudo-random numbers
    df1 = df0.sample(n=args.num_entries,
                     random_state=np.random.RandomState(args.seed))

#
#* Form and save structures
#
repos: list[str] = []
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    repos.append(utils.form_db_req_name(owner, name, CID))

repo_list_name: str = args.list_name
vsc = {
    "version": 1,
    "databases": {
        "variantAnalysis": {
            "repositoryLists": [
                {
                    "name": repo_list_name,
                    "repositories": repos,
                }
            ],
            "owners": [],
            "repositories": []
        }
    },
    "selected": {
        "kind": "variantAnalysisUserDefinedList",
        "listName": repo_list_name
    }
}

gh = {
    repo_list_name: repos
}

import json
with open(args.vscode_selection, "w") as fc:
    json.dump(vsc, fc, indent=4)

with open(args.gh_mrva_selection, "w") as fc:
    json.dump(gh, fc, indent=4)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
48
client/qldbtools/bin/mc-db-initial-info
Executable file
@@ -0,0 +1,48 @@
#!/usr/bin/env python
""" Collect information about CodeQL databases from the file system and write out
    a table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import sys
import pandas as pd

from qldbtools.utils import DBInfo

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Find all CodeQL DBs in and below starting_dir and export a CSV
    file with relevant data.""")
parser.add_argument('starting_dir', type=str,
                    help='The starting directory to search for codeql.')
args = parser.parse_args()

#
#* Collect info
#
# Get the db information in list of DBInfo form
db_base: str = args.starting_dir
dbs: list[DBInfo] = list(utils.collect_dbs(db_base))
dbdf: DataFrame = pd.DataFrame([d.__dict__ for d in dbs])
#
#
#* Write info out
#
dbdf.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
@@ -0,0 +1,86 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB.
"""
import argparse
import qldbtools.utils as utils
import logging
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB. """)
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
args = parser.parse_args()
#
#* Collect the information and select subset
#
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers
    entries = df.sample(n=args.num_entries,
                        random_state=np.random.RandomState(args.seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    logging.error(f"Error creating bucket: {err}")

# Get info from dataframe and push the files
for index, row in entries[['owner', 'name', 'CID', 'path']].iterrows():
    owner, name, CID, path = row
    new_name = utils.form_db_bucket_name(owner, name, CID)
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
        logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        # 'path' is the local file being uploaded
        logging.error(f"Error uploading file {path}: {err}")

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
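The retrieval side can reuse the same naming helper; a minimal sketch under the configuration above (the CID value 0a1b2c and the target path are made up for illustration):

    from minio import Minio
    import qldbtools.utils as utils

    client = Minio("localhost:9000", access_key="user",
                   secret_key="mmusty8432", secure=False)
    obj_name = utils.form_db_bucket_name("google", "flatbuffers", "0a1b2c")
    # Download the object back to a local file.
    client.fget_object("qldb", obj_name, "/tmp/google-flatbuffers.zip")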
60
client/qldbtools/bin/mc-db-refine-info
Executable file
@@ -0,0 +1,60 @@
#!/usr/bin/env python
""" Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files. Write out an extended table in CSV format.
"""
from argparse import ArgumentParser
from typing import List

from pandas import DataFrame

import qldbtools.utils as utils
import argparse
import logging
import pandas as pd
import sys

#
#* Configure logger
#
logging.basicConfig(format='%(asctime)s %(message)s')

#
#* Process command line
#
parser: ArgumentParser = argparse.ArgumentParser(
    description="""Read an initial table of CodeQL DB information, produced by
    mc-db-initial-info, and collect more detailed information from the database
    files. Write out an extended table in CSV format. """)
args = parser.parse_args()

#
#* Collect the information
# This step is time-intensive so we save the results right after.
d: DataFrame = pd.read_csv(sys.stdin)
joiners: list[DataFrame] = []
for left_index in range(len(d)):    # range(0, len(d)-1) would skip the last row
    try:
        metac: object
        cqlc: object
        cqlc, metac = utils.extract_metadata(d.path[left_index])
    except utils.ExtractNotZipfile:
        continue
    except utils.ExtractNoCQLDB:
        continue
    try:
        detail_df: DataFrame = utils.metadata_details(left_index, cqlc, metac)
    except utils.DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df: DataFrame = pd.concat(joiners, axis=0)
full_df: DataFrame = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#
#* Save results
#
full_df.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
122
client/qldbtools/bin/mc-db-unique
Executable file
@@ -0,0 +1,122 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and produce a table with unique entries
    adding the Cumulative ID (CID) column.

    To make this happen:
    - Group entries by (owner,name,CID),
      sort each group by creationTime,
      and keep only the top (newest) element.

    - Drop rows that don't have the
      | cliVersion   |
      | creationTime |
      | language     |
      | sha          |
      columns. There are very few (16 out of 6000 on recent tests) and their DBs
      are questionable.

"""
import argparse
import logging
from argparse import Namespace
from typing import Any

from pandas import DataFrame, Series

#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information,
    narrow to <language>,
    group entries by (owner,name), sort each group by
    creationTime and keep only the top (newest) element.
    """)
parser.add_argument('language', type=str,
                    help='The language to be analyzed.')

args: Namespace = parser.parse_args()
#
#* Collect the information and select subset
#
import pandas as pd
import sys
import qldbtools.utils as utils

df2: DataFrame = pd.read_csv(sys.stdin)

#
#* Add single uniqueness field -- CID (Cumulative ID)
#
df2['CID'] = df2.apply(lambda row:
                       utils.cid_hash((
                           row['cliVersion'],
                           row['creationTime'],
                           row['language'],
                           row['sha'],
                       )), axis=1)

#
#* Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names
#
# - The essential columns are
#   | owner               |
#   | name                |
#   | language            |
#   | size                |
#   | cliVersion          |
#   | creationTime        |
#   | sha                 |
#   | baselineLinesOfCode |
#   | path                |
#
# - The rest are useful; put them last
#   | db_lang             |
#   | db_lang_displayName |
#   | db_lang_file_count  |
#   | db_lang_linesOfCode |
#   | left_index          |
#   | ctime               |
#   | primaryLanguage     |
#   | finalised           |

df3: DataFrame = df2.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                                      'language', 'sha', 'CID',
                                      'baselineLinesOfCode', 'path', 'db_lang',
                                      'db_lang_displayName', 'db_lang_file_count',
                                      'db_lang_linesOfCode', 'ctime',
                                      'primaryLanguage', 'finalised', 'left_index',
                                      'size'])

# Identify rows missing specific entries
rows = (df3['cliVersion'].isna() |
        df3['creationTime'].isna() |
        df3['language'].isna() |
        df3['sha'].isna())
df4: DataFrame = df3[~rows]

# Limit to one language
df5 = df4[df4['language'] == args.language]

# Sort and group
df_sorted: DataFrame = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique: DataFrame = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Write output
df_unique.to_csv(sys.stdout, index=False)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
35
client/qldbtools/bin/mc-db-view-info
Executable file
@@ -0,0 +1,35 @@
#!/usr/bin/env python
""" Read a table of CodeQL DB information and display it using pandasgui
"""
import argparse
import logging
import sys
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="Read a table of CodeQL DB information and display it using pandasgui")
args = parser.parse_args()
#
#* Collect the information and display it
#
import pandas as pd

df = pd.read_csv(sys.stdin)

import os
os.environ['APPDATA'] = "needed-for-pandasgui"
from pandasgui import show
show(df)

# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
120
client/qldbtools/bin/mc-hepc-init
Executable file
@@ -0,0 +1,120 @@
#!/usr/bin/env python3

import json
import hashlib
import yaml
import sys
from plumbum import cli, local
from plumbum.cmd import find, mkdir, ln, rm, mktemp, unzip, date, env

# Logging function
def log(level, message):
    colors = {
        "INFO": "\033[1;34m",
        "WARN": "\033[1;33m",
        "ERROR": "\033[1;31m",
        "RESET": "\033[0m",
    }
    timestamp = date("+%Y-%m-%d %H:%M:%S").strip()
    print(f"{colors[level]}[{timestamp}] [{level}] {message}{colors['RESET']}", file=sys.stderr)

# Generate a CID (cumulative id)
def generate_cid(cli_version, creation_time, primary_language, sha):
    hash_input = f"{cli_version} {creation_time} {primary_language} {sha}".encode()
    return hashlib.sha256(hash_input).hexdigest()[:6]

# Expand environment variables in paths
def expand_path(path):
    return local.env.expand(path)

# Process a single db.zip file
def process_db_file(zip_path, db_collection_dir):
    temp_dir = mktemp("-d").strip()
    try:
        unzip("-o", "-q", zip_path, "*codeql-database.yml", "-d", temp_dir)

        # Locate the YAML file regardless of its depth
        yaml_files = list(local.path(temp_dir).walk(
            filter=lambda p: p.name == "codeql-database.yml"))
        if not yaml_files:
            log("WARN", f"No codeql-database.yml found in {zip_path}")
            return

        yaml_path = yaml_files[0]
        with yaml_path.open("r") as f:
            yaml_data = yaml.safe_load(f)

        primary_language = yaml_data["primaryLanguage"]
        creation_metadata = yaml_data["creationMetadata"]
        sha = creation_metadata["sha"]
        cli_version = creation_metadata["cliVersion"]
        creation_time = creation_metadata["creationTime"]
        source_location_prefix = local.path(yaml_data["sourceLocationPrefix"])
        repo = source_location_prefix.name
        owner = source_location_prefix.parent.name
        cid = generate_cid(cli_version, creation_time, primary_language, sha)
        new_db_fname = f"{owner}-{repo}-ctsj-{cid}.zip"
        result_url = f"http://hepc/{db_collection_dir}/{new_db_fname}"

        metadata = {
            "git_branch": "HEAD",
            "git_commit_id": sha,
            "git_repo": repo,
            "ingestion_datetime_utc": str(creation_time),
            "result_url": result_url,
            "tool_id": "9f2f9642-febb-4435-9204-fb50bbd43de4",
            "tool_name": f"codeql-{primary_language}",
            "tool_version": cli_version,
            "projname": f"{owner}/{repo}",
        }

        metadata_file = local.path(db_collection_dir) / "metadata.json"
        with metadata_file.open("a") as f:
            json.dump(metadata, f)
            f.write("\n")

        link_path = local.path(db_collection_dir) / new_db_fname
        if not link_path.exists():
            ln("-sf", zip_path, link_path)

    except Exception as e:
        log("WARN", f"Error processing {zip_path}: {e}")
    finally:
        rm("-rf", temp_dir)

# Main application class
class DBProcessor(cli.Application):
    """
    DBProcessor processes db.zip files found in a starting directory,
    symlinks updated names in a collection directory,
    and adds a metadata information file "metadata.json" to the directory.
    """

    db_collection_dir = cli.SwitchAttr(
        "--db_collection_dir", str, mandatory=True, help="Specify the database collection directory"
    )
    starting_path = cli.SwitchAttr(
        "--starting_path", str, mandatory=True, help="Specify the starting path"
    )

    def main(self):
        db_collection_dir = expand_path(self.db_collection_dir)
        starting_path = expand_path(self.starting_path)

        mkdir("-p", db_collection_dir)
        log("INFO", f"Searching for db.zip files in {starting_path}")

        db_files = find(starting_path, "-type", "f", "-name", "db.zip",
                        "-size", "+0c").splitlines()

        if not db_files:
            log("WARN", "No db.zip files found in the specified starting path.")
            return

        for zip_path in db_files:
            process_db_file(zip_path, db_collection_dir)

        log("INFO", "Processing completed.")

if __name__ == "__main__":
    DBProcessor.run()
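Unlike the shell version, mc-hepc-init writes metadata.json as one JSON object per line (JSON Lines), so consuming it is a one-liner per record; a small sketch, with an illustrative path:

    import json

    with open("db-collection/metadata.json") as f:   # path is illustrative
        records = [json.loads(line) for line in f if line.strip()]
    for rec in records:
        print(rec["projname"], rec["tool_name"], rec["result_url"])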
89
client/qldbtools/bin/mc-hepc-serve
Executable file
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
from plumbum import cli
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import uvicorn

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

# FastAPI application
app = FastAPI()
db_dir = None  # This will be set by the CLI application

@app.get("/db/{file_path:path}")
def serve_file(file_path: str):
    """
    Serve files from the database directory, such as .zip files or metadata.json.
    """
    logger.info(f"Requested file: {file_path}")
    # Resolve symlink; with strict=True a missing target raises
    # FileNotFoundError, which must map to a 404 rather than a 500.
    try:
        resolved_path = Path(file_path).resolve(strict=True)
    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
        raise HTTPException(status_code=404, detail=f"{file_path} not found")
    logger.info(f"file resolved to: {resolved_path}")
    return FileResponse(resolved_path)


@app.get("/index")
@app.get("/api/v1/latest_results/codeql-all")
def serve_metadata_json():
    """
    Serve the metadata.json file for multiple routes.
    """
    metadata_path = Path(db_dir) / "metadata.json"
    logger.info(f"Requested metadata.json at: {metadata_path}")
    if not metadata_path.exists():
        logger.error("metadata.json not found.")
        raise HTTPException(status_code=404, detail="metadata.json not found")
    logger.info(f"Serving metadata.json from: {metadata_path}")
    return FileResponse(metadata_path)

@app.middleware("http")
async def log_request(request, call_next):
    logger.info(f"Incoming request: {request.method} {request.url}")
    response = await call_next(request)
    return response

class DBService(cli.Application):
    """
    DBService serves:
    1. CodeQL database .zip files symlinked in the --codeql-db-dir
    2. Metadata for those zip files, contained in metadata.json in the same
       directory.
    The HTTP endpoints are:
    1. /db/{filename}
    2. /index
    3. /api/v1/latest_results/codeql-all
    """

    codeql_db_dir = cli.SwitchAttr("--codeql-db-dir", str, mandatory=True,
                                   help="Directory containing CodeQL database files")
    host = cli.SwitchAttr("--host", str, default="127.0.0.1",
                          help="Host address for the HTTP server")
    port = cli.SwitchAttr("--port", int, default=8080, help="Port for the HTTP server")

    def main(self):
        global db_dir
        db_dir = Path(self.codeql_db_dir)
        if not db_dir.is_dir():
            logger.error(f"Invalid directory: {db_dir}")
            return 1

        logger.info(f"Starting server at {self.host}:{self.port}")
        logger.info(f"Serving files from directory: {db_dir}")

        # Run the FastAPI server using Uvicorn
        uvicorn.run(app, host=self.host, port=self.port)


if __name__ == "__main__":
    DBService.run()
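A rough Python equivalent of the curl test shown in hepc-serve.go, assuming the server runs on the default 127.0.0.1:8080 (requests is already in requirements.txt):

    import json
    import requests

    base = "http://127.0.0.1:8080"
    lines = requests.get(f"{base}/api/v1/latest_results/codeql-all").text.splitlines()
    first = json.loads(lines[0])
    # result_url embeds the placeholder host "hepc"; rewrite it the way the
    # sed invocation in the curl test does.
    url = first["result_url"].replace("hepc", "127.0.0.1:8080/db")
    with open("foo.zip", "wb") as f:
        f.write(requests.get(url).content)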
67
client/qldbtools/bin/mc-rows-from-mrva-list
Executable file
@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Script to list full details for a mrva-list file

1. reads files containing
    {
        "mirva-list": [
            "NLPchina/elasticsearch-sqlctsj168cc4",
            "LMAX-Exchange/disruptorctsj3e75ec",
            "justauth/JustAuthctsj8a6177",
            "FasterXML/jackson-modules-basectsj2fe248",
            "ionic-team/capacitor-pluginsctsj38d457",
            "PaddlePaddle/PaddleOCRctsj60e555",
            "elastic/apm-agent-pythonctsj21dc64",
            "flipkart-incubator/zjsonpatchctsjc4db35",
            "stephane/libmodbusctsj54237e",
            "wso2/carbon-kernelctsj5a8a6e",
            "apache/servicecomb-packctsj4d98f5"
        ]
    }
2. reads a pandas dataframe stored in a csv file
3. selects all rows from 2. that
   - contain the 'owner' column matching the string before the slash from 1. and
   - the 'name' column matching the string between the slash and the marker
     'ctsj' and
   - the 'CID' column matching the string after the marker 'ctsj'

"""
import argparse
import json
import sys

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description="""Script to list full details for a mrva-list file""")
parser.add_argument('mrva_list', type=str,
                    help='The JSON file containing the mrva-list')
parser.add_argument('info_csv', type=str,
                    help='The CSV file containing the full information')
args = parser.parse_args()

#* Step 1: Read the JSON file containing the "mirva-list"
with open(args.mrva_list, 'r') as f:
    data = json.load(f)

# Extract and parse the "mirva-list"
mirva_list = data['mirva-list']
parsed_mirva_list = []
for item in mirva_list:
    owner_name = item.split('/')[0]
    repo_name = item.split('/')[1].split('ctsj')[0]
    cid = item.split('/')[1].split('ctsj')[1]
    parsed_mirva_list.append((owner_name, repo_name, cid))

#* Step 2: Read the CSV file into a pandas dataframe
import pandas as pd
df = pd.read_csv(args.info_csv)

#* Step 3: Filter the dataframe based on the parsed "mirva-list"
filtered_df = df[
    df.apply(lambda row:
             (row['owner'], row['name'], row['CID']) in parsed_mirva_list, axis=1)]

# Write the filtered dataframe to stdout in CSV format
filtered_df.to_csv(sys.stdout, index=False)
1021
client/qldbtools/gfm.css
Normal file
File diff suppressed because it is too large
11
client/qldbtools/qldbtools.code-workspace
Normal file
@@ -0,0 +1,11 @@
{
    "folders": [
        {
            "path": "."
        }
    ],
    "settings": {
        "git.ignoreLimitWarning": true,
        "makefile.configureOnOpen": false
    }
}
2
client/qldbtools/qldbtools/__init__.py
Normal file
@@ -0,0 +1,2 @@
from . import utils
205
client/qldbtools/qldbtools/utils.py
Normal file
@@ -0,0 +1,205 @@
""" This module supports the selection of CodeQL databases based on various
|
||||
criteria.
|
||||
"""
|
||||
#* Imports
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Dict, Any
|
||||
|
||||
import pandas as pd
|
||||
import time
|
||||
import yaml
|
||||
import zipfile
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
#* Setup
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format='%(asctime)s [%(levelname)s] %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
|
||||
#* Utility functions
|
||||
def log_and_raise(message):
|
||||
logging.error(message)
|
||||
raise Exception(message)
|
||||
|
||||
def log_and_raise_e(message, exception):
|
||||
logging.error(message)
|
||||
raise exception(message)
|
||||
|
||||
def traverse_tree(root: str) -> Path:
|
||||
root_path = Path(os.path.expanduser(root))
|
||||
if not root_path.exists() or not root_path.is_dir():
|
||||
log_and_raise(f"The specified root path '{root}' does not exist or "
|
||||
"is not a directory.")
|
||||
for path in root_path.rglob('*'):
|
||||
if path.is_file():
|
||||
yield path
|
||||
elif path.is_dir():
|
||||
pass
|
||||
|
||||
@dataclass
|
||||
class DBInfo:
|
||||
ctime : str = '2024-05-13T12:04:01.593586'
|
||||
language : str = 'cpp'
|
||||
name : str = 'nanobind'
|
||||
owner : str = 'wjakob'
|
||||
path : Path = Path('/Users/.../db.zip')
|
||||
size : int = 63083064
|
||||
|
||||
|
||||
def collect_dbs(db_base: str) -> DBInfo:
|
||||
for path in traverse_tree(db_base):
|
||||
if path.name == "db.zip":
|
||||
# For the current repository, we have
|
||||
# In [292]: len(path.parts)
|
||||
# Out[292]: 14
|
||||
# and can work from the end to get relevant info from the file path.
|
||||
db = DBInfo()
|
||||
(*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
|
||||
db.path = path
|
||||
s = path.stat()
|
||||
db.size = s.st_size
|
||||
# db.ctime_raw = s.st_ctime
|
||||
# db.ctime = time.ctime(s.st_ctime)
|
||||
db.ctime = datetime.datetime.fromtimestamp(s.st_ctime).isoformat()
|
||||
yield db
|
||||
|
||||
|
||||
def extract_metadata(zipfile_path: str) -> tuple[object,object]:
|
||||
"""
|
||||
extract_metadata(zipfile)
|
||||
|
||||
Unzip zipfile into memory and return the contents of the files
|
||||
codeql-database.yml and baseline-info.json that it contains in a tuple
|
||||
"""
|
||||
codeql_content = None
|
||||
meta_content = None
|
||||
try:
|
||||
with zipfile.ZipFile(zipfile_path, 'r') as z:
|
||||
for file_info in z.infolist():
|
||||
# Filenames seen
|
||||
# java/codeql-database.yml
|
||||
# codeql_db/codeql-database.yml
|
||||
if file_info.filename.endswith('codeql-database.yml'):
|
||||
with z.open(file_info) as f:
|
||||
codeql_content = yaml.safe_load(f)
|
||||
# And
|
||||
# java/baseline-info.json
|
||||
# codeql_db/baseline-info.json
|
||||
elif file_info.filename.endswith('baseline-info.json'):
|
||||
with z.open(file_info) as f:
|
||||
meta_content = json.load(f)
|
||||
except zipfile.BadZipFile:
|
||||
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNotZipfile)
|
||||
# The baseline-info is only available in more recent CodeQL versions
|
||||
if not meta_content:
|
||||
meta_content = {'languages':
|
||||
{'no-language': {'displayName': 'no-language',
|
||||
'files': [],
|
||||
'linesOfCode': -1,
|
||||
'name': 'nolang'},
|
||||
}}
|
||||
|
||||
if not codeql_content:
|
||||
log_and_raise_e(f"Not a zipfile: '{zipfile_path}'", ExtractNoCQLDB)
|
||||
return codeql_content, meta_content
|
||||
|
||||
class ExtractNotZipfile(Exception): pass
|
||||
class ExtractNoCQLDB(Exception): pass
|
||||
|
||||
def metadata_details(left_index: int, codeql_content: object, meta_content: object) -> pd.DataFrame:
|
||||
"""
|
||||
metadata_details(codeql_content, meta_content)
|
||||
|
||||
Extract the details from metadata that will be used in DB selection and return a
|
||||
dataframe with the information. Example, cropped to fit:
|
||||
|
||||
full_df.T
|
||||
Out[535]:
|
||||
0 1
|
||||
left_index 0 0
|
||||
baselineLinesOfCode 17990 17990
|
||||
primaryLanguage cpp cpp
|
||||
sha 288920efc079766f4 282c20efc079766f4
|
||||
cliVersion 2.17.0 2.17.0
|
||||
creationTime .325253+00:00 51.325253+00:00
|
||||
finalised True True
|
||||
db_lang cpp python
|
||||
db_lang_displayName C/C++ Python
|
||||
db_lang_file_count 102 27
|
||||
db_lang_linesOfCode 17990 5586
|
||||
"""
|
||||
cqlc, metac = codeql_content, meta_content
|
||||
d = {'left_index': left_index,
|
||||
'baselineLinesOfCode': cqlc['baselineLinesOfCode'],
|
||||
'primaryLanguage': cqlc['primaryLanguage'],
|
||||
'sha': cqlc['creationMetadata'].get('sha', 'abcde0123'),
|
||||
'cliVersion': cqlc['creationMetadata']['cliVersion'],
|
||||
'creationTime': cqlc['creationMetadata']['creationTime'],
|
||||
'finalised': cqlc.get('finalised', pd.NA),
|
||||
}
|
||||
f = pd.DataFrame(d, index=[0])
|
||||
joiners: list[dict[str, int | Any]] = []
|
||||
if not ('languages' in metac):
|
||||
log_and_raise_e("Missing 'languages' in metadata", DetailsMissing)
|
||||
for lang, lang_cont in metac['languages'].items():
|
||||
d1: dict[str, int | Any] = { 'left_index' : left_index,
|
||||
'db_lang': lang }
|
||||
for prop, val in lang_cont.items():
|
||||
if prop == 'files':
|
||||
d1['db_lang_file_count'] = len(val)
|
||||
elif prop == 'linesOfCode':
|
||||
d1['db_lang_linesOfCode'] = val
|
||||
elif prop == 'displayName':
|
||||
d1['db_lang_displayName'] = val
|
||||
joiners.append(d1)
|
||||
fj: DataFrame = pd.DataFrame(joiners)
|
||||
full_df: DataFrame = pd.merge(f, fj, on='left_index', how='outer')
|
||||
return full_df
|
||||
|
||||
class DetailsMissing(Exception): pass
|
||||
|
||||
from hashlib import blake2b
|
||||
|
||||
def cid_hash(row_tuple: tuple):
|
||||
"""
|
||||
cid_hash(row_tuple)
|
||||
Take a bytes object and return hash as hex string
|
||||
"""
|
||||
h = blake2b(digest_size = 3)
|
||||
h.update(str(row_tuple).encode())
|
||||
# return int.from_bytes(h.digest(), byteorder='big')
|
||||
return h.hexdigest()
|
||||
|
||||
def form_db_bucket_name(owner, name, CID):
|
||||
"""
|
||||
form_db_bucket_name(owner, name, CID)
|
||||
Return the name to use in minio storage; this function is trivial and used to
|
||||
enforce consistent naming.
|
||||
|
||||
The 'ctsj' prefix is a random, unique key to identify the information.
|
||||
"""
|
||||
return f'{owner}${name}ctsj{CID}.zip'
|
||||
|
||||
def form_db_req_name(owner: str, name: str, CID: str) -> str:
|
||||
"""
|
||||
form_db_req_name(owner, name, CID)
|
||||
Return the name to use in mrva requests; this function is trivial and used to
|
||||
enforce consistent naming.
|
||||
|
||||
The 'ctsj' prefix is a random, unique key to identify the information.
|
||||
"""
|
||||
return f'{owner}/{name}ctsj{CID}'
|
||||
|
||||
|
||||
# Local Variables:
|
||||
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||
# End:
|
||||
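A quick illustration of the naming helpers above; the input values are made up, but the shapes are what the rest of the tooling expects:

    from qldbtools.utils import cid_hash, form_db_bucket_name, form_db_req_name

    # blake2b with digest_size=3 yields a 6-character hex CID.
    cid = cid_hash(("2.17.0", "2024-05-13T12:04:01", "cpp", "288920efc079766f4"))
    print(form_db_bucket_name("wjakob", "nanobind", cid))  # wjakob$nanobindctsj<cid>.zip
    print(form_db_req_name("wjakob", "nanobind", cid))     # wjakob/nanobindctsj<cid>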
109
client/qldbtools/requirements.txt
Normal file
@@ -0,0 +1,109 @@
annotated-types==0.7.0
anyio==4.4.0
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==24.2.0
babel==2.16.0
beautifulsoup4==4.12.3
bleach==6.1.0
blinker==1.9.0
certifi==2024.7.4
cffi==1.17.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
debugpy==1.8.5
decorator==5.1.1
defusedxml==0.7.1
executing==2.0.1
fastapi==0.115.5
fastjsonschema==2.20.0
Flask==3.1.0
fqdn==1.5.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
ipykernel==6.29.5
ipython==8.26.0
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
minio==7.2.8
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook_shim==0.2.4
numpy==2.1.0
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandocfilters==1.5.1
parso==0.8.4
pexpect==4.9.0
platformdirs==4.2.2
plumbum==1.9.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.10.2
pydantic_core==2.27.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.2
pyzmq==26.1.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.20.0
Send2Trash==1.8.3
setuptools==75.5.0
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
stack-data==0.6.3
starlette==0.41.3
terminado==0.18.1
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20240821
typing_extensions==4.12.2
tzdata==2024.1
uri-template==1.3.0
urllib3==2.2.2
uvicorn==0.32.1
wcwidth==0.2.13
webcolors==24.8.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3
61
client/qldbtools/session/db-generate-selection.py
Normal file
@@ -0,0 +1,61 @@
""" Read a table of CodeQL DB information
|
||||
and generate the selection files for
|
||||
1. the VS Code CodeQL plugin
|
||||
2. the gh-mrva command-line client
|
||||
"""
|
||||
#
|
||||
#* Collect the information and write files
|
||||
#
|
||||
import pandas as pd
|
||||
import sys
|
||||
import qldbtools.utils as utils
|
||||
import numpy as np
|
||||
import importlib
|
||||
importlib.reload(utils)
|
||||
|
||||
df0 = pd.read_csv('scratch/db-info-3.csv')
|
||||
|
||||
# Use num_entries, chosen via pseudo-random numbers
|
||||
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))
|
||||
|
||||
repos = []
|
||||
for index, row in df1[['owner', 'name', 'CID', 'path']].iterrows():
|
||||
owner, name, CID, path = row
|
||||
repos.append(utils.form_db_req_name(owner, name, CID))
|
||||
|
||||
repo_list_name = "mirva-list"
|
||||
vsc = {
|
||||
"version": 1,
|
||||
"databases": {
|
||||
"variantAnalysis": {
|
||||
"repositoryLists": [
|
||||
{
|
||||
"name": repo_list_name,
|
||||
"repositories": repos,
|
||||
}
|
||||
],
|
||||
"owners": [],
|
||||
"repositories": []
|
||||
}
|
||||
},
|
||||
"selected": {
|
||||
"kind": "variantAnalysisUserDefinedList",
|
||||
"listName": repo_list_name
|
||||
}
|
||||
}
|
||||
|
||||
gh = {
|
||||
repo_list_name: repos
|
||||
}
|
||||
|
||||
|
||||
# write the files
|
||||
import json
|
||||
with open("tmp-selection-vsc.json", "w") as fc:
|
||||
json.dump(vsc, fc, indent=4)
|
||||
with open("tmp-selection-gh.json", "w") as fc:
|
||||
json.dump(gh, fc, indent=4)
|
||||
|
||||
# Local Variables:
|
||||
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||
# End:
|
||||
59
client/qldbtools/session/db-initial-info.py
Normal file
@@ -0,0 +1,59 @@
#* Experimental work with utils.py, to be merged into it.
# The rest of this interactive script is available as cli script in
# mc-db-initial-info
from utils import *

#* Data collection
# Get the db information in list of DBInfo form
db_base = "~/work-gh/mrva/mrva-open-source-download/"
dbs = list(collect_dbs(db_base))

# Inspect:
from pprint import pprint
pprint(["len", len(dbs)])
pprint(["dbs[0]", dbs[0].__dict__])
pprint(["dbs[-1]", dbs[-1].__dict__])
#
# Get a dataframe
dbdf = pd.DataFrame([d.__dict__ for d in dbs])
#
#* Experiments with on-disk format
# Continue use of raw information in separate session.
#
# PosixPath is a problem for json and parquet
#
dbdf['path'] = dbdf['path'].astype(str)
#
dbdf.to_csv('dbdf.csv')
#
dbdf.to_csv('dbdf.csv.gz', compression='gzip', index=False)
#
dbdf.to_json('dbdf.json')
#
# dbdf.to_hdf('dbdf.h5', key='dbdf', mode='w')
#
# fast, binary
dbdf.to_parquet('dbdf.parquet')
#
# fast
import sqlite3
conn = sqlite3.connect('dbdf.db')
dbdf.to_sql('qldbs', conn, if_exists='replace', index=False)
conn.close()
#
# Sizes:
#     ls -laSr dbdf.*
#     -rw-r--r--@ 1 hohn  staff  101390 Jul 12 14:17 dbdf.csv.gz
#     -rw-r--r--@ 1 hohn  staff  202712 Jul 12 14:17 dbdf.parquet
#     -rw-r--r--@ 1 hohn  staff  560623 Jul 12 14:17 dbdf.csv
#     -rw-r--r--@ 1 hohn  staff  610304 Jul 12 14:17 dbdf.db
#     -rw-r--r--@ 1 hohn  staff  735097 Jul 12 14:17 dbdf.json
#
# parquet has many libraries, including go: xitongsys/parquet-go
# https://parquet.apache.org/
#


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
65
client/qldbtools/session/db-populate-minio.py
Normal file
@@ -0,0 +1,65 @@
import qldbtools.utils as utils
import pandas as pd
import numpy as np
import sys
from minio import Minio
from minio.error import S3Error
from pathlib import Path

#
#* Collect the information and select subset
#
df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers
    entries = df.sample(n=3,
                        random_state=np.random.RandomState(seed))
#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=False
)

# Create the bucket if it doesn't exist
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    print(f"Error creating bucket: {err}")

# (test) File paths and new names
files_to_upload = {
    "cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
    "cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}

# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, prefix / Path(local_path))
        print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        print(f"Error uploading file {local_path}: {err}")


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
46
client/qldbtools/session/db-post-refine-info.py
Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Stringify the tuple and return its blake2b hash as a hex string
    """
    h = blake2b(digest_size=3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash((row['creationTime'],
                                             row['sha'],
                                             row['cliVersion'],
                                             row['language'])
                                            ), axis=1)

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha', 'CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']          # the column name is case-sensitive; 'cid' raises a KeyError


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
118
client/qldbtools/session/db-refine-info.py
Normal file
@@ -0,0 +1,118 @@
# Experimental work to be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint

#* Reload gzipped CSV file to continue work
dbdf_1 = pd.read_csv('dbdf.csv.gz', compression='gzip')
#
# (old) Consistency check:
#     dbdf_1.columns == dbdf.columns
#     dbmask = (dbdf_1 != dbdf)
#     dbdf_1[dbmask]
#     dbdf_1[dbmask].dropna(how='all')
# ctime_raw is different in places, so don't use it.

#
#* Interact with/visualize the dataframe
# Using pandasgui -- qt
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(dbdf_1)
# Using dtale -- web
import dtale
dtale.show(dbdf_1)
#

#
#* Collect metadata from DB zip files
#
#** A manual sample
#
d = dbdf_1
left_index = 0
d.path[0]
cqlc, metac = extract_metadata(d.path[0])

cqlc['baselineLinesOfCode']
cqlc['primaryLanguage']
cqlc['creationMetadata']['sha']
cqlc['creationMetadata']['cliVersion']
cqlc['creationMetadata']['creationTime'].isoformat()
cqlc['finalised']

for lang, lang_cont in metac['languages'].items():
    print(lang)
    indent = " "
    for prop, val in lang_cont.items():
        if prop == 'files':
            print("%sfiles count %d" % (indent, len(val)))
        elif prop == 'linesOfCode':
            print("%slinesOfCode %d" % (indent, val))
        elif prop == 'displayName':
            print("%sdisplayName %s" % (indent, val))

#** Automated for all entries
# The rest of this interactive script is available as cli script in
# mc-db-refine-info
d = dbdf_1
joiners = []
for left_index in range(len(d)):    # range(0, len(d)-1) would skip the last row
    try:
        cqlc, metac = extract_metadata(d.path[left_index])
    except ExtractNotZipfile:
        continue
    except ExtractNoCQLDB:
        continue
    try:
        detail_df = metadata_details(left_index, cqlc, metac)
    except DetailsMissing:
        continue
    joiners.append(detail_df)
joiners_df = pd.concat(joiners, axis=0)
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')

#** View the full dataframe with metadata
from pandasgui import show
os.environ['APPDATA'] = "needed-for-pandasgui"
show(full_df)

#** Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names

# - The essential columns are
#   | owner               |
#   | name                |
#   | language            |
#   | size                |
#   | cliVersion          |
#   | creationTime        |
#   | sha                 |
#   | baselineLinesOfCode |
#   | path                |

# - The rest are useful; put them last
#   | db_lang             |
#   | db_lang_displayName |
#   | db_lang_file_count  |
#   | db_lang_linesOfCode |
#   | left_index          |
#   | ctime               |
#   | primaryLanguage     |
#   | finalised           |

final_df = full_df.reindex(columns=['owner', 'name', 'language', 'size', 'cliVersion',
                                    'creationTime', 'sha', 'baselineLinesOfCode', 'path',
                                    'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                                    'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                                    'finalised', 'left_index'])

final_df.to_csv('all-info-table.csv.gz', compression='gzip', index=False)

#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
41
client/qldbtools/session/db-unique-1.py
Normal file
@@ -0,0 +1,41 @@
# Experimental work for ../bin/mc-db-unique, to be merged into it.
import qldbtools.utils as utils
from pprint import pprint
import pandas as pd
# cd ../

#* Reload CSV file to continue work
df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')

# Identify rows missing specific entries
rows = (df2['cliVersion'].isna() |
        df2['creationTime'].isna() |
        df2['language'].isna() |
        df2['sha'].isna())
df2[rows]
df3 = df2[~rows]
df3

#* post-save work
df4 = pd.read_csv('scratch/db-info-3.csv')

# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()

# Find duplicates
df_dups = df_unique[df_unique['CID'].duplicated(keep=False)]
len(df_dups)
df_dups['CID']

# Set display options
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 140)


#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
#
46
client/qldbtools/session/db-unique.py
Normal file
@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")

# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language

from hashlib import blake2b

def cid_hash(row_tuple: tuple):
    """
    cid_hash(row_tuple)
    Stringify the tuple and return its blake2b hash as a hex string
    """
    h = blake2b(digest_size=3)
    h.update(str(row_tuple).encode())
    # return int.from_bytes(h.digest(), byteorder='big')
    return h.hexdigest()

# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash((row['creationTime'],
                                             row['sha'],
                                             row['cliVersion'],
                                             row['language'])
                                            ), axis=1)

df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
                           'language', 'sha', 'CID', 'baselineLinesOfCode', 'path',
                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
                           'finalised', 'left_index', 'size'])

df1['CID']          # the column name is case-sensitive; 'cid' raises a KeyError


# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
13
client/qldbtools/setup.py
Normal file
@@ -0,0 +1,13 @@
from setuptools import setup, find_packages
import glob

setup(
    name='qldbtools',
    version='0.1.0',
    description='A Python package for working with CodeQL databases',
    author='Michael Hohn',
    author_email='hohn@github.com',
    packages=['qldbtools'],
    install_requires=[],
    scripts=glob.glob("bin/mc-*"),
)
@@ -23,7 +23,8 @@ ARG CODEQL_VERSION=latest
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates
    ca-certificates \
    default-jdk

# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
@@ -32,14 +33,15 @@ RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr/
ENV CODEQL_JAVA_HOME=/usr

# Copy built agent binary from the builder stage
WORKDIR /app
23
cmd/agent/Makefile
Normal file
@@ -0,0 +1,23 @@
all: mrva-agent

MAI_TARGET := mrva-agent:0.1.24
mai: mk.mrva-agent
mrva-agent: mk.mrva-agent
mk.mrva-agent:
	cd ../../ && docker build -t mrva-agent:0.1.24 -f cmd/agent/Dockerfile .
	touch $@

mai-serve: mai
	docker run --rm -it ${MAI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MAI_TARGET}
	-rm mk.mrva-agent

mai-push: mk.mai-push
mk.mai-push: mai
	docker tag ${MAI_TARGET} ghcr.io/hohn/${MAI_TARGET}
	docker push ghcr.io/hohn/${MAI_TARGET}
	touch $@

@@ -3,170 +3,71 @@ package main

import (
	"context"
	"flag"
	"log"
	"log/slog"
	"os"
	"os/signal"
	"runtime"
	"strconv"
	"sync"
	"syscall"
	"time"

	"github.com/elastic/go-sysinfo"
	"golang.org/x/exp/slog"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/queue"
	"mrvacommander/pkg/deploy"
)

const (
	workerMemoryMB     = 2048 // 2 GB
	monitorIntervalSec = 10   // Monitor every 10 seconds
)

func calculateWorkers() int {
	host, err := sysinfo.Host()
	if err != nil {
		slog.Error("failed to get host info", "error", err)
		os.Exit(1)
	}

	memInfo, err := host.Memory()
	if err != nil {
		slog.Error("failed to get memory info", "error", err)
		os.Exit(1)
	}

	// Get available memory in MB
	totalMemoryMB := memInfo.Available / (1024 * 1024)

	// Ensure we have at least one worker
	workers := int(totalMemoryMB / workerMemoryMB)
	if workers < 1 {
		workers = 1
	}

	// Limit the number of workers to the number of CPUs
	cpuCount := runtime.NumCPU()
	if workers > cpuCount {
		workers = max(cpuCount, 1)
	}

	return workers
}

func startAndMonitorWorkers(ctx context.Context, queue queue.Queue, desiredWorkerCount int, wg *sync.WaitGroup) {
	currentWorkerCount := 0
	stopChans := make([]chan struct{}, 0)

	if desiredWorkerCount != 0 {
		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
		for i := 0; i < desiredWorkerCount; i++ {
			stopChan := make(chan struct{})
			stopChans = append(stopChans, stopChan)
			wg.Add(1)
			go agent.RunWorker(ctx, stopChan, queue, wg)
		}
		return
	}

	slog.Info("Worker count not specified, managing based on available memory and CPU")

	for {
		select {
		case <-ctx.Done():
			// signal all workers to stop
			for _, stopChan := range stopChans {
				close(stopChan)
			}
			return
		default:
			newWorkerCount := calculateWorkers()

			if newWorkerCount != currentWorkerCount {
				slog.Info(
					"Modifying worker count",
					slog.Int("current", currentWorkerCount),
					slog.Int("new", newWorkerCount))
			}

			if newWorkerCount > currentWorkerCount {
				for i := currentWorkerCount; i < newWorkerCount; i++ {
					stopChan := make(chan struct{})
					stopChans = append(stopChans, stopChan)
					wg.Add(1)
					go agent.RunWorker(ctx, stopChan, queue, wg)
				}
			} else if newWorkerCount < currentWorkerCount {
				for i := newWorkerCount; i < currentWorkerCount; i++ {
					close(stopChans[i])
				}
				stopChans = stopChans[:newWorkerCount]
			}
			currentWorkerCount = newWorkerCount

			time.Sleep(monitorIntervalSec * time.Second)
		}
	}
}

func main() {
	slog.Info("Starting agent")

	workerCount := flag.Int("workers", 0, "number of workers")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	flag.Parse()

	requiredEnvVars := []string{
		"MRVA_RABBITMQ_HOST",
		"MRVA_RABBITMQ_PORT",
		"MRVA_RABBITMQ_USER",
		"MRVA_RABBITMQ_PASSWORD",
		"CODEQL_JAVA_HOME",
		"CODEQL_CLI_PATH",
	}

	for _, envVar := range requiredEnvVars {
		if _, ok := os.LookupEnv(envVar); !ok {
			slog.Error("Missing required environment variable", "key", envVar)
			os.Exit(1)
		}
	}

	rmqHost := os.Getenv("MRVA_RABBITMQ_HOST")
	rmqPort := os.Getenv("MRVA_RABBITMQ_PORT")
	rmqUser := os.Getenv("MRVA_RABBITMQ_USER")
	rmqPass := os.Getenv("MRVA_RABBITMQ_PASSWORD")

	rmqPortAsInt, err := strconv.ParseInt(rmqPort, 10, 16)
	if err != nil {
		slog.Error("Failed to parse RabbitMQ port", slog.Any("error", err))
	// Apply 'loglevel' flag
	switch *logLevel {
	case "debug":
		slog.SetLogLoggerLevel(slog.LevelDebug)
	case "info":
		slog.SetLogLoggerLevel(slog.LevelInfo)
	case "warn":
		slog.SetLogLoggerLevel(slog.LevelWarn)
	case "error":
		slog.SetLogLoggerLevel(slog.LevelError)
	default:
		log.Printf("Invalid logging verbosity level: %s", *logLevel)
		os.Exit(1)
	}

	slog.Info("Initializing RabbitMQ queue")
	isAgent := true

	rabbitMQQueue, err := queue.InitializeRabbitMQQueue(rmqHost, int16(rmqPortAsInt), rmqUser, rmqPass, false)
	rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
	if err != nil {
		slog.Error("failed to initialize RabbitMQ", slog.Any("error", err))
		slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
		os.Exit(1)
	}
	defer rabbitMQQueue.Close()

	artifacts, err := deploy.InitMinIOArtifactStore()
	if err != nil {
		slog.Error("Failed to initialize artifact store", slog.Any("error", err))
		os.Exit(1)
	}

	databases, err := deploy.InitMinIOCodeQLDatabaseStore()
	if err != nil {
		slog.Error("Failed to initialize database store", slog.Any("error", err))
		os.Exit(1)
	}

	var wg sync.WaitGroup
	ctx, cancel := context.WithCancel(context.Background())

	go startAndMonitorWorkers(ctx, rabbitMQQueue, *workerCount, &wg)

	go agent.StartAndMonitorWorkers(ctx, artifacts, databases, rabbitMQQueue, *workerCount, &wg)
	slog.Info("Agent started")

	// Gracefully exit on SIGINT/SIGTERM
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
	<-sigChan
	slog.Info("Shutting down agent")

	// TODO: fix this to gracefully terminate agent workers during jobs
	slog.Info("Shutting down agent")
	cancel()
	wg.Wait()

	slog.Info("Agent shutdown complete")
}
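The TODO above concerns in-flight jobs: `cancel()` requests shutdown, but a worker that is mid-analysis should finish its current job before exiting. A minimal standalone sketch of that pattern follows; `runJob` and the `jobs` channel are hypothetical stand-ins, not the mrvacommander API.

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// worker exits only between jobs: cancellation is observed at the top of
// the loop, so a job that has started always runs to completion.
func worker(ctx context.Context, jobs <-chan int, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case <-ctx.Done():
			return // shutdown requested; no job is interrupted
		case j, ok := <-jobs:
			if !ok {
				return
			}
			runJob(j)
		}
	}
}

func runJob(j int) {
	time.Sleep(100 * time.Millisecond) // stand-in for a CodeQL analysis
	fmt.Println("finished job", j)
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	jobs := make(chan int, 4)
	var wg sync.WaitGroup
	wg.Add(1)
	go worker(ctx, jobs, &wg)
	jobs <- 1
	jobs <- 2
	cancel()  // request shutdown
	wg.Wait() // blocks until any in-flight job is done
}
```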
@@ -1,38 +1,56 @@
# Use the ubuntu 22.04 base image
FROM ubuntu:24.10
FROM golang:1.22 AS builder

# Set architecture to arm64
ARG ARCH=arm64
ARG AARCH=aarch64
# Copy the entire project
WORKDIR /app
COPY . .

# Set environment variables
# Download dependencies
RUN go mod download

# Set the working directory to the cmd/server subproject
WORKDIR /app/cmd/server

# Build the server
RUN go build -o /bin/mrva_server ./main.go

FROM ubuntu:24.10 as runner
ENV DEBIAN_FRONTEND=noninteractive
ENV CODEQL_VERSION=codeql-bundle-v2.17.5
ENV CODEQL_DOWNLOAD_URL=https://github.com/github/codeql-action/releases/download/${CODEQL_VERSION}/codeql-bundle-linux64.tar.gz
ENV JDK_VERSION=22.0.1
ENV JDK_DOWNLOAD_URL=https://download.oracle.com/java/21/latest/jdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz
ENV JDK_DOWNLOAD_URL=https://download.java.net/java/GA/jdk${JDK_VERSION}/c7ec1332f7bb44aeba2eb341ae18aca4/8/GPL/openjdk-${JDK_VERSION}_linux-${AARCH}_bin.tar.gz

ENV CODEQL_JAVA_HOME=/usr/local/jdk-${JDK_VERSION}
# Build argument for CodeQL version, defaulting to the latest release
ARG CODEQL_VERSION=latest

# Install necessary tools
RUN apt-get update && \
    apt-get install -y curl tar && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install packages
RUN apt-get update && apt-get install --no-install-recommends --assume-yes \
    unzip \
    curl \
    ca-certificates \
    default-jdk

# Add and extract the CodeQL bundle
RUN curl -L $CODEQL_DOWNLOAD_URL -o /tmp/${CODEQL_VERSION}.tar.gz && \
    tar -xzf /tmp/${CODEQL_VERSION}.tar.gz -C /opt && \
    rm /tmp/${CODEQL_VERSION}.tar.gz
# If the version is 'latest', get the latest release version from GitHub, unzip the bundle into /opt, and delete the archive
RUN if [ "$CODEQL_VERSION" = "latest" ]; then \
    CODEQL_VERSION=$(curl -s https://api.github.com/repos/github/codeql-cli-binaries/releases/latest | grep '"tag_name"' | sed -E 's/.*"([^"]+)".*/\1/'); \
    fi && \
    echo "Using CodeQL version $CODEQL_VERSION" && \
    curl -L "https://github.com/github/codeql-cli-binaries/releases/download/$CODEQL_VERSION/codeql-linux64.zip" -o /tmp/codeql.zip && \
    unzip /tmp/codeql.zip -d /opt && \
    rm /tmp/codeql.zip && \
    chmod -R +x /opt/codeql

# Add and extract the JDK
RUN curl -L $JDK_DOWNLOAD_URL -o /tmp/jdk-${JDK_VERSION}.tar.gz && \
    tar -xzf /tmp/jdk-${JDK_VERSION}.tar.gz -C /usr/local && \
    rm /tmp/jdk-${JDK_VERSION}.tar.gz
# Set environment variables for CodeQL
ENV CODEQL_CLI_PATH=/opt/codeql/codeql

# Set PATH
ENV PATH=/opt/codeql:"$PATH"
# Set environment variable for CodeQL for `codeql database analyze` support on ARM
# This env var has no functional effect on CodeQL when running on x86_64 linux
ENV CODEQL_JAVA_HOME=/usr

# Prepare host mount point
RUN mkdir /mrva
# Set working directory to /app

# Copy built server binary from the builder stage
COPY --from=builder /bin/mrva_server ./mrva_server

# Copy the CodeQL database directory from the builder stage (for standalone mode)
COPY --from=builder /app/cmd/server/codeql ./codeql

# Run the server with the default mode set to container
ENTRYPOINT ["./mrva_server"]
CMD ["--mode=container"]
26
cmd/server/Makefile
Normal file
@@ -0,0 +1,26 @@
all: mrva-server

MSI_TARGET := mrva-server:0.1.24
msi: mk.mrva-server
mrva-server: mk.mrva-server
mk.mrva-server:
	cd ../../ && docker build -t mrva-server:0.1.24 -f cmd/server/Dockerfile .
	touch $@

msi-serve: msi
	docker run --rm -it ${MSI_TARGET} /bin/bash

clean:
	-docker rmi -f ${MSI_TARGET}
	-rm mrva-server

msi-push: mk.msi-push
mk.msi-push: mk.mrva-server
	docker tag ${MSI_TARGET} ghcr.io/hohn/${MSI_TARGET}
	docker push ghcr.io/hohn/${MSI_TARGET}
	touch $@

msi-test:
	docker pull ghcr.io/hohn/${MSI_TARGET}
	docker run --rm -it --name test-mrva-server-codeql ghcr.io/hohn/${MSI_TARGET} sh
@@ -4,20 +4,25 @@
package main

import (
	"context"
	"flag"
	"log"
	"log/slog"
	"os"
	"os/signal"
	"path/filepath"
	"sync"
	"syscall"

	"mrvacommander/config/mcc"

	"mrvacommander/pkg/agent"
	"mrvacommander/pkg/logger"
	"mrvacommander/pkg/artifactstore"
	"mrvacommander/pkg/deploy"
	"mrvacommander/pkg/qldbstore"
	"mrvacommander/pkg/qpstore"
	"mrvacommander/pkg/queue"
	"mrvacommander/pkg/server"
	"mrvacommander/pkg/storage"
	"mrvacommander/pkg/state"
)

func main() {
@@ -25,13 +30,14 @@ func main() {
	helpFlag := flag.Bool("help", false, "Display help message")
	logLevel := flag.String("loglevel", "info", "Set log level: debug, info, warn, error")
	mode := flag.String("mode", "standalone", "Set mode: standalone, container, cluster")
	dbPathRoot := flag.String("dbpath", "", "Set the root path for the database store if using standalone mode.")

	// Custom usage function for the help flag
	flag.Usage = func() {
		log.Printf("Usage of %s:\n", os.Args[0])
		flag.PrintDefaults()
		log.Println("\nExamples:")
		log.Println("  go run main.go --loglevel=debug --mode=container")
		log.Println("  go run main.go --loglevel=debug --mode=container --dbpath=/path/to/db_dir")
	}

	// Parse the flags
@@ -58,6 +64,20 @@ func main() {
		os.Exit(1)
	}

	// Process database root if standalone and not provided
	if *mode == "standalone" && *dbPathRoot == "" {
		slog.Warn("No database root path provided.")
		// The executable's directory contains a codeql directory; use that.
		// Resolve the absolute directory based on os.Executable()
		execPath, err := os.Executable()
		if err != nil {
			slog.Error("Failed to get executable path", slog.Any("error", err))
			os.Exit(1)
		}
		*dbPathRoot = filepath.Dir(execPath) + "/codeql/dbs/"
		slog.Info("Using default database root path", "dbPathRoot", *dbPathRoot)
	}

	// Read configuration
	config := mcc.LoadConfig("mcconfig.toml")

@@ -66,91 +86,73 @@ func main() {
	log.Printf("Log Level: %s\n", *logLevel)
	log.Printf("Mode: %s\n", *mode)

	// Handle signals
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)

	// Apply 'mode' flag
	switch *mode {
	case "standalone":
		// Assemble single-process version

		sl := logger.NewLoggerSingle(&logger.Visibles{})

		// FIXME take value from configuration
		sq := queue.NewQueueSingle(2, &queue.Visibles{
			Logger: sl,
		})

		ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})

		qp, err := qpstore.NewStore(&qpstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize query pack storage")
			os.Exit(1)
		}

		ql, err := qldbstore.NewStore(&qldbstore.Visibles{})
		if err != nil {
			slog.Error("Unable to initialize ql database storage")
			os.Exit(1)
		}
		sq := queue.NewQueueSingle(2)
		ss := state.NewLocalState(config.Storage.StartingID)
		as := artifactstore.NewInMemoryArtifactStore()
		ql := qldbstore.NewLocalFilesystemCodeQLDatabaseStore(*dbPathRoot)

		server.NewCommanderSingle(&server.Visibles{
			Logger:         sl,
			Queue:          sq,
			ServerStore:    ss,
			QueryPackStore: qp,
			QLDBStore:      ql,
			State:          ss,
			Artifacts:      as,
			CodeQLDBStore:  ql,
		})

		// FIXME take value from configuration
		agent.NewAgentSingle(2, &agent.Visibles{
			Logger:         sl,
			Queue:          sq,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})
		var wg sync.WaitGroup
		ctx, cancel := context.WithCancel(context.Background())

		go agent.StartAndMonitorWorkers(ctx, as, ql, sq, 2, &wg)

		slog.Info("Started server and standalone agent")
		<-sigChan
		slog.Info("Shutting down...")
		cancel()
		wg.Wait()
		slog.Info("Agent shutdown complete")

	case "container":
		// Assemble container version
		sl := logger.NewLoggerSingle(&logger.Visibles{})
		isAgent := false

		// FIXME take value from configuration
		sq := queue.NewQueueSingle(2, &queue.Visibles{
			Logger: sl,
		})

		ss := storage.NewStorageSingle(config.Storage.StartingID, &storage.Visibles{})

		qp, err := qpstore.NewStore(&qpstore.Visibles{})
		rabbitMQQueue, err := deploy.InitRabbitMQ(isAgent)
		if err != nil {
			slog.Error("Unable to initialize query pack storage")
			slog.Error("Failed to initialize RabbitMQ", slog.Any("error", err))
			os.Exit(1)
		}
		defer rabbitMQQueue.Close()

		artifacts, err := deploy.InitMinIOArtifactStore()
		if err != nil {
			slog.Error("Failed to initialize artifact store", slog.Any("error", err))
			os.Exit(1)
		}

		ql, err := qldbstore.NewStore(&qldbstore.Visibles{})
		databases, err := deploy.InitMinIOCodeQLDatabaseStore()
		if err != nil {
			slog.Error("Unable to initialize ql database storage")
			slog.Error("Failed to initialize database store", slog.Any("error", err))
			os.Exit(1)
		}

		agent.NewAgentSingle(2, &agent.Visibles{
			Logger:         sl,
			Queue:          sq,
			QueryPackStore: qp,
			QLDBStore:      ql,
		})

		server.NewCommanderSingle(&server.Visibles{
			Logger:         sl,
			Queue:          sq,
			ServerStore:    ss,
			QueryPackStore: qp,
			QLDBStore:      ql,
			Queue:          rabbitMQQueue,
			State:          state.NewLocalState(config.Storage.StartingID),
			Artifacts:      artifacts,
			CodeQLDBStore:  databases,
		})

	case "cluster":
		// Assemble cluster version
		slog.Info("Started server in container mode.")
		<-sigChan
	default:
		slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster\n")
		slog.Error("Invalid value for --mode. Allowed values are: standalone, container, cluster")
		os.Exit(1)
	}

	slog.Info("Server shutdown complete")
}
@@ -17,15 +17,15 @@ type System struct {

func LoadConfig(fname string) *System {
	if _, err := os.Stat(fname); err != nil {
		slog.Error("Configuration file %s not found", fname)
		os.Exit(1)
		slog.Warn("Configuration file not found", "name", fname)
		return &System{}
	}

	var config System

	_, err := toml.DecodeFile(fname, &config)
	if err != nil {
		slog.Error("", err)
		slog.Error("Error decoding configuration file", slog.Any("error", err))
		os.Exit(1)
	}
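Putting the two halves of that hunk together, the patched function reads roughly as below. This is a sketch for orientation, not the file itself: the single `Storage.StartingID` field is inferred from the `config.Storage.StartingID` reference in `cmd/server/main.go`, and the repo's actual struct may carry more fields.

```go
package mcc

import (
	"log/slog"
	"os"

	"github.com/BurntSushi/toml"
)

// Inferred shape: only the field referenced elsewhere in this diff.
type System struct {
	Storage struct {
		StartingID int
	}
}

// LoadConfig now degrades gracefully: a missing file yields a zero-value
// config with a warning instead of terminating the process; a file that
// exists but fails to parse is still fatal.
func LoadConfig(fname string) *System {
	if _, err := os.Stat(fname); err != nil {
		slog.Warn("Configuration file not found", "name", fname)
		return &System{}
	}

	var config System
	if _, err := toml.DecodeFile(fname, &config); err != nil {
		slog.Error("Error decoding configuration file", slog.Any("error", err))
		os.Exit(1)
	}
	return &config
}
```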
7
demo/containers/dbsdata/Dockerfile
Normal file
@@ -0,0 +1,7 @@
# Use a minimal base image
FROM busybox

ADD dbsdata_backup.tar /

# Just run sh if this container is ever started
CMD ["sh"]
70
demo/containers/dbsdata/README.org
Normal file
@@ -0,0 +1,70 @@
* MRVA cli tools container
Set up / run:
#+BEGIN_SRC sh
# Run the raw container assembly
cd ~/work-gh/mrva/mrvacommander/
docker-compose -f docker-compose-demo-build.yml up -d

# Use the following commands to populate the mrvacommander database storage
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
mkdir -p scratch
source venv/bin/activate

./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv

./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv

./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv

./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv

# Several seconds start-up time; fast db population
./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv

# While the containers are running, this will show minio's storage. The zip files
# are split into part.* and xl.meta by minio. Use the web interface to see real
# names.
docker exec dbstore ls -R /data/mrvacommander/

# Open browser to see the file listing
open http://localhost:9001/browser/qldb

# list the volumes
docker volume ls | grep dbs
docker volume inspect mrvacommander_dbsdata

# Persist volume using container
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
# Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata
# Get the data as tar file from the image
docker run --rm \
    -v mrvacommander_dbsdata:/data \
    -v $(pwd):/backup \
    busybox sh -c "tar cvf /backup/dbsdata_backup.tar ."
# Build container with the tarball
cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata
docker build -t dbsdata-container:0.1.24 .
docker image ls | grep dbs

# check container contents
docker run -it dbsdata-container:0.1.24 /bin/sh
docker run -it dbsdata-container:0.1.24 ls data/qldb

# Tag the dbstore backing container
docker inspect dbsdata-container:0.1.24 | grep Id
docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24

# Push the pre-populated image
docker push ghcr.io/hohn/dbsdata-container:0.1.24

# Check the tagged image
docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \
    ls data/qldb

# Shut down the container assembly
docker-compose -f docker-compose-demo-build.yml down
#+END_SRC
11
doc/README.md
Normal file
@@ -0,0 +1,11 @@
## The doc/ directory
The `doc/` directory serves as the home for documentation. This is the place to
put refined documentation after it has gone through `notes/`. The contents of
this directory should be accessible to a broad audience including prospective
users, active users, and developers. Highly technical material is aimed at

1. The note authors and
2. Developers of the project

and need not be meaningful to casual users.
129
docker-compose-demo-build.yml
Normal file
@@ -0,0 +1,129 @@
# This is the compose configuration used to build / prepopulate the containers for
# a demo.
services:
  dbssvc:
    ## image: ghcr.io/hohn/dbsdata-container:0.1.24
    build:
      context: .
      dockerfile: ./demo/containers/dbsdata/Dockerfile
    container_name: dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    ## image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    build:
      context: .
      dockerfile: ./client/containers/ghmrva/Dockerfile
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    ## image: ghcr.io/hohn/code-server-initialized:0.1.24
    build:
      context: ./client/containers/vscode
      dockerfile: Dockerfile
    ports:
      - "9080:9080"
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    build:
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    ports:
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    ## image: ghcr.io/hohn/mrva-agent:0.1.24
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
116
docker-compose-demo.yml
Normal file
@@ -0,0 +1,116 @@
services:
  dbssvc:
    # dbsdata-container:0.1.24
    image: ghcr.io/hohn/dbsdata-container:0.1.24
    command: tail -f /dev/null # Keep the container running
    # volumes:
    #   - /qldb # Directory inside the container that contains the data
    volumes:
      - dbsdata:/data
    container_name: dbssvc
    networks:
      - backend

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: dbstore
    ports:
      - "9000:9000"
      - "9001:9001"
    env_file:
      - path: .env.container
        required: true
    command: server /data/mrvacommander/dbstore-data --console-address ":9001"
    depends_on:
      - dbssvc
    # volumes_from:
    #   - dbsdata # Use the volumes from dbsdata container
    volumes:
      - dbsdata:/data/mrvacommander/dbstore-data
    networks:
      - backend

  client-ghmrva:
    image: ghcr.io/hohn/client-ghmrva-container:0.1.24
    network_mode: "service:server" # Share the 'server' network namespace
    environment:
      - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server'

  code-server:
    image: ghcr.io/hohn/code-server-initialized:0.1.24
    ports:
      - "9080:9080"
    # XX: Include codeql binary in code-server (if it's not there already)
    environment:
      - PASSWORD=mrva

  rabbitmq:
    image: rabbitmq:3-management
    hostname: rabbitmq
    container_name: rabbitmq
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    ports:
      - "5672:5672"
      - "15672:15672"
    healthcheck:
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10
    networks:
      - backend

  server:
    image: ghcr.io/hohn/mrva-server:0.1.24
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      # The artifactstore is only populated at runtime so there is no need
      # for Docker storage; a directory is fine.
      - ./qpstore-data:/data
    networks:
      - backend

  agent:
    image: ghcr.io/hohn/mrva-agent:0.1.24
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

volumes:
  dbsdata:
@@ -7,37 +7,36 @@ services:
    volumes:
      - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro
      - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro
    expose:
      - "5672"
      - "15672"
    ports:
      - "5672:5672"
      - "15672:15672"
    networks:
      - backend
    healthcheck:
      test: [ "CMD", "nc", "-z", "localhost", "5672" ]
      interval: 5s
      timeout: 15s
      retries: 1
      test: rabbitmq-diagnostics check_port_connectivity
      interval: 30s
      timeout: 30s
      retries: 10

  server:
    build:
      context: ./cmd/server
      dockerfile: Dockerfile
      context: .
      dockerfile: ./cmd/server/Dockerfile
    command: [ '--mode=container', '--loglevel=debug' ]
    container_name: server
    stop_grace_period: 1s # Reduce the timeout period for testing
    environment:
      - MRVA_SERVER_ROOT=/mrva/mrvacommander/cmd/server
    command: sh -c "tail -f /dev/null"
    stop_grace_period: 1s
    ports:
      - "8080:8080"
    volumes:
      - ./:/mrva/mrvacommander
      # - "8081:8080" # host:container for proxy
      - "8080:8080" # host:container
    depends_on:
      - rabbitmq
      - dbstore
      - artifactstore
    networks:
      - backend
    env_file:
      - path: ./.env.container
        required: true

  dbstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
@@ -45,52 +44,46 @@ services:
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: user
      MINIO_ROOT_PASSWORD: mmusty8432

    env_file:
      - path: .env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      - ./dbstore-data:/data
    networks:
      - backend

  qpstore:
  artifactstore:
    image: minio/minio:RELEASE.2024-06-11T03-13-30Z
    container_name: qpstore
    container_name: artifactstore
    ports:
      - "19000:9000" # host:container
      - "19001:9001"
    environment:
      MINIO_ROOT_USER: user
      MINIO_ROOT_PASSWORD: mmusty8432

    env_file:
      - path: ./.env.container
        required: true
    command: server /data --console-address ":9001"
    volumes:
      - ./qpstore-data:/data

    networks:
      - backend

  agent:
    build:
      context: .
      dockerfile: ./cmd/agent/Dockerfile
    command: [ '--loglevel=debug' ]
    container_name: agent
    depends_on:
      - rabbitmq
      - minio
    environment:
      MRVA_RABBITMQ_HOST: rabbitmq
      MRVA_RABBITMQ_PORT: 5672
      MRVA_RABBITMQ_USER: user
      MRVA_RABBITMQ_PASSWORD: password
      - dbstore
      - artifactstore
    env_file:
      - path: ./.env.container
        required: true
    networks:
      - backend

networks:
  backend:
    driver: bridge

# Remove named volumes to use bind mounts
# volumes:
#   minio-data:
4
go.mod
@@ -9,7 +9,7 @@ require (
	github.com/gorilla/mux v1.8.1
	github.com/minio/minio-go/v7 v7.0.71
	github.com/rabbitmq/amqp091-go v1.10.0
	golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
	golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f
	gopkg.in/yaml.v3 v3.0.1
	gorm.io/driver/postgres v1.5.9
	gorm.io/gorm v1.25.10
@@ -35,7 +35,7 @@ require (
	github.com/rs/xid v1.5.0 // indirect
	golang.org/x/crypto v0.24.0 // indirect
	golang.org/x/net v0.23.0 // indirect
	golang.org/x/sync v0.7.0 // indirect
	golang.org/x/sync v0.9.0 // indirect
	golang.org/x/sys v0.21.0 // indirect
	golang.org/x/text v0.16.0 // indirect
	gopkg.in/ini.v1 v1.67.0 // indirect
4
go.sum
@@ -68,10 +68,14 @@ golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
@@ -6,6 +6,8 @@
  ],
  "settings": {
    "sarif-viewer.connectToGithubCodeScanning": "off",
    "codeQL.githubDatabase.download": "never"
    "codeQL.githubDatabase.download": "never",
    "makefile.configureOnOpen": false,
    "git.ignoreLimitWarning": true
  }
}
5
notes/Makefile
Normal file
@@ -0,0 +1,5 @@
view: docker-demo-container-deps.pdf
	open $<

docker-demo-container-deps.pdf: docker-demo-container-deps.dot
	dot -Tpdf $< > $@
9
notes/README.md
Normal file
@@ -0,0 +1,9 @@
## The notes/ directory
The `notes/` directory serves as a staging directory for documentation. This is
the place to develop documentation and short notes. The contents of this
directory should be accessible to
1. The note authors and
2. Developers of the project

It need not be meaningful to casual users.
471
notes/cli-end-to-end-demo-build.org
Normal file
@@ -0,0 +1,471 @@
# -*- coding: utf-8 -*-
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil

* End-to-end example of CLI use
This document describes the build steps for the demo containers.

* Database Acquisition
For this demo, the data is preloaded via container. To set up the container:

#+BEGIN_SRC sh
# On host, run
docker exec -it dbstore /bin/bash

# In the container
ls -la /data/dbstore-data/
ls /data/dbstore-data/qldb/ | wc -l
#+END_SRC
Here we use a small sample of open-source repositories, 23 in all.

* Repository Selection
When using the full MRVA system, we select a small subset of the repositories
made available in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small
collection -- 23 repositories -- and here we further narrow the selection to 12.

The full list:
#+BEGIN_SRC text
ls -1 /data/dbstore-data/qldb/
'BoomingTech$Piccoloctsj6d7177.zip'
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
'StanfordLegion$legionctsj39cbe4.zip'
'USCiLab$cerealctsj264953.zip'
'WinMerge$winmergectsj101305.zip'
'draios$sysdigctsj12c02d.zip'
'gildor2$UEViewerctsjfefdd8.zip'
'git-for-windows$gitctsjb7c2bd.zip'
'google$orbitctsj9bbeaf.zip'
'libfuse$libfusectsj7a66a4.zip'
'luigirizzo$netmapctsj6417fa.zip'
'mawww$kakounectsjc54fab.zip'
'microsoft$node-native-keymapctsj4cc9a2.zip'
'nem0$LumixEnginectsjfab756.zip'
'pocoproject$pococtsj26b932.zip'
'quickfix$quickfixctsjebfd13.zip'
'rui314$moldctsjfec16a.zip'
'swig$swigctsj78bcd3.zip'
'tdlib$telegram-bot-apictsj8529d9.zip'
'timescale$timescaledbctsjf617cf.zip'
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
'xrootd$xrootdctsje4b745.zip'
#+END_SRC

The selection of 12 repositories, from an initial collection of 6000, was made
using a collection of Python/pandas scripts made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
package. The resulting selection, in the format expected by the VS Code
extension, follows.
#+BEGIN_SRC text
cat /data/qldbtools/scratch/vscode-selection.json
{
  "version": 1,
  "databases": {
    "variantAnalysis": {
      "repositoryLists": [
        {
          "name": "mirva-list",
          "repositories": [
            "xoreaxeaxeax/movfuscatorctsj8f7e5b",
            "microsoft/node-native-keymapctsj4cc9a2",
            "BoomingTech/Piccoloctsj6d7177",
            "USCiLab/cerealctsj264953",
            "KhronosGroup/OpenXR-SDKctsj984ee6",
            "tdlib/telegram-bot-apictsj8529d9",
            "WinMerge/winmergectsj101305",
            "timescale/timescaledbctsjf617cf",
            "pocoproject/pococtsj26b932",
            "quickfix/quickfixctsjebfd13",
            "libfuse/libfusectsj7a66a4"
          ]
        }
      ],
      "owners": [],
      "repositories": []
    }
  },
  "selected": {
    "kind": "variantAnalysisUserDefinedList",
    "listName": "mirva-list"
  }
}
#+END_SRC

This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository
Selection]] in the detailed version of this document.

** Optional: The meaning of the names
The repository names all end with =ctsj= followed by 6 hex digits like
=ctsj4cc9a2=.

The information critical for the selection of databases is in the columns
1. owner
2. name
3. language
4. "sha"
5. "cliVersion"
6. "creationTime"

There are others that may be useful, but they are not strictly required.

The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
   created from. Required to distinguish query results over the evolution of
   a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
   Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
   very handy) for following the evolution of query results over time.

There is a computed column, CID. The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing. Together with (owner, repo) it
provides a unique index for every DB.
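As an illustration only, such a digest can be derived as in the sketch below. The field order, separator, and choice of SHA-256 here are assumptions for the sketch, not the actual =qldbtools= implementation.

#+BEGIN_SRC go
// Sketch: a 6-hex-character CID from the four metadata fields.
// Field order, separator, and SHA-256 are assumptions; qldbtools may differ.
package main

import (
	"crypto/sha256"
	"fmt"
)

func cid(cliVersion, creationTime, language, sha string) string {
	sum := sha256.Sum256([]byte(cliVersion + "|" + creationTime + "|" + language + "|" + sha))
	return fmt.Sprintf("%x", sum)[:6] // keep the first 6 hex digits
}

func main() {
	fmt.Println(cid("2.17.5", "2024-06-11T03:13:30Z", "cpp", "b7c2bd"))
}
#+END_SRC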
For this document, we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv
#+END_SRC

Note that these use pseudo-random numbers, so the selection is in fact
deterministic.

* Starting the server
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
#+END_SRC

Make sure Docker is installed and running.
With docker-compose set up and this repository cloned, we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml up -d
#+END_SRC
and wait until the log output no longer changes.
It should look like
#+BEGIN_SRC text
docker-compose -f docker-compose-demo.yml up -d
[+] Running 27/6
 ✔ dbstore Pulled 1.1s
 ✔ artifactstore Pulled 1.1s
 ✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s
 ✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s
 ✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s
 ✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s
[+] Running 9/9
 ✔ Container mrvadata Started 0.3s
 ✔ Container mrvacommander-client-qldbtools-1 Started 0.3s
 ✔ Container mrvacommander-client-ghmrva-1 Running 0.0s
 ✔ Container mrvacommander-code-server-1 Running 0.0s
 ✔ Container artifactstore Running 0.0s
 ✔ Container rabbitmq Running 0.0s
 ✔ Container dbstore Started 0.4s
 ✔ Container agent Started 0.5s
 ✔ Container server Started 0.5s
#+END_SRC

The content is prepopulated in the =dbstore= container.

** Optional: Inspect the Backing Store
As a completely optional step, you can inspect the backing store:
#+BEGIN_SRC sh
docker exec -it dbstore /bin/bash
ls /data/qldb/
# 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip'
# 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip'
# ...
#+END_SRC

** Optional: Inspect the MinIO DB
As another completely optional step, you can inspect the minio DB contents if
you have the minio cli installed:
#+BEGIN_SRC sh
# Configuration
MINIO_ALIAS="qldbminio"
MINIO_URL="http://localhost:9000"
MINIO_ROOT_USER="user"
MINIO_ROOT_PASSWORD="mmusty8432"
QL_DB_BUCKET_NAME="qldb"

# Check for MinIO client
if ! command -v mc &> /dev/null
then
    echo "MinIO client (mc) not found."
fi

# Configure MinIO client
mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD

# Show contents
mc ls qldbminio/qldb
#+END_SRC

* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.

** Run MRVA from command line
# From ~/work-gh/mrva/gh-mrva

1. Check the mrva cli
#+BEGIN_SRC sh
docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h
#+END_SRC

2. Set up the configuration
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'mkdir -p /root/.config/gh-mrva/'

cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/.config/gh-mrva/config.yml' <<eof
codeql_path: not-used/$HOME/work-gh
controller: not-used/mirva-controller
list_file: /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof

# check:
docker exec -i mrvacommander-client-ghmrva-1 ls /root/.config/gh-mrva/config.yml
docker exec -i mrvacommander-client-ghmrva-1 cat /root/.config/gh-mrva/config.yml
#+END_SRC

3. Provide the repository list file
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'mkdir -p /root/work-gh/mrva/gh-mrva'

cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' <<eof
{
    "mirva-list": [
        "xoreaxeaxeax/movfuscatorctsj8f7e5b",
        "microsoft/node-native-keymapctsj4cc9a2",
        "BoomingTech/Piccoloctsj6d7177",
        "USCiLab/cerealctsj264953",
        "KhronosGroup/OpenXR-SDKctsj984ee6",
        "tdlib/telegram-bot-apictsj8529d9",
        "WinMerge/winmergectsj101305",
        "timescale/timescaledbctsjf617cf",
        "pocoproject/pococtsj26b932",
        "quickfix/quickfixctsjebfd13",
        "libfuse/libfusectsj7a66a4"
    ]
}
eof
#+END_SRC

4. Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' <<eof
/**
,* @name pickfun
,* @description pick function from FlatBuffers
,* @kind problem
,* @id cpp-flatbuffer-func
,* @problem.severity warning
,*/

import cpp

from Function f
where
  f.getName() = "MakeBinaryRegion" or
  f.getName() = "microprotocols_add"
select f, "definition of MakeBinaryRegion"

eof
#+END_SRC

5. Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    submit --language cpp --session mirva-session-1360 \
    --list mirva-list \
    --query /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC

6. Check the status
#+BEGIN_SRC sh
# Check the status
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    status --session mirva-session-1360
#+END_SRC

7. Download the sarif files, and optionally also get the databases. For the
current query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    download --session mirva-session-1360 \
    --download-dbs \
    --output-dir mirva-session-1360
#+END_SRC

** TODO Write query that has some results
XX:

In this case, the trivial =alu_mul=,
alu_mul for https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c
#+BEGIN_SRC java
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/

import cpp

from FunctionCall fc
where
  fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
#+END_SRC


Repeat the submit steps with this query
1. [X] --
2. [X] --
3. [ ] Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <<eof
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/

import cpp

from FunctionCall fc
where
  fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
eof
#+END_SRC

4. [-] Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    submit --language cpp --session mirva-session-1490 \
    --list mirva-list \
    --query /root/work-gh/mrva/gh-mrva/Alu_Mul.ql
#+END_SRC

- [X] XX:

  server | 2024/09/27 20:03:16 DEBUG Processed request info location="{Key:3 Bucket:packs}" language=cpp
  server | 2024/09/27 20:03:16 WARN No repositories found for analysis
  server | 2024/09/27 20:03:16 DEBUG Queueing analysis jobs count=0
  server | 2024/09/27 20:03:16 DEBUG Forming and sending response for submitted analysis job id=3

  NO: debug in the server container
  #+BEGIN_SRC sh
  docker exec -it server /bin/bash

  apt-get update
  apt-get install delve

  replace
  ENTRYPOINT ["./mrva_server"]
  CMD ["--mode=container"]
  #+END_SRC

- [ ] XX:
  The dbstore is empty -- see http://localhost:9001/browser
  It must be populated properly; then save the image.

5. [ ] Check the status
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    status --session mirva-session-1490
#+END_SRC

This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-1490
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 7
Total findings: 618
Repositories with findings:
quickfix/quickfixctsjebfd13 (cpp-fprintf-call): 5
libfuse/libfusectsj7a66a4 (cpp-fprintf-call): 146
xoreaxeaxeax/movfuscatorctsj8f7e5b (cpp-fprintf-call): 80
pocoproject/pococtsj26b932 (cpp-fprintf-call): 17
BoomingTech/Piccoloctsj6d7177 (cpp-fprintf-call): 10
tdlib/telegram-bot-apictsj8529d9 (cpp-fprintf-call): 247
WinMerge/winmergectsj101305 (cpp-fprintf-call): 113
#+END_SRC
6. [ ] Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    download --session mirva-session-1490 \
    --download-dbs \
    --output-dir mirva-session-1490

# And list them:
\ls -la *1490*
#+END_SRC

7. [ ] Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.

Prepare the source directory so the viewer can be pointed at it:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-1490

unzip -qd BoomingTech_Piccoloctsj6d7177_1_db BoomingTech_Piccoloctsj6d7177_1_db.zip

cd BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/
unzip -qd src src.zip
#+END_SRC

Use the viewer:
#+BEGIN_SRC sh
code BoomingTech_Piccoloctsj6d7177_1.sarif

# For lauxlib.c, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder -name lauxlib.c

# Here: ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder/engine/3rdparty/lua-5.4.4/lauxlib.c
#+END_SRC

8. [ ] (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see [[https://github.com/hohn/sarif-cli/][sarif-cli]].
A minimal sketch of such a conversion follows.
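The conversion can be as small as the sketch below; it assumes the standard SARIF 2.1.0 layout (=runs[].results[]= with a single physical location) and is not part of =sarif-cli= or mrvacommander. Run it with =go run sarif2csv.go < file.sarif > file.csv=.

#+BEGIN_SRC go
// sarif2csv.go -- a minimal sketch, assuming standard SARIF 2.1.0 fields.
package main

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"os"
)

// Only the fields we extract; everything else in the SARIF is ignored.
type sarifLog struct {
	Runs []struct {
		Results []struct {
			RuleID  string `json:"ruleId"`
			Message struct {
				Text string `json:"text"`
			} `json:"message"`
			Locations []struct {
				PhysicalLocation struct {
					ArtifactLocation struct {
						URI string `json:"uri"`
					} `json:"artifactLocation"`
					Region struct {
						StartLine int `json:"startLine"`
					} `json:"region"`
				} `json:"physicalLocation"`
			} `json:"locations"`
		} `json:"results"`
	} `json:"runs"`
}

func main() {
	var doc sarifLog
	if err := json.NewDecoder(os.Stdin).Decode(&doc); err != nil {
		fmt.Fprintln(os.Stderr, "decode:", err)
		os.Exit(1)
	}
	w := csv.NewWriter(os.Stdout)
	defer w.Flush()
	w.Write([]string{"ruleId", "file", "line", "message"})
	for _, run := range doc.Runs {
		for _, r := range run.Results {
			file, line := "", 0
			if len(r.Locations) > 0 {
				loc := r.Locations[0].PhysicalLocation
				file = loc.ArtifactLocation.URI
				line = loc.Region.StartLine
			}
			w.Write([]string{r.RuleID, file, fmt.Sprint(line), r.Message.Text})
		}
	}
}
#+END_SRC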
* Running the CodeQL VS Code plugin
- [ ] XX: include the *custom* codeql plugin in the container.
* Ending the session
Shut down docker via
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml down
#+END_SRC

* Footnotes
[fn:1] The =csvkit= can be installed into the same Python virtual environment as
the =qldbtools=.
493
notes/cli-end-to-end-demo.org
Normal file
@@ -0,0 +1,493 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil
|
||||
|
||||
* End-to-end example of CLI use
|
||||
This document describes a complete cycle of the MRVA workflow, but using
|
||||
pre-populated data. The steps included are
|
||||
1. aquiring CodeQL databases
|
||||
2. selection of databases
|
||||
3. configuration and use of the command-line client
|
||||
4. server startup
|
||||
5. submission of the jobs
|
||||
6. retrieval of the results
|
||||
7. examination of the results
|
||||
|
||||
* Start the containers
|
||||
#+BEGIN_SRC sh
|
||||
cd ~/work-gh/mrva/mrvacommander/
|
||||
|
||||
docker-compose -f docker-compose-demo.yml down --volumes --remove-orphans
|
||||
docker-compose -f docker-compose-demo.yml up --build
|
||||
#+END_SRC
|
||||
|
||||
* Database Aquisition
|
||||
General database aquisition is beyond the scope of this document as it is very specific
|
||||
to an organization's environment.
|
||||
|
||||
For this demo, the data is preloaded via container. To inspect it:
|
||||
|
||||
#+BEGIN_SRC sh
|
||||
# On host, run
|
||||
docker exec -it dbstore /bin/bash
|
||||
|
||||
# In the container
|
||||
ls -la /data/mrvacommander/dbstore-data/qldb
|
||||
|
||||
# Or in one step
|
||||
docker exec -it dbstore ls -la /data/mrvacommander/dbstore-data/qldb
|
||||
#+END_SRC
|
||||
|
||||
Here we use a small sample of an example for open-source
|
||||
repositories, 23 in all.
|
||||
|
||||
* Repository Selection
|
||||
When using all of the MRVA system, we select a small subset of repositories
|
||||
available to you in [[*Database Aquisition][Database Aquisition]]. For this demo we include a small
|
||||
collection -- 23 repositories -- and here we further narrow the selection to 12.
|
||||
|
||||
The full list
|
||||
#+BEGIN_SRC text
|
||||
ls -1 /data/dbstore-data/qldb/
|
||||
'BoomingTech$Piccoloctsj6d7177.zip'
|
||||
'KhronosGroup$OpenXR-SDKctsj984ee6.zip'
|
||||
'OpenRCT2$OpenRCT2ctsj975d7c.zip'
|
||||
'StanfordLegion$legionctsj39cbe4.zip'
|
||||
'USCiLab$cerealctsj264953.zip'
|
||||
'WinMerge$winmergectsj101305.zip'
|
||||
'draios$sysdigctsj12c02d.zip'
|
||||
'gildor2$UEViewerctsjfefdd8.zip'
|
||||
'git-for-windows$gitctsjb7c2bd.zip'
|
||||
'google$orbitctsj9bbeaf.zip'
|
||||
'libfuse$libfusectsj7a66a4.zip'
|
||||
'luigirizzo$netmapctsj6417fa.zip'
|
||||
'mawww$kakounectsjc54fab.zip'
|
||||
'microsoft$node-native-keymapctsj4cc9a2.zip'
|
||||
'nem0$LumixEnginectsjfab756.zip'
|
||||
'pocoproject$pococtsj26b932.zip'
|
||||
'quickfix$quickfixctsjebfd13.zip'
|
||||
'rui314$moldctsjfec16a.zip'
|
||||
'swig$swigctsj78bcd3.zip'
|
||||
'tdlib$telegram-bot-apictsj8529d9.zip'
|
||||
'timescale$timescaledbctsjf617cf.zip'
|
||||
'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip'
|
||||
'xrootd$xrootdctsje4b745.zip'
|
||||
#+END_SRC
|
||||
|
||||
The selection of 12 repositories, from an initial collection of 6000 was made
|
||||
using a collection of Python/pandas scripts made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]]
|
||||
package. The resulting selection, in the format expected by the VS Code
|
||||
extension, follows.
|
||||
#+BEGIN_SRC text
|
||||
cat /data/qldbtools/scratch/vscode-selection.json
|
||||
{
|
||||
"version": 1,
|
||||
"databases": {
|
||||
"variantAnalysis": {
|
||||
"repositoryLists": [
|
||||
{
|
||||
"name": "mirva-list",
|
||||
"repositories": [
|
||||
"xoreaxeaxeax/movfuscatorctsj8f7e5b",
|
||||
"microsoft/node-native-keymapctsj4cc9a2",
|
||||
"BoomingTech/Piccoloctsj6d7177",
|
||||
"USCiLab/cerealctsj264953",
|
||||
"KhronosGroup/OpenXR-SDKctsj984ee6",
|
||||
"tdlib/telegram-bot-apictsj8529d9",
|
||||
"WinMerge/winmergectsj101305",
|
||||
"timescale/timescaledbctsjf617cf",
|
||||
"pocoproject/pococtsj26b932",
|
||||
"quickfix/quickfixctsjebfd13",
|
||||
"libfuse/libfusectsj7a66a4"
|
||||
]
|
||||
}
|
||||
],
|
||||
"owners": [],
|
||||
"repositories": []
|
||||
}
|
||||
},
|
||||
"selected": {
|
||||
"kind": "variantAnalysisUserDefinedList",
|
||||
"listName": "mirva-list"
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository
|
||||
Selection]] in the detailed version of this document.
|
||||
|
||||
** Optional: The meaning of the names
|
||||
The repository names all end with =ctsj= followed by 6 hex digits like
|
||||
=ctsj4cc9a2=.
|
||||
|
||||
The information critial for selection of databases are the columns
|
||||
1. owner
|
||||
2. name
|
||||
3. language
|
||||
4. "sha"
|
||||
5. "cliVersion"
|
||||
6. "creationTime"
|
||||
|
||||
There are others that may be useful, but they are not strictly required.
|
||||
|
||||
The critical ones deserve more explanation:
|
||||
1. "sha": The =git= commit SHA of the repository the CodeQL database was
|
||||
created from. Required to distinguish query results over the evolution of
|
||||
a code base.
|
||||
2. "cliVersion": The version of the CodeQL CLI used to create the database.
|
||||
Required to identify advances/regressions originating from the CodeQL binary.
|
||||
3. "creationTime": The time the database was created. Required (or at least
|
||||
very handy) for following the evolution of query results over time.
|
||||
|
||||
There is a computed column, CID. The CID column combines
|
||||
- cliVersion
|
||||
- creationTime
|
||||
- language
|
||||
- sha
|
||||
into a single 6-character string via hashing. Together with (owner, repo) it
|
||||
provides a unique index for every DB.
|
||||
|
||||
|
||||
For this document, we simply use a pseudo-random selection of 11 databases via
|
||||
#+BEGIN_SRC sh
|
||||
./bin/mc-db-generate-selection -n 11 \
|
||||
scratch/vscode-selection.json \
|
||||
scratch/gh-mrva-selection.json \
|
||||
< scratch/db-info-3.csv
|
||||
#+END_SRC
|
||||
|
||||
Note that these use pseudo-random numbers, so the selection is in fact
|
||||
deterministic.

* Starting the server
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/ && cd ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
#+END_SRC

Make sure Docker is installed and running.
With docker-compose set up and this repository cloned, we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml up -d
#+END_SRC
and wait until the log output no longer changes.
The output should look like
#+BEGIN_SRC text
docker-compose -f docker-compose-demo.yml up -d
[+] Running 27/6
 ✔ dbstore Pulled 1.1s
 ✔ artifactstore Pulled 1.1s
 ✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s
 ✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s
 ✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s
 ✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s
[+] Running 9/9
 ✔ Container mrvadata Started 0.3s
 ✔ Container mrvacommander-client-qldbtools-1 Started 0.3s
 ✔ Container mrvacommander-client-ghmrva-1 Running 0.0s
 ✔ Container mrvacommander-code-server-1 Running 0.0s
 ✔ Container artifactstore Running 0.0s
 ✔ Container rabbitmq Running 0.0s
 ✔ Container dbstore Started 0.4s
 ✔ Container agent Started 0.5s
 ✔ Container server Started 0.5s
#+END_SRC


The content is prepopulated in the =dbstore= container.

** Optional: Inspect the Backing Store
As a completely optional step, you can inspect the backing store:
#+BEGIN_SRC sh
docker exec -it dbstore /bin/bash
ls /data/qldb/
# 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip'
# 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip'
# ...
#+END_SRC

** Optional: Inspect the MinIO DB
As another completely optional step, you can inspect the MinIO DB contents if
you have the MinIO client installed:
#+BEGIN_SRC sh
# Configuration
MINIO_ALIAS="qldbminio"
MINIO_URL="http://localhost:9000"
MINIO_ROOT_USER="user"
MINIO_ROOT_PASSWORD="mmusty8432"
QL_DB_BUCKET_NAME="qldb"

# Check for MinIO client
if ! command -v mc &> /dev/null
then
    echo "MinIO client (mc) not found."
    exit 1
fi

# Configure MinIO client
mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD

# Show contents
mc ls qldbminio/qldb
#+END_SRC
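
If you prefer Python to the =mc= client, the same listing can be done with the
=minio= package, using the credentials from the configuration above:
#+BEGIN_SRC python
from minio import Minio  # pip install minio

client = Minio("localhost:9000", access_key="user",
               secret_key="mmusty8432", secure=False)
# List the CodeQL database archives in the qldb bucket.
for obj in client.list_objects("qldb"):
    print(obj.object_name, obj.size)
#+END_SRC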

* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.

** Run MRVA from command line
# From ~/work-gh/mrva/gh-mrva

1. Check the mrva cli
#+BEGIN_SRC sh
docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h
#+END_SRC

2. Set up the configuration
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'mkdir -p /root/.config/gh-mrva/'

cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/.config/gh-mrva/config.yml' <<eof
codeql_path: not-used/$HOME/work-gh
controller: not-used/mirva-controller
list_file: /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof

# check:
docker exec -i mrvacommander-client-ghmrva-1 ls /root/.config/gh-mrva/config.yml
docker exec -i mrvacommander-client-ghmrva-1 cat /root/.config/gh-mrva/config.yml
#+END_SRC

3. Provide the repository list file
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'mkdir -p /root/work-gh/mrva/gh-mrva'

cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' <<eof
{
    "mirva-list": [
        "xoreaxeaxeax/movfuscatorctsj8f7e5b",
        "microsoft/node-native-keymapctsj4cc9a2",
        "BoomingTech/Piccoloctsj6d7177",
        "USCiLab/cerealctsj264953",
        "KhronosGroup/OpenXR-SDKctsj984ee6",
        "tdlib/telegram-bot-apictsj8529d9",
        "WinMerge/winmergectsj101305",
        "timescale/timescaledbctsjf617cf",
        "pocoproject/pococtsj26b932",
        "quickfix/quickfixctsjebfd13",
        "libfuse/libfusectsj7a66a4"
    ]
}
eof
#+END_SRC

4. Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' <<eof
/**
,* @name pickfun
,* @description pick function from FlatBuffers
,* @kind problem
,* @id cpp-flatbuffer-func
,* @problem.severity warning
,*/

import cpp

from Function f
where
  f.getName() = "MakeBinaryRegion" or
  f.getName() = "microprotocols_add"
select f, "definition of MakeBinaryRegion"

eof

#+END_SRC

5. Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    submit --language cpp --session mirva-session-1360 \
    --list mirva-list \
    --query /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC

6. Check the status
#+BEGIN_SRC sh
# Check the status
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    status --session mirva-session-1360
#+END_SRC
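
To wait for completion without retyping this, the status command can be
polled; a sketch that shells out to the exact command above and looks for the
'Status: succeeded' line seen in the sample output later in this document:
#+BEGIN_SRC python
import subprocess, time

CMD = ["docker", "exec", "-i", "mrvacommander-client-ghmrva-1",
       "/usr/local/bin/gh-mrva", "status", "--session", "mirva-session-1360"]

# Poll every 10 seconds until the run reports success.
while True:
    out = subprocess.run(CMD, capture_output=True, text=True).stdout
    if "Status: succeeded" in out:
        print(out)
        break
    time.sleep(10)
#+END_SRC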

7. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    download --session mirva-session-1360 \
    --download-dbs \
    --output-dir mirva-session-1360
#+END_SRC

** TODO Write query that has some results
XX:

In this case, the trivial =alu_mul= query, targeting =alu_mul= in
https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c
#+BEGIN_SRC java
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/

import cpp

from FunctionCall fc
where
  fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
#+END_SRC


Repeat the submit steps with this query
1. [X] --
2. [X] --
3. [ ] Provide the CodeQL query
#+BEGIN_SRC sh
cat | docker exec -i mrvacommander-client-ghmrva-1 \
    sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <<eof
/**
,* @name findalu
,* @description find calls to a function
,* @kind problem
,* @id cpp-call
,* @problem.severity warning
,*/

import cpp

from FunctionCall fc
where
  fc.getTarget().getName() = "alu_mul"
select fc, "call of alu_mul"
eof
#+END_SRC

4. [-] Submit the mrva job
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    submit --language cpp --session mirva-session-1490 \
    --list mirva-list \
    --query /root/work-gh/mrva/gh-mrva/Alu_Mul.ql
#+END_SRC

- [X] XX:

server | 2024/09/27 20:03:16 DEBUG Processed request info location="{Key:3 Bucket:packs}" language=cpp
server | 2024/09/27 20:03:16 WARN No repositories found for analysis
server | 2024/09/27 20:03:16 DEBUG Queueing analysis jobs count=0
server | 2024/09/27 20:03:16 DEBUG Forming and sending response for submitted analysis job id=3

NO: debug in the server container
#+BEGIN_SRC sh
docker exec -it server /bin/bash

apt-get update
apt-get install delve

# replace
#     ENTRYPOINT ["./mrva_server"]
#     CMD ["--mode=container"]

#+END_SRC

- [ ] XX:
The dbstore is empty -- see http://localhost:9001/browser;
it must be populated properly, then the image saved.

5. [ ] Check the status
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    status --session mirva-session-1490
#+END_SRC

This time we have results:
#+BEGIN_SRC text
...
Run name: mirva-session-1490
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 7
Total findings: 618
Repositories with findings:
    quickfix/quickfixctsjebfd13 (cpp-fprintf-call): 5
    libfuse/libfusectsj7a66a4 (cpp-fprintf-call): 146
    xoreaxeaxeax/movfuscatorctsj8f7e5b (cpp-fprintf-call): 80
    pocoproject/pococtsj26b932 (cpp-fprintf-call): 17
    BoomingTech/Piccoloctsj6d7177 (cpp-fprintf-call): 10
    tdlib/telegram-bot-apictsj8529d9 (cpp-fprintf-call): 247
    WinMerge/winmergectsj101305 (cpp-fprintf-call): 113
#+END_SRC
6. [ ] Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
docker exec -i mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva \
    download --session mirva-session-1490 \
    --download-dbs \
    --output-dir mirva-session-1490

# And list them:
\ls -la *1490*
#+END_SRC

7. [ ] Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.

Prepare the source directory so the viewer can be pointed at it:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-1490

unzip -qd BoomingTech_Piccoloctsj6d7177_1_db BoomingTech_Piccoloctsj6d7177_1_db.zip

cd BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/
unzip -qd src src.zip
#+END_SRC

Use the viewer:
#+BEGIN_SRC sh
code BoomingTech_Piccoloctsj6d7177_1.sarif

# For lauxlib.c, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder -name lauxlib.c

# Here: ~/work-gh/mrva/gh-mrva/mirva-session-1490/BoomingTech_Piccoloctsj6d7177_1_db/codeql_db/src/home/runner/work/bulk-builder/bulk-builder/engine/3rdparty/lua-5.4.4/lauxlib.c
#+END_SRC

8. [ ] (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see [[https://github.com/hohn/sarif-cli/][sarif-cli]].
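
For a quick hand-rolled conversion, the =results= array of a SARIF 2.1.0 file
can be flattened directly; a sketch (field names follow the SARIF standard,
not the =sarif-cli= output format):
#+BEGIN_SRC python
import csv, json

with open("BoomingTech_Piccoloctsj6d7177_1.sarif") as f:
    sarif = json.load(f)

with open("findings.csv", "w", newline="") as out:
    w = csv.writer(out)
    w.writerow(["ruleId", "message", "file", "line"])
    for run in sarif["runs"]:
        for r in run["results"]:
            loc = r["locations"][0]["physicalLocation"]
            w.writerow([r["ruleId"], r["message"]["text"],
                        loc["artifactLocation"]["uri"],
                        loc["region"]["startLine"]])
#+END_SRC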



* Running the CodeQL VS Code plugin
- [ ] XX: include the *custom* codeql plugin in the container.
* Ending the session
Shut down docker via
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose -f docker-compose-demo.yml down
#+END_SRC

* Footnotes
[fn:1] The =csvkit= package can be installed into the same Python virtual environment as
the =qldbtools=.
524
notes/cli-end-to-end-detailed.org
Normal file
@@ -0,0 +1,524 @@
# -*- coding: utf-8 -*-

* End-to-end example of CLI use
This document describes a complete cycle of the MRVA workflow. The steps
included are
1. acquiring CodeQL databases
2. selection of databases
3. configuration and use of the command-line client
4. server startup
5. submission of the jobs
6. retrieval of the results
7. examination of the results

* Database Acquisition
General database acquisition is beyond the scope of this document as it is very specific
to an organization's environment. Here we use an example for open-source
repositories, [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]], which downloads the top 1000 databases for each of
C/C++, Java, Python -- 3000 CodeQL DBs in all.

The scripts in [[https://github.com/hohn/mrva-open-source-download.git][mrva-open-source-download]] were used to download on two distinct dates,
resulting in close to 6000 databases to choose from. The DBs were directly
saved to the file system, resulting in paths like
: .../mrva-open-source-download/repos-2024-04-29/google/re2/code-scanning/codeql/databases/cpp/db.zip
and
: .../mrva-open-source-download/repos/google/re2/code-scanning/codeql/databases/cpp/db.zip
Note that the only information in these paths is (owner, repository, download
date). The databases contain more information, which is used in the [[*Repository Selection][Repository
Selection]] section.

To get a collection of databases, follow the [[https://github.com/hohn/mrva-open-source-download?tab=readme-ov-file#mrva-download][instructions]].
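
The layout above is regular enough to enumerate programmatically; a sketch
that walks such a download tree and recovers (owner, repository, language)
from each path -- the directory layout is the one shown above, everything
else here is illustrative:
#+BEGIN_SRC python
import os

root = os.path.expanduser("~/work-gh/mrva/mrva-open-source-download")
for dirpath, _, files in os.walk(root):
    if "db.zip" in files:
        parts = dirpath.split(os.sep)
        # .../<download>/<owner>/<repo>/code-scanning/codeql/databases/<language>
        owner, repo, language = parts[-6], parts[-5], parts[-1]
        print(owner, repo, language, os.path.join(dirpath, "db.zip"))
#+END_SRC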

* Repository Selection
Here we select a small subset of those repositories using a collection of scripts
made for the purpose, the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]] package.
Clone the full repository before continuing:
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva/ && cd ~/work-gh/mrva/
git clone git@github.com:hohn/mrvacommander.git
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
#+END_SRC

After performing the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][installation]] steps, we can follow the [[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#command-line-use][command line]] use
instructions to collect all the database information from the file system into a
single table:

#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && mkdir -p scratch
source venv/bin/activate
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > scratch/db-info-1.csv
#+END_SRC

The [[https://csvkit.readthedocs.io/en/latest/scripts/csvstat.html][=csvstat=]] tool gives a good overview[fn:1]; here is a pruned version of the
output:
#+BEGIN_SRC text
csvstat scratch/db-info-1.csv
  1. "ctime"
        Type of data: DateTime
        ...

  2. "language"
        Type of data: Text
        Non-null values: 6000
        Unique values: 3
        Longest value: 6 characters
        Most common values: cpp (2000x)
                            java (2000x)
                            python (2000x)
  3. "name"
        ...
  4. "owner"
        Type of data: Text
        Non-null values: 6000
        Unique values: 2189
        Longest value: 29 characters
        Most common values: apache (258x)
                            google (86x)
                            microsoft (64x)
                            spring-projects (56x)
                            alibaba (42x)
  5. "path"
        ...
  6. "size"
        Type of data: Number
        Non-null values: 6000
        Unique values: 5354
        Smallest value: 0
        Largest value: 1,885,008,701
        Sum: 284,766,326,993
        ...

Row count: 6000

#+END_SRC
The information critical for selection is in the columns
1. owner
2. name
3. language
The size column is interesting: a smallest value of 0 indicates some error,
while our largest DB is 1.88 GB in size.

This information is not sufficient, so we collect more. The following script
extracts information from every database on disk and takes more time accordingly
-- about 30 seconds on my laptop.
#+BEGIN_SRC sh
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
#+END_SRC
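
The extra fields come from metadata stored inside each database archive. A
sketch of the idea, reading the =codeql-database.yml= that CodeQL places in
every database; whether =mc-db-refine-info= reads exactly this file, and the
zip-internal path, are assumptions:
#+BEGIN_SRC python
import io, zipfile
import yaml  # pip install pyyaml

def db_metadata(db_zip: str) -> dict:
    """Pull creation metadata out of a CodeQL database archive."""
    with zipfile.ZipFile(db_zip) as z:
        name = next(n for n in z.namelist()
                    if n.endswith("codeql-database.yml"))
        meta = yaml.safe_load(io.TextIOWrapper(z.open(name)))
    cm = meta.get("creationMetadata") or {}
    return {"sha": cm.get("sha"),
            "cliVersion": cm.get("cliVersion"),
            "creationTime": cm.get("creationTime"),
            "primaryLanguage": meta.get("primaryLanguage"),
            "baselineLinesOfCode": meta.get("baselineLinesOfCode")}
#+END_SRC
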
This new table is a merge of all the available meta-information with the
previous table, causing the increase in the number of rows. The following
columns are now present:
#+BEGIN_SRC text
0:$ csvstat scratch/db-info-2.csv
  1. "ctime"
  2. "language"
  3. "name"
  4. "owner"
  5. "path"
  6. "size"
  7. "left_index"
  8. "baselineLinesOfCode"
        Type of data: Number
        Contains null values: True (excluded from calculations)
        Non-null values: 11920
        Unique values: 4708
        Smallest value: 0
        Largest value: 22,028,732
        Sum: 3,454,019,142
        Mean: 289,766.707
        Median: 54,870.5
  9. "primaryLanguage"
 10. "sha"
        Type of data: Text
        Contains null values: True (excluded from calculations)
        Non-null values: 11920
        Unique values: 4928
 11. "cliVersion"
        Type of data: Text
        Contains null values: True (excluded from calculations)
        Non-null values: 11920
        Unique values: 59
        Longest value: 6 characters
        Most common values: 2.17.0 (3850x)
                            2.18.0 (3622x)
                            2.17.2 (1097x)
                            2.17.6 (703x)
                            2.16.3 (378x)
 12. "creationTime"
        Type of data: Text
        Contains null values: True (excluded from calculations)
        Non-null values: 11920
        Unique values: 5345
        Longest value: 32 characters
        Most common values: None (19x)
                            2024-03-19 01:40:14.507823+00:00 (16x)
                            2024-02-29 19:12:59.785147+00:00 (16x)
                            2024-01-30 22:24:17.411939+00:00 (14x)
                            2024-04-05 09:34:03.774619+00:00 (14x)
 13. "finalised"
        Type of data: Boolean
        Contains null values: True (excluded from calculations)
        Non-null values: 11617
        Unique values: 2
        Most common values: True (11617x)
                            None (322x)
 14. "db_lang"
 15. "db_lang_displayName"
 16. "db_lang_file_count"
 17. "db_lang_linesOfCode"

Row count: 11939
#+END_SRC
There are several columns that are critical, namely
1. "sha"
2. "cliVersion"
3. "creationTime"
The others may be useful, but they are not strictly required.
The critical ones deserve more explanation:
1. "sha": The =git= commit SHA of the repository the CodeQL database was
   created from. Required to distinguish query results over the evolution of
   a code base.
2. "cliVersion": The version of the CodeQL CLI used to create the database.
   Required to identify advances/regressions originating from the CodeQL binary.
3. "creationTime": The time the database was created. Required (or at least
   very handy) for following the evolution of query results over time.
This leaves us with a row count of 11939.

To reduce that count, start with
#+BEGIN_SRC sh
./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
#+END_SRC
and get a reduced count and a new column:
#+BEGIN_SRC text
csvstat scratch/db-info-3.csv
  3. "CID"

        Type of data: Text
        Contains null values: False
        Non-null values: 5344
        Unique values: 5344
        Longest value: 6 characters
        Most common values: 1f8d99 (1x)
                            9ab87a (1x)
                            76fdc7 (1x)
                            b21305 (1x)
                            4ae79b (1x)

#+END_SRC
From the docs: 'Read a table of CodeQL DB information and produce a table with unique entries
adding the Cumulative ID (CID) column.'

The CID column combines
- cliVersion
- creationTime
- language
- sha
into a single 6-character string via hashing; together with (owner, repo) this
provides a unique index for every DB.

We still have too many rows. The tables are all in CSV format, so you can use
your favorite tool to narrow the selection for your needs. For this document,
we simply use a pseudo-random selection of 11 databases via
#+BEGIN_SRC sh
./bin/mc-db-generate-selection -n 11 \
    scratch/vscode-selection.json \
    scratch/gh-mrva-selection.json \
    < scratch/db-info-3.csv
#+END_SRC

Note that these use pseudo-random numbers, so the selection is in fact
deterministic. The selected databases in =gh-mrva-selection.json=, to be used
in section [[*Running the gh-mrva command-line client][Running the gh-mrva command-line client]], are the following:
#+begin_src javascript
{
    "mirva-list": [
        "NLPchina/elasticsearch-sqlctsj168cc4",
        "LMAX-Exchange/disruptorctsj3e75ec",
        "justauth/JustAuthctsj8a6177",
        "FasterXML/jackson-modules-basectsj2fe248",
        "ionic-team/capacitor-pluginsctsj38d457",
        "PaddlePaddle/PaddleOCRctsj60e555",
        "elastic/apm-agent-pythonctsj21dc64",
        "flipkart-incubator/zjsonpatchctsjc4db35",
        "stephane/libmodbusctsj54237e",
        "wso2/carbon-kernelctsj5a8a6e",
        "apache/servicecomb-packctsj4d98f5"
    ]
}
#+end_src
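
If the pseudo-random pick is not what you want, the CSV tables can first be
narrowed in pandas; a sketch using column names from the =csvstat= output
above (the filter values are arbitrary examples):
#+BEGIN_SRC python
import pandas as pd

df = pd.read_csv("scratch/db-info-3.csv")
# Keep non-empty cpp databases built by a recent CLI; the string
# comparison on versions is crude but adequate for 2.1x releases.
narrowed = df[(df["language"] == "cpp")
              & (df["size"] > 0)
              & (df["cliVersion"] >= "2.17.0")]
narrowed.to_csv("scratch/db-info-4.csv", index=False)
print(len(df), "->", len(narrowed))
#+END_SRC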

* Starting the server
The full instructions for building and running the server are in [[../README.md]] under
'Steps to build and run the server'.

With docker-compose set up and this repository cloned as previously described,
we just run
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander
docker-compose up --build
#+END_SRC
and wait until the log output no longer changes.

Then, use the following command to populate the mrvacommander database storage:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools && \
    ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
#+END_SRC

* Running the gh-mrva command-line client
The first run uses the test query to verify basic functionality, but it returns
no results.
** Run MRVA from command line
1. Install the mrva cli
#+BEGIN_SRC sh
mkdir -p ~/work-gh/mrva && cd ~/work-gh/mrva
git clone https://github.com/hohn/gh-mrva.git
cd ~/work-gh/mrva/gh-mrva && git checkout mrvacommander-end-to-end

# Build it
go mod edit -replace="github.com/GitHubSecurityLab/gh-mrva=$HOME/work-gh/mrva/gh-mrva"
go build .

# Sanity check
./gh-mrva -h
#+END_SRC

2. Set up the configuration
#+BEGIN_SRC sh
mkdir -p ~/.config/gh-mrva
cat > ~/.config/gh-mrva/config.yml <<eof
# The following options are supported
# codeql_path: Path to CodeQL distribution (checkout of codeql repo)
# controller: NWO of the MRVA controller to use. Not used here.
# list_file: Path to the JSON file containing the target repos

# XX:
codeql_path: $HOME/work-gh/not-used
controller: not-used/mirva-controller
list_file: $HOME/work-gh/mrva/gh-mrva/gh-mrva-selection.json
eof
#+END_SRC

3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
    ~/work-gh/mrva/gh-mrva/gh-mrva-selection.json

cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-4160 \
    --list mirva-list \
    --query ~/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql
#+END_SRC

4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/

# Check the status
./gh-mrva status --session mirva-session-4160
#+END_SRC

5. Download the sarif files, optionally also get databases. For the current
query / database combination there are zero results, hence no downloads.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-4160 \
    --output-dir mirva-session-4160

# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-4160 \
    --download-dbs \
    --output-dir mirva-session-4160
#+END_SRC

** Write query that has some results
First, get the list of paths corresponding to the previously selected
databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
. venv/bin/activate
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
    scratch/db-info-3.csv > scratch/selection-full-info
csvcut -c path scratch/selection-full-info
#+END_SRC

Use one of these databases to write a query. It need not produce results.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code gh-mrva.code-workspace
#+END_SRC
In this case, use the trivial =findPrintf= query, in the file =Fprintf.ql=:
#+BEGIN_SRC java
/**
,* @name findPrintf
,* @description find calls to plain fprintf
,* @kind problem
,* @id cpp-fprintf-call
,* @problem.severity warning
,*/

import cpp

from FunctionCall fc
where
  fc.getTarget().getName() = "fprintf"
select fc, "call of fprintf"
#+END_SRC


Repeat the submit steps with this query
1. --
2. --
3. Submit the mrva job
#+BEGIN_SRC sh
cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \
    ~/work-gh/mrva/gh-mrva/gh-mrva-selection.json

cd ~/work-gh/mrva/gh-mrva/
./gh-mrva submit --language cpp --session mirva-session-3660 \
    --list mirva-list \
    --query ~/work-gh/mrva/gh-mrva/Fprintf.ql
#+END_SRC

4. Check the status
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
./gh-mrva status --session mirva-session-3660
#+END_SRC

This time we have results:
#+BEGIN_SRC text
...
0:$ Run name: mirva-session-3660
Status: succeeded
Total runs: 1
Total successful scans: 11
Total failed scans: 0
Total skipped repositories: 0
Total skipped repositories due to access mismatch: 0
Total skipped repositories due to not found: 0
Total skipped repositories due to no database: 0
Total skipped repositories due to over limit: 0
Total repositories with findings: 8
Total findings: 7055
Repositories with findings:
    lz4/lz4ctsj2479c5 (cpp-fprintf-call): 307
    Mbed-TLS/mbedtlsctsj17ef85 (cpp-fprintf-call): 6464
    tsl0922/ttydctsj2e3faa (cpp-fprintf-call): 11
    medooze/media-server-nodectsj5e30b3 (cpp-fprintf-call): 105
    ampl/gslctsj4b270e (cpp-fprintf-call): 102
    baidu/sofa-pbrpcctsjba3501 (cpp-fprintf-call): 24
    dlundquist/sniproxyctsj3d83e7 (cpp-fprintf-call): 34
    hyprwm/Hyprlandctsjc2425f (cpp-fprintf-call): 8
#+END_SRC

5. Download the sarif files, optionally also get databases.
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
# Just download the sarif files
./gh-mrva download --session mirva-session-3660 \
    --output-dir mirva-session-3660

# Download the sarif files and CodeQL dbs
./gh-mrva download --session mirva-session-3660 \
    --download-dbs \
    --output-dir mirva-session-3660
#+END_SRC
#+BEGIN_SRC sh
# And list them:
\ls -la *3660*
drwxr-xr-x@ 18 hohn staff 576 Nov 14 11:54 .
drwxrwxr-x@ 56 hohn staff 1792 Nov 14 11:54 ..
-rwxr-xr-x@ 1 hohn staff 9035554 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1.sarif
-rwxr-xr-x@ 1 hohn staff 57714273 Nov 14 11:54 Mbed-TLS_mbedtlsctsj17ef85_1_db.zip
-rwxr-xr-x@ 1 hohn staff 132484 Nov 14 11:54 ampl_gslctsj4b270e_1.sarif
-rwxr-xr-x@ 1 hohn staff 99234414 Nov 14 11:54 ampl_gslctsj4b270e_1_db.zip
-rwxr-xr-x@ 1 hohn staff 34419 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1.sarif
-rwxr-xr-x@ 1 hohn staff 55177796 Nov 14 11:54 baidu_sofa-pbrpcctsjba3501_1_db.zip
-rwxr-xr-x@ 1 hohn staff 80744 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1.sarif
-rwxr-xr-x@ 1 hohn staff 2183836 Nov 14 11:54 dlundquist_sniproxyctsj3d83e7_1_db.zip
-rwxr-xr-x@ 1 hohn staff 169079 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1.sarif
-rwxr-xr-x@ 1 hohn staff 21383303 Nov 14 11:54 hyprwm_Hyprlandctsjc2425f_1_db.zip
-rwxr-xr-x@ 1 hohn staff 489064 Nov 14 11:54 lz4_lz4ctsj2479c5_1.sarif
-rwxr-xr-x@ 1 hohn staff 2991310 Nov 14 11:54 lz4_lz4ctsj2479c5_1_db.zip
-rwxr-xr-x@ 1 hohn staff 141336 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1.sarif
-rwxr-xr-x@ 1 hohn staff 38217703 Nov 14 11:54 medooze_media-server-nodectsj5e30b3_1_db.zip
-rwxr-xr-x@ 1 hohn staff 33861 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1.sarif
-rwxr-xr-x@ 1 hohn staff 5140183 Nov 14 11:54 tsl0922_ttydctsj2e3faa_1_db.zip
#+END_SRC

6. Use the [[https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer][SARIF Viewer]] plugin in VS Code to open and review the results.

Prepare the source directory so the viewer can be pointed at it:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660

unzip -qd ampl_gslctsj4b270e_1_db ampl_gslctsj4b270e_1_db.zip

cd ampl_gslctsj4b270e_1_db/codeql_db
unzip -qd src src.zip
#+END_SRC

Use the viewer in VS Code:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/mirva-session-3660
code ampl_gslctsj4b270e_1.sarif

# For the file vegas.c, when asked, point the source viewer to
find ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src/ \
    -name vegas.c

# Here: ~/work-gh/mrva/gh-mrva/mirva-session-3660/ampl_gslctsj4b270e_1_db/codeql_db/src//home/runner/work/bulk-builder/bulk-builder/monte/vegas.c
#+END_SRC

7. (optional) Large result sets are more easily filtered via
dataframes or spreadsheets. Convert the SARIF to CSV if needed; see [[https://github.com/hohn/sarif-cli/][sarif-cli]].

* Running the VS Code plugin
** Compile and Load the Extension
#+BEGIN_SRC sh
cd ~/work-gh/mrva/vscode-codeql
git checkout mrva-standalone

# Install nvm
brew install nvm
[ -s "/opt/homebrew/opt/nvm/nvm.sh" ] && \. "/opt/homebrew/opt/nvm/nvm.sh"
# or
# curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

# Install correct node version
cd ./extensions/ql-vscode
nvm install

# Build the extension
cd ~/work-gh/mrva/vscode-codeql/extensions/ql-vscode
npm install
npm run build

# Install extension
cd ~/work-gh/mrva/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix
# Extension 'vscode-codeql-1.13.2-dev.2024.12.10.23.51.57.vsix' was successfully installed.
#+END_SRC

** Continue the CLI Sample using the Extension
Start VS Code:
#+BEGIN_SRC sh
cd ~/work-gh/mrva/gh-mrva/
code .
#+END_SRC

Set up 'variant analysis repositories', continuing from the
=scratch/vscode-selection.json= file formed previously:
1. Select '{}' and open the db selection file
2. paste
   : ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/vscode-selection.json
3. open =Fprintf.ql=
4. right click =>= 'run variant analysis'

The extension will assemble the pack, send it to the server, and display
results as they arrive.

* Footnotes
[fn:1] The =csvkit= package can be installed into the same Python virtual environment as
the =qldbtools=.
24
notes/docker-demo-container-deps.dot
Normal file
@@ -0,0 +1,24 @@
digraph G {
    // Define nodes
    mrvadata [label="mrvadata" shape=box];
    client_qldbtools [label="client-qldbtools" shape=box];
    client_ghmrva [label="client-ghmrva" shape=box];
    code_server [label="code-server" shape=box];
    rabbitmq [label="rabbitmq" shape=box];
    server [label="server" shape=box];
    dbstore [label="dbstore" shape=box];
    artifactstore [label="artifactstore" shape=box];
    agent [label="agent" shape=box];

    // Define edges (dependencies)
    server -> rabbitmq;
    server -> dbstore;
    server -> artifactstore;
    dbstore -> mrvadata;
    agent -> rabbitmq;
    agent -> dbstore;
    agent -> artifactstore;

    // Define styling
    edge [arrowhead=normal];
}
BIN
notes/docker-demo-container-deps.pdf
Normal file
Binary file not shown.
170
notes/l3style.css
Normal file
@@ -0,0 +1,170 @@

/* The sum of width and margin percentages must not exceed 100.*/
div#toc {
    /* Use a moving table of contents (scrolled away for long contents) */
    /*
     * float: left;
     */
    /* OR */
    /* use a fixed-position toc */
    position: fixed;
    top: 80px;
    left: 0px;

    /* match toc, org-content, postamble */
    width: 26%;
    margin-right: 1%;
    margin-left: 1%;
}

div#org-content {
    float: right;
    width: 70%;
    /* match toc, org-content, postamble */
    margin-left: 28%;
}

div#postamble {
    float: right;
    width: 70%;
    /* match toc, org-content, postamble */
    margin-left: 28%;
}


p.author {
    clear: both;
    font-size: 1em;
    margin-left: 25%;
}

p.date {
    clear: both;
    font-size: 1em;
    margin-left: 25%;
}

#toc * {
    font-size:1em;
}

#toc h3 {
    font-weight:normal;
    margin:1em 0 0 0;
    padding: 4px 0;
    border-bottom:1px solid #666;
    text-transform:uppercase;
}

#toc ul, #toc li {
    margin:0;
    padding:0;
    list-style:none;
}

#toc li {
    display:inline;
}

#toc ul li a {
    text-decoration:none;
    display:block;
    margin:0;
    padding:4px 6px;
    color:#990000;
    border-bottom:1px solid #aaa;
}

#toc ul ul li a {
    padding-left:18px;
    color:#666;
}

#toc ul li a:hover {
    background-color:#F6F6F6;
}


/* Description lists. */
dt {
    font-style: bold;
    background-color:#F6F6F6;
}


/* From org-mode page. */
body {
    font-family: avenir, Lao Sangam MN, Myanmar Sangam MN, Songti SC, Kohinoor Devanagari, Menlo, avenir, helvetica, verdana, sans-serif;
    font-size: 100%;
    margin-top: 5%;
    margin-bottom: 8%;
    background: white; color: black;
    margin-left: 3% !important; margin-right: 3% !important;
}

h1 {
    font-size: 2em;
    color: #cc8c00;
    /* padding-top: 5px; */
    border-bottom: 2px solid #aaa;
    width: 70%;
    /* match toc, org-content, postamble */
    margin-left: 28%; /* Align with div#content */
}

h2 {
    font-size: 1.5em;
    padding-top: 1em;
    border-bottom: 1px solid #ccc;
}

h3 {
    font-size: 1.2em;
    padding-top: 0.5em;
    border-bottom: 1px solid #eee;
}

.todo, .deadline { color: red; font-style: italic }
.done { color: green; font-style: italic }
.timestamp { color: grey }
.timestamp-kwd { color: CadetBlue; }
.tag { background-color:lightblue; font-weight:normal; }

.target { background-color: lavender; }

.menu {
    color: #666;
}

.menu a:link {
    color: #888;
}
.menu a:active {
    color: #888;
}
.menu a:visited {
    color: #888;
}

img { align: center; }

pre {
    padding: 5pt;
    font-family: andale mono, vera sans mono, monospace, courier ;
    font-size: 0.8em;
    background-color: #f0f0f0;
}

code {
    font-family: andale mono, vera sans mono, monospace, courier ;
    font-size: 0.8em;
    background-color: #f0f0f0;
}

table { border-collapse: collapse; }

td, th {
    vertical-align: top;
    border: 1pt solid #ADB9CC;
}

127
notes/system-structure.dot
Normal file
@@ -0,0 +1,127 @@
digraph DockerComposeDemo {
    rankdir=LR; // Left-to-right layout
    node [shape=plaintext fontname="Helvetica"];
    edge [arrowsize=0.5];

    // Title
    label="Container Dependencies for Demo";
    labelloc=top;
    fontsize=20;
    fontname="Helvetica";

    // Define nodes with clickable Dockerfile references
    dbssvc [
        href="../demo/containers/dbsdata/Dockerfile"
        target="_blank"
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>dbssvc</b></td></tr>
                <tr><td align="left"><font point-size="10">Dockerfile: ./demo/containers/dbsdata/Dockerfile</font></td></tr>
            </table>
        >
    ];

    dbstore [
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>dbstore</b></td></tr>
                <tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
            </table>
        >
    ];

    client_ghmrva [
        href="../client/containers/ghmrva/Dockerfile"
        target="_blank"
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>client-ghmrva</b></td></tr>
                <tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/ghmrva/Dockerfile</font></td></tr>
                <tr><td port="slot1"></td></tr>
                <tr><td port="slot2"></td></tr>
                <tr><td port="slot3"></td></tr>
            </table>
        >
    ];

    code_server [
        href="../client/containers/vscode/Dockerfile"
        target="_blank"
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>code-server</b></td></tr>
                <tr><td align="left"><font point-size="10">Dockerfile: ./client/containers/vscode/Dockerfile</font></td></tr>
            </table>
        >
    ];

    rabbitmq [
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>rabbitmq</b></td></tr>
                <tr><td align="left"><font point-size="10">Image: rabbitmq:3-management</font></td></tr>
            </table>
        >
    ];

    artifactstore [
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>artifactstore</b></td></tr>
                <tr><td align="left"><font point-size="10">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</font></td></tr>
            </table>
        >
    ];

    agent [
        href="../cmd/agent/Dockerfile"
        target="_blank"
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>agent</b></td></tr>
                <tr><td align="left"><font point-size="10">Dockerfile: ./cmd/agent/Dockerfile</font></td></tr>
            </table>
        >
    ];

    // Expanded 'server' node with handler names and Dockerfile reference
    server [
        href="../cmd/server/Dockerfile"
        target="_blank"
        shape=plaintext
        label=<
            <table border="1" cellborder="0" cellspacing="0" cellpadding="4">
                <tr><td colspan="1" bgcolor="lightblue"><b>server</b></td></tr>
                <tr><td align="left"><font point-size="10">Dockerfile: ./cmd/server/Dockerfile</font></td></tr>
                <tr><td port="c_MRVARequest" align="left"><font point-size="10">c.MRVARequest</font></td></tr>
                <tr><td port="c_MRVAStatus" align="left"><font point-size="10">c.MRVAStatus</font></td></tr>
                <tr><td port="c_MRVADownloadArtifact" align="left"><font point-size="10">c.MRVADownloadArtifact</font></td></tr>
                <tr><td align="left"><font point-size="10">c.MRVARequestID</font></td></tr>
                <tr><td align="left"><font point-size="10">c.MRVADownloadQLDB</font></td></tr>
                <tr><td align="left"><font point-size="10"><i>Not Found</i></font></td></tr>
            </table>
        >
    ];

    // Define edges (dependencies)
    dbstore -> dbssvc;
    server -> dbstore;
    server -> rabbitmq;
    server -> artifactstore;
    agent -> dbstore;
    agent -> artifactstore;
    agent -> rabbitmq;

    // Message links
    client_ghmrva:slot1 -> server:c_MRVARequest [label="message" style=dashed penwidth=2 fontsize=8];
    client_ghmrva:slot2 -> server:c_MRVAStatus [label="message" style=dashed penwidth=2 fontsize=8];
    client_ghmrva:slot3 -> server:c_MRVADownloadArtifact [label="message" style=dashed penwidth=2 fontsize=8];
}
BIN
notes/system-structure.pdf
Normal file
Binary file not shown.
162
notes/system-structure.svg
Normal file
@@ -0,0 +1,162 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 10.0.1 (20240210.2158)
 -->
<!-- Title: DockerComposeDemo Pages: 1 -->
<svg width="1057pt" height="280pt"
 viewBox="0.00 0.00 1056.75 280.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 276)">
<title>DockerComposeDemo</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-276 1052.75,-276 1052.75,4 -4,4"/>
<text text-anchor="middle" x="524.38" y="-249" font-family="Helvetica,sans-Serif" font-size="20.00">Container Dependencies for Demo</text>
<!-- dbssvc -->
<g id="node1" class="node">
<title>dbssvc</title>
<g id="a_node1"><a xlink:href="../demo/containers/dbsdata/Dockerfile" xlink:title="<TABLE>" target="_blank">
<polygon fill="lightblue" stroke="none" points="818.75,-145 818.75,-168 1039.75,-168 1039.75,-145 818.75,-145"/>
<text text-anchor="start" x="906" y="-151.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">dbssvc</text>
<text text-anchor="start" x="822.75" y="-131.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./demo/containers/dbsdata/Dockerfile</text>
<polygon fill="none" stroke="black" points="817.75,-124.75 817.75,-169 1040.75,-169 1040.75,-124.75 817.75,-124.75"/>
</a>
</g>
</g>
<!-- dbstore -->
<g id="node2" class="node">
<title>dbstore</title>
<polygon fill="lightblue" stroke="none" points="523.25,-145 523.25,-168 763.75,-168 763.75,-145 523.25,-145"/>
<text text-anchor="start" x="618.75" y="-151.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">dbstore</text>
<text text-anchor="start" x="527.25" y="-131.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</text>
<polygon fill="none" stroke="black" points="522.25,-124.75 522.25,-169 764.75,-169 764.75,-124.75 522.25,-124.75"/>
</g>
<!-- dbstore->dbssvc -->
<g id="edge1" class="edge">
<title>dbstore->dbssvc</title>
<path fill="none" stroke="black" d="M772.68,-146.88C783.02,-146.88 793.45,-146.88 803.76,-146.88"/>
<polygon fill="black" stroke="black" points="803.65,-148.63 808.65,-146.88 803.65,-145.13 803.65,-148.63"/>
</g>
<!-- client_ghmrva -->
<g id="node3" class="node">
<title>client_ghmrva</title>
<g id="a_node3"><a xlink:href="../client/containers/ghmrva/Dockerfile" xlink:title="<TABLE>" target="_blank">
<polygon fill="lightblue" stroke="none" points="9,-123 9,-146 227,-146 227,-123 9,-123"/>
<text text-anchor="start" x="73.38" y="-129.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">client-ghmrva</text>
<text text-anchor="start" x="13" y="-109.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./client/containers/ghmrva/Dockerfile</text>
<polygon fill="none" stroke="black" points="8,-78.75 8,-147 228,-147 228,-78.75 8,-78.75"/>
</a>
</g>
</g>
<!-- server -->
<g id="node8" class="node">
<title>server</title>
<g id="a_node8"><a xlink:href="../cmd/server/Dockerfile" xlink:title="<TABLE>" target="_blank">
<polygon fill="lightblue" stroke="none" points="308,-139.75 308,-162.75 468.25,-162.75 468.25,-139.75 308,-139.75"/>
<text text-anchor="start" x="367.88" y="-146.45" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">server</text>
<text text-anchor="start" x="312" y="-126.25" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./cmd/server/Dockerfile</text>
<text text-anchor="start" x="312" y="-107" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVARequest</text>
<text text-anchor="start" x="312" y="-87.75" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVAStatus</text>
<text text-anchor="start" x="312" y="-68.5" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVADownloadArtifact</text>
<text text-anchor="start" x="312" y="-49.25" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVARequestID</text>
<text text-anchor="start" x="312" y="-30" font-family="Helvetica,sans-Serif" font-size="10.00">c.MRVADownloadQLDB</text>
<text text-anchor="start" x="312" y="-11.75" font-family="Helvetica,sans-Serif" font-style="italic" font-size="10.00">Not Found</text>
<polygon fill="none" stroke="black" points="307,-4 307,-163.75 469.25,-163.75 469.25,-4 307,-4"/>
</a>
</g>
</g>
<!-- client_ghmrva->server -->
<g id="edge8" class="edge">
<title>client_ghmrva:slot1->server:c_MRVARequest</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-99.88C243.53,-99.88 278.87,-108.3 299.36,-110.4"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.01,-112.13 304.1,-110.69 299.22,-108.64 299.01,-112.13"/>
<text text-anchor="middle" x="267.5" y="-109.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- client_ghmrva->server -->
<g id="edge9" class="edge">
<title>client_ghmrva:slot2->server:c_MRVAStatus</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-91.88C260.42,-91.88 270.42,-91.88 299.34,-91.88"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.1,-93.63 304.1,-91.88 299.1,-90.13 299.1,-93.63"/>
<text text-anchor="middle" x="267.5" y="-94.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- client_ghmrva->server -->
<g id="edge10" class="edge">
<title>client_ghmrva:slot3->server:c_MRVADownloadArtifact</title>
<path fill="none" stroke="black" stroke-width="2" stroke-dasharray="5,2" d="M228,-83.88C239.83,-83.88 242.33,-80.1 254,-78.12 274.69,-74.63 281.73,-72.43 299.45,-71.97"/>
<polygon fill="black" stroke="black" stroke-width="2" points="299.12,-73.72 304.1,-71.91 299.08,-70.22 299.12,-73.72"/>
<text text-anchor="middle" x="267.5" y="-80.03" font-family="Times,serif" font-size="8.00">message</text>
</g>
<!-- code_server -->
<g id="node4" class="node">
<title>code_server</title>
<g id="a_node4"><a xlink:href="../client/containers/vscode/Dockerfile" xlink:title="<TABLE>" target="_blank">
<polygon fill="lightblue" stroke="none" points="9.38,-193 9.38,-216 226.62,-216 226.62,-193 9.38,-193"/>
<text text-anchor="start" x="79.75" y="-199.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">code-server</text>
<text text-anchor="start" x="13.38" y="-179.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./client/containers/vscode/Dockerfile</text>
<polygon fill="none" stroke="black" points="8.38,-172.75 8.38,-217 227.62,-217 227.62,-172.75 8.38,-172.75"/>
</a>
</g>
</g>
<!-- rabbitmq -->
<g id="node5" class="node">
<title>rabbitmq</title>
<polygon fill="lightblue" stroke="none" points="570.5,-215 570.5,-238 716.5,-238 716.5,-215 570.5,-215"/>
<text text-anchor="start" x="614.25" y="-221.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">rabbitmq</text>
<text text-anchor="start" x="574.5" y="-201.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: rabbitmq:3-management</text>
<polygon fill="none" stroke="black" points="569.5,-194.75 569.5,-239 717.5,-239 717.5,-194.75 569.5,-194.75"/>
</g>
<!-- artifactstore -->
<g id="node6" class="node">
<title>artifactstore</title>
<polygon fill="lightblue" stroke="none" points="523.25,-75 523.25,-98 763.75,-98 763.75,-75 523.25,-75"/>
<text text-anchor="start" x="604.5" y="-81.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">artifactstore</text>
<text text-anchor="start" x="527.25" y="-61.5" font-family="Helvetica,sans-Serif" font-size="10.00">Image: minio/minio:RELEASE.2024-06-11T03-13-30Z</text>
<polygon fill="none" stroke="black" points="522.25,-54.75 522.25,-99 764.75,-99 764.75,-54.75 522.25,-54.75"/>
</g>
<!-- agent -->
<g id="node7" class="node">
<title>agent</title>
<g id="a_node7"><a xlink:href="../cmd/agent/Dockerfile" xlink:title="<TABLE>" target="_blank">
<polygon fill="lightblue" stroke="none" points="309.5,-210 309.5,-233 466.75,-233 466.75,-210 309.5,-210"/>
<text text-anchor="start" x="370.12" y="-216.7" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="14.00">agent</text>
<text text-anchor="start" x="313.5" y="-196.5" font-family="Helvetica,sans-Serif" font-size="10.00">Dockerfile: ./cmd/agent/Dockerfile</text>
<polygon fill="none" stroke="black" points="308.5,-189.75 308.5,-234 467.75,-234 467.75,-189.75 308.5,-189.75"/>
</a>
</g>
</g>
<!-- agent->dbstore -->
<g id="edge5" class="edge">
<title>agent->dbstore</title>
<path fill="none" stroke="black" d="M475.61,-189.69C494.58,-184.82 514.89,-179.61 534.58,-174.56"/>
<polygon fill="black" stroke="black" points="534.8,-176.31 539.21,-173.37 533.93,-172.92 534.8,-176.31"/>
</g>
<!-- agent->rabbitmq -->
<g id="edge7" class="edge">
<title>agent->rabbitmq</title>
<path fill="none" stroke="black" d="M475.61,-213.58C501.25,-214.09 529.34,-214.64 555.04,-215.15"/>
<polygon fill="black" stroke="black" points="554.98,-216.9 560.01,-215.25 555.04,-213.4 554.98,-216.9"/>
</g>
<!-- agent->artifactstore -->
<g id="edge6" class="edge">
<title>agent->artifactstore</title>
<path fill="none" stroke="black" d="M465.19,-185.78C469.46,-183.08 473.52,-180.12 477.25,-176.88 502.34,-155.06 487.99,-132.25 514.25,-111.88 516.87,-109.84 519.6,-107.93 522.42,-106.12"/>
<polygon fill="black" stroke="black" points="522.93,-107.86 526.3,-103.78 521.11,-104.87 522.93,-107.86"/>
</g>
<!-- server->dbstore -->
<g id="edge2" class="edge">
<title>server->dbstore</title>
<path fill="none" stroke="black" d="M477.03,-105.73C494.63,-110.11 513.32,-114.76 531.58,-119.29"/>
<polygon fill="black" stroke="black" points="531,-120.96 536.28,-120.46 531.85,-117.56 531,-120.96"/>
</g>
<!-- server->rabbitmq -->
<g id="edge3" class="edge">
<title>server->rabbitmq</title>
<path fill="none" stroke="black" d="M477.14,-159.61C489.14,-167.87 501.67,-175.56 514.25,-181.88 527.15,-188.36 541.41,-193.76 555.59,-198.22"/>
<polygon fill="black" stroke="black" points="554.91,-199.85 560.21,-199.64 555.94,-196.5 554.91,-199.85"/>
</g>
<!-- server->artifactstore -->
<g id="edge4" class="edge">
<title>server->artifactstore</title>
<path fill="none" stroke="black" d="M477.03,-81.45C487.19,-81.17 497.71,-80.88 508.31,-80.58"/>
<polygon fill="black" stroke="black" points="508.09,-82.34 513.04,-80.45 507.99,-78.84 508.09,-82.34"/>
</g>
</g>
</svg>
26
notes/vscode-plugin.org
Normal file
@@ -0,0 +1,26 @@
* Building the plugin
#+BEGIN_SRC sh
# Clone hohn's fork of Nick's fork
cd /tmp
git clone git@github.com:hohn/vscode-codeql.git
cd /tmp/vscode-codeql
git checkout mrva-standalone

# Install nvm
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

# Install correct node version
cd /tmp/vscode-codeql/extensions/ql-vscode
nvm install

# Build the extension
cd /tmp/vscode-codeql/extensions/ql-vscode
npm install
npm run build

# Install extension
cd /tmp/vscode-codeql/dist
code --force --install-extension vscode-codeql-*.vsix

#+END_SRC

@@ -4,26 +4,29 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"mrvacommander/pkg/artifactstore"
|
||||
"mrvacommander/pkg/codeql"
|
||||
"mrvacommander/pkg/common"
|
||||
"mrvacommander/pkg/logger"
|
||||
"mrvacommander/pkg/qldbstore"
|
||||
"mrvacommander/pkg/qpstore"
|
||||
"mrvacommander/pkg/queue"
|
||||
"mrvacommander/utils"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/elastic/go-sysinfo"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
/*
|
||||
type RunnerSingle struct {
|
||||
queue queue.Queue
|
||||
}
|
||||
|
||||
func NewAgentSingle(numWorkers int, av *Visibles) *RunnerSingle {
|
||||
r := RunnerSingle{queue: av.Queue}
|
||||
func NewAgentSingle(numWorkers int, v *Visibles) *RunnerSingle {
|
||||
r := RunnerSingle{queue: v.Queue}
|
||||
|
||||
for id := 1; id <= numWorkers; id++ {
|
||||
go r.worker(id)
|
||||
@@ -31,72 +34,169 @@ func NewAgentSingle(numWorkers int, av *Visibles) {
	return &r
}

type Visibles struct {
	Logger logger.Logger
	Queue  queue.Queue
	// TODO extra package for query pack storage
	QueryPackStore qpstore.Storage
	// TODO extra package for ql db storage
	QLDBStore qldbstore.Storage
}

func (r *RunnerSingle) worker(wid int) {
	// TODO: reimplement this later
	/*
		var job common.AnalyzeJob

		for {
			job = <-r.queue.Jobs()

			slog.Debug("Picking up job", "job", job, "worker", wid)

			slog.Debug("Analysis: running", "job", job)
			storage.SetStatus(job.QueryPackId, job.NWO, common.StatusQueued)

			resultFile, err := RunAnalysis(job)
			result, err := RunAnalysisJob(job)
			if err != nil {
				slog.Error("Failed to run analysis job", slog.Any("error", err))
				continue
			}

			slog.Debug("Analysis run finished", "job", job)

			// TODO: FIX THIS
			res := common.AnalyzeResult{
				RunAnalysisSARIF: resultFile,
				RunAnalysisBQRS:  "", // FIXME ?
			r.queue.Results() <- result
			}
			r.queue.Results() <- res
			storage.SetStatus(job.QueryPackId, job.NWO, common.StatusSuccess)
			storage.SetResult(job.QueryPackId, job.NWO, res)

		}
	*/

const (
	workerMemoryMB     = 2048 // 2 GB
	monitorIntervalSec = 10   // Monitor every 10 seconds
)

func calculateWorkers() int {
	host, err := sysinfo.Host()
	if err != nil {
		slog.Error("failed to get host info", "error", err)
		os.Exit(1)
	}

	memInfo, err := host.Memory()
	if err != nil {
		slog.Error("failed to get memory info", "error", err)
		os.Exit(1)
	}

	// Get available memory in MB
	totalMemoryMB := memInfo.Available / (1024 * 1024)

	// Ensure we have at least one worker
	workers := int(totalMemoryMB / workerMemoryMB)
	if workers < 1 {
		workers = 1
	}

	// Limit the number of workers to the number of CPUs
	cpuCount := runtime.NumCPU()
	if workers > cpuCount {
		workers = max(cpuCount, 1)
	}

	return workers
}
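For reference, the sizing rule above reduces to min(max(availableMB / 2048, 1), NumCPU). A minimal standalone sketch of the same rule, with invented sample values:

// Sketch of the worker-sizing rule used by calculateWorkers above.
// Hypothetical example, not part of this commit.
package main

import (
	"fmt"
	"runtime"
)

// sizeWorkers mirrors calculateWorkers: one worker per 2 GB of
// available memory, at least 1, and never more than the CPU count.
func sizeWorkers(availableMB uint64) int {
	workers := int(availableMB / 2048)
	if workers < 1 {
		workers = 1
	}
	if cpus := runtime.NumCPU(); workers > cpus {
		workers = cpus
	}
	return workers
}

func main() {
	fmt.Println(sizeWorkers(1024))  // 1: below 2 GB still yields one worker
	fmt.Println(sizeWorkers(16384)) // min(8, NumCPU)
}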
func StartAndMonitorWorkers(ctx context.Context,
	artifacts artifactstore.Store,
	databases qldbstore.Store,
	queue queue.Queue,
	desiredWorkerCount int,
	wg *sync.WaitGroup) {

	currentWorkerCount := 0
	stopChans := make([]chan struct{}, 0)

	if desiredWorkerCount != 0 {
		slog.Info("Starting workers", slog.Int("count", desiredWorkerCount))
		for i := 0; i < desiredWorkerCount; i++ {
			stopChan := make(chan struct{})
			stopChans = append(stopChans, stopChan)
			wg.Add(1)
			go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
		}
		return
	}

	slog.Info("Worker count not specified, managing based on available memory and CPU")

	for {
		select {
		case <-ctx.Done():
			// signal all workers to stop
			for _, stopChan := range stopChans {
				close(stopChan)
			}
			return
		default:
			newWorkerCount := calculateWorkers()

			if newWorkerCount != currentWorkerCount {
				slog.Info(
					"Modifying worker count",
					slog.Int("current", currentWorkerCount),
					slog.Int("new", newWorkerCount))
			}

			if newWorkerCount > currentWorkerCount {
				for i := currentWorkerCount; i < newWorkerCount; i++ {
					stopChan := make(chan struct{})
					stopChans = append(stopChans, stopChan)
					wg.Add(1)
					go RunWorker(ctx, artifacts, databases, queue, stopChan, wg)
				}
			} else if newWorkerCount < currentWorkerCount {
				for i := newWorkerCount; i < currentWorkerCount; i++ {
					close(stopChans[i])
				}
				stopChans = stopChans[:newWorkerCount]
			}
			currentWorkerCount = newWorkerCount

			time.Sleep(monitorIntervalSec * time.Second)
		}
	}
}
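A sketch of how a caller might drive StartAndMonitorWorkers from a hypothetical wrapper; the runAgent function and its arguments are assumptions, with the stores and queue constructed elsewhere:

// Hypothetical wiring of the agent's worker pool from a caller package.
func runAgent(artifacts artifactstore.Store, databases qldbstore.Store, jobQueue queue.Queue) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	var wg sync.WaitGroup
	// 0 lets the agent size the pool from memory and CPU;
	// a non-zero count pins the pool to that size.
	go agent.StartAndMonitorWorkers(ctx, artifacts, databases, jobQueue, 0, &wg)

	// ... run until shutdown is requested, then:
	cancel()  // ask the monitor and workers to stop
	wg.Wait() // wait for in-flight jobs to finish
}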
// RunAnalysisJob runs a CodeQL analysis job (AnalyzeJob) returning an AnalyzeResult
func RunAnalysisJob(job common.AnalyzeJob) (common.AnalyzeResult, error) {
	var result = common.AnalyzeResult{
		RequestId: job.RequestId,
func RunAnalysisJob(
	job queue.AnalyzeJob, artifacts artifactstore.Store, dbs qldbstore.Store) (queue.AnalyzeResult, error) {
	var result = queue.AnalyzeResult{
		Spec:             job.Spec,
		ResultCount:      0,
		ResultArchiveURL: "",
		ResultLocation:   artifactstore.ArtifactLocation{},
		Status:           common.StatusError,
	}

	// Create a temporary directory
	tempDir := filepath.Join(os.TempDir(), uuid.New().String())
	if err := os.MkdirAll(tempDir, 0755); err != nil {
	if err := os.MkdirAll(tempDir, 0700); err != nil { // directories need the execute bit; 0600 would be untraversable
		return result, fmt.Errorf("failed to create temporary directory: %v", err)
	}
	defer os.RemoveAll(tempDir)

	// Extract the query pack
	// TODO: download from the 'job' query pack URL
	// utils.downloadFile
	queryPackPath := filepath.Join(tempDir, "qp-54674")
	utils.UntarGz("qp-54674.tgz", queryPackPath)
	// Download the query pack as a byte slice
	queryPackData, err := artifacts.GetQueryPack(job.QueryPackLocation)
	if err != nil {
		return result, fmt.Errorf("failed to download query pack: %w", err)
	}

	// Write the query pack data to the filesystem
	queryPackArchivePath := filepath.Join(tempDir, "query-pack.tar.gz")
	if err := os.WriteFile(queryPackArchivePath, queryPackData, 0600); err != nil {
		return result, fmt.Errorf("failed to write query pack archive to disk: %w", err)
	}

	// Make a directory and extract the query pack
	queryPackPath := filepath.Join(tempDir, "pack")
	if err := os.Mkdir(queryPackPath, 0700); err != nil { // 0700, not 0600: the directory must be searchable
		return result, fmt.Errorf("failed to create query pack directory: %w", err)
	}
	if err := utils.UntarGz(queryPackArchivePath, queryPackPath); err != nil {
		return result, fmt.Errorf("failed to extract query pack: %w", err)
	}

	databaseData, err := dbs.GetDatabase(job.Spec.NameWithOwner)
	if err != nil {
		return result, fmt.Errorf("failed to get database: %w", err)
	}

	// Write the CodeQL database data to the filesystem
	databasePath := filepath.Join(tempDir, "database.zip")
	if err := os.WriteFile(databasePath, databaseData, 0600); err != nil {
		return result, fmt.Errorf("failed to write CodeQL database to disk: %w", err)
	}

	// Perform the CodeQL analysis
	runResult, err := codeql.RunQuery("google_flatbuffers_db.zip", "cpp", queryPackPath, tempDir)
	runResult, err := codeql.RunQuery(databasePath, job.QueryLanguage, queryPackPath, tempDir)
	if err != nil {
		return result, fmt.Errorf("failed to run analysis: %w", err)
	}

@@ -107,21 +207,32 @@ func RunAnalysisJob(job common.AnalyzeJob) (common.AnalyzeResult, error) {
		return result, fmt.Errorf("failed to generate results archive: %w", err)
	}

	// TODO: Upload the archive to storage
	// Upload the archive to storage
	slog.Debug("Results archive size", slog.Int("size", len(resultsArchive)))
	resultsLocation, err := artifacts.SaveResult(job.Spec, resultsArchive)
	if err != nil {
		return result, fmt.Errorf("failed to save results archive: %w", err)
	}

	result = common.AnalyzeResult{
		RequestId: job.RequestId,
	result = queue.AnalyzeResult{
		Spec:                 job.Spec,
		ResultCount:          runResult.ResultCount,
		ResultArchiveURL:     "REPLACE_THIS_WITH_STORED_RESULTS_ARCHIVE", // TODO
		ResultLocation:       resultsLocation,
		Status:               common.StatusSuccess,
		SourceLocationPrefix: runResult.SourceLocationPrefix,
		DatabaseSHA:          runResult.DatabaseSHA,
	}

	return result, nil
}
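To make the flow concrete, a hedged sketch of a single invocation from a hypothetical caller; the literal values are invented, and the field names follow those used in this diff:

// Hypothetical single-job run; packLocation would come from an earlier
// artifacts.SaveQueryPack call, and the values below are illustrative.
func runOne(artifacts artifactstore.Store, databases qldbstore.Store,
	packLocation artifactstore.ArtifactLocation) {
	job := queue.AnalyzeJob{
		Spec: common.JobSpec{
			SessionID:     54674,
			NameWithOwner: "google/flatbuffers",
		},
		QueryPackLocation: packLocation,
		QueryLanguage:     "cpp",
	}
	result, err := agent.RunAnalysisJob(job, artifacts, databases)
	if err != nil {
		slog.Error("Failed to run analysis job", slog.Any("error", err))
		return
	}
	slog.Info("Analysis finished",
		slog.Int("results", result.ResultCount),
		slog.Any("location", result.ResultLocation))
}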
// RunWorker runs a worker that processes jobs from queue
func RunWorker(ctx context.Context, stopChan chan struct{}, queue queue.Queue, wg *sync.WaitGroup) {
func RunWorker(ctx context.Context,
	artifacts artifactstore.Store,
	databases qldbstore.Store,
	queue queue.Queue,
	stopChan chan struct{},
	wg *sync.WaitGroup) {
	const (
		WORKER_COUNT_STOP_MESSAGE   = "Worker stopping due to reduction in worker count"
		WORKER_CONTEXT_STOP_MESSAGE = "Worker stopping due to context cancellation"
@@ -144,7 +255,7 @@ func RunWorker(ctx context.Context, stopChan chan struct{}, queue queue.Queue, w
			return
		}
		slog.Info("Running analysis job", slog.Any("job", job))
		result, err := RunAnalysisJob(job)
		result, err := RunAnalysisJob(job, artifacts, databases)
		if err != nil {
			slog.Error("Failed to run analysis job", slog.Any("error", err))
			continue
@@ -1,4 +1,13 @@
package agent

type Runner interface {
import (
	"mrvacommander/pkg/artifactstore"
	"mrvacommander/pkg/qldbstore"
	"mrvacommander/pkg/queue"
)

type Visibles struct {
	Queue         queue.Queue
	Artifacts     artifactstore.Store
	CodeQLDBStore qldbstore.Store
}
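The struct above is the agent's dependency-injection surface; a caller might assemble it along these lines (the variable names are placeholders, not APIs confirmed by this diff):

// Hypothetical assembly of the agent's dependencies.
v := agent.Visibles{
	Queue:         rabbitQueue,    // a queue.Queue implementation
	Artifacts:     minioArtifacts, // an artifactstore.Store implementation
	CodeQLDBStore: minioDBs,       // a qldbstore.Store implementation
}
_ = v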
28 pkg/artifactstore/common.go Normal file
@@ -0,0 +1,28 @@
package artifactstore

import (
	"fmt"
	"mrvacommander/pkg/common"
)

// Restrict the keys / values for ArtifactLocation and centralize the common ones
// here
const (
	AF_BUCKETNAME_RESULTS = "results"
	AF_BUCKETNAME_PACKS   = "packs"
)

type ArtifactLocation struct {
	Key    string // location in bucket OR full location for file paths
	Bucket string // which bucket: packs or results
}

// deriveKeyFromSessionId generates a key for a query pack based on the session ID
func deriveKeyFromSessionId(sessionId int) string {
	return fmt.Sprintf("%d", sessionId)
}

// deriveKeyFromJobSpec generates a key for a result based on the JobSpec
func deriveKeyFromJobSpec(jobSpec common.JobSpec) string {
	return fmt.Sprintf("%d-%s", jobSpec.SessionID, jobSpec.NameWithOwner)
}
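A worked example of the two key formats above (values illustrative; this would live inside the artifactstore package, since the helpers are unexported): packs are keyed by session ID alone, results by session ID plus repository.

// Worked example of the key derivation above.
packKey := deriveKeyFromSessionId(54674)
// packKey == "54674"
resultKey := deriveKeyFromJobSpec(common.JobSpec{
	SessionID:     54674,
	NameWithOwner: "google/flatbuffers",
})
// resultKey == "54674-google/flatbuffers"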
20 pkg/artifactstore/interfaces.go Normal file
@@ -0,0 +1,20 @@
package artifactstore

import "mrvacommander/pkg/common"

type Store interface {
	// GetQueryPack retrieves the query pack from the specified location.
	GetQueryPack(location ArtifactLocation) ([]byte, error)

	// SaveQueryPack saves the query pack using the session ID and returns the artifact location.
	SaveQueryPack(sessionId int, data []byte) (ArtifactLocation, error)

	// GetResult retrieves the result from the specified location.
	GetResult(location ArtifactLocation) ([]byte, error)

	// GetResultSize retrieves the size of the result from the specified location.
	GetResultSize(location ArtifactLocation) (int, error)

	// SaveResult saves the result using the JobSpec and returns the artifact location.
	SaveResult(jobSpec common.JobSpec, data []byte) (ArtifactLocation, error)
}
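A hedged round-trip sketch against this interface; store is any concrete implementation, and packBytes, jobSpec, and resultsArchive are assumed to exist in the caller:

// Round-trip sketch for the Store interface.
loc, err := store.SaveQueryPack(54674, packBytes)
if err != nil {
	// handle error
}
packAgain, _ := store.GetQueryPack(loc) // same bytes back

resLoc, _ := store.SaveResult(jobSpec, resultsArchive)
size, _ := store.GetResultSize(resLoc) // presumably the archive size in bytes
data, _ := store.GetResult(resLoc)
_, _, _ = packAgain, size, data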
Some files were not shown because too many files have changed in this diff.