diff --git a/client/containers/vscode/Makefile b/client/containers/vscode/Makefile deleted file mode 100644 index db32e1f..0000000 --- a/client/containers/vscode/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -all: code-server-initialized - -CSI_TARGET := code-server-initialized:0.1.24 -csi: mk.code-server-initialized -mk.code-server-initialized: - docker build -t ${CSI_TARGET} . - touch $@ - -csi-serve: csi - docker run -d -p 9080:9080 ${CSI_TARGET} - -clean: - -docker rmi -f ${CSI_TARGET} - -rm mk.code-server-initialized - -# Targets below are used after some manual setup of the container. See README.org -# for details - -csi-push: mk.csi-push -mk.csi-push: csi - docker tag ${CSI_TARGET} ghcr.io/hohn/${CSI_TARGET} - docker push ghcr.io/hohn/${CSI_TARGET} - touch $@ - -csi-test: - docker pull ghcr.io/hohn/${CSI_TARGET} - docker run --rm -d -p 9080:9080 --name test-code-server-codeql\ - ghcr.io/hohn/${CSI_TARGET} diff --git a/client/containers/vscode/README.org b/client/containers/vscode/README.org index 263475c..a588fa4 100644 --- a/client/containers/vscode/README.org +++ b/client/containers/vscode/README.org @@ -1,14 +1,15 @@ * MRVA VS Code server container On the host: - - Build the container via #+BEGIN_SRC sh - make csi - #+END_SRC + # Build the container via + cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/ + docker build -t code-server-initialized:0.1.24 . - - Run the container via - #+BEGIN_SRC sh - make csi-serve + # Run the container in standalone mode via + cd ~/work-gh/mrva/mrvacommander/client/containers/vscode/ + docker run -v ~/work-gh/mrva/vscode-codeql:/work-gh/mrva/vscode-codeql \ + -d -p 9080:9080 code-server-initialized:0.1.24 #+END_SRC - Connect to it at http://localhost:9080/?folder=/home/coder, password is =mrva=. @@ -24,9 +25,9 @@ codeql pack add codeql/python-all@1.0.6 #+END_SRC - - Open a new file =qldemo/simple.ql= and add this this query to it. 
The plugin - will download the CodeQL binaries (but never use them -- the configuration - redirects) + - Create a new file =qldemo/simple.ql= with this query. Open it in VS Code. + The plugin will download the CodeQL binaries (but never use them -- the + configuration redirects) #+BEGIN_SRC sh cd cat > qldemo/simple.ql < scratch/db-info-1.csv + + ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv + + ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv + + ./bin/mc-db-generate-selection -n 11 \ + scratch/vscode-selection.json \ + scratch/gh-mrva-selection.json \ + < scratch/db-info-3.csv + + # Several seconds start-up time; fast db population + ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv + + # While the containers are running, this will show minio's storage. The zip files + # are split into part.* and xl.meta by minio. Use the web interface to see real + # names. + docker exec dbstore ls -R /data/mrvacommander/ + + # Open browser to see the file listing + open http://localhost:9001/browser/qldb + + # list the volumes + docker volume ls |grep dbs + docker volume inspect mrvacommander_dbsdata + + # Persist volume using container + cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata + # Note: use mrvacommander_dbsdata, not mrvacommander-dbsdata + # Get the data as tar file from the image + docker run --rm \ + -v mrvacommander_dbsdata:/data \ + -v $(pwd):/backup \ + busybox sh -c "tar cvf /backup/dbsdata_backup.tar ." + # Build container with the tarball + cd ~/work-gh/mrva/mrvacommander/demo/containers/dbsdata + docker build -t dbsdata-container:0.1.24 . 
+ docker image ls | grep dbs + + # check container contents + docker run -it dbsdata-container:0.1.24 /bin/sh + docker run -it dbsdata-container:0.1.24 ls data/qldb + + # Tag the dbstore backing container + docker inspect dbsdata-container:0.1.24 |grep Id + docker tag dbsdata-container:0.1.24 ghcr.io/hohn/dbsdata-container:0.1.24 + + # Push the pre-populated image + docker push ghcr.io/hohn/dbsdata-container:0.1.24 + + # Check the tagged image + docker run -it ghcr.io/hohn/dbsdata-container:0.1.24 \ + ls data/qldb + + # Shut down the container assembly + docker-compose -f docker-compose-demo-build.yml down + #+END_SRC + diff --git a/docker-compose-demo-build.yml b/docker-compose-demo-build.yml new file mode 100644 index 0000000..14be645 --- /dev/null +++ b/docker-compose-demo-build.yml @@ -0,0 +1,129 @@ +# This is the compose configuration used to build / prepopulate the containers for +# a demo. +services: + dbssvc: + ## image: ghcr.io/hohn/dbsdata-container:0.1.24 + build: + context: . + dockerfile: ./demo/containers/dbsdata/Dockerfile + container_name: dbssvc + volumes: + - dbsdata:/data/mrvacommander/dbstore-data + networks: + - backend + + dbstore: + image: minio/minio:RELEASE.2024-06-11T03-13-30Z + container_name: dbstore + ports: + - "9000:9000" + - "9001:9001" + env_file: + - path: .env.container + required: true + command: server /data/mrvacommander/dbstore-data --console-address ":9001" + depends_on: + - dbssvc + volumes: + - dbsdata:/data/mrvacommander/dbstore-data + networks: + - backend + + client-ghmrva: + ## image: ghcr.io/hohn/client-ghmrva-container:0.1.24 + build: + context: . 
+ dockerfile: ./client/containers/ghmrva/Dockerfile + network_mode: "service:server" # Share the 'server' network namespace + environment: + - SERVER_URL=http://localhost:8080 # 'localhost' now refers to 'server' + + code-server: + ## image: ghcr.io/hohn/code-server-initialized:0.1.24 + build: + context: ./client/containers/vscode + dockerfile: Dockerfile + ports: + - "9080:9080" + environment: + - PASSWORD=mrva + + rabbitmq: + image: rabbitmq:3-management + hostname: rabbitmq + container_name: rabbitmq + volumes: + - ./init/rabbitmq/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf:ro + - ./init/rabbitmq/definitions.json:/etc/rabbitmq/definitions.json:ro + ports: + - "5672:5672" + - "15672:15672" + healthcheck: + test: rabbitmq-diagnostics check_port_connectivity + interval: 30s + timeout: 30s + retries: 10 + networks: + - backend + + server: + build: + context: . + dockerfile: ./cmd/server/Dockerfile + command: [ '--mode=container', '--loglevel=debug' ] + container_name: server + stop_grace_period: 1s + ports: + # - "8081:8080" # host:container for proxy + - "8080:8080" # host:container + depends_on: + - rabbitmq + - dbstore + - artifactstore + env_file: + - path: ./.env.container + required: true + networks: + - backend + + artifactstore: + image: minio/minio:RELEASE.2024-06-11T03-13-30Z + container_name: artifactstore + ports: + - "19000:9000" # host:container + - "19001:9001" + env_file: + - path: ./.env.container + required: true + command: server /data --console-address ":9001" + volumes: + # The artifactstore is only populated at runtime so there is no need + # for Docker storage; a directory is fine. + - ./qpstore-data:/data + networks: + - backend + + agent: + ## image: ghcr.io/hohn/mrva-agent:0.1.24 + build: + context: . 
+ dockerfile: ./cmd/agent/Dockerfile + command: [ '--loglevel=debug' ] + container_name: agent + depends_on: + - rabbitmq + - dbstore + - artifactstore + env_file: + - path: ./.env.container + required: true + networks: + - backend + +networks: + backend: + driver: bridge + +volumes: + dbsdata: + diff --git a/docker-compose-demo.yml b/docker-compose-demo.yml index ecc3fad..0018d49 100644 --- a/docker-compose-demo.yml +++ b/docker-compose-demo.yml @@ -1,12 +1,16 @@ services: - mrvadata: - image: ghcr.io/hohn/mrvadata:0.1.24 - container_name: mrvadata + dbssvc: + # dbsdata-container:0.1.24 + image: ghcr.io/hohn/dbsdata-container:0.1.24 + command: tail -f /dev/null # Keep the container running + # volumes: + # - /qldb # Directory inside the container that contains the data volumes: - - mrvadata:/data/mrvacommander/dbstore-data + - dbsdata:/data + container_name: dbssvc networks: - backend - + dbstore: image: minio/minio:RELEASE.2024-06-11T03-13-30Z container_name: dbstore @@ -18,23 +22,14 @@ services: required: true command: server /data/mrvacommander/dbstore-data --console-address ":9001" depends_on: - - mrvadata - # The mrvadata volume has content of ./dbstore-data, so the volume mount - # below is equivalent of this original: - # volumes: - # - ./dbstore-data:/data + - dbssvc + # volumes_from: + # - dbsdata # Use the volumes from dbsdata container volumes: - - mrvadata:/data + - dbsdata:/data/mrvacommander/dbstore-data networks: - backend - client-qldbtools: - image: ghcr.io/hohn/client-qldbtools-container:0.1.24 - # XX: Copy client/qldbtools/scratch into this container - networks: - - backend - - client-ghmrva: image: ghcr.io/hohn/client-ghmrva-container:0.1.24 network_mode: "service:server" # Share the 'server' network namespace @@ -118,4 +113,4 @@ networks: driver: bridge volumes: - mrvadata: + dbsdata: diff --git a/notes/cli-end-to-end-demo-build.org b/notes/cli-end-to-end-demo-build.org new file mode 100644 index 0000000..9ae6883 --- /dev/null +++ 
b/notes/cli-end-to-end-demo-build.org @@ -0,0 +1,471 @@ +# -*- coding: utf-8 -*- +#+OPTIONS: H:2 num:t \n:nil @:t ::t |:t ^:{} f:t *:t TeX:t LaTeX:t skip:nil p:nil + +* End-to-end example of CLI use + This document describes the build steps for the demo containers. + +* Database Acquisition + For this demo, the data is preloaded via container. To inspect the container: + + #+BEGIN_SRC sh + # On host, run + docker exec -it dbstore /bin/bash + + # In the container + ls -la /data/dbstore-data/ + ls /data/dbstore-data/qldb/ | wc -l + #+END_SRC + Here we use a small example sample of open-source + repositories, 23 in all. + +* Repository Selection + When using all of the MRVA system, we select a small subset of repositories + available to you in [[*Database Acquisition][Database Acquisition]]. For this demo we include a small + collection -- 23 repositories -- and here we further narrow the selection to 11. + + The full list + #+BEGIN_SRC text + ls -1 /data/dbstore-data/qldb/ + 'BoomingTech$Piccoloctsj6d7177.zip' + 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' + 'OpenRCT2$OpenRCT2ctsj975d7c.zip' + 'StanfordLegion$legionctsj39cbe4.zip' + 'USCiLab$cerealctsj264953.zip' + 'WinMerge$winmergectsj101305.zip' + 'draios$sysdigctsj12c02d.zip' + 'gildor2$UEViewerctsjfefdd8.zip' + 'git-for-windows$gitctsjb7c2bd.zip' + 'google$orbitctsj9bbeaf.zip' + 'libfuse$libfusectsj7a66a4.zip' + 'luigirizzo$netmapctsj6417fa.zip' + 'mawww$kakounectsjc54fab.zip' + 'microsoft$node-native-keymapctsj4cc9a2.zip' + 'nem0$LumixEnginectsjfab756.zip' + 'pocoproject$pococtsj26b932.zip' + 'quickfix$quickfixctsjebfd13.zip' + 'rui314$moldctsjfec16a.zip' + 'swig$swigctsj78bcd3.zip' + 'tdlib$telegram-bot-apictsj8529d9.zip' + 'timescale$timescaledbctsjf617cf.zip' + 'xoreaxeaxeax$movfuscatorctsj8f7e5b.zip' + 'xrootd$xrootdctsje4b745.zip' + #+END_SRC + + The selection of 11 repositories, from an initial collection of 6000 was made + using a collection of Python/pandas scripts made for the purpose, the
[[https://github.com/hohn/mrvacommander/blob/hohn-0.1.21.2-improve-structure-and-docs/client/qldbtools/README.md#installation][qldbtools]] + package. The resulting selection, in the format expected by the VS Code + extension, follows. + #+BEGIN_SRC text + cat /data/qldbtools/scratch/vscode-selection.json + { + "version": 1, + "databases": { + "variantAnalysis": { + "repositoryLists": [ + { + "name": "mirva-list", + "repositories": [ + "xoreaxeaxeax/movfuscatorctsj8f7e5b", + "microsoft/node-native-keymapctsj4cc9a2", + "BoomingTech/Piccoloctsj6d7177", + "USCiLab/cerealctsj264953", + "KhronosGroup/OpenXR-SDKctsj984ee6", + "tdlib/telegram-bot-apictsj8529d9", + "WinMerge/winmergectsj101305", + "timescale/timescaledbctsjf617cf", + "pocoproject/pococtsj26b932", + "quickfix/quickfixctsjebfd13", + "libfuse/libfusectsj7a66a4" + ] + } + ], + "owners": [], + "repositories": [] + } + }, + "selected": { + "kind": "variantAnalysisUserDefinedList", + "listName": "mirva-list" + } + #+END_SRC + + This selection is deceptively simple. For a full explanation, see [[file:cli-end-to-end-detailed.org::*Repository Selection][Repository + Selection]] in the detailed version of this document. + +** Optional: The meaning of the names + The repository names all end with =ctsj= followed by 6 hex digits like + =ctsj4cc9a2=. + + The information critical for selection of databases is in the columns + 1. owner + 2. name + 3. language + 4. "sha" + 5. "cliVersion" + 6. "creationTime" + + There are others that may be useful, but they are not strictly required. + + The critical ones deserve more explanation: + 1. "sha": The =git= commit SHA of the repository the CodeQL database was + created from. Required to distinguish query results over the evolution of + a code base. + 2. "cliVersion": The version of the CodeQL CLI used to create the database. + Required to identify advances/regressions originating from the CodeQL binary. + 3. "creationTime": The time the database was created.
Required (or at least + very handy) for following the evolution of query results over time. + + There is a computed column, CID. The CID column combines + - cliVersion + - creationTime + - language + - sha + into a single 6-character string via hashing. Together with (owner, repo) it + provides a unique index for every DB. + + + For this document, we simply use a pseudo-random selection of 11 databases via + #+BEGIN_SRC sh + ./bin/mc-db-generate-selection -n 11 \ + scratch/vscode-selection.json \ + scratch/gh-mrva-selection.json \ + < scratch/db-info-3.csv + #+END_SRC + + Note that these use pseudo-random numbers, so the selection is in fact + deterministic. + +* Starting the server + Clone the full repository before continuing: + #+BEGIN_SRC sh + mkdir -p ~/work-gh/mrva/ + git clone git@github.com:hohn/mrvacommander.git + #+END_SRC + + Make sure Docker is installed and running. + With docker-compose set up and this repository cloned, we just run + #+BEGIN_SRC sh + cd ~/work-gh/mrva/mrvacommander + docker-compose -f docker-compose-demo.yml up -d + #+END_SRC + and wait until the log output no longer changes. + Should look like + #+BEGIN_SRC text + docker-compose -f docker-compose-demo.yml up -d + [+] Running 27/6 + ✔ dbstore Pulled 1.1s + ✔ artifactstore Pulled 1.1s + ✔ mrvadata 3 layers [⣿⣿⣿] 0B/0B Pulled 263.8s + ✔ server 2 layers [⣿⣿] 0B/0B Pulled 25.2s + ✔ agent 5 layers [⣿⣿⣿⣿⣿] 0B/0B Pulled 24.9s + ✔ client-qldbtools 11 layers [⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿] 0B/0B Pulled 20.8s + [+] Running 9/9 + ✔ Container mrvadata Started 0.3s + ✔ Container mrvacommander-client-qldbtools-1 Started 0.3s + ✔ Container mrvacommander-client-ghmrva-1 Running 0.0s + ✔ Container mrvacommander-code-server-1 Running 0.0s + ✔ Container artifactstore Running 0.0s + ✔ Container rabbitmq Running 0.0s + ✔ Container dbstore Started 0.4s + ✔ Container agent Started 0.5s + ✔ Container server Started 0.5s + #+END_SRC + + + The content is prepopulated in the =dbstore= container. 
+ +** Optional: Inspect the Backing Store + As a completely optional step, you can inspect the backing store: + #+BEGIN_SRC sh + docker exec -it dbstore /bin/bash + ls /data/qldb/ + # 'BoomingTech$Piccoloctsj6d7177.zip' 'mawww$kakounectsjc54fab.zip' + # 'KhronosGroup$OpenXR-SDKctsj984ee6.zip' 'microsoft$node-native-keymapctsj4cc9a2.zip' + # ... + #+END_SRC + +** Optional: Inspect the MinIO DB + As another completely optional step, you can inspect the minio DB contents if you + have the minio cli installed: + #+BEGIN_SRC sh + # Configuration + MINIO_ALIAS="qldbminio" + MINIO_URL="http://localhost:9000" + MINIO_ROOT_USER="user" + MINIO_ROOT_PASSWORD="mmusty8432" + QL_DB_BUCKET_NAME="qldb" + + # Check for MinIO client + if ! command -v mc &> /dev/null + then + echo "MinIO client (mc) not found." + fi + + # Configure MinIO client + mc alias set $MINIO_ALIAS $MINIO_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD + + # Show contents + mc ls qldbminio/qldb + #+END_SRC + +* Running the gh-mrva command-line client + The first run uses the test query to verify basic functionality, but it returns + no results. + +** Run MRVA from command line + # From ~/work-gh/mrva/gh-mrva + + 1. Check mrva cli + #+BEGIN_SRC sh + docker exec -it mrvacommander-client-ghmrva-1 /usr/local/bin/gh-mrva -h + #+END_SRC + + 2. Set up the configuration + #+BEGIN_SRC sh + docker exec -i mrvacommander-client-ghmrva-1 \ + sh -c 'mkdir -p /root/.config/gh-mrva/' + + cat | docker exec -i mrvacommander-client-ghmrva-1 \ + sh -c 'cat > /root/.config/gh-mrva/config.yml' < /root/work-gh/mrva/gh-mrva/gh-mrva-selection.json' < /root/work-gh/mrva/gh-mrva/FlatBuffersFunc.ql' < /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' < scratch/selection-full-info - csvcut -c path scratch/selection-full-info - #+END_SRC - Use one of these databases to write a query. It need not produce results.
- #+BEGIN_SRC sh - cd ~/work-gh/mrva/gh-mrva/ - code gh-mrva.code-workspace - #+END_SRC - In this case, the trivial =findPrintf=: + In this case, the trivial =alu_mul=, +alu_mul for https://github.com/xoreaxeaxeax/movfuscator/blob/master/movfuscator/movfuscator.c #+BEGIN_SRC java /** - ,* @name findPrintf - ,* @description find calls to plain fprintf + ,* @name findalu + ,* @description find calls to a function ,* @kind problem - ,* @id cpp-fprintf-call + ,* @id cpp-call ,* @problem.severity warning ,*/ @@ -353,34 +350,77 @@ from FunctionCall fc where - fc.getTarget().getName() = "fprintf" - select fc, "call of fprintf" + fc.getTarget().getName() = "alu_mul" + select fc, "call of alu_mul" #+END_SRC Repeat the submit steps with this query - 1. -- - 2. -- - 3. Submit the mrva job + 1. [X] -- + 2. [X] -- + 3. [ ] Provide the CodeQL query #+BEGIN_SRC sh - cp ~/work-gh/mrva/mrvacommander/client/qldbtools/scratch/gh-mrva-selection.json \ - ~/work-gh/mrva/gh-mrva/gh-mrva-selection.json + cat | docker exec -i mrvacommander-client-ghmrva-1 \ + sh -c 'cat > /root/work-gh/mrva/gh-mrva/Alu_Mul.ql' <