Add script to populate minio using dataframe previously chosen
This commit is contained in:
committed by
=Michael Hohn
parent
26dd69c976
commit
242ba3fc1e
@@ -59,8 +59,10 @@ import qldbtools as ql
|
|||||||
## Command-line use
|
## Command-line use
|
||||||
|
|
||||||
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
|
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
|
||||||
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download | gzip > db-info-1.csv.gz
|
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > db-info-1.csv
|
||||||
|
|
||||||
gunzip < db-info-1.csv.gz | ./bin/mc-db-refine-info | gzip > db-info-2.csv.gz
|
./bin/mc-db-refine-info < db-info-1.csv > db-info-2.csv
|
||||||
|
|
||||||
|
./bin/mc-db-populate-minio < db-info-2.csv -n 3
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
@@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
""" Read a table of CodeQL DB information (like those produced by
|
||||||
|
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
|
||||||
|
DB.
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import qldbtools.utils as utils
|
||||||
|
import logging
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
from minio import Minio
|
||||||
|
from minio.error import S3Error
|
||||||
|
from pathlib import Path
|
||||||
|
#
#* Configure logger
#
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
# Overwrite log level set by minio
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

#
#* Process command line
#
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information (like those produced by
    mc-db-refine-info) and push the databases it lists to the mrvacommander minio
    DB. """)
parser.add_argument('-n', '--num-entries', type=int,
                    help='Only use N entries',
                    default=None)
parser.add_argument('-s', '--seed', type=int,
                    help='Random number seed',
                    default=4242)
args = parser.parse_args()

#
#* Collect the information and select subset
#
# The table arrives on stdin as CSV; the upload loop below reads its
# 'owner', 'name' and 'path' columns.
df = pd.read_csv(sys.stdin)
if args.num_entries is None:
    # Use all entries
    entries = df
else:
    # Use num_entries, chosen via pseudo-random numbers.  The fixed seed
    # makes the selection reproducible between runs on the same table.
    num_wanted = args.num_entries
    if num_wanted > len(df):
        # DataFrame.sample() raises ValueError when asked for more rows than
        # exist (without replacement); fall back to the whole table instead.
        logging.warning(f"Requested {num_wanted} entries but only {len(df)} "
                        f"are available; using all of them.")
        num_wanted = len(df)
    entries = df.sample(n=num_wanted,
                        random_state=np.random.RandomState(args.seed))

#
#* Push the DBs
#
# Configuration
# NOTE(review): endpoint and credentials are hard-coded for a local
# development server -- confirm before using against any shared instance.
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client.  Minio() takes a bare host:port endpoint, so the
# scheme is stripped from the URL and used only to decide whether TLS is on.
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=MINIO_URL.startswith("https://")
)

# Create the bucket if it doesn't exist.  A failure here is only logged;
# the uploads below are still attempted and report their own errors.
try:
    if not client.bucket_exists(QL_DB_BUCKET_NAME):
        client.make_bucket(QL_DB_BUCKET_NAME)
    else:
        logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
except S3Error as err:
    logging.error(f"Error creating bucket: {err}")

# Get info from dataframe and push the files.  Each database is stored under
# the object name '<owner>$<name>.zip'.
for owner, name, path in entries[['owner', 'name', 'path']].itertuples(
        index=False, name=None):
    new_name = f'{owner}${name}.zip'
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
        logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        # Report the file that actually failed and continue with the rest.
        logging.error(f"Error uploading file {path}: {err}")
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||||
|
# End:
|
||||||
65
client/qldbtools/qldbtools/session-populate-minio.py
Normal file
65
client/qldbtools/qldbtools/session-populate-minio.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import qldbtools.utils as utils
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
from minio import Minio
|
||||||
|
from minio.error import S3Error
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
#
#* Collect the information and select subset
#
df = pd.read_csv('db-info-2.csv')
seed = 4242
# Session toggle: flip to True to push every row instead of a small sample.
use_all_entries = False
if use_all_entries:
    entries = df
else:
    # Three rows, picked reproducibly from the fixed seed.
    rng = np.random.RandomState(seed)
    entries = df.sample(n=3, random_state=rng)

#
#* Push the DBs
#
# Configuration
MINIO_URL = "http://localhost:9000"
MINIO_ROOT_USER = "user"
MINIO_ROOT_PASSWORD = "mmusty8432"
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client; the endpoint is the URL with its scheme removed.
endpoint = MINIO_URL.replace("http://", "").replace("https://", "")
client = Minio(endpoint,
               access_key=MINIO_ROOT_USER,
               secret_key=MINIO_ROOT_PASSWORD,
               secure=False)

# Make sure the target bucket is there before uploading.
try:
    if client.bucket_exists(QL_DB_BUCKET_NAME):
        print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
    else:
        client.make_bucket(QL_DB_BUCKET_NAME)
except S3Error as err:
    print(f"Error creating bucket: {err}")

# (test) Local DB archives, relative to the checkout, mapped to the object
# names they get in the bucket.
files_to_upload = {
    "cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip": "google$flatbuffers.zip",
    "cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip": "psycopg$psycopg2.zip"
}

# (test) Push the files
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
    source_file = prefix / Path(local_path)
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, source_file)
    except S3Error as err:
        print(f"Error uploading file {local_path}: {err}")
    else:
        print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
|
||||||
|
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||||
|
# End:
|
||||||
@@ -80,6 +80,7 @@ lz4==4.3.3
|
|||||||
MarkupSafe==2.1.5
|
MarkupSafe==2.1.5
|
||||||
matplotlib==3.9.1
|
matplotlib==3.9.1
|
||||||
matplotlib-inline==0.1.7
|
matplotlib-inline==0.1.7
|
||||||
|
minio==7.2.7
|
||||||
missingno==0.5.2
|
missingno==0.5.2
|
||||||
mistune==3.0.2
|
mistune==3.0.2
|
||||||
msgpack==1.0.8
|
msgpack==1.0.8
|
||||||
@@ -115,6 +116,7 @@ pure-eval==0.2.2
|
|||||||
py-cpuinfo==9.0.0
|
py-cpuinfo==9.0.0
|
||||||
pyarrow==16.1.0
|
pyarrow==16.1.0
|
||||||
pycparser==2.22
|
pycparser==2.22
|
||||||
|
pycryptodome==3.20.0
|
||||||
Pygments==2.18.0
|
Pygments==2.18.0
|
||||||
pynput==1.7.7
|
pynput==1.7.7
|
||||||
pyobjc-core==10.3.1
|
pyobjc-core==10.3.1
|
||||||
@@ -133,6 +135,7 @@ python-json-logger==2.0.7
|
|||||||
pytz==2024.1
|
pytz==2024.1
|
||||||
PyYAML==6.0.1
|
PyYAML==6.0.1
|
||||||
pyzmq==26.0.3
|
pyzmq==26.0.3
|
||||||
|
-e git+ssh://git@github.com/advanced-security/mrvacommander.git@26dd69c9767c315a8ffb782eedf3b55eac574d45#egg=qldbtools&subdirectory=client/qldbtools
|
||||||
qtstylish==0.1.5
|
qtstylish==0.1.5
|
||||||
referencing==0.35.1
|
referencing==0.35.1
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
|
|||||||
Reference in New Issue
Block a user