Add script to populate minio using dataframe previously chosen
This commit is contained in:
committed by
=Michael Hohn
parent
26dd69c976
commit
242ba3fc1e
@@ -59,8 +59,10 @@ import qldbtools as ql
|
||||
## Command-line use
|
||||
|
||||
cd ~/work-gh/mrva/mrvacommander/client/qldbtools
|
||||
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download | gzip > db-info-1.csv.gz
|
||||
./bin/mc-db-initial-info ~/work-gh/mrva/mrva-open-source-download > db-info-1.csv
|
||||
|
||||
gunzip < db-info-1.csv.gz | ./bin/mc-db-refine-info | gzip > db-info-2.csv.gz
|
||||
./bin/mc-db-refine-info < db-info-1.csv > db-info-2.csv
|
||||
|
||||
./bin/mc-db-populate-minio < db-info-2.csv -n 3
|
||||
|
||||
|
||||
|
||||
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python
|
||||
""" Read a table of CodeQL DB information (like those produced by
|
||||
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
|
||||
DB.
|
||||
"""
|
||||
import argparse
|
||||
import qldbtools.utils as utils
|
||||
import logging
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import sys
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
from pathlib import Path
|
||||
#
|
||||
#* Configure logger
|
||||
#
|
||||
# Timestamped messages at INFO level for the whole script.
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

# Set the root logger level explicitly a second time: per the original note,
# the level would otherwise be overwritten by minio.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
|
||||
|
||||
#
|
||||
#* Process command line
|
||||
#
|
||||
# Command-line interface:
#   -n / --num-entries  optional int; when omitted, every row of the input
#                       table is used
#   -s / --seed         int seed for the pseudo-random row selection
parser = argparse.ArgumentParser(
    description=""" Read a table of CodeQL DB information (like those produced by
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
DB. """)
parser.add_argument(
    '-n', '--num-entries',
    default=None,
    type=int,
    help='Only use N entries',
)
parser.add_argument(
    '-s', '--seed',
    default=4242,
    type=int,
    help='Random number seed',
)
args = parser.parse_args()
|
||||
#
|
||||
#* Collect the information and select subset
|
||||
#
|
||||
# Read the DB information table (CSV) from standard input.
df = pd.read_csv(sys.stdin)

if args.num_entries is None:
    # No -n given: use all entries
    entries = df
else:
    # Use num_entries rows, chosen via pseudo-random numbers.  The same seed
    # and the same input table give the same selection.
    # DataFrame.sample() raises ValueError when asked for more rows than the
    # table holds, so cap the request at the table size.
    sample_size = min(args.num_entries, len(df))
    if sample_size < args.num_entries:
        logging.warning(
            f"Requested {args.num_entries} entries but the table only has "
            f"{len(df)}; using all of them.")
    entries = df.sample(n=sample_size,
                        random_state=np.random.RandomState(args.seed))
|
||||
#
|
||||
#* Push the DBs
|
||||
#
|
||||
# Configuration
|
||||
import os

# Connection settings.  MINIO_URL, MINIO_ROOT_USER and MINIO_ROOT_PASSWORD
# may be overridden through environment variables of the same name; the
# fallbacks are the local development values this script always used.
MINIO_URL = os.environ.get("MINIO_URL", "http://localhost:9000")
MINIO_ROOT_USER = os.environ.get("MINIO_ROOT_USER", "user")
MINIO_ROOT_PASSWORD = os.environ.get("MINIO_ROOT_PASSWORD", "mmusty8432")
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client.
# Minio() takes a bare host[:port] endpoint, so the scheme is stripped from
# the URL and conveyed through `secure` instead: TLS is used only when the
# URL says https.
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=MINIO_URL.startswith("https://"),
)
|
||||
|
||||
# Create the bucket if it doesn't exist
|
||||
# Make sure the target bucket is present before uploading.  An S3Error from
# either call is logged and the script carries on.
try:
    bucket_present = client.bucket_exists(QL_DB_BUCKET_NAME)
    if bucket_present:
        logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
    else:
        client.make_bucket(QL_DB_BUCKET_NAME)
except S3Error as err:
    logging.error(f"Error creating bucket: {err}")
|
||||
|
||||
# Get info from dataframe and push the files
|
||||
# Upload every selected database.  Each row supplies the repository owner,
# the repository name and the local path of the DB archive; the object is
# stored in the bucket as '<owner>$<name>.zip'.
selected = entries[['owner', 'name', 'path']]
for owner, name, path in selected.itertuples(index=False, name=None):
    new_name = f'{owner}${name}.zip'
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
        logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        # Report the failure and continue with the remaining databases.
        # (The original handler referenced an undefined name here, turning
        # every upload failure into a NameError.)
        logging.error(f"Error uploading file {path}: {err}")
|
||||
|
||||
# Local Variables:
|
||||
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||
# End:
|
||||
65
client/qldbtools/qldbtools/session-populate-minio.py
Normal file
65
client/qldbtools/qldbtools/session-populate-minio.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import qldbtools.utils as utils
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import sys
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
from pathlib import Path
|
||||
|
||||
#
|
||||
#* Collect the information and select subset
|
||||
#
|
||||
# Load the refined DB information table from the working directory.
df = pd.read_csv('db-info-2.csv')
seed = 4242

# Session toggle: set to True to work with every row instead of a sample.
use_all_entries = False
if use_all_entries:
    entries = df
else:
    # Three rows, chosen via pseudo-random numbers from a fixed seed
    entries = df.sample(random_state=np.random.RandomState(seed), n=3)
|
||||
#
|
||||
#* Push the DBs
|
||||
#
|
||||
# Configuration
|
||||
import os

# Connection settings.  MINIO_URL, MINIO_ROOT_USER and MINIO_ROOT_PASSWORD
# may be overridden through environment variables of the same name; the
# fallbacks are the local development values this session always used.
MINIO_URL = os.environ.get("MINIO_URL", "http://localhost:9000")
MINIO_ROOT_USER = os.environ.get("MINIO_ROOT_USER", "user")
MINIO_ROOT_PASSWORD = os.environ.get("MINIO_ROOT_PASSWORD", "mmusty8432")
QL_DB_BUCKET_NAME = "qldb"

# Initialize MinIO client.
# Minio() takes a bare host[:port] endpoint, so the scheme is stripped from
# the URL and conveyed through `secure` instead: TLS is used only when the
# URL says https.
client = Minio(
    MINIO_URL.replace("http://", "").replace("https://", ""),
    access_key=MINIO_ROOT_USER,
    secret_key=MINIO_ROOT_PASSWORD,
    secure=MINIO_URL.startswith("https://"),
)
|
||||
|
||||
# Create the bucket if it doesn't exist
|
||||
# Make sure the target bucket is present.  An S3Error from either call is
# printed and the session carries on.
try:
    bucket_present = client.bucket_exists(QL_DB_BUCKET_NAME)
    if bucket_present:
        print(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
    else:
        client.make_bucket(QL_DB_BUCKET_NAME)
except S3Error as err:
    print(f"Error creating bucket: {err}")
|
||||
|
||||
# (test) File paths and new names
|
||||
# (test) Two fixed archives: path relative to the checkout -> object name
files_to_upload = {
    "cmd/server/codeql/dbs/google/flatbuffers/google_flatbuffers_db.zip":
        "google$flatbuffers.zip",
    "cmd/server/codeql/dbs/psycopg/psycopg2/psycopg_psycopg2_db.zip":
        "psycopg$psycopg2.zip",
}

# (test) Push the files, resolving each relative path against the checkout
# root.  A failed upload is printed and the loop moves on to the next file.
prefix = Path('/Users/hohn/work-gh/mrva/mrvacommander')
for local_path, new_name in files_to_upload.items():
    source_file = prefix / local_path
    try:
        client.fput_object(QL_DB_BUCKET_NAME, new_name, source_file)
        print(f"Uploaded {local_path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
    except S3Error as err:
        print(f"Error uploading file {local_path}: {err}")
|
||||
|
||||
|
||||
# Local Variables:
|
||||
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||
# End:
|
||||
@@ -80,6 +80,7 @@ lz4==4.3.3
|
||||
MarkupSafe==2.1.5
|
||||
matplotlib==3.9.1
|
||||
matplotlib-inline==0.1.7
|
||||
minio==7.2.7
|
||||
missingno==0.5.2
|
||||
mistune==3.0.2
|
||||
msgpack==1.0.8
|
||||
@@ -115,6 +116,7 @@ pure-eval==0.2.2
|
||||
py-cpuinfo==9.0.0
|
||||
pyarrow==16.1.0
|
||||
pycparser==2.22
|
||||
pycryptodome==3.20.0
|
||||
Pygments==2.18.0
|
||||
pynput==1.7.7
|
||||
pyobjc-core==10.3.1
|
||||
@@ -133,6 +135,7 @@ python-json-logger==2.0.7
|
||||
pytz==2024.1
|
||||
PyYAML==6.0.1
|
||||
pyzmq==26.0.3
|
||||
-e git+ssh://git@github.com/advanced-security/mrvacommander.git@26dd69c9767c315a8ffb782eedf3b55eac574d45#egg=qldbtools&subdirectory=client/qldbtools
|
||||
qtstylish==0.1.5
|
||||
referencing==0.35.1
|
||||
requests==2.32.3
|
||||
|
||||
Reference in New Issue
Block a user