Add script to populate minio using dataframe previously chosen
This commit is contained in:
committed by
=Michael Hohn
parent
26dd69c976
commit
242ba3fc1e
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
86
client/qldbtools/bin/mc-db-populate-minio
Executable file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python
|
||||
""" Read a table of CodeQL DB information (like those produced by
|
||||
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
|
||||
DB.
|
||||
"""
|
||||
import argparse
|
||||
import qldbtools.utils as utils
|
||||
import logging
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import sys
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
from pathlib import Path
|
||||
#
|
||||
#* Configure logger
|
||||
#
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
# Overwrite log level set by minio
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.setLevel(logging.INFO)
|
||||
|
||||
#
|
||||
#* Process command line
|
||||
#
|
||||
parser = argparse.ArgumentParser(
|
||||
description=""" Read a table of CodeQL DB information (like those produced by
|
||||
mc-db-refine-info) and push the databases it lists to the mrvacommander minio
|
||||
DB. """)
|
||||
parser.add_argument('-n', '--num-entries', type=int,
|
||||
help='Only use N entries',
|
||||
default=None)
|
||||
parser.add_argument('-s', '--seed', type=int,
|
||||
help='Random number seed',
|
||||
default=4242)
|
||||
args = parser.parse_args()
|
||||
#
|
||||
#* Collect the information and select subset
|
||||
#
|
||||
df = pd.read_csv(sys.stdin)
|
||||
if args.num_entries == None:
|
||||
# Use all entries
|
||||
entries = df
|
||||
else:
|
||||
# Use num_entries, chosen via pseudo-random numbers
|
||||
entries = df.sample(n=args.num_entries,
|
||||
random_state=np.random.RandomState(args.seed))
|
||||
#
|
||||
#* Push the DBs
|
||||
#
|
||||
# Configuration
|
||||
MINIO_URL = "http://localhost:9000"
|
||||
MINIO_ROOT_USER = "user"
|
||||
MINIO_ROOT_PASSWORD = "mmusty8432"
|
||||
QL_DB_BUCKET_NAME = "qldb"
|
||||
|
||||
# Initialize MinIO client
|
||||
client = Minio(
|
||||
MINIO_URL.replace("http://", "").replace("https://", ""),
|
||||
access_key=MINIO_ROOT_USER,
|
||||
secret_key=MINIO_ROOT_PASSWORD,
|
||||
secure=False
|
||||
)
|
||||
|
||||
# Create the bucket if it doesn't exist
|
||||
try:
|
||||
if not client.bucket_exists(QL_DB_BUCKET_NAME):
|
||||
client.make_bucket(QL_DB_BUCKET_NAME)
|
||||
else:
|
||||
logging.info(f"Bucket '{QL_DB_BUCKET_NAME}' already exists.")
|
||||
except S3Error as err:
|
||||
logging.error(f"Error creating bucket: {err}")
|
||||
|
||||
# Get info from dataframe and push the files
|
||||
for index, row in entries[['owner', 'name', 'path']].iterrows():
|
||||
owner, name, path = row
|
||||
new_name = f'{owner}${name}.zip'
|
||||
try:
|
||||
client.fput_object(QL_DB_BUCKET_NAME, new_name, path)
|
||||
logging.info(f"Uploaded {path} as {new_name} to bucket {QL_DB_BUCKET_NAME}")
|
||||
except S3Error as err:
|
||||
logging.error(f"Error uploading file {local_path}: {err}")
|
||||
|
||||
# Local Variables:
|
||||
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
||||
# End:
|
||||
Reference in New Issue
Block a user