From 742b059a49859b4f6855da37fd7be5e348c739d7 Mon Sep 17 00:00:00 2001
From: Michael Hohn
Date: Fri, 9 Aug 2024 08:37:31 -0700
Subject: [PATCH] Add script to list full details for a mrva-list file

---
 client/qldbtools/bin/mc-rows-from-mrva-list | 101 ++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100755 client/qldbtools/bin/mc-rows-from-mrva-list

diff --git a/client/qldbtools/bin/mc-rows-from-mrva-list b/client/qldbtools/bin/mc-rows-from-mrva-list
new file mode 100755
index 0000000..8495e6b
--- /dev/null
+++ b/client/qldbtools/bin/mc-rows-from-mrva-list
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+"""
+Script to list full details for a mrva-list file
+
+1. reads files containing
+{
+    "mirva-list": [
+        "NLPchina/elasticsearch-sqlctsj168cc4",
+        "LMAX-Exchange/disruptorctsj3e75ec",
+        "justauth/JustAuthctsj8a6177",
+        "FasterXML/jackson-modules-basectsj2fe248",
+        "ionic-team/capacitor-pluginsctsj38d457",
+        "PaddlePaddle/PaddleOCRctsj60e555",
+        "elastic/apm-agent-pythonctsj21dc64",
+        "flipkart-incubator/zjsonpatchctsjc4db35",
+        "stephane/libmodbusctsj54237e",
+        "wso2/carbon-kernelctsj5a8a6e",
+        "apache/servicecomb-packctsj4d98f5"
+    ]
+}
+2. reads a pandas dataframe stored in a csv file
+3. selects all rows from 2. that contain the 'owner' column matching
+   the string before the slash from 1. and the 'name' column matching
+   the string between the slash and the marker 'ctsj'
+4. writes the selected rows to stdout as csv
+"""
+import argparse
+import json
+import sys
+
+#* Names fixed by the input format
+LIST_KEY = 'mirva-list'   # top-level JSON key that holds the entries
+MARKER = 'ctsj'           # text that follows the repository name in an entry
+
+
+#* Step 1: Read the JSON file containing the "mirva-list"
+def parse_entry(entry):
+    """Split one list entry into an (owner, name) tuple.
+
+    The owner is the text before the first slash; the name is the text
+    after that slash, up to the last occurrence of the marker 'ctsj'.
+    An entry without the marker keeps its whole name part.
+
+    Raises ValueError for an entry that contains no slash.
+    """
+    parts = entry.split('/')
+    if len(parts) < 2:
+        raise ValueError(
+            "expected 'owner/name{}...', got {!r}".format(MARKER, entry))
+    # Cut at the LAST marker: a repository name may itself contain 'ctsj',
+    # and cutting at the first one would truncate such a name.
+    return parts[0], parts[1].rsplit(MARKER, 1)[0]
+
+
+def read_mrva_list(path):
+    """Return the (owner, name) tuples for every entry of the JSON file."""
+    with open(path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    return [parse_entry(item) for item in data[LIST_KEY]]
+
+
+#* Step 3: Filter the dataframe based on the parsed "mirva-list"
+def select_rows(df, pairs):
+    """Return the rows of df whose ('owner', 'name') pair is in pairs.
+
+    The frame's row order, index and columns are kept as they are.
+    """
+    # Imported here rather than at the top of the file, so that argument
+    # parsing and reading the list do not pay for loading pandas.
+    import pandas as pd
+    wanted = set(pairs)         # one hash lookup per row
+    # An explicit boolean Series also works for a frame without rows.
+    mask = pd.Series(
+        [pair in wanted for pair in zip(df['owner'], df['name'])],
+        index=df.index, dtype=bool)
+    return df[mask]
+
+
+#* Process command line and run steps 1 to 3
+def main():
+    """Parse the arguments, select the rows and print them as csv."""
+    parser = argparse.ArgumentParser(
+        description="""Script to list full details for a mrva-list file""")
+    parser.add_argument('mrva_list', type=str,
+                        help='The JSON file containing the mrva-list')
+    parser.add_argument('info_csv', type=str,
+                        help='The CSV file containing the full information')
+    args = parser.parse_args()
+
+    pairs = read_mrva_list(args.mrva_list)
+
+    #* Step 2: Read the CSV file into a pandas dataframe
+    import pandas as pd
+    df = pd.read_csv(args.info_csv)
+
+    # The selected rows go to stdout; redirect to keep them in a file.
+    select_rows(df, pairs).to_csv(sys.stdout, index=False)
+
+
+if __name__ == '__main__':
+    main()