Add script to list full details for a mrva-list file

2024-08-09 08:37:31 -07:00
parent d1f56ae196
commit 742b059a49
1 changed files with 64 additions and 0 deletions
--- a/client/qldbtools/bin/mc-rows-from-mrva-list
+++ b/client/qldbtools/bin/mc-rows-from-mrva-list
@@ -0,0 +1,64 @@
 #!/usr/bin/env python
 """
 Script to list full details for every repository named in a mrva-list file
 1. Reads a JSON file (first argument) whose top-level key is "mirva-list":
 {
    "mirva-list": [
        "NLPchina/elasticsearch-sqlctsj168cc4",
        "LMAX-Exchange/disruptorctsj3e75ec",
        "justauth/JustAuthctsj8a6177",
        "FasterXML/jackson-modules-basectsj2fe248",
        "ionic-team/capacitor-pluginsctsj38d457",
        "PaddlePaddle/PaddleOCRctsj60e555",
        "elastic/apm-agent-pythonctsj21dc64",
        "flipkart-incubator/zjsonpatchctsjc4db35",
        "stephane/libmodbusctsj54237e",
        "wso2/carbon-kernelctsj5a8a6e",
        "apache/servicecomb-packctsj4d98f5"
    ]
 }
 2. Reads a CSV file (second argument) into a pandas dataframe.
 3. Selects every row from 2. whose 'owner' column equals the string
    before the slash in an entry from 1. and whose 'name' column equals
    the string between the slash and the marker 'ctsj', then writes the
    selected rows to stdout as CSV.
 """
 import argparse
 import json
 import sys
 #
 #* Process command line
 #
 # Both inputs are required positional arguments, given in this order:
 # first the JSON list, then the CSV holding the full per-repository rows.
 parser = argparse.ArgumentParser(
     description="""Script to list full details for a mrva-list file""",
 )
 for positional, help_text in (
     ('mrva_list', 'The JSON file containing the mrva-list'),
     ('info_csv', 'The CSV file containing the full information'),
 ):
     parser.add_argument(positional, type=str, help=help_text)
 args = parser.parse_args()
 #* Step 1: Read the JSON file containing the "mirva-list"
 # JSON text is UTF-8, so decode it explicitly instead of relying on the
 # platform's default text encoding.
 with open(args.mrva_list, 'r', encoding='utf-8') as f:
     data = json.load(f)
 # Extract and parse the "mirva-list": each entry has the form
 # "<owner>/<name>ctsj<suffix>" and is reduced to an (owner, name) tuple.
 mirva_list = data['mirva-list']
 parsed_mirva_list = []
 for item in mirva_list:
     # Split once; an entry without a slash still fails with IndexError.
     parts = item.split('/')
     owner_name = parts[0]
     # 'ctsj' introduces the trailing suffix, so cut at its LAST occurrence;
     # cutting at the first one would truncate a repository name that
     # itself contains 'ctsj'.
     repo_name = parts[1].rsplit('ctsj', 1)[0]
     parsed_mirva_list.append((owner_name, repo_name))
 #* Step 2: Read the CSV file into a pandas dataframe
 # pandas is imported only at this point, so command-line and JSON errors
 # from the earlier steps are reported without first loading pandas.
 import pandas as pd
 df = pd.read_csv(args.info_csv)
 #* Step 3: Filter the dataframe based on the parsed "mirva-list"
 # Test each row's (owner, name) pair against a set: one hash lookup per
 # row instead of scanning the whole list from inside a row-wise apply().
 wanted_pairs = set(parsed_mirva_list)
 row_selected = pd.Series(
     [pair in wanted_pairs for pair in zip(df['owner'], df['name'])],
     index=df.index, dtype=bool)
 # The mask is an explicitly boolean Series so that a CSV with a header but
 # no data rows is still treated as row selection and keeps its columns.
 filtered_df = df[row_selected]
 # Write the selected rows to stdout as CSV, without the index column
 filtered_df.to_csv(sys.stdout, index=False)