mrvacommander/client/qldbtools/bin/mc-rows-from-mrva-list

#!/usr/bin/env python
"""
Script to list full details for a mrva-list file

1. reads a JSON file containing
{
    "mirva-list": [
        "NLPchina/elasticsearch-sqlctsj168cc4",
        "LMAX-Exchange/disruptorctsj3e75ec",
        "justauth/JustAuthctsj8a6177",
        "FasterXML/jackson-modules-basectsj2fe248",
        "ionic-team/capacitor-pluginsctsj38d457",
        "PaddlePaddle/PaddleOCRctsj60e555",
        "elastic/apm-agent-pythonctsj21dc64",
        "flipkart-incubator/zjsonpatchctsjc4db35",
        "stephane/libmodbusctsj54237e",
        "wso2/carbon-kernelctsj5a8a6e",
        "apache/servicecomb-packctsj4d98f5"
    ]
}
2. reads a csv file into a pandas dataframe
3. selects all rows from 2. whose
   - 'owner' column matches the string before the slash from 1.,
   - 'name' column matches the string between the slash and the marker
     'ctsj', and
   - 'CID' column matches the string after the marker 'ctsj'

"""
import argparse
import json
import sys

# pandas is imported here, next to the code that uses it, as in the original
# layout of this script.
import pandas as pd

# Marker separating the repository name from the CID in a list entry.
CID_MARKER = 'ctsj'

# Columns compared against the list entries.  They are read as text so that
# an all-digit CID (or one with leading zeros) is not converted to a number
# and still compares equal to the string taken from the list.
KEY_COLUMNS = ('owner', 'name', 'CID')


def parse_mrva_entry(item):
    """Split one list entry into an ``(owner, name, cid)`` tuple.

    An entry has the form ``<owner>/<name>ctsj<cid>``.  The owner is the
    text before the first slash; the name and CID are separated at the
    *last* ``ctsj`` marker, so a repository name that itself contains
    ``ctsj`` is still parsed correctly.

    Raises:
        ValueError: if the entry has no slash or no ``ctsj`` marker after it.
    """
    owner, slash, rest = item.partition('/')
    name, marker, cid = rest.rpartition(CID_MARKER)
    if not slash or not marker:
        raise ValueError(
            f"malformed mirva-list entry {item!r}; "
            f"expected '<owner>/<name>{CID_MARKER}<cid>'")
    return owner, name, cid


def read_mrva_list(path):
    """Return the set of ``(owner, name, cid)`` tuples listed in *path*.

    *path* names a JSON file whose top-level object has a "mirva-list" key
    holding a list of entry strings.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # A set gives constant-time membership tests while filtering.
    return {parse_mrva_entry(item) for item in data['mirva-list']}


def read_info_csv(source):
    """Read the info CSV (*source* is a path or file object) into a dataframe.

    The key columns are forced to ``str``; all other columns keep the dtype
    pandas infers for them.
    """
    return pd.read_csv(source, dtype={column: str for column in KEY_COLUMNS})


def select_rows(df, wanted):
    """Return the rows of *df* whose (owner, name, CID) triple is in *wanted*.

    The mask is built as a boolean Series aligned with ``df.index`` so that
    an empty dataframe yields an empty result with its columns intact.
    """
    keys = zip(*(df[column] for column in KEY_COLUMNS))
    mask = pd.Series([key in wanted for key in keys],
                     index=df.index, dtype=bool)
    return df[mask]


def main(argv=None):
    """Parse the command line, filter the CSV, and write the result to stdout."""
    #
    #* Process command line
    #
    parser = argparse.ArgumentParser(
        description="""Script to list full details for a mrva-list file""")
    parser.add_argument('mrva_list', type=str,
                        help='The JSON file containing the mrva-list')
    parser.add_argument('info_csv', type=str,
                        help='The CSV file containing the full information')
    args = parser.parse_args(argv)

    #* Step 1: Read and parse the JSON file containing the "mirva-list"
    wanted = read_mrva_list(args.mrva_list)

    #* Step 2: Read the CSV file into a pandas dataframe
    df = read_info_csv(args.info_csv)

    #* Step 3: Filter the dataframe based on the parsed "mirva-list"
    filtered_df = select_rows(df, wanted)

    # Emit the selected rows as CSV on stdout
    filtered_df.to_csv(sys.stdout, index=False)


if __name__ == "__main__":
    main()