#!/usr/bin/env python
"""
Script to list full details for a mrva-list file

1. reads a JSON file of the form
{
    "mirva-list": [
        "NLPchina/elasticsearch-sqlctsj168cc4",
        "LMAX-Exchange/disruptorctsj3e75ec",
        "justauth/JustAuthctsj8a6177",
        "FasterXML/jackson-modules-basectsj2fe248",
        "ionic-team/capacitor-pluginsctsj38d457",
        "PaddlePaddle/PaddleOCRctsj60e555",
        "elastic/apm-agent-pythonctsj21dc64",
        "flipkart-incubator/zjsonpatchctsjc4db35",
        "stephane/libmodbusctsj54237e",
        "wso2/carbon-kernelctsj5a8a6e",
        "apache/servicecomb-packctsj4d98f5"
    ]
}
2. reads a CSV file into a pandas dataframe
3. selects all rows from 2. whose 'owner' column equals the string before
   the slash in an entry from 1. and whose 'name' column equals the string
   between the slash and the marker 'ctsj'
4. writes the selected rows to stdout in CSV format

"""
import argparse
import json
import sys

# Third-party import, grouped after the standard-library imports above.
import pandas as pd

# Marker separating the repository name from the trailing hash suffix in
# every "mirva-list" entry, e.g. "LMAX-Exchange/disruptorctsj3e75ec".
MARKER = 'ctsj'


#
#* Step 1: Read and parse the JSON file containing the "mirva-list"
#
def parse_mirva_entry(item):
    """Split one "mirva-list" entry into an ``(owner, name)`` tuple.

    The owner is the text before the first slash.  The name is the text
    after that slash, up to the LAST occurrence of the 'ctsj' marker, so a
    repository whose own name contains 'ctsj' is not truncated.  An entry
    without any marker keeps its whole second path component as the name.

    Raises:
        ValueError: if the entry contains no slash.
    """
    parts = item.split('/')
    if len(parts) < 2:
        raise ValueError(
            f"mirva-list entry {item!r} is not of the form 'owner/name'")
    # rsplit with maxsplit=1 cuts only at the final marker.
    return parts[0], parts[1].rsplit(MARKER, 1)[0]


def load_mirva_list(path):
    """Return the parsed ``(owner, name)`` tuples from the JSON file at *path*.

    The file must hold an object whose "mirva-list" key is a list of entry
    strings; a missing key raises KeyError.
    """
    # JSON is read as UTF-8 rather than the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return [parse_mirva_entry(item) for item in data['mirva-list']]


#
#* Step 3: Filter the dataframe based on the parsed "mirva-list"
#
def select_rows(df, pairs):
    """Return the rows of *df* whose (owner, name) pair occurs in *pairs*.

    *df* must have 'owner' and 'name' columns.  Row order and all columns
    are preserved.
    """
    # A set gives constant-time membership tests, avoiding a scan of the
    # whole list for every row.
    wanted = set(pairs)
    mask = pd.Series(
        [pair in wanted for pair in zip(df['owner'], df['name'])],
        index=df.index, dtype=bool)
    return df[mask]


def main(argv=None):
    """Parse the command line, filter the CSV and print the result.

    *argv* is the argument list to parse; None means ``sys.argv[1:]``.
    """
    #
    #* Process command line
    #
    parser = argparse.ArgumentParser(
        description="""Script to list full details for a mrva-list file""")
    parser.add_argument('mrva_list', type=str,
                        help='The JSON file containing the mrva-list')
    parser.add_argument('info_csv', type=str,
                        help='The CSV file containing the full information')
    args = parser.parse_args(argv)

    parsed_mirva_list = load_mirva_list(args.mrva_list)

    #* Step 2: Read the CSV file into a pandas dataframe
    df = pd.read_csv(args.info_csv)

    # Write the selected rows to stdout as CSV, without the index column.
    select_rows(df, parsed_mirva_list).to_csv(sys.stdout, index=False)


if __name__ == '__main__':
    main()