Files
mrvacommander/client/qldbtools/bin/mc-rows-from-mrva-list

68 lines
2.2 KiB
Python
Executable File

#!/usr/bin/env python
"""
Script to list full details for a mrva-list file
1. reads a JSON file of the form
{
"mirva-list": [
"NLPchina/elasticsearch-sqlctsj168cc4",
"LMAX-Exchange/disruptorctsj3e75ec",
"justauth/JustAuthctsj8a6177",
"FasterXML/jackson-modules-basectsj2fe248",
"ionic-team/capacitor-pluginsctsj38d457",
"PaddlePaddle/PaddleOCRctsj60e555",
"elastic/apm-agent-pythonctsj21dc64",
"flipkart-incubator/zjsonpatchctsjc4db35",
"stephane/libmodbusctsj54237e",
"wso2/carbon-kernelctsj5a8a6e",
"apache/servicecomb-packctsj4d98f5"
]
}
2. reads a csv file into a pandas dataframe
3. selects all rows from 2. for which some entry from 1. has
   - the string before the slash equal to the 'owner' column,
   - the string between the slash and the marker 'ctsj' equal to the
     'name' column, and
   - the string after the marker 'ctsj' equal to the 'CID' column
4. writes the selected rows to stdout as csv
"""
import argparse
import json
import sys
#
#* Helpers
#
# Marker separating the repository name from the CID in a "mirva-list" entry.
CID_MARKER = 'ctsj'
# Columns of the info CSV that together identify one row.
KEY_COLUMNS = ('owner', 'name', 'CID')


def parse_mrva_entry(item):
    """Split one "mirva-list" entry into an (owner, name, CID) tuple.

    An entry has the form '<owner>/<name>ctsj<CID>'.  The owner ends at the
    first slash and the CID starts after the LAST 'ctsj' marker, so a
    repository name that itself contains 'ctsj' is still split correctly.

    Raises:
        ValueError: if the entry has no slash or no 'ctsj' marker.
    """
    owner, slash, rest = item.partition('/')
    name, marker, cid = rest.rpartition(CID_MARKER)
    if not slash or not marker:
        raise ValueError(
            "malformed mirva-list entry {!r}: expected '<owner>/<name>{}<CID>'"
            .format(item, CID_MARKER))
    return owner, name, cid


def load_mrva_list(path):
    """Read the JSON file at `path` and return its parsed "mirva-list".

    Returns:
        A set of (owner, name, CID) tuples, one per distinct list entry.
    """
    with open(path, 'r') as f:
        data = json.load(f)
    return {parse_mrva_entry(item) for item in data['mirva-list']}


def load_info(csv_source):
    """Read the info CSV (a path or a file-like object) into a dataframe.

    The key columns are forced to `str`.  Without this, pandas infers
    numeric dtypes for values that happen to look like numbers (e.g. a CID
    of '012345' becomes the integer 12345), which can never compare equal to
    the strings parsed from the mirva-list and also loses leading zeros on
    output.
    """
    # Imported here so that --help and argument errors do not pay for the
    # pandas import.
    import pandas as pd
    return pd.read_csv(csv_source, dtype={col: str for col in KEY_COLUMNS})


def select_rows(df, wanted):
    """Return the rows of `df` whose (owner, name, CID) triple is in `wanted`.

    Args:
        df: dataframe with at least the 'owner', 'name' and 'CID' columns.
        wanted: iterable of (owner, name, CID) tuples.

    Returns:
        A dataframe with the matching rows, in their original order and with
        all original columns.
    """
    import pandas as pd
    wanted = set(wanted)  # O(1) membership tests instead of scanning a list
    row_keys = zip(*(df[col] for col in KEY_COLUMNS))
    # An explicit boolean Series keeps this a row filter even when df is empty.
    mask = pd.Series([key in wanted for key in row_keys],
                     index=df.index, dtype=bool)
    return df[mask]


#
#* Main
#
def main(argv=None):
    """Run the command-line tool; `argv` defaults to sys.argv[1:]."""
    #* Process command line
    parser = argparse.ArgumentParser(
        description="""Script to list full details for a mrva-list file""")
    parser.add_argument('mrva_list', type=str,
                        help='The JSON file containing the mrva-list')
    parser.add_argument('info_csv', type=str,
                        help='The CSV file containing the full information')
    args = parser.parse_args(argv)

    #* Step 1: Read and parse the JSON file containing the "mirva-list"
    wanted = load_mrva_list(args.mrva_list)

    #* Step 2: Read the CSV file into a pandas dataframe
    df = load_info(args.info_csv)

    #* Step 3: Filter the dataframe based on the parsed "mirva-list"
    filtered_df = select_rows(df, wanted)

    # Write the selected rows to stdout as CSV, without the index column
    filtered_df.to_csv(sys.stdout, index=False)


if __name__ == '__main__':
    main()