84 lines
2.6 KiB
Python
Executable File
84 lines
2.6 KiB
Python
Executable File
#!/usr/bin/env python
|
|
""" Read an initial table of CodeQL DB information, produced by
|
|
mc-db-initial-info, and collect more detailed information from the database
|
|
files. Write out an extended table in CSV format.
|
|
"""
|
|
import qldbtools.utils as utils
|
|
import argparse
|
|
import logging
|
|
import pandas as pd
|
|
import sys
|
|
|
|
#
#* Configure logger
#
# Log records are rendered as "<timestamp> <message>".
_LOG_FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(format=_LOG_FORMAT)

#
#* Process command line
#
# No options are declared; parsing is still performed so that argparse
# supplies its default -h/--help handling for this description.
_DESCRIPTION = """Read an initial table of CodeQL DB information, produced by
mc-db-initial-info, and collect more detailed information from the database
files. Write out an extended table in CSV format. """
parser = argparse.ArgumentParser(description=_DESCRIPTION)
args = parser.parse_args()
|
|
|
|
#
#* Collect the information
#
# Read the initial table from stdin, then try to extract detailed metadata for
# each row from the file named in its `path` column.  Rows for which nothing
# can be extracted are skipped here but stay in the output: the outer merge
# at the end keeps every row of `d`.
d = pd.read_csv(sys.stdin)
joiners = []
# Visit every row, labels 0 .. len(d)-1 of read_csv's default index.  The
# earlier bound `len(d)-1` stopped one short, so the last row never had its
# details collected (and a one-row table collected nothing at all).
for left_index in range(len(d)):
    try:
        cqlc, metac = utils.extract_metadata(d.path[left_index])
    except (utils.ExtractNotZipfile, utils.ExtractNoCQLDB):
        # No metadata could be extracted for this row; skip it.
        continue
    try:
        detail_df = utils.metadata_details(left_index, cqlc, metac)
    except utils.DetailsMissing:
        # Metadata was extracted but the detail table could not be built.
        continue
    joiners.append(detail_df)

if joiners:
    joiners_df = pd.concat(joiners, axis=0)
else:
    # pd.concat raises ValueError on an empty list.  Use an empty detail table
    # that still has the join key, so the merge returns the rows of `d` alone.
    joiners_df = pd.DataFrame({'left_index': pd.Series(dtype='int64')})

# Attach the details to the initial rows: the row label of `d` is matched
# against the `left_index` column of the detail table.
full_df = pd.merge(d, joiners_df, left_index=True, right_on='left_index', how='outer')
|
|
|
|
#** Re-order the dataframe columns by importance
# - Much of the data
#   1. Is only conditionally present
#   2. Is extra info, not for the DB proper
#   3. May have various names
#
# - The essential columns come first, in this order
essential_columns = [
    'owner',
    'name',
    'language',
    'size',
    'cliVersion',
    'creationTime',
    'sha',
    'baselineLinesOfCode',
    'path',
]

# - The rest are useful; put them last
extra_columns = [
    'db_lang',
    'db_lang_displayName',
    'db_lang_file_count',
    'db_lang_linesOfCode',
    'ctime',
    'primaryLanguage',
    'finalised',
    'left_index',
]

# reindex() selects exactly the listed columns, in the listed order.
final_df = full_df.reindex(columns=essential_columns + extra_columns)

# Emit the extended table as CSV on stdout, without the index column.
final_df.to_csv(sys.stdout, index=False)
|
|
|
|
# Local Variables:
|
|
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
|
|
# End:
|