Add explicit language selection

This commit is contained in:
Michael Hohn
2024-08-09 08:36:48 -07:00
committed by =Michael Hohn
parent 6262197c8d
commit d1f56ae196
2 changed files with 21 additions and 6 deletions

View File

@@ -122,16 +122,25 @@ A small sample of a full table:
./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
./bin/mc-db-view-info < scratch/db-info-2.csv & ./bin/mc-db-view-info < scratch/db-info-2.csv &
./bin/mc-db-unique < scratch/db-info-2.csv > scratch/db-info-3.csv ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
./bin/mc-db-view-info < scratch/db-info-3.csv & ./bin/mc-db-view-info < scratch/db-info-3.csv &
./bin/mc-db-populate-minio -n 23 < scratch/db-info-3.csv ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
./bin/mc-db-generate-selection -n 23 \ ./bin/mc-db-generate-selection -n 11 \
scratch/vscode-selection.json \ scratch/vscode-selection.json \
scratch/gh-mrva-selection.json \ scratch/gh-mrva-selection.json \
< scratch/db-info-3.csv < scratch/db-info-3.csv
To see the full information for a selection, use `mc-rows-from-mrva-list`:
./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
scratch/db-info-3.csv > scratch/selection-full-info
To check a single column of that output, e.g. `language`:
csvcut -c language scratch/selection-full-info
## Notes ## Notes
The `preview-data` plugin for VS Code has a bug; it displays `0` instead of The `preview-data` plugin for VS Code has a bug; it displays `0` instead of

View File

@@ -32,9 +32,12 @@ root_logger.setLevel(logging.INFO)
# #
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=""" Read a table of CodeQL DB information, description=""" Read a table of CodeQL DB information,
narrow to <language>,
group entries by (owner,name), sort each group by group entries by (owner,name), sort each group by
creationTime and keep only the top (newest) element. creationTime and keep only the top (newest) element.
""") """)
parser.add_argument('language', type=str,
help='The language to be analyzed.')
args = parser.parse_args() args = parser.parse_args()
# #
@@ -100,8 +103,11 @@ rows = ( df3['cliVersion'].isna() |
df3['sha'].isna() ) df3['sha'].isna() )
df4 = df3[~rows] df4 = df3[~rows]
# Keep only the rows whose language matches the `language` command-line argument
df5 = df4[df4['language'] == args.language]
# Sort and group # Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime']) df_sorted = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index() df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
# Write output # Write output