From d1f56ae1964d9bd21d004d11edc7fd07162a666b Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Fri, 9 Aug 2024 08:36:48 -0700 Subject: [PATCH] Add explicit language selection --- client/qldbtools/README.md | 19 ++++++++++++++----- client/qldbtools/bin/mc-db-unique | 8 +++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/client/qldbtools/README.md b/client/qldbtools/README.md index 43625cb..87babac 100644 --- a/client/qldbtools/README.md +++ b/client/qldbtools/README.md @@ -122,16 +122,25 @@ A small sample of a full table: ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv ./bin/mc-db-view-info < scratch/db-info-2.csv & - ./bin/mc-db-unique < scratch/db-info-2.csv > scratch/db-info-3.csv + ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv ./bin/mc-db-view-info < scratch/db-info-3.csv & - ./bin/mc-db-populate-minio -n 23 < scratch/db-info-3.csv - ./bin/mc-db-generate-selection -n 23 \ + ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv + ./bin/mc-db-generate-selection -n 11 \ scratch/vscode-selection.json \ scratch/gh-mrva-selection.json \ < scratch/db-info-3.csv - - + + + To see the full information for a selection, use `mc-rows-from-mrva-list`: + + ./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \ + scratch/db-info-3.csv > scratch/selection-full-info + + To check, e.g., the `language` column: + + csvcut -c language scratch/selection-full-info + ## Notes The `preview-data` plugin for VS Code has a bug; it displays `0` instead of diff --git a/client/qldbtools/bin/mc-db-unique b/client/qldbtools/bin/mc-db-unique index 7b8d811..fb974b5 100755 --- a/client/qldbtools/bin/mc-db-unique +++ b/client/qldbtools/bin/mc-db-unique @@ -32,9 +32,12 @@ root_logger.setLevel(logging.INFO) # parser = argparse.ArgumentParser( description=""" Read a table of CodeQL DB information, + narrow to the given language, group entries by (owner,name), sort each group by creationTime and keep only the top (newest) element. 
""") +parser.add_argument('language', type=str, + help='The language to be analyzed.') args = parser.parse_args() # @@ -100,8 +103,11 @@ rows = ( df3['cliVersion'].isna() | df3['sha'].isna() ) df4 = df3[~rows] +# XX: Limit to one language +df5 = df4[df4['language'] == args.language] + # Sort and group -df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime']) +df_sorted = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime']) df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index() # Write output