From d1f56ae1964d9bd21d004d11edc7fd07162a666b Mon Sep 17 00:00:00 2001
From: Michael Hohn <hohn@github.com>
Date: Fri, 9 Aug 2024 08:36:48 -0700
Subject: [PATCH] Add explicit language selection

---
 client/qldbtools/README.md        | 19 ++++++++++++++-----
 client/qldbtools/bin/mc-db-unique |  8 +++++++-
 2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/client/qldbtools/README.md b/client/qldbtools/README.md
index 43625cb..87babac 100644
--- a/client/qldbtools/README.md
+++ b/client/qldbtools/README.md
@@ -122,16 +122,25 @@ A small sample of a full table:
         ./bin/mc-db-refine-info < scratch/db-info-1.csv > scratch/db-info-2.csv
        
         ./bin/mc-db-view-info < scratch/db-info-2.csv &
-        ./bin/mc-db-unique < scratch/db-info-2.csv > scratch/db-info-3.csv
+        ./bin/mc-db-unique cpp < scratch/db-info-2.csv > scratch/db-info-3.csv
         ./bin/mc-db-view-info < scratch/db-info-3.csv &
 
-        ./bin/mc-db-populate-minio -n 23 < scratch/db-info-3.csv
-        ./bin/mc-db-generate-selection -n 23 \
+        ./bin/mc-db-populate-minio -n 11 < scratch/db-info-3.csv
+        ./bin/mc-db-generate-selection -n 11 \
             scratch/vscode-selection.json \
             scratch/gh-mrva-selection.json \
             < scratch/db-info-3.csv 
-       
-       
+
+
+   To see the full information for a selection, use `mc-rows-from-mrva-list`:
+   
+        ./bin/mc-rows-from-mrva-list scratch/gh-mrva-selection.json \
+            scratch/db-info-3.csv > scratch/selection-full-info
+
+   To inspect a single column of that output, e.g. `language` (`csvcut` is part of csvkit):
+
+        csvcut -c language scratch/selection-full-info 
+
 ## Notes
 
    The `preview-data` plugin for VS Code has a bug; it displays `0` instead of
diff --git a/client/qldbtools/bin/mc-db-unique b/client/qldbtools/bin/mc-db-unique
index 7b8d811..fb974b5 100755
--- a/client/qldbtools/bin/mc-db-unique
+++ b/client/qldbtools/bin/mc-db-unique
@@ -32,9 +32,12 @@ root_logger.setLevel(logging.INFO)
 #
 parser = argparse.ArgumentParser(
     description=""" Read a table of CodeQL DB information, 
+    narrow to <language>,
     group entries by (owner,name),  sort each group by
     creationTime and keep only the top (newest) element.
     """)
+parser.add_argument('language', type=str, 
+                    help='The language to be analyzed.')
 
 args = parser.parse_args()
 #
@@ -100,8 +103,11 @@ rows = ( df3['cliVersion'].isna() |
          df3['sha'].isna() )
 df4 = df3[~rows]
 
+# Keep only the rows whose 'language' matches the command-line argument
+df5 = df4[df4['language'] == args.language]
+
 # Sort and group
-df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
+df_sorted = df5.sort_values(by=['owner', 'name', 'CID', 'creationTime'])
 df_unique = df_sorted.groupby(['owner', 'name', 'CID']).first().reset_index()
 
 # Write output