From 81c44ab14aff4eb591cabfe7c1cf66b391eaeaeb Mon Sep 17 00:00:00 2001
From: Michael Hohn
Date: Fri, 26 Jul 2024 14:18:14 -0700
Subject: [PATCH] Add mc-db-unique as default single-(owner,repo) selector

---
 client/qldbtools/README.md                    |  2 +-
 client/qldbtools/bin/mc-db-unique             | 43 +++++++++++++++++++
 .../qldbtools/qldbtools/session-4-unique.py   | 16 +++++++
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100755 client/qldbtools/bin/mc-db-unique
 create mode 100644 client/qldbtools/qldbtools/session-4-unique.py

diff --git a/client/qldbtools/README.md b/client/qldbtools/README.md
index ec174f7..4400754 100644
--- a/client/qldbtools/README.md
+++ b/client/qldbtools/README.md
@@ -67,6 +67,6 @@ import qldbtools as ql
 
     ./bin/mc-db-view-info < db-info-2.csv
 
-
+    ./bin/mc-db-unique < db-info-2.csv > db-info-3.csv
 
 
diff --git a/client/qldbtools/bin/mc-db-unique b/client/qldbtools/bin/mc-db-unique
new file mode 100755
index 0000000..4c44f6e
--- /dev/null
+++ b/client/qldbtools/bin/mc-db-unique
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+""" Read a table of CodeQL DB information,
+    group entries by (owner,name), sort each group by
+    creationTime and keep only the top (newest) element.
+"""
+import argparse
+import logging
+
+#
+#* Configure logger
+#
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+# Overwrite log level set by minio
+root_logger = logging.getLogger()
+root_logger.setLevel(logging.INFO)
+
+#
+#* Process command line
+#
+parser = argparse.ArgumentParser(
+    description=""" Read a table of CodeQL DB information,
+    group entries by (owner,name), sort each group by
+    creationTime and keep only the top (newest) element.
+    """)
+
+args = parser.parse_args()
+#
+#* Collect the information and select subset
+#
+import pandas as pd
+import sys
+
+df0 = pd.read_csv(sys.stdin)
+
+# Sort newest creationTime first inside each (owner, name) group and keep that whole row.
+# GroupBy.first() is avoided: it takes the first non-null cell per column and can mix rows.
+df_sorted = df0.sort_values(by=['owner', 'name', 'creationTime'], ascending=[True, True, False])
+df_unique = df_sorted.drop_duplicates(subset=['owner', 'name'], keep='first')
+df_unique.to_csv(sys.stdout, index=False)
+
+# Local Variables:
+# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
+# End:
diff --git a/client/qldbtools/qldbtools/session-4-unique.py b/client/qldbtools/qldbtools/session-4-unique.py
new file mode 100644
index 0000000..47f946a
--- /dev/null
+++ b/client/qldbtools/qldbtools/session-4-unique.py
@@ -0,0 +1,16 @@
+# Experimental work with utils.py, to be merged into it.
+from utils import *
+from pprint import pprint
+
+#* Reload CSV file to continue work
+df2 = pd.read_csv('db-info-2.csv')
+
+# Newest creationTime first per (owner, name); drop_duplicates then keeps that entire row.
+df_sorted = df2.sort_values(by=['owner', 'name', 'creationTime'], ascending=[True, True, False])
+df_unique = df_sorted.drop_duplicates(subset=['owner', 'name'], keep='first').reset_index(drop=True)
+
+#
+# Local Variables:
+# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
+# End:
+#