From 349d758c1485661852446e67d9637e1772a72207 Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Fri, 2 Aug 2024 13:56:47 -0700 Subject: [PATCH] Move session scripts to separate directory --- .../db-generate-selection.py} | 0 .../db-initial-info.py} | 0 .../db-populate-minio.py} | 0 .../db-post-refine-info.py} | 1 + .../session2.py => session/db-refine-info.py} | 2 +- .../db-unique-1.py} | 0 client/qldbtools/session/db-unique.py | 46 +++++++++++++++++++ 7 files changed, 48 insertions(+), 1 deletion(-) rename client/qldbtools/{qldbtools/session-generate-selection.py => session/db-generate-selection.py} (100%) rename client/qldbtools/{qldbtools/session1.py => session/db-initial-info.py} (100%) rename client/qldbtools/{qldbtools/session-populate-minio.py => session/db-populate-minio.py} (100%) rename client/qldbtools/{qldbtools/session-post-refine-info.py => session/db-post-refine-info.py} (97%) rename client/qldbtools/{qldbtools/session2.py => session/db-refine-info.py} (98%) rename client/qldbtools/{qldbtools/session-4-unique.py => session/db-unique-1.py} (100%) create mode 100644 client/qldbtools/session/db-unique.py diff --git a/client/qldbtools/qldbtools/session-generate-selection.py b/client/qldbtools/session/db-generate-selection.py similarity index 100% rename from client/qldbtools/qldbtools/session-generate-selection.py rename to client/qldbtools/session/db-generate-selection.py diff --git a/client/qldbtools/qldbtools/session1.py b/client/qldbtools/session/db-initial-info.py similarity index 100% rename from client/qldbtools/qldbtools/session1.py rename to client/qldbtools/session/db-initial-info.py diff --git a/client/qldbtools/qldbtools/session-populate-minio.py b/client/qldbtools/session/db-populate-minio.py similarity index 100% rename from client/qldbtools/qldbtools/session-populate-minio.py rename to client/qldbtools/session/db-populate-minio.py diff --git a/client/qldbtools/qldbtools/session-post-refine-info.py 
b/client/qldbtools/session/db-post-refine-info.py similarity index 97% rename from client/qldbtools/qldbtools/session-post-refine-info.py rename to client/qldbtools/session/db-post-refine-info.py index 4825678..e6e0728 100644 --- a/client/qldbtools/qldbtools/session-post-refine-info.py +++ b/client/qldbtools/session/db-post-refine-info.py @@ -1,3 +1,4 @@ +# Session around bin/mc-db-unique import qldbtools.utils as utils import pandas as pd diff --git a/client/qldbtools/qldbtools/session2.py b/client/qldbtools/session/db-refine-info.py similarity index 98% rename from client/qldbtools/qldbtools/session2.py rename to client/qldbtools/session/db-refine-info.py index 978da84..7e9e236 100644 --- a/client/qldbtools/qldbtools/session2.py +++ b/client/qldbtools/session/db-refine-info.py @@ -1,4 +1,4 @@ -# Experimental work with utils.py, to be merged into it. +# Experimental work to be merged with bin/mc-db-refine-info from utils import * from pprint import pprint diff --git a/client/qldbtools/qldbtools/session-4-unique.py b/client/qldbtools/session/db-unique-1.py similarity index 100% rename from client/qldbtools/qldbtools/session-4-unique.py rename to client/qldbtools/session/db-unique-1.py diff --git a/client/qldbtools/session/db-unique.py b/client/qldbtools/session/db-unique.py new file mode 100644 index 0000000..e6e0728 --- /dev/null +++ b/client/qldbtools/session/db-unique.py @@ -0,0 +1,46 @@ +# Session around bin/mc-db-unique +import qldbtools.utils as utils +import pandas as pd + +# +#* Collect the information +# +df1 = pd.read_csv("scratch/db-info-2.csv") + +# Add single uniqueness field -- CID (Cumulative ID) -- using +# - creationTime +# - sha +# - cliVersion +# - language + +from hashlib import blake2b + +def cid_hash(row_tuple: tuple): + """ + cid_hash(row_tuple) + Take a tuple of row values and return the 3-byte BLAKE2b hash of its string form as a hex string + """ + h = blake2b(digest_size = 3) + h.update(str(row_tuple).encode()) + # return int.from_bytes(h.digest(), byteorder='big') + return h.hexdigest() 
+
+# Apply the cid_hash function to the specified columns and create the 'CID' column
+df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
+                                              row['sha'],
+                                              row['cliVersion'],
+                                              row['language'])
+                                            ), axis=1)
+
+df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
+                           'language', 'sha','CID', 'baselineLinesOfCode', 'path',
+                           'db_lang', 'db_lang_displayName', 'db_lang_file_count',
+                           'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
+                           'finalised', 'left_index', 'size'])
+
+df1['CID']  # inspect the new column; the label is 'CID' (pandas labels are case-sensitive)
+
+
+# Local Variables:
+# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
+# End: