Move session scripts to separate directory

This commit is contained in:
Michael Hohn
2024-08-02 13:56:47 -07:00
committed by =Michael Hohn
parent 582d933130
commit 349d758c14
7 changed files with 48 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd

View File

@@ -1,4 +1,4 @@
# Experimental work with utils.py, to be merged into it.
# Experimental work be merged with bin/mc-db-refine-info
from utils import *
from pprint import pprint

View File

@@ -0,0 +1,46 @@
# Session around bin/mc-db-unique
import qldbtools.utils as utils
import pandas as pd
#
#* Collect the information
#
df1 = pd.read_csv("scratch/db-info-2.csv")
# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime
# - sha
# - cliVersion
# - language
from hashlib import blake2b
def cid_hash(row_tuple: tuple):
"""
cid_hash(row_tuple)
Take a bytes object and return hash as hex string
"""
h = blake2b(digest_size = 3)
h.update(str(row_tuple).encode())
# return int.from_bytes(h.digest(), byteorder='big')
return h.hexdigest()
# Apply the cid_hash function to the specified columns and create the 'CID' column
df1['CID'] = df1.apply(lambda row: cid_hash( (row['creationTime'],
row['sha'],
row['cliVersion'],
row['language'])
), axis=1)
df2 = df1.reindex(columns=['owner', 'name', 'cliVersion', 'creationTime',
'language', 'sha','CID', 'baselineLinesOfCode', 'path',
'db_lang', 'db_lang_displayName', 'db_lang_file_count',
'db_lang_linesOfCode', 'ctime', 'primaryLanguage',
'finalised', 'left_index', 'size'])
df1['cid']
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End: