# Session around bin/mc-db-unique
|
|
import qldbtools.utils as utils
|
|
import pandas as pd
|
|
|
|
#
#* Collect the information
#
# Load the database-info table; every later step in this session works on df1.
df1 = pd.read_csv(filepath_or_buffer="scratch/db-info-2.csv")
|
|
|
|
# Add a single uniqueness field -- CID (Cumulative ID) -- derived from
# - creationTime
# - sha
# - cliVersion
# - language
|
|
|
|
from hashlib import blake2b
|
|
|
|
def cid_hash(row_tuple: tuple, digest_size: int = 3) -> str:
    """
    cid_hash(row_tuple, digest_size=3)

    Hash the string form of a tuple and return the digest as a hex string.

    The tuple is rendered with str(), encoded as UTF-8 and hashed with
    BLAKE2b.  The returned string has 2 * digest_size hex characters.

    Args:
        row_tuple: The values that together identify one row.
        digest_size: BLAKE2b digest length in bytes (1 to 64).  The default
            of 3 reproduces the historical 6-character IDs.  Note that
            3 bytes is only 24 bits, so collisions become likely once a
            table holds a few thousand distinct rows; pass a larger value
            when stronger uniqueness is needed.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If digest_size is outside the range hashlib accepts.
    """
    # str() of the tuple is the canonical input; changing this would change
    # every previously computed ID.
    payload = str(row_tuple).encode()
    return blake2b(payload, digest_size=digest_size).hexdigest()
|
|
|
|
# Build the 'CID' column: for each row, gather the identifying fields into a
# tuple (in this fixed order) and hash that tuple with cid_hash.
df1['CID'] = df1.apply(
    lambda rec: cid_hash(
        tuple(
            rec[field]
            for field in ('creationTime', 'sha', 'cliVersion', 'language')
        )
    ),
    axis=1,
)
|
|
|
|
# Reorder the columns so that CID sits next to the fields it is derived from.
# Labels listed here that df1 lacks come back as all-NaN columns.
df2 = df1.reindex(
    labels=[
        'owner', 'name',
        'cliVersion', 'creationTime', 'language', 'sha', 'CID',
        'baselineLinesOfCode', 'path',
        'db_lang', 'db_lang_displayName', 'db_lang_file_count',
        'db_lang_linesOfCode',
        'ctime', 'primaryLanguage', 'finalised', 'left_index', 'size',
    ],
    axis="columns",
)
|
|
|
|
# Inspect the hash column.  It is stored under the label 'CID' (upper case);
# column labels are case-sensitive, so the lower-case spelling is not the
# column assigned earlier in this session.
df1['CID']
|
|
|
|
|
|
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/qldbtools/venv/"
# End:
|