Improve example data layout and README

This commit is contained in:
Michael Hohn
2024-08-01 14:30:40 -07:00
committed by =Michael Hohn
parent b7b4839fe0
commit 582d933130
6 changed files with 89 additions and 40 deletions

View File

@@ -5,7 +5,7 @@ import pandas as pd
# cd ../
#* Reload CSV file to continue work
-df2 = df_refined = pd.read_csv('db-info-2.csv')
+df2 = df_refined = pd.read_csv('scratch/db-info-2.csv')
# Identify rows missing specific entries
rows = ( df2['cliVersion'].isna() |
@@ -17,7 +17,7 @@ df3 = df2[~rows]
df3
#* post-save work
-df4 = pd.read_csv('db-info-3.csv')
+df4 = pd.read_csv('scratch/db-info-3.csv')
# Sort and group
df_sorted = df4.sort_values(by=['owner', 'name', 'CID', 'creationTime'])

View File

@@ -13,7 +13,7 @@ import numpy as np
import importlib
importlib.reload(utils)
-df0 = pd.read_csv('db-info-3.csv')
+df0 = pd.read_csv('scratch/db-info-3.csv')
# Use num_entries, chosen via pseudo-random numbers
df1 = df0.sample(n=3, random_state=np.random.RandomState(4242))

View File

@@ -9,7 +9,7 @@ from pathlib import Path
#
#* Collect the information and select subset
#
-df = pd.read_csv('db-info-2.csv')
+df = pd.read_csv('scratch/db-info-2.csv')
seed = 4242
if 0:
# Use all entries

View File

@@ -4,7 +4,7 @@ import pandas as pd
#
#* Collect the information
#
-df1 = pd.read_csv("db-info-2.csv")
+df1 = pd.read_csv("scratch/db-info-2.csv")
# Add single uniqueness field -- CID (Cumulative ID) -- using
# - creationTime