Collect DB information from file system and render it

This commit is contained in:
Michael Hohn
2024-07-10 09:11:21 -07:00
committed by =Michael Hohn
parent 8d80272922
commit 2df48b9f98
4 changed files with 327 additions and 2 deletions

File diff suppressed because one or more lines are too long

90
client/db_load_fs.py Normal file
View File

@@ -0,0 +1,90 @@
#* Overview
# - [ ] import the dbs from the filesystem. Include
# 1. name
# 2. owner
# 3. language
# 4. creation date
# 5. db size
#* Imports
import pandas as pd
from pathlib import Path
import os
import logging
import time
#* Setup
# Configure the root logger when this module is loaded: messages at DEBUG
# level and above, each line prefixed with a timestamp and the level name.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.DEBUG,
)
#* Utility functions
def log_and_raise(message, exc_type=Exception):
    """Log *message* at ERROR level, then raise it as an exception.

    Args:
        message: Text used both for the log record and the exception.
        exc_type: Exception class to raise.  Defaults to ``Exception`` so
            existing one-argument callers behave exactly as before; pass a
            narrower class (e.g. ``ValueError``) when callers should be able
            to catch the failure selectively.

    Raises:
        exc_type: Always.
    """
    logging.error(message)
    raise exc_type(message)
def traverse_tree(root):
    """Yield the path of every file found recursively below *root*.

    Args:
        root: Directory to scan; a leading ``~`` is expanded to the user's
            home directory.

    Yields:
        pathlib.Path: One entry per file, in the order ``rglob`` produces
        them.  Directories themselves are never yielded.

    Raises:
        Exception: Via ``log_and_raise`` when *root* does not exist or is
            not a directory.  Because this is a generator, the check runs
            when iteration starts, not when the function is called.
    """
    root_path = Path(os.path.expanduser(root))
    # is_dir() is already False for a missing path, so it covers both the
    # "does not exist" and the "is not a directory" case.
    if not root_path.is_dir():
        log_and_raise(f"The specified root path '{root}' does not exist or "
                      "is not a directory.")
    for path in root_path.rglob('*'):
        if path.is_file():
            yield path
# Collect information in one 'struct'
class DBInfo:
    """Plain attribute bag describing one database found on disk.

    Instances start out empty.  ``collect_dbs`` assigns the attributes
    ``owner``, ``name``, ``language``, ``path``, ``size``, ``ctime_raw`` and
    ``ctime`` on each one.  Attributes are deliberately kept as ordinary
    instance attributes because ``__dict__`` is used directly to build
    DataFrame rows.
    """

    def __repr__(self):
        # Show the collected fields instead of the default object address so
        # instances are readable in pprint output and log messages.
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"
def collect_dbs(db_base):
    """Yield a ``DBInfo`` for every ``db.zip`` found below *db_base*.

    Owner, name and language are taken from the file's position in the
    directory tree, counted from the end of the path:

        .../<owner>/<name>/<_>/<_>/<_>/<language>/db.zip

    Args:
        db_base: Root directory to scan (``~`` is expanded by
            ``traverse_tree``).

    Yields:
        DBInfo: With ``owner``, ``name``, ``language``, ``path``, ``size``
        (bytes), ``ctime_raw`` (seconds since the epoch) and ``ctime``
        (the same instant formatted by ``time.ctime``).

    Raises:
        Exception: Propagated from ``traverse_tree`` when *db_base* is not
            an existing directory; raised on iteration, since this is a
            generator.
    """
    # owner + name + 3 ignored levels + language + 'db.zip' itself.
    min_parts = 7
    for path in traverse_tree(db_base):
        if path.name != "db.zip":
            continue
        if len(path.parts) < min_parts:
            # Too shallow to carry the owner/name/language components; skip
            # it rather than abort the whole scan with an unpacking error.
            logging.warning("Skipping '%s': path has fewer than %d components",
                            path, min_parts)
            continue
        # For the original repository layout len(path.parts) was 14; working
        # from the end keeps the extraction independent of the prefix depth.
        db = DBInfo()
        (*_, db.owner, db.name, _, _, _, db.language, _) = path.parts
        db.path = path
        s = path.stat()
        db.size = s.st_size
        # On Unix st_ctime is the last metadata change, not file creation.
        # Prefer the real creation time where the platform exposes it and
        # fall back to st_ctime elsewhere.
        created = getattr(s, "st_birthtime", s.st_ctime)
        db.ctime_raw = created
        db.ctime = time.ctime(created)
        yield db
def dbdf_from_tree(db_base="~/work-gh/mrva/mrva-open-source-download/"):
    """Return a DataFrame with one row per database found below *db_base*.

    Args:
        db_base: Root directory to scan.  Defaults to the location that was
            previously hard-coded, so zero-argument calls are unchanged.

    Returns:
        pandas.DataFrame: One row per ``DBInfo`` yielded by ``collect_dbs``,
        with one column per attribute set on it.
    """
    # Each DBInfo's attribute dict becomes one row.
    return pd.DataFrame([db.__dict__ for db in collect_dbs(db_base)])
#* Interactive use only
# The `if 0:` guard means nothing below runs when the module is imported or
# executed; the statements are meant to be evaluated by hand, piece by piece,
# from an interactive session.
if 0:
    #* Data collection
    # Get the db information in list of DBInfo form
    db_base = "~/work-gh/mrva/mrva-open-source-download/"
    dbs = list(collect_dbs(db_base))
    #
    # Inspect:
    from pprint import pprint
    pprint(["len", len(dbs)])
    pprint(["dbs[0]", dbs[0].__dict__])
    #
    # Get a dataframe
    dbdf = pd.DataFrame([d.__dict__ for d in dbs])
    #
    # Interact with/visualize it
    # The environment variable is set before pandasgui is imported;
    # NOTE(review): presumably pandasgui reads APPDATA at import time and
    # fails without it on non-Windows systems -- confirm.
    os.environ['APPDATA'] = "needed-for-pandasgui"
    from pandasgui import show
    show(dbdf)
    #
    # Alternative viewer for the same dataframe.
    import dtale
    dtale.show(dbdf)
#
# Local Variables:
# python-shell-virtualenv-root: "~/work-gh/mrva/mrvacommander/client/venv/"
# End:

View File

@@ -1,33 +1,59 @@
anyio==4.4.0
appdirs==1.4.4
appnope==0.1.4
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
astor==0.8.1
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
Babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
Brotli==1.1.0
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
comm==0.2.2
contourpy==1.2.1
cycler==0.12.1
dash==2.17.1
dash-bootstrap-components==1.3.1
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
dash_colorscales==0.0.4
dash_daq==0.5.0
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
dtale==3.13.1
et-xmlfile==1.1.0
executing==2.0.1
fastjsonschema==2.20.0
Flask==2.2.5
Flask-Compress==1.15
flask-ngrok==0.0.25
fonttools==4.53.1
fqdn==1.5.1
future==1.0.0
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
importlib_metadata==8.0.0
ipykernel==6.29.5
ipympl==0.9.4
ipython==8.26.0
ipython-genutils==0.2.0
ipywidgets==8.1.3
isoduration==20.11.0
itsdangerous==2.2.0
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
@@ -41,45 +67,81 @@ jupyter_server_terminals==0.5.3
jupyterlab==4.2.3
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.2
jupyterlab_widgets==3.0.11
kaleido==0.2.1
kiwisolver==1.4.5
lckr_jupyterlab_variableinspector==3.2.1
lz4==4.3.3
MarkupSafe==2.1.5
matplotlib==3.9.1
matplotlib-inline==0.1.7
missingno==0.5.2
mistune==3.0.2
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
networkx==3.3
notebook_shim==0.2.4
numpy==2.0.0
openpyxl==3.1.5
overrides==7.7.0
packaging==24.1
pandas==2.2.2
pandasgui==0.2.14
pandocfilters==1.5.1
parso==0.8.4
patsy==0.5.6
pexpect==4.9.0
pillow==10.4.0
platformdirs==4.2.2
plotly==5.22.0
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure-eval==0.2.2
pyarrow==16.1.0
pycparser==2.22
Pygments==2.18.0
pynput==1.7.7
pyobjc-core==10.3.1
pyobjc-framework-ApplicationServices==10.3.1
pyobjc-framework-Cocoa==10.3.1
pyobjc-framework-CoreText==10.3.1
pyobjc-framework-Quartz==10.3.1
pyparsing==3.1.2
PyQt5==5.15.10
PyQt5-Qt5==5.15.14
PyQt5-sip==12.13.0
PyQtWebEngine==5.15.6
PyQtWebEngine-Qt5==5.15.14
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.1
pyzmq==26.0.3
qtstylish==0.1.5
referencing==0.35.1
requests==2.32.3
retrying==1.3.4
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.0
scikit-learn==1.5.1
scipy==1.13.1
seaborn==0.13.2
Send2Trash==1.8.3
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
squarify==0.4.3
stack-data==0.6.3
statsmodels==0.14.2
strsimpy==0.2.1
tenacity==8.5.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tornado==6.4.1
traitlets==5.14.3
@@ -92,3 +154,10 @@ wcwidth==0.2.13
webcolors==24.6.0
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.0.3
widgetsnbextension==4.0.11
wordcloud==1.9.3
xarray==2024.6.0
xlrd==2.0.1
zipp==3.19.2
zstandard==0.22.0