mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 01:13:03 +01:00
Add snowflake implementation
This commit is contained in:
committed by
=Michael Hohn
parent
8b3710a51b
commit
8e5d9c464b
180
notes/unique-ids.ipynb
Normal file
180
notes/unique-ids.ipynb
Normal file
@@ -0,0 +1,180 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d6ed707c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Flake id: Splitting 64 bits between time, process, and counter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "60d1cbf0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"40 Wed Nov 3 11:53:47 2004\n",
|
||||
"41 Wed Sep 7 08:47:35 2039\n",
|
||||
"42 Wed May 15 00:35:11 2109\n",
|
||||
"43 Tue Sep 26 08:10:22 2248\n",
|
||||
"44 Sun Jun 22 23:20:44 2527\n",
|
||||
"45 Fri Dec 12 04:41:28 3084\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# Maximum time in ms for given number of bits\n",
|
||||
"for bits in range(40, 46):\n",
|
||||
" ms_max = 2**bits\n",
|
||||
" print(bits, time.ctime(ms_max / 1000))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "527a1f0a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'1_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_1100_0000_1100_0000_0000_0001'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 42 bits are enough\n",
|
||||
"\n",
|
||||
"# Simple bit patterns for testing\n",
|
||||
"ms_since_epoch = (1<<41) + 1\n",
|
||||
"process_id = (1<<7) + 1\n",
|
||||
"counter = (1<<14) + 1\n",
|
||||
"\n",
|
||||
"flake = (ms_since_epoch << (8+15) |\n",
|
||||
" process_id << (15) |\n",
|
||||
" counter)\n",
|
||||
"\n",
|
||||
"f\"{flake:_b}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "ce892afa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(4398046511104, 256, 32768)"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ms_max = (1<<41) * 2\n",
|
||||
"\n",
|
||||
"process_id_max = 1<<8\n",
|
||||
"\n",
|
||||
"counter_max = 1 << 15\n",
|
||||
"ms_max, process_id_max, counter_max"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "ab7600b7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Wed May 15 00:35:11 2109'"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Max. time for id\n",
|
||||
"time.ctime(ms_max / 1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "ebc23d69",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"('time',\n",
|
||||
" 1649714152347,\n",
|
||||
" 'bits required',\n",
|
||||
" 40.58535320753334,\n",
|
||||
" 'ratio to max',\n",
|
||||
" 2.6659445849131065)"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Builtin time since epoch, in ns, converted to ms: \n",
|
||||
"from math import log2\n",
|
||||
"time_ms = int(time.time_ns() / 1e6)\n",
|
||||
"\"time\", time_ms, \"bits required\", log2(time_ms), \"ratio to max\", 2**42/time_ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a06517b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
64
sarif_cli/snowflake_id.py
Normal file
64
sarif_cli/snowflake_id.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""A simple 64-bit snowflake id implementation.
|
||||
|
||||
For merging and joining tables externally, the ids must be sufficiently unique.
|
||||
At the same time, a full 16-byte (128-bit) uuid is excessive and likely slow in a database.
|
||||
The trade-off is to use a snowflake id (see References), which is a 64-bit int with
|
||||
bits allocated between time, a shard/process id, and a counter or random number.
|
||||
|
||||
This implementation uses a 42, 8, 15 bit split for (time, process, counter). The
|
||||
time is in milliseconds (ms) since unix epoch.
|
||||
|
||||
../notes/unique-ids.ipynb illustrates the values used here.
|
||||
|
||||
|
||||
References:
|
||||
- https://www.ietf.org/id/draft-peabody-dispatch-new-uuid-format-02.html#name-informative-references
|
||||
"""
|
||||
import time
|
||||
|
||||
class Snowflake:
    """Generator of snowflake-style integer ids for one process id.

    Each id packs three fields, from most to least significant bits:

    - time in ms since the unix epoch, shifted left by 23 bits
    - the process id (8 bits), shifted left by 15 bits
    - a counter (15 bits)

    The time field is sampled when the generator is created and refreshed
    only when the counter wraps, so it labels a block of up to
    ``counter_max`` ids rather than the creation time of each single id.

    NOTE(review): 42 + 8 + 15 is 65 bits, not 64.  The packed value fits an
    unsigned 64-bit integer only while the time needs at most 41 bits, and
    a signed 64-bit integer only while it needs at most 40 bits.  The
    layout is kept as is so that previously issued ids stay valid; confirm
    the target column type.
    """

    # Exclusive upper bounds for the three fields.
    ms_max = (1<<41) * 2
    process_id_max = 1<<8
    counter_max = 1 << 15

    # Bit offsets of the process id and time fields within an id.
    _process_id_shift = 15
    _time_shift = 8 + 15

    def __init__(self, process_id):
        """Create a generator for `process_id`.

        Args:
            process_id: integer in ``range(0, Snowflake.process_id_max)``.

        Raises:
            ValueError: if `process_id` is outside that range.  A negative
                value would otherwise set the sign of every generated id.
        """
        # Explicit check instead of `assert`, which is removed under -O.
        if not 0 <= process_id < Snowflake.process_id_max:
            raise ValueError(
                f"process_id must be in range(0, {Snowflake.process_id_max}), "
                f"got {process_id!r}"
            )
        self._time_ms = self._now_ms()
        self._process_id = process_id
        self._counter = 0

    @staticmethod
    def _now_ms():
        """Return the current time in whole ms since the unix epoch."""
        # Integer floor division avoids a round trip through float.
        return time.time_ns() // 1_000_000

    def next(self):
        """Return the next id.

        When the counter is exhausted, busy-wait until the clock has moved
        past the time used for the previous block of ids, then start a new
        block with that time and a counter of 0.
        """
        if self._counter >= Snowflake.counter_max:
            # The stored time is usually well in the past, so this loop
            # normally exits on its first check.
            while (time_ms := self._now_ms()) <= self._time_ms:
                pass            # TODO: profile this; should be few loops if any
            self._time_ms = time_ms
            self._counter = 0

        flake = (self._time_ms << Snowflake._time_shift |
                 self._process_id << Snowflake._process_id_shift |
                 self._counter)
        self._counter += 1

        return flake
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test lower bits and counter wrapping
|
||||
# Generator with process id 0.
fgen = Snowflake(0)
|
||||
for _ in range(0,4):
|
||||
fl = fgen.next()
|
||||
# Decode the id: low 15 bits are the counter, the next 8 bits the process
# id, and everything above bit 23 the time in ms.
print(f"counter: {fl & (1<<15)-1:d} id: {(fl>>15) & (1<<8)-1:d} time_ms: {(fl>>23):d}")
|
||||
# Time field in binary, with _ as digit-group separator.
print(f"{(fl >> 23):_b}")
|
||||
print("----")
|
||||
# Generate counter_max more ids so the counter wraps before the next ids
# are printed.
for _ in range(0, Snowflake.counter_max):
|
||||
fgen.next()
|
||||
for _ in range(0,4):
|
||||
fl = fgen.next()
|
||||
print(f"counter: {fl & (1<<15)-1:d} id: {(fl>>15) & (1<<8)-1:d} time_ms: {(fl>>23):d}")
|
||||
print(f"{(fl >> 23):_b}")
|
||||
print("----")
|
||||
# simple loop time
|
||||
# Difference between the time fields of two ids that are more than
# counter_max calls apart, so the calls in between span a counter wrap.
time_start = fgen.next() >> 23
|
||||
for _ in range(0, Snowflake.counter_max):
|
||||
fgen.next()
|
||||
delta = (fgen.next() >> 23) - time_start
|
||||
print(f"time delta in ms, one counter cycle: {delta:d}")
|
||||
Reference in New Issue
Block a user