Files
sarif-cli/sarif_cli/snowflake_id.py
2022-04-11 19:24:12 -07:00

65 lines
2.2 KiB
Python

"""A simple 64-bit snowflake id implementation.
For merging and joining tables externally, the ids must be sufficiently unique.
At the same time, a full 16-byte (128-bit) uuid is excessive and likely slow in a database.
The trade off is using a snowflake id (see References), which is a 64 bit int with
bits allocated between time, a shard/process id, and a counter or random number.
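This implementation uses a 42, 8, 15 bit split for (time, process, counter). The
time is in milliseconds (ms) since unix epoch. Note that 42 + 8 + 15 = 65 bits, so
an id fits in an unsigned 64-bit integer only while the time fits in 41 bits
(until about the year 2039).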
../notes/unique-ids.ipynb illustrates the values used here.
References:
- https://www.ietf.org/id/draft-peabody-dispatch-new-uuid-format-02.html#name-informative-references
"""
import time
class Snowflake:
    """Generator of snowflake ids for one process / shard.

    Every id is an int built from three fields, most significant first:

    - the timestamp in milliseconds since the unix epoch, shifted left by 23,
    - the process id (8 bits), shifted left by 15,
    - a counter (15 bits) in the lowest bits.

    The timestamp is sampled once when the generator is created.  It is only
    refreshed when the counter runs out, at which point `next` waits for the
    clock to move past the stored timestamp.  Ids from one generator are
    therefore strictly increasing.
    """

    # Exclusive upper bound of a 42-bit millisecond timestamp.  It is not
    # checked anywhere in this class.
    ms_max = (1 << 41) * 2
    # Exclusive upper bounds of the process id and counter fields.
    process_id_max = 1 << 8
    counter_max = 1 << 15

    # Bit positions of the process id and timestamp fields inside an id.
    _process_id_shift = 15
    _time_shift = 23

    def __init__(self, process_id):
        """Create a generator for the given process id.

        Args:
            process_id: integer in the range [0, Snowflake.process_id_max).

        Raises:
            ValueError: if `process_id` is outside that range.  A negative
                value would otherwise turn every generated id negative.
        """
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if not 0 <= process_id < Snowflake.process_id_max:
            raise ValueError(
                f"process_id must be in [0, {Snowflake.process_id_max}), "
                f"got {process_id!r}"
            )
        self._time_ms = self._now_ms()
        self._process_id = process_id
        self._counter = 0

    @staticmethod
    def _now_ms():
        """Return the current time in whole milliseconds since the unix epoch."""
        # Integer division keeps the computation exact; no float round trip.
        return time.time_ns() // 1_000_000

    def next(self):
        """Return the next id of this generator.

        When all counter values for the stored timestamp are used up, this
        busy-waits until the clock reports a later millisecond, stores that
        timestamp and restarts the counter at 0.
        """
        if self._counter >= Snowflake.counter_max:
            # Reusing the stored millisecond would repeat ids, so spin until
            # the clock has moved past it.
            while (now_ms := self._now_ms()) <= self._time_ms:
                pass  # TODO: profile this; should be few loops if any
            self._time_ms = now_ms
            self._counter = 0
        flake = (
            self._time_ms << Snowflake._time_shift
            | self._process_id << Snowflake._process_id_shift
            | self._counter
        )
        self._counter += 1
        return flake
# Demo / smoke test; runs only when this module is executed as a script.
if __name__ == '__main__':
    # Test lower bits and counter wrapping
    fgen = Snowflake(0)
    # Print the decoded fields of the first four ids: counter (low 15 bits),
    # process id (next 8 bits) and timestamp (everything above bit 23).
    for _ in range(0,4):
        fl = fgen.next()
        print(f"counter: {fl & (1<<15)-1:d} id: {(fl>>15) & (1<<8)-1:d} time_ms: {(fl>>23):d}")
        # Timestamp field again, in binary with `_` digit grouping.
        print(f"{(fl >> 23):_b}")
    print("----")
    # Request counter_max more ids, so the counter is exhausted and restarted
    # somewhere inside this loop.
    for _ in range(0, Snowflake.counter_max):
        fgen.next()
    # Same report as above, for four ids generated after the wrap.
    for _ in range(0,4):
        fl = fgen.next()
        print(f"counter: {fl & (1<<15)-1:d} id: {(fl>>15) & (1<<8)-1:d} time_ms: {(fl>>23):d}")
        print(f"{(fl >> 23):_b}")
    print("----")
    # simple loop time
    # Compare the timestamp field of an id before and after counter_max calls.
    time_start = fgen.next() >> 23
    for _ in range(0, Snowflake.counter_max):
        fgen.next()
    delta = (fgen.next() >> 23) - time_start
    print(f"time delta in ms, one counter cycle: {delta:d}")