Rework project and scan id generation

Goals:
- Deterministic IDs across multiple scan runs over the same SARIF file.
- No collisions between SARIF files produced by different scan runs, regardless of whether they belong to the same project.

Assumption: SARIF file names follow the <project>/<unique_filename_per_analysis> format.
This commit is contained in:
Kristen Newbury
2022-10-26 12:00:38 -04:00
parent c51dbba577
commit 4121072088

View File

@@ -80,7 +80,12 @@ import os
import sys
import pickle
from datetime import datetime
from sarif_cli import snowflake_id
from hashlib import blake2b
def hash_unique(item_to_hash, size):
    """Return a deterministic, non-negative integer hash of *item_to_hash*.

    Uses BLAKE2b with a *size*-byte digest, so the result fits in
    size*8 bits (e.g. size=8 yields a value in [0, 2**64), matching
    the pd.UInt64Dtype() columns this script writes).

    Parameters
    ----------
    item_to_hash : str
        The string to hash (e.g. a project name or SARIF file path).
    size : int
        Digest size in bytes (1..64 for BLAKE2b).

    Returns
    -------
    int
        Non-negative integer decoding of the digest.

    Note: int.from_bytes with the default signed=False can never be
    negative, so the previous abs() wrapper was redundant and has been
    removed — the returned values are byte-identical.
    NOTE(review): with size=8 the value may exceed the Int64 range; the
    "scan_id" column is commented as pd.Int64Dtype() — confirm downstream
    handles the full unsigned 64-bit range.
    """
    h = blake2b(digest_size=size)
    h.update(item_to_hash.encode())
    return int.from_bytes(h.digest(), byteorder='big')
#
# Handle arguments
@@ -130,9 +135,6 @@ if use_successful_runs:
else:
successful_runs = set()
# Scan id guaranteed unique - do not rely on external info
flakegen1 = snowflake_id.Snowflake(0)
count = -1
for path in paths:
count += 1
@@ -146,13 +148,15 @@ for path in paths:
# Scan specification
#
scan_spec = {
"project_id": abs(hash(project + component)), # pd.UInt64Dtype()
"scan_id": flakegen1.next(), # pd.Int64Dtype()
"sarif_file_name": path, # pd.StringDtype()
"project_id": hash_unique(project, 8), # pd.UInt64Dtype()
"scan_id": hash_unique(path, 8), # pd.Int64Dtype()
"sarif_file_name": path, # pd.StringDtype()
}
scan_spec_file = os.path.join(project, component + ".scanspec")
with open(scan_spec_file, 'w') as fp:
json.dump(scan_spec, fp)
#
# Table output directory
#