Files
sarif-cli/scripts/sarif-runner.py
Michael Hohn b7cd96ea72 Add sarif-runner.py to drive sarif-extract-scans for sarif file collections
The input file format is just a list of organization/project entries
2022-05-30 00:04:40 -07:00

78 lines
2.2 KiB
Python

#!/usr/bin/env python3
import hashlib
import json
import os
import pickle
import subprocess
from datetime import datetime
#
# Collect sarif file information
#
PATHS_FILE = 'sarif-files.txt'    # one "<project>/<sarif file>" entry per line
STATUS_FILE = 'successful_runs'   # pickled set of paths that already succeeded
MAX_FILES = 80000                 # upper bound on list entries handled per run
SAVE_INTERVAL = 10                # report time / save status every N-th entry


def _stable_project_id(project, component):
    """Return a non-negative 63-bit id derived from project + component.

    The builtin hash() of a str is salted per process (see PYTHONHASHSEED),
    so it yields a different id on every run.  A BLAKE2b digest is used
    instead so the same project/component always maps to the same id.
    """
    digest = hashlib.blake2b((project + component).encode('utf-8'),
                             digest_size=8).digest()
    # Drop one bit so the value also fits a signed 64-bit integer.
    return int.from_bytes(digest, 'big') >> 1


def _error_tail(stderr, max_lines=5):
    """Return the last `max_lines` lines of `stderr` as a list of strings."""
    return stderr.splitlines()[-max_lines:]


def _load_successful_runs(status_file):
    """Return the saved set of successful paths, or an empty set if none."""
    if not os.path.exists(status_file):
        return set()
    # NOTE(review): pickle.load executes arbitrary code from the file; this is
    # only acceptable because the status file is written by this script.
    with open(status_file, 'rb') as infile:
        return pickle.load(infile)


def _save_successful_runs(successful_runs, status_file):
    """Pickle the set of successful paths to `status_file`."""
    with open(status_file, 'wb') as outfile:
        pickle.dump(successful_runs, outfile)


def _prepare_scan(path):
    """Write the .scanspec file and create the .scantables directory for `path`.

    Returns the tuple (scan_spec_file, output_dir, scan_log_file).
    Raises ValueError if `path` is not of the form "<project>/<file>".
    """
    #
    # Paths and components
    #
    project, sarif_file = path.split('/')
    component = sarif_file.removesuffix('.json')
    #
    # Scan specification
    #
    scan_spec = {
        "project_id": _stable_project_id(project, component),
        "scan_id": int(os.path.getmtime(path)),
        "sarif_file_name": path,
    }
    scan_spec_file = os.path.join(project, component + ".scanspec")
    with open(scan_spec_file, 'w') as fp:
        json.dump(scan_spec, fp)
    #
    # Table output directory
    #
    output_dir = os.path.join(project, component + ".scantables")
    try:
        os.mkdir(output_dir, mode=0o755)
    except FileExistsError:
        pass
    scan_log_file = os.path.join(project, component + ".scanlog")
    return scan_spec_file, output_dir, scan_log_file


def _run_scan(path, scan_spec_file, output_dir, scan_log_file):
    """Run sarif-extract-scans for one file; return True on exit status 0.

    On failure the captured stderr is written to `scan_log_file` and its
    last few lines are printed.
    """
    runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir],
                              capture_output=True, text=True)
    if runstats.returncode == 0:
        print("{:6} {}".format("OK", path))
        return True
    print("{:6} {} {}".format("FAIL", path, scan_log_file))
    # log error
    with open(scan_log_file, 'w') as fp:
        fp.write(runstats.stderr)
    # report only tail
    print("{:6} {}".format("", "Error tail: "))
    for tail_line in _error_tail(runstats.stderr):
        print("{:6} {}".format("", tail_line))
    return False


def main(paths_file=PATHS_FILE, status_file=STATUS_FILE, max_files=MAX_FILES):
    """Drive sarif-extract-scans over every sarif file listed in `paths_file`.

    Uses the saved status in `status_file` so that only entries which have
    not succeeded before are run again.  The status is saved periodically
    and always once more when the loop ends, including on an exception.
    """
    with open(paths_file, 'r') as infile:
        paths = infile.readlines()
    # Use saved status, only re-run failed attempts
    successful_runs = _load_successful_runs(status_file)
    try:
        for count, line in enumerate(paths[:max_files], start=1):
            path = line.rstrip()
            if not path:
                # Blank lines carry no entry
                continue
            scan_spec_file, output_dir, scan_log_file = _prepare_scan(path)
            #
            # Run sarif-extract-scans
            #
            if path in successful_runs:
                # Don't rerun
                continue
            if count % SAVE_INTERVAL == 0:
                # Some timing information
                print("{:6} {}".format("DATE", datetime.now().isoformat()))
                # Save occasionally
                _save_successful_runs(successful_runs, status_file)
            if _run_scan(path, scan_spec_file, output_dir, scan_log_file):
                successful_runs.add(path)
    finally:
        # Keep successes recorded since the last periodic save
        _save_successful_runs(successful_runs, status_file)


if __name__ == "__main__":
    main()