tested simple pull extractor. fail.

This commit is contained in:
2025-10-20 13:28:53 -07:00
committed by =michael hohn
parent 6c9e992b0e
commit f98af0295e
11 changed files with 98 additions and 82814 deletions

View File

@@ -1,7 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """Create SQLite schema for SARIF importer."""
Create SQLite schema for SARIF importer.
"""
import sqlite3, sys import sqlite3, sys
schemas = { schemas = {
@@ -10,8 +8,7 @@ schemas = {
timestamp TIMESTAMP, timestamp TIMESTAMP,
tool TEXT, tool TEXT,
version TEXT, version TEXT,
exit_code INTEGER exit_code INTEGER);""",
);""",
"results": """CREATE TABLE IF NOT EXISTS results( "results": """CREATE TABLE IF NOT EXISTS results(
run_id TEXT, run_id TEXT,
rule_id TEXT, rule_id TEXT,
@@ -22,8 +19,7 @@ schemas = {
line_end INTEGER, line_end INTEGER,
column_start INTEGER, column_start INTEGER,
column_end INTEGER, column_end INTEGER,
PRIMARY KEY (run_id, rule_id, file_path, line_start) PRIMARY KEY(run_id,rule_id,file_path,line_start));""",
);""",
"alerts": """CREATE TABLE IF NOT EXISTS alerts( "alerts": """CREATE TABLE IF NOT EXISTS alerts(
alert_id TEXT PRIMARY KEY, alert_id TEXT PRIMARY KEY,
run_id TEXT, run_id TEXT,
@@ -31,8 +27,7 @@ schemas = {
kind TEXT, kind TEXT,
file_path TEXT, file_path TEXT,
message TEXT, message TEXT,
severity TEXT severity TEXT);""",
);""",
"referenced_source_regions": """CREATE TABLE IF NOT EXISTS referenced_source_regions( "referenced_source_regions": """CREATE TABLE IF NOT EXISTS referenced_source_regions(
region_id TEXT PRIMARY KEY, region_id TEXT PRIMARY KEY,
result_id TEXT, result_id TEXT,
@@ -42,8 +37,7 @@ schemas = {
start_column INTEGER, start_column INTEGER,
end_column INTEGER, end_column INTEGER,
snippet TEXT, snippet TEXT,
source_hash TEXT source_hash TEXT);"""
);"""
} }
def main(): def main():
@@ -53,11 +47,8 @@ def main():
db=sys.argv[1] db=sys.argv[1]
con=sqlite3.connect(db) con=sqlite3.connect(db)
cur=con.cursor() cur=con.cursor()
for name, sql in schemas.items(): for sql in schemas.values(): cur.executescript(sql)
cur.executescript(sql) con.commit(); con.close()
con.commit() print(f"Schema ready in {db}")
con.close()
print(f"Created/verified schema in {db}")
if __name__ == "__main__": if __name__=="__main__": main()
main()

View File

@@ -1,40 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Pull-style SARIF to SQLite converter. Pull-style SARIF SQLite importer.
Example: sarif-pull foo.sarif foo.db Populates runs, results, alerts, referenced_source_regions.
""" """
import sqlite3, sys, os, uuid import sqlite3, sys, os
from sarif_util import load_json, hash_snippet, now_timestamp
import json, fnmatch, hashlib, datetime import subprocess
def load_json(path):
    """Read the file at *path* as UTF-8 text and return the parsed JSON value.

    Errors from ``open`` (e.g. a missing file) and from ``json.load``
    (malformed JSON) are not caught here and propagate to the caller.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
def flatten_json(obj, prefix="", sep="/"):
    """Yield (path, value) pairs from nested dicts/lists.

    Dict keys and list indices are joined with *sep* to form the path of
    each leaf (any value that is neither a dict nor a list).  A leaf passed
    in directly is yielded with *prefix* as its path.

    Empty dicts and empty lists contain no leaves, so they yield nothing.
    """
    if isinstance(obj, dict):
        for key, value in obj.items():
            # An empty prefix means "top level": use the bare key, no separator.
            child = f"{prefix}{sep}{key}" if prefix else key
            yield from flatten_json(value, child, sep)
    elif isinstance(obj, list):
        for index, value in enumerate(obj):
            child = f"{prefix}{sep}{index}" if prefix else str(index)
            yield from flatten_json(value, child, sep)
    else:
        yield prefix, obj
def hash_snippet(text):
    """Return the SHA-1 hex digest of *text* encoded as UTF-8.

    Characters that cannot be encoded are dropped (``errors='ignore'``)
    rather than raising, so two strings that differ only in such characters
    produce the same digest.
    """
    encoded = text.encode('utf-8', 'ignore')
    return hashlib.sha1(encoded).hexdigest()
def now_timestamp():
    """Return the current UTC time as ``'YYYY-MM-DD HH:MM:SS'``.

    The string carries no timezone suffix and is truncated to whole seconds.
    """
    # datetime.utcnow() is deprecated since Python 3.12.  Take an aware UTC
    # time and drop tzinfo so the text has no "+00:00" suffix, matching the
    # format the naive utcnow() value produced.
    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    return utc_now.isoformat(sep=' ', timespec='seconds')
def ensure_schema(db): def ensure_schema(db):
import subprocess
subprocess.run(["sarif-make-schema", db], check=True) subprocess.run(["sarif-make-schema", db], check=True)
def extract_results(run_id, run): def extract_all(run_id, run):
results = [] results, alerts, regions = [], [], []
tool = run.get("tool",{}).get("driver",{}).get("name","") tool = run.get("tool",{}).get("driver",{}).get("name","")
version = run.get("tool",{}).get("driver",{}).get("semanticVersion","") version = run.get("tool",{}).get("driver",{}).get("semanticVersion","")
for res in run.get("results",[]) or []: for res in run.get("results",[]) or []:
@@ -46,39 +23,47 @@ def extract_results(run_id, run):
ploc=loc.get("physicalLocation",{}) if loc else {} ploc=loc.get("physicalLocation",{}) if loc else {}
file_path=(ploc.get("artifactLocation") or {}).get("uri","") file_path=(ploc.get("artifactLocation") or {}).get("uri","")
region=ploc.get("region") or {} region=ploc.get("region") or {}
results.append({ ls,le,cs,ce=(region.get("startLine"),region.get("endLine"),
"run_id": run_id, region.get("startColumn"),region.get("endColumn"))
"rule_id": rule_id, rid=hash_snippet(f"{run_id}|{rule_id}|{file_path}|{ls}|{le}|{cs}|{ce}")
"severity": sev, results.append(dict(run_id=run_id,rule_id=rule_id,severity=sev,
"message": msg, message=msg,file_path=file_path,
"file_path": file_path, line_start=ls,line_end=le,
"line_start": region.get("startLine"), column_start=cs,column_end=ce))
"line_end": region.get("endLine"), alerts.append(dict(alert_id=rid,run_id=run_id,rule_id=rule_id,
"column_start": region.get("startColumn"), kind="result",file_path=file_path,
"column_end": region.get("endColumn"), message=msg,severity=sev))
}) regions.append(dict(region_id=hash_snippet(f"{file_path}|{ls}|{le}|{cs}|{ce}"),
return results, tool, version result_id=rid,file_path=file_path,
start_line=ls,end_line=le,
start_column=cs,end_column=ce,
snippet=None,source_hash=None))
return results, alerts, regions, tool, version
def main(): def main():
if len(sys.argv)<3: if len(sys.argv)<3:
print("Usage: sarif-pull input.sarif output.db") print("Usage: sarif-pull input.sarif output.db")
sys.exit(1) sys.exit(1)
sarif_file, dbfile = sys.argv[1], sys.argv[2] sarif_file,dbfile=sys.argv[1:3]
ensure_schema(dbfile) ensure_schema(dbfile)
sarif=load_json(sarif_file) sarif=load_json(sarif_file)
con=sqlite3.connect(dbfile) con=sqlite3.connect(dbfile)
cur=con.cursor() cur=con.cursor()
for i,run in enumerate(sarif.get("runs",[])): for i,run in enumerate(sarif.get("runs",[])):
run_id=f"{os.path.basename(sarif_file)}#{i}" run_id=f"{os.path.basename(sarif_file)}#{i}"
results, tool, version = extract_results(run_id, run) results,alerts,regions,tool,version=extract_all(run_id,run)
cur.execute("INSERT OR REPLACE INTO runs VALUES (?,?,?,?,?)", cur.execute("INSERT OR REPLACE INTO runs VALUES (?,?,?,?,?)",
(run_id,now_timestamp(),tool,version,0)) (run_id,now_timestamp(),tool,version,0))
cur.executemany("""INSERT OR REPLACE INTO results VALUES cur.executemany("""INSERT OR REPLACE INTO results VALUES
(:run_id,:rule_id,:severity,:message,:file_path, (:run_id,:rule_id,:severity,:message,:file_path,
:line_start,:line_end,:column_start,:column_end)""",results) :line_start,:line_end,:column_start,:column_end)""",results)
con.commit() cur.executemany("""INSERT OR REPLACE INTO alerts VALUES
con.close() (:alert_id,:run_id,:rule_id,:kind,:file_path,:message,:severity)""",alerts)
print(f"Inserted {len(results)} results into {dbfile}") cur.executemany("""INSERT OR REPLACE INTO referenced_source_regions VALUES
(:region_id,:result_id,:file_path,:start_line,:end_line,
:start_column,:end_column,:snippet,:source_hash)""",regions)
con.commit(); con.close()
print(f"Inserted {len(results)} results, {len(alerts)} alerts, "
f"{len(regions)} regions into {dbfile}")
if __name__ == "__main__": if __name__=="__main__": main()
main()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff