mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 01:13:03 +01:00
tested simple pull extractor. fail.
This commit is contained in:
@@ -1,18 +1,15 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""Create SQLite schema for SARIF importer."""
|
||||||
Create SQLite schema for SARIF importer.
|
|
||||||
"""
|
|
||||||
import sqlite3, sys
|
import sqlite3, sys
|
||||||
|
|
||||||
schemas = {
|
schemas = {
|
||||||
"runs": """CREATE TABLE IF NOT EXISTS runs (
|
"runs": """CREATE TABLE IF NOT EXISTS runs(
|
||||||
run_id TEXT PRIMARY KEY,
|
run_id TEXT PRIMARY KEY,
|
||||||
timestamp TIMESTAMP,
|
timestamp TIMESTAMP,
|
||||||
tool TEXT,
|
tool TEXT,
|
||||||
version TEXT,
|
version TEXT,
|
||||||
exit_code INTEGER
|
exit_code INTEGER);""",
|
||||||
);""",
|
"results": """CREATE TABLE IF NOT EXISTS results(
|
||||||
"results": """CREATE TABLE IF NOT EXISTS results (
|
|
||||||
run_id TEXT,
|
run_id TEXT,
|
||||||
rule_id TEXT,
|
rule_id TEXT,
|
||||||
severity TEXT,
|
severity TEXT,
|
||||||
@@ -22,18 +19,16 @@ schemas = {
|
|||||||
line_end INTEGER,
|
line_end INTEGER,
|
||||||
column_start INTEGER,
|
column_start INTEGER,
|
||||||
column_end INTEGER,
|
column_end INTEGER,
|
||||||
PRIMARY KEY (run_id, rule_id, file_path, line_start)
|
PRIMARY KEY(run_id,rule_id,file_path,line_start));""",
|
||||||
);""",
|
"alerts": """CREATE TABLE IF NOT EXISTS alerts(
|
||||||
"alerts": """CREATE TABLE IF NOT EXISTS alerts (
|
|
||||||
alert_id TEXT PRIMARY KEY,
|
alert_id TEXT PRIMARY KEY,
|
||||||
run_id TEXT,
|
run_id TEXT,
|
||||||
rule_id TEXT,
|
rule_id TEXT,
|
||||||
kind TEXT,
|
kind TEXT,
|
||||||
file_path TEXT,
|
file_path TEXT,
|
||||||
message TEXT,
|
message TEXT,
|
||||||
severity TEXT
|
severity TEXT);""",
|
||||||
);""",
|
"referenced_source_regions": """CREATE TABLE IF NOT EXISTS referenced_source_regions(
|
||||||
"referenced_source_regions": """CREATE TABLE IF NOT EXISTS referenced_source_regions (
|
|
||||||
region_id TEXT PRIMARY KEY,
|
region_id TEXT PRIMARY KEY,
|
||||||
result_id TEXT,
|
result_id TEXT,
|
||||||
file_path TEXT,
|
file_path TEXT,
|
||||||
@@ -42,22 +37,18 @@ schemas = {
|
|||||||
start_column INTEGER,
|
start_column INTEGER,
|
||||||
end_column INTEGER,
|
end_column INTEGER,
|
||||||
snippet TEXT,
|
snippet TEXT,
|
||||||
source_hash TEXT
|
source_hash TEXT);"""
|
||||||
);"""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv)<2:
|
||||||
print("Usage: sarif-make-schema dbfile")
|
print("Usage: sarif-make-schema dbfile")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
db = sys.argv[1]
|
db=sys.argv[1]
|
||||||
con = sqlite3.connect(db)
|
con=sqlite3.connect(db)
|
||||||
cur = con.cursor()
|
cur=con.cursor()
|
||||||
for name, sql in schemas.items():
|
for sql in schemas.values(): cur.executescript(sql)
|
||||||
cur.executescript(sql)
|
con.commit(); con.close()
|
||||||
con.commit()
|
print(f"Schema ready in {db}")
|
||||||
con.close()
|
|
||||||
print(f"Created/verified schema in {db}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__=="__main__": main()
|
||||||
main()
|
|
||||||
|
|||||||
123
bin/sarif-pull
123
bin/sarif-pull
@@ -1,84 +1,69 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Pull-style SARIF to SQLite converter.
|
Pull-style SARIF → SQLite importer.
|
||||||
Example: sarif-pull foo.sarif foo.db
|
Populates runs, results, alerts, referenced_source_regions.
|
||||||
"""
|
"""
|
||||||
import sqlite3, sys, os, uuid
|
import sqlite3, sys, os
|
||||||
|
from sarif_util import load_json, hash_snippet, now_timestamp
|
||||||
import json, fnmatch, hashlib, datetime
|
import subprocess
|
||||||
|
|
||||||
def load_json(path):
|
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
|
||||||
return json.load(f)
|
|
||||||
|
|
||||||
def flatten_json(obj, prefix="", sep="/"):
|
|
||||||
"""Yield (path, value) pairs from nested dicts/lists."""
|
|
||||||
if isinstance(obj, dict):
|
|
||||||
for k, v in obj.items():
|
|
||||||
yield from flatten_json(v, f"{prefix}{sep}{k}" if prefix else k, sep)
|
|
||||||
elif isinstance(obj, list):
|
|
||||||
for i, v in enumerate(obj):
|
|
||||||
yield from flatten_json(v, f"{prefix}{sep}{i}" if prefix else str(i), sep)
|
|
||||||
else:
|
|
||||||
yield prefix, obj
|
|
||||||
|
|
||||||
def hash_snippet(text):
|
|
||||||
return hashlib.sha1(text.encode('utf-8', 'ignore')).hexdigest()
|
|
||||||
|
|
||||||
def now_timestamp():
|
|
||||||
return datetime.datetime.utcnow().isoformat(sep=' ', timespec='seconds')
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_schema(db):
|
def ensure_schema(db):
|
||||||
import subprocess
|
|
||||||
subprocess.run(["sarif-make-schema", db], check=True)
|
subprocess.run(["sarif-make-schema", db], check=True)
|
||||||
|
|
||||||
def extract_results(run_id, run):
|
def extract_all(run_id, run):
|
||||||
results = []
|
results, alerts, regions = [], [], []
|
||||||
tool = run.get("tool", {}).get("driver", {}).get("name", "")
|
tool = run.get("tool",{}).get("driver",{}).get("name","")
|
||||||
version = run.get("tool", {}).get("driver", {}).get("semanticVersion", "")
|
version = run.get("tool",{}).get("driver",{}).get("semanticVersion","")
|
||||||
for res in run.get("results", []) or []:
|
for res in run.get("results",[]) or []:
|
||||||
msg = (res.get("message") or {}).get("text", "")
|
msg=(res.get("message") or {}).get("text","")
|
||||||
rule_id = res.get("ruleId", "")
|
rule_id=res.get("ruleId","")
|
||||||
sev = (res.get("properties") or {}).get("problem.severity", "")
|
sev=(res.get("properties") or {}).get("problem.severity","")
|
||||||
locs = res.get("locations") or []
|
locs=res.get("locations") or []
|
||||||
for loc in locs:
|
for loc in locs:
|
||||||
ploc = loc.get("physicalLocation", {}) if loc else {}
|
ploc=loc.get("physicalLocation",{}) if loc else {}
|
||||||
file_path = (ploc.get("artifactLocation") or {}).get("uri", "")
|
file_path=(ploc.get("artifactLocation") or {}).get("uri","")
|
||||||
region = ploc.get("region") or {}
|
region=ploc.get("region") or {}
|
||||||
results.append({
|
ls,le,cs,ce=(region.get("startLine"),region.get("endLine"),
|
||||||
"run_id": run_id,
|
region.get("startColumn"),region.get("endColumn"))
|
||||||
"rule_id": rule_id,
|
rid=hash_snippet(f"{run_id}|{rule_id}|{file_path}|{ls}|{le}|{cs}|{ce}")
|
||||||
"severity": sev,
|
results.append(dict(run_id=run_id,rule_id=rule_id,severity=sev,
|
||||||
"message": msg,
|
message=msg,file_path=file_path,
|
||||||
"file_path": file_path,
|
line_start=ls,line_end=le,
|
||||||
"line_start": region.get("startLine"),
|
column_start=cs,column_end=ce))
|
||||||
"line_end": region.get("endLine"),
|
alerts.append(dict(alert_id=rid,run_id=run_id,rule_id=rule_id,
|
||||||
"column_start": region.get("startColumn"),
|
kind="result",file_path=file_path,
|
||||||
"column_end": region.get("endColumn"),
|
message=msg,severity=sev))
|
||||||
})
|
regions.append(dict(region_id=hash_snippet(f"{file_path}|{ls}|{le}|{cs}|{ce}"),
|
||||||
return results, tool, version
|
result_id=rid,file_path=file_path,
|
||||||
|
start_line=ls,end_line=le,
|
||||||
|
start_column=cs,end_column=ce,
|
||||||
|
snippet=None,source_hash=None))
|
||||||
|
return results, alerts, regions, tool, version
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 3:
|
if len(sys.argv)<3:
|
||||||
print("Usage: sarif-pull input.sarif output.db")
|
print("Usage: sarif-pull input.sarif output.db")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
sarif_file, dbfile = sys.argv[1], sys.argv[2]
|
sarif_file,dbfile=sys.argv[1:3]
|
||||||
ensure_schema(dbfile)
|
ensure_schema(dbfile)
|
||||||
sarif = load_json(sarif_file)
|
sarif=load_json(sarif_file)
|
||||||
con = sqlite3.connect(dbfile)
|
con=sqlite3.connect(dbfile)
|
||||||
cur = con.cursor()
|
cur=con.cursor()
|
||||||
for i, run in enumerate(sarif.get("runs", [])):
|
for i,run in enumerate(sarif.get("runs",[])):
|
||||||
run_id = f"{os.path.basename(sarif_file)}#{i}"
|
run_id=f"{os.path.basename(sarif_file)}#{i}"
|
||||||
results, tool, version = extract_results(run_id, run)
|
results,alerts,regions,tool,version=extract_all(run_id,run)
|
||||||
cur.execute("INSERT OR REPLACE INTO runs VALUES (?, ?, ?, ?, ?)",
|
cur.execute("INSERT OR REPLACE INTO runs VALUES (?,?,?,?,?)",
|
||||||
(run_id, now_timestamp(), tool, version, 0))
|
(run_id,now_timestamp(),tool,version,0))
|
||||||
cur.executemany("""INSERT OR REPLACE INTO results VALUES
|
cur.executemany("""INSERT OR REPLACE INTO results VALUES
|
||||||
(:run_id, :rule_id, :severity, :message, :file_path,
|
(:run_id,:rule_id,:severity,:message,:file_path,
|
||||||
:line_start, :line_end, :column_start, :column_end)""", results)
|
:line_start,:line_end,:column_start,:column_end)""",results)
|
||||||
con.commit()
|
cur.executemany("""INSERT OR REPLACE INTO alerts VALUES
|
||||||
con.close()
|
(:alert_id,:run_id,:rule_id,:kind,:file_path,:message,:severity)""",alerts)
|
||||||
print(f"Inserted {len(results)} results into {dbfile}")
|
cur.executemany("""INSERT OR REPLACE INTO referenced_source_regions VALUES
|
||||||
|
(:region_id,:result_id,:file_path,:start_line,:end_line,
|
||||||
|
:start_column,:end_column,:snippet,:source_hash)""",regions)
|
||||||
|
con.commit(); con.close()
|
||||||
|
print(f"Inserted {len(results)} results, {len(alerts)} alerts, "
|
||||||
|
f"{len(regions)} regions into {dbfile}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__=="__main__": main()
|
||||||
main()
|
|
||||||
|
|||||||
BIN
data/codeql-dataflow-sql-injection/sqlidb-0.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-0.sarif
(Stored with Git LFS)
Binary file not shown.
BIN
data/codeql-dataflow-sql-injection/sqlidb-1.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-1.sarif
(Stored with Git LFS)
Binary file not shown.
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.12.7-1.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.12.7-1.sarif
(Stored with Git LFS)
Binary file not shown.
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.13.5-1.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.13.5-1.sarif
(Stored with Git LFS)
Binary file not shown.
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.14.0-1.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.14.0-1.sarif
(Stored with Git LFS)
Binary file not shown.
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.9.4-1.sarif
(Stored with Git LFS)
BIN
data/codeql-dataflow-sql-injection/sqlidb-v2.9.4-1.sarif
(Stored with Git LFS)
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user