mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 09:13:04 +01:00
Add utility scripts to retrieve sarif files from lgtm
This commit is contained in:
committed by
=Michael Hohn
parent
5386310b1b
commit
780def7063
45
scripts/sarif-download-projects.py
Executable file
45
scripts/sarif-download-projects.py
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env python3
""" This is part 1 of 2 hardcoded utility scripts. This one downloads the first
1000 project pages from lgtm.com and saves the `projects` information in
~/local/sarif/projects.pickle for use by `sarif-download-sarif.py`
"""
import concurrent.futures  # NOTE(review): unused in part 1; kept for parity with part 2
import pathlib
import pickle
import requests
import sys

LGTM_URL = "https://lgtm.com/"
SESSION = requests.Session()

# OUTPUT_DIRECTORY = pathlib.Path(__file__).parent
# All output lives under ~/local/sarif; the pickle is consumed by part 2.
OUTPUT_DIRECTORY = pathlib.Path.home() / "local/sarif"
PROJECT_FILE = OUTPUT_DIRECTORY / "projects.pickle"

# Refuse to clobber an existing project listing; delete it to re-fetch.
if PROJECT_FILE.exists():
    sys.stderr.write("error: output file %s exists\n" % PROJECT_FILE)
    sys.exit(1)

OUTPUT_DIRECTORY.mkdir(mode=0o755, parents=True, exist_ok=True)

projects = {}
page = 1
current_projects_url = "%sapi/v1.0/projects/" % LGTM_URL
# Follow the paginated listing for up to 1000 pages (<= so the docstring's
# "first 1000 pages" is accurate; < 1000 stopped one page early).
while page <= 1000:
    print("Fetching projects page %d..." % page)
    page += 1
    response = SESSION.get(current_projects_url)
    response.raise_for_status()
    response_data = response.json()
    # Index projects by their numeric id; part 2 iterates these items.
    for item in response_data["data"]:
        projects[item["id"]] = item
    # The API supplies the next page's URL until the listing is exhausted.
    if "nextPageUrl" in response_data:
        current_projects_url = response_data["nextPageUrl"]
    else:
        break

# Save them
with open(PROJECT_FILE, 'wb') as outfile:
    pickle.dump(projects, outfile)

print("All projects fetched, saved to %s" % PROJECT_FILE)
|
||||||
73
scripts/sarif-download-sarif.py
Executable file
73
scripts/sarif-download-sarif.py
Executable file
@@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/env python3
""" This is part 2 of 2 hardcoded utility scripts. This one downloads the sarif
files for the `projects` collected by `sarif-download-projects.py` to
subdirectories of ~/local/sarif.

Already downloaded files will not be tried again, so if this script fails it
can just be rerun.
"""
import concurrent.futures
import pathlib
import pickle
import requests
import sys

LGTM_URL = "https://lgtm.com/"
SESSION = requests.Session()

# OUTPUT_DIRECTORY = pathlib.Path(__file__).parent
OUTPUT_DIRECTORY = pathlib.Path.home() / "local/sarif"
PROJECT_FILE = OUTPUT_DIRECTORY / "projects.pickle"

# Part 1 must have run first; its pickle is this script's work list.
if not PROJECT_FILE.exists():
    sys.stderr.write("error: missing input file %s\n" % PROJECT_FILE)
    sys.exit(1)

OUTPUT_DIRECTORY.mkdir(mode=0o755, parents=True, exist_ok=True)

with open(PROJECT_FILE, "rb") as infile:
    projects = pickle.load(infile)

# Fan the downloads out over 25 concurrent requests against lgtm.com.
thread_pool = concurrent.futures.ThreadPoolExecutor(25)
futures = {}     # project id -> Future, populated by the submission loop below
any_failed = []  # (index, (project_key, project)) pairs recorded by process()
|
||||||
|
def process(index, pair):
    """Download the latest SARIF export for one project.

    `pair` is one (project_key, project) item from the module-level `projects`
    dict; `index` is its position, used only for progress messages.  Any
    failure is appended to the module-level `any_failed` list so the main
    thread can report it; a rerun of the script retries whatever is still
    missing on disk.
    """
    try:
        project_key, project = pair
        output_path = OUTPUT_DIRECTORY / project["url-identifier"] / "results.sarif"
        # Skip work already done so a failed run can simply be rerun.
        if output_path.exists():
            print("Already fetched %d/%d (%s)" %
                  (index + 1, len(projects), project["url-identifier"]))
            return
        else:
            print("Processing project %d/%d (%s)..." %
                  (index + 1, len(projects), project["url-identifier"]))

        # Get latest analysis information.
        analysis_summary_url = "%sapi/v1.0/analyses/%d/commits/latest" % (LGTM_URL, project_key)
        response = SESSION.get(analysis_summary_url)
        response.raise_for_status()
        analysis_summary = response.json()
        analysis_id = analysis_summary["id"]

        # Get SARIF export.
        sarif_url = "%sapi/v1.0/analyses/%s/alerts" % (LGTM_URL, analysis_id)
        response = SESSION.get(sarif_url)
        response.raise_for_status()
        # And save it
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(response.text)
    # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt;
    # narrow it so only real errors are recorded for retry.
    except Exception:
        any_failed.append( (index, pair) )
|
||||||
|
|
||||||
|
# Submit one download task per project, keyed by project id.
for index, pair in enumerate(projects.items()):
    try:
        futures[pair[0]] = thread_pool.submit(process, index, pair)
    except RuntimeError:
        # submit() raises RuntimeError once the pool is shutting down
        # (e.g. interpreter exit); skip the remaining projects.
        pass
# Block until every submitted download has finished.
thread_pool.shutdown()

# Report the failures collected by the workers; say nothing on a clean run.
if any_failed:
    for index, pair in any_failed:
        print("Processing failed for %d, %s:" % (index, pair))
    print("Re-run to try those again")
|
||||||
Reference in New Issue
Block a user