Files
codeql/misc/scripts/library-coverage/generate-time-series.py
2021-06-10 10:11:23 +02:00

115 lines
3.0 KiB
Python

import subprocess
import csv
import sys
import os
import shutil
from datetime import date
import datetime
import utils
"""
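Collects the source/sink/summary counts reported by the Java Coverage.ql query
for a series of commits, walking back through the history of `main`, and
writes them to a CSV time series.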
"""
# the distance between commits to include in the output, in days (it is used
# as the `days` argument of a timedelta when walking back through history)
day_distance = 1
# the directory where codeql is. This is the directory where we change the SHAs
working_dir = sys.argv[1]
# language identifier passed to utils.create_empty_database
lang = "java"
# directory of the temporary database; removed again after each query run
db = "empty-java"
# file handed to utils.run_codeql_query; it is parsed as CSV and then deleted
ql_output = "output-java.csv"
# file that the resulting time series (one row per visited commit) is written to
csv_output = "timeseries-java.csv"
def get_str_output(arr):
    """Run the command ``arr`` and return what it printed to stdout as text.

    The captured bytes are decoded as UTF-8, and any newline or single-quote
    characters at either end of the text are stripped off.  A non-zero exit
    status raises ``subprocess.CalledProcessError``.
    """
    raw_bytes = subprocess.check_output(arr)
    text = raw_bytes.decode("utf-8")
    return text.strip("\n'")
def get_date(sha):
    """Return the date of commit ``sha`` as a ``datetime.date``.

    Asks ``git show`` for the ``%cd`` date in ``short`` format and parses the
    result as an ISO date.
    """
    git_show = [
        "git", "show", "--no-patch", "--no-notes",
        "--pretty='%cd'", "--date=short", sha,
    ]
    # the quotes that are part of the --pretty format are stripped again by
    # get_str_output
    iso_day = get_str_output(git_show)
    return date.fromisoformat(iso_day)
def get_parent(sha, date):
    """Return ``(parent_sha, parent_date)`` for the commit ``sha + "^"``.

    ``date`` is accepted for symmetry with the callers but is not used here.
    """
    resolved = get_str_output(["git", "rev-parse", sha + "^"])
    return (resolved, get_date(resolved))
def get_previous_sha(sha, date):
    """Find the nearest ancestor of ``sha`` that is old enough to be sampled.

    Follows parent commits, starting from the parent of ``sha``, until one is
    found whose date is not later than ``date`` minus ``day_distance`` days.
    Returns that commit as a ``(sha, date)`` tuple.
    """
    candidate = get_parent(sha, date)
    # a parent dated after this threshold is too close to `date` and is skipped
    threshold = date + datetime.timedelta(days=-1 * day_distance)
    while candidate[1] > threshold:
        candidate = get_parent(candidate[0], candidate[1])
    return candidate
def get_stats():
    """Run the coverage query on the current checkout and total its results.

    Creates the database ``db`` (removing a stale one first), runs
    ``Coverage.ql`` with ``ql_output`` as its output file, and removes the
    database again.  The output file is then read as CSV, the counts in
    column 4 are summed per kind found in column 3, and the file is deleted.

    Returns a ``(sources, sinks, summaries)`` tuple of the summed counts.
    """
    # get rid of a database that an earlier run may have left behind
    if os.path.isdir(db):
        shutil.rmtree(db)
    utils.create_empty_database(lang, ".java", db)
    utils.run_codeql_query(
        "java/ql/src/meta/frameworks/Coverage.ql", db, ql_output)
    shutil.rmtree(db)

    # running totals keyed by the value found in the "kind" column
    totals = {"source": 0, "sink": 0, "summary": 0}

    with open(ql_output) as results:
        for record in csv.reader(results):
            # record: "android.util",1,"remote","source",16
            kind = record[3]
            if kind in totals:
                totals[kind] += int(record[4])

    os.remove(ql_output)

    return (totals["source"], totals["sink"], totals["summary"])
# Walk back through the history of `main`, writing one CSV row of coverage
# statistics per visited commit, until statistics can no longer be collected
# or no earlier commit can be resolved.
with open(csv_output, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])

    # csv_output was opened relative to the starting directory; everything
    # from here on runs inside the codeql checkout.
    os.chdir(working_dir)

    utils.subprocess_run(["git", "checkout", "main"])

    try:
        current_sha = get_str_output(["git", "rev-parse", "HEAD"])
        current_date = get_date(current_sha)

        while True:
            print("Getting stats for " + current_sha)
            utils.subprocess_run(["git", "checkout", current_sha])

            try:
                stats = get_stats()

                csvwriter.writerow(
                    [current_sha, current_date, stats[0], stats[1], stats[2]])

                print("Collected stats for " + current_sha +
                      " at " + current_date.isoformat())
            except Exception:
                # Any failure to collect stats ends the walk.  Only Exception
                # is caught so that Ctrl-C / SystemExit still abort the script
                # instead of being reported as a stats error.
                print("Unexpected error:", sys.exc_info()[0])

                if os.path.isdir(db):
                    shutil.rmtree(db)
                print("Error getting stats for " +
                      current_sha + ". Stopping iteration.")
                break

            try:
                current_sha, current_date = get_previous_sha(
                    current_sha, current_date)
            except subprocess.CalledProcessError:
                # git could not resolve an earlier commit, e.g. the commit has
                # no parent (root commit or shallow-clone boundary).
                print("No earlier commit found before " +
                      current_sha + ". Stopping iteration.")
                break
    finally:
        # Always leave the repository on `main`, even when the walk is
        # interrupted or fails, rather than on a detached historical commit.
        utils.subprocess_run(["git", "checkout", "main"])