Add timeseries CSV generator script

This commit is contained in:
Tamas Vajk
2021-06-04 11:57:31 +02:00
parent 270cf62f08
commit 4de4277a8d
3 changed files with 144 additions and 28 deletions

View File

@@ -1,9 +1,9 @@
import subprocess
import csv
import sys
import os
import shutil
import settings
import utils
"""
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -12,30 +12,6 @@ data.
"""
def subprocess_run(cmd):
    """Execute `cmd` with subprocess.run and return the completed process.

    stdout/stderr are captured as text, the child receives a copy of the
    current environment, and a non-zero exit status raises an exception
    (check=True).
    """
    run_options = {
        "capture_output": True,
        "text": True,
        "env": os.environ.copy(),
        "check": True,
    }
    return subprocess.run(cmd, **run_options)
def create_empty_database(lang, extension, database):
    """Creates an empty database for the given language.

    Initialises `database` with `codeql database init` against the source
    root /tmp/empty (which is allowed to be missing), creates one empty
    placeholder file `<database>/src/tmp/empty/empty<extension>`, and then
    finalizes the database with `--no-pre-finalize`.

    lang: value passed to `--language=`.
    extension: suffix appended to the placeholder file name "empty".
    database: path of the database directory.

    Raises whatever subprocess_run raises when a codeql command exits with a
    non-zero status, and OSError if the placeholder cannot be created.
    """
    subprocess_run(["codeql", "database", "init", "--language=" + lang,
                    "--source-root=/tmp/empty", "--allow-missing-source-root", database])

    # Create the placeholder with the standard library instead of spawning
    # `mkdir -p` / `touch`, which are not available on every platform.
    placeholder_dir = os.path.join(database, "src", "tmp", "empty")
    os.makedirs(placeholder_dir, exist_ok=True)
    # Append mode creates the file if needed and never truncates an existing one.
    with open(os.path.join(placeholder_dir, "empty" + extension), "a"):
        pass

    subprocess_run(["codeql", "database", "finalize",
                    database, "--no-pre-finalize"])
def run_codeql_query(query, database, output):
    """Evaluates `query` on `database` and writes the result to `output` as CSV.

    The raw result is first written to `output + ".bqrs"`, decoded into a
    CSV without titles, and the intermediate .bqrs file is then deleted.
    """
    bqrs_path = output + ".bqrs"

    run_cmd = ["codeql", "query", "run", query]
    run_cmd += ["--database", database, "--output", bqrs_path]
    subprocess_run(run_cmd)

    decode_cmd = ["codeql", "bqrs", "decode", bqrs_path]
    decode_cmd += ["--format=csv", "--no-titles", "--output", output]
    subprocess_run(decode_cmd)

    os.remove(bqrs_path)
def append_csv_number(list, value):
"""Adds a number to the list or None if the value is not greater than 0."""
if value > 0:
@@ -117,7 +93,7 @@ class LanguageConfig:
try: # Check for `codeql` on path
subprocess_run(["codeql", "--version"])
utils.subprocess_run(["codeql", "--version"])
except Exception as e:
print("Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting.", file=sys.stderr)
raise e
@@ -165,8 +141,8 @@ for config in configs:
lang = config.lang
db = "empty-" + lang
ql_output = output_ql_csv.format(language=lang)
create_empty_database(lang, config.ext, db)
run_codeql_query(config.ql_path, db, ql_output)
utils.create_empty_database(lang, config.ext, db)
utils.run_codeql_query(config.ql_path, db, ql_output)
shutil.rmtree(db)
packages = {}

View File

@@ -0,0 +1,114 @@
import subprocess
import csv
import sys
import os
import shutil
from datetime import date
import datetime
import utils
"""
Gets the sink/source/summary statistics for different days.
"""
# the distance between commits to include in the output
# (in days; get_previous_sha walks back through parents until a commit is at
# least this many days older than the current one)
day_distance = 1
# the directory where codeql is. This is the directory where we change the SHAs
# NOTE: read at import time; running the script without an argument raises IndexError here.
working_dir = sys.argv[1]
# language passed to utils.create_empty_database
lang = "java"
# name of the temporary empty database; it is created and removed for every commit
db = "empty-java"
# intermediate CSV written by the coverage query; parsed and deleted in get_stats
ql_output = "output-java.csv"
# final report: one row (SHA, Date, Sources, Sinks, Summaries) per sampled commit
csv_output = "timeseries-java.csv"
def get_str_output(arr):
    """Run the command `arr` and return its standard output as a string.

    The output is decoded as UTF-8, and newline and single-quote characters
    are stripped from both ends.
    """
    raw_bytes = subprocess.check_output(arr)
    decoded = raw_bytes.decode("utf-8")
    return decoded.strip("\n'")
def get_date(sha):
    """Return the date of commit `sha` as a datetime.date.

    Asks `git show` for the `%cd` field in `--date=short` format and parses
    the result with date.fromisoformat.
    """
    git_cmd = [
        "git", "show",
        "--no-patch", "--no-notes",
        # No shell is involved, so the quotes are printed literally by git;
        # get_str_output strips them again.
        "--pretty='%cd'",
        "--date=short",
        sha,
    ]
    short_date = get_str_output(git_cmd)
    return date.fromisoformat(short_date)
def get_parent(sha, date):
    """Return a `(sha, date)` tuple describing the commit `sha^`.

    The `date` argument is accepted but not used.
    """
    ancestor = get_str_output(["git", "rev-parse", sha + "^"])
    return (ancestor, get_date(ancestor))
def get_previous_sha(sha, date):
    """Return `(sha, date)` of the nearest ancestor that is old enough.

    Starting from the parent of `sha`, keeps stepping to the next parent
    while the commit's date is still later than `date` minus `day_distance`
    days.
    """
    ancestor_sha, ancestor_date = get_parent(sha, date)
    while ancestor_date > date - datetime.timedelta(days=day_distance):
        ancestor_sha, ancestor_date = get_parent(ancestor_sha, ancestor_date)
    return (ancestor_sha, ancestor_date)
def get_stats():
    """Run the Java coverage query and return `(sources, sinks, summaries)`.

    A fresh empty database is built (any leftover one is deleted first), the
    Coverage.ql query is run against it, and the database is removed again.
    The resulting CSV is then read, the counts in column 4 are added up per
    kind named in column 3, and the CSV file is deleted.
    """
    if os.path.isdir(db):
        shutil.rmtree(db)
    utils.create_empty_database(lang, ".java", db)
    utils.run_codeql_query(
        "java/ql/src/meta/frameworks/Coverage.ql", db, ql_output)
    shutil.rmtree(db)

    totals = {"source": 0, "sink": 0, "summary": 0}
    with open(ql_output) as csvfile:
        for row in csv.reader(csvfile):
            # row: "android.util",1,"remote","source",16
            kind = row[3]
            if kind in totals:
                totals[kind] += int(row[4])

    os.remove(ql_output)
    return (totals["source"], totals["sink"], totals["summary"])
# Walk back through the history of `main`, one sample every `day_distance`
# days, and record the source/sink/summary counts of each sampled commit.
# The CSV is opened before changing directory, so it is created relative to
# the directory the script was started from.
with open(csv_output, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])

    os.chdir(working_dir)

    utils.subprocess_run(["git", "checkout", "main"])

    # From here on the repository gets moved to other commits; the `finally`
    # below guarantees it is put back on `main` however the loop ends.
    try:
        current_sha = get_str_output(["git", "rev-parse", "HEAD"])
        current_date = get_date(current_sha)

        while True:
            print("Getting stats for " + current_sha)
            utils.subprocess_run(["git", "checkout", current_sha])

            try:
                stats = get_stats()

                csvwriter.writerow(
                    [current_sha, current_date, stats[0], stats[1], stats[2]])

                print("Collected stats for " + current_sha +
                      " at " + current_date.isoformat())
            except Exception:
                # `Exception` rather than a bare `except`, so Ctrl-C and
                # SystemExit are not mistaken for a failing commit.
                print("Unexpected error:", sys.exc_info()[0])

                if os.path.isdir(db):
                    shutil.rmtree(db)
                print("Error getting stats for " +
                      current_sha + ". Stopping iteration.")
                break

            try:
                current_sha, current_date = get_previous_sha(
                    current_sha, current_date)
            except subprocess.CalledProcessError:
                # `git rev-parse <sha>^` fails once a commit has no parent;
                # the rows collected so far are still valid, so stop cleanly.
                print("No earlier commit found before " +
                      current_sha + ". Stopping iteration.")
                break
    finally:
        utils.subprocess_run(["git", "checkout", "main"])

View File

@@ -0,0 +1,26 @@
import subprocess
import os
def subprocess_run(cmd):
    """Run `cmd` via subprocess.run and return the resulting CompletedProcess.

    Both output streams are captured in text mode and the child gets its own
    copy of the current environment. Because check=True is used, a non-zero
    exit code raises an exception.
    """
    child_env = os.environ.copy()
    completed = subprocess.run(
        cmd,
        check=True,
        text=True,
        capture_output=True,
        env=child_env,
    )
    return completed
def create_empty_database(lang, extension, database):
    """Creates an empty database for the given language.

    Initialises `database` with `codeql database init` against the source
    root /tmp/empty (which is allowed to be missing), creates one empty
    placeholder file `<database>/src/tmp/empty/empty<extension>`, and then
    finalizes the database with `--no-pre-finalize`.

    lang: value passed to `--language=`.
    extension: suffix appended to the placeholder file name "empty".
    database: path of the database directory.

    Raises whatever subprocess_run raises when a codeql command exits with a
    non-zero status, and OSError if the placeholder cannot be created.
    """
    subprocess_run(["codeql", "database", "init", "--language=" + lang,
                    "--source-root=/tmp/empty", "--allow-missing-source-root", database])

    # Create the placeholder with the standard library instead of spawning
    # `mkdir -p` / `touch`, which are not available on every platform.
    placeholder_dir = os.path.join(database, "src", "tmp", "empty")
    os.makedirs(placeholder_dir, exist_ok=True)
    # Append mode creates the file if needed and never truncates an existing one.
    with open(os.path.join(placeholder_dir, "empty" + extension), "a"):
        pass

    subprocess_run(["codeql", "database", "finalize",
                    database, "--no-pre-finalize"])
def run_codeql_query(query, database, output):
    """Runs `query` against `database`, leaving the results in `output` as CSV.

    Results are produced in two steps: `codeql query run` writes
    `output + ".bqrs"`, then `codeql bqrs decode` converts that file to CSV
    without a title row. The .bqrs file is removed afterwards.
    """
    intermediate = output + ".bqrs"
    steps = (
        ["codeql", "query", "run", query,
         "--database", database, "--output", intermediate],
        ["codeql", "bqrs", "decode", intermediate,
         "--format=csv", "--no-titles", "--output", output],
    )
    for command in steps:
        subprocess_run(command)
    os.remove(intermediate)