mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Add timeseries CSV generator script
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
import subprocess
|
||||
import csv
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
import settings
|
||||
import utils
|
||||
|
||||
"""
|
||||
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
|
||||
@@ -12,30 +12,6 @@ data.
|
||||
"""
|
||||
|
||||
|
||||
def subprocess_run(cmd):
|
||||
"""Runs a command through subprocess.run, with a few tweaks. Raises an Exception if exit code != 0."""
|
||||
return subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy(), check=True)
|
||||
|
||||
|
||||
def create_empty_database(lang, extension, database):
|
||||
"""Creates an empty database for the given language."""
|
||||
subprocess_run(["codeql", "database", "init", "--language=" + lang,
|
||||
"--source-root=/tmp/empty", "--allow-missing-source-root", database])
|
||||
subprocess_run(["mkdir", "-p", database + "/src/tmp/empty"])
|
||||
subprocess_run(["touch", database + "/src/tmp/empty/empty" + extension])
|
||||
subprocess_run(["codeql", "database", "finalize",
|
||||
database, "--no-pre-finalize"])
|
||||
|
||||
|
||||
def run_codeql_query(query, database, output):
|
||||
"""Runs a codeql query on the given database."""
|
||||
subprocess_run(["codeql", "query", "run", query,
|
||||
"--database", database, "--output", output + ".bqrs"])
|
||||
subprocess_run(["codeql", "bqrs", "decode", output + ".bqrs",
|
||||
"--format=csv", "--no-titles", "--output", output])
|
||||
os.remove(output + ".bqrs")
|
||||
|
||||
|
||||
def append_csv_number(list, value):
|
||||
"""Adds a number to the list or None if the value is not greater than 0."""
|
||||
if value > 0:
|
||||
@@ -117,7 +93,7 @@ class LanguageConfig:
|
||||
|
||||
|
||||
try: # Check for `codeql` on path
|
||||
subprocess_run(["codeql", "--version"])
|
||||
utils.subprocess_run(["codeql", "--version"])
|
||||
except Exception as e:
|
||||
print("Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting.", file=sys.stderr)
|
||||
raise e
|
||||
@@ -165,8 +141,8 @@ for config in configs:
|
||||
lang = config.lang
|
||||
db = "empty-" + lang
|
||||
ql_output = output_ql_csv.format(language=lang)
|
||||
create_empty_database(lang, config.ext, db)
|
||||
run_codeql_query(config.ql_path, db, ql_output)
|
||||
utils.create_empty_database(lang, config.ext, db)
|
||||
utils.run_codeql_query(config.ql_path, db, ql_output)
|
||||
shutil.rmtree(db)
|
||||
|
||||
packages = {}
|
||||
|
||||
114
misc/scripts/library-coverage/generate-time-series.py
Normal file
114
misc/scripts/library-coverage/generate-time-series.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import subprocess
|
||||
import csv
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
from datetime import date
|
||||
import datetime
|
||||
import utils
|
||||
|
||||
"""
|
||||
Gets the sink/source/summary statistics for different days.
|
||||
"""
|
||||
|
||||
# the distance between commits to include in the output
|
||||
day_distance = 1
|
||||
|
||||
# the directory where codeql is. This is the directory where we change the SHAs
|
||||
working_dir = sys.argv[1]
|
||||
|
||||
lang = "java"
|
||||
db = "empty-java"
|
||||
ql_output = "output-java.csv"
|
||||
csv_output = "timeseries-java.csv"
|
||||
|
||||
|
||||
def get_str_output(arr):
|
||||
r = subprocess.check_output(arr)
|
||||
return r.decode("utf-8").strip("\n'")
|
||||
|
||||
|
||||
def get_date(sha):
|
||||
d = get_str_output(
|
||||
["git", "show", "--no-patch", "--no-notes", "--pretty='%cd'", "--date=short", sha])
|
||||
return date.fromisoformat(d)
|
||||
|
||||
|
||||
def get_parent(sha, date):
|
||||
parent_sha = get_str_output(
|
||||
["git", "rev-parse", sha + "^"])
|
||||
parent_date = get_date(parent_sha)
|
||||
return (parent_sha, parent_date)
|
||||
|
||||
|
||||
def get_previous_sha(sha, date):
|
||||
parent_sha, parent_date = get_parent(sha, date)
|
||||
while parent_date > date + datetime.timedelta(days=-1 * day_distance):
|
||||
parent_sha, parent_date = get_parent(parent_sha, parent_date)
|
||||
|
||||
return (parent_sha, parent_date)
|
||||
|
||||
|
||||
def get_stats():
|
||||
if os.path.isdir(db):
|
||||
shutil.rmtree(db)
|
||||
utils.create_empty_database(lang, ".java", db)
|
||||
utils.run_codeql_query(
|
||||
"java/ql/src/meta/frameworks/Coverage.ql", db, ql_output)
|
||||
shutil.rmtree(db)
|
||||
|
||||
sources = 0
|
||||
sinks = 0
|
||||
summaries = 0
|
||||
|
||||
with open(ql_output) as csvfile:
|
||||
reader = csv.reader(csvfile)
|
||||
for row in reader:
|
||||
# row: "android.util",1,"remote","source",16
|
||||
if row[3] == "source":
|
||||
sources += int(row[4])
|
||||
if row[3] == "sink":
|
||||
sinks += int(row[4])
|
||||
if row[3] == "summary":
|
||||
summaries += int(row[4])
|
||||
|
||||
os.remove(ql_output)
|
||||
|
||||
return (sources, sinks, summaries)
|
||||
|
||||
|
||||
with open(csv_output, 'w', newline='') as csvfile:
|
||||
csvwriter = csv.writer(csvfile)
|
||||
csvwriter.writerow(["SHA", "Date", "Sources", "Sinks", "Summaries"])
|
||||
|
||||
os.chdir(working_dir)
|
||||
|
||||
utils.subprocess_run(["git", "checkout", "main"])
|
||||
|
||||
current_sha = get_str_output(["git", "rev-parse", "HEAD"])
|
||||
current_date = get_date(current_sha)
|
||||
|
||||
while True:
|
||||
print("Getting stats for " + current_sha)
|
||||
utils.subprocess_run(["git", "checkout", current_sha])
|
||||
|
||||
try:
|
||||
stats = get_stats()
|
||||
|
||||
csvwriter.writerow(
|
||||
[current_sha, current_date, stats[0], stats[1], stats[2]])
|
||||
|
||||
print("Collected stats for " + current_sha +
|
||||
" at " + current_date.isoformat())
|
||||
except:
|
||||
print("Unexpected error:", sys.exc_info()[0])
|
||||
|
||||
if os.path.isdir(db):
|
||||
shutil.rmtree(db)
|
||||
print("Error getting stats for " +
|
||||
current_sha + ". Stopping iteration.")
|
||||
break
|
||||
|
||||
current_sha, current_date = get_previous_sha(current_sha, current_date)
|
||||
|
||||
utils.subprocess_run(["git", "checkout", "main"])
|
||||
26
misc/scripts/library-coverage/utils.py
Normal file
26
misc/scripts/library-coverage/utils.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
|
||||
def subprocess_run(cmd):
|
||||
"""Runs a command through subprocess.run, with a few tweaks. Raises an Exception if exit code != 0."""
|
||||
return subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy(), check=True)
|
||||
|
||||
|
||||
def create_empty_database(lang, extension, database):
|
||||
"""Creates an empty database for the given language."""
|
||||
subprocess_run(["codeql", "database", "init", "--language=" + lang,
|
||||
"--source-root=/tmp/empty", "--allow-missing-source-root", database])
|
||||
subprocess_run(["mkdir", "-p", database + "/src/tmp/empty"])
|
||||
subprocess_run(["touch", database + "/src/tmp/empty/empty" + extension])
|
||||
subprocess_run(["codeql", "database", "finalize",
|
||||
database, "--no-pre-finalize"])
|
||||
|
||||
|
||||
def run_codeql_query(query, database, output):
|
||||
"""Runs a codeql query on the given database."""
|
||||
subprocess_run(["codeql", "query", "run", query,
|
||||
"--database", database, "--output", output + ".bqrs"])
|
||||
subprocess_run(["codeql", "bqrs", "decode", output + ".bqrs",
|
||||
"--format=csv", "--no-titles", "--output", output])
|
||||
os.remove(output + ".bqrs")
|
||||
Reference in New Issue
Block a user