import subprocess
import csv
import sys
import os
import shutil
from datetime import date
import datetime
import utils

"""
|
|
Gets the sink/source/summary statistics for different days.
|
|
"""

# the distance between commits to include in the output
day_distance = 1


def get_str_output(arr):
    # Run the given command and return its decoded stdout, stripped of a trailing
    # newline and any surrounding quote characters.
    r = subprocess.check_output(arr)
    return r.decode("utf-8").strip("\n'")


def get_date(sha):
    # Return the committer date of the given commit as a datetime.date
    # (--date=short yields an ISO date such as 2021-05-10).
    d = get_str_output(
        ["git", "show", "--no-patch", "--no-notes", "--pretty='%cd'", "--date=short", sha])
    return date.fromisoformat(d)


def get_parent(sha, date):
    # Return the first parent of the given commit together with its committer date
    # (the date argument is currently unused).
    parent_sha = get_str_output(
        ["git", "rev-parse", sha + "^"])
    parent_date = get_date(parent_sha)
    return (parent_sha, parent_date)


def get_previous_sha(sha, date):
    # Walk back through the first-parent history until a commit is found whose
    # committer date is at least day_distance days before the given date.
    parent_sha, parent_date = get_parent(sha, date)
    while parent_date > date + datetime.timedelta(days=-1 * day_distance):
        parent_sha, parent_date = get_parent(parent_sha, parent_date)

    return (parent_sha, parent_date)
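
# For example (illustrative dates): with day_distance = 1 and a commit dated 2021-05-10,
# get_previous_sha keeps following parents until it finds one dated 2021-05-09 or
# earlier, so consecutive samples end up at least one day apart.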


def get_stats(lang, query):
    try:
        db = "empty_" + lang
        ql_output = "output-" + lang + ".csv"
        if os.path.isdir(db):
            shutil.rmtree(db)
        # Build an empty database and run the coverage query against it.
        # Note: the ".java" extension is hard-coded here; this is fine while Java is
        # the only configured language.
        utils.create_empty_database(lang, ".java", db)
        utils.run_codeql_query(query, db, ql_output)

        sources = 0
        sinks = 0
        summaries = 0

        packages = {}

        with open(ql_output) as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                # row: "android.util",1,"remote","source",16
                package = row[0]
                if package not in packages:
                    packages[package] = {
                        "sources": 0,
                        "sinks": 0,
                        "summaries": 0
                    }

                if row[3] == "source":
                    sources += int(row[4])
                    packages[package]["sources"] += int(row[4])
                if row[3] == "sink":
                    sinks += int(row[4])
                    packages[package]["sinks"] += int(row[4])
                if row[3] == "summary":
                    summaries += int(row[4])
                    packages[package]["summaries"] += int(row[4])

        os.remove(ql_output)

        return (sources, sinks, summaries, packages)
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise Exception()
    finally:
        if os.path.isdir(db):
            shutil.rmtree(db)


working_dir = ""
if len(sys.argv) > 1:
    working_dir = sys.argv[1]
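
# The directory given on the command line is where the git history is sampled from;
# since the query path in the config below is relative, this is presumably the root of
# a CodeQL checkout. If no argument is given, working_dir stays empty and the
# os.chdir call further down will fail.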

configs = [
    utils.LanguageConfig(
        "java", "Java", ".java", "java/ql/src/meta/frameworks/Coverage.ql")
]
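
# Covering a second language would mean adding another entry to the list above, e.g.
# (illustrative values; the corresponding Coverage.ql is assumed to exist):
#     utils.LanguageConfig(
#         "csharp", "C#", ".cs", "csharp/ql/src/meta/frameworks/Coverage.ql")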

# todo: change this when we cover multiple languages. We should compute the SHAs
# only once and not per language
for config in configs:
    with open("timeseries-" + config.lang + ".csv", 'w', newline='') as csvfile_total:
        with open("timeseries-" + config.lang + "-packages.csv", 'w', newline='') as csvfile_packages:
            csvwriter_total = csv.writer(csvfile_total)
            csvwriter_packages = csv.writer(csvfile_packages)
            csvwriter_total.writerow(
                ["SHA", "Date", "Sources", "Sinks", "Summaries"])
            csvwriter_packages.writerow(
                ["SHA", "Date", "Package", "Sources", "Sinks", "Summaries"])

            os.chdir(working_dir)

            utils.subprocess_run(["git", "checkout", "main"])

            current_sha = get_str_output(["git", "rev-parse", "HEAD"])
            current_date = get_date(current_sha)

            # Walk backwards through history; stop as soon as stats collection
            # fails for a commit.
            while True:
                print("Getting stats for " + current_sha)
                utils.subprocess_run(["git", "checkout", current_sha])

                try:
                    stats = get_stats(config.lang, config.ql_path)

                    csvwriter_total.writerow(
                        [current_sha, current_date, stats[0], stats[1], stats[2]])

                    for package in stats[3]:
                        csvwriter_packages.writerow(
                            [current_sha, current_date, package,
                             stats[3][package]["sources"],
                             stats[3][package]["sinks"],
                             stats[3][package]["summaries"]])

                    print("Collected stats for " + current_sha +
                          " at " + current_date.isoformat())
                except:
                    print("Error getting stats for " +
                          current_sha + ". Stopping iteration.")
                    break

                current_sha, current_date = get_previous_sha(
                    current_sha, current_date)

            utils.subprocess_run(["git", "checkout", "main"])