mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
235 lines
8.3 KiB
Python
235 lines
8.3 KiB
Python
import csv
|
|
import sys
|
|
import os
|
|
import shutil
|
|
from datetime import date
|
|
import datetime
|
|
import utils
|
|
import settings
|
|
import packages as pack
|
|
import frameworks as fr
|
|
|
|
"""
|
|
Gets the sink/source/summary statistics for different days.
|
|
"""
|
|
|
|
# Minimum gap, in days, between two consecutive commits included in the
# output: Git.get_previous_sha keeps walking back through parents until it
# reaches a commit whose date is at least this many days older.
day_distance = 1
|
|
|
|
|
|
class Git:
    """Stateless helpers for walking commit history with the ``git`` CLI.

    Every helper is a static method, so it can be called either through the
    class (``Git.get_date(sha)``) or through an instance.
    """

    @staticmethod
    def get_output(arr):
        """Run the command ``arr`` and return its cleaned-up output.

        The command is executed through ``utils.subprocess_check_output``;
        leading and trailing newline and single-quote characters are
        stripped from the result.
        """
        r = utils.subprocess_check_output(arr)
        return r.strip("\n'")

    @staticmethod
    def get_date(sha):
        """Return the commit date of ``sha`` as a ``datetime.date``."""
        # The quotes inside --pretty='%cd' are part of the argument itself,
        # so they come back in the output; get_output() strips them again.
        d = Git.get_output(
            ["git", "show", "--no-patch", "--no-notes", "--pretty='%cd'", "--date=short", sha])
        return date.fromisoformat(d)

    @staticmethod
    def get_parent(sha):
        """Return ``(parent_sha, parent_date)`` for the first parent of ``sha``."""
        parent_sha = Git.get_output(
            ["git", "rev-parse", sha + "^"])
        parent_date = Git.get_date(parent_sha)
        return (parent_sha, parent_date)

    @staticmethod
    def get_previous_sha(sha, date):
        """Find the nearest ancestor that is at least ``day_distance`` days older.

        Starting at the parent of ``sha``, follows first parents until the
        commit date is no later than ``date`` minus ``day_distance`` days.

        Note: the ``date`` parameter shadows the ``date`` class imported from
        ``datetime`` inside this method; only the parameter is used here.

        Returns:
            ``(sha, date)`` of the selected ancestor commit.
        """
        # Loop-invariant: newest commit date that is still "far enough" back.
        cutoff = date + datetime.timedelta(days=-1 * day_distance)

        parent_sha, parent_date = Git.get_parent(sha)
        while parent_date > cutoff:
            parent_sha, parent_date = Git.get_parent(parent_sha)

        return (parent_sha, parent_date)
|
|
|
|
|
|
def get_packages(config, search_path):
    """Run the coverage query for one language and return the parsed result.

    A scratch database directory ``empty_<lang>`` is (re)created with
    ``utils.create_empty_database``, ``config.ql_path`` is run against it
    with ``utils.run_codeql_query`` writing ``output-<lang>.csv``, and that
    CSV is loaded into a ``pack.PackageCollection``. Both the database
    directory and the CSV file are removed again before returning or raising.

    Args:
        config: language configuration; ``lang``, ``ext``, ``dbscheme`` and
            ``ql_path`` are read from it.
        search_path: passed through unchanged to ``utils.run_codeql_query``.

    Returns:
        The ``pack.PackageCollection`` built from the query output.

    Raises:
        Exception: if any step fails; the original error is attached as
            ``__cause__``.
    """
    # Bound before the ``try`` so the cleanup in ``finally`` can never trip
    # over an unbound name and mask the real error.
    db = "empty_" + config.lang
    ql_output = "output-" + config.lang + ".csv"

    try:
        # Start from a clean slate if a previous run left a database behind.
        if os.path.isdir(db):
            shutil.rmtree(db)
        utils.create_empty_database(
            config.lang, config.ext, db, config.dbscheme)
        utils.run_codeql_query(config.ql_path, db, ql_output, search_path)

        return pack.PackageCollection(ql_output)
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not converted into an ordinary error.
        print("Unexpected error:", sys.exc_info()[0])
        raise Exception(
            f"Failed to collect packages for {config.lang}") from err
    finally:
        if os.path.isfile(ql_output):
            os.remove(ql_output)

        if os.path.isdir(db):
            shutil.rmtree(db)
|
|
|
|
|
|
# The directory to analyse is the first command-line argument; it is
# required, so bail out early when it is missing.
if len(sys.argv) < 2:
    print("Working directory is not specified")
    # sys.exit() instead of the interactive-only exit() helper, which is
    # injected by the ``site`` module and is not guaranteed to exist.
    sys.exit(1)

working_dir = sys.argv[1]
|
|
|
|
# One utils.LanguageConfig per supported language. Each row holds the
# positional constructor arguments: language id, display name, source file
# extension, path of the coverage query, and the candidate dbscheme paths.
configs = [
    utils.LanguageConfig(*row)
    for row in (
        (
            "java", "Java", ".java",
            "java/ql/src/meta/frameworks/Coverage.ql",
            ["java/ql/lib/config/semmlecode.dbscheme",
             "java/ql/src/config/semmlecode.dbscheme"],
        ),
        (
            "csharp", "C#", ".cs",
            "csharp/ql/src/meta/frameworks/Coverage.ql",
            ["csharp/ql/lib/semmlecode.csharp.dbscheme",
             "csharp/ql/src/semmlecode.csharp.dbscheme"],
        ),
        (
            "go", "Go", ".go",
            "go/ql/src/meta/frameworks/Coverage.ql",
            ["go/ql/lib/go.dbscheme",
             "go/ql/src/go.dbscheme"],
        ),
    )
]
|
|
|
|
output_prefix = "framework-coverage-timeseries-"

# Languages that still produce data, and the per-language state (open files,
# CSV writers, last written totals) keyed by language name.
languages_to_process = set()
language_utils = {}

# Try to create output files for each language:
for lang in settings.languages:
    # Reset on every iteration: otherwise the error path below would either
    # hit an unbound name (first language) or close the files that belong to
    # the previously registered language.
    file_total = None
    file_packages = None
    try:
        file_total = open(output_prefix + lang + ".csv", 'w', newline='')
        file_packages = open(output_prefix + lang +
                             "-packages.csv", 'w', newline='')
        csvwriter_total = csv.writer(file_total)
        csvwriter_packages = csv.writer(file_packages)
    except Exception:
        print(
            f"Unexpected error while opening files for {lang}:", sys.exc_info()[0])
        # Release whatever was opened before the failure; the language is
        # simply skipped.
        if file_total is not None:
            file_total.close()
        if file_packages is not None:
            file_packages.close()
    else:
        languages_to_process.add(lang)
        language_utils[lang] = {
            "file_total": file_total,
            "file_packages": file_packages,
            "csvwriter_total": csvwriter_total,
            "csvwriter_packages": csvwriter_packages,
            # (sources, sinks, summaries) of the last totals row written;
            # used to skip commits that did not change the totals.
            "last_row": (None, None, None)
        }
|
|
|
|
# Walk the history of ``main`` backwards, one sampled commit at a time, and
# record the source/sink/summary counts per language and per framework.
# The ``finally`` clause puts the checkout back on ``main`` however the walk
# ends.
try:
    # Write headers
    for lang in languages_to_process:
        csvwriter_total = language_utils[lang]["csvwriter_total"]
        csvwriter_packages = language_utils[lang]["csvwriter_packages"]
        csvwriter_total.writerow(
            ["SHA", "Date", "Sources", "Sinks", "Summaries"])
        csvwriter_packages.writerow(
            ["SHA", "Date", "Framework", "Package", "Sources", "Sinks", "Summaries"])

    # The output files above were opened relative to the original working
    # directory; everything from here on runs inside the repository.
    os.chdir(working_dir)

    utils.subprocess_run(["git", "checkout", "main"])

    current_sha = Git.get_output(["git", "rev-parse", "HEAD"])
    current_date = Git.get_date(current_sha)

    # Read the additional framework data, such as URL, friendly name from the latest commit
    input_framework_csv = settings.documentation_folder_no_prefix + "frameworks.csv"
    for lang in languages_to_process:
        language_utils[lang]["frameworks"] = fr.FrameworkCollection(
            input_framework_csv.format(language=lang))
        language_utils[lang]["config"] = [
            c for c in configs if c.lang == lang][0]

    while True:
        utils.subprocess_run(["git", "checkout", current_sha])

        # Iterate over a copy: failing languages are removed inside the loop.
        for lang in languages_to_process.copy():
            try:
                print(
                    f"Getting stats for {lang} at {current_sha} on {current_date.isoformat()}")

                config: utils.LanguageConfig = language_utils[lang]["config"]
                frameworks: fr.FrameworkCollection = language_utils[lang]["frameworks"]
                csvwriter_total = language_utils[lang]["csvwriter_total"]
                csvwriter_packages = language_utils[lang]["csvwriter_packages"]
                last_row = language_utils[lang]["last_row"]

                packages = get_packages(config, ".")

                new_row = (packages.get_part_count("source"),
                           packages.get_part_count("sink"),
                           packages.get_part_count("summary"))

                # Only emit a totals row when the numbers actually changed.
                if last_row != new_row:
                    csvwriter_total.writerow([
                        current_sha,
                        current_date,
                        new_row[0], new_row[1], new_row[2]])
                    language_utils[lang]["last_row"] = new_row

                # Names of all packages claimed by at least one framework.
                matched_packages = set()

                # Getting stats for frameworks:
                for framework in frameworks.get_frameworks():
                    framework: fr.Framework = framework

                    row = [current_sha, current_date,
                           framework.name, ", ".join(sorted(framework.package_pattern.split(" ")))]

                    sources = 0
                    sinks = 0
                    summaries = 0

                    # The filter depends only on the framework, so fetch it
                    # once instead of once per package.
                    package_filter = frameworks.get_package_filter(framework)

                    for package in packages.get_packages():
                        if package_filter(package):
                            sources += package.get_part_count("source")
                            sinks += package.get_part_count("sink")
                            summaries += package.get_part_count("summary")
                            matched_packages.add(package.name)

                    row.append(sources)
                    row.append(sinks)
                    row.append(summaries)

                    csvwriter_packages.writerow(row)

                # Getting stats for packages not included in frameworks:
                row = [current_sha, current_date, "Others"]

                sources = 0
                sinks = 0
                summaries = 0
                other_packages = set()

                for package in packages.get_packages():
                    if package.name not in matched_packages:
                        sources += package.get_part_count("source")
                        sinks += package.get_part_count("sink")
                        summaries += package.get_part_count("summary")
                        other_packages.add(package.name)

                row.append(", ".join(sorted(other_packages)))
                row.append(sources)
                row.append(sinks)
                row.append(summaries)

                csvwriter_packages.writerow(row)

                print(
                    f"Collected stats for {lang} at {current_sha} on {current_date.isoformat()}")

            except Exception as err:
                # ``Exception`` rather than a bare ``except`` so that Ctrl-C
                # aborts the run instead of silently dropping a language.
                print(
                    f"Error getting stats for {lang} at {current_sha}. Stopping iteration for language.")
                print("Cause:", repr(err))
                languages_to_process.remove(lang)

        # Nothing left to track: stop walking the history.
        if not languages_to_process:
            break

        current_sha, current_date = Git.get_previous_sha(
            current_sha, current_date)

finally:
    utils.subprocess_run(["git", "checkout", "main"])
|
|
|
|
# Close files:
# Iterate over the registered entries rather than settings.languages: a
# language whose output files could not be opened never got an entry, and
# indexing language_utils with it would raise KeyError.
for lang_state in language_utils.values():
    for key in ("file_total", "file_packages"):
        handle = lang_state[key]
        if handle is not None:
            handle.close()
|