import csv import sys import os import shutil from datetime import date import datetime import utils import settings import packages as pack import frameworks as fr """ Gets the sink/source/summary statistics for different days. """ # the distance between commits to include in the output day_distance = 1 class Git: def get_output(arr): r = utils.subprocess_check_output(arr) return r.strip("\n'") def get_date(sha): d = Git.get_output( ["git", "show", "--no-patch", "--no-notes", "--pretty='%cd'", "--date=short", sha]) return date.fromisoformat(d) def get_parent(sha): parent_sha = Git.get_output( ["git", "rev-parse", sha + "^"]) parent_date = Git.get_date(parent_sha) return (parent_sha, parent_date) def get_previous_sha(sha, date): parent_sha, parent_date = Git.get_parent(sha) while parent_date > date + datetime.timedelta(days=-1 * day_distance): parent_sha, parent_date = Git.get_parent(parent_sha) return (parent_sha, parent_date) def get_packages(lang, query): try: db = "empty_" + lang ql_output = "output-" + lang + ".csv" if os.path.isdir(db): shutil.rmtree(db) utils.create_empty_database(lang, ".java", db) utils.run_codeql_query(query, db, ql_output) return pack.PackageCollection(ql_output) except: print("Unexpected error:", sys.exc_info()[0]) raise Exception() finally: if os.path.isfile(ql_output): os.remove(ql_output) if os.path.isdir(db): shutil.rmtree(db) working_dir = "" if len(sys.argv) > 1: working_dir = sys.argv[1] else: print("Working directory is not specified") exit(1) configs = [ utils.LanguageConfig( "java", "Java", ".java", "java/ql/src/meta/frameworks/Coverage.ql") ] # todo: change this when we cover multiple languages. We should compute the SHAs # only once and not per language output_prefix = "framework-coverage-timeseries-" for config in configs: with open(output_prefix + config.lang + ".csv", 'w', newline='') as csvfile_total: with open(output_prefix + config.lang + "-packages.csv", 'w', newline='') as csvfile_packages: csvwriter_total = csv.writer(csvfile_total) csvwriter_packages = csv.writer(csvfile_packages) csvwriter_total.writerow( ["SHA", "Date", "Sources", "Sinks", "Summaries"]) csvwriter_packages.writerow( ["SHA", "Date", "Framework", "Package", "Sources", "Sinks", "Summaries"]) os.chdir(working_dir) utils.subprocess_run(["git", "checkout", "main"]) current_sha = Git.get_output(["git", "rev-parse", "HEAD"]) current_date = Git.get_date(current_sha) # Read the additional framework data, such as URL, friendly name from the latest commit input_framework_csv = settings.documentation_folder_no_prefix + "frameworks.csv" frameworks = fr.FrameworkCollection( input_framework_csv.format(language=config.lang)) while True: print("Getting stats for " + current_sha) utils.subprocess_run(["git", "checkout", current_sha]) try: packages = get_packages(config.lang, config.ql_path) csvwriter_total.writerow([ current_sha, current_date, packages.get_part_count("source"), packages.get_part_count("sink"), packages.get_part_count("summary")]) matched_packages = set() for framework in frameworks.get_frameworks(): framework: fr.Framework = framework row = [current_sha, current_date, framework.name, framework.package_pattern] sources = 0 sinks = 0 summaries = 0 for package in packages.get_packages(): if frameworks.get_package_filter(framework)(package): sources += package.get_part_count("source") sinks += package.get_part_count("sink") summaries += package.get_part_count("summary") matched_packages.add(package.name) row.append(sources) row.append(sinks) row.append(summaries) csvwriter_packages.writerow(row) row = [current_sha, current_date, "Others"] sources = 0 sinks = 0 summaries = 0 other_packages = set() for package in packages.get_packages(): if not package.name in matched_packages: sources += package.get_part_count("source") sinks += package.get_part_count("sink") summaries += package.get_part_count("summary") other_packages.add(package.name) row.append(", ".join(sorted(other_packages))) row.append(sources) row.append(sinks) row.append(summaries) csvwriter_packages.writerow(row) print("Collected stats for " + current_sha + " at " + current_date.isoformat()) except: print("Error getting stats for " + current_sha + ". Stopping iteration.") break current_sha, current_date = Git.get_previous_sha( current_sha, current_date) utils.subprocess_run(["git", "checkout", "main"])