"""
Gets the sink/source/summary statistics for different days.
"""

import csv
import sys
import os
import shutil
from datetime import date
import datetime

import utils
import settings
import packages as pack
import frameworks as fr

# the distance between commits to include in the output
day_distance = 1


class Git:
    """Small helpers around the `git` command line.

    All commands are run through `utils.subprocess_check_output`, so they act
    on whatever repository the current working directory belongs to.
    """

    @staticmethod
    def get_output(arr):
        """Run the command `arr` and return its output.

        Leading/trailing newlines and single quotes are stripped; the quotes
        come from pretty formats such as `--pretty='%cd'`, which are passed to
        git verbatim (no shell is involved to remove them).
        """
        r = utils.subprocess_check_output(arr)
        return r.strip("\n'")

    @staticmethod
    def get_date(sha):
        """Return the committer date of commit `sha` as a `datetime.date`."""
        d = Git.get_output(
            ["git", "show", "--no-patch", "--no-notes",
             "--pretty='%cd'", "--date=short", sha])
        return date.fromisoformat(d)

    @staticmethod
    def get_parent(sha):
        """Return `(parent_sha, parent_date)` for the first parent of `sha`."""
        parent_sha = Git.get_output(["git", "rev-parse", sha + "^"])
        parent_date = Git.get_date(parent_sha)
        return (parent_sha, parent_date)

    @staticmethod
    def get_previous_sha(sha, date):
        """Walk first parents of `sha` back to an old-enough commit.

        Returns `(sha, date)` of the first ancestor whose commit date is at
        least `day_distance` days before `date`. Any error raised by the
        underlying git call (for example when history runs out) propagates.
        """
        cutoff = date - datetime.timedelta(days=day_distance)
        parent_sha, parent_date = Git.get_parent(sha)
        while parent_date > cutoff:
            parent_sha, parent_date = Git.get_parent(parent_sha)
        return (parent_sha, parent_date)


def get_packages(config, search_path):
    """Run the coverage query for `config` and return the parsed result.

    An empty database named `empty_<lang>` is created, the query at
    `config.ql_path` is run against it with output written to
    `output-<lang>.csv`, and that CSV is loaded into a
    `pack.PackageCollection`. The temporary database and CSV are removed
    again whether or not the run succeeds.

    Raises:
        Exception: if any step fails; the original error is attached as the
            cause.
    """
    # Bound before the `try` so the `finally` clause can always refer to them.
    db = "empty_" + config.lang
    ql_output = "output-" + config.lang + ".csv"
    try:
        if os.path.isdir(db):
            shutil.rmtree(db)
        utils.create_empty_database(
            config.lang, config.ext, db, config.dbscheme)
        utils.run_codeql_query(config.ql_path, db, ql_output, search_path)

        return pack.PackageCollection(ql_output)
    except Exception as err:
        print("Unexpected error:", sys.exc_info()[0])
        raise Exception(
            f"Failed to collect packages for {config.lang}") from err
    finally:
        if os.path.isfile(ql_output):
            os.remove(ql_output)

        if os.path.isdir(db):
            shutil.rmtree(db)


working_dir = ""
if len(sys.argv) > 1:
    working_dir = sys.argv[1]
else:
    print("Working directory is not specified")
    sys.exit(1)

# One entry per supported language. Two dbscheme paths are listed for each
# language (a `lib` and a `src` location) -- presumably because the scheme
# moved at some point in the history being walked; confirm against
# utils.LanguageConfig / utils.create_empty_database.
configs = [
    utils.LanguageConfig(
        "java", "Java", ".java", "java/ql/src/meta/frameworks/Coverage.ql",
        ["java/ql/lib/config/semmlecode.dbscheme",
         "java/ql/src/config/semmlecode.dbscheme"]),
    utils.LanguageConfig(
        "csharp", "C#", ".cs", "csharp/ql/src/meta/frameworks/Coverage.ql",
        ["csharp/ql/lib/semmlecode.csharp.dbscheme",
         "csharp/ql/src/semmlecode.csharp.dbscheme"]),
    utils.LanguageConfig(
        "go", "Go", ".go", "go/ql/src/meta/frameworks/Coverage.ql",
        ["go/ql/lib/go.dbscheme", "go/ql/src/go.dbscheme"]),
]

output_prefix = "framework-coverage-timeseries-"

languages_to_process = set()
language_utils = {}

# Try to create output files for each language. The files are opened before
# the chdir below, so they are created relative to the launch directory.
for lang in settings.languages:
    # Reset per iteration so the error handler never touches a handle that
    # belongs to a previously processed language (or an unbound name).
    file_total = None
    file_packages = None
    try:
        file_total = open(output_prefix + lang + ".csv", 'w', newline='')
        file_packages = open(output_prefix + lang +
                             "-packages.csv", 'w', newline='')
        csvwriter_total = csv.writer(file_total)
        csvwriter_packages = csv.writer(file_packages)
    except Exception:
        print(
            f"Unexpected error while opening files for {lang}:", sys.exc_info()[0])
        if file_total is not None:
            file_total.close()
        if file_packages is not None:
            file_packages.close()
    else:
        languages_to_process.add(lang)
        language_utils[lang] = {
            "file_total": file_total,
            "file_packages": file_packages,
            "csvwriter_total": csvwriter_total,
            "csvwriter_packages": csvwriter_packages,
            # Last (sources, sinks, summaries) written to the totals file;
            # a new totals row is only emitted when the counts change.
            "last_row": (None, None, None)
        }

try:
    # Write headers
    for lang in languages_to_process:
        csvwriter_total = language_utils[lang]["csvwriter_total"]
        csvwriter_packages = language_utils[lang]["csvwriter_packages"]
        csvwriter_total.writerow(
            ["SHA", "Date", "Sources", "Sinks", "Summaries"])
        csvwriter_packages.writerow(
            ["SHA", "Date", "Framework", "Package", "Sources", "Sinks", "Summaries"])

    os.chdir(working_dir)

    utils.subprocess_run(["git", "checkout", "main"])

    current_sha = Git.get_output(["git", "rev-parse", "HEAD"])
    current_date = Git.get_date(current_sha)

    # Read the additional framework data, such as URL, friendly name from the latest commit
    for lang in languages_to_process:
        input_framework_csv = settings.documentation_folder_no_prefix + "frameworks.csv"
        language_utils[lang]["frameworks"] = fr.FrameworkCollection(
            input_framework_csv.format(language=lang))
        language_utils[lang]["config"] = [
            c for c in configs if c.lang == lang][0]

    while True:
        utils.subprocess_run(["git", "checkout", current_sha])

        # Iterate over a copy: failing languages are removed inside the loop.
        for lang in languages_to_process.copy():
            try:
                print(
                    f"Getting stats for {lang} at {current_sha} on {current_date.isoformat()}")

                config: utils.LanguageConfig = language_utils[lang]["config"]
                frameworks: fr.FrameworkCollection = language_utils[lang]["frameworks"]
                csvwriter_total = language_utils[lang]["csvwriter_total"]
                csvwriter_packages = language_utils[lang]["csvwriter_packages"]
                last_row = language_utils[lang]["last_row"]

                packages = get_packages(config, ".")

                new_row = (packages.get_part_count("source"),
                           packages.get_part_count("sink"),
                           packages.get_part_count("summary"))

                # Only record a totals row when the counts actually changed.
                if last_row != new_row:
                    csvwriter_total.writerow([
                        current_sha, current_date,
                        new_row[0], new_row[1], new_row[2]])
                    language_utils[lang]["last_row"] = new_row

                matched_packages = set()

                # Getting stats for frameworks:
                for framework in frameworks.get_frameworks():
                    framework: fr.Framework = framework

                    row = [current_sha, current_date, framework.name,
                           ", ".join(sorted(framework.package_pattern.split(" ")))]

                    sources = 0
                    sinks = 0
                    summaries = 0

                    # The filter depends only on the framework, so build it
                    # once instead of once per package.
                    package_filter = frameworks.get_package_filter(framework)
                    for package in packages.get_packages():
                        if package_filter(package):
                            sources += package.get_part_count("source")
                            sinks += package.get_part_count("sink")
                            summaries += package.get_part_count("summary")
                            matched_packages.add(package.name)

                    row.append(sources)
                    row.append(sinks)
                    row.append(summaries)

                    csvwriter_packages.writerow(row)

                # Getting stats for packages not included in frameworks:
                row = [current_sha, current_date, "Others"]

                sources = 0
                sinks = 0
                summaries = 0
                other_packages = set()

                for package in packages.get_packages():
                    if package.name not in matched_packages:
                        sources += package.get_part_count("source")
                        sinks += package.get_part_count("sink")
                        summaries += package.get_part_count("summary")
                        other_packages.add(package.name)

                row.append(", ".join(sorted(other_packages)))
                row.append(sources)
                row.append(sinks)
                row.append(summaries)

                csvwriter_packages.writerow(row)

                print(
                    f"Collected stats for {lang} at {current_sha} on {current_date.isoformat()}")
            except Exception:
                # Best effort per language: one language failing at this
                # commit must not stop the others. Ctrl-C is deliberately not
                # caught here so the run can still be interrupted.
                print(
                    f"Error getting stats for {lang} at {current_sha}. Stopping iteration for language.")
                languages_to_process.remove(lang)

        if not languages_to_process:
            break

        current_sha, current_date = Git.get_previous_sha(
            current_sha, current_date)

finally:
    try:
        utils.subprocess_run(["git", "checkout", "main"])
    finally:
        # Close files. Only languages whose files were opened successfully
        # have an entry, so iterate the dict rather than settings.languages.
        for lang_utils in language_utils.values():
            file_total = lang_utils["file_total"]
            file_packages = lang_utils["file_packages"]
            if file_total is not None:
                file_total.close()
            if file_packages is not None:
                file_packages.close()