Compute framework coverage diff in artifacts job

2025-12-16 16:53:25 +01:00 · 2021-06-22 10:45:49 +02:00
parent d6361d8500
commit a165cde808
7 changed files with 234 additions and 242 deletions
--- a/.github/workflows/csv-coverage-pr-artifacts.yml
+++ b/.github/workflows/csv-coverage-pr-artifacts.yml
@@ -49,19 +49,23 @@ jobs:
         gh release download --repo "github/codeql-cli-binaries" --pattern "codeql-linux64.zip"
    - name: Unzip CodeQL CLI
      run: unzip -d codeql-cli codeql-linux64.zip
-    - name: Generate CSV files on merge and base of the PR
+    - name: Generate CSV files on merge commit of the PR
      run: |
        echo "Running generator on merge"
        PATH="$PATH:codeql-cli/codeql" python merge/misc/scripts/library-coverage/generate-report.py ci merge merge
        mkdir out_merge
        cp framework-coverage-*.csv out_merge/
        cp framework-coverage-*.rst out_merge/
-
+    - name: Generate CSV files on base commit of the PR
+      run: |
        echo "Running generator on base"
        PATH="$PATH:codeql-cli/codeql" python base/misc/scripts/library-coverage/generate-report.py ci base base
        mkdir out_base
        cp framework-coverage-*.csv out_base/
        cp framework-coverage-*.rst out_base/
+    - name: Generate diff of coverage reports
+      run: |
+        python base/misc/scripts/library-coverage/compare-folders.py out_base out_merge comparison.md
    - name: Upload CSV package list
      uses: actions/upload-artifact@v2
      with:
@@ -76,6 +80,12 @@ jobs:
        path: |
          out_base/framework-coverage-*.csv
          out_base/framework-coverage-*.rst
+    - name: Upload comparison results
+      uses: actions/upload-artifact@v2
+      with:
+        name: comparison
+        path: |
+          comparison.md
    - name: Save PR number
      run: |
        mkdir -p pr
--- a/.github/workflows/csv-coverage-pr-comment.yml
+++ b/.github/workflows/csv-coverage-pr-comment.yml
@@ -31,11 +31,4 @@ jobs:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        RUN_ID: ${{ github.event.workflow_run.id }}
      run: |
-        python misc/scripts/library-coverage/compare-files-comment-pr.py \
-          comparison.md "$GITHUB_REPOSITORY" "$RUN_ID"
-    - name: Upload comparison results
-      uses: actions/upload-artifact@v2
-      with:
-        name: comparison
-        path: |
-          comparison.md
+        python misc/scripts/library-coverage/comment-pr.py "$GITHUB_REPOSITORY" "$RUN_ID"
--- a/misc/scripts/library-coverage/comment-pr.py
+++ b/misc/scripts/library-coverage/comment-pr.py
@@ -0,0 +1,130 @@
+import sys
+import os
+import utils
+import shutil
+import json
+import filecmp
+
+"""
+This script compares the generated CSV coverage files with the ones in the codebase.
+"""
+
+artifacts_workflow_name = "Check framework coverage changes"
+comparison_artifact_name = "comparison"
+comparison_artifact_file_name = "comparison.md"
+
+
+def get_comment_text(output_file, repo, run_id):
+    size = os.path.getsize(output_file)
+    if size == 0:
+        print("No difference in the coverage reports")
+        return
+
+    comment = ":warning: The head of this PR and the base branch were compared for differences in the framework coverage reports. " + \
+        f"The generated reports are available in the [artifacts of this workflow run](https://github.com/{repo}/actions/runs/{run_id}). " + \
+        "The differences will be picked up by the nightly job after the PR gets merged. "
+
+    if size < 2000:
+        print("There's a small change in the CSV framework coverage reports")
+        comment += "The following differences were found: \n\n"
+        with open(output_file, 'r') as file:
+            comment += file.read()
+    else:
+        print("There's a large change in the CSV framework coverage reports")
+        comment += f"The differences can be found in the {comparison_artifact_name} artifact of this workflow run](https://github.com/{repo}/actions/runs/{run_id})."
+
+    return comment
+
+
+def comment_pr(repo, run_id):
+    """
+    Generates coverage diff produced by the changes in the current PR. If the diff is not empty, then post it as a comment.
+    If a workflow run produces the same diff as the directly preceeding one, then don't post a comment.
+    """
+
+    # Store diff for current run
+    current_diff_folder = "current_diff"
+    utils.download_artifact(repo, comparison_artifact_name,
+                            current_diff_folder, run_id)
+
+    utils.download_artifact(repo, "pr", "pr", run_id)
+
+    try:
+        with open("pr/NR") as file:
+            pr_number = int(file.read())
+    finally:
+        if os.path.isdir("pr"):
+            shutil.rmtree("pr")
+
+    # Try storing diff for previous run:
+    try:
+        prev_run_id = get_previous_run_id(repo, run_id, pr_number)
+        prev_diff_folder = "prev_diff"
+        utils.download_artifact(repo, comparison_artifact_name,
+                                prev_diff_folder, prev_run_id)
+
+        if filecmp.cmp(f"{current_diff_folder}/{comparison_artifact_file_name}", f"{prev_diff_folder}/{comparison_artifact_file_name}", shallow=False):
+            print(
+                f"Previous run {prev_run_id} resulted in the same diff, so not commenting again.")
+            return
+        else:
+            print(f"Diff of previous run {prev_run_id} differs, commenting.")
+    except Exception:
+        # this is not mecessarily a failure, it can also mean that there was no previous run yet.
+        print("Couldn't generate diff for previous run:", sys.exc_info()[1])
+
+    comment = get_comment_text(
+        f"{current_diff_folder}/{comparison_artifact_file_name}", repo, run_id)
+    post_comment(comment, repo, pr_number)
+
+
+def post_comment(comment, repo, pr_number):
+    print(f"Posting comment to PR #{pr_number}")
+    utils.subprocess_run(["gh", "pr", "comment", str(pr_number),
+                         "--repo", repo, "--body", comment])
+
+
+def get_previous_run_id(repo, run_id, pr_number):
+    """
+    Gets the previous run id for a given workflow run, considering that the previous workflow run needs to come from the same PR.
+    """
+
+    # Get branch and repo from run:
+    this_run = utils.subprocess_check_output(
+        ["gh", "api", "-X", "GET", f"repos/{repo}/actions/runs/{run_id}", "--jq", "{ head_branch: .head_branch, head_repository: .head_repository.full_name }"])
+
+    this_run = json.loads(this_run)
+    pr_branch = this_run["head_branch"]
+    pr_repo = this_run["head_repository"]
+
+    # Get all previous runs that match branch, repo and workflow name:
+    ids = utils.subprocess_check_output(["gh", "api", "-X", "GET", f"repos/{repo}/actions/runs", "-f", "event=pull_request", "-f", "status=success", "-f", "name=\"" + artifacts_workflow_name + "\"", "--jq",
+                                        f"[.workflow_runs.[] | select(.head_branch==\"{pr_branch}\" and .head_repository.full_name==\"{pr_repo}\") | {{ created_at: .created_at, run_id: .id}}] | sort_by(.created_at) | reverse | [.[].run_id]"])
+
+    ids = json.loads(ids)
+    if ids[0] != int(run_id):
+        raise Exception(
+            f"Expected to find {run_id} in the list of matching runs.")
+
+    for previous_run_id in ids[1:]:
+        utils.download_artifact(repo, "pr", "prev_run_pr", previous_run_id)
+
+        try:
+            with open("prev_run_pr/NR") as file:
+                prev_pr_number = int(file.read())
+                print(f"PR number: {prev_pr_number}")
+        finally:
+            if os.path.isdir("prev_run_pr"):
+                shutil.rmtree("prev_run_pr")
+
+        # the previous run needs to be coming from the same PR:
+        if pr_number == prev_pr_number:
+            return previous_run_id
+
+    raise Exception("Couldn't find previous run.")
+
+
+repo = sys.argv[1]
+run_id = sys.argv[2]
+
+comment_pr(repo, run_id)
--- a/misc/scripts/library-coverage/compare-files-comment-pr.py
+++ b/misc/scripts/library-coverage/compare-files-comment-pr.py
@@ -1,232 +0,0 @@
-import sys
-import os
-import settings
-import difflib
-import utils
-import shutil
-import json
-import filecmp
-
-"""
-This script compares the generated CSV coverage files with the ones in the codebase.
-"""
-
-artifacts_workflow_name = "Check framework coverage changes"
-
-
-def check_file_exists(file):
-    if not os.path.exists(file):
-        print(f"Expected file '{file}' doesn't exist.", file=sys.stderr)
-        return False
-    return True
-
-
-def ignore_line_ending(ch):
-    return difflib.IS_CHARACTER_JUNK(ch, ws=" \r\n")
-
-
-def compare_files(file1, file2):
-    messages = compare_files_str(file1, file2)
-    if messages == "":
-        return True
-
-    print(messages, end="", file=sys.stderr)
-
-    return False
-
-
-def compare_files_str(file1, file2):
-    diff = difflib.ndiff(open(file1).readlines(),
-                         open(file2).readlines(), None, ignore_line_ending)
-    ret = ""
-    for line in diff:
-        if line.startswith("+") or line.startswith("-"):
-            ret += line
-
-    return ret
-
-
-def download_artifact(repo, name, dir, run_id):
-    utils.subprocess_run(["gh", "run", "download", "--repo",
-                         repo, "--name", name, "--dir", dir, str(run_id)])
-
-
-def write_diff_for_run(output_file, repo, run_id):
-    folder1 = "out_base"
-    folder2 = "out_merge"
-    try:
-        download_artifact(repo, "csv-framework-coverage-base", folder1, run_id)
-        download_artifact(
-            repo, "csv-framework-coverage-merge", folder2, run_id)
-
-        compare_folders(folder1, folder2, output_file)
-    finally:
-        for folder in [folder1, folder2]:
-            if os.path.isdir(folder):
-                shutil.rmtree(folder)
-
-
-def get_comment_text(output_file, repo, run_id):
-    size = os.path.getsize(output_file)
-    if size == 0:
-        print("No difference in the coverage reports")
-        return
-
-    comment = ":warning: The head of this PR and the base branch were compared for differences in the framework coverage reports. " + \
-        f"The generated reports are available in the [artifacts of this workflow run](https://github.com/{repo}/actions/runs/{run_id}). " + \
-        "The differences will be picked up by the nightly job after the PR gets merged. "
-
-    if size < 2000:
-        print("There's a small change in the CSV framework coverage reports")
-        comment += "The following differences were found: \n\n"
-        with open(output_file, 'r') as file:
-            comment += file.read()
-    else:
-        print("There's a large change in the CSV framework coverage reports")
-        comment += f"The differences can be found in the {output_file} artifact of this job."
-
-    return comment
-
-
-def comment_pr(output_file, repo, run_id):
-    """
-    Generates coverage diff produced by the changes in the current PR. If the diff is not empty, then post it as a comment.
-    If a workflow run produces the same diff as the directly preceeding one, then don't post a comment.
-    """
-
-    # Store diff for current run
-    write_diff_for_run(output_file, repo, run_id)
-
-    download_artifact(repo, "pr", "pr", run_id)
-
-    try:
-        with open("pr/NR") as file:
-            pr_number = int(file.read())
-    finally:
-        if os.path.isdir("pr"):
-            shutil.rmtree("pr")
-
-    # Try storing diff for previous run:
-    prev_output_file = "prev_" + output_file
-    try:
-        prev_run_id = get_previous_run_id(repo, run_id, pr_number)
-        write_diff_for_run(prev_output_file, repo, prev_run_id)
-
-        if filecmp.cmp(output_file, prev_output_file, shallow=False):
-            print(
-                f"Previous run {prev_run_id} resulted in the same diff, so not commenting again.")
-            return
-        else:
-            print(f"Diff of previous run {prev_run_id} differs, commenting.")
-    except Exception:
-        # this is not mecessarily a failure, it can also mean that there was no previous run yet.
-        print("Couldn't generate diff for previous run:", sys.exc_info()[1])
-    finally:
-        if os.path.isfile(prev_output_file):
-            os.remove(prev_output_file)
-
-    comment = get_comment_text(output_file, repo, run_id)
-    post_comment(comment, repo, pr_number)
-
-
-def post_comment(comment, repo, pr_number):
-    print(f"Posting comment to PR #{pr_number}")
-    utils.subprocess_run(["gh", "pr", "comment", str(pr_number),
-                         "--repo", repo, "--body", comment])
-
-
-def compare_folders(folder1, folder2, output_file):
-    """
-    Compares the contents of two folders and writes the differences to the output file.
-    """
-
-    languages = ['java']
-
-    return_md = ""
-
-    for lang in languages:
-        expected_files = ""
-
-        generated_output_rst = settings.generated_output_rst.format(
-            language=lang)
-        generated_output_csv = settings.generated_output_csv.format(
-            language=lang)
-
-        # check if files exist in both folder1 and folder 2
-        if not check_file_exists(f"{folder1}/{generated_output_rst}"):
-            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder1}\n"
-        if not check_file_exists(f"{folder2}/{generated_output_rst}"):
-            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder2}\n"
-        if not check_file_exists(f"{folder1}/{generated_output_csv}"):
-            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder1}\n"
-        if not check_file_exists(f"{folder2}/{generated_output_csv}"):
-            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder2}\n"
-
-        if expected_files != "":
-            print("Expected files are missing", file=sys.stderr)
-            return_md += f"\n### {lang}\n\n#### Expected files are missing for {lang}\n{expected_files}\n"
-            continue
-
-        # compare contents of files
-        cmp1 = compare_files_str(
-            f"{folder1}/{generated_output_rst}", f"{folder2}/{generated_output_rst}")
-        cmp2 = compare_files_str(
-            f"{folder1}/{generated_output_csv}", f"{folder2}/{generated_output_csv}")
-
-        if cmp1 != "" or cmp2 != "":
-            print("Generated file contents are not matching", file=sys.stderr)
-            return_md += f"\n### {lang}\n\n#### Generated file changes for {lang}\n\n"
-            if cmp1 != "":
-                return_md += f"- Changes to {generated_output_rst}:\n```diff\n{cmp1}```\n\n"
-            if cmp2 != "":
-                return_md += f"- Changes to {generated_output_csv}:\n```diff\n{cmp2}```\n\n"
-
-    with open(output_file, 'w', newline='') as out:
-        out.write(return_md)
-
-
-def get_previous_run_id(repo, run_id, pr_number):
-    """
-    Gets the previous run id for a given workflow run, considering that the previous workflow run needs to come from the same PR.
-    """
-
-    # Get branch and repo from run:
-    this_run = utils.subprocess_check_output(
-        ["gh", "api", "-X", "GET", f"repos/{repo}/actions/runs/{run_id}", "--jq", "{ head_branch: .head_branch, head_repository: .head_repository.full_name }"])
-
-    this_run = json.loads(this_run)
-    pr_branch = this_run["head_branch"]
-    pr_repo = this_run["head_repository"]
-
-    # Get all previous runs that match branch, repo and workflow name:
-    ids = utils.subprocess_check_output(["gh", "api", "-X", "GET", f"repos/{repo}/actions/runs", "-f", "event=pull_request", "-f", "status=success", "-f", "name=\"" + artifacts_workflow_name + "\"", "--jq",
-                                        f"[.workflow_runs.[] | select(.head_branch==\"{pr_branch}\" and .head_repository.full_name==\"{pr_repo}\") | {{ created_at: .created_at, run_id: .id}}] | sort_by(.created_at) | reverse | [.[].run_id]"])
-
-    ids = json.loads(ids)
-    if ids[0] != int(run_id):
-        raise Exception(
-            f"Expected to find {run_id} in the list of matching runs.")
-
-    for previous_run_id in ids[1:]:
-        download_artifact(repo, "pr", "prev_run_pr", previous_run_id)
-
-        try:
-            with open("prev_run_pr/NR") as file:
-                prev_pr_number = int(file.read())
-                print(f"PR number: {prev_pr_number}")
-        finally:
-            if os.path.isdir("prev_run_pr"):
-                shutil.rmtree("prev_run_pr")
-
-        # the previous run needs to be coming from the same PR:
-        if pr_number == prev_pr_number:
-            return previous_run_id
-
-    raise Exception("Couldn't find previous run.")
-
-
-output_file = sys.argv[1]
-repo = sys.argv[2]
-run_id = sys.argv[3]
-
-comment_pr(output_file, repo, run_id)
--- a/misc/scripts/library-coverage/compare-folders.py
+++ b/misc/scripts/library-coverage/compare-folders.py
@@ -0,0 +1,9 @@
+import sys
+import compare
+
+
+folder1 = sys.argv[1]
+folder2 = sys.argv[2]
+diff_file = sys.argv[3]
+
+compare.compare_folders(folder1, folder2, diff_file)
--- a/misc/scripts/library-coverage/compare.py
+++ b/misc/scripts/library-coverage/compare.py
@@ -0,0 +1,69 @@
+import sys
+import settings
+import utils
+import difflib
+
+
+def ignore_line_ending(ch):
+    return difflib.IS_CHARACTER_JUNK(ch, ws=" \r\n")
+
+
+def compare_files(file1, file2):
+    diff = difflib.ndiff(open(file1).readlines(),
+                         open(file2).readlines(), None, ignore_line_ending)
+    ret = ""
+    for line in diff:
+        if line.startswith("+") or line.startswith("-"):
+            ret += line
+
+    return ret
+
+
+def compare_folders(folder1, folder2, output_file):
+    """
+    Compares the contents of two folders and writes the differences to the output file.
+    """
+
+    languages = ['java']
+
+    return_md = ""
+
+    for lang in languages:
+        expected_files = ""
+
+        generated_output_rst = settings.generated_output_rst.format(
+            language=lang)
+        generated_output_csv = settings.generated_output_csv.format(
+            language=lang)
+
+        # check if files exist in both folder1 and folder 2
+        if not utils.check_file_exists(f"{folder1}/{generated_output_rst}"):
+            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder1}\n"
+        if not utils.check_file_exists(f"{folder2}/{generated_output_rst}"):
+            expected_files += f"- {generated_output_rst} doesn't exist in folder {folder2}\n"
+        if not utils.check_file_exists(f"{folder1}/{generated_output_csv}"):
+            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder1}\n"
+        if not utils.check_file_exists(f"{folder2}/{generated_output_csv}"):
+            expected_files += f"- {generated_output_csv} doesn't exist in folder {folder2}\n"
+
+        if expected_files != "":
+            print("Expected files are missing", file=sys.stderr)
+            return_md += f"\n### {lang}\n\n#### Expected files are missing for {lang}\n{expected_files}\n"
+            continue
+
+        # compare contents of files
+        cmp1 = compare_files(
+            f"{folder1}/{generated_output_rst}", f"{folder2}/{generated_output_rst}")
+        cmp2 = compare_files(
+            f"{folder1}/{generated_output_csv}", f"{folder2}/{generated_output_csv}")
+
+        if cmp1 != "" or cmp2 != "":
+            print("Generated file contents are not matching", file=sys.stderr)
+            return_md += f"\n### {lang}\n\n#### Generated file changes for {lang}\n\n"
+            if cmp1 != "":
+                return_md += f"- Changes to {generated_output_rst}:\n```diff\n{cmp1}```\n\n"
+            if cmp2 != "":
+                return_md += f"- Changes to {generated_output_csv}:\n```diff\n{cmp2}```\n\n"
+
+    with open(output_file, 'w', newline='') as out:
+        out.write(return_md)
--- a/misc/scripts/library-coverage/utils.py
+++ b/misc/scripts/library-coverage/utils.py
@@ -2,6 +2,7 @@ import subprocess
 import os
 import csv
 import shlex
+import sys


 def subprocess_run(cmd):
@@ -57,3 +58,15 @@ def read_cwes(path):
                    "label": row[2]
                }
    return cwes
+
+
+def check_file_exists(file):
+    if not os.path.exists(file):
+        print(f"Expected file '{file}' doesn't exist.", file=sys.stderr)
+        return False
+    return True
+
+
+def download_artifact(repo, name, dir, run_id):
+    subprocess_run(["gh", "run", "download", "--repo",
+                    repo, "--name", name, "--dir", dir, str(run_id)])