Add CSV coverage PR commenter

2025-12-17 01:03:14 +01:00 · 2021-06-07 10:27:27 +02:00
parent 200126b302
commit 4abaa7870f
5 changed files with 336 additions and 69 deletions
--- a/.github/workflows/csv-coverage-pr-artifacts.yml
+++ b/.github/workflows/csv-coverage-pr-artifacts.yml
@@ -0,0 +1,85 @@
+name: Check framework coverage changes
+
+on:
+  pull_request:
+    paths:
+      - '.github/workflows/csv-coverage-pr-comment.yml'
+      - '*/ql/src/**/*.ql'
+      - '*/ql/src/**/*.qll'
+      - 'misc/scripts/library-coverage/*.py'
+      # input data files
+      - '*/documentation/library-coverage/cwe-sink.csv'
+      - '*/documentation/library-coverage/frameworks.csv'
+    branches:
+      - main
+      - 'rc/*'
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Dump GitHub context
+      env:
+        GITHUB_CONTEXT: ${{ toJSON(github) }}
+      run: echo "$GITHUB_CONTEXT"
+    - name: Clone self (github/codeql) head
+      uses: actions/checkout@v2
+      with:
+        path: head
+    - name: Clone self (github/codeql) base
+      uses: actions/checkout@v2
+      with:
+        ref: ${{ github.event.pull_request.base.sha }}
+        path: base
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.8
+    - name: Download CodeQL CLI
+      uses: dsaltares/fetch-gh-release-asset@aa37ae5c44d3c9820bc12fe675e8670ecd93bd1c
+      with:
+        repo: "github/codeql-cli-binaries"
+        version: "latest"
+        file: "codeql-linux64.zip"
+        token: ${{ secrets.GITHUB_TOKEN }}
+    - name: Unzip CodeQL CLI
+      run: unzip -d codeql-cli codeql-linux64.zip
+    - name: Generate CSV files on head and base of the PR
+      run: |
+        echo "Running generator on ${{github.sha}}"
+        PATH="$PATH:codeql-cli/codeql" python head/misc/scripts/library-coverage/generate-report.py ci head head
+        mkdir out_head
+        cp framework-coverage-*.csv out_head/
+        cp framework-coverage-*.rst out_head/
+
+        echo "Running generator on ${{github.event.pull_request.base.sha}}"
+        PATH="$PATH:codeql-cli/codeql" python base/misc/scripts/library-coverage/generate-report.py ci base base
+        mkdir out_base
+        cp framework-coverage-*.csv out_base/
+        cp framework-coverage-*.rst out_base/
+    - name: Upload CSV package list (head)
+      uses: actions/upload-artifact@v2
+      with:
+        name: csv-framework-coverage-merge
+        path: |
+          out_head/framework-coverage-*.csv
+          out_head/framework-coverage-*.rst
+    - name: Upload CSV package list (base)
+      uses: actions/upload-artifact@v2
+      with:
+        name: csv-framework-coverage-base
+        path: |
+          out_base/framework-coverage-*.csv
+          out_base/framework-coverage-*.rst
+    - name: Save PR number
+      run: |
+        mkdir -p pr
+        echo ${{ github.event.number }} > pr/NR
+    - name: Upload PR number
+      uses: actions/upload-artifact@v2
+      with:
+        name: pr
+        path: pr/
+
--- a/.github/workflows/csv-coverage-pr-comment.yml
+++ b/.github/workflows/csv-coverage-pr-comment.yml
@@ -0,0 +1,66 @@
+name: Comment on PR with framework coverage changes
+
+on:
+  workflow_run:
+    workflows: ["Check framework coverage changes"]
+    types:
+      - completed
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    if: >-  # bare expression on purpose: an expression wrapper plus the scalar's trailing newline is always truthy
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+
+    steps:
+    - name: Dump GitHub context
+      env:
+        GITHUB_CONTEXT: ${{ toJSON(github) }}
+      run: echo "$GITHUB_CONTEXT"
+    - name: Clone self (github/codeql) head
+      uses: actions/checkout@v2
+      with:
+        path: head
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.8
+
+    # download artifacts from the PR job:
+    - name: Download artifact - HEAD
+      uses: dawidd6/action-download-artifact@v2.14.0
+      with:
+        workflow: csv-coverage-pr-artifacts.yml
+        run_id: ${{ github.event.workflow_run.id }}
+        name: csv-framework-coverage-merge
+        path: out_head
+
+    - name: Download artifact - BASE
+      uses: dawidd6/action-download-artifact@v2.14.0
+      with:
+        workflow: csv-coverage-pr-artifacts.yml
+        run_id: ${{ github.event.workflow_run.id }}
+        name: csv-framework-coverage-base
+        path: out_base
+
+    - name: Download artifact - PR
+      uses: dawidd6/action-download-artifact@v2.14.0
+      with:
+        workflow: csv-coverage-pr-artifacts.yml
+        run_id: ${{ github.event.workflow_run.id }}
+        name: pr
+        path: pr
+
+    - name: Check coverage files
+      run: |
+        PR=$(cat "pr/NR")  # read from an artifact of the PR-triggered run, so it is quoted when used
+        GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} python head/misc/scripts/library-coverage/compare-files-comment-pr.py \
+          out_head out_base comparison.md ${{ github.repository }} "$PR" ${{ github.event.workflow_run.id }}
+    - name: Upload comparison results
+      uses: actions/upload-artifact@v2
+      with:
+        name: comparison
+        path: |
+          comparison.md
--- a/misc/scripts/library-coverage/compare-files-comment-pr.py
+++ b/misc/scripts/library-coverage/compare-files-comment-pr.py
@@ -0,0 +1,183 @@
+import sys
+import os
+import settings
+import difflib
+import utils
+
+"""
+This script compares the generated CSV coverage files of two folders (the head and the base of a PR) and comments on the PR if they differ.
+"""
+
+
+def check_file_exists(file):
+    """Return whether `file` exists, reporting a missing path on stderr."""
+    if not (exists := os.path.exists(file)):
+        print("Expected file '" + file + "' doesn't exist.", file=sys.stderr)
+    return exists
+
+
+def ignore_line_ending(ch):  # character-junk predicate handed to difflib.ndiff
+    return ch in " \r\n"  # equivalent to difflib.IS_CHARACTER_JUNK(ch, ws=" \r\n")
+
+
+def compare_files(file1, file2):
+    """Return True if the files match; otherwise print the diff to stderr and return False."""
+    diff_text = compare_files_str(file1, file2)
+    identical = diff_text == ""
+    if not identical:
+        # end="" because the collected diff lines keep their own line endings.
+        print(diff_text, end="", file=sys.stderr)
+    return identical
+
+
+def compare_files_str(file1, file2):
+    """Return the ndiff lines starting with '+' or '-' for the two files (empty string if none)."""
+    # Read everything inside `with` so both handles are closed before diffing.
+    with open(file1) as left, open(file2) as right:
+        left_lines, right_lines = left.readlines(), right.readlines()
+    diff = difflib.ndiff(left_lines, right_lines, None, ignore_line_ending)
+    # Every other line ndiff yields (no '+' or '-' prefix) is dropped.
+    changed = [line for line in diff if line.startswith(("+", "-"))]
+    return "".join(changed)
+
+
+def comment_pr(folder1, folder2, output_file, repo, pr_number, run_id):
+    """Compare two report folders and comment on the PR when they differ.
+
+    No comment is posted when compare_folders leaves `output_file` empty.
+    """
+    compare_folders(folder1, folder2, output_file)
+    size = os.path.getsize(output_file)
+    if size == 0:
+        print("No difference in the coverage reports")
+        return
+    run_url = "https://github.com/" + repo + "/actions/runs/" + run_id
+    comment = ":warning: The head of this PR and the base branch were compared for differences in the framework coverage reports. " + \
+        "The generated reports are available in the [artifacts of this workflow run](" + run_url + "). " + \
+        "The differences will be picked up by the nightly job after the PR gets merged. "
+    if size < 2000:  # small summaries are inlined in the comment body
+        print("There's a small change in the CSV framework coverage reports")
+        with open(output_file, 'r') as file:
+            comment += "The following differences were found: \n\n" + file.read()
+    else:
+        print("There's a large change in the CSV framework coverage reports")
+        comment += "The differences can be found in the " + output_file + " artifact of this job."
+    post_comment(comment, repo, pr_number)
+
+
+def post_comment(comment, repo, pr_number):
+    """Post `comment` on pull request `pr_number` of `repo` via the `gh` CLI."""
+    print("Posting comment to PR #" + str(pr_number))
+    utils.subprocess_run(["gh", "pr", "comment", str(pr_number), "--repo", repo, "--body", comment])
+
+
+def compare_folders(folder1, folder2, output_file):
+    """Write a markdown summary of report differences between two folders.
+
+    For each language, the generated RST and CSV report names are taken from
+    `settings` and looked up in both `folder1` and `folder2`. If any of the
+    four files is missing, the missing ones are listed for that language.
+    Otherwise each report whose "+"/"-" diff lines are non-empty gets a
+    fenced diff block. `output_file` is written even when nothing differs,
+    in which case it is empty.
+    """
+    languages = ['java']
+    sections = []
+
+    for lang in languages:
+        # Report order matters for the output: RST first, then CSV.
+        report_names = [
+            settings.generated_output_rst.format(language=lang),
+            settings.generated_output_csv.format(language=lang),
+        ]
+
+        # Every file is probed in both folders (no early exit), so all
+        # missing files end up in the summary, not just the first one.
+        missing = [
+            "- " + name + " doesn't exist in folder " + folder + "\n"
+            for name in report_names
+            for folder in (folder1, folder2)
+            if not check_file_exists(folder + "/" + name)
+        ]
+        if missing:
+            print("Expected files are missing", file=sys.stderr)
+            sections.append(
+                "\n### " + lang + "\n\n#### Expected files are missing for " +
+                lang + "\n" + "".join(missing) + "\n")
+            continue
+
+        # Both diffs are computed before anything is reported.
+        changes = []
+        for name in report_names:
+            diff_text = compare_files_str(
+                folder1 + "/" + name, folder2 + "/" + name)
+            if diff_text != "":
+                changes.append(
+                    "- Changes to " + name + ":\n```diff\n" + diff_text + "```\n\n")
+
+        if changes:
+            print("Generated file contents are not matching", file=sys.stderr)
+            sections.append(
+                "\n### " + lang + "\n\n#### Generated file changes for " +
+                lang + "\n\n" + "".join(changes))
+
+    with open(output_file, 'w', newline='') as out:
+        out.write("".join(sections))
+
+
+comment_pr(sys.argv[1], sys.argv[2], sys.argv[3],  # folder1, folder2, output_file
+           sys.argv[4], sys.argv[5], sys.argv[6])  # repo, pr_number, run_id
+
+
+# def compare_generated_and_repo_files():
+#     languages = ['java']
+
+#     all_ok = True
+
+#     for lang in languages:
+#         repo_output_rst = settings.repo_output_rst.format(language=lang)
+#         repo_output_csv = settings.repo_output_csv.format(language=lang)
+
+#         generated_output_rst = settings.generated_output_rst.format(
+#             language=lang)
+#         generated_output_csv = settings.generated_output_csv.format(
+#             language=lang)
+
+#         exists = check_file_exists(repo_output_rst)
+#         if not exists:
+#             sys.exit(1)
+
+#         exists = check_file_exists(repo_output_csv)
+#         if not exists:
+#             sys.exit(1)
+
+#         exists = check_file_exists(generated_output_rst)
+#         if not exists:
+#             sys.exit(1)
+
+#         exists = check_file_exists(generated_output_csv)
+#         if not exists:
+#             sys.exit(1)
+
+#         docs_folder = settings.documentation_folder_no_prefix.format(
+#             language=lang)
+
+#         rst_ok = compare_files(repo_output_rst, generated_output_rst)
+#         if not rst_ok:
+#             print("The generated file doesn't match the one in the codebase. Please check and fix file '" +
+#                   docs_folder + settings.output_rst_file_name + "'.", file=sys.stderr)
+#         csv_ok = compare_files(repo_output_csv, generated_output_csv)
+#         if not csv_ok:
+#             print("The generated file doesn't match the one in the codebase. Please check and fix file '" +
+#                   docs_folder + settings.output_csv_file_name + "'.", file=sys.stderr)
+
+#         if not rst_ok or not csv_ok:
+#             print("The generated CSV coverage report files for '" + lang + "' don't match the ones in the codebase. Please update the files in '" +
+#                   docs_folder + "'. The new files can be downloaded from the artifacts of this job.", file=sys.stderr)
+#             all_ok = False
+#         else:
+#             print("The generated files for '" + lang +
+#                   "' match the ones in the codebase.")
+
+#     if not all_ok:
+#         sys.exit(1)
--- a/misc/scripts/library-coverage/compare-files.py
+++ b/misc/scripts/library-coverage/compare-files.py
@@ -1,69 +0,0 @@
-import sys
-import os
-import settings
-import difflib
-
-"""
-This script compares the generated CSV coverage files with the ones in the codebase.
-"""
-
-
-def check_file_exists(file):
-    if not os.path.exists(file):
-        print("Expected file '" + file + "' doesn't exist.", file=sys.stderr)
-        sys.exit(1)
-
-
-def ignore_line_ending(ch):
-    return difflib.IS_CHARACTER_JUNK(ch, ws=" \r\n")
-
-
-def compare_files(file1, file2, path_to_report):
-    has_differences = False
-    diff = difflib.ndiff(open(file1).readlines(),
-                         open(file2).readlines(), None, ignore_line_ending)
-    for line in diff:
-        if line.startswith("+") or line.startswith("-"):
-            print(line, end="", file=sys.stderr)
-            has_differences = True
-
-    if has_differences:
-        print("The generated file doesn't match the one in the codebase. Please check and fix file '" +
-              path_to_report + "'.", file=sys.stderr)
-        return False
-    return True
-
-
-languages = ['java']
-
-all_ok = True
-
-for lang in languages:
-    repo_output_rst = settings.repo_output_rst.format(language=lang)
-    repo_output_csv = settings.repo_output_csv.format(language=lang)
-
-    generated_output_rst = settings.generated_output_rst.format(language=lang)
-    generated_output_csv = settings.generated_output_csv.format(language=lang)
-
-    check_file_exists(repo_output_rst)
-    check_file_exists(repo_output_csv)
-    check_file_exists(generated_output_rst)
-    check_file_exists(generated_output_csv)
-
-    docs_folder = settings.documentation_folder_no_prefix.format(language=lang)
-
-    rst_ok = compare_files(repo_output_rst, generated_output_rst,
-                           docs_folder + settings.output_rst_file_name)
-    csv_ok = compare_files(repo_output_csv, generated_output_csv,
-                           docs_folder + settings.output_csv_file_name)
-
-    if not rst_ok or not csv_ok:
-        print("The generated CSV coverage report files for '" + lang + "' don't match the ones in the codebase. Please update the files in '" +
-              docs_folder + "'. The new files can be downloaded from the artifacts of this job.", file=sys.stderr)
-        all_ok = False
-    else:
-        print("The generated files for '" + lang +
-              "' match the ones in the codebase.")
-
-if not all_ok:
-    sys.exit(1)
--- a/misc/scripts/library-coverage/utils.py
+++ b/misc/scripts/library-coverage/utils.py
@@ -2,10 +2,12 @@ import subprocess
 import os
 import csv
 import sys
+import shlex


 def subprocess_run(cmd):
    """Runs a command through subprocess.run, with a few tweaks. Raises an Exception if exit code != 0."""
+    print(shlex.join(cmd))
    return subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy(), check=True)