Add csv status aggregate tool

2025-12-16 01:13:03 +01:00 · 2022-11-15 10:18:12 -05:00
parent d9bdcc8724
commit 678219beb7
2 changed files with 99 additions and 1 deletions
--- a/bin/sarif-create-aggregate-report
+++ b/bin/sarif-create-aggregate-report
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+"""Aggregate the per-file status csvs written by ./sarif-extract-scans-(runner).
+
+For each `project/component` line of the sarif file list, read the csv
+`<in-dir>/project/component.csv` if it exists, count its rows per `levelcode`,
+and write the totals as a one-row csv to the summary file named by the arg.
+"""
+
+import argparse
+import csv
+import os
+import sys
+import pandas as pd
+from sarif_cli import status_writer
+
+#
+# Handle arguments
+#
+parser = argparse.ArgumentParser(description='Aggregate the status csvs produced by sarif-extract-scans-runner')
+
+parser.add_argument('sarif_files', metavar='sarif-files', type=str,
+                    help='File containing list of sarif files that were processed, use - for stdin')
+
+parser.add_argument('-s','--summary_filename', metavar='summary-filename', type=str, default="summary-report.csv",
+                    help='Filename for final summary report')
+
+parser.add_argument('-in', '--in-dir', metavar='input-dir', type=str, default="",
+                    help='Directory containing input set of results (corresponds to --outdir on the runner if supplied)')
+
+args = parser.parse_args()
+
+#
+# only warn if specified outfile exists, still use
+#
+if os.path.exists(args.summary_filename):
+    print("Summary file provided exists, warning, overwriting.")
+
+#
+# Collect sarif file information (stdin is read but left open); drop blank lines
+#
+if args.sarif_files == '-':
+    lines = sys.stdin.readlines()
+else:
+    with open(args.sarif_files, 'r') as fp:
+        lines = fp.readlines()
+paths = [line.rstrip() for line in lines if line.strip()]
+
+#
+# Traverse all possible individual summary csv containing directory
+#
+number_processed = 0
+data = []
+
+for path in paths:
+    parts = path.split('/')
+    if len(parts) != 2:
+        print("Skipping entry not of the form project/component: " + path, file=sys.stderr)
+        continue
+    project, component = parts
+    #
+    # Validate input data directory and content
+    #
+    # os.path.join ignores an empty --in-dir and copes with a trailing '/'
+    csv_infile = os.path.join(args.in_dir, project, component + ".csv")
+    if not os.path.exists(csv_infile):
+        continue
+    number_processed += 1
+    data.append(pd.read_csv(csv_infile))
+
+# Count rows per levelcode.  pd.concat raises ValueError on an empty list, and
+# csvs without a levelcode column carry no counts, so both cases report zeros.
+tally = {}
+if data:
+    merged = pd.concat(data)
+    if 'levelcode' in merged.columns:
+        tally = merged['levelcode'].value_counts().to_dict()
+
+header = ['number_processed', 'number_successfully_created', 'number_zero_results',
+          "number_input_sarif_missing", "number_file_load_error", "number_input_sarif_extra", "number_unknown_sarif_parsing_shape",
+          "number_unknown" ]
+
+final_counts = [number_processed] + [int(tally.get(i, 0)) for i in range(status_writer.STATUS_NUM+1)]
+
+# newline='' lets the csv module control line endings
+with open(args.summary_filename, 'w', newline='') as f:
+    csv_writer = csv.writer(f)
+    csv_writer.writerow(header)
+    csv_writer.writerow(final_counts)
--- a/sarif_cli/status_writer.py
+++ b/sarif_cli/status_writer.py
@@ -1,7 +1,9 @@
 # csv status reporting
 import csv

-fieldnames = ['sarif_file', 'level', 'message', "extra_info"]
+STATUS_NUM = 6
+
+fieldnames = ['sarif_file', 'level', 'levelcode', 'message', "extra_info"]

 warning_set = {
  "success" : 0,
@@ -55,18 +57,21 @@ def setup_status_filenames(sarif_file_name):
 success = {
  "sarif_file": "",
  "level": "SUCCESS",
+  "levelcode": 0,
  "message": "File successfully processed."
 }

 zero_results = {
  "sarif_file": "",
  "level": "WARNING",
+  "levelcode": 1,
  "message": "Zero results seen in sarif file."
 }

 input_sarif_missing = {
  "sarif_file": "",
  "level": "WARNING",
+  "levelcode": 2,
  "message": "Input sarif is missing neccesary properties.",
  "extra_info" : "Missing: "
 }
@@ -75,18 +80,21 @@ input_sarif_missing = {
 file_load_error = {
  "file": "",
  "level": "ERROR",
+  "levelcode": 3,
  "message": "Could not load file."
 }

 input_sarif_extra  = {
  "sarif_file": "",
  "level": "ERROR",
+  "levelcode": 4,
  "message": "Input sarif contains extra unneccesary properties."
 }

 unknown_sarif_parsing_shape = {
  "sarif_file": "",
  "level": "ERROR",
+  "levelcode": 5,
  "message": "Error matching expected sarif format to actual input sarif shape.",
  "extra_info" : ""
 }
@@ -94,5 +102,6 @@ unknown_sarif_parsing_shape = {
 unknown = {
  "sarif_file": "",
  "level": "ERROR",
+  "levelcode": 6,
  "message": "Error details currently undiagnosed. Assess log file for more information."
 }