Add csv status aggregate tool

This commit is contained in:
Kristen Newbury
2022-11-15 10:18:12 -05:00
parent d9bdcc8724
commit 678219beb7
2 changed files with 99 additions and 1 deletions

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Run `sarif-create-aggregate-report` over the csvs in the directories produced by
./sarif-extract-scans-(runner)
...
and create the summary file named by the -s/--summary_filename argument.
"""
import argparse
import os
import sys
import pandas as pd
import csv
from sarif_cli import status_writer
#
# Column names of the summary report: the number of status csvs that were read,
# followed by one count per levelcode 0..status_writer.STATUS_NUM.
#
SUMMARY_HEADER = ['number_processed', 'number_successfully_created', 'number_zero_results',
                  "number_input_sarif_missing", "number_file_load_error", "number_input_sarif_extra",
                  "number_unknown_sarif_parsing_shape", "number_unknown"]


def parse_arguments(argv=None):
    """Parse the command line.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace with sarif_files, summary_filename and in_dir.
    """
    parser = argparse.ArgumentParser(
        description='Aggregate the per-file status csvs into one summary report')
    parser.add_argument('sarif_files', metavar='sarif-files', type=str,
                        help='File containing list of sarif files that were processed, use - for stdin')
    parser.add_argument('-s', '--summary_filename', metavar='summary-filename', type=str,
                        default="summary-report.csv",
                        help='Filename for final summary report')
    parser.add_argument('-in', '--in-dir', metavar='input-dir', type=str, default="",
                        help='Directory containing input set of results '
                             '(corresponds to --outdir on the runner if supplied)')
    return parser.parse_args(argv)


def read_sarif_paths(list_file):
    """Return the non-blank lines of list_file with trailing whitespace removed.

    list_file: path of the file listing the processed sarif files, or '-' to
    read the list from stdin.
    """
    if list_file == '-':
        # Read stdin directly instead of through `with`, so it is not closed.
        lines = sys.stdin.readlines()
    else:
        with open(list_file, 'r') as fp:
            lines = fp.readlines()
    # Blank lines carry no path; drop them instead of failing later on.
    return [line.rstrip() for line in lines if line.strip()]


def status_csv_path(in_dir, sarif_path):
    """Return the status csv path for one 'project/component' entry.

    in_dir: directory holding the per-project result directories ("" for the
    current directory).
    sarif_path: one entry of the sarif file list.
    Returns None when sarif_path is not exactly of the form project/component.
    """
    parts = sarif_path.split('/')
    if len(parts) != 2:
        return None
    project, component = parts
    return os.path.join(in_dir, project, component + ".csv")


def count_levelcodes(frames, status_num):
    """Count the rows per levelcode over all status frames.

    frames: list of pandas DataFrames read from the per-file status csvs.
    status_num: highest levelcode to report.
    Returns a list of status_num + 1 ints; entry i is the number of rows whose
    'levelcode' column equals i.  All zeros when there are no frames or none of
    them has a 'levelcode' column.
    """
    counts = [0] * (status_num + 1)
    if not frames:
        # pd.concat raises ValueError on an empty list.
        return counts
    combined = pd.concat(frames)
    if 'levelcode' not in combined.columns:
        return counts
    # Tally once, then look each code up, rather than re-counting per code.
    seen = combined['levelcode'].value_counts().to_dict()
    return [int(seen.get(code, 0)) for code in range(status_num + 1)]


def main(argv=None):
    """Read every available status csv and write the one-row summary report."""
    args = parse_arguments(argv)
    #
    # only warn if specified outfile exists, still use
    #
    if os.path.exists(args.summary_filename):
        print("Summary file provided exists, warning, overwriting.")
    #
    # Traverse all possible individual summary csv containing directory
    #
    frames = []
    for sarif_path in read_sarif_paths(args.sarif_files):
        csv_infile = status_csv_path(args.in_dir, sarif_path)
        if csv_infile is None:
            print("Skipping malformed entry (expected project/component): " + sarif_path,
                  file=sys.stderr)
            continue
        # Entries without a status csv are not counted as processed.
        if not os.path.exists(csv_infile):
            continue
        frames.append(pd.read_csv(csv_infile))
    summary_row = [len(frames)] + count_levelcodes(frames, status_writer.STATUS_NUM)
    # newline='' lets the csv module control line endings itself.
    with open(args.summary_filename, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(SUMMARY_HEADER)
        csv_writer.writerow(summary_row)


if __name__ == "__main__":
    main()

View File

@@ -1,7 +1,9 @@
# csv status reporting
import csv
fieldnames = ['sarif_file', 'level', 'message', "extra_info"]
# Highest levelcode among the status records visible in this module (their
# codes run 0..6).
STATUS_NUM = 6
# NOTE(review): presumably the column names of a status csv row - confirm
# against the code that writes the rows.
fieldnames = ['sarif_file', 'level', 'levelcode', 'message', "extra_info"]
warning_set = {
"success" : 0,
@@ -55,18 +57,21 @@ def setup_status_filenames(sarif_file_name):
# Status record: the file was processed successfully.
success = dict(
    sarif_file="",
    level="SUCCESS",
    levelcode=0,
    message="File successfully processed.",
)

# Status record: the sarif file contained zero results.
zero_results = dict(
    sarif_file="",
    level="WARNING",
    levelcode=1,
    message="Zero results seen in sarif file.",
)

# Status record: the input sarif lacks required properties; extra_info starts
# with the "Missing: " prefix.
input_sarif_missing = dict(
    sarif_file="",
    level="WARNING",
    levelcode=2,
    message="Input sarif is missing neccesary properties.",
    extra_info="Missing: ",
)
@@ -75,18 +80,21 @@ input_sarif_missing = {
# Status record: the input file could not be loaded.
# The file name lives under "sarif_file", the same key that `fieldnames` lists
# and that every other status record uses; a "file" key matches no column.
file_load_error = {
    "sarif_file": "",
    "level": "ERROR",
    "levelcode": 3,
    "message": "Could not load file."
}
# Status record: the input sarif carries properties beyond the expected ones.
input_sarif_extra = dict(
    sarif_file="",
    level="ERROR",
    levelcode=4,
    message="Input sarif contains extra unneccesary properties.",
)

# Status record: the input sarif shape did not match the expected format;
# extra_info starts out empty.
unknown_sarif_parsing_shape = dict(
    sarif_file="",
    level="ERROR",
    levelcode=5,
    message="Error matching expected sarif format to actual input sarif shape.",
    extra_info="",
)
@@ -94,5 +102,6 @@ unknown_sarif_parsing_shape = {
# Status record: an error whose cause has not been diagnosed; the message
# points the reader at the log file.
unknown = dict(
    sarif_file="",
    level="ERROR",
    levelcode=6,
    message="Error details currently undiagnosed. Assess log file for more information.",
)