Add error handling csv writer
The writer generates a status csv per sarif file.
@@ -5,6 +5,7 @@ from dataclasses import dataclass
 from sarif_cli import signature, signature_single
 from sarif_cli import typegraph
 from sarif_cli import snowflake_id
+from sarif_cli import status_writer
 import argparse
 import csv
 import dataclasses as dc
@@ -28,10 +29,14 @@ parser = argparse.ArgumentParser(description='Read a collection of sarif files a
 parser.add_argument('file', metavar='scan-spec.json', type=str,
                     help="json file containing required external scan information.")
 parser.add_argument('outdir', metavar='output-dir', type=str, help='output directory')
+parser.add_argument('csvout', metavar='csv-outfile', type=str, help='processing status csv output file name to use')
 parser.add_argument('-r', '--write-raw-tables', action="store_true",
                     help='Write the raw sarif tables to the output directory')
 args = parser.parse_args()
 
+# Setup csv error writer
+status_writer.setup_csv_writer(args.csvout)
+
 # Load meta info
 def load(fname):
     with open(fname, 'rb') if fname != '-' else sys.stdin as fp:
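For orientation, here is a minimal sketch of how the new positional argument reaches the writer. The command line in the comment is illustrative; only the argument names come from the diff, and `sarif_cli` (with the `status_writer` module added below) must be importable for this to run.

```python
# Hypothetical invocation of the updated tool:
#   sarif-extract-scans scan-spec.json output-dir status-out
# setup_csv_writer('status-out') creates status-out.csv and writes the
# header row before any sarif processing begins.
import argparse
from sarif_cli import status_writer

parser = argparse.ArgumentParser()
parser.add_argument('file', metavar='scan-spec.json', type=str)
parser.add_argument('outdir', metavar='output-dir', type=str)
parser.add_argument('csvout', metavar='csv-outfile', type=str)

# parse_args is given an explicit list so the sketch runs without a shell
args = parser.parse_args(['scan-spec.json', 'out', 'status-out'])
status_writer.setup_csv_writer(args.csvout)   # creates status-out.csv with header
```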
@@ -40,12 +45,15 @@ def load(fname):
     except json.decoder.JSONDecodeError as err:
         # TODO knewbury error handling
         logging.error('Error reading from {}: {}: line {}, column {}'
-                      .format(args.file, err.msg, err.lineno, err.colno))
+                      .format(fname, err.msg, err.lineno, err.colno))
+        status_writer.file_load_error["sarif_file"] = fname
+        status_writer.csv_write(status_writer.file_load_error)
         sys.exit(1)
     return content
 
 scan_spec = load(args.file)
 sarif_struct = load(scan_spec['sarif_file_name'])
+status_writer.setup_status_filenames(scan_spec['sarif_file_name'])
 
 #
 # Preprocess raw SARIF to get smaller signature
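The new failure path in a self-contained sketch, assuming a malformed input file named `broken.sarif` exists (both file names here are hypothetical). The point of the change: a bad input now leaves a row in the status csv before the process exits, so batch runs can account for every input file.

```python
import json
import logging
import sys
from sarif_cli import status_writer

status_writer.setup_csv_writer('status-out')    # hypothetical status file base name
fname = 'broken.sarif'                          # hypothetical malformed input
try:
    with open(fname, 'rb') as fp:
        content = json.load(fp)
except json.decoder.JSONDecodeError as err:
    logging.error('Error reading from {}: {}: line {}, column {}'
                  .format(fname, err.msg, err.lineno, err.colno))
    # record the failure in the status csv before exiting
    status_writer.file_load_error["sarif_file"] = fname
    status_writer.csv_write(status_writer.file_load_error)
    sys.exit(1)
```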
@@ -71,6 +79,8 @@ sarif_struct = signature.fillsig(args, sarif_struct, context)
 # sys.exit(1)
 tgraph = typegraph.Typegraph(signature_single.struct_graph_2022_02_01)
 typegraph.destructure(tgraph, signature_single.start_node_2022_02_01, sarif_struct)
+# warnings may have been gathered above; if not, this writes nothing
+status_writer.csv_write_warnings()
 
 #
 # Form output tables
@@ -237,3 +247,5 @@ if args.write_raw_tables:
     _write_dataframes_of(scantabs)
 
 write('codeflows', bt.codeflows)
+status_writer.warning_set["success"] += 1
+status_writer.csv_write_warnings()
@@ -185,7 +185,8 @@ for path in paths:
         pickle.dump(successful_runs, outfile)
 
     scan_log_file = os.path.join(project, component + ".scanlog")
-    runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir],
+    csv_outfile = os.path.join(project, component)
+    runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir, csv_outfile],
                               capture_output=True, text=True)
     if runstats.returncode == 0:
         print("{:6} {}".format("OK", path))
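The runner side of the change as a sketch, with hypothetical `project`/`component` values. Note that the child process appends `.csv` itself (see `setup_csv_writer` below), so the runner passes a base name without an extension.

```python
import os
import subprocess

project, component = 'myproject', 'mycomponent'          # hypothetical values
scan_spec_file = os.path.join(project, 'scan-spec.json')
output_dir = os.path.join(project, 'out')
csv_outfile = os.path.join(project, component)           # base name; child adds .csv

runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir, csv_outfile],
                          capture_output=True, text=True)
if runstats.returncode == 0:
    print("{:6} {}".format("OK", scan_spec_file))
```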
@@ -7,8 +7,8 @@ import logging
 import numpy
 import pandas as pd
 import re
-import sys
 from sarif_cli import hash
+from sarif_cli import status_writer
 
 class ZeroResults(Exception):
     pass
@@ -168,6 +168,7 @@ def joins_for_results(basetables, external_info):
         # TODO knewbury to error handling
         logging.warning("Zero problem/path_problem results found in sarif "
                         "file but processing anyway.")
+        status_writer.csv_write(status_writer.zero_results)
     res = tables[0]
 
     # Force all column types to ensure appropriate formatting
sarif_cli/status_writer.py (new file, 98 lines)
@@ -0,0 +1,98 @@
+# csv status reporting
+import csv
+
+fieldnames = ['sarif_file', 'level', 'message', 'extra_info']
+
+warning_set = {
+    "success" : 0,
+    "zero_results" : 0,
+    "input_sarif_missing" : 0
+}
+
+#
+# Setup csv status writer
+#
+def setup_csv_writer(filename):
+    with open(filename + '.csv', 'w', newline='') as file:
+        # module-level global acts as a singleton for the output file name
+        global global_filename
+        global_filename = filename
+        csv_writer = csv.DictWriter(file, fieldnames)
+        csv_writer.writeheader()
+
+#
+# csv status write - one line per error
+#
+def csv_write(data):
+    with open(global_filename + '.csv', 'a', newline='') as file:
+        csv_writer = csv.DictWriter(file, fieldnames)
+        csv_writer.writerow(data)
+
+#
+# csv status write - all at once, for types of warnings that can
+# happen multiple times and where the success message must come last
+#
+def csv_write_warnings():
+    with open(global_filename + '.csv', 'a', newline='') as file:
+        csv_writer = csv.DictWriter(file, fieldnames)
+        if warning_set["input_sarif_missing"] != 0:
+            csv_writer.writerow(input_sarif_missing)
+            # reset in case different types of warnings are accumulated later
+            input_sarif_missing["extra_info"] = "Missing: "
+            warning_set["input_sarif_missing"] = 0
+        if warning_set["success"] != 0:
+            csv_writer.writerow(success)
+
+def setup_status_filenames(sarif_file_name):
+    success["sarif_file"] = sarif_file_name
+    zero_results["sarif_file"] = sarif_file_name
+    input_sarif_extra["sarif_file"] = sarif_file_name
+    input_sarif_missing["sarif_file"] = sarif_file_name
+    unknown_sarif_parsing_shape["sarif_file"] = sarif_file_name
+    unknown["sarif_file"] = sarif_file_name
+
+success = {
+    "sarif_file": "",
+    "level": "SUCCESS",
+    "message": "File successfully processed."
+}
+
+zero_results = {
+    "sarif_file": "",
+    "level": "WARNING",
+    "message": "Zero results seen in sarif file."
+}
+
+input_sarif_missing = {
+    "sarif_file": "",
+    "level": "WARNING",
+    "message": "Input sarif is missing necessary properties.",
+    "extra_info" : "Missing: "
+}
+
+# a file load error can happen on either the sarif file or scan-spec.json
+file_load_error = {
+    "sarif_file": "",
+    "level": "ERROR",
+    "message": "Could not load file."
+}
+
+input_sarif_extra = {
+    "sarif_file": "",
+    "level": "ERROR",
+    "message": "Input sarif contains extra unnecessary properties."
+}
+
+unknown_sarif_parsing_shape = {
+    "sarif_file": "",
+    "level": "ERROR",
+    "message": "Error matching expected sarif format to actual input sarif shape.",
+    "extra_info" : ""
+}
+
+unknown = {
+    "sarif_file": "",
+    "level": "ERROR",
+    "message": "Error details currently undiagnosed. Assess log file for more information."
+}
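Taken together, a minimal end-to-end use of the new module; file names are illustrative. `csv_write` emits error rows immediately, while repeatable warnings accumulate in `warning_set` and are flushed by `csv_write_warnings`, which deliberately writes the success row last so the final row per file is the overall status.

```python
from sarif_cli import status_writer

status_writer.setup_csv_writer('status-out')            # header row -> status-out.csv
status_writer.setup_status_filenames('example.sarif')   # stamp every status dict

# Errors and one-off warnings are written immediately, one row per call:
status_writer.csv_write(status_writer.zero_results)

# Repeatable warnings are counted, then flushed in one batch:
status_writer.warning_set["input_sarif_missing"] += 1
status_writer.input_sarif_missing["extra_info"] += "{'results'}, "
status_writer.warning_set["success"] += 1
status_writer.csv_write_warnings()

with open('status-out.csv') as fp:
    print(fp.read())
```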
@@ -11,6 +11,7 @@ from dataclasses import dataclass
 import logging
 from typing import Any, Dict, List, Tuple, Union
 import pandas as pd
+from sarif_cli import status_writer
 
 #
 # Utility classes
@@ -112,6 +113,7 @@ def destructure(typegraph: Typegraph, node: NodeId, tree: Tree):
     elif t in [str, int, bool]:
         pass
     else:
+        # TODO knewbury error handling
         raise Exception("Unhandled type: %s" % t)
 
 def _destructure_dict_1(typegraph, node, tree):
@@ -137,6 +139,7 @@ def _destructure_dict_1(typegraph, node, tree):
     # Sanity check
     sig = typegraph.signature_graph[node]
     if type(sig) != tuple:
+        # TODO knewbury error handling
         raise SignatureMismatch()
 
     # Destructure this dictionary
@@ -157,7 +160,7 @@ def _destructure_dict(typegraph: Typegraph, node, tree):
     type_fields = typegraph.fields[node]
     if tree_fields == type_fields:
         _destructure_dict_1(typegraph, node, tree)
-
+    # TODO knewbury error handling here
     elif set(tree_fields).issuperset(set(type_fields)):
         # Log a warning
         # log.warning("XX: Tree has unrecognized fields")
@@ -165,9 +168,15 @@ def _destructure_dict(typegraph: Typegraph, node, tree):
                         'known entries: {}'.format(tree))
         logging.warning('tree fields: {}'.format(sorted(tree_fields)))
         logging.warning('type fields: {}'.format(sorted(type_fields)))
+        status_writer.csv_write(status_writer.input_sarif_extra)
         _destructure_dict_1(typegraph, node, tree)
 
     elif set(tree_fields).issubset(set(type_fields)):
+        # build a string list of the expected properties missing from the sarif
+        specific_missing = f"{set(type_fields) - set(tree_fields)}, "
+        if specific_missing not in status_writer.input_sarif_missing["extra_info"]:
+            status_writer.input_sarif_missing["extra_info"] += specific_missing
+        status_writer.warning_set["input_sarif_missing"] += 1
         raise MissingFieldException(
             f"(Sub)tree is missing fields required by typedef.\n"
             f"Expected {type_fields}, found {tree_fields}.\n"
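The branches above classify the tree's keys against the typedef's fields. A toy sketch of that classification, independent of the sarif-cli types (all names here are illustrative):

```python
def classify(tree_fields, type_fields):
    # Mirror the equal / superset / subset / fallthrough branches of
    # _destructure_dict using plain set comparisons.
    tree, expected = set(tree_fields), set(type_fields)
    if tree == expected:
        return "exact match: destructure as-is"
    if tree.issuperset(expected):
        return "extra properties: warn, record input_sarif_extra, keep going"
    if tree.issubset(expected):
        return f"missing properties {expected - tree}: count warning, raise"
    return "shape mismatch: record unknown_sarif_parsing_shape, raise"

print(classify(('a', 'b'), ('a', 'b')))       # exact match
print(classify(('a', 'b', 'c'), ('a', 'b')))  # extra properties
print(classify(('a',), ('a', 'b')))           # missing properties {'b'}
print(classify(('x',), ('a', 'b')))           # shape mismatch
```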
@@ -177,6 +186,9 @@ def _destructure_dict(typegraph: Typegraph, node, tree):
         )
 
     else:
+        # TODO knewbury error handling
+        status_writer.unknown_sarif_parsing_shape["extra_info"] = "type fields {} do not match tree fields {}.".format(type_fields, tree_fields)
+        status_writer.csv_write(status_writer.unknown_sarif_parsing_shape)
         raise Exception("typegraph: unhandled case reached: cannot match type "
                         "fields {} to tree fields {}. Data is invalid."
                         .format(type_fields, tree_fields))
@@ -243,6 +255,7 @@ def _destructure_list(typegraph, node: str, tree: List):
                         id(value)))
             # Next `value` on success
             break
+        # status reporting for the cases below is already handled in each case
         except MissingFieldException:
             # Re-raise if last available signature failed, otherwise try
             # next `signature`
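For context, the enclosing loop tries each candidate signature in turn and only re-raises when the last one fails; a generic sketch of that retry pattern, with placeholder names standing in for the sarif-cli internals:

```python
class MissingFieldException(Exception):
    pass

def destructure_with(signature, value):
    # placeholder stand-in: fail when the signature does not match
    if signature != value["shape"]:
        raise MissingFieldException(signature)

def try_signatures(signatures, value):
    for i, signature in enumerate(signatures):
        try:
            destructure_with(signature, value)
            break                   # next `value` on success
        except MissingFieldException:
            if i == len(signatures) - 1:
                raise               # re-raise if the last signature failed
            # otherwise fall through and try the next `signature`

try_signatures(["A", "B"], {"shape": "B"})    # succeeds on the second signature
```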