Fix runner defaults and set up more options

sarif-extract-scans-runner now takes a specific outer
output dir
bin/sarif-aggregate-scans now takes a specific directory
to summarize from
This commit is contained in:
Kristen Newbury
2022-11-14 14:30:55 -05:00
parent 066fcb8248
commit d9bdcc8724
2 changed files with 35 additions and 7 deletions

View File

@@ -33,7 +33,10 @@ parser.add_argument('sarif_files', metavar='sarif-files', type=str,
parser.add_argument('aggregate_dir', metavar='aggregate-dir', type=str,
help='Directory for writing the combined scan tables')
parser.add_argument('-m', '--max-files', metavar='M', type=int, default=100,
parser.add_argument('-in', '--in-dir', metavar='input-dir', type=str, default="",
                    help='Directory containing input set of results (corresponds to --outdir on the runner, if supplied)')
parser.add_argument('-m', '--max-files', metavar='M', type=int, default=100000,
help='Maximum number of files to process.'
' Default: %(default)d')
@@ -105,6 +108,12 @@ def _all_csv_files_exist(output_dir):
try: os.mkdir(args.aggregate_dir, mode=0o755)
except FileExistsError: pass
#
# If specific input dir specified - format that
#
if args.in_dir != "":
args.in_dir+="/"
#
# Collect sarif file information
#
@@ -126,7 +135,7 @@ for path in paths:
#
# Validate input data directory and content
#
output_dir = os.path.join(project, component + ".scantables")
output_dir = os.path.join(args.in_dir+project, component + ".scantables")
if not os.path.exists(output_dir):
continue
if not _all_csv_files_exist(output_dir):

View File

@@ -87,8 +87,9 @@ from sarif_cli import hash
parser = argparse.ArgumentParser(description='Run sarif-extract-scans over a directory hierarchy')
parser.add_argument('sarif_files', metavar='sarif-files', type=str, help='File containing list of sarif files, use - for stdin')
parser.add_argument('-o','--outdir', metavar='output-dir', type=str, default="", help='output directory')
parser.add_argument('-m', '--max-files', metavar='number', type=int, default=100,
parser.add_argument('-m', '--max-files', metavar='number', type=int, default=100000,
help='Maximum number of files to process.'
' Default: %(default)d')
@@ -114,6 +115,17 @@ if len(sys.argv) == 2 and sys.argv[1] == '--doc':
args = parser.parse_args()
#
# Create outermost output directory (like <outer_dir>/*/*.scantables)
#
outer_dir = args.outdir
if outer_dir != "":
outer_dir+="/"
try:
os.mkdir(outer_dir, mode=0o755)
except FileExistsError:
pass
#
# Collect sarif file information
#
@@ -156,14 +168,21 @@ for path in paths:
"sarif_file_name": path, # pd.StringDtype()
}
scan_spec_file = os.path.join(project, component + ".scanspec")
#
# If using outermost output directory, create project directory:
# (like <outer_dir>/<project>/*.scantables)
#
try: os.mkdir(outer_dir+ project, mode=0o755)
except FileExistsError: pass
scan_spec_file = os.path.join(outer_dir+ project, component + ".scanspec")
with open(scan_spec_file, 'w') as fp:
json.dump(scan_spec, fp)
#
# Table output directory
#
output_dir = os.path.join(project, component + ".scantables")
output_dir = os.path.join(outer_dir+ project, component + ".scantables")
try: os.mkdir(output_dir, mode=0o755)
except FileExistsError: pass
#
@@ -184,8 +203,8 @@ for path in paths:
with open(args.successful_runs, 'wb') as outfile:
pickle.dump(successful_runs, outfile)
scan_log_file = os.path.join(project, component + ".scanlog")
csv_outfile = os.path.join(project, component)
scan_log_file = os.path.join(outer_dir+ project, component + ".scanlog")
csv_outfile = os.path.join(outer_dir+ project, component)
runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir, csv_outfile],
capture_output=True, text=True)
if runstats.returncode == 0: