#!/usr/bin/env python3
"""Run `sarif-extract-scans` over a directory hierarchy of the form

    organization/project-sarif.json
    org2/proj2.sarif
    ...

mirroring the github org/project structure.  The list of sarif files to ingest
is in the file given by the sarif-files argument, a list of paths of the form

    <organization>/<project-sarif>

sarif-extract-scans-runner creates these files:

- successful_runs -- optional, one file.  Track saved file status and only
  re-run failed attempts

- org*/project*.scanspec -- one scanspec file per org/project listed in
  sarif-files.  Required by `sarif-extract-scans`

- org*/project*.scanlog -- failures from 'sarif-extract-scans' are logged here

It also creates the directories

- org*/project*.scantables -- one directory per org/project, each holding the
  tables produced by `sarif-extract-scans`:
    ├── codeflows.csv
    ├── projects.csv
    ├── results.csv
    └── scans.csv

As example:

    cd ../data/treeio
    sarif-extract-scans-runner -i1 -s successful-runs - < LIST-OF-SARIF-FILES

Passing `-` as the sarif-files argument reads the list from standard input.
"""
# NOTE(review): in the reviewed copy of this file the text between the usage
# example above and the first use of `args` (remainder of the docstring, the
# imports and the argument parser) was missing.  The imports and parse_args()
# below were reconstructed from the names the visible code uses -- confirm
# them against the project's original before merging.
import argparse
import json
import os
import pickle
import subprocess
import sys
from datetime import datetime

# Signatures accepted by the check in main(); the user-facing message there
# lists the same two values.
SUPPORTED_SIGNATURES = ("LGTM", "CLI")


def parse_args(argv=None):
    """Parse the command line (`argv` defaults to sys.argv[1:]).

    NOTE(review): option spellings, help texts and defaults are reconstructed.
    Grounded in the visible code/docstring: the attribute names, `-i` and `-s`
    taking a value (usage example), `""` meaning "not given" for outdir and
    successful_runs, and `-` meaning stdin for sarif-files.  The defaults for
    --input-signature, --max-files and --update-interval are assumptions.
    """
    parser = argparse.ArgumentParser(
        description="Run sarif-extract-scans over a directory hierarchy")
    parser.add_argument("sarif_files", metavar="sarif-files",
                        help="File containing the list of sarif files, "
                             "use - for stdin")
    parser.add_argument("-f", "--input-signature", default="LGTM",
                        help="Signature of the sarif input, one of "
                             "[LGTM, CLI].  Default: %(default)s")
    parser.add_argument("-o", "--outdir", default="",
                        help="Outermost output directory")
    parser.add_argument("-m", "--max-files", type=int, default=100000,
                        help="Maximum number of files to process.  "
                             "Default: %(default)d")
    parser.add_argument("-i", "--update-interval", type=int, default=100,
                        help="Print a timestamp and save state for every Nth "
                             "file.  Default: %(default)d")
    parser.add_argument("-s", "--successful-runs", default="",
                        help="File tracking successful runs; when given, only "
                             "new/failed entries are (re)run")
    parser.add_argument("--with-timestamps", action="store_true",
                        help="Each sarif-files line is "
                             "<sarif-path>,<timestamp-file>")
    args = parser.parse_args(argv)
    if args.update_interval < 1:
        # The main loop computes `count % update_interval`.
        parser.error("--update-interval must be at least 1")
    return args


def read_listing(sarif_files):
    """Return the raw lines of the sarif-files listing ('-' reads stdin)."""
    if sarif_files == "-":
        # Read without a `with` block so sys.stdin is not closed behind the
        # caller's back.
        return sys.stdin.readlines()
    with open(sarif_files, "r") as fp:
        return fp.readlines()


def parse_listing_line(line, with_timestamps):
    """Split one listing line into `(sarif_path, timestamp_file_or_None)`.

    Raises ValueError when `with_timestamps` is set and the line is not of
    the form `<sarif-path>,<timestamp-file>`.
    """
    if not with_timestamps:
        return line.rstrip(), None
    fields = line.split(",")
    if len(fields) != 2:
        raise ValueError(
            "expected '<sarif-path>,<timestamp-file>', got {!r}".format(line))
    return fields[0].rstrip(), fields[1].strip()


def select_entries(lines, with_timestamps, max_files):
    """Return at most `max_files` parsed entries, skipping blank lines.

    The original loop started its counter at -1 and stopped on
    `count > max_files`, which let `max_files + 1` entries through.
    """
    entries = []
    for line in lines:
        if not line.strip():
            continue  # a blank line would otherwise become open('')
        if len(entries) >= max_files:
            break
        entries.append(parse_listing_line(line, with_timestamps))
    return entries


def load_successful_runs(filename):
    """Load the saved set of successful paths; empty set if no file yet."""
    # SECURITY: pickle.load executes arbitrary code from the file.  Only point
    # --successful-runs at files written by this tool.
    try:
        with open(filename, "rb") as infile:
            return pickle.load(infile)
    except FileNotFoundError:
        return set()


def save_successful_runs(filename, successful_runs):
    """Persist the set of successful paths to `filename`."""
    with open(filename, "wb") as outfile:
        pickle.dump(successful_runs, outfile)


def ensure_dir(path):
    """Create `path` and any missing parents (leaf mode 0o755), best effort.

    FileExistsError is still raised by makedirs when `path` exists as a
    regular file; like the original mkdir calls, that case is ignored.
    """
    try:
        os.makedirs(path, mode=0o755, exist_ok=True)
    except FileExistsError:
        pass


def compute_scan_id(path):
    """Return the scan id: a hash of the sarif file's contents."""
    # NOTE(review): the import providing `hash` is not visible in the reviewed
    # text; `sarif_cli` is assumed to be the providing package -- confirm.
    from sarif_cli import hash as sarif_hash
    with open(path, "rb") as f:
        return sarif_hash.hash_unique(f.read())


def build_scan_spec(scan_id, path, timestamp_fname=None):
    """Build the scanspec dictionary written to `<path>.scanspec`."""
    scan_spec = {
        "scan_id": scan_id,          # pd.Int64Dtype()
        "sarif_file_name": path,     # pd.StringDtype()
    }
    if timestamp_fname is not None:
        scan_spec["timestamp_file_name"] = timestamp_fname
    return scan_spec


def process_entry(path, timestamp_fname, outer_dir, input_signature):
    """Write the scanspec for `path`, run `sarif-extract-scans`, report.

    Prints an OK/FAIL line; on failure writes the tool's stderr to
    `<outer_dir><path>.scanlog` and prints the command and the last six
    stderr lines.  Returns True when the tool exited with status 0.
    """
    base = outer_dir + path
    scan_spec = build_scan_spec(compute_scan_id(path), path, timestamp_fname)

    # Project directory.  The original used a non-recursive os.mkdir here,
    # which raised FileNotFoundError under an output directory whenever the
    # org-level parent did not exist yet.
    ensure_dir(base)

    scan_spec_file = base + ".scanspec"
    with open(scan_spec_file, "w") as fp:
        json.dump(scan_spec, fp)

    # Table output directory
    output_dir = base + ".scantables"
    ensure_dir(output_dir)

    scan_log_file = base + ".scanlog"
    csv_outfile = base

    # NOTE(review): the original computed a `--with-timestamps` option list
    # here (under an `XX:` marker) but never passed it on; the command line
    # is kept exactly as it was actually run.
    cmd = ["sarif-extract-scans", scan_spec_file, output_dir, csv_outfile,
           "-f", input_signature]
    runstats = subprocess.run(cmd, capture_output=True, text=True)

    if runstats.returncode == 0:
        print("{:6} {}".format("OK", path))
        return True

    print("{:6} {} {}".format("FAIL", path, scan_log_file))
    # log error
    with open(scan_log_file, "w") as fp:
        fp.write(runstats.stderr)
    # show command for manual re-run
    print("{:6} {}".format("", "Command was:"))
    print("{:6} {}".format("", " ".join(cmd)))
    # report only tail of stderr
    print("{:6} {}".format("", "Error tail:"))
    if runstats.stderr:
        for line in runstats.stderr.splitlines()[-6:]:
            print("{:6} {}".format("", line))
    return False


def main(argv=None):
    """Run the whole ingest; return the process exit status."""
    args = parse_args(argv)

    if args.input_signature not in SUPPORTED_SIGNATURES:
        print("Unsupported sarif signature requested.")
        print("Use one of [LGTM, CLI].")
        return 1  # was sys.exit(0): an error must not report success

    #
    # If outdir is specified, create the outermost output directory
    # (like <outer_dir>/*/*.scantables)
    #
    outer_dir = args.outdir
    if outer_dir != "":
        outer_dir += "/"
        ensure_dir(outer_dir)

    #
    # Collect sarif file information
    #
    entries = select_entries(read_listing(args.sarif_files),
                             args.with_timestamps, args.max_files)

    # Use saved status, only re-run failed attempts
    use_successful_runs = args.successful_runs != ""
    successful_runs = (load_successful_runs(args.successful_runs)
                       if use_successful_runs else set())

    try:
        for count, (path, timestamp_fname) in enumerate(entries):
            # Skip before touching the sarif file: avoids re-reading and
            # re-hashing files that are not going to be re-run, and keeps an
            # existing scanspec in step with the tables produced from it.
            if use_successful_runs and path in successful_runs:
                continue

            # Some timing information, and save occasionally
            if count % args.update_interval == 0:
                print("{:6} {}".format("DATE", datetime.now().isoformat()))
                if use_successful_runs:
                    save_successful_runs(args.successful_runs, successful_runs)

            succeeded = process_entry(path, timestamp_fname, outer_dir,
                                      args.input_signature)
            if succeeded and use_successful_runs:
                successful_runs.add(path)
    finally:
        # Also reached on an exception or Ctrl-C, so progress since the last
        # periodic save is not lost.
        if use_successful_runs:
            save_successful_runs(args.successful_runs, successful_runs)
    return 0


if __name__ == "__main__":
    sys.exit(main())