mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 01:13:03 +01:00
Add support for external timestamps
This allows external files to supply the timestamp values that otherwise default to
timestamps = {
"db_create_start" : pd.Timestamp(0.0, unit='s'),
"db_create_stop" : pd.Timestamp(0.0, unit='s'),
"scan_start_date" : pd.Timestamp(0.0, unit='s'),
"scan_stop_date" : pd.Timestamp(0.0, unit='s'),
}
When such a file is given, its values are used instead of the above defaults.
This patch changes the top-level scripts
bin/sarif-extract-scans
bin/sarif-extract-scans-runner
and provides
scripts/test-timestamps.sh
for verification.
The timestamp file may use the following keys; scan_start and scan_stop are
stored as scan_start_date and scan_stop_date:
{
"db_create_start": ...,
"db_create_stop": ...,
"scan_start": ...,
"scan_stop": ...
}
This commit is contained in:
committed by
=Michael Hohn
parent
57710bdd14
commit
ee11214aee
@@ -34,6 +34,17 @@ parser.add_argument('outdir', metavar='output-dir', type=str, help='output direc
|
|||||||
parser.add_argument('csvout', metavar='csv-outfile', type=str, help='processing status csv output file name to use')
|
parser.add_argument('csvout', metavar='csv-outfile', type=str, help='processing status csv output file name to use')
|
||||||
parser.add_argument('-r', '--write-raw-tables', action="store_true",
|
parser.add_argument('-r', '--write-raw-tables', action="store_true",
|
||||||
help='Write the raw sarif tables to the output directory')
|
help='Write the raw sarif tables to the output directory')
|
||||||
|
|
||||||
|
# -t/--with-timestamps: the scan-spec file additionally names a timestamp file.
# The example in the help text is valid JSON (all keys quoted) so it can be
# copied as-is into a scan-spec file.
parser.add_argument('-t', '--with-timestamps', action='store_true',
                    help='Read name of files containing timestamp information '
                    'from the scan-spec.json file. '
                    'The file format changes from '
                    'e.g., '
                    '{"scan_id": 15092319597255524458, "sarif_file_name": "sqlidb-0.1.sarif"} '
                    'to '
                    '{"scan_id": 15092319597255524458, "sarif_file_name": "sqlidb-0.1.sarif", "timestamp_file_name": "sqlidb-0.1.timestamps"}'
                    )
|
||||||
|
|
||||||
parser.add_argument('-f','--input-signature', metavar='input-signature', type=str, default="CLI",
|
parser.add_argument('-f','--input-signature', metavar='input-signature', type=str, default="CLI",
|
||||||
help='Signature of the sarif, as in, where it was generated it may affect the signature.\n'
|
help='Signature of the sarif, as in, where it was generated it may affect the signature.\n'
|
||||||
'Options: LGTM, CLI\n'
|
'Options: LGTM, CLI\n'
|
||||||
@@ -64,6 +75,22 @@ def load(fname):
|
|||||||
|
|
||||||
scan_spec = load(args.file)
|
scan_spec = load(args.file)
|
||||||
sarif_struct = load(scan_spec['sarif_file_name'])
|
sarif_struct = load(scan_spec['sarif_file_name'])
|
||||||
|
if args.with_timestamps:
    # Timestamps come from the file named in the scan spec.
    t1 = load(scan_spec['timestamp_file_name'])
    timestamps = dict(t1)
    # TODO Remove this kludge for wrong keywords.
    # Files may spell the scan keys `scan_start` / `scan_stop`; copy those to
    # the `*_date` names used by the defaults below.  Files that already use
    # the `*_date` names are taken as-is instead of failing with a KeyError.
    for short_key, full_key in (("scan_start", "scan_start_date"),
                                ("scan_stop", "scan_stop_date")):
        if short_key in t1:
            timestamps[full_key] = t1[short_key]
else:
    # No external timestamps requested: use second 0 (the epoch) everywhere.
    timestamps = {
        "db_create_start" : pd.Timestamp(0.0, unit='s'),
        "db_create_stop" : pd.Timestamp(0.0, unit='s'),
        "scan_start_date" : pd.Timestamp(0.0, unit='s'),
        "scan_stop_date" : pd.Timestamp(0.0, unit='s'),
    }
|
||||||
|
|
||||||
status_writer.setup_status_filenames(scan_spec['sarif_file_name'])
|
status_writer.setup_status_filenames(scan_spec['sarif_file_name'])
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -189,7 +216,9 @@ scantabs.columns_to_reindex = {
|
|||||||
# joins for projects has to happen first as it backfills the guess about the project_id
|
# joins for projects has to happen first as it backfills the guess about the project_id
|
||||||
scantabs.projects = st.joins_for_projects(bt, external_info)
|
scantabs.projects = st.joins_for_projects(bt, external_info)
|
||||||
scantabs.results = st.joins_for_results(bt, external_info)
|
scantabs.results = st.joins_for_results(bt, external_info)
|
||||||
scantabs.scans = st.joins_for_scans(bt, external_info, scantabs, args.input_signature)
|
scantabs.scans = \
|
||||||
|
st.joins_for_scans(bt, external_info, scantabs,
|
||||||
|
args.input_signature, timestamps)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Replace the remaining internal ids with snowflake ids
|
# Replace the remaining internal ids with snowflake ids
|
||||||
|
|||||||
@@ -110,6 +110,16 @@ parser.add_argument('-s', '--successful-runs', metavar='filename', type=str,
|
|||||||
'new/failed entries from sarif-files.'
|
'new/failed entries from sarif-files.'
|
||||||
' Default: "%(default)s"')
|
' Default: "%(default)s"')
|
||||||
|
|
||||||
|
# -t/--with-timestamps: each input line is "<sarif file>,<timestamp file>".
# The note in the help text matches the parsing done in the main loop: the
# timestamp file name is stripped of surrounding whitespace, the sarif file
# name is taken exactly as it appears before the comma.
parser.add_argument('-t', '--with-timestamps', action='store_true',
                    help='Read names of files containing timestamp information '
                    'following the name of the sarif source file. '
                    'E.g., '
                    'sarif-extract-scans-runner --with-timestamps - << EOF '
                    'foo.sarif,timestamps.json '
                    'EOF '
                    'Note: whitespace around the timestamp file name is stripped, '
                    'but the sarif file name is used as given, so '
                    'foo.sarif,timestamps.json '
                    'and foo.sarif ,timestamps.json are different.'
                    )
|
||||||
|
|
||||||
parser.add_argument('--doc', dest='fulldoc', default=False,
|
parser.add_argument('--doc', dest='fulldoc', default=False,
|
||||||
action='store_true',
|
action='store_true',
|
||||||
@@ -154,7 +164,13 @@ if use_successful_runs:
|
|||||||
successful_runs = set()
|
successful_runs = set()
|
||||||
|
|
||||||
count = -1
|
count = -1
|
||||||
for path in paths:
|
for path_timestamp in paths:
|
||||||
|
if args.with_timestamps:
|
||||||
|
path, t1 = path_timestamp.split(',')
|
||||||
|
timestamp_fname = t1.strip()
|
||||||
|
else:
|
||||||
|
path = path_timestamp
|
||||||
|
|
||||||
count += 1
|
count += 1
|
||||||
if count > args.max_files: break
|
if count > args.max_files: break
|
||||||
#
|
#
|
||||||
@@ -169,11 +185,17 @@ for path in paths:
|
|||||||
data = f.read()
|
data = f.read()
|
||||||
scan_id = hash.hash_unique(data)
|
scan_id = hash.hash_unique(data)
|
||||||
|
|
||||||
scan_spec = {
|
if args.with_timestamps:
|
||||||
"scan_id": scan_id, # pd.Int64Dtype()
|
scan_spec = {
|
||||||
"sarif_file_name": path, # pd.StringDtype()
|
"scan_id": scan_id, # pd.Int64Dtype()
|
||||||
}
|
"sarif_file_name": path, # pd.StringDtype()
|
||||||
|
"timestamp_file_name": timestamp_fname
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
scan_spec = {
|
||||||
|
"scan_id": scan_id, # pd.Int64Dtype()
|
||||||
|
"sarif_file_name": path, # pd.StringDtype()
|
||||||
|
}
|
||||||
#
|
#
|
||||||
# If using outermost output directory, create project directory:
|
# If using outermost output directory, create project directory:
|
||||||
# (like <outer_dir>/<repositoryUri>/*.scantables)
|
# (like <outer_dir>/<repositoryUri>/*.scantables)
|
||||||
@@ -211,7 +233,13 @@ for path in paths:
|
|||||||
|
|
||||||
scan_log_file = os.path.join(outer_dir+ path + ".scanlog")
|
scan_log_file = os.path.join(outer_dir+ path + ".scanlog")
|
||||||
csv_outfile = os.path.join(outer_dir+ path)
|
csv_outfile = os.path.join(outer_dir+ path)
|
||||||
runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir, csv_outfile, "-f", args.input_signature],
|
if args.with_timestamps:
|
||||||
|
timestamp_options = ['--with-timestamps']
|
||||||
|
else:
|
||||||
|
timestamp_options = []
|
||||||
|
runstats = subprocess.run(['sarif-extract-scans', scan_spec_file, output_dir,
|
||||||
|
csv_outfile, "-f", args.input_signature,
|
||||||
|
*timestamp_options],
|
||||||
capture_output=True, text=True)
|
capture_output=True, text=True)
|
||||||
if runstats.returncode == 0:
|
if runstats.returncode == 0:
|
||||||
print("{:6} {}".format("OK", path))
|
print("{:6} {}".format("OK", path))
|
||||||
|
|||||||
@@ -116,7 +116,7 @@ def joins_for_projects(basetables, external_info):
|
|||||||
#
|
#
|
||||||
# Scans table
|
# Scans table
|
||||||
#
|
#
|
||||||
def joins_for_scans(basetables, external_info, scantables, sarif_type):
|
def joins_for_scans(basetables, external_info, scantables, sarif_type, timestamps : dict):
|
||||||
"""
|
"""
|
||||||
Form the `scans` table for the ScanTables dataclass
|
Form the `scans` table for the ScanTables dataclass
|
||||||
"""
|
"""
|
||||||
@@ -135,12 +135,7 @@ def joins_for_scans(basetables, external_info, scantables, sarif_type):
|
|||||||
"id" : e.scan_id,
|
"id" : e.scan_id,
|
||||||
"commit_id" : commit_id,
|
"commit_id" : commit_id,
|
||||||
"project_id" : e.project_id,
|
"project_id" : e.project_id,
|
||||||
# TODO extract real date information from somewhere external
|
**timestamps,
|
||||||
"db_create_start" : pd.Timestamp(0.0, unit='s'),
|
|
||||||
"db_create_stop" : pd.Timestamp(0.0, unit='s'),
|
|
||||||
"scan_start_date" : pd.Timestamp(0.0, unit='s'),
|
|
||||||
"scan_stop_date" : pd.Timestamp(0.0, unit='s'),
|
|
||||||
#
|
|
||||||
"tool_name" : driver_name[0],
|
"tool_name" : driver_name[0],
|
||||||
"tool_version" : driver_version[0],
|
"tool_version" : driver_version[0],
|
||||||
"tool_query_commit_id" : pd.NA,
|
"tool_query_commit_id" : pd.NA,
|
||||||
|
|||||||
86
scripts/test-timestamps.sh
Normal file
86
scripts/test-timestamps.sh
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#* Setup
|
||||||
|
cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection
|
||||||
|
ls -la sqlidb-0.sarif sqlidb-1.sarif
|
||||||
|
|
||||||
|
#
|
||||||
|
source ~/local/sarif-cli/.venv/bin/activate
|
||||||
|
|
||||||
|
#* Utility functions
|
||||||
|
function get-csv() {
    # Run the scan extraction for $1.sarif with an explicit timestamp file
    # and print the timestamp columns of the resulting scans.csv.
    #
    # $1: base name of the sarif file (without the .sarif suffix), relative
    #     to the current directory.
    #
    # Returns non-zero if the data directory cannot be entered.
    local base=$1

    #* Insert versionControlProvenance
    sarif-insert-vcp "${base}.sarif" > "${base}.1.sarif"

    #* Populate CSV with provided timestamps
    cat > "${base}.timestamp" << EOF
{
    "db_create_start": "2023-07-03T00:56:15.576222",
    "db_create_stop": "2023-07-03T00:56:42.781839",
    "scan_start": "2023-07-03T00:56:47.546696",
    "scan_stop": "2023-07-03T00:57:55.988059"
}
EOF

    # The runner reads "sarif-file,timestamp-file" lines from stdin; the line
    # is kept at column 0 so no stray whitespace ends up in the file names.
    sarif-extract-scans-runner --input-signature CLI --with-timestamps - <<EOF
${base}.1.sarif,${base}.timestamp
EOF

    #* List CSV messages
    # Stop here if the directory is missing rather than listing elsewhere.
    cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection || return 1
    head -4 "${base}.1.sarif.csv"

    #* List CSV output
    # The trailing glob stays outside the quotes so it still expands.
    ls -la "${base}".1*
    find "${base}.1.sarif.scantables" -print
    csvcut -c "db_create_start,db_create_stop,scan_start_date,scan_stop_date" \
           "${base}.1.sarif.scantables/scans.csv"

    # #* show log
    # echo "run log:"
    # cat $1.1.sarif.scanlog
}
|
||||||
|
|
||||||
|
function get-csv-no-ts() {
    # Run the scan extraction for $1.sarif WITHOUT a timestamp file and print
    # the timestamp columns of the resulting scans.csv.
    #
    # $1: base name of the sarif file (without the .sarif suffix), relative
    #     to the current directory.
    #
    # Returns non-zero if the data directory cannot be entered.
    local base=$1

    #* Insert versionControlProvenance
    sarif-insert-vcp "${base}.sarif" > "${base}.1.sarif"

    #* Get CSV with dummy timestamps
    sarif-extract-scans-runner --input-signature CLI - <<EOF
${base}.1.sarif
EOF

    #* List CSV messages
    # Stop here if the directory is missing rather than listing elsewhere.
    cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection || return 1
    head -4 "${base}.1.sarif.csv"

    #* List CSV output
    # The trailing glob stays outside the quotes so it still expands.
    ls -la "${base}".1*
    find "${base}.1.sarif.scantables" -print
    csvcut -c "db_create_start,db_create_stop,scan_start_date,scan_stop_date" \
           "${base}.1.sarif.scantables/scans.csv"
}
|
||||||
|
|
||||||
|
clean-csv () {
    # Remove the outputs of a previous run for base name $1 from the data
    # directory: the status csv, the scan log / scan spec files, and the
    # scantables directory.
    #
    # $1: base name of the sarif file (without the .sarif suffix).
    #
    # Returns non-zero without deleting anything if the data directory
    # cannot be entered; otherwise the rm calls would run in whatever
    # directory the caller happened to be in.
    local base=$1

    cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection || return 1
    rm -f "${base}.1.sarif.csv"
    # Glob and brace expansion stay outside the quotes so they still expand.
    rm -f "${base}".1*scan{log,spec}
    rm -fR "${base}.1.sarif.scantables"
}
|
||||||
|
|
||||||
|
#* Clean up and run tool
|
||||||
|
cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection
|
||||||
|
clean-csv sqlidb-0
|
||||||
|
get-csv sqlidb-0
|
||||||
|
|
||||||
|
clean-csv sqlidb-1
|
||||||
|
get-csv-no-ts sqlidb-1
|
||||||
|
|
||||||
|
#* Look for the timestamp value
|
||||||
|
function check-timestamp() {
    # Search the given scans.csv file(s) for either the custom timestamp
    # written by get-csv or the 1970-01-01 default, with one line of context.
    #
    # $1: file name or glob pattern of the csv file(s) to search.
    local stamp_regex="00:56:15.57622|1970-01-01"

    # ${1} is left unquoted on purpose: callers pass a quoted glob that has
    # to expand here.
    ag -C1 "${stamp_regex}" ${1}
}
|
||||||
|
# With custom stamp:
|
||||||
|
check-timestamp 'sqlidb-0.1*/scans.csv'
|
||||||
|
# With default stamp:
|
||||||
|
check-timestamp 'sqlidb-1.1*/scans.csv'
|
||||||
|
#
|
||||||
Reference in New Issue
Block a user