mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
sarif-extract-tables: also output relatedLocations table
With --related-locations,
../../bin/sarif-results-summary -r results.sarif
produces the details
RESULT: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:72:722:73: Character ''' is repeated [here](1) in the same character class.
Character ''' is repeated [here](2) in the same character class.
Character ''' is repeated [here](3) in the same character class.
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:74:722:75: here
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:76:722:77: here
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:78:722:79: here
Via
../../bin/sarif-extract-tables results.sarif tables
sarif-extract-tables now produces two output tables,
tables/
├── messages.csv
└── relatedLocations.csv
that contain the relevant information and can be joined or otherwise combined on
the struct_id_4055 key.
For example, add the following to the end of sarif-extract-tables:
import IPython
IPython.embed()
msg = d2[d2.message.str.startswith("Character ''' is repeated [here]")]
dr3[dr3.struct_id_4055 == msg.struct_id_4055.values[0]]
In [24]: msg
Out[24]:
struct_id_4055 ... message
180 4796917312 ... Character ''' is repeated [here](1) in the sam...
[1 rows x 7 columns]
In [25]: dr3[dr3.struct_id_4055 == msg.struct_id_4055.values[0]]
Out[25]:
struct_id_4055 uri startLine startColumn endLine endColumn message
180 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 74 722 75 here
181 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 76 722 77 here
182 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 78 722 79 here
or manually from the shell:
# pick up the struct_id_4055:
0:$ grep "static.*Character ''' is repeated \[here\]" tables/messages.csv
180,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,72,722,73,"Character ''' is repeated [here](1) in the same character class.
# and find relatedLocations:
0:$ grep 4927448704 tables/relatedLocations.csv
180,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,74,722,75,here
181,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,76,722,77,here
182,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,78,722,79,here
Changes:
- Introduce scli-dyys, a random id string for later identification and removal of
dummy table rows.
- Keep the struct_id_4055 column to join tables as needed.
- Output is now written to a directory as there are always multiple files.
This commit is contained in:
committed by
=Michael Hohn
parent
ec9a0b5590
commit
ad738abed3
@@ -3,6 +3,7 @@
|
|||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import pathlib
|
||||||
from sarif_cli import signature
|
from sarif_cli import signature
|
||||||
from sarif_cli import typegraph
|
from sarif_cli import typegraph
|
||||||
import sys
|
import sys
|
||||||
@@ -14,6 +15,7 @@ import pandas as pd
|
|||||||
#
|
#
|
||||||
parser = argparse.ArgumentParser(description='Read a sarif file and produce tabular output.')
|
parser = argparse.ArgumentParser(description='Read a sarif file and produce tabular output.')
|
||||||
parser.add_argument('file', metavar='sarif-file', type=str, help='input file, - for stdin')
|
parser.add_argument('file', metavar='sarif-file', type=str, help='input file, - for stdin')
|
||||||
|
parser.add_argument('outdir', metavar='output-dir', type=str, help='output directory')
|
||||||
parser.add_argument('-f', '--output-format', metavar='format', type=str, default="csv",
|
parser.add_argument('-f', '--output-format', metavar='format', type=str, default="csv",
|
||||||
help='Output format for table. Currently just csv; '
|
help='Output format for table. Currently just csv; '
|
||||||
' other formats supported by pandas can be added.')
|
' other formats supported by pandas can be added.')
|
||||||
@@ -100,15 +102,15 @@ sf = lambda num: tgraph.dataframes['Struct' + str(num)]
|
|||||||
af = lambda num: tgraph.dataframes['Array' + str(num)]
|
af = lambda num: tgraph.dataframes['Array' + str(num)]
|
||||||
|
|
||||||
#
|
#
|
||||||
# Form the dataframe via joins
|
# Form the message dataframe via joins
|
||||||
#
|
#
|
||||||
d1 = (
|
d1 = (
|
||||||
sf(4055)
|
sf(4055)
|
||||||
.merge(af('0350'), how="left", left_on='locations', right_on='array_id', validate="1:m")
|
.merge(af('0350'), how="left", left_on='locations', right_on='array_id', validate="1:m")
|
||||||
.drop(columns=['struct_id', 'locations', 'array_id', 'value_index', 'type_at_index'])
|
.drop(columns=['locations', 'array_id', 'value_index', 'type_at_index'])
|
||||||
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
||||||
suffixes=("_4055", "_2683"), validate="1:m")
|
suffixes=("_4055", "_2683"), validate="1:m")
|
||||||
.drop(columns=['struct_id', 'id_or_value_at_index'])
|
.drop(columns=['struct_id_2683', 'id_or_value_at_index'])
|
||||||
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
||||||
.drop(columns=['struct_id', 'physicalLocation'])
|
.drop(columns=['struct_id', 'physicalLocation'])
|
||||||
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
||||||
@@ -135,14 +137,48 @@ d1 = (
|
|||||||
#
|
#
|
||||||
# Reproduce ALL `file:line:col:line:col: message` entries as a table
|
# Reproduce ALL `file:line:col:line:col: message` entries as a table
|
||||||
#
|
#
|
||||||
d2 = (d1[['uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text_4055']]
|
d2 = (d1[['struct_id_4055', 'uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text_4055']]
|
||||||
.rename({'text_4055': 'message'}, axis='columns'))
|
.rename({'text_4055': 'message'}, axis='columns'))
|
||||||
|
|
||||||
|
#
|
||||||
|
# Form the relatedLocation dataframe via joins. This is subtly different from d1:
|
||||||
|
# left_on=relatedLocations, and no left_on='message_4055'
|
||||||
|
dr1 = (
|
||||||
|
sf(4055)
|
||||||
|
.merge(af('0350'), how="left", left_on='relatedLocations', right_on='array_id', validate="1:m")
|
||||||
|
.drop(columns=['relatedLocations', 'array_id', 'value_index', 'type_at_index'])
|
||||||
|
#
|
||||||
|
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
||||||
|
suffixes=("_4055", "_2683"), validate="1:m")
|
||||||
|
.drop(columns=['struct_id_2683', 'id_or_value_at_index'])
|
||||||
|
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
||||||
|
#
|
||||||
|
.drop(columns=['struct_id', 'physicalLocation'])
|
||||||
|
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
||||||
|
.drop(columns=['struct_id', 'region'])
|
||||||
|
.merge(sf(2685), how="left", left_on='artifactLocation', right_on='struct_id', validate="1:m")
|
||||||
|
#
|
||||||
|
.drop(columns=['struct_id', 'artifactLocation'])
|
||||||
|
.merge(sf(2774), how="left", left_on='message_2683', right_on='struct_id', validate="1:m")
|
||||||
|
.drop(columns=['struct_id', 'message_2683'])
|
||||||
|
)
|
||||||
|
|
||||||
|
dr2 = (dr1[['struct_id_4055', 'uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text']]
|
||||||
|
.rename({'text': 'message'}, axis='columns'))
|
||||||
|
|
||||||
|
# Remove dummy locations previously injected by signature.fillsig
|
||||||
|
dr3 = dr2[dr2.uri != 'scli-dyys dummy value']
|
||||||
|
|
||||||
#
|
#
|
||||||
# Write output
|
# Write output
|
||||||
#
|
#
|
||||||
if args.output_format == 'csv':
|
if args.output_format == 'csv':
|
||||||
d2.to_csv(sys.stdout, index_label='index')
|
p = pathlib.Path(args.outdir)
|
||||||
|
p.mkdir(exist_ok=True)
|
||||||
|
with p.joinpath('messages.csv').open(mode='wb') as messages:
|
||||||
|
d2.to_csv(messages, index_label='index')
|
||||||
|
with p.joinpath('relatedLocations.csv').open(mode='wb') as relo:
|
||||||
|
dr3.to_csv(relo, index_label='index')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
sys.stderr.write("unknown output format")
|
sys.stderr.write("unknown output format")
|
||||||
|
|||||||
@@ -203,12 +203,15 @@ properties_keys = set([first for first, _ in
|
|||||||
('sub-severity', 'String'),
|
('sub-severity', 'String'),
|
||||||
('tags', 'Array003'),
|
('tags', 'Array003'),
|
||||||
]])
|
]])
|
||||||
dummy_properties = { 'kind' : 'unspecified',
|
#
|
||||||
'precision' : 'unspecified',
|
# scli-dyys is a random id string for later identification of dummy values
|
||||||
'security-severity' : 'unspecified',
|
#
|
||||||
'severity' : 'unspecified',
|
dummy_properties = { 'kind' : 'scli-dyys dummy value',
|
||||||
'sub-severity' : 'unspecified',
|
'precision' : 'scli-dyys dummy value',
|
||||||
'tags' : ['unspecified'],
|
'security-severity' : 'scli-dyys dummy value',
|
||||||
|
'severity' : 'scli-dyys dummy value',
|
||||||
|
'sub-severity' : 'scli-dyys dummy value',
|
||||||
|
'tags' : ['scli-dyys dummy value'],
|
||||||
}
|
}
|
||||||
|
|
||||||
relatedLocations_keys = set([first for first, _ in
|
relatedLocations_keys = set([first for first, _ in
|
||||||
@@ -221,16 +224,16 @@ dummy_newlineSequences = ['\r\n', '\n', '\u2028', '\u2029']
|
|||||||
|
|
||||||
dummy_relatedLocations_entry = [
|
dummy_relatedLocations_entry = [
|
||||||
{'id': -1,
|
{'id': -1,
|
||||||
'physicalLocation': {'artifactLocation': {'uri': '',
|
'physicalLocation': {'artifactLocation': {'uri': 'scli-dyys dummy value',
|
||||||
'uriBaseId': '%SRCROOT%',
|
'uriBaseId': 'scli-dyys dummy value',
|
||||||
'index': -1},
|
'index': -1},
|
||||||
'region': {'startLine': -1,
|
'region': {'startLine': -1,
|
||||||
'startColumn': -1,
|
'startColumn': -1,
|
||||||
'endLine': -1,
|
'endLine': -1,
|
||||||
'endColumn': -1}},
|
'endColumn': -1}},
|
||||||
'message': {'text': ''}}]
|
'message': {'text': 'scli-dyys dummy value'}}]
|
||||||
|
|
||||||
dummy_message_entry = {'text': ''}
|
dummy_message_entry = {'text': 'scli-dyys dummy value'}
|
||||||
|
|
||||||
def fillsig_dict(args, elem, context):
|
def fillsig_dict(args, elem, context):
|
||||||
""" Fill in the missing fields in dictionary signatures.
|
""" Fill in the missing fields in dictionary signatures.
|
||||||
|
|||||||
Reference in New Issue
Block a user