mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
sarif-extract-tables: also output relatedLocations table
With --related-locations,
../../bin/sarif-results-summary -r results.sarif
produces the details
RESULT: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:72:722:73: Character ''' is repeated [here](1) in the same character class.
Character ''' is repeated [here](2) in the same character class.
Character ''' is repeated [here](3) in the same character class.
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:74:722:75: here
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:76:722:77: here
REFERENCE: static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js:722:78:722:79: here
Via
../../bin/sarif-extract-tables results.sarif tables
sarif-extract-tables now produces two output tables,
tables/
├── messages.csv
└── relatedLocations.csv
that contain the relevant information and can be joined or otherwise combined on
the struct_id_4055 key.
For example, adding to the end of sarif-extract-tables:
import IPython
IPython.embed()
msg = d2[d2.message.str.startswith("Character ''' is repeated [here]")]
dr3[dr3.struct_id_4055 == msg.struct_id_4055.values[0]]
In [24]: msg
Out[24]:
struct_id_4055 ... message
180 4796917312 ... Character ''' is repeated [here](1) in the sam...
[1 rows x 7 columns]
In [25]: dr3[dr3.struct_id_4055 == msg.struct_id_4055.values[0]]
Out[25]:
struct_id_4055 uri startLine startColumn endLine endColumn message
180 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 74 722 75 here
181 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 76 722 77 here
182 4796917312 static/js/tinymce/jscripts/tiny_mce/plugins/pa... 722 78 722 79 here
or manually from the shell:
# pick up the struct_id_4055:
0:$ grep "static.*Character ''' is repeated \[here\]" tables/messages.csv
180,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,72,722,73,"Character ''' is repeated [here](1) in the same character class.
# and find relatedLocations:
0:$ grep 4927448704 tables/relatedLocations.csv
180,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,74,722,75,here
181,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,76,722,77,here
182,4927448704,static/js/tinymce/jscripts/tiny_mce/plugins/paste/editor_plugin_src.js,722,78,722,79,here
Changes:
- Introduce scli-dyys, a random id string for later identification and removal of
dummy table rows.
- Keep the struct_id_4055 column to join tables as needed.
- Output is now written to a directory as there are always multiple files.
This commit is contained in:
committed by
=Michael Hohn
parent
ec9a0b5590
commit
ad738abed3
@@ -3,6 +3,7 @@
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import pathlib
|
||||
from sarif_cli import signature
|
||||
from sarif_cli import typegraph
|
||||
import sys
|
||||
@@ -14,6 +15,7 @@ import pandas as pd
|
||||
#
|
||||
parser = argparse.ArgumentParser(description='Read a sarif file and produce tabular output.')
|
||||
parser.add_argument('file', metavar='sarif-file', type=str, help='input file, - for stdin')
|
||||
parser.add_argument('outdir', metavar='output-dir', type=str, help='output directory')
|
||||
parser.add_argument('-f', '--output-format', metavar='format', type=str, default="csv",
|
||||
help='Output format for table. Currently just csv; '
|
||||
' other formats supported by pandas can be added.')
|
||||
@@ -100,15 +102,15 @@ sf = lambda num: tgraph.dataframes['Struct' + str(num)]
|
||||
af = lambda num: tgraph.dataframes['Array' + str(num)]
|
||||
|
||||
#
|
||||
# Form the dataframe via joins
|
||||
# Form the message dataframe via joins
|
||||
#
|
||||
d1 = (
|
||||
sf(4055)
|
||||
.merge(af('0350'), how="left", left_on='locations', right_on='array_id', validate="1:m")
|
||||
.drop(columns=['struct_id', 'locations', 'array_id', 'value_index', 'type_at_index'])
|
||||
.drop(columns=['locations', 'array_id', 'value_index', 'type_at_index'])
|
||||
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
||||
suffixes=("_4055", "_2683"), validate="1:m")
|
||||
.drop(columns=['struct_id', 'id_or_value_at_index'])
|
||||
.drop(columns=['struct_id_2683', 'id_or_value_at_index'])
|
||||
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
||||
.drop(columns=['struct_id', 'physicalLocation'])
|
||||
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
||||
@@ -135,14 +137,48 @@ d1 = (
|
||||
#
|
||||
# Reproduce ALL `file:line:col:line:col: message` entries as a table
|
||||
#
|
||||
d2 = (d1[['uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text_4055']]
|
||||
d2 = (d1[['struct_id_4055', 'uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text_4055']]
|
||||
.rename({'text_4055': 'message'}, axis='columns'))
|
||||
|
||||
#
|
||||
# Form the relatedLocation dataframe via joins. This is subtly different from d1:
|
||||
# left_on=relatedLocations, and no left_on='message_4055'
|
||||
dr1 = (
|
||||
sf(4055)
|
||||
.merge(af('0350'), how="left", left_on='relatedLocations', right_on='array_id', validate="1:m")
|
||||
.drop(columns=['relatedLocations', 'array_id', 'value_index', 'type_at_index'])
|
||||
#
|
||||
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
||||
suffixes=("_4055", "_2683"), validate="1:m")
|
||||
.drop(columns=['struct_id_2683', 'id_or_value_at_index'])
|
||||
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
||||
#
|
||||
.drop(columns=['struct_id', 'physicalLocation'])
|
||||
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
||||
.drop(columns=['struct_id', 'region'])
|
||||
.merge(sf(2685), how="left", left_on='artifactLocation', right_on='struct_id', validate="1:m")
|
||||
#
|
||||
.drop(columns=['struct_id', 'artifactLocation'])
|
||||
.merge(sf(2774), how="left", left_on='message_2683', right_on='struct_id', validate="1:m")
|
||||
.drop(columns=['struct_id', 'message_2683'])
|
||||
)
|
||||
|
||||
dr2 = (dr1[['struct_id_4055', 'uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text']]
|
||||
.rename({'text': 'message'}, axis='columns'))
|
||||
|
||||
# Remove dummy locations previously injected by signature.fillsig
|
||||
dr3 = dr2[dr2.uri != 'scli-dyys dummy value']
|
||||
|
||||
#
|
||||
# Write output
|
||||
#
|
||||
if args.output_format == 'csv':
|
||||
d2.to_csv(sys.stdout, index_label='index')
|
||||
p = pathlib.Path(args.outdir)
|
||||
p.mkdir(exist_ok=True)
|
||||
with p.joinpath('messages.csv').open(mode='wb') as messages:
|
||||
d2.to_csv(messages, index_label='index')
|
||||
with p.joinpath('relatedLocations.csv').open(mode='wb') as relo:
|
||||
dr3.to_csv(relo, index_label='index')
|
||||
|
||||
else:
|
||||
sys.stderr.write("unknown output format")
|
||||
|
||||
@@ -203,12 +203,15 @@ properties_keys = set([first for first, _ in
|
||||
('sub-severity', 'String'),
|
||||
('tags', 'Array003'),
|
||||
]])
|
||||
dummy_properties = { 'kind' : 'unspecified',
|
||||
'precision' : 'unspecified',
|
||||
'security-severity' : 'unspecified',
|
||||
'severity' : 'unspecified',
|
||||
'sub-severity' : 'unspecified',
|
||||
'tags' : ['unspecified'],
|
||||
#
|
||||
# scli-dyys is a random id string for later identification of dummy values
|
||||
#
|
||||
dummy_properties = { 'kind' : 'scli-dyys dummy value',
|
||||
'precision' : 'scli-dyys dummy value',
|
||||
'security-severity' : 'scli-dyys dummy value',
|
||||
'severity' : 'scli-dyys dummy value',
|
||||
'sub-severity' : 'scli-dyys dummy value',
|
||||
'tags' : ['scli-dyys dummy value'],
|
||||
}
|
||||
|
||||
relatedLocations_keys = set([first for first, _ in
|
||||
@@ -221,16 +224,16 @@ dummy_newlineSequences = ['\r\n', '\n', '\u2028', '\u2029']
|
||||
|
||||
dummy_relatedLocations_entry = [
|
||||
{'id': -1,
|
||||
'physicalLocation': {'artifactLocation': {'uri': '',
|
||||
'uriBaseId': '%SRCROOT%',
|
||||
'physicalLocation': {'artifactLocation': {'uri': 'scli-dyys dummy value',
|
||||
'uriBaseId': 'scli-dyys dummy value',
|
||||
'index': -1},
|
||||
'region': {'startLine': -1,
|
||||
'startColumn': -1,
|
||||
'endLine': -1,
|
||||
'endColumn': -1}},
|
||||
'message': {'text': ''}}]
|
||||
'message': {'text': 'scli-dyys dummy value'}}]
|
||||
|
||||
dummy_message_entry = {'text': ''}
|
||||
dummy_message_entry = {'text': 'scli-dyys dummy value'}
|
||||
|
||||
def fillsig_dict(args, elem, context):
|
||||
""" Fill in the missing fields in dictionary signatures.
|
||||
|
||||
Reference in New Issue
Block a user