mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 09:13:04 +01:00
sarif-extract-tables: initial version, reproduces known output as table
Reproduce the
file:line:col:line:col: message
output from
../../bin/sarif-results-summary results.sarif | grep size
as test/example.
Original sample output is
RESULT: static/js/fileuploader.js:1214:13:1214:17: Unused variable size.
RESULT: static/js/tinymce/jscripts/tiny_mce/plugins/media/js/media.js:438:30:438:34: Unused variable size.
The table result here is
0:$ ../../bin/sarif-extract-tables results.sarif | grep size
0,static/js/fileuploader.js,1214,13,1214,17,Unused variable size.
34,static/js/tinymce/jscripts/tiny_mce/plugins/media/js/media.js,438,30,438,34,Unused variable size.
This commit is contained in:
committed by
=Michael Hohn
parent
f5e73e90ba
commit
ec9a0b5590
@@ -6,7 +6,6 @@ import json
|
|||||||
from sarif_cli import signature
|
from sarif_cli import signature
|
||||||
from sarif_cli import typegraph
|
from sarif_cli import typegraph
|
||||||
import sys
|
import sys
|
||||||
from pprint import pprint
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@@ -15,9 +14,10 @@ import pandas as pd
|
|||||||
#
|
#
|
||||||
parser = argparse.ArgumentParser(description='Read a sarif file and produce tabular output.')
|
parser = argparse.ArgumentParser(description='Read a sarif file and produce tabular output.')
|
||||||
parser.add_argument('file', metavar='sarif-file', type=str, help='input file, - for stdin')
|
parser.add_argument('file', metavar='sarif-file', type=str, help='input file, - for stdin')
|
||||||
# XX
|
parser.add_argument('-f', '--output-format', metavar='format', type=str, default="csv",
|
||||||
# parser.add_argument('-t', '--typedef-signatures', action="store_true",
|
help='Output format for table. Currently just csv; '
|
||||||
# help='Give every object signature a type and report by types')
|
' other formats supported by pandas can be added.')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -44,265 +44,107 @@ sarif_struct = signature.fillsig(args, sarif_struct, context)
|
|||||||
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
||||||
typegraph.destructure(tgraph, typegraph.start_node_2022_02_01, sarif_struct)
|
typegraph.destructure(tgraph, typegraph.start_node_2022_02_01, sarif_struct)
|
||||||
|
|
||||||
if 0:
|
|
||||||
import IPython
|
|
||||||
IPython.embed(header="""
|
|
||||||
---------------------------------
|
|
||||||
ipython repl for
|
|
||||||
|
|
||||||
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
|
||||||
|
|
||||||
---------------------------------
|
|
||||||
Sanity checks:
|
|
||||||
In [4]: tgraph.fields
|
|
||||||
Out[4]:
|
|
||||||
{'String': None,
|
|
||||||
'Int': None,
|
|
||||||
'Bool': None,
|
|
||||||
...
|
|
||||||
}
|
|
||||||
In [6]: tgraph.instances['String']
|
|
||||||
Out[6]: []
|
|
||||||
|
|
||||||
In [7]: tgraph.instances['Int']
|
|
||||||
Out[7]: []
|
|
||||||
|
|
||||||
In [8]: tgraph.instances['Bool']
|
|
||||||
Out[8]: []
|
|
||||||
|
|
||||||
Select value checks:
|
|
||||||
In [9]: tgraph.instances['Struct6787']
|
|
||||||
Out[9]:
|
|
||||||
[(4358601472,
|
|
||||||
'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
|
|
||||||
4362190016,
|
|
||||||
'2.1.0')]
|
|
||||||
|
|
||||||
In [10]: tgraph.fields['Struct6787']
|
|
||||||
Out[10]: ['$schema', 'runs', 'version']
|
|
||||||
|
|
||||||
In [5]: tgraph.instances['Array0177']
|
|
||||||
Out[5]:
|
|
||||||
[(4337396800, 0, 'Struct3388', 4337396928),
|
|
||||||
(4337396800, 1, 'Struct3388', 4337397056)]
|
|
||||||
|
|
||||||
In [12]: tgraph.fields['Array0177']
|
|
||||||
Out[12]: [0]
|
|
||||||
|
|
||||||
In [9]: tgraph.instances['Array7069'][0:5]
|
|
||||||
Out[9]:
|
|
||||||
[(4337397248, 0, 'String', '\r\n'),
|
|
||||||
(4337397248, 1, 'String', '\n'),
|
|
||||||
(4337397248, 2, 'String', '\u2028'),
|
|
||||||
(4337397248, 3, 'String', '\u2029'),
|
|
||||||
(4339863424, 0, 'String', 'maintainability')]
|
|
||||||
|
|
||||||
|
|
||||||
In [10]: tgraph.instances['Struct6299'][:3]
|
|
||||||
Out[10]:
|
|
||||||
[(4315110720, 17, 1214, 13, 1214),
|
|
||||||
(4315111232, -1, -1, 1, -1),
|
|
||||||
(4315124096, 30, 847, 17, 847)]
|
|
||||||
|
|
||||||
In [11]: tgraph.fields['Struct6299']
|
|
||||||
Out[11]: ['endColumn', 'endLine', 'startColumn', 'startLine']
|
|
||||||
""")
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Form output tables
|
# Form output tables
|
||||||
#
|
#
|
||||||
typegraph.attach_tables(tgraph)
|
typegraph.attach_tables(tgraph)
|
||||||
|
|
||||||
import IPython
|
"""
|
||||||
IPython.embed(header="""
|
Reproduce the
|
||||||
---------------------------------
|
|
||||||
ipython repl for tables
|
|
||||||
|
|
||||||
---------------------------------
|
|
||||||
|
|
||||||
tgraph.dataframes
|
|
||||||
In [7]: sorted(tgraph.dataframes.keys())
|
|
||||||
Out[7]:
|
|
||||||
['Array0177',
|
|
||||||
'Array0350',
|
|
||||||
'Array1075',...]
|
|
||||||
|
|
||||||
sorted(tgraph.dataframes.keys())
|
|
||||||
tgraph.dataframes['Array0177']
|
|
||||||
tgraph.dataframes['Struct3388']
|
|
||||||
tgraph.signature_graph['Struct3388']
|
|
||||||
|
|
||||||
XX: reproduce the
|
|
||||||
|
|
||||||
file:line:col:line:col: message
|
file:line:col:line:col: message
|
||||||
|
|
||||||
output from
|
output from
|
||||||
|
|
||||||
../../bin/sarif-results-summary results.sarif | less
|
../../bin/sarif-results-summary results.sarif | grep size
|
||||||
|
|
||||||
as test. Sample:
|
as test/example. Sample output is
|
||||||
|
|
||||||
RESULT: static/js/fileuploader.js:1214:13:1214:17: Unused variable size.
|
RESULT: static/js/fileuploader.js:1214:13:1214:17: Unused variable size.
|
||||||
|
RESULT: static/js/tinymce/jscripts/tiny_mce/plugins/media/js/media.js:438:30:438:34: Unused variable size.
|
||||||
|
|
||||||
Collect typedef/fields via typegraph.pdf:
|
The tree paths that match up .startLine with .text and .uri are
|
||||||
|
- .results > .[] > .message > .text
|
||||||
|
- .results > .[] > .locations > .[] > .physicalLocation > .region > .startLine
|
||||||
|
- .results > .[] > .locations > .[] > .physicalLocation > .artifactLocation > .uri
|
||||||
|
|
||||||
static/js/fileuploader.js
|
Note that this IGNORES the path
|
||||||
Struct2685/uri
|
- .results > .[] > .relatedLocations > .[] > .physicalLocation > .text
|
||||||
|
|
||||||
In [22]: d1 = tgraph.dataframes['Struct2685']
|
We need appropriate table joins to replicate those tree paths; following the edges
|
||||||
In [24]: d1[d1.uri == "static/js/fileuploader.js"]
|
in typegraph.pdf is the most direct way to find relevant tables and keys.
|
||||||
Out[24]:
|
|
||||||
struct_id index uri uriBaseId
|
|
||||||
0 4856718656 0 static/js/fileuploader.js %SRCROOT%
|
|
||||||
77 4856758336 0 static/js/fileuploader.js %SRCROOT%
|
|
||||||
...
|
|
||||||
|
|
||||||
:1214:13:1214:17:
|
We only care about .message with matching .startLine, so left joins should
|
||||||
Struct6299/startLine/startColumn/endLine/endColumn
|
work without losing any data. Here are the tree paths and their corresponding
|
||||||
|
tables; the tree paths are from left to right and the joins can be done in the
|
||||||
|
same order.
|
||||||
|
|
||||||
Unused variable size.
|
Using ../notes/typegraph.pdf, we find these:
|
||||||
Struct2774/message
|
|
||||||
d1 = tgraph.dataframes['Struct2774']
|
|
||||||
In [31]: d1[d1.text.str.contains("Unused variable size")]
|
|
||||||
Out[31]:
|
|
||||||
struct_id text
|
|
||||||
1 4856749504 Unused variable size.
|
|
||||||
103 4856879296 Unused variable size.
|
|
||||||
|
|
||||||
Follow the edges in typegraph.pdf to find joining typedefs and paths.
|
|------------+----------+---------+-------------------+-------------------+------------|
|
||||||
|
| .locations | | .[] | .physicalLocation | .artifactLocation | .uri |
|
||||||
|
| sf(4055) | | af(350) | sf(2683) | sf(4963) | sf(2685) |
|
||||||
|
|------------+----------+---------+-------------------+-------------------+------------|
|
||||||
|
| .locations | | .[] | .physicalLocation | .region | .startLine |
|
||||||
|
| sf(4055) | | af(350) | sf(2683) | sf(4963) | sf(6299) |
|
||||||
|
|------------+----------+---------+-------------------+-------------------+------------|
|
||||||
|
| .message | .text | | | | |
|
||||||
|
| sf(4055) | sf(2774) | | | | |
|
||||||
|
|------------+----------+---------+-------------------+-------------------+------------|
|
||||||
|
|
||||||
Struct4963
|
"""
|
||||||
|
#
|
||||||
Struct2683
|
# Access convenience functions
|
||||||
|
#
|
||||||
|
sf = lambda num: tgraph.dataframes['Struct' + str(num)]
|
||||||
""")
|
af = lambda num: tgraph.dataframes['Array' + str(num)]
|
||||||
|
|
||||||
#
|
#
|
||||||
# These merges are for reconstructing ../../bin/sarif-results-summary output, but
|
# Form the dataframe via joins
|
||||||
# they also form the "bottom right" dataframe on the type graph (see the .pdf) and
|
|
||||||
# can be used for other result-oriented output.
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# original dataframes
|
d1 = (
|
||||||
#
|
sf(4055)
|
||||||
# Struct2685/uri
|
.merge(af('0350'), how="left", left_on='locations', right_on='array_id', validate="1:m")
|
||||||
f2685 = odf_location = tgraph.dataframes['Struct2685']
|
.drop(columns=['struct_id', 'locations', 'array_id', 'value_index', 'type_at_index'])
|
||||||
|
.merge(sf(2683), how="left", left_on='id_or_value_at_index', right_on='struct_id',
|
||||||
# Struct6299/startLine/startColumn/endLine/endColumn
|
suffixes=("_4055", "_2683"), validate="1:m")
|
||||||
f6299 = odf_region = tgraph.dataframes['Struct6299']
|
.drop(columns=['struct_id', 'id_or_value_at_index'])
|
||||||
|
.merge(sf(4963), how="left", left_on='physicalLocation', right_on='struct_id', validate="1:m")
|
||||||
# Struct2774/message
|
.drop(columns=['struct_id', 'physicalLocation'])
|
||||||
f2774 = odf_message = tgraph.dataframes['Struct2774']
|
.merge(sf(6299), how="left", left_on='region', right_on='struct_id', validate="1:m")
|
||||||
|
.drop(columns=['struct_id', 'region'])
|
||||||
#
|
.merge(sf(2685), how="left", left_on='artifactLocation', right_on='struct_id', validate="1:m")
|
||||||
# Linking dataframes
|
.drop(columns=['struct_id', 'artifactLocation'])
|
||||||
#
|
.merge(sf(2774), how="left", left_on='message_4055', right_on='struct_id', validate="1:m")
|
||||||
f4963 = ldf_physicalLocation = tgraph.dataframes['Struct4963']
|
.drop(columns=['struct_id', 'message_4055'])
|
||||||
|
.merge(sf(2774), how="left", left_on='message_2683', right_on='struct_id',
|
||||||
f2683 = tgraph.dataframes['Struct2683']
|
suffixes=("_4055", "_2683"), validate="1:m")
|
||||||
|
|
||||||
# f4963 -> f6299
|
|
||||||
m_f4963_f6299 = pd.merge(
|
|
||||||
f4963,
|
|
||||||
f6299,
|
|
||||||
how="inner",
|
|
||||||
on=None,
|
|
||||||
left_on='region',
|
|
||||||
right_on='struct_id',
|
|
||||||
left_index=False,
|
|
||||||
right_index=False,
|
|
||||||
sort=True,
|
|
||||||
suffixes=("_f4963", "_f6299"),
|
|
||||||
copy=True,
|
|
||||||
indicator=False,
|
|
||||||
validate="1:m",
|
|
||||||
)
|
|
||||||
# m_f4963_f6299 -> f2685
|
|
||||||
m_f4963_f6299_f2685 = pd.merge(
|
|
||||||
m_f4963_f6299,
|
|
||||||
f2685,
|
|
||||||
how="inner",
|
|
||||||
on=None,
|
|
||||||
left_on='artifactLocation',
|
|
||||||
right_on='struct_id',
|
|
||||||
left_index=False,
|
|
||||||
right_index=False,
|
|
||||||
sort=True,
|
|
||||||
suffixes=("_m_f4963_f6299", "_f2685"),
|
|
||||||
copy=True,
|
|
||||||
indicator=False,
|
|
||||||
validate="1:m",
|
|
||||||
)
|
)
|
||||||
|
#
|
||||||
|
# As expected from the above note
|
||||||
|
#
|
||||||
|
# Note that this IGNORES the path
|
||||||
|
# - .results > .[] > .relatedLocations > .[] > .physicalLocation > .text
|
||||||
|
#
|
||||||
|
# we have no text entries in that table:
|
||||||
|
#
|
||||||
|
# In [88]: d1[d1.text_2683 != '']
|
||||||
|
# Out[88]:
|
||||||
|
# Empty DataFrame
|
||||||
|
|
||||||
# f2683 -> m_f4963_f6299_f2685
|
#
|
||||||
m_f2683_f4963_f6299_f2685 = pd.merge(
|
# Reproduce ALL `file:line:col:line:col: message` entries as a table
|
||||||
f2683,
|
#
|
||||||
m_f4963_f6299_f2685,
|
d2 = (d1[['uri', 'startLine', 'startColumn', 'endLine', 'endColumn', 'text_4055']]
|
||||||
how="inner",
|
.rename({'text_4055': 'message'}, axis='columns'))
|
||||||
on=None,
|
|
||||||
left_on='physicalLocation',
|
|
||||||
right_on='struct_id_f4963',
|
|
||||||
left_index=False,
|
|
||||||
right_index=False,
|
|
||||||
sort=True,
|
|
||||||
suffixes=("_f2683", "_m_f4963_f6299_f2685"),
|
|
||||||
copy=True,
|
|
||||||
indicator=False,
|
|
||||||
validate="1:m",
|
|
||||||
)
|
|
||||||
|
|
||||||
# m_f2683_f4963_f6299_f2685 -> f2774
|
|
||||||
m_f2683_f4963_f6299_f2685_f2774 = pd.merge(
|
|
||||||
m_f2683_f4963_f6299_f2685,
|
|
||||||
f2774,
|
|
||||||
how="inner",
|
|
||||||
on=None,
|
|
||||||
left_on='message',
|
|
||||||
right_on='struct_id',
|
|
||||||
left_index=False,
|
|
||||||
right_index=False,
|
|
||||||
sort=True,
|
|
||||||
suffixes=("_m_f2683_f4963_f6299_f2685", "_f2774"),
|
|
||||||
copy=True,
|
|
||||||
indicator=False,
|
|
||||||
validate="1:m",
|
|
||||||
)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Remove indexing columns. Note: each row corresponds to the fields of an
|
# Write output
|
||||||
# original table.
|
|
||||||
#
|
#
|
||||||
qdf = m_f2683_f4963_f6299_f2685_f2774[
|
if args.output_format == 'csv':
|
||||||
['id', 'message', 'physicalLocation',
|
d2.to_csv(sys.stdout, index_label='index')
|
||||||
'artifactLocation', 'region',
|
|
||||||
'endColumn', 'endLine', 'startColumn', 'startLine',
|
|
||||||
'index', 'uri', 'uriBaseId',
|
|
||||||
'text']]
|
|
||||||
|
|
||||||
qdf[qdf.uri == "static/js/fileuploader.js"]
|
|
||||||
qdf[qdf.text.str.contains("Unused variable size")]
|
|
||||||
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if args.dot_output:
|
|
||||||
signature._signature(args, sarif_struct, context)
|
|
||||||
struct_graph = [(typedef, sig) for sig, typedef in context.sig_to_typedef.items()]
|
|
||||||
signature.write_header(sys.stdout)
|
|
||||||
for typedef, sig in struct_graph:
|
|
||||||
signature.write_node(sys.stdout, typedef, sig)
|
|
||||||
for typedef, sig in struct_graph:
|
|
||||||
signature.write_edges(args, sys.stdout, typedef, sig)
|
|
||||||
signature.write_footer(sys.stdout)
|
|
||||||
|
|
||||||
elif args.typedef_signatures:
|
|
||||||
signature._signature(args, sarif_struct, context)
|
|
||||||
struct_graph = dict((typedef, sig) for sig,typedef in context.sig_to_typedef.items())
|
|
||||||
pprint(struct_graph, sys.stdout, indent=4)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pprint(signature._signature(args, sarif_struct, context), sys.stdout, indent=2)
|
sys.stderr.write("unknown output format")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|||||||
BIN
notes/typegraph.pdf
Normal file
BIN
notes/typegraph.pdf
Normal file
Binary file not shown.
Reference in New Issue
Block a user