mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
sarif-extract-tables: interim commit: form tables
The tables are now built as pandas DataFrames and stored on the Typegraph instance. They will be tested by using pandas operations to reproduce one of the earlier outputs.
This commit is contained in:
committed by
=Michael Hohn
parent
7a517fa06c
commit
f246f06d4e
@@ -32,12 +32,13 @@
|
||||
Set up the virtual environment and install the packages:
|
||||
# pip freeze > requirements.txt
|
||||
#+BEGIN_SRC sh
|
||||
# Using requirements.txt
|
||||
python3 -m venv .venv
|
||||
. .venv/bin/activate
|
||||
python3 -m pip install -r requirements.txt
|
||||
# Or separately:
|
||||
pip install --upgrade pip
|
||||
pip install ipython pyyaml
|
||||
pip install ipython pyyaml pandas
|
||||
#+END_SRC
|
||||
|
||||
"Install" for local development:
|
||||
|
||||
@@ -3,11 +3,12 @@
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import sarif_cli.signature as S
|
||||
import sarif_cli.typegraph as T
|
||||
from sarif_cli import signature
|
||||
from sarif_cli import typegraph
|
||||
import sys
|
||||
from pprint import pprint
|
||||
from collections import defaultdict
|
||||
import pandas as pd
|
||||
|
||||
#
|
||||
# Start processing
|
||||
@@ -28,104 +29,173 @@ with open(args.file, 'r') if args.file != '-' else sys.stdin as fp:
|
||||
#
|
||||
# Preprocess raw SARIF to get smaller signature
|
||||
#
|
||||
context = S.Context(
|
||||
context = signature.Context(
|
||||
{
|
||||
"string" : "String",
|
||||
"int" : "Int",
|
||||
"bool" : "Bool"
|
||||
}
|
||||
)
|
||||
sarif_struct = S.fillsig(args, sarif_struct, context)
|
||||
sarif_struct = signature.fillsig(args, sarif_struct, context)
|
||||
|
||||
#
|
||||
# Use reference type graph (signature) to traverse sarif and attach values to tables
|
||||
#
|
||||
tgraph = T.Typegraph(T.struct_graph_2022_02_01)
|
||||
T.destructure(tgraph, T.start_node_2022_02_01, sarif_struct)
|
||||
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
||||
typegraph.destructure(tgraph, typegraph.start_node_2022_02_01, sarif_struct)
|
||||
|
||||
import IPython
|
||||
IPython.embed(header="""
|
||||
---------------------------------
|
||||
ipython repl for
|
||||
if 0:
|
||||
import IPython
|
||||
IPython.embed(header="""
|
||||
---------------------------------
|
||||
ipython repl for
|
||||
|
||||
tgraph = T.Typegraph(T.struct_graph_2022_02_01)
|
||||
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
||||
|
||||
---------------------------------
|
||||
Sanity checks:
|
||||
In [4]: tgraph.fields
|
||||
Out[4]:
|
||||
{'String': None,
|
||||
'Int': None,
|
||||
'Bool': None,
|
||||
...
|
||||
}
|
||||
In [6]: tgraph.instances['String']
|
||||
Out[6]: []
|
||||
---------------------------------
|
||||
Sanity checks:
|
||||
In [4]: tgraph.fields
|
||||
Out[4]:
|
||||
{'String': None,
|
||||
'Int': None,
|
||||
'Bool': None,
|
||||
...
|
||||
}
|
||||
In [6]: tgraph.instances['String']
|
||||
Out[6]: []
|
||||
|
||||
In [7]: tgraph.instances['Int']
|
||||
Out[7]: []
|
||||
In [7]: tgraph.instances['Int']
|
||||
Out[7]: []
|
||||
|
||||
In [8]: tgraph.instances['Bool']
|
||||
Out[8]: []
|
||||
In [8]: tgraph.instances['Bool']
|
||||
Out[8]: []
|
||||
|
||||
Select value checks:
|
||||
In [9]: tgraph.instances['Struct6787']
|
||||
Out[9]:
|
||||
[(4358601472,
|
||||
'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
|
||||
4362190016,
|
||||
'2.1.0')]
|
||||
Select value checks:
|
||||
In [9]: tgraph.instances['Struct6787']
|
||||
Out[9]:
|
||||
[(4358601472,
|
||||
'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
|
||||
4362190016,
|
||||
'2.1.0')]
|
||||
|
||||
In [10]: tgraph.fields['Struct6787']
|
||||
Out[10]: ['$schema', 'runs', 'version']
|
||||
In [10]: tgraph.fields['Struct6787']
|
||||
Out[10]: ['$schema', 'runs', 'version']
|
||||
|
||||
In [5]: tgraph.instances['Array0177']
|
||||
Out[5]:
|
||||
[(4337396800, 0, 'Struct3388', 4337396928),
|
||||
(4337396800, 1, 'Struct3388', 4337397056)]
|
||||
In [5]: tgraph.instances['Array0177']
|
||||
Out[5]:
|
||||
[(4337396800, 0, 'Struct3388', 4337396928),
|
||||
(4337396800, 1, 'Struct3388', 4337397056)]
|
||||
|
||||
In [12]: tgraph.fields['Array0177']
|
||||
Out[12]: [0]
|
||||
In [12]: tgraph.fields['Array0177']
|
||||
Out[12]: [0]
|
||||
|
||||
In [9]: tgraph.instances['Array7069'][0:5]
|
||||
Out[9]:
|
||||
[(4337397248, 0, 'String', '\r\n'),
|
||||
(4337397248, 1, 'String', '\n'),
|
||||
(4337397248, 2, 'String', '\u2028'),
|
||||
(4337397248, 3, 'String', '\u2029'),
|
||||
(4339863424, 0, 'String', 'maintainability')]
|
||||
In [9]: tgraph.instances['Array7069'][0:5]
|
||||
Out[9]:
|
||||
[(4337397248, 0, 'String', '\r\n'),
|
||||
(4337397248, 1, 'String', '\n'),
|
||||
(4337397248, 2, 'String', '\u2028'),
|
||||
(4337397248, 3, 'String', '\u2029'),
|
||||
(4339863424, 0, 'String', 'maintainability')]
|
||||
|
||||
|
||||
In [10]: tgraph.instances['Struct6299'][:3]
|
||||
Out[10]:
|
||||
[(4315110720, 17, 1214, 13, 1214),
|
||||
(4315111232, -1, -1, 1, -1),
|
||||
(4315124096, 30, 847, 17, 847)]
|
||||
In [10]: tgraph.instances['Struct6299'][:3]
|
||||
Out[10]:
|
||||
[(4315110720, 17, 1214, 13, 1214),
|
||||
(4315111232, -1, -1, 1, -1),
|
||||
(4315124096, 30, 847, 17, 847)]
|
||||
|
||||
In [11]: tgraph.fields['Struct6299']
|
||||
Out[11]: ['endColumn', 'endLine', 'startColumn', 'startLine']
|
||||
|
||||
|
||||
""")
|
||||
In [11]: tgraph.fields['Struct6299']
|
||||
Out[11]: ['endColumn', 'endLine', 'startColumn', 'startLine']
|
||||
""")
|
||||
|
||||
#
|
||||
# Form output tables
|
||||
#
|
||||
typegraph.attach_tables(tgraph)
|
||||
|
||||
import IPython
|
||||
IPython.embed(header="""
|
||||
---------------------------------
|
||||
ipython repl for tables
|
||||
|
||||
---------------------------------
|
||||
|
||||
tgraph.dataframes
|
||||
In [7]: sorted(tgraph.dataframes.keys())
|
||||
Out[7]:
|
||||
['Array0177',
|
||||
'Array0350',
|
||||
'Array1075',...]
|
||||
|
||||
sorted(tgraph.dataframes.keys())
|
||||
tgraph.dataframes['Array0177']
|
||||
tgraph.dataframes['Struct3388']
|
||||
tgraph.signature_graph['Struct3388']
|
||||
|
||||
XX: reproduce the
|
||||
|
||||
file:line:col:line:col: message
|
||||
|
||||
output from
|
||||
|
||||
../../bin/sarif-results-summary results.sarif | less
|
||||
|
||||
as test. Sample:
|
||||
|
||||
RESULT: static/js/fileuploader.js:1214:13:1214:17: Unused variable size.
|
||||
|
||||
Collect typedef/fields via typegraph.pdf:
|
||||
|
||||
static/js/fileuploader.js
|
||||
Struct2685/uri
|
||||
|
||||
In [22]: d1 = tgraph.dataframes['Struct2685']
|
||||
In [24]: d1[d1.uri == "static/js/fileuploader.js"]
|
||||
Out[24]:
|
||||
struct_id index uri uriBaseId
|
||||
0 4856718656 0 static/js/fileuploader.js %SRCROOT%
|
||||
77 4856758336 0 static/js/fileuploader.js %SRCROOT%
|
||||
...
|
||||
|
||||
:1214:13:1214:17:
|
||||
Struct6299/startLine/startColumn/endLine/endColumn
|
||||
|
||||
Unused variable size.
|
||||
Struct2774/message
|
||||
d1 = tgraph.dataframes['Struct2774']
|
||||
In [31]: d1[d1.text.str.contains("Unused variable size")]
|
||||
Out[31]:
|
||||
struct_id text
|
||||
1 4856749504 Unused variable size.
|
||||
103 4856879296 Unused variable size.
|
||||
|
||||
Follow the edges in typegraph.pdf to find joining typedefs and paths.
|
||||
|
||||
Struct4963
|
||||
|
||||
Struct2683
|
||||
|
||||
|
||||
""")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if args.dot_output:
|
||||
S._signature(args, sarif_struct, context)
|
||||
signature._signature(args, sarif_struct, context)
|
||||
struct_graph = [(typedef, sig) for sig, typedef in context.sig_to_typedef.items()]
|
||||
S.write_header(sys.stdout)
|
||||
signature.write_header(sys.stdout)
|
||||
for typedef, sig in struct_graph:
|
||||
S.write_node(sys.stdout, typedef, sig)
|
||||
signature.write_node(sys.stdout, typedef, sig)
|
||||
for typedef, sig in struct_graph:
|
||||
S.write_edges(args, sys.stdout, typedef, sig)
|
||||
S.write_footer(sys.stdout)
|
||||
signature.write_edges(args, sys.stdout, typedef, sig)
|
||||
signature.write_footer(sys.stdout)
|
||||
|
||||
elif args.typedef_signatures:
|
||||
S._signature(args, sarif_struct, context)
|
||||
signature._signature(args, sarif_struct, context)
|
||||
struct_graph = dict((typedef, sig) for sig,typedef in context.sig_to_typedef.items())
|
||||
pprint(struct_graph, sys.stdout, indent=4)
|
||||
|
||||
else:
|
||||
pprint(S._signature(args, sarif_struct, context), sys.stdout, indent=2)
|
||||
pprint(signature._signature(args, sarif_struct, context), sys.stdout, indent=2)
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
"""Operations on the type graph produced by sarif-to-dot -u -t -f
|
||||
"""Operations on the type graph produced by sarif-to-dot -u -t -f
|
||||
|
||||
Also contains some type graph reference values; these may be moved out into
|
||||
To get a map of this type graph, use
|
||||
cd sarif-cli/data/treeio
|
||||
../../bin/sarif-to-dot -u -t -f -n -d results.sarif | dot -Tpdf > typegraph.pdf
|
||||
|
||||
This file also contains some type graph reference values; these may be moved out into
|
||||
separate files at some point.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from typing import *
|
||||
import pandas as pd
|
||||
|
||||
#
|
||||
# Structure graph from ../../bin/sarif-to-dot -u -t -f results.sarif
|
||||
@@ -145,6 +150,7 @@ class Typegraph:
|
||||
signature_graph : Dict[NodeId, Any] # (typedef -> signature) dict
|
||||
instances : Dict[NodeId, List[Tuple]] # (node -> (row list)) dict
|
||||
fields: Dict[NodeId, List] # (node -> (field list)) dict
|
||||
dataframes: Dict[NodeId, Any] # (node -> dataframe) dict
|
||||
|
||||
"""
|
||||
# Given this typedef
|
||||
@@ -195,6 +201,7 @@ class Typegraph:
|
||||
self.signature_graph = dict(signature_graph)
|
||||
self.instances = {}
|
||||
self.fields = {}
|
||||
self.dataframes = {}
|
||||
for typedef, signature in signature_graph:
|
||||
self.instances[typedef] = []
|
||||
self.fields[typedef] = fields(signature)
|
||||
@@ -346,3 +353,19 @@ def _destructure_list(typegraph, node: str, tree: List):
|
||||
# next `signature`
|
||||
if (sigindex, sigtype) == signature[-1]:
|
||||
raise
|
||||
|
||||
#
|
||||
# Form tables from destructured json/sarif
|
||||
#
|
||||
def attach_tables(typegraph):
    """Form one pandas DataFrame per Array/Struct typedef and keep it on `typegraph`.

    Iterates over every (typedef, rows) entry of `typegraph.instances`:

    * a typedef whose name starts with 'Array' gets the fixed columns
      'array_id', 'value_index', 'type_at_index', 'id_or_value_at_index';
    * a typedef whose name starts with 'Struct' gets 'struct_id' followed by
      the names listed in `typegraph.fields[typedef]`;
    * any other typedef (String etc.) is skipped.

    Each frame is stored in `typegraph.dataframes[typedef]`, replacing any
    entry already present under that key.  Nothing is returned.

    Raises:
        ValueError: when pandas raises ValueError while building a frame;
            it is re-raised with the offending typedef named in the message.
    """
    array_columns = ['array_id', 'value_index', 'type_at_index', 'id_or_value_at_index']

    for typedef, rows in typegraph.instances.items():
        if typedef.startswith('Array'):
            columns = array_columns
        elif typedef.startswith('Struct'):
            columns = ['struct_id', *typegraph.fields[typedef]]
        else:
            # Leaf types (String etc.) get no table.
            continue

        try:
            frame = pd.DataFrame(rows, columns=columns)
        except ValueError as err:
            # pandas' own message does not say which table failed; with many
            # typedefs in the graph that makes a bad row hard to track down.
            raise ValueError(
                f"cannot form table for typedef {typedef!r}: {err}"
            ) from err

        typegraph.dataframes[typedef] = frame
|
||||
|
||||
|
||||
Reference in New Issue
Block a user