mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 01:13:03 +01:00
sarif-extract-multi: extract combined tables from multiple sarif files
This command introduces a new tree structure that pulls in a collection
of sarif files. In yaml format, an example is
- creation_date: '2021-12-09' # Repository creation date
primary_language: javascript # By lines of code
project_name: treeio/treeio # Repo name-short name
query_commit_id: fa9571646c # Commit id for custom (non-library) queries
sarif_content: {} # The sarif content will be attached here
sarif_file_name: 2021-12-09/results.sarif # Path to sarif file
scan_start_date: '2021-12-09' # Beginning date/time of scan
scan_stop_date: '2021-12-10' # End date/time of scan
tool_name: codeql
tool_version: v1.27
- creation_date: '2022-02-25'
primary_language: javascript
...
At run time,
cd ~/local/sarif-cli/data/treeio
sarif-extract-multi multi-sarif-01.json test-multi-table
will load the specified sarif files and put them in place of
`sarif_content`, then build tables against the new signature found in
sarif_cli/signature_multi.py, and merge those into 6 larger tables. The
exported tables are
artifacts.csv path-problem.csv project.csv
codeflows.csv problem.csv related-locations.csv
and they have join keys for further operations.
The new typegraph is rendered in
notes/typegraph-multi.pdf
using the instructions in
sarif_cli/signature_multi.py
This commit is contained in:
committed by
=Michael Hohn
parent
9c151e295b
commit
0f070a6ae4
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +1,2 @@
|
||||
*.sarif filter=lfs diff=lfs merge=lfs -text
|
||||
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
88
bin/sarif-extract-multi
Executable file
88
bin/sarif-extract-multi
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python
|
||||
""" Extract data from multiple sarif files in table form.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import pathlib
|
||||
from sarif_cli import signature, signature_multi
|
||||
from sarif_cli import typegraph
|
||||
import sarif_cli.table_joins as tj
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
import pandas as pd
|
||||
|
||||
#
# Start processing
#
# Command line: a metadata json file (or - for stdin), an output directory,
# and an optional switch that only emits the combined structure.
parser = argparse.ArgumentParser(description='Read a collection of sarif files and produce tabular output.')
parser.add_argument('file', metavar='sarif-files.json', type=str,
                    help="json file containing the metadata array. Use - for stdin. ")
parser.add_argument('outdir', metavar='output-dir', type=str, help='output directory')
parser.add_argument('-c', '--combine-only', action="store_true",
                    help='Read the referenced input file(s) and write the combined structure to stdout')
args = parser.parse_args()

# Load meta info
if args.file == '-':
    # Read from stdin without wrapping it in `with`, so the process-wide
    # stream is not closed behind the back of later code.
    meta_struct = json.load(sys.stdin)
else:
    # Read bytes, as load() does for the sarif files, so that json detects the
    # encoding instead of relying on the platform default text encoding.
    with open(args.file, 'rb') as fp:
        meta_struct = json.load(fp)
|
||||
|
||||
# Attach referenced files
def load(fname):
    """Return the parsed json content of the file `fname`.

    The file is read as bytes; json handles the decoding.
    """
    with open(fname, 'rb') as stream:
        raw = stream.read()
    return json.loads(raw)
|
||||
|
||||
# Replace each entry's `sarif_content` with the parsed content of the file
# named by its `sarif_file_name`.
for sarif_meta in meta_struct:
    sarif_meta['sarif_content'] = load(sarif_meta['sarif_file_name'])

# Only output composite?  With -c/--combine-only, dump the combined structure
# to stdout and stop before any table is built.
if args.combine_only:
    json.dump(meta_struct, sys.stdout, indent=4)
    sys.exit(0)

#
# Preprocess raw SARIF to get smaller signature
#
context = signature.Context(
    {
        "string": "String",
        "int": "Int",
        "bool": "Bool",
    }
)
meta_struct = signature.fillsig(args, meta_struct, context)

#
# Use reference type graph (signature) to traverse sarif and attach values to tables
#
tgraph = typegraph.Typegraph(signature_multi.struct_graph_2022_03_08)
typegraph.destructure(tgraph, signature_multi.start_node_2022_03_08, meta_struct)

#
# Form output tables
#
typegraph.attach_tables(tgraph)

#
# Form dataframes originally introduced by sarif-extract-tables.
# sf_2683 is the shared location table that the other joins build on.
#
sf_2683 = tj.joins_for_sf_2683(tgraph)
kind_problem = tj.joins_for_problem(tgraph, sf_2683)
kind_pathproblem = tj.joins_for_path_problem(tgraph, sf_2683)
codeflows_9799 = tj.joins_for_codeflows(tgraph, sf_2683)
related_locations = tj.joins_for_relatedLocations(tgraph, sf_2683)

#
# Form the new dataframes
#
project_df = tj.joins_for_project(tgraph)
artifacts_df = tj.joins_for_artifacts(tgraph)
|
||||
#
# Write output
#
p = pathlib.Path(args.outdir)
# parents=True: also create missing intermediate directories, so a nested
# output directory such as out/run-01 works on first use.
p.mkdir(parents=True, exist_ok=True)


def write(path, frame):
    """Write the dataframe `frame` as csv to the file `path` inside the output directory.

    The frame's index is written as a column labelled 'index'.
    """
    with p.joinpath(path).open(mode='wb') as fh:
        frame.to_csv(fh, index_label='index')
|
||||
# One csv file per table, written in this order.
for csv_name, table in (
        ('problem.csv', kind_problem),
        ('path-problem.csv', kind_pathproblem),
        ('codeflows.csv', codeflows_9799),
        ('related-locations.csv', related_locations),
        ('project.csv', project_df),
        ('artifacts.csv', artifacts_df),
):
    write(csv_name, table)
|
||||
@@ -1,10 +1,21 @@
|
||||
#!/usr/bin/env python
|
||||
""" Extract data from sarif files in table form.
|
||||
|
||||
These particular table joins create tables matching the content of
|
||||
./sarif-results-summary
|
||||
|
||||
Return tables providing the `problem`, `path-problem` and `relatedLocations`
|
||||
information.
|
||||
|
||||
The `problem` and `path-problem` entries provide that information; the
|
||||
`relatedLocations` table provides the details when multiple results are present
|
||||
for either.
|
||||
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import pathlib
|
||||
from sarif_cli import signature
|
||||
from sarif_cli import signature, signature_single
|
||||
from sarif_cli import typegraph
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
@@ -43,8 +54,8 @@ sarif_struct = signature.fillsig(args, sarif_struct, context)
|
||||
#
|
||||
# Use reference type graph (signature) to traverse sarif and attach values to tables
|
||||
#
|
||||
tgraph = typegraph.Typegraph(typegraph.struct_graph_2022_02_01)
|
||||
typegraph.destructure(tgraph, typegraph.start_node_2022_02_01, sarif_struct)
|
||||
tgraph = typegraph.Typegraph(signature_single.struct_graph_2022_02_01)
|
||||
typegraph.destructure(tgraph, signature_single.start_node_2022_02_01, sarif_struct)
|
||||
|
||||
#
|
||||
# Form output tables
|
||||
|
||||
25131
data/treeio/2021-12-09/results.yaml
Normal file
25131
data/treeio/2021-12-09/results.yaml
Normal file
File diff suppressed because it is too large
Load Diff
30785
data/treeio/2022-02-25/results.sarif
Normal file
30785
data/treeio/2022-02-25/results.sarif
Normal file
File diff suppressed because it is too large
Load Diff
22180
data/treeio/2022-02-25/results.yaml
Normal file
22180
data/treeio/2022-02-25/results.yaml
Normal file
File diff suppressed because it is too large
Load Diff
26
data/treeio/multi-sarif-01.json
Normal file
26
data/treeio/multi-sarif-01.json
Normal file
@@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"creation_date": "2021-12-09",
|
||||
"primary_language": "javascript",
|
||||
"project_name": "treeio/treeio",
|
||||
"query_commit_id": "fa9571646c",
|
||||
"sarif_content": {},
|
||||
"sarif_file_name": "2021-12-09/results.sarif",
|
||||
"scan_start_date": "2021-12-09",
|
||||
"scan_stop_date": "2021-12-10",
|
||||
"tool_name": "codeql",
|
||||
"tool_version": "v1.27"
|
||||
},
|
||||
{
|
||||
"creation_date": "2022-02-25",
|
||||
"primary_language": "javascript",
|
||||
"project_name": "treeio/treeio",
|
||||
"query_commit_id": "fa9571646c",
|
||||
"sarif_content": {},
|
||||
"sarif_file_name": "2022-02-25/results.sarif",
|
||||
"scan_start_date": "2022-02-25",
|
||||
"scan_stop_date": "2022-02-26",
|
||||
"tool_name": "codeql",
|
||||
"tool_version": "v1.29"
|
||||
}
|
||||
]
|
||||
21
data/treeio/multi-sarif-01.yaml
Normal file
21
data/treeio/multi-sarif-01.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
- creation_date: '2021-12-09' # Repository creation date
|
||||
primary_language: javascript # By lines of code
|
||||
project_name: treeio/treeio # Repo name-short name
|
||||
query_commit_id: fa9571646c # Commit id for custom (non-library) queries
|
||||
sarif_content: {} # The sarif content will be attached here
|
||||
sarif_file_name: 2021-12-09/results.sarif # Path to sarif file
|
||||
scan_start_date: '2021-12-09' # Beginning date/time of scan
|
||||
scan_stop_date: '2021-12-10' # End date/time of scan
|
||||
tool_name: codeql
|
||||
tool_version: v1.27
|
||||
|
||||
- creation_date: '2022-02-25'
|
||||
primary_language: javascript
|
||||
project_name: treeio/treeio
|
||||
query_commit_id: fa9571646c
|
||||
sarif_content: {}
|
||||
sarif_file_name: 2022-02-25/results.sarif
|
||||
scan_start_date: '2022-02-25'
|
||||
scan_stop_date: '2022-02-26'
|
||||
tool_name: codeql
|
||||
tool_version: v1.29
|
||||
24921
data/treeio/results.yaml
24921
data/treeio/results.yaml
File diff suppressed because it is too large
Load Diff
BIN
notes/typegraph-multi.pdf
Normal file
BIN
notes/typegraph-multi.pdf
Normal file
Binary file not shown.
@@ -4,7 +4,7 @@ These functions convert a SARIF (or any json structure) to its signature, with v
|
||||
See sarif-to-dot for options and examples.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
import sarif_cli.traverse as traverse
|
||||
from . import traverse
|
||||
import zlib
|
||||
|
||||
#
|
||||
|
||||
148
sarif_cli/signature_multi.py
Normal file
148
sarif_cli/signature_multi.py
Normal file
@@ -0,0 +1,148 @@
|
||||
""" The signature for a multi-sarif result file
|
||||
|
||||
Produced by
|
||||
|
||||
cd sarif-cli/data/treeio
|
||||
sarif-extract-multi -c multi-sarif-01.json none | sarif-to-dot -utf -
|
||||
|
||||
with some arrays manually sorted so that the signature with more fields comes first. The case
|
||||
('Array6343', ('array', (1, 'Struct9699'), (0, 'Struct4055'))), # MANUALLY SORTED
|
||||
is marked below.
|
||||
|
||||
Also, this struct should be (and is) identical to struct_graph_2022_02_01 in the
|
||||
leading entries, but there are two extras.
|
||||
|
||||
To get a map of this type graph, use
|
||||
|
||||
cd sarif-cli/data/treeio
|
||||
sarif-extract-multi -c multi-sarif-01.json none | \
|
||||
sarif-to-dot -u -t -f -n -d - | dot -Tpdf > typegraph-multi.pdf
|
||||
|
||||
"""
|
||||
|
||||
#
|
||||
# The starting node is the leftmost node in ../notes/typegraph-multi.pdf
|
||||
#
|
||||
start_node_2022_03_08 = 'Array6785'

# Each entry is (type name, definition).  A definition is either a primitive
# tag, ('struct', (field, type), ...) or ('array', (position, type), ...).
struct_graph_2022_03_08 = [
    # Primitive types
    ('String', 'string'),
    ('Int', 'int'),
    ('Bool', 'bool'),

    # Artifacts
    ('Struct2685', ('struct',
                    ('index', 'Int'),
                    ('uri', 'String'),
                    ('uriBaseId', 'String'))),
    ('Struct5277', ('struct', ('location', 'Struct2685'))),
    ('Array4640', ('array', (0, 'Struct5277'))),
    ('Array7069', ('array', (0, 'String'))),
    ('Struct9543', ('struct',
                    ('semmle.formatSpecifier', 'String'),
                    ('semmle.sourceLanguage', 'String'))),

    # Locations
    ('Struct2774', ('struct', ('text', 'String'))),
    ('Struct6299', ('struct',
                    ('endColumn', 'Int'),
                    ('endLine', 'Int'),
                    ('startColumn', 'Int'),
                    ('startLine', 'Int'))),
    ('Struct4963', ('struct',
                    ('artifactLocation', 'Struct2685'),
                    ('region', 'Struct6299'))),
    ('Struct2683', ('struct',
                    ('id', 'Int'),
                    ('message', 'Struct2774'),
                    ('physicalLocation', 'Struct4963'))),
    ('Array0350', ('array', (0, 'Struct2683'))),

    # Results without codeFlows
    ('Struct4199', ('struct',
                    ('primaryLocationLineHash', 'String'),
                    ('primaryLocationStartColumnFingerprint', 'String'))),
    ('Struct3942', ('struct', ('id', 'String'), ('index', 'Int'))),
    ('Struct4055', ('struct',
                    ('locations', 'Array0350'),
                    ('message', 'Struct2774'),
                    ('partialFingerprints', 'Struct4199'),
                    ('relatedLocations', 'Array0350'),
                    ('rule', 'Struct3942'),
                    ('ruleId', 'String'),
                    ('ruleIndex', 'Int'))),

    # Code flows and results with codeFlows
    ('Struct0987', ('struct', ('location', 'Struct2683'))),
    ('Array1075', ('array', (0, 'Struct0987'))),
    ('Struct4194', ('struct', ('locations', 'Array1075'))),
    ('Array1597', ('array', (0, 'Struct4194'))),
    ('Struct7122', ('struct', ('threadFlows', 'Array1597'))),
    ('Array9799', ('array', (0, 'Struct7122'))),
    ('Struct9699', ('struct',
                    ('codeFlows', 'Array9799'),
                    ('locations', 'Array0350'),
                    ('message', 'Struct2774'),
                    ('partialFingerprints', 'Struct4199'),
                    ('relatedLocations', 'Array0350'),
                    ('rule', 'Struct3942'),
                    ('ruleId', 'String'),
                    ('ruleIndex', 'Int'))),
    ('Array6343', ('array', (1, 'Struct9699'), (0, 'Struct4055'))),  # MANUALLY SORTED

    # Tool, driver and rules
    ('Struct8581', ('struct', ('enabled', 'Bool'), ('level', 'String'))),
    ('Struct7849', ('struct',
                    ('kind', 'String'),
                    ('precision', 'String'),
                    ('security-severity', 'String'),
                    ('severity', 'String'),
                    ('sub-severity', 'String'),
                    ('tags', 'Array7069'))),
    ('Struct6818', ('struct',
                    ('defaultConfiguration', 'Struct8581'),
                    ('fullDescription', 'Struct2774'),
                    ('id', 'String'),
                    ('name', 'String'),
                    ('properties', 'Struct7849'),
                    ('shortDescription', 'Struct2774'))),
    ('Array8754', ('array', (0, 'Struct6818'))),
    ('Struct7820', ('struct',
                    ('name', 'String'),
                    ('organization', 'String'),
                    ('rules', 'Array8754'),
                    ('version', 'String'))),
    ('Struct8972', ('struct', ('driver', 'Struct7820'))),

    # Runs and the sarif document
    ('Struct3081', ('struct',
                    ('repositoryUri', 'String'),
                    ('revisionId', 'String'))),
    ('Array5511', ('array', (0, 'Struct3081'))),
    ('Struct3388', ('struct',
                    ('artifacts', 'Array4640'),
                    ('columnKind', 'String'),
                    ('newlineSequences', 'Array7069'),
                    ('properties', 'Struct9543'),
                    ('results', 'Array6343'),
                    ('tool', 'Struct8972'),
                    ('versionControlProvenance', 'Array5511'))),
    ('Array0177', ('array', (0, 'Struct3388'))),
    ('Struct6787', ('struct',
                    ('$schema', 'String'),
                    ('runs', 'Array0177'),
                    ('version', 'String'))),  # Up to here identical to struct_graph_2022_02_01

    # The two extras: one metadata entry per sarif file, and the array of entries
    ('Struct3739', ('struct',
                    ('creation_date', 'String'),
                    ('primary_language', 'String'),
                    ('project_name', 'String'),
                    ('query_commit_id', 'String'),
                    ('sarif_content', 'Struct6787'),
                    ('sarif_file_name', 'String'),
                    ('scan_start_date', 'String'),
                    ('scan_stop_date', 'String'),
                    ('tool_name', 'String'),
                    ('tool_version', 'String'))),
    ('Array6785', ('array', (0, 'Struct3739'))),
]
|
||||
125
sarif_cli/signature_single.py
Normal file
125
sarif_cli/signature_single.py
Normal file
@@ -0,0 +1,125 @@
|
||||
""" The signature for a single sarif file
|
||||
|
||||
Produced by
|
||||
|
||||
sarif-to-dot -u -t -f 2021-12-09/results.sarif
|
||||
|
||||
with some arrays manually sorted so that the signature with more fields comes first. The case
|
||||
('Array6343', ('array', (1, 'Struct9699'), (0, 'Struct4055'))), # MANUALLY SORTED
|
||||
is marked below
|
||||
"""
|
||||
|
||||
#
|
||||
# The starting node is the leftmost node in ../notes/typegraph.pdf
|
||||
#
|
||||
start_node_2022_02_01 = 'Struct6787'

# Each entry is (type name, definition).  A definition is either a primitive
# tag, ('struct', (field, type), ...) or ('array', (position, type), ...).
struct_graph_2022_02_01 = [
    # Primitive types
    ('String', 'string'),
    ('Int', 'int'),
    ('Bool', 'bool'),

    # Artifacts
    ('Struct2685', ('struct',
                    ('index', 'Int'),
                    ('uri', 'String'),
                    ('uriBaseId', 'String'))),
    ('Struct5277', ('struct', ('location', 'Struct2685'))),
    ('Array4640', ('array', (0, 'Struct5277'))),
    ('Array7069', ('array', (0, 'String'))),
    ('Struct9543', ('struct',
                    ('semmle.formatSpecifier', 'String'),
                    ('semmle.sourceLanguage', 'String'))),

    # Locations
    ('Struct2774', ('struct', ('text', 'String'))),
    ('Struct6299', ('struct',
                    ('endColumn', 'Int'),
                    ('endLine', 'Int'),
                    ('startColumn', 'Int'),
                    ('startLine', 'Int'))),
    ('Struct4963', ('struct',
                    ('artifactLocation', 'Struct2685'),
                    ('region', 'Struct6299'))),
    ('Struct2683', ('struct',
                    ('id', 'Int'),
                    ('message', 'Struct2774'),
                    ('physicalLocation', 'Struct4963'))),
    ('Array0350', ('array', (0, 'Struct2683'))),

    # Results without codeFlows
    ('Struct4199', ('struct',
                    ('primaryLocationLineHash', 'String'),
                    ('primaryLocationStartColumnFingerprint', 'String'))),
    ('Struct3942', ('struct', ('id', 'String'), ('index', 'Int'))),
    ('Struct4055', ('struct',
                    ('locations', 'Array0350'),
                    ('message', 'Struct2774'),
                    ('partialFingerprints', 'Struct4199'),
                    ('relatedLocations', 'Array0350'),
                    ('rule', 'Struct3942'),
                    ('ruleId', 'String'),
                    ('ruleIndex', 'Int'))),

    # Code flows and results with codeFlows
    ('Struct0987', ('struct', ('location', 'Struct2683'))),
    ('Array1075', ('array', (0, 'Struct0987'))),
    ('Struct4194', ('struct', ('locations', 'Array1075'))),
    ('Array1597', ('array', (0, 'Struct4194'))),
    ('Struct7122', ('struct', ('threadFlows', 'Array1597'))),
    ('Array9799', ('array', (0, 'Struct7122'))),
    ('Struct9699', ('struct',
                    ('codeFlows', 'Array9799'),
                    ('locations', 'Array0350'),
                    ('message', 'Struct2774'),
                    ('partialFingerprints', 'Struct4199'),
                    ('relatedLocations', 'Array0350'),
                    ('rule', 'Struct3942'),
                    ('ruleId', 'String'),
                    ('ruleIndex', 'Int'))),
    ('Array6343', ('array', (1, 'Struct9699'), (0, 'Struct4055'))),  # MANUALLY SORTED

    # Tool, driver and rules
    ('Struct8581', ('struct', ('enabled', 'Bool'), ('level', 'String'))),
    ('Struct7849', ('struct',
                    ('kind', 'String'),
                    ('precision', 'String'),
                    ('security-severity', 'String'),
                    ('severity', 'String'),
                    ('sub-severity', 'String'),
                    ('tags', 'Array7069'))),
    ('Struct6818', ('struct',
                    ('defaultConfiguration', 'Struct8581'),
                    ('fullDescription', 'Struct2774'),
                    ('id', 'String'),
                    ('name', 'String'),
                    ('properties', 'Struct7849'),
                    ('shortDescription', 'Struct2774'))),
    ('Array8754', ('array', (0, 'Struct6818'))),
    ('Struct7820', ('struct',
                    ('name', 'String'),
                    ('organization', 'String'),
                    ('rules', 'Array8754'),
                    ('version', 'String'))),
    ('Struct8972', ('struct', ('driver', 'Struct7820'))),

    # Runs and the sarif document
    ('Struct3081', ('struct',
                    ('repositoryUri', 'String'),
                    ('revisionId', 'String'))),
    ('Array5511', ('array', (0, 'Struct3081'))),
    ('Struct3388', ('struct',
                    ('artifacts', 'Array4640'),
                    ('columnKind', 'String'),
                    ('newlineSequences', 'Array7069'),
                    ('properties', 'Struct9543'),
                    ('results', 'Array6343'),
                    ('tool', 'Struct8972'),
                    ('versionControlProvenance', 'Array5511'))),
    ('Array0177', ('array', (0, 'Struct3388'))),
    ('Struct6787', ('struct',
                    ('$schema', 'String'),
                    ('runs', 'Array0177'),
                    ('version', 'String'))),
]
|
||||
|
||||
316
sarif_cli/table_joins.py
Normal file
316
sarif_cli/table_joins.py
Normal file
@@ -0,0 +1,316 @@
|
||||
""" Collection of joins for the base tables provided by typegraph.attach_tables()
|
||||
|
||||
The `problem` and `path-problem` entries provide that information; the
|
||||
`relatedLocations` table provides the details when multiple results are
|
||||
present for either. `project` is the high-level overview; `artifacts`
|
||||
provides those for the other tables.
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
def joins_for_sf_2683(tgraph):
    """
    Flatten Struct2683 by joining in every table its fields refer to.

    The physicalLocation, region, artifactLocation and message references are
    replaced by the columns of the tables they point to.  The result keeps the
    row key as `struct_id_2683` and the original `id` as `id_2683`.
    """
    frames = tgraph.dataframes

    # (referenced table, reference column, renames applied after the join)
    lookups = (
        ('Struct4963', 'physicalLocation', None),
        ('Struct6299', 'region', None),
        ('Struct2685', 'artifactLocation', {"index": "location_index_2685"}),
        ('Struct2774', 'message', {"text": "message_text_2683"}),
    )

    flat = frames['Struct2683'].rename(
        columns={"struct_id": "struct_id_2683", "id": "id_2683"})
    for table, reference, renames in lookups:
        # Resolve the reference, then discard both join keys.
        flat = flat.merge(frames[table], how="left", left_on=reference,
                          right_on='struct_id', validate="1:m")
        flat = flat.drop(columns=['struct_id', reference])
        if renames is not None:
            flat = flat.rename(columns=renames)
    return flat
|
||||
|
||||
def joins_for_problem(tgraph, sf_2683):
    """
    Build the `problem` table (@kind problem) from the results array.

    `sf_2683` is the flattened location table from joins_for_sf_2683().
    `relatedLocations` is kept as an id in column `relatedLocations_id`.
    """
    frames = tgraph.dataframes

    # Results array -> Struct4055.  The inner join keeps only the array
    # elements whose id is present in Struct4055.
    results = frames['Array6343'].rename(
        columns={"value_index": "results_idx_6343", "array_id": "result_id_6343"})
    table = results.merge(frames['Struct4055'], how="inner",
                          left_on='id_or_value_at_index', right_on='struct_id',
                          validate="1:m")
    table = table.drop(columns=['type_at_index', 'id_or_value_at_index', 'struct_id'])
    table = table.rename(columns={"message": "result_message_4055",
                                  "relatedLocations": "relatedLocations_id"})

    # locations
    table = table.merge(frames['Array0350'], how="left",
                        left_on='locations', right_on='array_id', validate="1:m")
    table = table.drop(columns=['locations', 'array_id', 'type_at_index'])

    # location details
    table = table.merge(sf_2683, how="left", left_on='id_or_value_at_index',
                        right_on='struct_id_2683', validate="1:m")
    table = table.drop(columns=['id_or_value_at_index', 'struct_id_2683'])

    # result message text
    table = table.merge(frames['Struct2774'], how="left",
                        left_on='result_message_4055', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['struct_id', 'result_message_4055'])
    table = table.rename(columns={"text": "message_text_4055"})

    # partialFingerprints
    table = table.merge(frames['Struct4199'], how="left",
                        left_on='partialFingerprints', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['struct_id', 'partialFingerprints'])

    # rule
    rules = frames['Struct3942'].rename(columns={"id": "rule_id", "index": "rule_index"})
    table = table.merge(rules, how="left", left_on='rule', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['struct_id', 'rule'])
    return table
|
||||
|
||||
def joins_for_codeflows(tgraph, sf_2683):
    """
    Build the `codeFlows` table that accompanies the `path-problem` table.

    Walks codeFlows -> threadFlows -> locations -> location and joins the
    flattened location table `sf_2683` (from joins_for_sf_2683()) at the end.
    The array id and the position at each array level are kept as columns.
    """
    frames = tgraph.dataframes

    # codeFlows array -> Struct7122
    flows = frames['Array9799'].rename(
        columns={"array_id": "t9799_array_id", "value_index": "t9799_idx"})
    flows = flows.merge(frames['Struct7122'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id',
                        validate="1:m")
    flows = flows.drop(columns=['id_or_value_at_index', 'struct_id', 'type_at_index'])

    # threadFlows array -> Struct4194
    thread_flows = frames['Array1597'].rename(
        columns={"array_id": "t1597_array_id", "value_index": "t1597_idx"})
    flows = flows.merge(thread_flows, how="left", left_on='threadFlows',
                        right_on='t1597_array_id', validate="1:m")
    flows = flows.drop(columns=['threadFlows', 't1597_array_id', 'type_at_index'])
    flows = flows.merge(frames['Struct4194'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id',
                        validate="1:m")
    flows = flows.drop(columns=['id_or_value_at_index', 'struct_id'])

    # locations array -> Struct0987
    steps = frames['Array1075'].rename(
        columns={"array_id": "t1075_array_id", "value_index": "t1075_idx"})
    flows = flows.merge(steps, how="left", left_on='locations',
                        right_on='t1075_array_id', validate="1:m")
    flows = flows.drop(columns=['locations', 't1075_array_id', 'type_at_index'])
    flows = flows.rename(columns={"t1075_idx": "t1075_locations_idx"})
    flows = flows.merge(frames['Struct0987'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id',
                        validate="1:m")
    flows = flows.drop(columns=['id_or_value_at_index', 'struct_id'])

    # location details
    flows = flows.merge(sf_2683, how="left", left_on='location',
                        right_on='struct_id_2683', validate="1:m")
    return flows.drop(columns=['location', 'struct_id_2683'])
|
||||
|
||||
def joins_for_path_problem(tgraph, sf_2683):
    """
    Build the `path-problem` table from the results array.

    `sf_2683` is the flattened location table from joins_for_sf_2683().
    The codeFlows and relatedLocations references are kept as ids in the
    columns `t9699_codeFlows` and `t9699_relatedLocations`.
    """
    frames = tgraph.dataframes

    # Every Struct9699 field gets a t9699_ prefix.
    prefixed = {
        field: "t9699_" + field
        for field in ("codeFlows", "locations", "message", "partialFingerprints",
                      "relatedLocations", "rule", "ruleId", "ruleIndex")
    }

    # Results array -> Struct9699.  The inner join keeps only the array
    # elements whose id is present in Struct9699.
    results = frames['Array6343'].rename(
        columns={"value_index": "t6343_result_idx", "array_id": "t6343_result_id"})
    table = results.merge(frames['Struct9699'], how="inner",
                          left_on='id_or_value_at_index', right_on='struct_id',
                          validate="1:m")
    table = table.rename(columns=prefixed)
    table = table.drop(columns=['id_or_value_at_index', 'struct_id', 'type_at_index'])

    # 9699.locations
    locations = frames['Array0350'].rename(columns={"value_index": "t0350_location_idx"})
    table = table.merge(locations, how="left", left_on='t9699_locations',
                        right_on='array_id', validate="1:m")
    table = table.drop(columns=['t9699_locations', 'array_id', 'type_at_index'])
    table = table.merge(sf_2683, how="left", left_on='id_or_value_at_index',
                        right_on='struct_id_2683', validate="1:m")
    table = table.drop(columns=['id_or_value_at_index', 'struct_id_2683'])

    # 9699.codeFlows -- not merged here.
    # TODO: merge or keep separate?

    # 9699.message
    table = table.merge(frames['Struct2774'], how="left", left_on='t9699_message',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['struct_id', 't9699_message'])
    table = table.rename(columns={"text": "t9699_message_text"})

    # 9699.partialFingerprints
    table = table.merge(frames['Struct4199'], how="left",
                        left_on='t9699_partialFingerprints', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['struct_id', 't9699_partialFingerprints'])

    # 9699.relatedLocations -- keep ids

    # 9699.rule
    rules = frames['Struct3942'].rename(
        columns={"id": "t3942_rule_id", "index": "t3942_rule_idx"})
    table = table.merge(rules, how="left", left_on='t9699_rule',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['struct_id', 't9699_rule'])

    # TODO potential cleanup: rows whose uri is the dummy value injected by
    # signature.fillsig are not removed here.
    return table
|
||||
|
||||
def joins_for_relatedLocations(tgraph, sf_2683):
    """
    Build the `relatedLocations` table for both result kinds.

    Starts from the union of the relatedLocations references of
    `kind problem` (Struct4055) and `kind path-problem` (Struct9699) and
    resolves them down to uri, region and message text.  The returned
    `struct_id` column is the id of the owning Struct4055/Struct9699 row.

    `sf_2683` is accepted but not used; the joins start from Struct2683.
    """
    frames = tgraph.dataframes

    reference_columns = ['relatedLocations', 'struct_id']
    owners = pd.concat([frames['Struct4055'][reference_columns],
                        frames['Struct9699'][reference_columns]])

    # relatedLocations array -> element ids
    joined = owners.merge(frames['Array0350'], how="left",
                          left_on='relatedLocations', right_on='array_id',
                          validate="1:m")
    joined = joined.drop(columns=['relatedLocations', 'array_id',
                                  'value_index', 'type_at_index'])

    # Both sides carry a struct_id column; the suffixes keep the owner's id.
    joined = joined.merge(frames['Struct2683'], how="left",
                          left_on='id_or_value_at_index', right_on='struct_id',
                          suffixes=("_4055_9699", "_2683"), validate="1:m")
    joined = joined.drop(columns=['struct_id_2683', 'id_or_value_at_index'])

    # Resolve the remaining references, discarding both join keys each time.
    for table, reference in (('Struct4963', 'physicalLocation'),
                             ('Struct6299', 'region'),
                             ('Struct2685', 'artifactLocation'),
                             ('Struct2774', 'message')):
        joined = joined.merge(frames[table], how="left", left_on=reference,
                              right_on='struct_id', validate="1:m")
        joined = joined.drop(columns=['struct_id', reference])

    # Keep columns of interest
    selected = joined[['struct_id_4055_9699', 'uri', 'startLine', 'startColumn',
                       'endLine', 'endColumn', 'text']]
    selected = selected.rename(
        columns={'text': 'message', 'struct_id_4055_9699': 'struct_id'})

    # Remove dummy locations previously injected by signature.fillsig
    is_real = selected.uri != 'scli-dyys dummy value'
    return selected[is_real]
|
||||
|
||||
def joins_for_project(tgraph):
    """
    Build the `project` table, the per-sarif-file overview.

    Starts at the metadata array (Array6785) and joins through the sarif
    document, its runs, the tool driver with its rules and rule tags, and the
    versionControlProvenance entries.  `artifacts` and `results` are kept as
    ids; `newlineSequences` is dropped without being expanded.
    """
    frames = tgraph.dataframes

    # metadata entries
    table = frames['Array6785'].merge(
        frames['Struct3739'], how="left", left_on='id_or_value_at_index',
        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['id_or_value_at_index', 'struct_id',
                                'array_id', 'type_at_index'])

    # sarif document
    table = table.merge(frames['Struct6787'], how="left", left_on='sarif_content',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['sarif_content', 'struct_id'])
    table = table.rename(columns={"version": "version_6787"})

    # runs; both sides carry value_index, so the suffixes tell them apart
    table = table.merge(frames['Array0177'], how="left", left_on='runs',
                        right_on='array_id', suffixes=("_6785", "_0177"),
                        validate="1:m")
    table = table.drop(columns=['runs', 'array_id', 'type_at_index'])
    table = table.merge(frames['Struct3388'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['id_or_value_at_index', 'struct_id'])

    # newlineSequences is not expanded
    table = table.drop(columns=['newlineSequences'])

    # run properties
    table = table.merge(frames['Struct9543'], how="left", left_on='properties',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['properties', 'struct_id'])

    #
    # tool - driver - rules - defaultConfiguration - ( properties - tags )
    #
    table = table.merge(frames['Struct8972'], how="left", left_on='tool',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['tool', 'struct_id'])

    table = table.merge(frames['Struct7820'], how="left", left_on='driver',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['driver', 'struct_id'])
    table = table.rename(columns={"version": "driver_version_7820",
                                  "name": "driver_name_7820"})

    table = table.merge(frames['Array8754'], how="left", left_on='rules',
                        right_on='array_id', validate="1:m")
    table = table.drop(columns=['rules', 'array_id', 'type_at_index'])
    table = table.rename(columns={"value_index": "rule_value_index_8754"})  # rule index

    table = table.merge(frames['Struct6818'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['id_or_value_at_index', 'struct_id'])
    table = table.rename(columns={"id": "rule_id_6818", "name": "rule_name_6818"})

    table = table.merge(frames['Struct8581'], how="left",
                        left_on='defaultConfiguration', right_on='struct_id',
                        validate="1:m")
    table = table.drop(columns=['defaultConfiguration', 'struct_id'])

    table = table.merge(frames['Struct2774'], how="left", left_on='fullDescription',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['fullDescription', 'struct_id'])
    table = table.rename(columns={"text": "rule_fullDescription_6818"})

    table = table.merge(frames['Struct2774'], how="left", left_on='shortDescription',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['shortDescription', 'struct_id'])
    table = table.rename(columns={"text": "rule_shortDescription_6818"})

    table = table.merge(frames['Struct7849'], how="left", left_on='properties',
                        right_on='struct_id', validate="1:m")
    table = table.drop(columns=['properties', 'struct_id'])

    table = table.merge(frames['Array7069'], how="left", left_on='tags',
                        right_on='array_id', validate="1:m")
    table = table.drop(columns=['tags', 'array_id', 'type_at_index'])
    table = table.rename(columns={"value_index": "tag_index_7069",
                                  "id_or_value_at_index": "tag_text_7069"})

    # versionControlProvenance - repositoryUri
    # The merge with af(8754) replicates versionControlProvenance, no 1:m validation
    table = table.merge(frames['Array5511'], how="left",
                        left_on='versionControlProvenance', right_on='array_id')
    table = table.drop(columns=['versionControlProvenance', 'array_id', 'type_at_index'])
    table = table.rename(columns={"value_index": "versionControl_value_index_5511"})

    table = table.merge(frames['Struct3081'], how="left",
                        left_on='id_or_value_at_index', right_on='struct_id')
    table = table.drop(columns=['id_or_value_at_index', 'struct_id'])
    return table
|
||||
|
||||
def joins_for_artifacts(tgraph):
    """
    Return table providing the `artifacts` information.

    The table is built by left-joining three of the typegraph's dataframes:

    1. ``Array4640`` -- the starting rows; its ``id_or_value_at_index``
       column is matched against ``Struct5277.struct_id``.
    2. ``Struct5277`` -- contributes the ``location`` column, which is matched
       against ``Struct2685.struct_id``.
    3. ``Struct2685`` -- contributes ``index``, ``uri`` and ``uriBaseId``.

    The join-key columns and ``type_at_index`` are dropped once used, and the
    payload columns are renamed with the number of the table they came from:
    ``artifact_index_4640``, ``location_index_2685``, ``location_uri_2685``
    and ``location_uriBaseId_2685``.  Any other column of ``Array4640`` is
    passed through unchanged.

    :param tgraph: object with a ``dataframes`` mapping from type name
        (e.g. ``'Array4640'``) to a pandas DataFrame.
    :return: the joined pandas DataFrame.
    :raises KeyError: if one of the three tables, or a column named in a
        ``drop``, is missing.
    :raises pandas.errors.MergeError: if the left-hand join keys are not
        unique (enforced by ``validate="1:m"``).
    """
    # Access convenience functions
    def sf(num):
        return tgraph.dataframes['Struct' + str(num)]

    def af(num):
        return tgraph.dataframes['Array' + str(num)]

    artifacts_df = (
        af(4640)
        # array entry -> Struct5277 row
        .merge(sf(5277), how="left", left_on='id_or_value_at_index',
               right_on='struct_id', validate="1:m")
        .drop(columns=['id_or_value_at_index', 'struct_id', 'type_at_index'])
        .rename(columns={"value_index": "artifact_index_4640"})
        # Struct5277.location -> Struct2685 row
        .merge(sf(2685), how="left", left_on='location',
               right_on='struct_id', validate="1:m")
        .drop(columns=['location', 'struct_id'])
        .rename(columns={"index": "location_index_2685",
                         "uri": "location_uri_2685",
                         "uriBaseId": "location_uriBaseId_2685"})
    )
    return artifacts_df
|
||||
@@ -11,124 +11,6 @@ from dataclasses import dataclass
|
||||
from typing import *
|
||||
import pandas as pd
|
||||
|
||||
#
# Structure graph from ../../bin/sarif-to-dot -u -t -f results.sarif
#
# The graph is a list of (type name, definition) pairs.  A definition is either
# a primitive type name given as a plain string, or a tuple whose first element
# is 'struct' or 'array', followed by (field name, type name) pairs for a
# struct and (index, type name) pairs for an array.
#
struct_graph_2022_02_01 = [
    ('String', 'string'),
    ('Int', 'int'),
    ('Bool', 'bool'),
    ('Struct2685',
     ('struct',
      ('index', 'Int'),
      ('uri', 'String'),
      ('uriBaseId', 'String'))),
    ('Struct5277', ('struct', ('location', 'Struct2685'))),
    ('Array4640', ('array', (0, 'Struct5277'))),
    ('Array7069', ('array', (0, 'String'))),
    ('Struct9543',
     ('struct',
      ('semmle.formatSpecifier', 'String'),
      ('semmle.sourceLanguage', 'String'))),
    ('Struct2774', ('struct', ('text', 'String'))),
    ('Struct6299',
     ('struct',
      ('endColumn', 'Int'),
      ('endLine', 'Int'),
      ('startColumn', 'Int'),
      ('startLine', 'Int'))),
    ('Struct4963',
     ('struct',
      ('artifactLocation', 'Struct2685'),
      ('region', 'Struct6299'))),
    ('Struct2683',
     ('struct',
      ('id', 'Int'),
      ('message', 'Struct2774'),
      ('physicalLocation', 'Struct4963'))),
    ('Array0350', ('array', (0, 'Struct2683'))),
    ('Struct4199',
     ('struct',
      ('primaryLocationLineHash', 'String'),
      ('primaryLocationStartColumnFingerprint', 'String'))),
    ('Struct3942', ('struct', ('id', 'String'), ('index', 'Int'))),
    ('Struct4055',
     ('struct',
      ('locations', 'Array0350'),
      ('message', 'Struct2774'),
      ('partialFingerprints', 'Struct4199'),
      ('relatedLocations', 'Array0350'),
      ('rule', 'Struct3942'),
      ('ruleId', 'String'),
      ('ruleIndex', 'Int'))),
    ('Struct0987', ('struct', ('location', 'Struct2683'))),
    ('Array1075', ('array', (0, 'Struct0987'))),
    ('Struct4194', ('struct', ('locations', 'Array1075'))),
    ('Array1597', ('array', (0, 'Struct4194'))),
    ('Struct7122', ('struct', ('threadFlows', 'Array1597'))),
    ('Array9799', ('array', (0, 'Struct7122'))),
    ('Struct9699',
     ('struct',
      ('codeFlows', 'Array9799'),
      ('locations', 'Array0350'),
      ('message', 'Struct2774'),
      ('partialFingerprints', 'Struct4199'),
      ('relatedLocations', 'Array0350'),
      ('rule', 'Struct3942'),
      ('ruleId', 'String'),
      ('ruleIndex', 'Int'))),
    ('Array6343', ('array', (1, 'Struct9699'), (0, 'Struct4055'))),  # MANUALLY SORTED
    ('Struct8581', ('struct', ('enabled', 'Bool'), ('level', 'String'))),
    ('Struct7849',
     ('struct',
      ('kind', 'String'),
      ('precision', 'String'),
      ('security-severity', 'String'),
      ('severity', 'String'),
      ('sub-severity', 'String'),
      ('tags', 'Array7069'))),
    ('Struct6818',
     ('struct',
      ('defaultConfiguration', 'Struct8581'),
      ('fullDescription', 'Struct2774'),
      ('id', 'String'),
      ('name', 'String'),
      ('properties', 'Struct7849'),
      ('shortDescription', 'Struct2774'))),
    ('Array8754', ('array', (0, 'Struct6818'))),
    ('Struct7820',
     ('struct',
      ('name', 'String'),
      ('organization', 'String'),
      ('rules', 'Array8754'),
      ('version', 'String'))),
    ('Struct8972', ('struct', ('driver', 'Struct7820'))),
    ('Struct3081',
     ('struct', ('repositoryUri', 'String'), ('revisionId', 'String'))),
    ('Array5511', ('array', (0, 'Struct3081'))),
    ('Struct3388',
     ('struct',
      ('artifacts', 'Array4640'),
      ('columnKind', 'String'),
      ('newlineSequences', 'Array7069'),
      ('properties', 'Struct9543'),
      ('results', 'Array6343'),
      ('tool', 'Struct8972'),
      ('versionControlProvenance', 'Array5511'))),
    ('Array0177', ('array', (0, 'Struct3388'))),
    ('Struct6787',
     ('struct',
      ('$schema', 'String'),
      ('runs', 'Array0177'),
      ('version', 'String'))),
]

#
# The starting node is the typedef with '$schema' in the struct, also the leftmost
# node in ../notes/sarif-structure-from-sarif-to-dot.pdf
#
start_node_2022_02_01 = 'Struct6787'
|
||||
|
||||
#
|
||||
# Utility classes
|
||||
#
|
||||
|
||||
@@ -29,7 +29,7 @@ done
|
||||
# cases covering the different output options. They are intended for manual use
|
||||
# and review.
|
||||
#
|
||||
read -r file srcroot <<< "../data/treeio/results.sarif ../data/treeio/treeio"
|
||||
read -r file srcroot <<< "../data/treeio/2021-12-09/results.sarif ../data/treeio/treeio"
|
||||
|
||||
# All results, minimal output
|
||||
sarif-results-summary $file | less
|
||||
|
||||
11
scripts/table-tests.sh
Normal file
11
scripts/table-tests.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
# -*- sh -*-
#
# Sanity tests for the table-producing scripts. Should succeed and produce
# nothing on stdout/stderr
#
# Each test runs in its own subshell and only after its `cd` succeeded, so a
# missing data directory cannot cause the extractor to run -- and write its
# output -- in whatever directory happens to be current.
#

( cd ~/local/sarif-cli/data/treeio/2021-12-09 &&
      sarif-extract-tables results.sarif test-tables )

( cd ~/local/sarif-cli/data/treeio &&
      sarif-extract-multi multi-sarif-01.json test-multi-table )
|
||||
Reference in New Issue
Block a user