mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Add overview of the base tables derived from multi-sarif input; add rules.csv
The table overview is in the Jupyter notebook scripts/multi-table-overview.ipynb, which uses some formatting customizations to make the tables fit into an actual overview. The initial `project` table had far too many entries; the `rules` part is now in a separate `rules` table.
This commit is contained in:
committed by
=Michael Hohn
parent
926e083991
commit
b82c620a1e
5
.gitignore
vendored
5
.gitignore
vendored
@@ -16,3 +16,8 @@
|
|||||||
# Temporary files
|
# Temporary files
|
||||||
foo*
|
foo*
|
||||||
tmp*
|
tmp*
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# MacOS index files
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
python3 -m pip install -r requirements.txt
|
python3 -m pip install -r requirements.txt
|
||||||
# Or separately:
|
# Or separately:
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install ipython pyyaml pandas
|
pip install ipython pyyaml pandas jupyter
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
"Install" for local development:
|
"Install" for local development:
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ import json
|
|||||||
import pathlib
|
import pathlib
|
||||||
from sarif_cli import signature, signature_multi
|
from sarif_cli import signature, signature_multi
|
||||||
from sarif_cli import typegraph
|
from sarif_cli import typegraph
|
||||||
|
from dataclasses import dataclass
|
||||||
import sarif_cli.table_joins as tj
|
import sarif_cli.table_joins as tj
|
||||||
import sys
|
import sys
|
||||||
from collections import defaultdict
|
from collections import UserDict
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -62,27 +63,43 @@ typegraph.attach_tables(tgraph)
|
|||||||
#
|
#
|
||||||
# Form dataframes originally introduced by sarif-extract-tables
|
# Form dataframes originally introduced by sarif-extract-tables
|
||||||
#
|
#
|
||||||
|
@dataclass
class BaseTables:
    """Container for the base tables derived from the multi-sarif input.

    One attribute per table; each is meant to hold a pandas DataFrame.
    Instances are created empty via BaseTables() and the tables are attached
    afterwards by attribute assignment.

    Every field defaults to None (meaning "not computed yet") so that the
    dataclass-generated __init__, __repr__ and __eq__ all work on a freshly
    created instance.  The previous hand-written no-op __init__ left the
    attributes undefined, which made repr() or == raise AttributeError until
    every table had been assigned.
    """
    kind_problem : pd.DataFrame = None
    kind_pathproblem : pd.DataFrame = None
    codeflows : pd.DataFrame = None
    relatedLocations : pd.DataFrame = None
    project : pd.DataFrame = None
    rules : pd.DataFrame = None
    artifacts : pd.DataFrame = None
|
||||||
|
|
||||||
|
bt = BaseTables()
|
||||||
|
|
||||||
sf_2683 = tj.joins_for_sf_2683(tgraph)
|
sf_2683 = tj.joins_for_sf_2683(tgraph)
|
||||||
kind_problem = tj.joins_for_problem(tgraph, sf_2683)
|
bt.kind_problem = tj.joins_for_problem(tgraph, sf_2683)
|
||||||
kind_pathproblem = tj.joins_for_path_problem(tgraph, sf_2683)
|
bt.kind_pathproblem = tj.joins_for_path_problem(tgraph, sf_2683)
|
||||||
codeflows_9799 = tj.joins_for_codeflows(tgraph, sf_2683)
|
bt.codeflows = tj.joins_for_codeflows(tgraph, sf_2683)
|
||||||
related_locations = tj.joins_for_relatedLocations(tgraph, sf_2683)
|
bt.relatedLocations = tj.joins_for_relatedLocations(tgraph, sf_2683)
|
||||||
#
|
#
|
||||||
# Form the new dataframes
|
# Form the new dataframes
|
||||||
#
|
#
|
||||||
project_df = tj.joins_for_project(tgraph)
|
bt.project = tj.joins_for_project(tgraph)
|
||||||
artifacts_df = tj.joins_for_artifacts(tgraph)
|
bt.rules = tj.joins_for_rules(tgraph)
|
||||||
|
bt.artifacts = tj.joins_for_artifacts(tgraph)
|
||||||
#
|
#
|
||||||
# Write output
|
# Write output
|
||||||
#
|
#
|
||||||
p = pathlib.Path(args.outdir)
|
p = pathlib.Path(args.outdir)
|
||||||
p.mkdir(exist_ok=True)
|
p.mkdir(exist_ok=True)
|
||||||
def write(path, frame):
|
def write(path, frame):
|
||||||
with p.joinpath(path).open(mode='wb') as fh:
|
with p.joinpath(path + ".csv").open(mode='wb') as fh:
|
||||||
frame.to_csv(fh, index_label='index')
|
frame.to_csv(fh, index_label='index')
|
||||||
write('problem.csv', kind_problem)
|
write('kind_problem', bt.kind_problem)
|
||||||
write('path-problem.csv', kind_pathproblem)
|
write('kind_pathproblem', bt.kind_pathproblem)
|
||||||
write('codeflows.csv', codeflows_9799)
|
write('codeflows', bt.codeflows)
|
||||||
write('related-locations.csv', related_locations)
|
write('relatedLocations', bt.relatedLocations)
|
||||||
write('project.csv', project_df)
|
write('project', bt.project)
|
||||||
write('artifacts.csv', artifacts_df)
|
write('rules', bt.rules)
|
||||||
|
write('artifacts', bt.artifacts)
|
||||||
|
|
||||||
|
|||||||
@@ -1,15 +1,68 @@
|
|||||||
appnope==0.1.2
|
appnope==0.1.2
|
||||||
|
argon2-cffi==21.3.0
|
||||||
|
argon2-cffi-bindings==21.2.0
|
||||||
|
asttokens==2.0.5
|
||||||
|
attrs==21.4.0
|
||||||
backcall==0.2.0
|
backcall==0.2.0
|
||||||
decorator==5.1.0
|
beautifulsoup4==4.10.0
|
||||||
ipython==7.28.0
|
bleach==4.1.0
|
||||||
jedi==0.18.0
|
cffi==1.15.0
|
||||||
|
debugpy==1.5.1
|
||||||
|
decorator==5.1.1
|
||||||
|
defusedxml==0.7.1
|
||||||
|
entrypoints==0.4
|
||||||
|
executing==0.8.3
|
||||||
|
ipykernel==6.9.2
|
||||||
|
ipython==8.1.1
|
||||||
|
ipython-genutils==0.2.0
|
||||||
|
ipywidgets==7.6.5
|
||||||
|
jedi==0.18.1
|
||||||
|
Jinja2==3.0.3
|
||||||
|
jsonschema==4.4.0
|
||||||
|
jupyter==1.0.0
|
||||||
|
jupyter-client==7.1.2
|
||||||
|
jupyter-console==6.4.3
|
||||||
|
jupyter-core==4.9.2
|
||||||
|
jupyterlab-pygments==0.1.2
|
||||||
|
jupyterlab-widgets==1.0.2
|
||||||
|
MarkupSafe==2.1.1
|
||||||
matplotlib-inline==0.1.3
|
matplotlib-inline==0.1.3
|
||||||
parso==0.8.2
|
mistune==0.8.4
|
||||||
|
nbclient==0.5.13
|
||||||
|
nbconvert==6.4.4
|
||||||
|
nbformat==5.2.0
|
||||||
|
nest-asyncio==1.5.4
|
||||||
|
notebook==6.4.10
|
||||||
|
numpy==1.22.3
|
||||||
|
packaging==21.3
|
||||||
|
pandas==1.4.1
|
||||||
|
pandocfilters==1.5.0
|
||||||
|
parso==0.8.3
|
||||||
pexpect==4.8.0
|
pexpect==4.8.0
|
||||||
pickleshare==0.7.5
|
pickleshare==0.7.5
|
||||||
prompt-toolkit==3.0.20
|
prometheus-client==0.13.1
|
||||||
|
prompt-toolkit==3.0.28
|
||||||
|
psutil==5.9.0
|
||||||
ptyprocess==0.7.0
|
ptyprocess==0.7.0
|
||||||
Pygments==2.10.0
|
pure-eval==0.2.2
|
||||||
|
pycparser==2.21
|
||||||
|
Pygments==2.11.2
|
||||||
|
pyparsing==3.0.7
|
||||||
|
pyrsistent==0.18.1
|
||||||
|
python-dateutil==2.8.2
|
||||||
|
pytz==2021.3
|
||||||
PyYAML==6.0
|
PyYAML==6.0
|
||||||
traitlets==5.1.0
|
pyzmq==22.3.0
|
||||||
|
qtconsole==5.2.2
|
||||||
|
QtPy==2.0.1
|
||||||
|
Send2Trash==1.8.0
|
||||||
|
six==1.16.0
|
||||||
|
soupsieve==2.3.1
|
||||||
|
stack-data==0.2.0
|
||||||
|
terminado==0.13.3
|
||||||
|
testpath==0.6.0
|
||||||
|
tornado==6.1
|
||||||
|
traitlets==5.1.1
|
||||||
wcwidth==0.2.5
|
wcwidth==0.2.5
|
||||||
|
webencodings==0.5.1
|
||||||
|
widgetsnbextension==3.5.2
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
provides those for the other tables.
|
provides those for the other tables.
|
||||||
"""
|
"""
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from .typegraph import tagged_array_columns, tagged_struct_columns
|
||||||
|
|
||||||
def joins_for_sf_2683(tgraph):
|
def joins_for_sf_2683(tgraph):
|
||||||
"""
|
"""
|
||||||
@@ -256,31 +257,6 @@ def joins_for_project(tgraph):
|
|||||||
.drop(columns=['driver', 'struct_id'])
|
.drop(columns=['driver', 'struct_id'])
|
||||||
.rename(columns={"version": "driver_version_7820", "name": "driver_name_7820"})
|
.rename(columns={"version": "driver_version_7820", "name": "driver_name_7820"})
|
||||||
#
|
#
|
||||||
.merge(af(8754), how="left", left_on='rules', right_on='array_id', validate="1:m")
|
|
||||||
.drop(columns=['rules', 'array_id', 'type_at_index'])
|
|
||||||
.rename(columns={"value_index": "rule_value_index_8754"}) # rule index
|
|
||||||
#
|
|
||||||
.merge(sf(6818), how="left", left_on='id_or_value_at_index', right_on='struct_id', validate="1:m")
|
|
||||||
.drop(columns=['id_or_value_at_index', 'struct_id'])
|
|
||||||
.rename(columns={"id": "rule_id_6818", "name": "rule_name_6818"})
|
|
||||||
#
|
|
||||||
.merge(sf(8581), how="left", left_on='defaultConfiguration', right_on='struct_id', validate="1:m")
|
|
||||||
.drop(columns=['defaultConfiguration', 'struct_id'])
|
|
||||||
#
|
|
||||||
.merge(sf(2774), how="left", left_on='fullDescription', right_on='struct_id', validate="1:m")
|
|
||||||
.drop(columns=['fullDescription', 'struct_id'])
|
|
||||||
.rename(columns={"text": "rule_fullDescription_6818"})
|
|
||||||
#
|
|
||||||
.merge(sf(2774), how="left", left_on='shortDescription', right_on='struct_id', validate="1:m")
|
|
||||||
.drop(columns=['shortDescription', 'struct_id'])
|
|
||||||
.rename(columns={"text": "rule_shortDescription_6818"})
|
|
||||||
#
|
|
||||||
.merge(sf(7849), how="left", left_on='properties', right_on='struct_id', validate="1:m")
|
|
||||||
.drop(columns=['properties', 'struct_id'])
|
|
||||||
#
|
|
||||||
.merge(af(7069), how="left", left_on='tags', right_on='array_id', validate="1:m")
|
|
||||||
.drop(columns=['tags', 'array_id', 'type_at_index'])
|
|
||||||
.rename(columns={"value_index": "tag_index_7069", "id_or_value_at_index": "tag_text_7069"})
|
|
||||||
# versionControlProvenance - repositoryUri
|
# versionControlProvenance - repositoryUri
|
||||||
# The merge with af(8754) replicates versionControlProvenance, no 1:m validation
|
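# NOTE: the af(8754) merge that used to replicate versionControlProvenance is no longer part of this function (see joins_for_rules); the merge below still has no 1:m validation -- TODO confirm whether validate="1:m" can now be added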
|
||||||
.merge(af(5511), how="left", left_on='versionControlProvenance', right_on='array_id')
|
.merge(af(5511), how="left", left_on='versionControlProvenance', right_on='array_id')
|
||||||
@@ -293,6 +269,50 @@ def joins_for_project(tgraph):
|
|||||||
)
|
)
|
||||||
return project_df
|
return project_df
|
||||||
|
|
||||||
|
def joins_for_rules(tgraph):
    """
    Return table providing the `rules` information.

    The table starts from Array8754 and left-joins, in this order:
    Struct6818 (via the array's id_or_value_at_index), Struct8581 (via
    defaultConfiguration), Struct2774 twice (via fullDescription and
    shortDescription), Struct7849 (via properties) and Array7069 (via tags).

    All columns are renamed to their id-tagged form (t<type id>_<column>)
    before joining, so equally named columns of different tables cannot
    collide.  The join-key columns are dropped after each step.  The two
    Struct2774 `text` columns end up as `t6818_t2774_fullDescription` and
    `t6818_t2774_shortDescription`.
    """
    def tagged_struct(num):
        # Struct<num> table with id-tagged column names
        return tgraph.dataframes['Struct' + str(num)].rename(
            columns = tagged_struct_columns(tgraph, num))

    def tagged_array(num):
        # Array<num> table with id-tagged column names
        return tgraph.dataframes['Array' + str(num)].rename(
            columns = tagged_array_columns(tgraph, num))

    def attach(left, right, left_key, right_key, also_drop=()):
        # Left join, then discard the key columns (and any extras).
        # validate="1:m" has pandas check that the left keys are unique.
        joined = left.merge(right, how="left", left_on=left_key,
                            right_on=right_key, validate="1:m")
        return joined.drop(columns=[left_key, right_key, *also_drop])

    # Array8754 entries; the type column is not needed
    rules_df = tagged_array(8754).drop(columns=['t8754_type_at_index'])

    # Struct6818 rows referenced by the array entries
    rules_df = attach(rules_df, tagged_struct(6818),
                      't8754_id_or_value_at_index', 't6818_struct_id')

    # defaultConfiguration -> Struct8581
    rules_df = attach(rules_df, tagged_struct(8581),
                      't6818_defaultConfiguration', 't8581_struct_id')

    # fullDescription -> Struct2774; rename its text column right away so
    # the second Struct2774 join below can bring in a fresh `t2774_text`
    rules_df = attach(rules_df, tagged_struct(2774),
                      't6818_fullDescription', 't2774_struct_id')
    rules_df = rules_df.rename(
        columns={'t2774_text': "t6818_t2774_fullDescription"})

    # shortDescription -> Struct2774
    rules_df = attach(rules_df, tagged_struct(2774),
                      't6818_shortDescription', 't2774_struct_id')
    rules_df = rules_df.rename(
        columns={"t2774_text": 't6818_t2774_shortDescription'})

    # properties -> Struct7849
    rules_df = attach(rules_df, tagged_struct(7849),
                      't6818_properties', 't7849_struct_id')

    # tags -> Array7069
    rules_df = attach(rules_df, tagged_array(7069),
                      't7849_tags', 't7069_array_id',
                      also_drop=['t7069_type_at_index'])

    return rules_df
|
||||||
|
|
||||||
def joins_for_artifacts(tgraph):
|
def joins_for_artifacts(tgraph):
|
||||||
"""
|
"""
|
||||||
Return table providing the `artifacts` information.
|
Return table providing the `artifacts` information.
|
||||||
|
|||||||
@@ -251,3 +251,30 @@ def attach_tables(typegraph):
|
|||||||
continue # skip String etc.
|
continue # skip String etc.
|
||||||
typegraph.dataframes[typedef] = pd.DataFrame(valarray, columns = colheader)
|
typegraph.dataframes[typedef] = pd.DataFrame(valarray, columns = colheader)
|
||||||
|
|
||||||
|
|
||||||
|
def tagged_array_columns(typegraph, array_id):
    """ Return a dict mapping the array column names to versions tagged with the id.

    Example:
    The original table headers are

        array_id   value_index   type_at_index   id_or_value_at_index

    the tagged versions become

        t8754_array_id   t8754_value_index   t8754_type_at_index   t8754_id_or_value_at_index

    Parameters:
        typegraph: not used by this function; kept in the signature so it is
            called the same way as tagged_struct_columns.
        array_id: id of the array type (e.g. 8754 for Array8754); converted
            with str(), so ints and strings are both accepted.

    Returns:
        dict of { header : "t<array_id>_<header>" } for the four array
        table headers, usable as the `columns` argument of DataFrame.rename.
    """
    tag = str(array_id)
    colheader = ('array_id', 'value_index', 'type_at_index', 'id_or_value_at_index')
    return {header: f"t{tag}_{header}" for header in colheader}
|
||||||
|
|
||||||
|
|
||||||
|
def tagged_struct_columns(typegraph, struct_id):
    """ Return a dict mapping the struct column names to versions tagged with the id.

    The column names are 'struct_id' followed by the entries of
    typegraph.fields['Struct<struct_id>'].

    Example:
    If typegraph.fields['Struct6818'] is ['id', 'name'], the headers

        struct_id   id   name

    are mapped to

        t6818_struct_id   t6818_id   t6818_name
    """
    tag = str(struct_id)
    field_names = typegraph.fields['Struct' + tag]
    tagged = {}
    for header in ('struct_id', *field_names):
        tagged[header] = f"t{tag:s}_{header:s}"
    return tagged
|
||||||
|
|||||||
4271
scripts/multi-table-overview.ipynb
Normal file
4271
scripts/multi-table-overview.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user