pre-alpha versions of bin/sarif-{digest,labeled,list-files,results-summary

This commit is contained in:
Michael Hohn
2021-11-09 12:20:28 -08:00
committed by =Michael Hohn
parent d180a079b0
commit 3032fe3fcd
14 changed files with 301 additions and 0 deletions

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
*.sarif filter=lfs diff=lfs merge=lfs -text

15
.gitignore vendored Normal file
View File

@@ -0,0 +1,15 @@
# Compiled python modules.
*.pyc
# Setuptools distribution folder.
/dist/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
# virtual environment
/.venv/
# Backup files
*~

1
MANIFEST.in Normal file
View File

@@ -0,0 +1 @@
include README.org

55
README.org Normal file
View File

@@ -0,0 +1,55 @@
* Collection of cli tools for SARIF processing
This is a work in progress; the plan is as follows:
Each of these tools presents a high-level command-line interface to extract a
specific subset of information from a SARIF file. The format of each tool's
/output/ is versioned and, as much as possible, independent of the input.
It is the intent of these tools to
- hide the internals of sarif when /used/
- provide examples of extracting information from sarif files, for use when
writing your own tools or extending these
* Setup for development
Set up the virtual environment and install the packages:
# pip freeze > requirements.txt
#+BEGIN_SRC sh
python3 -m venv .venv
. .venv/bin/activate
python3 -m pip install -r requirements.txt
# Or separately:
pip install --upgrade pip
pip install ipython pyyaml
#+END_SRC
"Install" for local development:
#+BEGIN_SRC sh
pip install -e .
#+END_SRC
* Sample Data
The query results in =data/= are taken from lgtm.com, which ran the
: ql/$LANG/ql/src/codeql-suites/$LANG-lgtm.qls
queries.
The linux kernel has both single-location results (="kind": "problem"=) and path
results (="kind": "path-problem"=). It also has results for multiple source
languages.
The subset of files referenced by the sarif results is in =data/linux-small/=
and is taken from
#+begin_src javascript
"versionControlProvenance": [
{
"repositoryUri": "https://github.com/torvalds/linux.git",
"revisionId": "d9abdee5fd5abffd0e763e52fbfa3116de167822"
}
]
#+end_src
* Commands
#+OPTIONS: ^:{}

6
bin/json-to-yaml Executable file
View File

@@ -0,0 +1,6 @@
#!/usr/bin/env python
import json
import yaml
import sys
# Parse the JSON document arriving on stdin, then emit it as YAML on stdout.
document = json.load(sys.stdin)
yaml.dump(document, stream=sys.stdout)

38
bin/sarif-digest Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python
import json
import sarif_cli as S
import sys
# Command line: sarif-digest [<file>]
#
# Reduce the size of a SARIF document by listing only the first and last
# elements of long arrays.  The input is read from <file>, or from stdin when
# <file> is omitted or given as '-' (the convention of the other sarif-* tools).
fpath = sys.argv[1] if len(sys.argv) > 1 else '-'
if fpath == '-':
    sarif_struct = json.load(sys.stdin)
else:
    with open(fpath, 'r') as fp:
        sarif_struct = json.load(fp)
def _show_dict(elem, context):
    """Return a new dict holding the compacted form of every value of ELEM.

    Each value is passed to _compact together with its own key; CONTEXT is
    accepted but not used here.
    """
    compacted = {}
    for key, val in elem.items():
        compacted[key] = _compact(val, key)
    return compacted
def _show_list(elem, context):
    """Return a shortened, compacted version of the list ELEM.

    - An empty list is returned as is.
    - A list of one or two items is returned with each item compacted.
    - A longer list is replaced by a banner string giving the item count,
      followed by the compacted first and last items.

    CONTEXT is accepted but not used here.
    """
    count = len(elem)
    if count == 0:
        # Nothing to compact; hand back the empty list itself.
        return elem
    if count <= 2:
        return [_compact(item, index) for index, item in enumerate(elem)]
    banner = "------------%d items, showing first and last ----------" % count
    return [banner, _compact(elem[0], 0), _compact(elem[-1], -1)]
def _compact(elem, context):
    """Dispatch ELEM to the compaction routine matching its exact type.

    Lists go to _show_list and dicts to _show_dict, each together with
    CONTEXT; any other value is returned unchanged.
    """
    kind = type(elem)
    if kind == list:
        return _show_list(elem, context)
    if kind == dict:
        return _show_dict(elem, context)
    return elem
# Write the compacted structure to stdout as indented JSON.
digest = _compact(sarif_struct, "starting")
json.dump(digest, sys.stdout, indent=2)

50
bin/sarif-labeled Normal file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python
import argparse
import json
import sarif_cli as S
import sys
import collections
# TODO
# require python 3.7+ for ordered dictionaries?
# Command line: one positional argument naming the input; '-' selects stdin.
parser = argparse.ArgumentParser(
    description='Output a sarif file with labeled paths preceeding arrays and objects')
parser.add_argument(
    'file',
    metavar='file',
    type=str,
    help='input file, - for stdin',
)
args = parser.parse_args()

# Load the whole SARIF document; the with-statement closes the stream afterwards.
source = sys.stdin if args.file == '-' else open(args.file, 'r')
with source as fp:
    sarif_struct = json.load(fp)
def _label_dict(elem, path):
    """Return an ordered copy of the dict ELEM with path labels inserted.

    For every key whose value is a dict or a list, an extra entry mapping the
    value's full path (PATH followed by ['key']) to the marker "----path----"
    is inserted just before the key's own entry.  Every value is labeled in
    turn through _label.
    """
    labeled = collections.OrderedDict()
    for key in elem:
        val = elem[key]
        subpath = path + ("['%s']" % key)
        if type(val) == dict or type(val) == list:
            labeled[subpath] = "----path----"
        labeled[key] = _label(val, subpath)
    return labeled
def _label_list(elem, path):
    """Return a labeled copy of the list ELEM.

    An empty list is returned as is.  Otherwise every item is labeled through
    _label, and a "---- <path>[i] ----" marker string is inserted in front of
    every fourth item (indices 0, 4, 8, ...).
    """
    if len(elem) == 0:
        return elem
    labeled = []
    for index, item in enumerate(elem):
        subpath = path + ("[%d]" % index)
        if index % 4 == 0:
            labeled.append("---- %s ----" % subpath)
        labeled.append(_label(item, subpath))
    return labeled
def _label(elem, path):
    """Dispatch ELEM to the labeling routine matching its exact type.

    Lists go to _label_list and dicts to _label_dict, each together with
    PATH; any other value is returned unchanged.
    """
    kind = type(elem)
    if kind == list:
        return _label_list(elem, path)
    if kind == dict:
        return _label_dict(elem, path)
    return elem
# Write the labeled structure to stdout as indented JSON; the root of every
# path label is the name "sarif_struct".
labeled_struct = _label(sarif_struct, "sarif_struct")
json.dump(labeled_struct, sys.stdout, indent=2)

46
bin/sarif-list-files Executable file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python
import argparse
import json
import sarif_cli as S
import sys
import collections
# Command line: one positional argument naming the SARIF input; '-' selects stdin.
parser = argparse.ArgumentParser(
    description='list source files referenced by sarif file')
parser.add_argument(
    'file',
    metavar='sarif-file',
    type=str,
    help='input file, - for stdin',
)
args = parser.parse_args()
# Grab the file
with open(args.file, 'r') if args.file != '-' else sys.stdin as fp:
    sarif_struct = json.load(fp)

# Make sure there are some results; with none there is nothing to list.
# sarif_cli defines no exit(), so leave through sys.exit.
num_results = len(S.get(sarif_struct, 'runs', 0, 'results'))
if num_results == 0:
    sys.exit(0)
# Collect the file names; a set removes duplicates.
results = S.get(sarif_struct, 'runs', 0, 'results')

# Locations for @kind problem: the first location of every result, e.g.
#   sarif_struct['runs'][0]['results'][5]['locations'][0]['physicalLocation']['artifactLocation']
uris = {
    S.get(result, 'locations', 0, 'physicalLocation', 'artifactLocation', 'uri')
    for result in results
}

# Locations for @kind path-problem: every step of the first thread flow of
# the first code flow, e.g.
#   sarif_struct['runs'][0]['results'][22]['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']
for result in results:
    if 'codeFlows' not in result:
        continue
    steps = S.get(result, 'codeFlows', 0, 'threadFlows', 0, 'locations')
    for step in steps:
        uris.add(S.get(step, 'location', 'physicalLocation',
                       'artifactLocation', 'uri'))

# One file name per line, in sorted order.
for uri in sorted(uris):
    print(uri)

32
bin/sarif-results-summary Normal file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env python
import argparse
import json
import sarif_cli as S
import sys
import collections
# Command line: one positional argument naming the SARIF input; '-' selects stdin.
parser = argparse.ArgumentParser(description='summary of results')
parser.add_argument(
    'file',
    metavar='sarif-file',
    type=str,
    help='input file, - for stdin',
)
args = parser.parse_args()

# Load the whole SARIF document; the with-statement closes the stream afterwards.
source = sys.stdin if args.file == '-' else open(args.file, 'r')
with source as fp:
    sarif_struct = json.load(fp)
# Report how many results the first run contains.
results = S.get(sarif_struct, 'runs', 0, 'results')
num_results = len(results)
S.msg("Found %d results\n\n" % num_results)

# Nothing more to report for an empty run.  sarif_cli defines no exit(),
# so leave through sys.exit.
if num_results == 0:
    sys.exit(0)

# One line per result: <uri>:<startLine>:<startColumn>: <message>
for result in results:
    message = S.get(result, 'message', 'text')
    physical = S.get(result, 'locations', 0, 'physicalLocation')
    artifact = physical['artifactLocation']
    region = physical['region']
    # startColumn may be absent from a region; -1 is printed in that case.
    filepath = "%s:%d:%d" % (artifact['uri'], region['startLine'],
                             region.get('startColumn', -1))
    S.msg("%s: %s\n" % (filepath, message))

BIN
data/torvalds_linux__2021-10-21_10_07_00__export.sarif (Stored with Git LFS) Normal file

Binary file not shown.

18
requirements.txt Normal file
View File

@@ -0,0 +1,18 @@
appnope==0.1.2
attrs==21.2.0
backcall==0.2.0
decorator==5.1.0
ipython==7.28.0
jedi==0.18.0
matplotlib-inline==0.1.3
parso==0.8.2
pbr==5.6.0
pexpect==4.8.0
pickleshare==0.7.5
prompt-toolkit==3.0.20
ptyprocess==0.7.0
Pygments==2.10.0
PyYAML==6.0
sarif-om==1.0.4
traitlets==5.1.0
wcwidth==0.2.5

18
sarif_cli/__init__.py Normal file
View File

@@ -0,0 +1,18 @@
import sys
# Refuse to load on interpreters older than MIN_PYTHON.
MIN_PYTHON = (3, 7)

if not sys.version_info >= MIN_PYTHON:
    sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
def get(sarif_struct, *path):
    """Return the entry of SARIF_STRUCT reached by following PATH.

    Every element of PATH is applied in turn as a subscript (a dict key or a
    list index), starting from SARIF_STRUCT.  With an empty PATH,
    SARIF_STRUCT itself is returned.  A failing subscript raises whatever
    exception the container raises.
    """
    node = sarif_struct
    for step in path:
        node = node[step]
    return node
def msg(message):
    """Write MESSAGE to stdout, followed by a newline."""
    sys.stdout.write(message + '\n')

View File

@@ -0,0 +1 @@
#

17
setup.py Normal file
View File

@@ -0,0 +1,17 @@
from setuptools import setup
import glob
# Install every bin/sarif-* tool as a script.  Editor backup files (names
# ending in '~') match the same glob and must not be installed; sorting keeps
# the script list independent of the order in which glob returns matches.
sarif_scripts = sorted(path for path in glob.glob("bin/sarif-*")
                       if not path.endswith("~"))

setup(name='sarif_cli',
      version='0.1',
      description='Collection of command line tools for sarif files',
      url='https://github.com/hohn/sarif-cli',
      author='Michael Hohn',
      author_email='hohn@github.com',
      license='MIT',
      packages=['sarif_cli'],
      install_requires=[],
      include_package_data=True,
      scripts=sarif_scripts,
      zip_safe=False,
      python_requires='>=3.7'
      )