mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 01:13:03 +01:00
Add endpoints-only option for path output and a collection of usage samples
This commit is contained in:
committed by
=Michael Hohn
parent
79649a6226
commit
558e218d3b
21
README.org
21
README.org
@@ -3,12 +3,23 @@
|
|||||||
|
|
||||||
Each of these tools presents a high-level command-line interface to extract a
|
Each of these tools presents a high-level command-line interface to extract a
|
||||||
specific subset of information from a SARIF file. The format of each tool's
|
specific subset of information from a SARIF file. The format of each tool's
|
||||||
/output/ is versioned and, as much as possible, independent of the input.
|
/output/ will be versioned and, as much as possible, independent of the input.
|
||||||
|
|
||||||
It is the intent of these tools to
|
For human use and to fit with existing tools, the default output format is
|
||||||
- hide the internals of sarif when /used/
|
line-oriented and resembles compiler error formatting.
|
||||||
- provide examples of extracting information from SARIF files /while writing
|
|
||||||
your own/ or extending the tools
|
The goal of this tool set is to support working with SARIF files
|
||||||
|
- at the shell / file level,
|
||||||
|
- across multiple versions of the same SARIF result set,
|
||||||
|
- and across many repositories.
|
||||||
|
|
||||||
|
The implementation language is Python, but that is a detail. The scripts should
|
||||||
|
work well when used with other shell tools, especially =diff= and =git=.
|
||||||
|
|
||||||
|
# It is the intent of these tools to
|
||||||
|
# - hide the internals of sarif when /used/,
|
||||||
|
# - provide examples of extracting information from SARIF files /while writing
|
||||||
|
# your own/ or extending the tools.
|
||||||
|
|
||||||
* Setup for development
|
* Setup for development
|
||||||
This repository uses =git lfs= for some larger files; installation steps are at
|
This repository uses =git lfs= for some larger files; installation steps are at
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ parser.add_argument('-s', '--list-source', metavar='srcroot', type=str,
|
|||||||
help='list source snippets using srcroot as sarif SRCROOT')
|
help='list source snippets using srcroot as sarif SRCROOT')
|
||||||
parser.add_argument('-r', '--related-locations', action="store_true",
|
parser.add_argument('-r', '--related-locations', action="store_true",
|
||||||
help='list related locations like "hides [parameter](1)"')
|
help='list related locations like "hides [parameter](1)"')
|
||||||
|
parser.add_argument('-e', '--endpoints-only', action="store_true",
|
||||||
|
help='only list source and sink, dropping the path. Identical, successive source/sink pairs are combined')
|
||||||
# TODO mutually exclusive options
|
# TODO mutually exclusive options
|
||||||
parser.add_argument('-c', '--csv', action="store_true",
|
parser.add_argument('-c', '--csv', action="store_true",
|
||||||
help='output csv instead of human-readable summary')
|
help='output csv instead of human-readable summary')
|
||||||
@@ -59,7 +61,6 @@ for runi in S.indices(sarif_struct, 'runs'):
|
|||||||
for line, line_num in zip(lines, range(l1, l2+1)):
|
for line, line_num in zip(lines, range(l1, l2+1)):
|
||||||
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
||||||
if args.related_locations:
|
if args.related_locations:
|
||||||
# Full path: S.get(sarif_struct, 'runs', runi, 'results', resi, 'relatedLocations')
|
|
||||||
relatedLocations = result.get('relatedLocations', None)
|
relatedLocations = result.get('relatedLocations', None)
|
||||||
if type(relatedLocations) == list:
|
if type(relatedLocations) == list:
|
||||||
# Linking is explicit in output, so no need to get id(s) from message string.
|
# Linking is explicit in output, so no need to get id(s) from message string.
|
||||||
@@ -89,15 +90,37 @@ for runi in S.indices(sarif_struct, 'runs'):
|
|||||||
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
||||||
if 'codeFlows' in result:
|
if 'codeFlows' in result:
|
||||||
# Path problems
|
# Path problems
|
||||||
|
last_codeFlow = None
|
||||||
for codefi in S.indices(result, 'codeFlows'):
|
for codefi in S.indices(result, 'codeFlows'):
|
||||||
codeFlow = S.get(result, 'codeFlows', codefi)
|
codeFlow = S.get(result, 'codeFlows', codefi)
|
||||||
if args.csv:
|
if args.csv:
|
||||||
S.write_csv(cw, "path", codefi)
|
S.write_csv(cw, "path", codefi)
|
||||||
else:
|
else:
|
||||||
S.msg("PATH %d\n" % codefi)
|
S.msg("PATH %d\n" % codefi)
|
||||||
|
|
||||||
for threadi in S.indices(codeFlow, 'threadFlows'):
|
for threadi in S.indices(codeFlow, 'threadFlows'):
|
||||||
threadFlow = S.get(codeFlow, 'threadFlows', threadi)
|
threadFlow = S.get(codeFlow, 'threadFlows', threadi)
|
||||||
for loci in S.indices(threadFlow, 'locations'):
|
|
||||||
|
if args.endpoints_only:
|
||||||
|
#
|
||||||
|
# Pick the range to list only the endpoints (source/sink) of a threadFlow.
|
||||||
|
#
|
||||||
|
t1 = S.indices(threadFlow, 'locations')
|
||||||
|
location_range = [t1[0], t1[-1]]
|
||||||
|
#
|
||||||
|
# If the previous path had the same (source,sink) pair,
|
||||||
|
# we don't need to repeat it.
|
||||||
|
#
|
||||||
|
if (last_codeFlow and
|
||||||
|
( S.get(last_codeFlow, 'threadFlows', threadi, 'locations', 0) ==
|
||||||
|
S.get(codeFlow, 'threadFlows', threadi, 'locations', 0)) and
|
||||||
|
( S.get(last_codeFlow, 'threadFlows', threadi, 'locations', -1) ==
|
||||||
|
S.get(codeFlow, 'threadFlows', threadi, 'locations', -1))):
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
location_range = S.indices(threadFlow, 'locations')
|
||||||
|
|
||||||
|
for loci in location_range:
|
||||||
location = S.get(threadFlow, 'locations', loci, 'location')
|
location = S.get(threadFlow, 'locations', loci, 'location')
|
||||||
message, artifact, region = S.get_relatedlocation_message_info(location)
|
message, artifact, region = S.get_relatedlocation_message_info(location)
|
||||||
if artifact == S.NoFile:
|
if artifact == S.NoFile:
|
||||||
@@ -122,6 +145,7 @@ for runi in S.indices(sarif_struct, 'runs'):
|
|||||||
else:
|
else:
|
||||||
for line, line_num in zip(lines, range(l1, l2+1)):
|
for line, line_num in zip(lines, range(l1, l2+1)):
|
||||||
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
S.display_underlined(l1, c1, l2, c2, line, line_num)
|
||||||
|
last_codeFlow = codeFlow
|
||||||
if args.csv:
|
if args.csv:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -61,7 +61,8 @@
|
|||||||
"'$.fn." + plugin.getPluginName() + "' plugin"
|
"'$.fn." + plugin.getPluginName() + "' plugin"
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
Results are
|
The full results are found in [[file:../data/treeio/results.yaml::Potential XSS vulnerability in the \['$.fn.datepicker' plugin\](1).][results.yaml]], with a testing subset in [[file:../data/treeio/test_set_1.yaml::Potential XSS vulnerability in the \['$.fn.datepicker'
|
||||||
|
plugin\](1).][test_set_1.yaml]]; the results for this query are
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC text
|
||||||
message:
|
message:
|
||||||
text: |-
|
text: |-
|
||||||
@@ -71,7 +72,7 @@
|
|||||||
#+END_SRC
|
#+END_SRC
|
||||||
with 3 =relatedLocations= and 6 =threadFlows=.
|
with 3 =relatedLocations= and 6 =threadFlows=.
|
||||||
|
|
||||||
The the original query's first column is a sink (=sink.getNode()=), so the
|
The original query's first column is a sink (=sink.getNode()=), so the
|
||||||
=threadFlows= should terminate there -- and they do.
|
=threadFlows= should terminate there -- and they do.
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC text
|
||||||
locations:
|
locations:
|
||||||
@@ -152,6 +153,78 @@
|
|||||||
obvious connections between them. More importantly, the ordering is
|
obvious connections between them. More importantly, the ordering is
|
||||||
consistent.
|
consistent.
|
||||||
|
|
||||||
|
** Multiple message values and source/sink pairs
|
||||||
|
As a special case of [[*Multiple message values and flow paths][Multiple message values and flow paths]], we can report only
|
||||||
|
the (source, sink) pairs and drop the flow paths. This is useful in result
|
||||||
|
reports spanning many repositories and multiple tools.
|
||||||
|
|
||||||
|
Considering
|
||||||
|
#+BEGIN_SRC text
|
||||||
|
Potential XSS vulnerability in the ['$.fn.datepicker' plugin](1).
|
||||||
|
#+END_SRC
|
||||||
|
found in [[file:../data/treeio/test_set_1.yaml::Potential XSS vulnerability in the \['$.fn.datepicker' plugin\](1).][test_set_1.yaml]], stripping the =threadFlows= paths, and looking at the
|
||||||
|
first two =threadFlows= gives the following simplified structure.
|
||||||
|
Note that without the flow paths, the first two =threadFlows= (0 and 1) are now
|
||||||
|
identical =(source, sink)= pairs; the same holds for 2,3 and 4,5.
|
||||||
|
|
||||||
|
#+BEGIN_SRC yaml
|
||||||
|
- ruleId: com.lgtm/javascript-queries:js/unsafe-jquery-plugin
|
||||||
|
codeFlows:
|
||||||
|
- threadFlows:
|
||||||
|
- locations:
|
||||||
|
- location:
|
||||||
|
physicalLocation:
|
||||||
|
artifactLocation:
|
||||||
|
uri: static/js/jquery-ui-1.10.3/ui/jquery-ui.js
|
||||||
|
uriBaseId: '%SRCROOT%'
|
||||||
|
index: 72
|
||||||
|
region:
|
||||||
|
startLine: 9598
|
||||||
|
startColumn: 28
|
||||||
|
endColumn: 35
|
||||||
|
message:
|
||||||
|
text: options
|
||||||
|
- location:
|
||||||
|
physicalLocation:
|
||||||
|
artifactLocation:
|
||||||
|
uri: static/js/jquery-ui-1.10.3/ui/jquery.ui.datepicker.js
|
||||||
|
uriBaseId: '%SRCROOT%'
|
||||||
|
index: 61
|
||||||
|
region:
|
||||||
|
startLine: 1027
|
||||||
|
startColumn: 6
|
||||||
|
endColumn: 14
|
||||||
|
message:
|
||||||
|
text: altField
|
||||||
|
- threadFlows:
|
||||||
|
- locations:
|
||||||
|
- location:
|
||||||
|
physicalLocation:
|
||||||
|
artifactLocation:
|
||||||
|
uri: static/js/jquery-ui-1.10.3/ui/jquery-ui.js
|
||||||
|
uriBaseId: '%SRCROOT%'
|
||||||
|
index: 72
|
||||||
|
region:
|
||||||
|
startLine: 9598
|
||||||
|
startColumn: 28
|
||||||
|
endColumn: 35
|
||||||
|
message:
|
||||||
|
text: options
|
||||||
|
- location:
|
||||||
|
physicalLocation:
|
||||||
|
artifactLocation:
|
||||||
|
uri: static/js/jquery-ui-1.10.3/ui/jquery.ui.datepicker.js
|
||||||
|
uriBaseId: '%SRCROOT%'
|
||||||
|
index: 61
|
||||||
|
region:
|
||||||
|
startLine: 1027
|
||||||
|
startColumn: 6
|
||||||
|
endColumn: 14
|
||||||
|
message:
|
||||||
|
text: altField
|
||||||
|
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
#
|
#
|
||||||
#+OPTIONS: ^:{}
|
#+OPTIONS: ^:{}
|
||||||
|
|
||||||
|
|||||||
66
scripts/file-level-tests.sh
Normal file
66
scripts/file-level-tests.sh
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
# -*- sh -*-
|
||||||
|
# The purpose of this tool set is working with sarif at the shell / file level,
|
||||||
|
# across multiple versions of the same sarif result set, and across many
|
||||||
|
# repositories.
|
||||||
|
#
|
||||||
|
# These tests mirror that goal: they work on files using the tools and use
|
||||||
|
# standard unix utilities to verify contents.
|
||||||
|
#
|
||||||
|
|
||||||
|
sarif-results-summary -h
|
||||||
|
|
||||||
|
#
|
||||||
|
# Simple failure checks. These should produce no output.
|
||||||
|
#
|
||||||
|
test_files="
|
||||||
|
../data/wxWidgets_wxWidgets__2021-11-21_16_06_30__export.sarif
|
||||||
|
../data/torvalds_linux__2021-10-21_10_07_00__export.sarif
|
||||||
|
../data/treeio/results.sarif
|
||||||
|
"
|
||||||
|
for file in $test_files ; do
|
||||||
|
sarif-results-summary $file > /dev/null
|
||||||
|
done
|
||||||
|
for file in $test_files ; do
|
||||||
|
sarif-results-summary -r $file > /dev/null
|
||||||
|
done
|
||||||
|
|
||||||
|
#
|
||||||
|
# The following are for iterating and evolving result inspection to find test
|
||||||
|
# cases covering the different output options. They are intended for manual use
|
||||||
|
# and review.
|
||||||
|
#
|
||||||
|
read -r file srcroot <<< "../data/treeio/results.sarif ../data/treeio/treeio"
|
||||||
|
|
||||||
|
# All results, minimal output
|
||||||
|
sarif-results-summary $file | less
|
||||||
|
|
||||||
|
# All results, related locations output
|
||||||
|
sarif-results-summary -r $file | less
|
||||||
|
|
||||||
|
# All results, related locations and source output
|
||||||
|
sarif-results-summary -r -s $srcroot $file | less
|
||||||
|
|
||||||
|
# single-line result, no flow steps
|
||||||
|
start="sanitizer.py:8:1:8:16"
|
||||||
|
sarif-results-summary $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
|
# single-line result, with flow steps
|
||||||
|
start="treeio.core.middleware.chat.py:395:29:395:33"
|
||||||
|
sarif-results-summary $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
|
# single-line result, with flow steps, with relatedLocations
|
||||||
|
start="treeio.core.middleware.chat.py:395:29:395:33"
|
||||||
|
sarif-results-summary -r $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
|
# single-line result, with flow steps compacted
|
||||||
|
start="treeio.core.middleware.chat.py:395:29:395:33"
|
||||||
|
sarif-results-summary -e $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
|
# multi-line result, no flow steps, with relatedLocations and source
|
||||||
|
start=editor_plugin_src.js:722:72:722:73
|
||||||
|
sarif-results-summary -r -s $srcroot $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
|
# multi-line result, with flow steps, with relatedLocations and source
|
||||||
|
start=modal-form.html:89:35:93:14
|
||||||
|
sarif-results-summary -r -s $srcroot $file | sed -n "/$start/,/RESULT/p" | sed '$d' | less
|
||||||
|
|
||||||
Reference in New Issue
Block a user