mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 09:13:04 +01:00
266 lines
11 KiB
Org Mode
266 lines
11 KiB
Org Mode
[[./typegraph-td.svg]]
|
|
|
|
* Collection of cli tools for SARIF processing
|
|
*THIS IS A WORK IN PROGRESS*
|
|
|
|
Each of these tools presents a high-level command-line interface to extract a
|
|
specific subset of information from a SARIF file. The format of each tool's
|
|
/output/ will be versioned and, as much as possible, independent of the input.
|
|
|
|
For human use and to fit with existing tools, the default output format is
|
|
line-oriented and resembles compiler error formatting.
|
|
|
|
The goal of this tool set is to support working with SARIF files
|
|
- at the shell / file level,
|
|
- across multiple versions of the same SARIF result set,
|
|
- and across many repositories.
|
|
|
|
The implementation language is Python, but that is a detail. The scripts should
|
|
work well when used with other shell tools, especially =diff= and =git=.
|
|
|
|
# It is the intent of these tools to
|
|
# - hide the internals of sarif when /used/,
|
|
# - provide examples of extracting information from SARIF files /while writing
|
|
# your own/ or extending the tools.
|
|
|
|
* Setup for development
|
|
This repository uses =git lfs= for some larger files; installation steps are at
|
|
[[https://git-lfs.github.com][git-lfs]]; on a mac with homebrew, install it via
|
|
#+BEGIN_SRC sh
|
|
brew install git-lfs
|
|
git lfs install
|
|
#+END_SRC
|
|
|
|
(For development) Set up the virtual environment and install the packages:
|
|
# pip freeze > requirements.txt
|
|
#+BEGIN_SRC sh
|
|
# Using requirementsDEV.txt
|
|
python3.9 -m venv .venv
|
|
. .venv/bin/activate
|
|
python3.9 -m pip install -r requirementsDEV.txt
|
|
#+END_SRC
|
|
|
|
OR (For distribution) Set up the virtual environment and install the packages:
|
|
# pip freeze > requirements.txt
|
|
#+BEGIN_SRC sh
|
|
# Using requirements.txt
|
|
python3.9 -m venv .venv
|
|
. .venv/bin/activate
|
|
python3.9 -m pip install -r requirements.txt
|
|
#+END_SRC
|
|
|
|
"Install" for local development:
|
|
#+BEGIN_SRC sh
|
|
pip install -e .
|
|
#+END_SRC
|
|
|
|
* Examples
|
|
To use git parlance, the porcelain tool is =sarif-results-summary=, while the
|
|
plumbing tools are =sarif-digest=, =sarif-labeled= and =sarif-list-files=.
|
|
|
|
Following are short summaries of each.
|
|
|
|
** =sarif-results-summary=
|
|
Display the SARIF results in human-readable plain text form.
|
|
|
|
Starting with the =data/wxWidgets= sample and the warning around
|
|
#+BEGIN_SRC text
|
|
src/stc/scintilla/lexers/LexMySQL.cxx:153:24:153:30:
|
|
#+END_SRC
|
|
there are several options using only the SARIF file, and one more when
|
|
source code is available.
|
|
|
|
The following show the command and the output, limited to the intended result
|
|
via =sed=:
|
|
|
|
1. Display only main result, using no options.
|
|
#+BEGIN_SRC shell :results output code :exports both
|
|
.venv/bin/sarif-results-summary \
|
|
data/wxWidgets_wxWidgets__2021-11-21_16_06_30__export.sarif 2>&1 |\
|
|
sed -n "/LexMySQL.cxx:153:24:153:30/,/RESULT/p" | sed '$d'
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
#+begin_src shell
|
|
RESULT: src/stc/scintilla/lexers/LexMySQL.cxx:153:24:153:30: Local variable 'length' hides a [parameter of the same name](1).
|
|
|
|
#+end_src
|
|
|
|
2. Display the related information.
|
|
#+BEGIN_SRC shell :results output code :exports both
|
|
.venv/bin/sarif-results-summary \
|
|
-r data/wxWidgets_wxWidgets__2021-11-21_16_06_30__export.sarif 2>&1 |\
|
|
sed -n "/LexMySQL.cxx:153:24:153:30/,/RESULT/p" | sed '$d'
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
#+begin_src shell
|
|
RESULT: src/stc/scintilla/lexers/LexMySQL.cxx:153:24:153:30: Local variable 'length' hides a [parameter of the same name](1).
|
|
REFERENCE: src/stc/scintilla/lexers/LexMySQL.cxx:108:68:108:74: parameter of the same name
|
|
|
|
#+end_src
|
|
|
|
3. Include source code snippets (when the source is available):
|
|
#+BEGIN_SRC shell :results output code :exports both
|
|
.venv/bin/sarif-results-summary \
|
|
-s data/wxWidgets-small \
|
|
-r data/wxWidgets_wxWidgets__2021-11-21_16_06_30__export.sarif 2>&1 |\
|
|
sed -n "/LexMySQL.cxx:153:24:153:30/,/RESULT/p" | sed '$d'
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
#+begin_src shell
|
|
RESULT: src/stc/scintilla/lexers/LexMySQL.cxx:153:24:153:30: Local variable 'length' hides a [parameter of the same name](1).
|
|
Sci_Position length = sc.LengthCurrent() + 1;
|
|
^^^^^^
|
|
REFERENCE: src/stc/scintilla/lexers/LexMySQL.cxx:108:68:108:74: parameter of the same name
|
|
static void ColouriseMySQLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
|
|
^^^^^^
|
|
|
|
#+end_src
|
|
|
|
To illustrate the flow steps options, switch to the =data/treeio= sample:
|
|
1. Result with flow steps and relatedLocations
|
|
#+BEGIN_SRC shell :results output code :exports both
|
|
read -r file srcroot <<< "data/treeio/results.sarif data/treeio/treeio"
|
|
start="treeio.core.middleware.chat.py:395:29:395:33"
|
|
.venv/bin/sarif-results-summary -r $file | sed -n "/$start/,/RESULT/p" | sed '$d'
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
#+begin_src shell
|
|
RESULT: treeio/core/middleware/chat.py:395:29:395:33: [Error information](1) may be exposed to an external user
|
|
REFERENCE: treeio/core/middleware/chat.py:394:50:394:64: Error information
|
|
PATH 0
|
|
FLOW STEP 0: treeio/core/middleware/chat.py:394:50:394:64: ControlFlowNode for Attribute()
|
|
FLOW STEP 1: treeio/core/middleware/chat.py:394:38:394:66: ControlFlowNode for Dict
|
|
FLOW STEP 2: treeio/core/middleware/chat.py:394:13:394:67: ControlFlowNode for Dict
|
|
FLOW STEP 3: treeio/core/middleware/chat.py:395:29:395:33: ControlFlowNode for data
|
|
PATH 1
|
|
FLOW STEP 0: treeio/core/middleware/chat.py:394:50:394:64: ControlFlowNode for Attribute()
|
|
FLOW STEP 1: treeio/core/middleware/chat.py:394:46:394:65: ControlFlowNode for str()
|
|
FLOW STEP 2: treeio/core/middleware/chat.py:394:38:394:66: ControlFlowNode for Dict
|
|
FLOW STEP 3: treeio/core/middleware/chat.py:394:13:394:67: ControlFlowNode for Dict
|
|
FLOW STEP 4: treeio/core/middleware/chat.py:395:29:395:33: ControlFlowNode for data
|
|
|
|
#+end_src
|
|
|
|
2. Result with flow steps, relatedLocations, and source
|
|
#+BEGIN_SRC shell :results output code :exports both
|
|
read -r file srcroot <<< "data/treeio/results.sarif data/treeio/treeio"
|
|
start="treeio.core.middleware.chat.py:395:29:395:33"
|
|
.venv/bin/sarif-results-summary -r -s $srcroot $file | \
|
|
sed -n "/$start/,/RESULT/p" | sed '$d'
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
#+begin_src shell
|
|
RESULT: treeio/core/middleware/chat.py:395:29:395:33: [Error information](1) may be exposed to an external user
|
|
return HttpResponse(data, content_type='application/json', status=200)
|
|
^^^^
|
|
REFERENCE: treeio/core/middleware/chat.py:394:50:394:64: Error information
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^
|
|
PATH 0
|
|
FLOW STEP 0: treeio/core/middleware/chat.py:394:50:394:64: ControlFlowNode for Attribute()
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^
|
|
FLOW STEP 1: treeio/core/middleware/chat.py:394:38:394:66: ControlFlowNode for Dict
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
FLOW STEP 2: treeio/core/middleware/chat.py:394:13:394:67: ControlFlowNode for Dict
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
FLOW STEP 3: treeio/core/middleware/chat.py:395:29:395:33: ControlFlowNode for data
|
|
return HttpResponse(data, content_type='application/json', status=200)
|
|
^^^^
|
|
PATH 1
|
|
FLOW STEP 0: treeio/core/middleware/chat.py:394:50:394:64: ControlFlowNode for Attribute()
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^
|
|
FLOW STEP 1: treeio/core/middleware/chat.py:394:46:394:65: ControlFlowNode for str()
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^^^^^^
|
|
FLOW STEP 2: treeio/core/middleware/chat.py:394:38:394:66: ControlFlowNode for Dict
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
FLOW STEP 3: treeio/core/middleware/chat.py:394:13:394:67: ControlFlowNode for Dict
|
|
{"cmd": "Error", "data": {"msg": str(sys.exc_info())}})
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
FLOW STEP 4: treeio/core/middleware/chat.py:395:29:395:33: ControlFlowNode for data
|
|
return HttpResponse(data, content_type='application/json', status=200)
|
|
^^^^
|
|
#+end_src
|
|
|
|
** =sarif-digest=
|
|
Get an idea of the SARIF file structure by showing only first / last entries in arrays.
|
|
#+BEGIN_SRC shell
|
|
sarif-digest data/torvalds_linux__2021-10-21_10_07_00__export.sarif |less
|
|
#+END_SRC
|
|
|
|
** =sarif-labeled=
|
|
Display the SARIF file with explicit paths inserted before json objects and
|
|
selected array entries. Handy when reverse-engineering the format by searching
|
|
for results.
|
|
#+BEGIN_SRC shell
|
|
sarif-labeled data/torvalds_linux__2021-10-21_10_07_00__export.sarif |less
|
|
#+END_SRC
|
|
For example, the
|
|
#+BEGIN_SRC text
|
|
"uri": "drivers/gpu/drm/i915/gt/uc/intel_guc.c",
|
|
#+END_SRC
|
|
is nested; the labeled display shows where:
|
|
#+BEGIN_SRC text
|
|
"sarif_struct['runs'][1]['results'][4]['locations'][0]['physicalLocation']['artifactLocation']": "----path----",
|
|
"artifactLocation": {
|
|
"uri": "drivers/gpu/drm/i915/gt/uc/intel_guc.c",
|
|
#+END_SRC
|
|
|
|
** =sarif-list-files=
|
|
Display the list of files referenced by a SARIF file. This is the tool used to
|
|
get file names that ultimately went into =data/linux-small/= and
|
|
=data/wxWidgets-small/=.
|
|
#+BEGIN_SRC shell
|
|
sarif-list-files data/wxWidgets_wxWidgets__2021-11-21_16_06_30__export.sarif
|
|
#+END_SRC
|
|
|
|
* Sample Data
|
|
The query results in =data/= are taken from lgtm.com, which ran the
|
|
: ql/$LANG/ql/src/codeql-suites/$LANG-lgtm.qls
|
|
queries.
|
|
|
|
The Linux kernel has both single-location results (="kind": "problem"=) and path
|
|
results (="kind": "path-problem"=). It also has results for multiple source
|
|
languages.
|
|
|
|
The subset of files referenced by the SARIF results is in =data/linux-small/=
|
|
and is taken from
|
|
#+begin_src javascript
|
|
"versionControlProvenance": [
|
|
{
|
|
"repositoryUri": "https://github.com/torvalds/linux.git",
|
|
"revisionId": "d9abdee5fd5abffd0e763e52fbfa3116de167822"
|
|
}
|
|
]
|
|
#+end_src
|
|
|
|
The wxWidgets library has both single-location results (="kind": "problem"=) and path
|
|
results (="kind": "path-problem"=).
|
|
|
|
The subset of files referenced by the SARIF results is in =data/wxWidgets-small/=
|
|
and is taken from
|
|
|
|
#+BEGIN_SRC js
|
|
"repositoryUri": "https://github.com/wxWidgets/wxWidgets.git",
|
|
"revisionId": "7a03d5fe9bca2d2a2cd81fc0620bcbd2cbc4c7b0"
|
|
#+END_SRC
|
|
|
|
|
|
# * Commands
|
|
|
|
|
|
|
|
|
|
#+OPTIONS: ^:{}
|
|
|