mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Description of current and upcoming tables and their information sources
This commit is contained in:
committed by
=Michael Hohn
parent
1f2daab51e
commit
44f1d2f179
89
notes/tables-info.py
Normal file
89
notes/tables-info.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#
|
||||
# Simple utilities to retrieve and view Github API information
|
||||
#
|
||||
import urllib3
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pprint import pprint
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
#* Init
# GitHub API access needs a personal access token in GITHUB_TOKEN.
# Fail early with a clear message instead of an opaque KeyError.
_token = os.environ.get('GITHUB_TOKEN')
if _token is None:
    sys.exit('error: the GITHUB_TOKEN environment variable must be set')
header_auth = {'Authorization': 'token %s' % _token}

# One shared connection pool for every request in this script.
http = urllib3.PoolManager()

# Repository under inspection.
owner = 'hohn'
repo = 'tabu-soda'

# Default Accept header for the GitHub v3 REST API.
header_accept = {'Accept' : 'application/vnd.github.v3+json'}

# HTTP verbs used below.
GET = 'GET'
POST = 'POST'
#* Local utility functions using lexical variables
def gith(verb, path, headers=None):
    """Issue an authenticated request against the GitHub REST API.

    verb    -- HTTP method name, e.g. 'GET'
    path    -- API path starting with '/', appended to https://api.github.com
    headers -- optional extra headers, merged over the default
               authorization and accept headers (extras win on conflict)

    Returns the raw urllib3 response object.
    """
    # Bug fix: the original used a mutable default argument (headers={}),
    # a shared dict that callers could mutate across calls.  Use the
    # None-sentinel idiom instead.
    extra = headers if headers is not None else {}
    res = http.request(
        verb,
        'https://api.github.com' + path,
        headers={**header_auth, **header_accept, **extra}
    )
    return res
def topy(result):
    """Decode a urllib3 response body (UTF-8 JSON) into Python data."""
    body = result.data.decode('utf-8')
    return json.loads(body)
def pathval(result, *path):
    """Decode *result* and drill into the data along *path*.

    Each element of *path* is a dict key or list index applied in order.
    Prints the path and the value reached, then returns (path, value).
    """
    value = topy(result)
    for step in path:
        value = value[step]
    print(f'path: {path} value: {value}')
    return (path, value)
#* GET /repos/{owner}/{repo}/events
events_path = f'/repos/{owner}/{repo}/events'
r01 = gith(GET, events_path)
# Show name and url of the repository attached to the newest event.
pathval(r01, 0, 'repo', 'name')
pathval(r01, 0, 'repo', 'url')
#* GET /repos/{owner}/{repo}/code-scanning/analyses
analyses_path = f'/repos/{owner}/{repo}/code-scanning/analyses'
r02 = gith(GET, analyses_path)
topy(r02)
# ? 'sarif_id': '9df9fbb4-bf4b-11ec-9ca6-b32c61360f89',
#** GET /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}, overview only:
_, analysis_id = pathval(r02, 0, 'id')
analysis_path = f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}'
r02s01 = gith(GET, analysis_path)
topy(r02s01)
# Fields of interest for the scans table, in display order.
for field_path in (('commit_sha',),
                   ('created_at',),
                   ('results_count',),
                   ('rules_count',),
                   ('sarif_id',),
                   ('tool', 'name'),
                   ('tool', 'version')):
    pathval(r02s01, *field_path)
#** GET /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}, full sarif:
r02s02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}',
              headers = {'Accept': 'application/sarif+json'})

# Bug fix: the original decoded the response twice and leaked both file
# handles (open() results were never closed).  Decode once and use
# context managers so the files are flushed and closed deterministically.
sarif_data = topy(r02s02)
with open("r02s02", "w", encoding='utf-8') as out:
    pprint(sarif_data, out)
with open("r02s02.json", "w", encoding='utf-8') as out:
    json.dump(sarif_data, out, indent=4)
#* GET /repos/{owner}/{repo}
repo_path = f'/repos/{owner}/{repo}'
r03 = gith(GET, repo_path)
topy(r03)
# Repository metadata feeding the project table, in display order.
for field in ('created_at', 'full_name', 'git_url', 'clone_url', 'language'):
    pathval(r03, field)
#* POST /repos/{owner}/{repo}/code-scanning/sarifs
# TODO: to be tested...
# Bug fix: the original referenced an undefined name POST (only GET is
# bound above), so this statement raised NameError before any request
# was made.  Bind the verb locally so this section stands on its own.
POST = 'POST'
r04 = gith(POST, f'/repos/{owner}/{repo}/code-scanning/sarifs',
           fields={'commit_sha': 'aa22233',
                   'ref': 'refs/heads/<branch name>',
                   'sarif': 'gzip < sarif | base64 -w0',
                   'tool_name' : 'codeql',
                   'started_at': 'when the analysis started',
                   },
           headers = {'Accept': 'application/sarif+json'})
|
||||
145
notes/tables.org
145
notes/tables.org
@@ -80,7 +80,6 @@
|
||||
|
||||
#+END_SRC
|
||||
|
||||
|
||||
#+BEGIN_SRC text
|
||||
==> kind_problem.csv <==
|
||||
results_array_id
|
||||
@@ -115,7 +114,8 @@
|
||||
|
||||
#+END_SRC
|
||||
|
||||
|
||||
The parts above =$schema= in the =projects.csv= table are ad-hoc and the
|
||||
information for those fields is not yet collected. They can be discarded.
|
||||
#+BEGIN_SRC text
|
||||
==> project.csv <==
|
||||
creation_date
|
||||
@@ -178,11 +178,92 @@
|
||||
tag_text
|
||||
#+END_SRC
|
||||
|
||||
* New tables to be exported
|
||||
Possible splits from =project.csv=
|
||||
|
||||
The scan results are the root of the sarif tree, so this is a required base table.
|
||||
* Tables or entries to be removed
|
||||
The top of the [Mar-23-2022] =projects.csv= table, enumerated below, is ad-hoc
|
||||
and included in the other tables below; the information for its fields is not
|
||||
yet collected, so it can be discarded.
|
||||
#+BEGIN_SRC text
|
||||
==> project-meta.csv <==
|
||||
creation_date
|
||||
primary_language
|
||||
project_name
|
||||
query_commit_id
|
||||
sarif_file_name
|
||||
scan_id
|
||||
scan_start_date
|
||||
scan_stop_date
|
||||
tool_name
|
||||
tool_version
|
||||
#+END_SRC
|
||||
|
||||
* New tables to be exported
|
||||
This section enumerates new tables intended for reporting infrastructure.
|
||||
|
||||
Using the github API starting points
|
||||
#+BEGIN_SRC python
|
||||
# Code scanning information
|
||||
# Get the full list:
|
||||
r02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses')
|
||||
|
||||
# Work with one entry
|
||||
_, analysis_id = pathval(r02, 0, 'id')
|
||||
r02s01 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}')
|
||||
|
||||
r02s02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}',
|
||||
headers = {'Accept': 'application/sarif+json'})
|
||||
|
||||
# Repository information via GET /repos/{owner}/{repo}
|
||||
r03 = gith(GET, f'/repos/{owner}/{repo}')
|
||||
#+END_SRC
|
||||
we can populate the =project.csv= and =scans.csv= tables:
|
||||
#+BEGIN_SRC sql
|
||||
==> project.csv <==
|
||||
id
|
||||
project_name -- pathval(r03, 'full_name')
|
||||
creation_date -- pathval(r03, 'created_at')
|
||||
owner -- r03
|
||||
repo -- r03 = gith(GET, f'/repos/{owner}/{repo}')
|
||||
repository_url -- pathval(r03, 'clone_url')
|
||||
primary_language -- pathval(r03, 'language')
|
||||
languages_analyzed --
|
||||
#+END_SRC
|
||||
#+BEGIN_SRC sql
|
||||
==> scans.csv <==
|
||||
id --
|
||||
commit_id -- pathval(r02s01, 'commit_sha')
|
||||
project_id -- project.id
|
||||
db_create_start -- pathval(r02s01, 'created_at')
|
||||
db_create_stop
|
||||
scan_start_date
|
||||
scan_stop_date
|
||||
tool_name -- pathval(r02s01, 'tool', 'name')
|
||||
tool_version -- pathval(r02s01, 'tool', 'version')
|
||||
tool_query_commit_id -- pathval(r02, 0, 'tool', 'version') is sufficient
|
||||
sarif_content -- r02s02
|
||||
sarif_file_name -- used on upload
|
||||
sarif_id -- pathval(r02s01, 'sarif_id')
|
||||
results_count -- pathval(r02s01, 'results_count')
|
||||
rules_count -- pathval(r02s01, 'rules_count')
|
||||
#+END_SRC
|
||||
|
||||
The sarif upload from codeql analysis to github uses the following API and
|
||||
parameters which naturally are the minimal parameters needed to run the
|
||||
analysis.
|
||||
#+BEGIN_SRC python
|
||||
# untested
|
||||
r04 = gith(POST, f'/repos/{owner}/{repo}/code-scanning/sarifs',
|
||||
fields={'commit_sha': 'aa22233',
|
||||
'ref': 'refs/heads/<branch name>',
|
||||
'sarif': 'gzip < sarif | base64 -w0',
|
||||
'tool_name' : 'codeql',
|
||||
'started_at': 'when the analysis started',
|
||||
},
|
||||
headers = {'Accept': 'application/sarif+json'})
|
||||
#+END_SRC
|
||||
|
||||
The scan results from =project.csv= are the root of the sarif tree, so this is a
|
||||
required base table.
|
||||
#+BEGIN_SRC sql
|
||||
==> project-scan-result.csv <==
|
||||
$schema
|
||||
sarif_version
|
||||
@@ -200,52 +281,9 @@
|
||||
revisionId
|
||||
#+END_SRC
|
||||
|
||||
The rest of the [Mar-23-2022] =projects.csv= table is ad-hoc and included in the
|
||||
other tables below; it can be discarded.
|
||||
#+BEGIN_SRC text
|
||||
==> project-meta.csv <==
|
||||
creation_date
|
||||
primary_language
|
||||
project_name
|
||||
query_commit_id
|
||||
sarif_file_name
|
||||
scan_id
|
||||
scan_start_date
|
||||
scan_stop_date
|
||||
tool_name
|
||||
tool_version
|
||||
|
||||
#+END_SRC
|
||||
|
||||
New tables intended for reporting infrastructure:
|
||||
#+BEGIN_SRC text
|
||||
==> project.csv <==
|
||||
id
|
||||
project_name
|
||||
creation_date
|
||||
repository_url -- new
|
||||
primary_language -- from github api
|
||||
languages_analyzed
|
||||
#+END_SRC
|
||||
|
||||
#+BEGIN_SRC text
|
||||
==> scans.csv <==
|
||||
id
|
||||
commit_id
|
||||
project_id
|
||||
db_create_start
|
||||
db_create_stop
|
||||
scan_start_date
|
||||
scan_stop_date
|
||||
tool_name
|
||||
tool_version
|
||||
tool_query_commit_id
|
||||
sarif_content
|
||||
sarif_file_name
|
||||
|
||||
#+END_SRC
|
||||
|
||||
#+BEGIN_SRC text
|
||||
Using joins of the =project-scan-result.csv= table and the
|
||||
other [[*Currently Exported Tables][Currently Exported Tables]], the =results.csv= table can be formed:
|
||||
#+BEGIN_SRC sql
|
||||
==> results.csv <==
|
||||
id INT, -- primary key
|
||||
scan_id INT, -- scans.id
|
||||
@@ -271,9 +309,6 @@
|
||||
--
|
||||
source_object STRING, -- higher-level info: 'args', 'request', etc.
|
||||
sink_object string, -- higher level: 'execute', 'sql statement', etc.
|
||||
|
||||
#+END_SRC
|
||||
|
||||
|
||||
|
||||
#+HTML: </div>
|
||||
|
||||
Reference in New Issue
Block a user