mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Description of current and upcoming tables and their information sources
This commit is contained in:
committed by
=Michael Hohn
parent
1f2daab51e
commit
44f1d2f179
89
notes/tables-info.py
Normal file
89
notes/tables-info.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
#
|
||||||
|
# Simple utilities to retrieve and view GitHub API information
|
||||||
|
#
|
||||||
|
import urllib3
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from pprint import pprint
|
||||||
|
from contextlib import redirect_stdout
|
||||||
|
|
||||||
|
#* Init
|
||||||
|
# Authorization header built from the GITHUB_TOKEN environment variable;
# a missing variable raises KeyError immediately rather than failing later.
header_auth = {'Authorization': f"token {os.environ['GITHUB_TOKEN']}"}

# Single connection pool shared by every request in this file.
http = urllib3.PoolManager()

# Repository the queries below are run against.
owner = 'hohn'
repo = 'tabu-soda'

# Default Accept header; individual calls may override it.
header_accept = {'Accept': 'application/vnd.github.v3+json'}

# HTTP verbs used by the calls below.  POST is referenced by the sarif upload
# sketch at the end of the file and was previously undefined.
GET = 'GET'
POST = 'POST'
|
||||||
|
|
||||||
|
#* Local utility functions using lexical variables
|
||||||
|
def gith(verb, path, headers=None, fields=None):
    """Send a request to the GitHub API and return the urllib3 response.

    Parameters:
        verb:    HTTP method name, e.g. GET.
        path:    API path appended to 'https://api.github.com'; it should
                 start with '/'.
        headers: optional dict of extra headers.  These are merged last, so
                 they override the module-level authorization and accept
                 headers on key collisions.
        fields:  optional dict handed to urllib3's `fields` argument.  None
                 (the default) sends the request exactly as before.

    Returns:
        The response object produced by `http.request`.
    """
    # `headers` defaults to None instead of a shared mutable `{}`.
    extra = headers if headers is not None else {}
    return http.request(
        verb,
        'https://api.github.com' + path,
        fields=fields,
        headers={**header_auth, **header_accept, **extra},
    )
|
||||||
|
|
||||||
|
def topy(result):
    """Convert a response into Python data by parsing its body as JSON.

    Parameters:
        result: object whose `.data` attribute holds the UTF-8 encoded JSON
                body as bytes.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        UnicodeDecodeError: if the body is not valid UTF-8.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    # 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading byte-order
    # mark, which json.loads would otherwise reject.
    text = result.data.decode('utf-8-sig')
    return json.loads(text)
|
||||||
|
|
||||||
|
def pathval(result, *path):
    """Look up a nested value in a JSON response, print it, and return it.

    Parameters:
        result: response object accepted by `topy`.
        *path:  successive keys / indices applied to the parsed JSON, from
                the outermost container inward.

    Returns:
        A `(path, value)` tuple, where `path` is the tuple of keys given and
        `value` is what was found at that location.
    """
    node = topy(result)
    # Descend one level per path component.
    for step in path:
        node = node[step]
    print(f'path: {path} value: {node}')
    return (path, node)
|
||||||
|
|
||||||
|
|
||||||
|
#* GET /repos/{owner}/{repo}/events
|
||||||
|
# Fetch the repository's event list, then show the repo name and API url
# recorded on the first event.
r01 = gith(
    GET,
    f'/repos/{owner}/{repo}/events',
)
pathval(r01, 0, 'repo', 'name')
pathval(r01, 0, 'repo', 'url')
|
||||||
|
|
||||||
|
#* GET /repos/{owner}/{repo}/code-scanning/analyses
|
||||||
|
# Fetch the list of code-scanning analyses.
r02 = gith(
    GET,
    f'/repos/{owner}/{repo}/code-scanning/analyses',
)
# Bare expression: the parsed value is discarded when run as a script and is
# only displayed in an interactive session.
topy(r02)
# ? 'sarif_id': '9df9fbb4-bf4b-11ec-9ca6-b32c61360f89',

#** GET /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}, overview only:
# Take the id of the first analysis in the list and fetch its overview.
_, analysis_id = pathval(r02, 0, 'id')
r02s01 = gith(
    GET,
    f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}',
)
topy(r02s01)

# Top-level fields of the overview.
pathval(r02s01, 'commit_sha')
pathval(r02s01, 'created_at')
pathval(r02s01, 'results_count')
pathval(r02s01, 'rules_count')
pathval(r02s01, 'sarif_id')

# Fields nested under 'tool'.
pathval(r02s01, 'tool', 'name')
pathval(r02s01, 'tool', 'version')
|
||||||
|
|
||||||
|
#** GET /repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}, full sarif:
|
||||||
|
# Same endpoint as the overview request, but the Accept header asks for the
# full sarif document.
r02s02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}',
              headers={'Accept': 'application/sarif+json'})

# Parse the response once and write it out twice: a pprint rendering in
# "r02s02" and indented JSON in "r02s02.json".  The `with` blocks close the
# files, so the output is flushed to disk deterministically.
sarif = topy(r02s02)
with open("r02s02", "w", encoding='utf-8') as out:
    pprint(sarif, out)
with open("r02s02.json", "w", encoding='utf-8') as out:
    json.dump(sarif, out, indent=4)
|
||||||
|
|
||||||
|
#* GET /repos/{owner}/{repo}
|
||||||
|
# Fetch the repository description.
r03 = gith(
    GET,
    f'/repos/{owner}/{repo}',
)
# Bare expression: only displayed in an interactive session.
topy(r03)

# Show the individual top-level fields of interest.
pathval(r03, 'created_at')
pathval(r03, 'full_name')
pathval(r03, 'git_url')
pathval(r03, 'clone_url')
pathval(r03, 'language')
|
||||||
|
|
||||||
|
#* POST /repos/{owner}/{repo}/code-scanning/sarifs
|
||||||
|
# TODO: to be tested...
|
||||||
|
# Sketch of a sarif upload.  Every value in the payload is a placeholder
# describing what belongs there; replace them before running.
#
# The request is issued through `http.request` directly with a JSON body:
# the previous form referenced an undefined POST name and passed a `fields`
# keyword that `gith` did not accept.
# NOTE(review): the Accept override is kept from the original sketch --
# confirm it against the endpoint documentation.
r04 = http.request(
    'POST',
    f'https://api.github.com/repos/{owner}/{repo}/code-scanning/sarifs',
    body=json.dumps({
        'commit_sha': 'aa22233',
        'ref': 'refs/heads/<branch name>',
        'sarif': 'gzip < sarif | base64 -w0',
        'tool_name': 'codeql',
        'started_at': 'when the analysis started',
    }).encode('utf-8'),
    headers={
        **header_auth,
        **header_accept,
        'Accept': 'application/sarif+json',
        'Content-Type': 'application/json',
    },
)
|
||||||
|
|
||||||
145
notes/tables.org
145
notes/tables.org
@@ -80,7 +80,6 @@
|
|||||||
|
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
|
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC text
|
||||||
==> kind_problem.csv <==
|
==> kind_problem.csv <==
|
||||||
results_array_id
|
results_array_id
|
||||||
@@ -115,7 +114,8 @@
|
|||||||
|
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
|
The parts above =$schema= in the =projects.csv= table are ad-hoc and the
|
||||||
|
information for those fields is not yet collected. They can be discarded.
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC text
|
||||||
==> project.csv <==
|
==> project.csv <==
|
||||||
creation_date
|
creation_date
|
||||||
@@ -178,11 +178,92 @@
|
|||||||
tag_text
|
tag_text
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
* New tables to be exported
|
* Tables or entries to be removed
|
||||||
Possible splits from =project.csv=
|
The top of the [Mar-23-2022] =projects.csv= table, enumerated below, is ad-hoc
|
||||||
|
and included in the other tables below; the information for its fields is not
|
||||||
The scan results are the root of the sarif tree, so this is a required base table.
|
yet collected, so it can be discarded.
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC text
|
||||||
|
==> project-meta.csv <==
|
||||||
|
creation_date
|
||||||
|
primary_language
|
||||||
|
project_name
|
||||||
|
query_commit_id
|
||||||
|
sarif_file_name
|
||||||
|
scan_id
|
||||||
|
scan_start_date
|
||||||
|
scan_stop_date
|
||||||
|
tool_name
|
||||||
|
tool_version
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
* New tables to be exported
|
||||||
|
This section enumerates new tables intended for reporting infrastructure.
|
||||||
|
|
||||||
|
Using the github API starting points
|
||||||
|
#+BEGIN_SRC python
|
||||||
|
# Code scanning information
|
||||||
|
# Get the full list:
|
||||||
|
r02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses')
|
||||||
|
|
||||||
|
# Work with one entry
|
||||||
|
_, analysis_id = pathval(r02, 0, 'id')
|
||||||
|
r02s01 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}')
|
||||||
|
|
||||||
|
r02s02 = gith(GET, f'/repos/{owner}/{repo}/code-scanning/analyses/{analysis_id}',
|
||||||
|
headers = {'Accept': 'application/sarif+json'})
|
||||||
|
|
||||||
|
# Repository information via GET /repos/{owner}/{repo}
|
||||||
|
r03 = gith(GET, f'/repos/{owner}/{repo}')
|
||||||
|
#+END_SRC
|
||||||
|
we can populate the =project.csv= and =scans.csv= tables:
|
||||||
|
#+BEGIN_SRC sql
|
||||||
|
==> project.csv <==
|
||||||
|
id
|
||||||
|
project_name -- pathval(r03, 'full_name')
|
||||||
|
creation_date -- pathval(r03, 'created_at')
|
||||||
|
owner -- r03
|
||||||
|
repo -- r03 = gith(GET, f'/repos/{owner}/{repo}')
|
||||||
|
repository_url -- pathval(r03, 'clone_url')
|
||||||
|
primary_language -- pathval(r03, 'language')
|
||||||
|
languages_analyzed --
|
||||||
|
#+END_SRC
|
||||||
|
#+BEGIN_SRC sql
|
||||||
|
==> scans.csv <==
|
||||||
|
id --
|
||||||
|
commit_id -- pathval(r02s01, 'commit_sha')
|
||||||
|
project_id -- project.id
|
||||||
|
db_create_start -- pathval(r02s01, 'created_at')
|
||||||
|
db_create_stop
|
||||||
|
scan_start_date
|
||||||
|
scan_stop_date
|
||||||
|
tool_name -- pathval(r02s01, 'tool', 'name')
|
||||||
|
tool_version -- pathval(r02s01, 'tool', 'version')
|
||||||
|
tool_query_commit_id -- pathval(r02, 0, 'tool', 'version') is sufficient
|
||||||
|
sarif_content -- r02s02
|
||||||
|
sarif_file_name -- used on upload
|
||||||
|
sarif_id -- pathval(r02s01, 'sarif_id')
|
||||||
|
results_count -- pathval(r02s01, 'results_count')
|
||||||
|
rules_count -- pathval(r02s01, 'rules_count')
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
The sarif upload from codeql analysis to github uses the following API and
|
||||||
|
parameters which naturally are the minimal parameters needed to run the
|
||||||
|
analysis.
|
||||||
|
#+BEGIN_SRC python
|
||||||
|
# untested
|
||||||
|
r04 = gith(POST, f'/repos/{owner}/{repo}/code-scanning/sarifs',
|
||||||
|
fields={'commit_sha': 'aa22233',
|
||||||
|
'ref': 'refs/heads/<branch name>',
|
||||||
|
'sarif': 'gzip < sarif | base64 -w0',
|
||||||
|
'tool_name' : 'codeql',
|
||||||
|
'started_at': 'when the analysis started',
|
||||||
|
},
|
||||||
|
headers = {'Accept': 'application/sarif+json'})
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
The scan results from =project.csv= are the root of the sarif tree, so this is a
|
||||||
|
required base table.
|
||||||
|
#+BEGIN_SRC sql
|
||||||
==> project-scan-result.csv <==
|
==> project-scan-result.csv <==
|
||||||
$schema
|
$schema
|
||||||
sarif_version
|
sarif_version
|
||||||
@@ -200,52 +281,9 @@
|
|||||||
revisionId
|
revisionId
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
The rest of the [Mar-23-2022] =projects.csv= table is ad-hoc and included in the
|
Using joins of the =project-scan-result.csv= table and the
|
||||||
other tables below; it can be discarded.
|
other [[*Currently Exported Tables][Currently Exported Tables]], the =results.csv= table can be formed:
|
||||||
#+BEGIN_SRC text
|
#+BEGIN_SRC sql
|
||||||
==> project-meta.csv <==
|
|
||||||
creation_date
|
|
||||||
primary_language
|
|
||||||
project_name
|
|
||||||
query_commit_id
|
|
||||||
sarif_file_name
|
|
||||||
scan_id
|
|
||||||
scan_start_date
|
|
||||||
scan_stop_date
|
|
||||||
tool_name
|
|
||||||
tool_version
|
|
||||||
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
New tables intended for reporting infrastructure:
|
|
||||||
#+BEGIN_SRC text
|
|
||||||
==> project.csv <==
|
|
||||||
id
|
|
||||||
project_name
|
|
||||||
creation_date
|
|
||||||
repository_url -- new
|
|
||||||
primary_language -- from github api
|
|
||||||
languages_analyzed
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
#+BEGIN_SRC text
|
|
||||||
==> scans.csv <==
|
|
||||||
id
|
|
||||||
commit_id
|
|
||||||
project_id
|
|
||||||
db_create_start
|
|
||||||
db_create_stop
|
|
||||||
scan_start_date
|
|
||||||
scan_stop_date
|
|
||||||
tool_name
|
|
||||||
tool_version
|
|
||||||
tool_query_commit_id
|
|
||||||
sarif_content
|
|
||||||
sarif_file_name
|
|
||||||
|
|
||||||
#+END_SRC
|
|
||||||
|
|
||||||
#+BEGIN_SRC text
|
|
||||||
==> results.csv <==
|
==> results.csv <==
|
||||||
id INT, -- primary key
|
id INT, -- primary key
|
||||||
scan_id INT, -- scans.id
|
scan_id INT, -- scans.id
|
||||||
@@ -271,9 +309,6 @@
|
|||||||
--
|
--
|
||||||
source_object STRING, -- higher-level info: 'args', 'request', etc.
|
source_object STRING, -- higher-level info: 'args', 'request', etc.
|
||||||
sink_object string, -- higher level: 'execute', 'sql statement', etc.
|
sink_object string, -- higher level: 'execute', 'sql statement', etc.
|
||||||
|
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#+HTML: </div>
|
#+HTML: </div>
|
||||||
|
|||||||
Reference in New Issue
Block a user