mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Include project table in output of sarif-extract-scans; add commit_id to scans table
This commit is contained in:
committed by
=Michael Hohn
parent
fd55969b76
commit
741be0cfe1
@@ -36,9 +36,11 @@
|
||||
python3 -m venv .venv
|
||||
. .venv/bin/activate
|
||||
python3 -m pip install -r requirements.txt
|
||||
# Or separately:
|
||||
# Or separately, for development:
|
||||
pip install --upgrade pip
|
||||
pip install ipython pyyaml pandas jupyter pyflakes
|
||||
# Or separately, for running:
|
||||
pip install pandas
|
||||
#+END_SRC
|
||||
|
||||
"Install" for local development:
|
||||
|
||||
@@ -88,6 +88,7 @@ class ScanTables:
|
||||
# project: External table with project information
|
||||
scans : pd.DataFrame
|
||||
results : pd.DataFrame
|
||||
projects : pd.DataFrame
|
||||
columns_to_reindex : dict # (name -> name list) dict
|
||||
def __init__(self): pass
|
||||
scantabs = ScanTables()
|
||||
@@ -124,6 +125,8 @@ bt.rules = tj.joins_for_rules(tgraph)
|
||||
#
|
||||
scantabs.results = st.joins_for_results(bt, external_info)
|
||||
scantabs.scans = st.joins_for_scans(bt, external_info, scantabs)
|
||||
scantabs.projects = st.joins_for_projects(bt, external_info, scantabs)
|
||||
|
||||
|
||||
#
|
||||
# Replace the remaining internal ids with snowflake ids
|
||||
@@ -142,6 +145,7 @@ bt.columns_to_reindex = {
|
||||
|
||||
scantabs.columns_to_reindex = {
|
||||
'scans': [],
|
||||
'projects' : [],
|
||||
'results': ['codeFlow_id'],
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,40 @@
|
||||
|
||||
"""
|
||||
import pandas as pd
|
||||
import re
|
||||
from . import snowflake_id
|
||||
|
||||
#
|
||||
# Projects table
|
||||
#
|
||||
def joins_for_projects(basetables, external_info, scantables):
    """
    Form the 'projects' table for the ScanTables dataclass.

    Parameters:
        basetables: object whose `project` attribute is a DataFrame with the
            columns `repositoryUri` and `semmle.sourceLanguage`.
        external_info: object providing `project_id`.
        scantables: unused here; kept so the signature parallels the other
            joins_for_* functions.

    Returns:
        A single-row DataFrame (index 0) with the columns
        id, project_name, creation_date, repo_url, primary_language,
        languages_analyzed.
    """
    b = basetables
    e = external_info

    # For a repository url of the form
    #     (git|https)://*/org/project.*
    # use the org/project part as the project_name.
    #
    repo_url = b.project.repositoryUri[0]

    # A missing repository uri shows up as a non-string (NaN / NA); re.match
    # would raise TypeError on it, so treat it like a non-matching url.
    url_parts = None
    if isinstance(repo_url, str):
        url_parts = re.match(r'(git|https)://[^/]+/([^/]+)/([^/.]+).*', repo_url)

    if url_parts:
        project_name = f"{url_parts.group(2)}/{url_parts.group(3)}"
    else:
        project_name = pd.NA

    source_languages = b.project['semmle.sourceLanguage']

    # Scalars plus index=[0] produce exactly one row.
    res = pd.DataFrame(data={
        "id" : e.project_id,
        "project_name" : project_name,
        "creation_date" : pd.NA,                    # TODO: external info
        "repo_url" : repo_url,
        "primary_language" : source_languages[0],   # TODO: external info
        "languages_analyzed" : ",".join(source_languages),
    }, index=[0])

    return res
|
||||
|
||||
#
|
||||
# Scans table
|
||||
#
|
||||
@@ -19,7 +51,7 @@ def joins_for_scans(basetables, external_info, scantables):
|
||||
"More than one driver version found for single sarif file."
|
||||
res = pd.DataFrame(data={
|
||||
"id" : e.scan_id,
|
||||
"commit_id" : pd.NA,
|
||||
"commit_id" : b.project.revisionId[0],
|
||||
"project_id" : e.project_id,
|
||||
#
|
||||
"db_create_start" : pd.NA,
|
||||
|
||||
Reference in New Issue
Block a user