mirror of
https://github.com/hohn/sarif-cli.git
synced 2025-12-16 17:23:03 +01:00
Export column types for scan-related pandas tables
This commit is contained in:
committed by
=Michael Hohn
parent
7e996e746c
commit
505ee8ea66
@@ -9,6 +9,60 @@ from . import snowflake_id
|
||||
class ZeroResults(Exception):
    """Exception subclass that adds no attributes or behavior of its own."""
|
||||
|
||||
#
|
||||
# Column types for scan-related pandas tables
|
||||
#
|
||||
class ScanTablesTypes:
    """Column dtypes for the scan-related pandas tables.

    Each class attribute maps column names to the dtype that column is
    forced to, and is meant to be passed directly to ``DataFrame.astype``.

    Timestamp columns use an explicit ``datetime64[ns]`` dtype.
    ``numpy.datetime64()`` is a NaT scalar, not a dtype; it only resolves
    to the unit-less ``datetime64`` dtype, which newer pandas releases
    refuse to cast to.
    """

    # Column types of the scans table.
    scans = {
        "id": pd.UInt64Dtype(),
        "commit_id": pd.StringDtype(),
        "project_id": pd.UInt64Dtype(),
        "db_create_start": numpy.dtype("datetime64[ns]"),
        "db_create_stop": numpy.dtype("datetime64[ns]"),
        "scan_start_date": numpy.dtype("datetime64[ns]"),
        "scan_stop_date": numpy.dtype("datetime64[ns]"),
        "tool_name": pd.StringDtype(),
        "tool_version": pd.StringDtype(),
        "tool_query_commit_id": pd.StringDtype(),
        "sarif_file_name": pd.StringDtype(),
        "results_count": pd.Int64Dtype(),
        "rules_count": pd.Int64Dtype(),
    }

    # Column types of the results table.
    results = {
        'id': pd.UInt64Dtype(),
        'scan_id': pd.UInt64Dtype(),
        'query_id': pd.StringDtype(),

        'result_type': pd.StringDtype(),
        'codeFlow_id': pd.UInt64Dtype(),

        'message': pd.StringDtype(),
        'message_object': numpy.dtype('O'),
        'location': pd.StringDtype(),

        'source_startLine': pd.Int64Dtype(),
        'source_startCol': pd.Int64Dtype(),
        'source_endLine': pd.Int64Dtype(),
        'source_endCol': pd.Int64Dtype(),

        'sink_startLine': pd.Int64Dtype(),
        'sink_startCol': pd.Int64Dtype(),
        'sink_endLine': pd.Int64Dtype(),
        'sink_endCol': pd.Int64Dtype(),

        # TODO Find high-level info from query name or tags?
        'source_object': numpy.dtype('O'),
        'sink_object': numpy.dtype('O'),
    }

    # Column types of the projects table.
    projects = {
        "id": pd.UInt64Dtype(),
        "project_name": pd.StringDtype(),
        "creation_date": numpy.dtype("datetime64[ns]"),
        "repo_url": pd.StringDtype(),
        "primary_language": pd.StringDtype(),
        "languages_analyzed": pd.StringDtype(),
    }
|
||||
|
||||
#
|
||||
# Projects table
|
||||
#
|
||||
@@ -36,18 +90,10 @@ def joins_for_projects(basetables, external_info, scantables):
|
||||
"repo_url" : repo_url,
|
||||
"primary_language" : b.project['semmle.sourceLanguage'][0], # TODO: external info
|
||||
"languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage']))
|
||||
},index=[0])
|
||||
}, index=[0])
|
||||
|
||||
# Force all column types to ensure appropriate formatting
|
||||
res1 = res.astype({
|
||||
"id" : pd.UInt64Dtype(),
|
||||
"project_name" : pd.StringDtype(),
|
||||
"creation_date" : numpy.datetime64(),
|
||||
"repo_url" : pd.StringDtype(),
|
||||
"primary_language" : pd.StringDtype(),
|
||||
"languages_analyzed" : pd.StringDtype(),
|
||||
}).reset_index(drop=True)
|
||||
|
||||
res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True)
|
||||
return res1
|
||||
|
||||
#
|
||||
@@ -82,22 +128,7 @@ def joins_for_scans(basetables, external_info, scantables):
|
||||
},index=[0])
|
||||
|
||||
# Force all column types to ensure correct writing and type checks on reading.
|
||||
res1 = res.astype({
|
||||
"id" : pd.UInt64Dtype(),
|
||||
"commit_id" : pd.StringDtype(),
|
||||
"project_id" : pd.UInt64Dtype(),
|
||||
"db_create_start" : numpy.datetime64(),
|
||||
"db_create_stop" : numpy.datetime64(),
|
||||
"scan_start_date" : numpy.datetime64(),
|
||||
"scan_stop_date" : numpy.datetime64(),
|
||||
"tool_name" : pd.StringDtype(),
|
||||
"tool_version" : pd.StringDtype(),
|
||||
"tool_query_commit_id" : pd.StringDtype(),
|
||||
"sarif_file_name" : pd.StringDtype(),
|
||||
"results_count" : pd.Int64Dtype(),
|
||||
"rules_count" : pd.Int64Dtype(),
|
||||
}).reset_index(drop=True)
|
||||
|
||||
res1 = res.astype(ScanTablesTypes.scans).reset_index(drop=True)
|
||||
return res1
|
||||
|
||||
#
|
||||
@@ -129,33 +160,7 @@ def joins_for_results(basetables, external_info):
|
||||
res = tables[0]
|
||||
|
||||
# Force all column types to ensure appropriate formatting
|
||||
res1 = res.astype({
|
||||
'id' : pd.UInt64Dtype(),
|
||||
'scan_id' : pd.UInt64Dtype(),
|
||||
'query_id' : pd.StringDtype(),
|
||||
|
||||
'result_type' : pd.StringDtype(),
|
||||
'codeFlow_id' : pd.UInt64Dtype(),
|
||||
|
||||
'message' : pd.StringDtype(),
|
||||
'message_object' : numpy.dtype('O'),
|
||||
'location' : pd.StringDtype(),
|
||||
|
||||
'source_startLine' : pd.Int64Dtype(),
|
||||
'source_startCol' : pd.Int64Dtype(),
|
||||
'source_endLine' : pd.Int64Dtype(),
|
||||
'source_endCol' : pd.Int64Dtype(),
|
||||
|
||||
'sink_startLine' : pd.Int64Dtype(),
|
||||
'sink_startCol' : pd.Int64Dtype(),
|
||||
'sink_endLine' : pd.Int64Dtype(),
|
||||
'sink_endCol' : pd.Int64Dtype(),
|
||||
|
||||
# TODO Find high-level info from query name or tags?
|
||||
'source_object' : numpy.dtype('O'),
|
||||
'sink_object' : numpy.dtype('O'),
|
||||
}).reset_index(drop=True)
|
||||
|
||||
res1 = res.astype(ScanTablesTypes.results).reset_index(drop=True)
|
||||
return res1
|
||||
|
||||
def _results_from_kind_problem(basetables, external_info):
|
||||
|
||||
Reference in New Issue
Block a user