wip: debug and get automationDetails into CSV output

This commit is contained in:
Michael Hohn
2023-07-12 17:04:23 -07:00
committed by =Michael Hohn
parent 742392338e
commit 68b43e0514
6 changed files with 267 additions and 24 deletions

View File

@@ -46,7 +46,8 @@ columns = {
"creation_date",
"repo_url" ,
"primary_language" ,
"languages_analyzed"
"languages_analyzed",
"automationDetails",
],
"codeflows" : [
"codeflow_id",
@@ -62,4 +63,4 @@ columns = {
"uriBaseId",
"message"
]
}
}

View File

@@ -70,6 +70,7 @@ class ScanTablesTypes:
"repo_url" : pd.StringDtype(),
"primary_language" : pd.StringDtype(),
"languages_analyzed" : pd.StringDtype(),
"automationDetails" : pd.StringDtype(),
}
#
@@ -98,11 +99,16 @@ def joins_for_projects(basetables, external_info):
"creation_date" : pd.Timestamp(0.0, unit='s'), # TODO: external info
"repo_url" : repoUri,
"primary_language" : b.project['semmle.sourceLanguage'][0],
"languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage']))
"languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])),
"automationDetails" : extra,
}, index=[0])
# Force all column types to ensure appropriate formatting
res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True)
# XX: automationDetails?
import IPython
IPython.embed(header="spot 11")
#
return res1
#

View File

@@ -256,7 +256,11 @@ def fillsig_dict(args, elem, context):
if 'results' in elem.keys() and not 'automationDetails' in elem.keys():
# Want this to be blank if not present, i.e. no submodule info added / no sarif-category used
full_elem['automationDetails'] = {'id' : ""}
full_elem['automationDetails'] = {'id' : "no-value-for-ad"}
# XX: automationDetails?
import IPython
IPython.embed(header="spot 2")
#
if {'locations', 'message', 'partialFingerprints', 'ruleId',
'ruleIndex'}.issubset(elem.keys()):

View File

@@ -336,6 +336,10 @@ def joins_for_project_single(tgraph):
.drop(columns=['automationDetails', 'struct_id'])
.rename(columns={"id": "automationDetails"}))
#
# XX: automationDetails?
import IPython
IPython.embed(header="spot 3")
#
# The newlineSequences column may or may not be present - handle either case
if 'newlineSequences' in project_df_temp1:
project_df_temp2 = project_df_temp1.drop(columns=['newlineSequences'])