wip: debug and get automationDetails into CSV output

2025-12-16 09:13:04 +01:00 · 2023-07-12 17:04:23 -07:00
parent 742392338e
commit 68b43e0514
6 changed files with 267 additions and 24 deletions
--- a/sarif_cli/columns.py
+++ b/sarif_cli/columns.py
@@ -46,7 +46,8 @@ columns = {
        "creation_date",     
        "repo_url" ,          
        "primary_language"  , 
-        "languages_analyzed" 
+        "languages_analyzed",
+        "automationDetails",
 ],
 "codeflows" : [
 "codeflow_id",
@@ -62,4 +63,4 @@ columns = {
 "uriBaseId",
 "message"
 ]
-}
+}
--- a/sarif_cli/scan_tables.py
+++ b/sarif_cli/scan_tables.py
@@ -70,6 +70,7 @@ class ScanTablesTypes:
        "repo_url"           : pd.StringDtype(),
        "primary_language"   : pd.StringDtype(),
        "languages_analyzed" : pd.StringDtype(),
+        "automationDetails"  : pd.StringDtype(),
    }

 #
@@ -98,11 +99,16 @@ def joins_for_projects(basetables, external_info):
        "creation_date"      : pd.Timestamp(0.0, unit='s'), # TODO: external info 
        "repo_url"           : repoUri, 
        "primary_language"   : b.project['semmle.sourceLanguage'][0],
-        "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage']))
+        "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])),
+        "automationDetails"  : extra,
    }, index=[0])

    # Force all column types to ensure appropriate formatting
    res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True)
+    # XX: temporary debug hook -- drop into IPython to inspect automationDetails in res1
+    import IPython
+    IPython.embed(header="spot 11")
+    # 
    return res1

 #
--- a/sarif_cli/signature.py
+++ b/sarif_cli/signature.py
@@ -256,7 +256,11 @@ def fillsig_dict(args, elem, context):

    if 'results' in elem.keys() and not 'automationDetails' in elem.keys():
        #want this to be blank if not present- ie no submodule info added/no sarif-category used
-        full_elem['automationDetails'] = {'id' : ""}
+        full_elem['automationDetails'] = {'id' : "no-value-for-ad"}
+        # XX: temporary debug hook -- drop into IPython when elem has no automationDetails key
+        import IPython
+        IPython.embed(header="spot 2")
+        # 

    if {'locations', 'message', 'partialFingerprints', 'ruleId',
        'ruleIndex'}.issubset(elem.keys()):
--- a/sarif_cli/table_joins_CLI.py
+++ b/sarif_cli/table_joins_CLI.py
@@ -336,6 +336,10 @@ def joins_for_project_single(tgraph):
        .drop(columns=['automationDetails', 'struct_id'])
         .rename(columns={"id": "automationDetails"}))
        # 
+    # XX: temporary debug hook -- drop into IPython to inspect the renamed automationDetails column
+    import IPython
+    IPython.embed(header="spot 3")
+    # 
    #newlines there or not - handle
    if 'newlineSequences' in project_df_temp1:
        project_df_temp2 = project_df_temp1.drop(columns=['newlineSequences'])