Add ability to read automationDetails.id if present

2025-12-16 09:13:04 +01:00 · 2023-05-17 15:23:19 -04:00
parent e2501b94a9
commit 9407e5b00f
5 changed files with 19 additions and 3 deletions
--- a/sarif_cli/scan_tables.py
+++ b/sarif_cli/scan_tables.py
@@ -79,10 +79,14 @@ def joins_for_projects(basetables, external_info):
    """
    b = basetables; e = external_info
   
+    extra = ""
+    # if the sarif does have automationDetails, append it to repoUri before hashing
+    if "automationDetails" in b.project:
+        extra = b.project.automationDetails[0]
    # if the sarif does have versionControlProvenance
    if "repositoryUri" in b.project:
        repoUri = b.project.repositoryUri[0]
-        e.project_id = hash.hash_unique(repoUri.encode())
+        e.project_id = hash.hash_unique((repoUri+extra).encode())
    else:
        repoUri = "unknown"
    
--- a/sarif_cli/signature.py
+++ b/sarif_cli/signature.py
@@ -250,6 +250,10 @@ def fillsig_dict(args, elem, context):
    # Several rules overlap and need to be applied together, so this is (now) a
    # simple sequence tests.

+    if 'results' in elem.keys() and not 'automationDetails' in elem.keys():
+        # Default to a blank id when automationDetails is absent, i.e. no submodule info was added and no sarif-category was used.
+        full_elem['automationDetails'] = {'id' : ""}
+
    if {'locations', 'message', 'partialFingerprints', 'ruleId',
        'ruleIndex'}.issubset(elem.keys()):
        # Ensure 'rule' is present 
--- a/sarif_cli/signature_single_CLI.py
+++ b/sarif_cli/signature_single_CLI.py
@@ -30,6 +30,8 @@ struct_graph_CLI = (
    ('Array6920', ('array', (0, 'Struct5277'), (1, 'Struct9567'))),
    ('Struct1509', ('struct', ('semmle.formatSpecifier', 'String'), ('semmle.sourceLanguage', 'String'))),
    ('Struct2774', ('struct', ('text', 'String'))),
+    #manually added for optional automationDetails
+    ('Struct1111', ('struct', ('id', 'String'))),
    (   'Struct6299',
        (   'struct',
            ('endColumn', 'Int'),
@@ -146,6 +148,8 @@ struct_graph_CLI = (
    (   'Struct9786',
        (   'struct',
            ('artifacts', 'Array6920'),
+            #manually added for optional automationDetails
+            ('automationDetails', 'Struct1111'),
            ('columnKind', 'String'),
            ('newlineSequences', 'Array7069'),
            ('properties', 'Struct1509'),
--- a/sarif_cli/table_joins_CLI.py
+++ b/sarif_cli/table_joins_CLI.py
@@ -330,7 +330,11 @@ def joins_for_project_single(tgraph):
        .rename(columns={"value_index": "value_index_1273"})
        #
        .merge(sf(9786), how="left", left_on='id_or_value_at_index', right_on='struct_id', validate="1:m")
-        .drop(columns=['id_or_value_at_index', 'struct_id']))
+        .drop(columns=['id_or_value_at_index', 'struct_id'])
+        
+        .merge(sf(1111), how="left", left_on='automationDetails', right_on='struct_id', validate="1:m")
+        .drop(columns=['automationDetails', 'struct_id'])
+         .rename(columns={"id": "automationDetails"}))
        # 
    #newlines there or not - handle
    if 'newlineSequences' in project_df_temp1: