Add ability to read automationDetails.id if present

2025-12-16 17:23:03 +01:00 · 2023-05-17 15:23:19 -04:00
parent e2501b94a9
commit 9407e5b00f
5 changed files with 19 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 # Sarif format information
-  The tool operates on sarif generated by LGTM 1.27.0 (by default) or by the CodeQL CLI (enabled with the -f flag given a value of `CLI`).
+  The tool operates on sarif generated by LGTM 1.27.0 (by default) or by the CodeQL CLI (enabled with the -f flag given a value of `CLI`). The supported SARIF version is [SARIF v2.1.0](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html).
  The values that the -f flag accepts are: `LGTM` and `CLI`.
--- a/sarif_cli/scan_tables.py
+++ b/sarif_cli/scan_tables.py
@@ -79,10 +79,14 @@ def joins_for_projects(basetables, external_info):
    """
    b = basetables; e = external_info
    extra = ""
    # if the sarif does have automationDetails
    if "automationDetails" in b.project:
        extra = b.project.automationDetails[0]
    # if the sarif does have versionControlProvenance
    if "repositoryUri" in b.project:
        repoUri = b.project.repositoryUri[0]
-        e.project_id = hash.hash_unique(repoUri.encode())
+        e.project_id = hash.hash_unique((repoUri+extra).encode())
    else:
        repoUri = "unknown"
--- a/sarif_cli/signature.py
+++ b/sarif_cli/signature.py
@@ -250,6 +250,10 @@ def fillsig_dict(args, elem, context):
    # Several rules overlap and need to be applied together, so this is (now) a
    # simple sequence tests.
    if 'results' in elem.keys() and not 'automationDetails' in elem.keys():
        # Want this to be blank if not present, i.e. no submodule info added / no sarif-category used
        full_elem['automationDetails'] = {'id' : ""}
    if {'locations', 'message', 'partialFingerprints', 'ruleId',
        'ruleIndex'}.issubset(elem.keys()):
        # Ensure 'rule' is present 
--- a/sarif_cli/signature_single_CLI.py
+++ b/sarif_cli/signature_single_CLI.py
@@ -30,6 +30,8 @@ struct_graph_CLI = (
    ('Array6920', ('array', (0, 'Struct5277'), (1, 'Struct9567'))),
    ('Struct1509', ('struct', ('semmle.formatSpecifier', 'String'), ('semmle.sourceLanguage', 'String'))),
    ('Struct2774', ('struct', ('text', 'String'))),
    #manually added for optional automationDetails
    ('Struct1111', ('struct', ('id', 'String'))),
    (   'Struct6299',
        (   'struct',
            ('endColumn', 'Int'),
@@ -146,6 +148,8 @@ struct_graph_CLI = (
    (   'Struct9786',
        (   'struct',
            ('artifacts', 'Array6920'),
            #manually added for optional automationDetails
            ('automationDetails', 'Struct1111'),
            ('columnKind', 'String'),
            ('newlineSequences', 'Array7069'),
            ('properties', 'Struct1509'),
--- a/sarif_cli/table_joins_CLI.py
+++ b/sarif_cli/table_joins_CLI.py
@@ -330,7 +330,11 @@ def joins_for_project_single(tgraph):
        .rename(columns={"value_index": "value_index_1273"})
        #
        .merge(sf(9786), how="left", left_on='id_or_value_at_index', right_on='struct_id', validate="1:m")
-        .drop(columns=['id_or_value_at_index', 'struct_id']))
+        .drop(columns=['id_or_value_at_index', 'struct_id'])
        .merge(sf(1111), how="left", left_on='automationDetails', right_on='struct_id', validate="1:m")
        .drop(columns=['automationDetails', 'struct_id'])
         .rename(columns={"id": "automationDetails"}))
        # 
    #newlines there or not - handle
    if 'newlineSequences' in project_df_temp1: