From 9407e5b00f24fb5bdc2999900db5d961a3ae3727 Mon Sep 17 00:00:00 2001 From: Kristen Newbury Date: Wed, 17 May 2023 15:23:19 -0400 Subject: [PATCH] Add ability to read automationDetails.id if present --- README.md | 2 +- sarif_cli/scan_tables.py | 6 +++++- sarif_cli/signature.py | 4 ++++ sarif_cli/signature_single_CLI.py | 4 ++++ sarif_cli/table_joins_CLI.py | 6 +++++- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3916397..4928713 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ # Sarif format information - The tool operates on sarif generated by LGTM 1.27.0 (by default) or by the CodeQL CLI (enabled with the -f flag given a value of `CLI`). + The tool operates on sarif generated by LGTM 1.27.0 (by default) or by the CodeQL CLI (enabled with the -f flag given a value of `CLI`). The supported sarif is [SARIF v2.1.0](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html). The values that the -f flag accepts are: `LGTM` and `CLI`. diff --git a/sarif_cli/scan_tables.py b/sarif_cli/scan_tables.py index b2cd8f0..3d56e12 100644 --- a/sarif_cli/scan_tables.py +++ b/sarif_cli/scan_tables.py @@ -79,10 +79,14 @@ def joins_for_projects(basetables, external_info): """ b = basetables; e = external_info + extra = "" + # if the sarif does have automationDetails + if "automationDetails" in b.project: + extra = b.project.automationDetails[0] # if the sarif does have versionControlProvenance if "repositoryUri" in b.project: repoUri = b.project.repositoryUri[0] - e.project_id = hash.hash_unique(repoUri.encode()) + e.project_id = hash.hash_unique((repoUri+extra).encode()) else: repoUri = "unknown" diff --git a/sarif_cli/signature.py b/sarif_cli/signature.py index c8b2f18..c668a1d 100644 --- a/sarif_cli/signature.py +++ b/sarif_cli/signature.py @@ -250,6 +250,10 @@ def fillsig_dict(args, elem, context): # Several rules overlap and need to be applied together, so this is (now) a # simple sequence tests. 
+ if 'results' in elem.keys() and not 'automationDetails' in elem.keys(): + # want this to be blank if not present, i.e. no submodule info added / no sarif-category used + full_elem['automationDetails'] = {'id' : ""} + if {'locations', 'message', 'partialFingerprints', 'ruleId', 'ruleIndex'}.issubset(elem.keys()): # Ensure 'rule' is present diff --git a/sarif_cli/signature_single_CLI.py b/sarif_cli/signature_single_CLI.py index 1b6b747..4a8c432 100644 --- a/sarif_cli/signature_single_CLI.py +++ b/sarif_cli/signature_single_CLI.py @@ -30,6 +30,8 @@ struct_graph_CLI = ( ('Array6920', ('array', (0, 'Struct5277'), (1, 'Struct9567'))), ('Struct1509', ('struct', ('semmle.formatSpecifier', 'String'), ('semmle.sourceLanguage', 'String'))), ('Struct2774', ('struct', ('text', 'String'))), + #manually added for optional automationDetails + ('Struct1111', ('struct', ('id', 'String'))), ( 'Struct6299', ( 'struct', ('endColumn', 'Int'), @@ -146,6 +148,8 @@ struct_graph_CLI = ( ( 'Struct9786', ( 'struct', ('artifacts', 'Array6920'), + #manually added for optional automationDetails + ('automationDetails', 'Struct1111'), ('columnKind', 'String'), ('newlineSequences', 'Array7069'), ('properties', 'Struct1509'), diff --git a/sarif_cli/table_joins_CLI.py b/sarif_cli/table_joins_CLI.py index 71b8c42..94f9af9 100644 --- a/sarif_cli/table_joins_CLI.py +++ b/sarif_cli/table_joins_CLI.py @@ -330,7 +330,11 @@ def joins_for_project_single(tgraph): .rename(columns={"value_index": "value_index_1273"}) # .merge(sf(9786), how="left", left_on='id_or_value_at_index', right_on='struct_id', validate="1:m") - .drop(columns=['id_or_value_at_index', 'struct_id'])) + .drop(columns=['id_or_value_at_index', 'struct_id']) + + .merge(sf(1111), how="left", left_on='automationDetails', right_on='struct_id', validate="1:m") + .drop(columns=['automationDetails', 'struct_id']) + .rename(columns={"id": "automationDetails"})) # #newlines there or not - handle if 'newlineSequences' in project_df_temp1: