From 68b43e0514ff9b0dc5ce7211f4fef5f5a614a0fb Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Wed, 12 Jul 2023 17:04:23 -0700 Subject: [PATCH] wip: debug and get automationDetails into CSV output --- data/build-multiple-sarifs.sh | 3 - notes/README.org | 265 +++++++++++++++++++++++++++++++--- sarif_cli/columns.py | 5 +- sarif_cli/scan_tables.py | 8 +- sarif_cli/signature.py | 6 +- sarif_cli/table_joins_CLI.py | 4 + 6 files changed, 267 insertions(+), 24 deletions(-) diff --git a/data/build-multiple-sarifs.sh b/data/build-multiple-sarifs.sh index fb49a4d..23ad2f1 100644 --- a/data/build-multiple-sarifs.sh +++ b/data/build-multiple-sarifs.sh @@ -167,6 +167,3 @@ head -4 sqlidb-1.1.sarif.csv #* Check CSV output ls -la sqlidb-1.1* find sqlidb-1.1.sarif.scantables -print - - - diff --git a/notes/README.org b/notes/README.org index ac00db4..ef6ebd5 100644 --- a/notes/README.org +++ b/notes/README.org @@ -2,11 +2,11 @@ #+OPTIONS: org-confirm-babel-evaluate:nil #+LANGUAGE: en #+TEXT: -#+OPTIONS: ^:{} H:2 num:t \n:nil @:t ::t |:t ^:nil f:t *:t TeX:t LaTeX:t skip:nil p:nil +#+OPTIONS: ^:{} H:3 num:t \n:nil @:t ::t |:t ^:nil f:t *:t TeX:t LaTeX:t skip:nil p:nil #+OPTIONS: toc:nil #+HTML_HEAD: #+HTML:
-#+TOC: headlines 2 insert TOC here, with two headline levels +#+TOC: headlines 3 insert TOC here, with three headline levels #+HTML:
# #+HTML:
@@ -44,27 +44,258 @@ #+END_SRC -** The automationDetails.id +** Debugging the absence of automationDetails.id The =automationDetails.id= entry is produced by CodeQL when using the =--sarif-category= flag. The prerequisites for tracing its flow through the tools is started in [[../data/build-multiple-sarifs.sh]] - #+BEGIN_SRC sh :session shared :results output - cd ~/local/sarif-cli/ && ag -l automationDetails |cat + For testing the following is injected into =sqlidb-1.1.sarif=. + #+BEGIN_SRC text + : ' + "automationDetails" : { + "id" : "mast-issue/" + }, + ' + #+END_SRC + +*** Add repl as appropriate, then examine. + Make sure the input is correct + #+BEGIN_SRC sh :session shared :results output :eval never-export + cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection + grep -A2 automationDetails sqlidb-1.1.sarif + #+END_SRC - #+RESULTS: - : notes/README.org - : notes/README.html - : scripts/table-tests.sh - : sarif_cli/signature_single_CLI.py - : sarif_cli/table_joins_CLI.py - : sarif_cli/scan_tables.py - : sarif_cli/signature.py - : - : hohn@gh-hohn ~/local/sarif-cli - -#+HTML:
+ #+RESULTS: + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + : "automationDetails" : { + : "id" : "mast-issue/" + : }, + : + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + +*** Create the CSV + #+BEGIN_SRC sh :session shared :results output :eval never-export + source ~/local/sarif-cli/.venv/bin/activate + cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection + sarif-extract-scans-runner --input-signature CLI - > /dev/null < > (.venv) + hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + #+end_example + + #+BEGIN_SRC sh :session shared :results output :eval never-export + cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection + ls -la sqlidb-1.1* + find sqlidb-1.1.sarif.scantables -print + #+END_SRC + + #+RESULTS: + #+begin_example + hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + -rw-r--r-- 1 hohn staff 8.2K Jul 11 19:25 sqlidb-1.1.sarif + -rw-r--r-- 1 hohn staff 326 Jul 12 16:39 sqlidb-1.1.sarif.csv + -rw-r--r-- 1 hohn staff 72 Jul 12 16:39 sqlidb-1.1.sarif.scanspec + + sqlidb-1.1.sarif.scantables: + total 16K + drwxr-xr-x 6 hohn staff 192 Jul 12 16:39 ./ + drwxr-xr-x 43 hohn staff 1.4K Jul 12 16:39 ../ + -rw-r--r-- 1 hohn staff 622 Jul 12 16:39 codeflows.csv + -rw-r--r-- 1 hohn staff 165 Jul 12 16:39 projects.csv + -rw-r--r-- 1 hohn staff 589 Jul 12 16:39 results.csv + -rw-r--r-- 1 hohn staff 343 Jul 12 16:39 scans.csv + (.venv) + hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + sqlidb-1.1.sarif.scantables + sqlidb-1.1.sarif.scantables/codeflows.csv + sqlidb-1.1.sarif.scantables/scans.csv + sqlidb-1.1.sarif.scantables/results.csv + sqlidb-1.1.sarif.scantables/projects.csv + (.venv) + hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection + #+end_example + +*** Check if =automationDetails= or its value is in output + #+BEGIN_SRC sh :session shared :results output :eval never-export + cd 
~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + ag automationDetails | cat + #+END_SRC + + #+RESULTS: + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + : projects.csv:1:"id","project_name","creation_date","repo_url","primary_language","languages_analyzed","automationDetails" + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + + #+RESULTS: + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + + #+RESULTS: + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + + See if the magic value is present + #+BEGIN_SRC sh :session shared :results output :eval never-export + cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + ag mast-issue |cat + #+END_SRC + + #+RESULTS: + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + : projects.csv:2:490227419655596076,"vcp-no-uri","1970-01-01","vcp-no-uri","unknown","unknown","mast-issue/" + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + + #+RESULTS: + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables + +*** Nothing is in the output, so trace execution to see where it's dropped + #+BEGIN_SRC sh :session shared :results output :eval never-export + cd ~/local/sarif-cli/notes && ag -l automationDetails ../sarif_cli |cat + #+END_SRC + + 
#+RESULTS: + : ../sarif_cli/scan_tables.py + : ../sarif_cli/signature_single_CLI.py + : ../sarif_cli/table_joins_CLI.py + : ../sarif_cli/signature.py + : (.venv) + : hohn@gh-hohn ~/local/sarif-cli/notes + +*** Trace the call chain + Trace the call chain to one of + : ../sarif_cli/scan_tables.py + : ../sarif_cli/table_joins_CLI.py + : ../sarif_cli/signature.py + + Entry is + #+BEGIN_SRC sh :session shared :results output :eval never-export + sarif-extract-scans-runner --input-signature CLI - > /dev/null < diff --git a/sarif_cli/columns.py b/sarif_cli/columns.py index 71d8dda..2bbaa14 100644 --- a/sarif_cli/columns.py +++ b/sarif_cli/columns.py @@ -46,7 +46,8 @@ columns = { "creation_date", "repo_url" , "primary_language" , - "languages_analyzed" + "languages_analyzed", + "automationDetails", ], "codeflows" : [ "codeflow_id", @@ -62,4 +63,4 @@ columns = { "uriBaseId", "message" ] -} \ No newline at end of file +} diff --git a/sarif_cli/scan_tables.py b/sarif_cli/scan_tables.py index 28d02bc..69889d9 100644 --- a/sarif_cli/scan_tables.py +++ b/sarif_cli/scan_tables.py @@ -70,6 +70,7 @@ class ScanTablesTypes: "repo_url" : pd.StringDtype(), "primary_language" : pd.StringDtype(), "languages_analyzed" : pd.StringDtype(), + "automationDetails" : pd.StringDtype(), } # @@ -98,11 +99,16 @@ def joins_for_projects(basetables, external_info): "creation_date" : pd.Timestamp(0.0, unit='s'), # TODO: external info "repo_url" : repoUri, "primary_language" : b.project['semmle.sourceLanguage'][0], - "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])) + "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])), + "automationDetails" : extra, }, index=[0]) # Force all column types to ensure appropriate formatting res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True) + # XX: automationDetails? 
+ import IPython + IPython.embed(header="spot 11") + # return res1 # diff --git a/sarif_cli/signature.py b/sarif_cli/signature.py index e3d3f1e..100fce7 100644 --- a/sarif_cli/signature.py +++ b/sarif_cli/signature.py @@ -256,7 +256,11 @@ def fillsig_dict(args, elem, context): if 'results' in elem.keys() and not 'automationDetails' in elem.keys(): #want this to be blank if not present- ie no submodule info added/no sarif-category used - full_elem['automationDetails'] = {'id' : ""} + full_elem['automationDetails'] = {'id' : "no-value-for-ad"} + # XX: automationDetails? + import IPython + IPython.embed(header="spot 2") + # if {'locations', 'message', 'partialFingerprints', 'ruleId', 'ruleIndex'}.issubset(elem.keys()): diff --git a/sarif_cli/table_joins_CLI.py b/sarif_cli/table_joins_CLI.py index 94f9af9..ef6cf84 100644 --- a/sarif_cli/table_joins_CLI.py +++ b/sarif_cli/table_joins_CLI.py @@ -336,6 +336,10 @@ def joins_for_project_single(tgraph): .drop(columns=['automationDetails', 'struct_id']) .rename(columns={"id": "automationDetails"})) # + # XX: automationDetails? + import IPython + IPython.embed(header="spot 3") + # #newlines there or not - handle if 'newlineSequences' in project_df_temp1: project_df_temp2 = project_df_temp1.drop(columns=['newlineSequences'])