-#+TOC: headlines 2 insert TOC here, with two headline levels
+#+TOC: headlines 3 insert TOC here, with three headline levels
#+HTML:
#
#+HTML:
@@ -44,27 +44,258 @@
#+END_SRC
-** The automationDetails.id
+** Debugging the absence of automationDetails.id
The =automationDetails.id= entry is produced by CodeQL when using the
=--sarif-category= flag.
The prerequisites for tracing its flow through the tools is started in
[[../data/build-multiple-sarifs.sh]]
- #+BEGIN_SRC sh :session shared :results output
- cd ~/local/sarif-cli/ && ag -l automationDetails |cat
+ For testing, the following is injected into =sqlidb-1.1.sarif=.
+ #+BEGIN_SRC text
+ : '
+ "automationDetails" : {
+ "id" : "mast-issue/"
+ },
+ '
+
#+END_SRC
+
+*** Add a REPL as appropriate, then examine.
+ Make sure the input is correct
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection
+ grep -A2 automationDetails sqlidb-1.1.sarif
+ #+END_SRC
- #+RESULTS:
- : notes/README.org
- : notes/README.html
- : scripts/table-tests.sh
- : sarif_cli/signature_single_CLI.py
- : sarif_cli/table_joins_CLI.py
- : sarif_cli/scan_tables.py
- : sarif_cli/signature.py
- :
- : [32mhohn@gh-hohn [33m~/local/sarif-cli[0m
-
-#+HTML:
+ #+RESULTS:
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+ : "automationDetails" : {
+ : "id" : "mast-issue/"
+ : },
+ :
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+
+*** Create the CSV
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ source ~/local/sarif-cli/.venv/bin/activate
+ cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection
+ sarif-extract-scans-runner --input-signature CLI - > /dev/null < > (.venv)
+ [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+ #+end_example
+
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection
+ ls -la sqlidb-1.1*
+ find sqlidb-1.1.sarif.scantables -print
+ #+END_SRC
+
+ #+RESULTS:
+ #+begin_example
+ [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+ -rw-r--r-- 1 hohn staff 8.2K Jul 11 19:25 [0m[0msqlidb-1.1.sarif[0m
+ -rw-r--r-- 1 hohn staff 326 Jul 12 16:39 [0msqlidb-1.1.sarif.csv[0m
+ -rw-r--r-- 1 hohn staff 72 Jul 12 16:39 [0msqlidb-1.1.sarif.scanspec[0m
+
+ sqlidb-1.1.sarif.scantables:
+ total 16K
+ drwxr-xr-x 6 hohn staff 192 Jul 12 16:39 [1;34m.[0m/
+ drwxr-xr-x 43 hohn staff 1.4K Jul 12 16:39 [1;34m..[0m/
+ -rw-r--r-- 1 hohn staff 622 Jul 12 16:39 [0mcodeflows.csv[0m
+ -rw-r--r-- 1 hohn staff 165 Jul 12 16:39 [0mprojects.csv[0m
+ -rw-r--r-- 1 hohn staff 589 Jul 12 16:39 [0mresults.csv[0m
+ -rw-r--r-- 1 hohn staff 343 Jul 12 16:39 [0mscans.csv[0m
+ (.venv)
+ [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+ sqlidb-1.1.sarif.scantables
+ sqlidb-1.1.sarif.scantables/codeflows.csv
+ sqlidb-1.1.sarif.scantables/scans.csv
+ sqlidb-1.1.sarif.scantables/results.csv
+ sqlidb-1.1.sarif.scantables/projects.csv
+ (.venv)
+ [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection[0m
+ #+end_example
+
+*** Check if =automationDetails= or its value is in output
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables
+ ag automationDetails | cat
+ #+END_SRC
+
+ #+RESULTS:
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+ : projects.csv:1:"id","project_name","creation_date","repo_url","primary_language","languages_analyzed","automationDetails"
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+
+ #+RESULTS:
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+
+ #+RESULTS:
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+
+ See if the magic value =mast-issue/= is present
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables
+ ag mast-issue |cat
+ #+END_SRC
+
+ #+RESULTS:
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+ : projects.csv:2:490227419655596076,"vcp-no-uri","1970-01-01","vcp-no-uri","unknown","unknown","mast-issue/"
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+
+ #+RESULTS:
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/data/codeql-dataflow-sql-injection/sqlidb-1.1.sarif.scantables[0m
+
+*** Nothing is in the output, so trace execution to see where it's dropped
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ cd ~/local/sarif-cli/notes && ag -l automationDetails ../sarif_cli |cat
+ #+END_SRC
+
+ #+RESULTS:
+ : ../sarif_cli/scan_tables.py
+ : ../sarif_cli/signature_single_CLI.py
+ : ../sarif_cli/table_joins_CLI.py
+ : ../sarif_cli/signature.py
+ : (.venv)
+ : [32mhohn@gh-hohn [33m~/local/sarif-cli/notes[0m
+
+*** Trace the call chain
+ Trace the call chain to one of
+ : ../sarif_cli/scan_tables.py
+ : ../sarif_cli/table_joins_CLI.py
+ : ../sarif_cli/signature.py
+
+ Entry is
+ #+BEGIN_SRC sh :session shared :results output :eval never-export
+ sarif-extract-scans-runner --input-signature CLI - > /dev/null <
diff --git a/sarif_cli/columns.py b/sarif_cli/columns.py
index 71d8dda..2bbaa14 100644
--- a/sarif_cli/columns.py
+++ b/sarif_cli/columns.py
@@ -46,7 +46,8 @@ columns = {
"creation_date",
"repo_url" ,
"primary_language" ,
- "languages_analyzed"
+ "languages_analyzed",
+ "automationDetails",
],
"codeflows" : [
"codeflow_id",
@@ -62,4 +63,4 @@ columns = {
"uriBaseId",
"message"
]
-}
\ No newline at end of file
+}
diff --git a/sarif_cli/scan_tables.py b/sarif_cli/scan_tables.py
index 28d02bc..69889d9 100644
--- a/sarif_cli/scan_tables.py
+++ b/sarif_cli/scan_tables.py
@@ -70,6 +70,7 @@ class ScanTablesTypes:
"repo_url" : pd.StringDtype(),
"primary_language" : pd.StringDtype(),
"languages_analyzed" : pd.StringDtype(),
+ "automationDetails" : pd.StringDtype(),
}
#
@@ -98,11 +99,16 @@ def joins_for_projects(basetables, external_info):
"creation_date" : pd.Timestamp(0.0, unit='s'), # TODO: external info
"repo_url" : repoUri,
"primary_language" : b.project['semmle.sourceLanguage'][0],
- "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage']))
+ "languages_analyzed" : ",".join(list(b.project['semmle.sourceLanguage'])),
+ "automationDetails" : extra,
}, index=[0])
# Force all column types to ensure appropriate formatting
res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True)
+ # XX: automationDetails?
+ import IPython
+ IPython.embed(header="spot 11")
+ #
return res1
#
diff --git a/sarif_cli/signature.py b/sarif_cli/signature.py
index e3d3f1e..100fce7 100644
--- a/sarif_cli/signature.py
+++ b/sarif_cli/signature.py
@@ -256,7 +256,11 @@ def fillsig_dict(args, elem, context):
if 'results' in elem.keys() and not 'automationDetails' in elem.keys():
#want this to be blank if not present- ie no submodule info added/no sarif-category used
- full_elem['automationDetails'] = {'id' : ""}
+ full_elem['automationDetails'] = {'id' : "no-value-for-ad"}
+ # XX: automationDetails?
+ import IPython
+ IPython.embed(header="spot 2")
+ #
if {'locations', 'message', 'partialFingerprints', 'ruleId',
'ruleIndex'}.issubset(elem.keys()):
diff --git a/sarif_cli/table_joins_CLI.py b/sarif_cli/table_joins_CLI.py
index 94f9af9..ef6cf84 100644
--- a/sarif_cli/table_joins_CLI.py
+++ b/sarif_cli/table_joins_CLI.py
@@ -336,6 +336,10 @@ def joins_for_project_single(tgraph):
.drop(columns=['automationDetails', 'struct_id'])
.rename(columns={"id": "automationDetails"}))
#
+ # XX: automationDetails?
+ import IPython
+ IPython.embed(header="spot 3")
+ #
#newlines there or not - handle
if 'newlineSequences' in project_df_temp1:
project_df_temp2 = project_df_temp1.drop(columns=['newlineSequences'])