From c299321ab8598254c1b032b58898784809806b36 Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Thu, 13 Jul 2023 16:03:01 -0700 Subject: [PATCH] Remove repls; add scripts/test-vcp.sh --- .../sqlidb-0.sarif | 246 ++++++++++++++++++ sarif_cli/scan_tables.py | 3 - sarif_cli/signature.py | 4 - sarif_cli/table_joins_CLI.py | 4 - scripts/test-vcp.sh | 48 ++++ 5 files changed, 294 insertions(+), 11 deletions(-) create mode 100644 data/codeql-dataflow-sql-injection/sqlidb-0.sarif create mode 100644 scripts/test-vcp.sh diff --git a/data/codeql-dataflow-sql-injection/sqlidb-0.sarif b/data/codeql-dataflow-sql-injection/sqlidb-0.sarif new file mode 100644 index 0000000..47053af --- /dev/null +++ b/data/codeql-dataflow-sql-injection/sqlidb-0.sarif @@ -0,0 +1,246 @@ +{ + "$schema": "https://json.schemastore.org/sarif-2.1.0.json", + "version": "2.1.0", + "runs": [ + { + "tool": { + "driver": { + "name": "CodeQL", + "organization": "GitHub", + "semanticVersion": "2.9.4", + "rules": [ + { + "id": "cpp/SQLIVulnerable", + "name": "cpp/SQLIVulnerable", + "shortDescription": { + "text": "SQLI Vulnerability" + }, + "fullDescription": { + "text": "Using untrusted strings in a sql query allows sql injection attacks." + }, + "defaultConfiguration": { + "enabled": true, + "level": "warning" + }, + "properties": { + "description": "Using untrusted strings in a sql query allows sql injection attacks.", + "id": "cpp/SQLIVulnerable", + "kind": "path-problem", + "name": "SQLI Vulnerability", + "problem.severity": "warning" + } + } + ] + }, + "extensions": [ + { + "name": "legacy-upgrades", + "semanticVersion": "0.0.0", + "locations": [ + { + "uri": "file:///Users/hohn/.local/share/gh/extensions/gh-codeql/dist/release/v2.9.4/legacy-upgrades/", + "description": { + "text": "The QL pack root directory." + } + }, + { + "uri": "file:///Users/hohn/.local/share/gh/extensions/gh-codeql/dist/release/v2.9.4/legacy-upgrades/qlpack.yml", + "description": { + "text": "The QL pack definition file." + } + } + ] + }, + { + "name": "sample/cpp-sql-injection", + "semanticVersion": "0.0.1", + "locations": [ + { + "uri": "file:///Users/hohn/local/sarif-cli/data/codeql-dataflow-sql-injection/", + "description": { + "text": "The QL pack root directory." + } + }, + { + "uri": "file:///Users/hohn/local/sarif-cli/data/codeql-dataflow-sql-injection/qlpack.yml", + "description": { + "text": "The QL pack definition file." + } + } + ] + } + ] + }, + "artifacts": [ + { + "location": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + } + } + ], + "results": [ + { + "ruleId": "cpp/SQLIVulnerable", + "ruleIndex": 0, + "rule": { + "id": "cpp/SQLIVulnerable", + "index": 0 + }, + "message": { + "text": "Possible SQL injection" + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 84, + "startColumn": 27, + "endColumn": 32 + } + } + } + ], + "partialFingerprints": { + "primaryLocationLineHash": "9a8bc91bbc363391:1", + "primaryLocationStartColumnFingerprint": "22" + }, + "codeFlows": [ + { + "threadFlows": [ + { + "locations": [ + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 52, + "startColumn": 32, + "endColumn": 35 + } + }, + "message": { + "text": "ref arg buf" + } + } + }, + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 60, + "startColumn": 12, + "endColumn": 15 + } + }, + "message": { + "text": "buf" + } + } + }, + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 93, + "startColumn": 12, + "endColumn": 25 + } + }, + "message": { + "text": "call to get_user_info" + } + } + }, + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 95, + "startColumn": 20, + "endColumn": 24 + } + }, + "message": { + "text": "info" + } + } + }, + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 68, + "startColumn": 31, + "endColumn": 35 + } + }, + "message": { + "text": "info" + } + } + }, + { + "location": { + "physicalLocation": { + "artifactLocation": { + "uri": "add-user.c", + "uriBaseId": "%SRCROOT%", + "index": 0 + }, + "region": { + "startLine": 84, + "startColumn": 27, + "endColumn": 32 + } + }, + "message": { + "text": "query" + } + } + } + ] + } + ] + } + ] + } + ], + "columnKind": "utf16CodeUnits", + "properties": { + "semmle.formatSpecifier": "sarif-latest" + } + } + ] +} diff --git a/sarif_cli/scan_tables.py b/sarif_cli/scan_tables.py index 69889d9..faba273 100644 --- a/sarif_cli/scan_tables.py +++ b/sarif_cli/scan_tables.py @@ -105,9 +105,6 @@ def joins_for_projects(basetables, external_info): # Force all column types to ensure appropriate formatting res1 = res.astype(ScanTablesTypes.projects).reset_index(drop=True) - # XX: automationDetails? - import IPython - IPython.embed(header="spot 11") # return res1 diff --git a/sarif_cli/signature.py b/sarif_cli/signature.py index 100fce7..7302709 100644 --- a/sarif_cli/signature.py +++ b/sarif_cli/signature.py @@ -257,10 +257,6 @@ def fillsig_dict(args, elem, context): if 'results' in elem.keys() and not 'automationDetails' in elem.keys(): #want this to be blank if not present- ie no submodule info added/no sarif-category used full_elem['automationDetails'] = {'id' : "no-value-for-ad"} - # XX: automationDetails? - import IPython - IPython.embed(header="spot 2") - # if {'locations', 'message', 'partialFingerprints', 'ruleId', 'ruleIndex'}.issubset(elem.keys()): diff --git a/sarif_cli/table_joins_CLI.py b/sarif_cli/table_joins_CLI.py index ef6cf84..3859b3e 100644 --- a/sarif_cli/table_joins_CLI.py +++ b/sarif_cli/table_joins_CLI.py @@ -335,10 +335,6 @@ def joins_for_project_single(tgraph): .merge(sf(1111), how="left", left_on='automationDetails', right_on='struct_id', validate="1:m") .drop(columns=['automationDetails', 'struct_id']) .rename(columns={"id": "automationDetails"})) - # - # XX: automationDetails? - import IPython - IPython.embed(header="spot 3") # #newlines there or not - handle if 'newlineSequences' in project_df_temp1: diff --git a/scripts/test-vcp.sh b/scripts/test-vcp.sh new file mode 100644 index 0000000..32afbae --- /dev/null +++ b/scripts/test-vcp.sh @@ -0,0 +1,48 @@ +# +# The automationDetails.id entry is produced by CodeQL when using the +# =--sarif-category= flag. +# +# This is a simple end-to-end test to ensure it appears after CSV conversion. +# + +#* Two databases, one with and one without +# --sarif-category mast-issue +cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection +ls -la sqlidb-0.sarif sqlidb-1.sarif +grep -A2 automationDetails sqlidb-0.sarif sqlidb-1.sarif + +source ~/local/sarif-cli/.venv/bin/activate + +function get-csv() { + #* Insert versionControlProvenance + sarif-insert-vcp $1.sarif > $1.1.sarif + + #* Get CSV. + cd ~/local/sarif-cli/data/codeql-dataflow-sql-injection + sarif-extract-scans-runner --input-signature CLI - > /dev/null <