Rename directories to include language. Also update files

2025-12-16 09:53:04 +01:00 · 2025-07-30 15:14:02 -07:00
parent fe1baf7dc1
commit 102c18cce5
40 changed files with 43 additions and 27 deletions
--- a/codeql-dataflow-sql-injection-c/CodeQL-workshop-overview-only.pdf
+++ b/codeql-dataflow-sql-injection-c/CodeQL-workshop-overview-only.pdf
--- a/codeql-dataflow-sql-injection-c/README.org
+++ b/codeql-dataflow-sql-injection-c/README.org
@@ -0,0 +1,310 @@
+[[https://imgs.xkcd.com/comics/exploits_of_a_mom.png]]
+
+(from https://xkcd.com/327/)
+
+
+* Using sqlite to illustrate models-as-data
+** Build codeql database
+   To get started, build the codeql database (adjust paths to your setup):
+   #+BEGIN_SRC sh
+     # Build the db with source commit id.
+     # export PATH=$HOME/local/vmsync/codeql250:"$PATH"
+     SRCDIR=$(pwd)
+     DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
+
+     echo $DB
+     test -d "$DB" && rm -fR "$DB"
+     mkdir -p "$DB"
+
+     cd $SRCDIR && codeql database create --language=cpp -s . -j 8 -v $DB --command='./build.sh'
+   #+END_SRC
+
+   Then add this database directory to your VS Code =DATABASES= tab.
+** Tests using a default query
+** TODO supplement sources via the model editor   
+** TODO supplement codeql: Add to FlowSource or a subclass
+
+       Note: this is /one area/ that just has to be known.  Browsing source will *not*
+       help you.
+
+       CodeQL reading hint:
+       : class ActiveThreatModelSource extends DataFlow::Node
+       uses
+       : this.(SourceNode).getThreatModel()
+       So following the cast (SourceNode) may be useful:
+       #+BEGIN_SRC java
+         /**
+          ,* A data flow source.
+          ,*/
+         abstract class SourceNode extends DataFlow::Node 
+       #+END_SRC
+       Following the =abstract class= is promising:
+       #+BEGIN_SRC java
+         abstract class RemoteFlowSource extends SourceNode
+       #+END_SRC
+       and others.
+
+       XX: no java, use C
+       In 
+       [[../ql/java/ql/lib/Customizations.qll]]
+       notice the comments mentioning RemoteFlowSource.
+       Use imports from [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] 
+       but note that there are conflicts.  You will use
+       : private import semmle.code.java.dataflow.FlowSources
+       Follow this to FlowSources, and find the mentioned RemoteFlowSource
+       : abstract class RemoteFlowSource extends SourceNode
+
+       Add the custom source.  The modified [[../ql/java/ql/lib/Customizations.qll]] is
+       #+BEGIN_SRC java
+         import java
+         private import semmle.code.java.dataflow.FlowSources
+
+         class ReadLine extends RemoteFlowSource {
+           ReadLine() {
+             exists(Call read |
+               read.getCallee().getName() = "readLine" and
+               read = this.asExpr()
+             )
+           }
+
+           override string getSourceType() { result = "Console readline" }
+         }
+       #+END_SRC
+
+       Note that the predicate
+       #+BEGIN_SRC java
+         module QueryInjectionFlowConfig implements DataFlow::ConfigSig {
+           predicate isSource(DataFlow::Node src) { src instanceof ActiveThreatModelSource }
+               ...;
+         }
+       #+END_SRC
+       now also returns the readLine() result -- although we extended
+       RemoteFlowSource, not ActiveThreatModelSource
+
+** TODO supplement codeql: Add to models-as-data
+       - schema in codeql: [[../ql/cpp/ql/lib/semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll]]
+         #+BEGIN_SRC java
+           extensible predicate sourceModel(
+             string namespace, string type, boolean subtypes, string name, string signature, string ext,
+             string output, string kind, string provenance, QlBuiltins::ExtensionId madId
+           );
+         #+END_SRC
+         
+       - schema in json: ../tmp.bundle/codeql/qlpacks/codeql/cpp-queries/1.3.0/.codeql/libraries/codeql/cpp-all/3.0.0/.packinfo
+         #+BEGIN_SRC sh 
+           ../bin/hovjson < ../tmp.bundle/codeql/qlpacks/codeql/cpp-queries/1.3.0/.codeql/libraries/codeql/cpp-all/3.0.0/.packinfo
+           {
+             "extensible_predicate_metadata": {
+               "extensible_predicates": [
+                 {
+                   "name": "sourceModel",
+                   "parameters": [
+                     {"name": "namespace","type": "string"},
+                     {"name": "type","type": "string"},
+                     {"name": "subtypes","type": "boolean"},
+                     {"name": "name","type": "string"},
+                     {"name": "signature","type": "string"},
+                     {"name": "ext","type": "string"},
+                     {"name": "output","type": "string"},
+                     {"name": "kind","type": "string"},
+                     {"name": "provenance","type": "string"}
+                   ],
+                   "has_origin": true,
+                   "path": "semmle/code/cpp/dataflow/internal/ExternalFlowExtensions.qll",
+                   "start_line": 8,
+                   "start_column": 1,
+                   "end_line": 11,
+                   "end_column": 3
+                 },
+                 ....
+               ]
+             }
+           }
+
+         #+END_SRC
+
+       - note:  QlBuiltins::ExtensionId madId is only in ql, not json.
+
+       - file format sample: ../ql/cpp/ql/lib/ext/empty.model.yml
+         
+       - data sample:
+         #+begin_src javascript
+           # partial model of windows system calls
+           extensions:
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: sourceModel
+               data: # namespace, type, subtypes, name, signature, ext, output, kind, provenance
+                 # processenv.h
+                 - ["", "", False, "GetCommandLineA", "", "", "ReturnValue[*]", "local", "manual"]
+         #+end_src
+
+       - add a =sourceModel= 
+         #+BEGIN_SRC yaml
+           extensions:
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: sourceModel
+               data:
+                 - [
+                     "",
+                     "",
+                     False,
+                     "get_user_info",
+                     "",
+                     "",
+                     "ReturnValue[*]",
+                     "remote",
+                     "manual",
+                   ]
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: sinkModel
+               data: []
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: summaryModel
+               data: []
+           #+END_SRC
+         #+BEGIN_SRC sh 
+           0:$ ls .github/codeql/extensions/
+           jedis-db-local-java/ sqlite-db/
+           (venv)
+           hohn@ghm3 ~/work-gh/codeql-lab
+           0:$ cp -r .github/codeql/extensions/sqlite-db .github/codeql/extensions/sqlite-db-c
+
+           pushd .github/codeql/extensions/sqlite-db-c
+
+           sed -i -e 's/java-all/cpp-all/g;'  codeql-pack.yml
+           # TODO also replace pack name
+
+           0:$ cat > models/sqlite.model.yml
+           extensions:
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: sourceModel
+               data:
+                 - [
+                     "",
+                     "",
+                     False,
+                     "get_user_info",
+                     "",
+                     "",
+                     "ReturnValue[*]",
+                     "remote",
+                     "manual",
+                   ]
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: sinkModel
+               data: []
+             - addsTo:
+                 pack: codeql/cpp-all
+                 extensible: summaryModel
+               data: []
+         #+END_SRC
+ 
+       - back to SqlTainted.ql
+       - 
+       - 
+       In the model editor, we see a =java.io.*Console.*readline= entry (using the =show already modeled= option)
+       #+BEGIN_SRC sh 
+         1:$ rg -i 'java.io.*Console.*readline' ql/java
+         ql/java/ql/lib/ext/generated/java.io.model.yml
+         16:      - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+         17:      - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"]
+         18:      - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"]
+         19:      - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+       #+END_SRC
+       note: this file is in the generated/ tree.
+
+       The current readline modeling is in the =summaryModel= section; we need it
+       in a =sourceModel=
+       #+BEGIN_SRC yaml
+         extensions:
+           - addsTo:
+               pack: codeql/java-all
+               extensible: summaryModel
+             data:
+               ...
+               - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+       #+END_SRC
+
+       The model editor will not show this because it's already modeled.  To
+       illustrate text-based additions, we'll use plain text.
+       Starting from
+       #+BEGIN_SRC yaml
+         extensions:
+           - addsTo:
+               pack: codeql/java-all
+               extensible: summaryModel
+             data:
+               ...
+               - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"]
+               - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"]
+       #+END_SRC
+       and the field information
+       #+BEGIN_SRC java
+         extensible predicate sourceModel(
+           string package, string type, boolean subtypes, string name, string signature, string ext,
+           string output, string kind, string provenance, QlBuiltins::ExtensionId madId
+         );
+       #+END_SRC
+       Starting from =summaryModel=
+       #+BEGIN_SRC yaml
+         # summaryModel
+         # string package, string type, boolean subtypes, string name, string signature, string ext, string input,     string output, string kind,  string provenance, QlBuiltins::ExtensionId madId
+         - ["java.io",     "Console",   False,            "readLine",  "()",             "",         "Argument[this]", "ReturnValue", "taint",      "df-generated"]
+       #+END_SRC
+
+       we can construct the =sourceModel=
+       #+BEGIN_SRC yaml
+         extensions:
+           - addsTo:
+               pack: codeql/java-all
+               extensible: sourceModel
+             data: 
+               # sourceModel
+               # string package, string type, boolean subtypes, string name, string signature, string ext,                   string output,    string kind,   string provenance, QlBuiltins::ExtensionId madId
+               - ["java.io",     "Console",   False,            "readLine",  "()",             "",                           "ReturnValue",    "remote",      "manual"]
+
+               # # from original
+               # # summaryModel
+               # # string package, string type, boolean subtypes, string name, string signature, string ext, string input,     string output, string kind,  string provenance, QlBuiltins::ExtensionId madId
+               # - ["java.io",     "Console",   False,            "readLine",  "()",             "",         "Argument[this]", "ReturnValue", "taint",      "df-generated"]
+
+       #+END_SRC
+
+       and move this into [[../.github/codeql/extensions/sqlite-db/models/sqlite.model.yml]]
+   
+
+
+       To ensure that these model extensions are applied during query runs, include
+       this setting
+       #+begin_src javascript
+         {
+             ...,
+             "settings": {
+                 ...,
+                 "codeQL.runningQueries.useExtensionPacks": "all"
+             }
+         }
+       #+end_src
+
+       in the workspace configuration file [[../qllab.code-workspace]]
+
+       In some environments (e.g., older VS Code versions), you may also need to
+       replicate this setting in [[../.vscode/settings.json]]; there it simplifies to
+       #+begin_src javascript
+         "codeQL.runningQueries.useExtensionPacks": "all"
+       #+end_src
+
+       Now we can run [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] again.
+
+
--- a/codeql-dataflow-sql-injection-c/SqlInjection.ql
+++ b/codeql-dataflow-sql-injection-c/SqlInjection.ql
@@ -0,0 +1,46 @@
+/**
+* @name SQLI Vulnerability
+* @description Using untrusted strings in a sql query allows sql injection attacks.
+* @ kind path-problem
+* @id cpp/sqlivulnerable
+* @problem.severity warning
+*/
+
+import cpp
+import semmle.code.cpp.dataflow.new.TaintTracking
+
+/**
+ * Taint-tracking configuration for this query: sources are buffers passed to
+ * `read`, sinks are query strings passed to `sqlite3_exec`, and no barriers
+ * are modeled.
+ */
+module SqliFlowConfig implements DataFlow::ConfigSig {
+
+    /**
+     * Holds if `source` is the argument at index 1 of a call to a function
+     * named `read`, matched either through `asDefiningArgument()` or through
+     * `asExpr()`.
+     */
+    predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+        exists(FunctionCall read |
+            read.getTarget().getName() = "read" and
+            (
+            read.getArgument(1) = source.asDefiningArgument()
+                or
+            read.getArgument(1) = source.asExpr()
+            )
+        )
+    }
+
+    /** No sanitizers are modeled; this predicate never holds. */
+    predicate isBarrier(DataFlow::Node sanitizer) { none() }
+
+    /**
+     * Holds if `sink` is, via `asIndirectArgument()`, the argument at index 1
+     * of a call to a function named `sqlite3_exec`.
+     */
+    predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+        exists(FunctionCall exec |
+            exec.getTarget().getName() = "sqlite3_exec" and
+            exec.getArgument(1) = sink.asIndirectArgument()
+        )
+    }
+}
+
+module MyFlow = TaintTracking::Global<SqliFlowConfig>;
+// import MyFlow::PathGraph
+
+from DataFlow::Node thing
+where SqliFlowConfig::isSource(thing)
+select thing, thing.getAQlClass()
+// from  MyFlow::PathNode source, MyFlow::PathNode sink
+// where MyFlow::flowPath(source, sink)
+// select sink, source, sink, "Possible SQL injection"
+
--- a/codeql-dataflow-sql-injection-c/add-user.c
+++ b/codeql-dataflow-sql-injection-c/add-user.c
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+/*
+ * Write a timestamped, printf-style message to stderr and flush it.
+ *
+ * fmt  printf-style format string; the remaining arguments are its values.
+ */
+void write_log(const char* fmt, ...) {
+    char stamp[26];
+    time_t now = time(NULL);
+    va_list ap;
+
+    /* Timestamp prefix. */
+    ctime_r(&now, stamp);
+    stamp[24] = 0; /* no \n */
+    fprintf(stderr, "[%s] ", stamp);
+
+    /* Caller-supplied message. */
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+
+    fflush(stderr);
+}
+
+/*
+ * If rc is non-zero, report the sqlite error message for db on stderr,
+ * close db, and abort the process.  Does nothing when rc is zero.
+ */
+void abort_on_error(int rc, sqlite3 *db) {
+    if (rc == 0)
+        return;
+
+    fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
+    sqlite3_close(db);
+    fflush(stderr);
+    abort();
+}
+
+/*
+ * If rc is anything other than SQLITE_OK, print zErrMsg to stderr, release
+ * it with sqlite3_free, close db, and abort the process.  Does nothing when
+ * rc is SQLITE_OK.
+ */
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
+    if (rc == SQLITE_OK)
+        return;
+
+    fprintf(stderr, "SQL error: %s\n", zErrMsg);
+    sqlite3_free(zErrMsg);
+    sqlite3_close(db);
+    fflush(stderr);
+    abort();
+}
+    
+/*
+ * Prompt on stdout, then read up to BUFSIZE - 1 bytes of user input from
+ * stdin with a single read() call.
+ *
+ * Returns a heap-allocated, NUL-terminated string with trailing whitespace
+ * stripped; the caller is responsible for free()ing it.  Calls abort() if
+ * the allocation fails or if read() returns no data (EOF or error).
+ */
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    if(buf==NULL) abort();
+    /* read() returns ssize_t; keep that type instead of narrowing to int */
+    ssize_t count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE - 1);
+    if (count <= 0) abort();
+    // ensure the buffer is zero-terminated
+    buf[count] = '\0';
+    /* strip trailing whitespace */
+    /* isspace() is undefined for negative values, so bytes >= 0x80 must be
+     * passed as unsigned char on platforms where plain char is signed */
+    while (count && isspace((unsigned char) buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+/* Return the current process id (from getpid()) for use as the new user id. */
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+/*
+ * Insert one (id, info) row into the users table of users.sqlite, logging
+ * the generated SQL via write_log first.  Aborts the process if the
+ * database cannot be opened or the statement fails.
+ *
+ * NOTE(review): info is formatted straight into the SQL text with snprintf
+ * and the result is handed to sqlite3_exec, so any SQL contained in info is
+ * executed.  This appears to be the deliberate vulnerability this sample
+ * exists to demonstrate -- do not "fix" it without updating the tutorial.
+ */
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    /* snprintf limits the formatted query to bufsize bytes, including the NUL */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+/*
+ * Entry point: read a name from the user, obtain an id, store both in the
+ * database, and release the input buffer.  argc and argv are not used.
+ */
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    /* info was allocated by get_user_info() */
+    free(info);
+    /*
+     * show_info(id);
+     */
+}
--- a/codeql-dataflow-sql-injection-c/add-user.sh
+++ b/codeql-dataflow-sql-injection-c/add-user.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Print the banner and read one raw line (no backslash processing) from the
+# user into the global NAME.
+get-user-info () {
+    echo "*** Welcome to sql injection ***"
+    read -r -p "Please enter name: " NAME
+}
+
+# Set the global ID to the process id of a freshly spawned bash process.
+get-new-id () {
+    ID=$(/bin/bash -c 'echo $$')
+}
+
+# Insert (ID, NAME) into the users table by piping SQL text to sqlite3.
+# NOTE(review): $NAME is interpolated directly into the SQL text, so SQL in
+# the user's input is executed -- presumably intentional for this demo.
+add-user-info () {
+    echo "
+    INSERT INTO users VALUES ($ID, '$NAME')
+    " | sqlite3 users.sqlite 
+}
+
+# Print the rows of the users table whose user_id equals the global ID.
+show-user-info () {
+    echo "We have the following information for you:"
+    echo "
+    select * FROM users where user_id=$ID
+    " | sqlite3 users.sqlite 
+}
+
+get-user-info
+get-new-id
+add-user-info
+show-user-info
--- a/codeql-dataflow-sql-injection-c/admin
+++ b/codeql-dataflow-sql-injection-c/admin
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+
+script=$(basename "$0")
+
+GREEN='\033[0;32m'
+MAGENTA='\033[0;95m'
+NC='\033[0m'
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+
+# Print the colorized usage summary for this script to stdout.
+help() {
+    echo -e "Usage: ./${script} [options]" \
+         "\n${YELLOW}Options: ${NC}" \
+         "\n\t -h  ${GREEN}Show Help ${NC}" \
+         "\n\t -c  ${MAGENTA}Creates a users table ${NC}" \
+         "\n\t -s  ${MAGENTA}Shows all records in the users table ${NC}" \
+         "\n\t -r  ${RED}Removes users table ${NC}" 
+}
+# Delete the sqlite database file.  -f makes a missing file a no-op instead
+# of an error: the script runs under `set -e`, so a failing rm would abort
+# it, breaking `-r` on a fresh checkout and `-rc` as a reset.
+remove-db () {
+    rm -f users.sqlite
+}
+
+# Create the users table (user_id INTEGER, name TEXT; both NOT NULL) in
+# users.sqlite by piping the DDL to sqlite3.
+create-db () {
+    echo '
+    CREATE TABLE users (
+        user_id INTEGER not null,
+        name TEXT NOT NULL
+    );
+    '  | sqlite3 users.sqlite 
+}        
+
+# Print every row of the users table in users.sqlite.
+show-db () {
+    echo '
+    SELECT * FROM users;
+    ' | sqlite3 users.sqlite 
+}
+
+if [ $# == 0 ]; then 
+   help
+   exit 0
+fi
+
+while getopts "h?csr" option
+do
+    case "${option}"
+    in
+        h|\?)
+            help
+            exit 0
+        ;;
+        c)  create-db
+        ;;
+        s)  show-db
+        ;;
+        r)  remove-db
+        ;;
+    esac
+done
--- a/codeql-dataflow-sql-injection-c/build.sh
+++ b/codeql-dataflow-sql-injection-c/build.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+clang -Wall add-user.c -lsqlite3 -o add-user
--- a/codeql-dataflow-sql-injection-c/codeql-dataflow-sql-injection.code-workspace
+++ b/codeql-dataflow-sql-injection-c/codeql-dataflow-sql-injection.code-workspace
@@ -0,0 +1,12 @@
+{
+	"folders": [
+		{
+			"path": "."
+		}
+	],
+	"settings": {
+        "codeQL.runningQueries.autoSave": true,
+		"makefile.configureOnOpen": false,
+		"codeQL.githubDatabase.download": "never"
+	}
+}
--- a/codeql-dataflow-sql-injection-c/codeql-dataflow-sql-injection.md
+++ b/codeql-dataflow-sql-injection-c/codeql-dataflow-sql-injection.md
@@ -0,0 +1,983 @@
+<!-- -*- coding: utf-8 -*- -->
+<!-- https://gist.github.com/hohn/
+ -->
+# CodeQL Tutorial for C/C++: Data Flow and SQL Injection
+
+<!--
+ !-- xx:
+ !-- md_toc github <  codeql-dataflow-sql-injection.md 
+  -->
+
+- [CodeQL Tutorial for C/C++: Data Flow and SQL Injection](#codeql-tutorial-for-cc-data-flow-and-sql-injection)
+  - [Setup Instructions](#setup-instructions)
+  - [Documentation Links](#documentation-links)
+  - [Codeql Recap](#codeql-recap)
+    - [from, where, select](#from-where-select)
+    - [Predicates](#predicates)
+    - [Existential quantifiers (local variables in queries)](#existential-quantifiers-local-variables-in-queries)
+    - [Classes](#classes)
+  - [The Problem in Action](#the-problem-in-action)
+  - [Problem Statement](#problem-statement)
+  - [Data flow overview and illustration](#data-flow-overview-and-illustration)
+  - [Tutorial: Sources, Sinks and Flow Steps](#tutorial-sources-sinks-and-flow-steps)
+    - [The Data Sink](#the-data-sink)
+    - [The Data Source](#the-data-source)
+    - [The Extra Flow Step](#the-extra-flow-step)
+  - [The CodeQL Taint Flow Configuration](#the-codeql-taint-flow-configuration)
+    - [Taint Flow Configuration](#taint-flow-configuration)
+    - [Path Problem Setup](#path-problem-setup)
+    - [Path Problem Query Format](#path-problem-query-format)
+  - [Tutorial: Taint Flow Details](#tutorial-taint-flow-details)
+    - [The isSink Predicate](#the-issink-predicate)
+    - [The isSource Predicate](#the-issource-predicate)
+    - [The isAdditionalTaintStep Predicate](#the-isadditionaltaintstep-predicate)
+  - [Appendix](#appendix)
+    - [The complete Query: SqlInjection.ql](#the-complete-query-sqlinjectionql)
+    - [The Database Writer: add-user.c](#the-database-writer-add-userc)
+
+## Setup Instructions
+
+To run CodeQL queries on the sample database used in this tutorial, follow these steps:
+
+1. Install the Visual Studio Code IDE.
+2. Download and install the [CodeQL extension for Visual Studio Code](https://help.semmle.com/codeql/codeql-for-vscode.html). Full setup instructions are [here](https://help.semmle.com/codeql/codeql-for-vscode/procedures/setting-up.html).
+3. [Set up the starter workspace](https://help.semmle.com/codeql/codeql-for-vscode/procedures/setting-up.html#using-the-starter-workspace).
+    - **Important**: Don't forget to `git clone --recursive` or `git submodule update --init --remote`, so that you obtain the standard query libraries.
+4. Open the starter workspace: File > Open Workspace > Browse to `vscode-codeql-starter/vscode-codeql-starter.code-workspace`.
+
+5. Download the sample database [`codeql-dataflow-sql-injection-d5b28fb.zip`](https://drive.google.com/file/d/1eBZ69ZQx6YnnZu41iUL0m8_e9qyMCZ9B/view?usp=sharing)
+
+6. Unzip the database.
+
+7. Import the unzipped database into Visual Studio Code:
+    - Click the **CodeQL** icon in the left sidebar.
+    - Place your mouse over **Databases**, and click the + sign that appears on
+      the right. 
+    - Choose the unzipped database directory on your filesystem.
+
+8. Create a new file, name it `SqlInjection.ql`, save it under `codeql-custom-queries-cpp`.
+
+
+## Documentation Links
+If you get stuck, try searching our documentation and blog posts for help and ideas. Below are a few links to help you get started:
+- [Learning CodeQL](https://help.semmle.com/QL/learn-ql)
+- [Learning CodeQL for C/C++](https://help.semmle.com/QL/learn-ql/cpp/ql-for-cpp.html)
+- [Using the CodeQL extension for VS Code](https://help.semmle.com/codeql/codeql-for-vscode.html)
+
+## Codeql Recap
+This is a brief review of CodeQL taken from the [full
+introduction](https://git.io/JJqdS).  For more details, see the [documentation
+links](#documentation-links).  We will revisit all of this during the tutorial.
+
+### from, where, select
+Recall that codeql is a declarative language and a basic query is defined by a
+_select_ clause, which specifies what the result of the query should be. For
+example:
+
+```ql
+import cpp
+
+select "hello world"
+```
+
+More complicated queries look like this:
+```ql
+from /* ... variable declarations ... */
+where /* ... logical formulas ... */
+select /* ... expressions ... */
+```
+
+The `from` clause specifies some variables that will be used in the query. The
+`where` clause specifies some conditions on those variables in the form of logical
+formulas. The `select` clause specifies what the results should be, and can refer
+to variables defined in the `from` clause.
+
+The `from` clause is defined as a series of variable declarations, where each
+declaration has a _type_ and a _name_. For example:
+
+```ql
+from IfStmt ifStmt
+select ifStmt
+```
+
+We are declaring a variable with the name `ifStmt` and the type `IfStmt` (from the
+CodeQL standard library for analyzing C/C++).  Variables represent a **set of
+values**, initially constrained by the type of the variable.  Here, the variable
+`ifStmt` represents the set of all `if` statements in the C/C++ program, as we can
+see if we run the query.
+
+A query using all three clauses to find empty blocks:
+```ql
+from IfStmt ifStmt, Block block
+where
+  ifStmt.getThen() = block and
+  block.getNumStmt() = 0
+select ifStmt, "Empty if statement"
+```
+
+
+### Predicates
+Another feature we will use is the _predicate_. Predicates provide a way to encapsulate
+portions of logic in the program so that they can be reused.  You can think of
+them as a mini `from`-`where`-`select` query clause. Like a select clause they
+also produce a set of "tuples" or rows in a result table.
+
+We can introduce a new predicate in our query that identifies the set of empty
+blocks in the program (for example, to reuse this feature in another query):
+
+```ql
+predicate isEmptyBlock(Block block) {
+  block.getNumStmt() = 0
+}
+
+from IfStmt ifStmt
+where isEmptyBlock(ifStmt.getThen())
+select ifStmt, "Empty if statement"
+```
+
+### Existential quantifiers (local variables in queries)
+Although the terminology may sound scary if you are not familiar with logic and
+logic programming, *existential quantifiers* are simply ways to introduce
+temporary variables with some associated conditions.  The syntax for them is:
+
+```ql
+exists(<variable declarations> | <formula>)
+```
+
+They have a similar structure to the `from` and `where` clauses: the first
+part allows you to declare one or more variables, and the second part is a
+formula ("conditions") that can be applied to those variables.
+
+For example, we can use this to refactor the query 
+```ql
+from IfStmt ifStmt, Block block
+where
+  ifStmt.getThen() = block and
+  block.getNumStmt() = 0
+select ifStmt, "Empty if statement"
+```
+
+to use a temporary variable for the empty block:
+```ql
+from IfStmt ifStmt
+where
+  exists(Block block |
+    ifStmt.getThen() = block and
+    block.getNumStmt() = 0
+  )
+select ifStmt, "Empty if statement"
+```
+
+This is frequently used to convert a query into a predicate.
+
+### Classes
+Classes are a way in which you can define new types within CodeQL, as well as
+providing an easy way to reuse and structure code.
+
+Like all types in CodeQL, classes represent a set of values. For example, the
+`Block` type is, in fact, a class, and it represents the set of all blocks in the
+program. You can also think of a class as defining a set of logical conditions
+that specifies the set of values for that class.
+
+For example, we can define a new CodeQL class to represent empty blocks:
+```ql
+class EmptyBlock extends Block {
+  EmptyBlock() {
+    this.getNumStmt() = 0
+  }
+}
+```
+
+and use it in a query:
+```ql
+from IfStmt ifStmt, EmptyBlock block
+where ifStmt.getThen() = block
+select ifStmt, "Empty if statement"
+```
+
+## The Problem in Action
+Running the code is a great way to see the problem and check whether the code is
+vulnerable.
+
+This program can be compiled and linked, and a simple sqlite db created via 
+
+```sh
+# Build
+./build.sh
+
+# Prepare db
+./admin -r
+./admin -c 
+./admin -s
+```
+
+Users can be added via `stdin` in several ways; the second is a pretend "server"
+using the `echo` command.
+
+```sh
+# Add regular user interactively
+./add-user 2>> users.log
+First User
+
+# Regular user via "external" process
+echo "User Outside" | ./add-user 2>> users.log
+```
+
+Check the db and log:
+```
+# Check
+./admin -s
+
+tail -4 users.log 
+```
+
+Looks ok:
+```
+0:$ ./admin -s
+87797|First User
+87808|User Outside
+
+0:$ tail -4 users.log 
+[Tue Jul 21 14:15:46 2020] query: INSERT INTO users VALUES (87797, 'First User')
+[Tue Jul 21 14:17:07 2020] query: INSERT INTO users VALUES (87808, 'User Outside')
+```
+
+But there may be bad input; this one guesses the table name and drops it:
+```sh
+# Add Johnny Droptable 
+./add-user 2>> users.log
+Johnny'); DROP TABLE users; --
+```
+
+And then we have this:
+```sh
+# And the problem:
+./admin -s
+0:$ ./admin -s
+Error: near line 2: no such table: users
+```
+
+What happened?  The log shows that data was treated as a command:
+```
+1:$ tail -4 users.log 
+[Tue Jul 21 14:15:46 2020] query: INSERT INTO users VALUES (87797, 'First User')
+[Tue Jul 21 14:17:07 2020] query: INSERT INTO users VALUES (87808, 'User Outside')
+[Tue Jul 21 14:18:25 2020] query: INSERT INTO users VALUES (87817, 'Johnny'); DROP TABLE users; --')
+```
+
+Looking ahead, we now *know* that there is unsafe external data (source)
+which reaches (flow path) a database-writing command (sink).  Thus, a query
+written against this code should find at least one taint flow path.
+
+## Problem Statement
+
+Many security problems can be phrased in terms of _information flow_:
+
+_Given a (problem-specific) set of sources and sinks, is there a path in the data
+flow graph from some source to some sink?_
+
+The example we look at is SQL injection: sources are user-input, sinks are SQL
+queries processing a string formed at runtime.
+
+When parts of the string can be specified by the user, they allow an attacker to
+insert arbitrary sql statements; these could erase a table or extract internal
+data etc.
+
+We will use CodeQL to analyze the source code constructing a SQL
+query using string concatenation and then executing that query
+string.  The following example uses the `sqlite3` library; it 
+- receives user-provided data from `stdin` and keeps it in `buf`
+- uses environment data and stores it in `id`,
+- runs a query in `sqlite3_exec`
+
+This is intentionally simple code, but it has all the elements that have to be
+considered in real code and illustrates the QL features. 
+
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+void write_log(const char* fmt, ...);
+
+void abort_on_error(int rc, sqlite3 *db);
+
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg);
+    
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    int count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+    if (count <= 0) abort();
+    /* strip trailing whitespace */
+    while (count && isspace(buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    /*
+     * show_info(id);
+     */
+}
+
+```
+
+In terms of sources, sinks, and information flow, the concrete problem for codeql is:
+1. specifying `buf` as **source**,
+2. specifying the `query` argument to `sqlite3_exec()` as **sink**, 
+3. specifying some code-specific data flow steps for the codeql library,
+4. using the codeql taint flow library to find taint flow paths (if there are any)
+   between the source and the sink.
+
+In the following, we go into more concrete detail and develop codeql scripts to
+solve this problem.
+
+
+
+## Data flow overview and illustration
+In the previous sections we identified the sources of problematic strings
+(accesses of `info` etc.), and the sink that their data may flow to (the argument
+to `sqlite3_exec`).
+
+We need to see if there is data flow between the source(s) and this sink.
+
+The solution here is to use the data flow library.  Data flow is, as the name
+suggests, about tracking the flow of data through the program. It helps answer
+questions like: does this expression ever hold a value that originates from a
+particular other place in the program?
+
+We can visualize the data flow problem as one of finding paths through a directed
+graph, where the nodes of the graph are elements in the program, and the edges
+represent the flow of data between those elements. If a path exists, then the data
+flows between those two nodes.
+
+This graph represents the flow of data from the tainted parameter. The nodes of
+the graph represent program elements that have a value, such as function parameters
+and expressions. The edges of this graph represent flow through these nodes.
+
+There are two variants of data flow available in CodeQL:
+ - Local (“intra-procedural”) data flow models flow within one function; feasible
+   to compute for all functions in a CodeQL database.
+ - Global (“inter-procedural”) data flow models flow across function calls; not
+   feasible to compute for all functions in a CodeQL database.
+
+While local data flow is feasible to compute for all functions in a CodeQL
+database, global data flow is not. This is because the number of paths becomes
+_exponentially_ larger for global data flow.
+
+The global data flow (and taint tracking) library avoids this problem by requiring
+that the query author specifies which _sources_ and _sinks_ are applicable. This
+allows the implementation to compute paths only between the restricted set of
+nodes, rather than for the full graph.
+
+To illustrate the dataflow for this problem, we have a [collection of slides](https://drive.google.com/file/d/1eEG0eGVDVEQh0C-0_4UIMcD23AWwnGtV/view?usp=sharing)
+for this workshop.
+
+## Tutorial: Sources, Sinks and Flow Steps
+<!--
+XX:
+ !-- The complete project can be downloaded via this 
+ !-- [drive](https://drive.google.com/file/d/1-6c3S-e4FKa_IsuuzhhXupiAwCzzPgD-/view?usp=sharing)
+ !-- link.
+  -->
+
+The tutorial is split into several steps and introduces concepts as they are
+needed.  Experimentation with the presented queries is encouraged, and the
+autocomplete suggestions (Ctrl + Space) and the jump-to-definition command (F12 in
+VS Code) are good ways to explore the libraries.
+
+
+### The Data Sink
+Now let's find the function `sqlite3_exec`.  In CodeQL, this uses `Function`
+and a `getName()` attribute.
+
+```ql
+from Function f
+where f.getName() = "sqlite3_exec" 
+select f
+```
+
+This should find one result, 
+```c
+SQLITE_API int sqlite3_exec(
+  sqlite3*,                                  /* An open database */
+  const char *sql,                           /* SQL to be evaluated */
+  int (*callback)(void*,int,char**,char**),  /* Callback function */
+  void *,                                    /* 1st argument to callback */
+  char **errmsg                              /* Error msg written here */
+);
+```
+in the header `sqlite3.h`.
+
+Next, let's find the calls to `sqlite3_exec` using the `FunctionCall` type
+```ql
+from FunctionCall exec
+where exec.getTarget().getName() = "sqlite3_exec" 
+select exec
+```
+
+This finds our call in `add-user.c`, 
+
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+
+We are interested in the `query` argument, which we can get using `.getArgument`:
+```ql
+from FunctionCall exec, Expr query
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1)
+select exec, query
+```
+
+### The Data Source
+
+The external data enters through the call
+
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+
+We thus want the `buf` argument to the call of the `read` function.  Together, this is 
+
+```ql
+from FunctionCall read, Expr buf
+where
+    read.getTarget().getName() = "read" and
+    buf = read.getArgument(1)
+select read, buf
+```
+
+### The Extra Flow Step
+The codeql data flow library traverses *visible* source code fairly well, but flow
+through opaque functions requires additional support (more on this later).
+Functions for which only a header is available are opaque, and we have one of
+these here: the call to `snprintf`.  Once we locate this call, there are *two* nodes
+to identify: the inflow and outflow.
+
+Let's start with `snprintf`.  If we try
+```ql
+from FunctionCall printf
+where printf.getTarget().getName() = "snprintf"
+select printf
+```
+we get zero results.  This is puzzling; if we visit the `add-user.c` source and
+follow the definition of `snprintf`, it turns out to be a macro on MacOS:
+```c
+#undef snprintf
+#define snprintf(str, len, ...) \
+  __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
+#endif
+```
+
+Fortunately, the underlying function `__builtin___snprintf_chk` has `snprintf` in
+the name.  So instead of working with C macros from codeql, we generalize our
+query using a name pattern with `.matches`:
+```ql
+from FunctionCall printf
+where printf.getTarget().getName().matches("%snprintf%")
+select printf
+```
+
+This identifies our call
+
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    
+and we need the inflow and outflow nodes next.  `query` is the outflow, `info` is
+the inflow.
+
+In the `snprintf` macro call, those have indices 0 and 4.  In the underlying function
+`__builtin___snprintf_chk`, the indices are 0 and 6.  Using the latter:
+```ql
+from FunctionCall printf, Expr out, Expr into
+where
+    printf.getTarget().getName().matches("%snprintf%") and
+    printf.getArgument(0) = out and
+    printf.getArgument(6) = into
+select printf, out, into
+```
+
+This correctly identifies the call and the extra flow arguments.
+
+<!-- !-- Practice exercise: !-- Very specific: shifted index for macro.
+ Generalize this to consider !-- all trailing arguments as sources.  -->
+
+
+Practice exercise: If you are using linux or windows, generalize this query for
+the `snprintf` arguments found there.  One way to do this is using `or`:
+
+```ql
+printf.getTarget().getName().matches("%snprintf%") and
+(
+  // mac version
+or
+ // linux version
+or
+ // windows version
+)
+```
+
+
+
+## The CodeQL Taint Flow Configuration
+The previous queries identify our source, sink and one additional flow step.  To
+use global data flow and taint tracking we need some additional codeql setup:
+ - a taint flow configuration 
+ - the path problem header and imports
+ - a query formatted for path problems.
+
+These are done next.
+
+### Taint Flow Configuration
+The way we configure global taint flow is by creating a custom extension of the
+`TaintTracking::Configuration` class, and specifying `isSource`, `isSink`, and 
+`isAdditionalTaintStep` predicates.
+
+The sources and sinks were explained earlier.  Data flow and taint tracking
+configuration classes support a number of additional features that help configure
+the process of building and exploring the data flow path.
+
+One such feature is adding additional taint steps. This is useful if you use
+libraries which are not modelled by the default taint tracking. You can implement
+this by overriding the `isAdditionalTaintStep` predicate. This has two parameters, the
+`from` and the `to` node, and it essentially allows you to add extra edges into the
+taint tracking or data flow graph.
+
+A starting configuration can look like the following, with details to be filled
+in.
+
+```ql
+class SqliFlowConfig extends TaintTracking::Configuration {
+    SqliFlowConfig() { this = "SqliFlow" }
+
+    override predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+    }
+
+    override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
+
+    override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+        // Extra taint step for 
+        //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    }
+
+    override predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    }
+}
+```
+
+`TaintTracking::Configuration` is a _configuration_ class. In this case, there will be
+a single instance of the class, identified by a unique string specified in the
+characteristic predicate. We then override the `isSource` predicate to represent
+the set of possible sources in the program, and `isSink` to represent the possible
+set of sinks in the program.
+
+### Path Problem Setup
+Queries will only list sources and sinks by default.  To inspect these results and
+work with them, we also need the data paths from source to sink.  For this, the
+query needs to have the form of a _path problem_ query.
+
+This requires modifications to the query header and an extra import:
+ - The `@kind` comment has to be `path-problem`. This tells the CodeQL toolchain
+   to interpret the results of this query as path results. 
+ - A new import `DataFlow::PathGraph`, which will report the path data
+   alongside the query results. 
+
+Together, this looks like
+```ql
+/**
+ * @name SQLI Vulnerability
+ * @description Using untrusted strings in a sql query allows sql injection attacks.
+ * @kind path-problem
+ * @id cpp/SQLIVulnerable
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+```
+
+### Path Problem Query Format
+To use this new configuration and `PathGraph` support, we call the
+`hasFlowPath(source, sink)` predicate, which will compute a reachability table
+between the defined sources and sinks.  Behind the scenes, you can think of this as
+performing a graph search algorithm from sources to sinks.  The query will look
+like this:
+
+```ql
+from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+where conf.hasFlowPath(source, sink)
+select sink, source, sink, "Possible SQL injection"
+```
+
+## Tutorial: Taint Flow Details
+With the dataflow configuration in place, we just need to provide the details for
+source(s), sink(s), and taint step(s).
+
+Some more steps are required to convert our previous queries for use in data
+flow.  These are covered here.
+
+### The isSink Predicate
+Note that our previous queries used `Expr` nodes, but the taint query requires
+`DataFlow::Node` nodes.
+
+We have identified arguments to the call of the `sqlite3_exec` function via the
+query
+
+```ql
+from FunctionCall exec, Expr query
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1)
+select exec, query
+```
+
+First, we need to incorporate the `DataFlow::Node`.  The key to this is
+`node.asExpr()`, which yields the `node`'s expression.  Adding this we get
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+from FunctionCall exec, Expr query, DataFlow::Node sink
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1) and
+    sink.asExpr() = query
+select exec, query, sink
+```
+
+Notice that `query` is now redundant, so this simplifies to 
+```ql
+from FunctionCall exec, DataFlow::Node sink
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    sink.asExpr() = exec.getArgument(1) 
+select exec, sink
+```
+
+Second, we need this as a predicate of a single argument, `predicate
+isSink(DataFlow::Node sink)`.  For this we introduce the `exists()`
+[quantifier](https://help.semmle.com/QL/ql-handbook/formulas.html?highlight=exists#exists)
+to move the `FunctionCall exec` into the body of the query and remove it from the
+result:
+
+```ql
+from DataFlow::Node sink
+where
+    exists(FunctionCall exec |
+        exec.getTarget().getName() = "sqlite3_exec" and
+        sink.asExpr() = exec.getArgument(1)
+    )
+select sink
+```
+
+To turn this into a predicate, `from` contents become arguments, the `where`
+becomes the body, and the `select` is dropped:
+
+```ql
+predicate isSink(DataFlow::Node sink) {
+    // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    exists(FunctionCall exec |
+        exec.getTarget().getName() = "sqlite3_exec" and
+        sink.asExpr() = exec.getArgument(1)
+    )
+}
+```
+
+### The isSource Predicate
+Recall that the external data enters through the `buf` argument to the call
+
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+
+and we got this via the query
+
+```ql
+from FunctionCall read, Expr buf
+where
+    read.getTarget().getName() = "read" and
+    buf = read.getArgument(1)
+select read, buf
+```
+
+As for the `isSink` predicate in the previous section, we need to convert this to
+a predicate of a single argument, `predicate isSource(DataFlow::Node source)`.
+Following the same steps, we introduce a `DataFlow::Node` and an `exists()`:
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+from DataFlow::Node source
+where
+    exists(FunctionCall read |
+        read.getTarget().getName() = "read" and
+        read.getArgument(1) = source.asExpr()
+    )
+select source
+```
+
+There is one more adjustment needed for this to work.  The `buf` argument is both
+read by and written to by the `read` function call.  Because we are specifying
+it as a *source*, the value of interest is the value *after* the call.  We get
+this value by
+[casting](https://help.semmle.com/QL/ql-handbook/expressions.html#casts) to the
+post-update node.  Instead of `source.asExpr()`, we use
+`source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()`
+
+
+Last, we incorporate this into a predicate:
+
+```ql
+predicate isSource(DataFlow::Node source) {
+    // count = read(STDIN_FILENO, buf, BUFSIZE);
+    exists(FunctionCall read |
+        read.getTarget().getName() = "read" and
+        read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+    )
+}
+```
+
+If you quick-eval this predicate, you will see that `source` is now `ref arg buf`
+instead of `buf`.
+
+
+### The isAdditionalTaintStep Predicate
+Our previous query identifies the call to `snprintf` and the extra flow arguments:
+
+```ql
+from FunctionCall printf, Expr out, Expr into
+where
+    printf.getTarget().getName().matches("%snprintf%") and
+    printf.getArgument(0) = out and
+    printf.getArgument(6) = into
+select printf, out, into
+```
+
+As for the `isSource` and `isSink` predicates, we need to
+- change from `Expr` to a `DataFlow::Node`
+- change the outflow (`out`) type to a `PostUpdateNode`
+- convert this to a predicate
+
+Put together:
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+    // Extra taint step for
+    //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    exists(FunctionCall printf |
+        printf.getTarget().getName().matches("%snprintf%") and
+        printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
+        printf.getArgument(6) = into.asExpr()
+    )
+}
+```
+
+## Appendix
+This appendix has the complete C source and codeql query.
+
+### The complete Query: SqlInjection.ql
+The full query is
+
+```ql
+/**
+ * @name SQLI Vulnerability
+ * @description Using untrusted strings in a sql query allows sql injection attacks.
+ * @kind path-problem
+ * @id cpp/SQLIVulnerable
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+class SqliFlowConfig extends TaintTracking::Configuration {
+    SqliFlowConfig() { this = "SqliFlow" }
+
+    override predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+        exists(FunctionCall read |
+            read.getTarget().getName() = "read" and
+            read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+        )
+    }
+
+    override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
+
+    override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+        // Extra taint step
+        //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+        // But snprintf is a macro on mac os.  The actual function's name is
+        //     #undef snprintf
+        //     #define snprintf(str, len, ...) \
+        //       __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
+        //     #endif
+        exists(FunctionCall printf |
+            printf.getTarget().getName().matches("%snprintf%") and
+            printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
+            // very specific: shifted index for macro.
+            printf.getArgument(6) = into.asExpr()
+        )
+    }
+
+    override predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+        exists(FunctionCall exec |
+            exec.getTarget().getName() = "sqlite3_exec" and
+            exec.getArgument(1) = sink.asExpr()
+        )
+    }
+}
+
+from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+where conf.hasFlowPath(source, sink)
+select sink, source, sink, "Possible SQL injection"
+```
+
+### The Database Writer: add-user.c
+The complete source for the sqlite database writer
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+void write_log(const char* fmt, ...) {
+    time_t t;
+    char tstr[26];
+    va_list args;
+
+    va_start(args, fmt);
+    t = time(NULL);
+    ctime_r(&t, tstr);
+    tstr[24] = 0; /* no \n */
+    fprintf(stderr, "[%s] ", tstr);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fflush(stderr);
+}
+
+void abort_on_error(int rc, sqlite3 *db) {
+    if( rc ) {
+        fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
+    if( rc!=SQLITE_OK ){
+        fprintf(stderr, "SQL error: %s\n", zErrMsg);
+        sqlite3_free(zErrMsg);
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+    
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    int count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+    if (count <= 0) abort();
+    /* strip trailing whitespace */
+    while (count && isspace(buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    /*
+     * show_info(id);
+     */
+}
+    
+```
--- a/codeql-dataflow-sql-injection-c/codeql-pack.lock.yml
+++ b/codeql-dataflow-sql-injection-c/codeql-pack.lock.yml
@@ -0,0 +1,14 @@
+---
+lockVersion: 1.0.0
+dependencies:
+  codeql/cpp-all:
+    version: 0.9.1
+  codeql/dataflow:
+    version: 0.0.2
+  codeql/ssa:
+    version: 0.1.3
+  codeql/tutorial:
+    version: 0.1.3
+  codeql/util:
+    version: 0.1.3
+compiled: false
--- a/codeql-dataflow-sql-injection-c/cpp-sqli-3fe610d-1.zip
+++ b/codeql-dataflow-sql-injection-c/cpp-sqli-3fe610d-1.zip
--- a/codeql-dataflow-sql-injection-c/dataflow-cropped.pdf
+++ b/codeql-dataflow-sql-injection-c/dataflow-cropped.pdf
--- a/codeql-dataflow-sql-injection-c/dataflow.key
+++ b/codeql-dataflow-sql-injection-c/dataflow.key
--- a/codeql-dataflow-sql-injection-c/dataflow.pdf
+++ b/codeql-dataflow-sql-injection-c/dataflow.pdf
--- a/codeql-dataflow-sql-injection-c/incoming.codeql-customizations-workshop.md
+++ b/codeql-dataflow-sql-injection-c/incoming.codeql-customizations-workshop.md
@@ -0,0 +1,164 @@
+# Adding to Customizations example
+
+## Setup and sample run
+
+The **prerequisite** for this workshop is the `java/codeql-dataflow-sql-injection/` also located in this repository, which centers around detecting a potential SQL Injection vulnerability in a small Java database interaction application.
+
+Now that we have used the query developed in the previous workshop, let's see if there is a pre-existing query that can detect the same vulnerability.
+
+Navigate to the `SQLTainted.ql` query and run it. 
+
+To find that file locally use one of the following:
+
+  1) If you are using a [CodeQL bundle](https://github.com/github/codeql-action/releases), this can be found via a search like:
+`find <location-of-bundle> -name "SQLTainted.ql"`.
+
+  2) If you are using the [installed packs](https://github.com/orgs/codeql/packages/container/package/java-all) (obtained via Install Pack Dependencies), then the location of the query will be under `~/.codeql/packages/codeql/java-all/`  or  `C:\Users\<username>\.codeql\packages\codeql\java-all\`
+
+## Identify the problem
+
+Determine if the query detects the following source and sink (again from the previous workshop) using the *Quick Evaluation* feature in the editor:
+
+source: 
+```
+System.console().readLine();
+```
+sink:
+```
+conn.createStatement().executeUpdate(query);
+```
+
+## Investigate the Implementation
+
+It's time to look at the query file and libraries responsible for the implementation. Use the *Go to Definition* feature of the editor to investigate the `QueryInjectionSink` class used in the query and the `queryTaintedBy` predicate. 
+
+Also look at the definition of the `RemoteFlowSource` class and take this time to discuss [*Abstract* classes](https://codeql.github.com/docs/ql-language-reference/types/#abstract-classes).
+
+Take some time to investigate the differences between *abstract* and *nonabstract* classes using a generic example:
+```
+abstract class A extends string {
+     A() { this = ["A", "B", "C"] }
+   }
+  
+   class B extends A { B() { this = "B" } }
+  
+   class C extends A { C() { this = "C" } }
+
+from A a 
+select a
+```
+versus:
+```
+class A extends string {
+     A() { this = ["A", "B", "C"] }
+   }
+  
+   class B extends A { B() { this = "B" } }
+  
+   class C extends A { C() { this = "C" } }
+
+from A a 
+select a
+```
+(attribution: this example was created by @smowton)
+
+## Add to the Implementation
+
+Now that we understand the reason that `SQLTainted.ql` does not detect the potential SQL Injection vulnerability (it does not model the source), we will add to the `Customizations.qll` file which acts as a query extension interface. This will allow `SQLTainted.ql` to detect the vulnerability.
+
+First determine which import will be required to access the abstract class that we will need to extend:
+
+```
+private import semmle.code.java.dataflow.FlowSources
+```
+
+Then add a custom class that models the
+`System.console().readLine()` call:
+
+```
+class ReadLineFlowSource extends RemoteFlowSource {
+    ReadLineFlowSource() { 
+        exists(MethodAccess read |
+            read.getCallee().hasName("readLine") and
+            this.asExpr() = read
+        )
+     }
+  
+    override string getSourceType() { result = "readLine source" }
+  }
+```
+
+Now when we run `SQLTainted.ql` we will detect the same vulnerability that is detected by the end of the `java/codeql-dataflow-sql-injection/` workshop.
+
+## Additional practice
+
+Now we can also see what it would be like to add an additional sink to the `Customizations.qll` file. While the following doesn't apply for the particular rule `SQLTainted.ql`, we can just use this as an exercise for practice.
+
+We will now take the time to add a model for the `System.err.printf("Sent: %s", query)` call, as a sink in the application.
+
+```
+import semmle.code.java.security.QueryInjection
+
+  class PrintfSink extends QueryInjectionSink { 
+    PrintfSink(){
+        exists(MethodAccess  printf |
+            printf.getCallee().hasName("printf")
+            and this.asExpr() = printf.getAnArgument()
+            )
+    }
+  }
+```
+
+We should now get 2 `path-problem` results when we run `SQLTainted.ql` and we should be familiar with adding custom sources and sinks to `Customizations.qll` to extend the pre-existing queries.
+
+# Model Editor Alternative Technique
+
+[CodeQL Model Editor](https://docs.github.com/en/code-security/codeql-for-vs-code/using-the-advanced-functionality-of-the-codeql-for-vs-code-extension/using-the-codeql-model-editor) can be used when an out of the box CodeQL needs a customization. Currently (as of June 2024) supported customizations via the Model Editor are sources and sinks. The Model Editor will generate [CodeQL model packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack) which can get added to any analysis at scan runtime.
+
+## Setup the example for `readLine`
+
+As a sample, we want to add the `Console.io.readLine` call to the `RemoteFlowSource` concept, like we did above, and get it to be picked up in the out of the box scans. To accomplish this, **clear any previous changes** in the `Customizations.qll` file to get a fresh start on no customization currently added to `RemoteFlowSource`.
+
+This should reset the results of the `SqlTainted.ql` query to return nothing.
+
+Then double check whether any out of the box models for `Console.io.readLine` already exist (as of June 2024 [there are](https://github.com/github/codeql/blob/main/java/ql/lib/ext/generated/java.io.model.yml)). We want to temporarily remove those **just for demonstration purposes**.
+To do that check:
+
+ 1) If you are using a [CodeQL bundle](https://github.com/github/codeql-action/releases), these models can be found locally via a search like:
+`grep -R "readLine" <location-of-bundle> | grep "Console" | sort --unique | grep ".yml" | grep "java"`
+
+ 2) If you are using the [installed packs](https://github.com/orgs/codeql/packages/container/package/java-all) (obtained via Install Pack Dependencies), then the location of the model will be under `~/.codeql/packages/codeql/java-queries/<some-version>/.codeql/libraries/codeql/java-all/<some-other-version>/ext/generated/java.io.model.yml`.
+
+Once that file is open, remove any lines containing the `java.io.Console.readLine` signature.
+
+## Open the Model Editor
+
+In the QL widget selection, there is a panel labelled "CODEQL METHOD MODELING". Select "Start Modeling". It should open a central panel that shows a display saying that some % of the Java Runtime is modelled (but not 100%). Expanding the Java Runtime panel should show `java.io.Console.readLine()` as a model-able option.
+
+## Model the API
+
+Select Model Type -> "Source" and Kind -> "remote" and then click "Save". This will generate the model pack in the `.github` folder. Take some time to explore that directory and the model pack.
+
+## Enable testing with the model
+
+To [test the model in the editor](https://docs.github.com/en/code-security/codeql-for-vs-code/using-the-advanced-functionality-of-the-codeql-for-vs-code-extension/using-the-codeql-model-editor#testing-codeql-model-packs-in-vs-code), an enable setting must be added to the VSCode settings. Open the `.vscode/settings.json` file and add this line: `"codeQL.runningQueries.useExtensionPacks": "all"`.
+
+## Utilize the model in a test
+
+Create the following sample query to perform a quick test that the model has been successfully configured and added to a scan:
+
+example.ql
+```
+import java
+import semmle.code.java.dataflow.FlowSources
+
+from RemoteFlowSource r
+select r
+```
+
+The results of this should now show the `readLine` call!
+
+## Utilize the model in the out of the box query
+
+Open the `SqlTainted.ql` query again and then run it. The results should now show a path through our sample vulnerable application!
+
--- a/codeql-dataflow-sql-injection-c/qlpack.yml
+++ b/codeql-dataflow-sql-injection-c/qlpack.yml
@@ -0,0 +1,4 @@
+name: codeql-workshop/cpp-sql-injection
+version: 0.0.1
+dependencies:
+  codeql/cpp-all: "*"
--- a/codeql-dataflow-sql-injection-c/sarif-summary.jq
+++ b/codeql-dataflow-sql-injection-c/sarif-summary.jq
@@ -0,0 +1,60 @@
+# -*- sh -*-
+.runs | .[] | .results | .[] |  # visit every result of every run
+    ( (.ruleId, ": ",   # summary line: rule id, then the first line of the message
+       (.message.text | split("\n") | ( .[0], " [", length-1 , " more]")),  # length-1 = remaining message lines
+       "\n")
+      , 
+      (if (.codeFlows != null) then  # path results carry codeFlows
+          (.codeFlows | .[] | 
+               ("    Path\n"  # one "Path" header per code flow
+                ,
+                ( .threadFlows | .[] | .locations | .[] | .location | "        "  # one indented entry per path step
+                  ,
+                  ( .physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":"))  # emits uri:line:
+                  ,
+                  (.message.text, " ")  # the step's message text
+                  , 
+                  "\n"
+                )))
+          else   # no codeFlows: list the result's own locations
+              (.locations | .[] |
+                   ( "        "
+                     ,
+                     (.physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":"))
+                   ))
+              ,
+              # .message.text, 
+              "\n"
+              end)
+    )  | tostring   # each fragment is emitted as its own string; presumably run with jq's raw/join output mode -- TODO confirm
+
+# This script extracts the following parts of the sarif output:
+# 
+# # problem
+# "runs" : [ {
+#   "results" : [ {
+#     "ruleId" : "cpp/UncheckedErrorCode",
+
+# # path problem
+# "runs" : [ {
+#   "tool" : {
+#     "driver" : {
+#       "rules" : [ {
+#         "properties" : {
+#           "kind" : "path-problem",
+
+# "runs" : [ {
+#   "results" : [ {
+#     "ruleId" : "cpp/DangerousArithmetic",
+#     "ruleIndex" : 6,
+#     "message" : {
+#       "text" : "Potential overflow (conversion: int -> unsigned int)\nPotential overflow (con
+
+# "runs" : [ {
+#   "results" : [ {
+#     "codeFlows" : [ {
+#       "threadFlows" : [ {
+#         "locations" : [ {
+#           "location" : {
+#             "message" : {
+#               "text" : "buff"
--- a/codeql-dataflow-sql-injection-c/session.ql
+++ b/codeql-dataflow-sql-injection-c/session.ql
@@ -0,0 +1,29 @@
+import cpp
+
+// 1. invalid input -- source
+//    count = read(STDIN_FILENO, buf, BUFSIZE - 1);
+//
+// 2. gets to a sql statement -- flow
+//    flow config
+//
+// 3. drops table -- sink
+//    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+// All predicates and classes are using one of:
+// AST Abstract syntax tree
+// CFG Control flow graph
+// DFG Data flow graph
+// Type hierarchy
+// A variable access used as argument 1 of a call to a function named `read`.
+class DataSource extends VariableAccess {
+  DataSource() {
+    this =
+      any(FunctionCall readCall | readCall.getTarget().getName() = "read")
+          .getArgument(1)
+  }
+}
+
+from VariableAccess buf, FunctionCall readCall
+where
+  buf = readCall.getArgument(1) and
+  readCall.getTarget().getName() = "read"
+select buf // every variable access passed at argument index 1 to a call of `read`