add workshop codeql-dataflow-sql-injection

2025-12-15 17:43:04 +01:00 · 2025-06-25 00:05:50 -07:00
parent ab7fba9007
commit 4f4ef1b84d
19 changed files with 1574 additions and 0 deletions
--- a/2
+++ b/2
@@ -0,0 +1,2 @@
+curl -L -O https://github.com/github/codeql-cli-binaries/releases/download/v2.22.0/codeql-osx64.zip
+unzip -q assets/codeql-osx64.zip
--- a/assets/codeql-osx64.zip
+++ b/assets/codeql-osx64.zip
--- a/codeql-dataflow-sql-injection/CodeQL-workshop-overview-only.pdf
+++ b/codeql-dataflow-sql-injection/CodeQL-workshop-overview-only.pdf
--- a/codeql-dataflow-sql-injection/README.org
+++ b/codeql-dataflow-sql-injection/README.org
@@ -0,0 +1,229 @@
+
+[[https://imgs.xkcd.com/comics/exploits_of_a_mom.png]]
+
+(from https://xkcd.com/327/)
+
+
+* SQL injection example
+** Setup and sample run
+  #+BEGIN_SRC sh
+    # Use a simple headline prompt 
+    PS1='
+    \033[32m---- SQL injection demo ----\[\033[33m\033[0m\]
+    $?:$ '
+
+    
+    # Build
+    ./build.sh
+
+    # Prepare db
+    ./admin -r
+    ./admin -c
+    ./admin -s 
+
+    # Add regular user interactively
+    ./add-user 2>> users.log
+    First User
+
+    
+    # Regular user via "external" process
+    echo "User Outside" | ./add-user 2>> users.log
+
+    # Check
+    ./admin -s
+
+    # Add Johnny Droptable 
+    ./add-user 2>> users.log
+    Johnny'); DROP TABLE users; --
+
+    # And the problem:
+    ./admin -s
+    
+    # Check the log
+    tail users.log
+  #+END_SRC
+
+** Identify the problem
+   =./add-user= is reading from =STDIN=, and writing to a database; looking at the code in
+   [[./add-user.c]] leads to
+   : count = read(STDIN_FILENO, buf, BUFSIZE - 1);
+   for the read and 
+   : rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+   for the write.
+
+   This problem is thus a dataflow problem; in codeql terminology we have
+   - a /source/ at the =read(STDIN_FILENO, buf, BUFSIZE - 1);=
+   - a /sink/ at the =sqlite3_exec(db, query, NULL, 0, &zErrMsg);=
+
+   We write codeql to identify these two, and then connect them via
+   - a /dataflow configuration/ -- for this problem, the more general /taintflow
+     configuration/. 
+   
+** Build codeql database
+   To get started, build the codeql database (adjust paths to your setup):
+   #+BEGIN_SRC sh
+     # Build the db with source commit id.
+     # export PATH=$HOME/local/vmsync/codeql250:"$PATH"
+     SRCDIR=$(pwd)
+     DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
+
+     echo $DB
+     test -d "$DB" && rm -fR "$DB"
+     mkdir -p "$DB"
+
+     cd $SRCDIR && codeql database create --language=cpp -s . -j 8 -v $DB --command='./build.sh'
+   #+END_SRC
+
+   Then add this database directory to your VS Code =DATABASES= tab.
+
+
+** Build codeql database in steps
+   For larger projects, using a single command to build everything is costly when
+   any part of the build fails.
+   
+   To build a database in steps, use the following sequence, adjusting paths to
+   your setup:
+   #+BEGIN_SRC sh
+     # Build the db with source commit id.
+     export PATH=$HOME/local/vmsync/codeql250:"$PATH"
+     SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection
+     DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
+
+     # Check paths
+     echo $DB
+     echo $SRCDIR
+
+     # Prepare db directory
+     test -d "$DB" && rm -fR "$DB"
+     mkdir -p "$DB"
+
+     # Run the build
+     cd $SRCDIR
+     codeql database init --language=cpp -s . -v $DB
+     # Repeat trace-command as needed to cover all targets
+     codeql database trace-command -v $DB -- make 
+     codeql database finalize -j4 $DB
+   #+END_SRC
+
+   Then add this database directory to your VS Code =DATABASES= tab.
+
+** Develop the query bottom-up
+   1. Identify the /source/ part of the 
+      : read(STDIN_FILENO, buf, BUFSIZE - 1);
+      expression, the =buf= argument.  
+      Start from a =from..where..select=, then convert to a predicate.
+
+   2. Identify the /sink/ part of the
+      : sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+      expression, the =query= argument.  Again start from =from..where..select=,
+      then convert to a predicate.
+
+   3. Fill in the /taintflow configuration/ boilerplate
+      #+BEGIN_SRC java
+        class CppSqli extends TaintTracking::Configuration {
+            CppSqli() { this = "CppSqli" }
+
+            override predicate isSource(DataFlow::Node node) {
+                none()
+                    }
+
+            override predicate isSink(DataFlow::Node node) {
+                none()
+                    }
+        }
+      #+END_SRC
+
+      Note that an inout-argument in C/C++ (the =buf= pointer is passed to =read=
+      and points to updated data after the return) is accessed as a codeql source
+      via
+      : source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+      instead of the usual
+      : source.asExpr()
+
+   The final query (without =isAdditionalTaintStep=) is
+   #+BEGIN_SRC java
+     /**
+      ,* @name SQLI Vulnerability
+      ,* @description Using untrusted strings in a sql query allows sql injection attacks.
+      ,* @kind path-problem
+      ,* @id cpp/SQLIVulnerable
+      ,* @problem.severity warning
+      ,*/
+
+     import cpp
+     import semmle.code.cpp.dataflow.TaintTracking
+     import DataFlow::PathGraph
+
+     class SqliFlowConfig extends TaintTracking::Configuration {
+         SqliFlowConfig() { this = "SqliFlow" }
+
+         override predicate isSource(DataFlow::Node source) {
+             // count = read(STDIN_FILENO, buf, BUFSIZE);
+             exists(FunctionCall read |
+                 read.getTarget().getName() = "read" and
+                 read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+             )
+         }
+
+         override predicate isSink(DataFlow::Node sink) {
+             // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+             exists(FunctionCall exec |
+                 exec.getTarget().getName() = "sqlite3_exec" and
+                 exec.getArgument(1) = sink.asExpr()
+             )
+         }
+     }
+
+     from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+     where conf.hasFlowPath(source, sink)
+     select sink, source, sink, "Possible SQL injection"
+   #+END_SRC
+
+** Optional: sarif file review of the results
+   Query results are available in several output formats using the cli.  The
+   following produces the sarif format, a json-based result description.
+
+   #+BEGIN_SRC sh
+     # The setup information from before
+     export PATH=$HOME/local/vmsync/codeql250:"$PATH"
+     SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection
+     DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
+
+     # Check paths
+     echo $DB
+     echo $SRCDIR
+
+     # To see the help
+     codeql database analyze -h
+
+     # Run a query
+     codeql database analyze                         \
+            -v                                       \
+            --ram=14000                              \
+            -j12                                     \
+            --rerun                                  \
+            --search-path ~/local/vmsync/ql          \
+            --format=sarif-latest                    \
+            --output cpp-sqli.sarif                  \
+            --                                       \
+            $DB                                      \
+            $SRCDIR/SqlInjection.ql
+
+     # Examine the file in an editor
+     edit cpp-sqli.sarif
+   #+END_SRC
+
+   An example of using the sarif data is in the the jq script [[./sarif-summary.jq]].
+   When run against the sarif input via 
+   #+BEGIN_SRC sh
+     jq --raw-output --join-output  -f sarif-summary.jq < cpp-sqli.sarif > cpp-sqli.txt
+   #+END_SRC
+   it produces output in a form close to that of compiler error messages:
+   #+BEGIN_SRC text
+     query-id: message line 
+         Path
+            ...
+         Path
+            ...
+   #+END_SRC
+   
--- a/codeql-dataflow-sql-injection/SqlInjection.ql
+++ b/codeql-dataflow-sql-injection/SqlInjection.ql
@@ -0,0 +1,46 @@
+/**
+* @name SQLI Vulnerability
+* @description Using untrusted strings in a sql query allows sql injection attacks.
+* @ kind path-problem
+* @id cpp/sqlivulnerable
+* @problem.severity warning
+*/
+
+import cpp
+import semmle.code.cpp.dataflow.new.TaintTracking
+
+module SqliFlowConfig implements DataFlow::ConfigSig {
+
+    predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+        exists(FunctionCall read |
+            read.getTarget().getName() = "read" and
+            (
+            read.getArgument(1) = source.asDefiningArgument()
+                or
+            read.getArgument(1) = source.asExpr()
+            )
+        )
+    }
+
+    predicate isBarrier(DataFlow::Node sanitizer) { none() }
+
+    predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+        exists(FunctionCall exec |
+            exec.getTarget().getName() = "sqlite3_exec" and
+            exec.getArgument(1) = sink.asIndirectArgument()
+        )
+    }
+}
+
+module MyFlow = TaintTracking::Global<SqliFlowConfig>;
+// import MyFlow::PathGraph
+
+from DataFlow::Node thing
+where SqliFlowConfig::isSource(thing)
+select thing, thing.getAQlClass()
+// from  MyFlow::PathNode source, MyFlow::PathNode sink
+// where MyFlow::flowPath(source, sink)
+// select sink, source, sink, "Possible SQL injection"
+
--- a/codeql-dataflow-sql-injection/add-user.c
+++ b/codeql-dataflow-sql-injection/add-user.c
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+void write_log(const char* fmt, ...) {
+    time_t t;
+    char tstr[26];
+    va_list args;
+
+    va_start(args, fmt);
+    t = time(NULL);
+    ctime_r(&t, tstr);
+    tstr[24] = 0; /* no \n */
+    fprintf(stderr, "[%s] ", tstr);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fflush(stderr);
+}
+
+void abort_on_error(int rc, sqlite3 *db) {
+    if( rc ) {
+        fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
+    if( rc!=SQLITE_OK ){
+        fprintf(stderr, "SQL error: %s\n", zErrMsg);
+        sqlite3_free(zErrMsg);
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+    
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    if(buf==NULL) abort();
+    int count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE - 1);
+    if (count <= 0) abort();
+    // ensure the buffer is zero-terminated
+    buf[count] = '\0';
+    /* strip trailing whitespace */
+    while (count && isspace(buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    free(info);
+    /*
+     * show_info(id);
+     */
+}
--- a/codeql-dataflow-sql-injection/add-user.sh
+++ b/codeql-dataflow-sql-injection/add-user.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+get-user-info () {
+    echo "*** Welcome to sql injection ***"
+    read -r -p "Please enter name: " NAME
+}
+
+get-new-id () {
+    ID=$(/bin/bash -c 'echo $$')
+}
+
+add-user-info () {
+    echo "
+    INSERT INTO users VALUES ($ID, '$NAME')
+    " | sqlite3 users.sqlite 
+}
+
+show-user-info () {
+    echo "We have the following information for you:"
+    echo "
+    select * FROM users where user_id=$ID
+    " | sqlite3 users.sqlite 
+}
+
+get-user-info
+get-new-id
+add-user-info
+show-user-info
--- a/codeql-dataflow-sql-injection/admin
+++ b/codeql-dataflow-sql-injection/admin
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+
+script=$(basename "$0")
+
+GREEN='\033[0;32m'
+MAGENTA='\033[0;95m'
+NC='\033[0m'
+RED='\033[0;31m'
+YELLOW='\033[0;33m'
+
+help() {
+    echo -e "Usage: ./${script} [options]" \
+         "\n${YELLOW}Options: ${NC}" \
+         "\n\t -h  ${GREEN}Show Help ${NC}" \
+         "\n\t -c  ${MAGENTA}Creates a users table ${NC}" \
+         "\n\t -s  ${MAGENTA}Shows all records in the users table ${NC}" \
+         "\n\t -r  ${RED}Removes users table ${NC}" 
+}
+remove-db () {
+    rm users.sqlite
+}
+
+create-db () {
+    echo '
+    CREATE TABLE users (
+        user_id INTEGER not null,
+        name TEXT NOT NULL
+    );
+    '  | sqlite3 users.sqlite 
+}        
+
+show-db () {
+    echo '
+    SELECT * FROM users;
+    ' | sqlite3 users.sqlite 
+}
+
+if [ $# == 0 ]; then 
+   help
+   exit 0
+fi
+
+while getopts "h?csr" option
+do
+    case "${option}"
+    in
+        h|\?)
+            help
+            exit 0
+        ;;
+        c)  create-db
+        ;;
+        s)  show-db
+        ;;
+        r)  remove-db
+        ;;
+    esac
+done
--- a/codeql-dataflow-sql-injection/build.sh
+++ b/codeql-dataflow-sql-injection/build.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+clang -Wall add-user.c -lsqlite3 -o add-user
--- a/codeql-dataflow-sql-injection/codeql-dataflow-sql-injection.code-workspace
+++ b/codeql-dataflow-sql-injection/codeql-dataflow-sql-injection.code-workspace
@@ -0,0 +1,12 @@
+{
+	"folders": [
+		{
+			"path": "."
+		}
+	],
+	"settings": {
+        "codeQL.runningQueries.autoSave": true,
+		"makefile.configureOnOpen": false,
+		"codeQL.githubDatabase.download": "never"
+	}
+}
--- a/codeql-dataflow-sql-injection/codeql-dataflow-sql-injection.md
+++ b/codeql-dataflow-sql-injection/codeql-dataflow-sql-injection.md
@@ -0,0 +1,983 @@
+<!-- -*- coding: utf-8 -*- -->
+<!-- https://gist.github.com/hohn/
+ -->
+# CodeQL Tutorial for C/C++: Data Flow and SQL Injection
+
+<!--
+ !-- xx:
+ !-- md_toc github <  codeql-dataflow-sql-injection.md 
+  -->
+
+- [CodeQL Tutorial for C/C++: Data Flow and SQL Injection](#codeql-tutorial-for-cc-data-flow-and-sql-injection)
+  - [Setup Instructions](#setup-instructions)
+  - [Documentation Links](#documentation-links)
+  - [Codeql Recap](#codeql-recap)
+    - [from, where, select](#from-where-select)
+    - [Predicates](#predicates)
+    - [Existential quantifiers (local variables in queries)](#existential-quantifiers-local-variables-in-queries)
+    - [Classes](#classes)
+  - [The Problem in Action](#the-problem-in-action)
+  - [Problem Statement](#problem-statement)
+  - [Data flow overview and illustration](#data-flow-overview-and-illustration)
+  - [Tutorial: Sources, Sinks and Flow Steps](#tutorial-sources-sinks-and-flow-steps)
+    - [The Data Sink](#the-data-sink)
+    - [The Data Source](#the-data-source)
+    - [The Extra Flow Step](#the-extra-flow-step)
+  - [The CodeQL Taint Flow Configuration](#the-codeql-taint-flow-configuration)
+    - [Taint Flow Configuration](#taint-flow-configuration)
+    - [Path Problem Setup](#path-problem-setup)
+    - [Path Problem Query Format](#path-problem-query-format)
+  - [Tutorial: Taint Flow Details](#tutorial-taint-flow-details)
+    - [The isSink Predicate](#the-issink-predicate)
+    - [The isSource Predicate](#the-issource-predicate)
+    - [The isAdditionalTaintStep Predicate](#the-isadditionaltaintstep-predicate)
+  - [Appendix](#appendix)
+    - [The complete Query: SqlInjection.ql](#the-complete-query-sqlinjectionql)
+    - [The Database Writer: add-user.c](#the-database-writer-add-userc)
+
+## Setup Instructions
+
+To run CodeQL queries on dotnet/coreclr, follow these steps:
+
+1. Install the Visual Studio Code IDE.
+2. Download and install the [CodeQL extension for Visual Studio Code](https://help.semmle.com/codeql/codeql-for-vscode.html). Full setup instructions are [here](https://help.semmle.com/codeql/codeql-for-vscode/procedures/setting-up.html).
+3. [Set up the starter workspace](https://help.semmle.com/codeql/codeql-for-vscode/procedures/setting-up.html#using-the-starter-workspace).
+    - **Important**: Don't forget to `git clone --recursive` or `git submodule update --init --remote`, so that you obtain the standard query libraries.
+4. Open the starter workspace: File > Open Workspace > Browse to `vscode-codeql-starter/vscode-codeql-starter.code-workspace`.
+
+5. Download the sample database [`codeql-dataflow-sql-injection-d5b28fb.zip`](https://drive.google.com/file/d/1eBZ69ZQx6YnnZu41iUL0m8_e9qyMCZ9B/view?usp=sharing)
+
+6. Unzip the database.
+
+7. Import the unzipped database into Visual Studio Code:
+    - Click the **CodeQL** icon in the left sidebar.
+    - Place your mouse over **Databases**, and click the + sign that appears on
+      the right. 
+    - Choose the unzipped database directory on your filesystem.
+
+8. Create a new file, name it `SqliInjection.ql`, save it under `codeql-custom-queries-cpp`.
+
+
+## Documentation Links
+If you get stuck, try searching our documentation and blog posts for help and ideas. Below are a few links to help you get started:
+- [Learning CodeQL](https://help.semmle.com/QL/learn-ql)
+- [Learning CodeQL for C/C++](https://help.semmle.com/QL/learn-ql/cpp/ql-for-cpp.html)
+- [Using the CodeQL extension for VS Code](https://help.semmle.com/codeql/codeql-for-vscode.html)
+
+## Codeql Recap
+This is a brief review of CodeQL taken from the [full
+introduction](https://git.io/JJqdS).  For more details, see the [documentation
+links](#documentation-links).  We will revisit all of this during the tutorial.
+
+### from, where, select
+Recall that codeql is a declarative language and a basic query is defined by a
+_select_ clause, which specifies what the result of the query should be. For
+example:
+
+```ql
+import cpp
+
+select "hello world"
+```
+
+More complicated queries look like this:
+```ql
+from /* ... variable declarations ... */
+where /* ... logical formulas ... */
+select /* ... expressions ... */
+```
+
+The `from` clause specifies some variables that will be used in the query. The
+`where` clause specifies some conditions on those variables in the form of logical
+formulas. The `select` clauses specifies what the results should be, and can refer
+to variables defined in the `from` clause.
+
+The `from` clause is defined as a series of variable declarations, where each
+declaration has a _type_ and a _name_. For example:
+
+```ql
+from IfStmt ifStmt
+select ifStmt
+```
+
+We are declaring a variable with the name `ifStmt` and the type `IfStmt` (from the
+CodeQL standard library for analyzing C/C++).  Variables represent a **set of
+values**, initially constrained by the type of the variable.  Here, the variable
+`ifStmt` represents the set of all `if` statements in the C/C++ program, as we can
+see if we run the query.
+
+A query using all three clauses to find empty blocks:
+```ql
+from IfStmt ifStmt, Block block
+where
+  ifStmt.getThen() = block and
+  block.getNumStmt() = 0
+select ifStmt, "Empty if statement"
+```
+
+
+### Predicates
+The other feature we will use are _predicates_. These provide a way to encapsulate
+portions of logic in the program so that they can be reused.  You can think of
+them as a mini `from`-`where`-`select` query clause. Like a select clause they
+also produce a set of "tuples" or rows in a result table.
+
+We can introduce a new predicate in our query that identifies the set of empty
+blocks in the program (for example, to reuse this feature in another query):
+
+```ql
+predicate isEmptyBlock(Block block) {
+  block.getNumStmt() = 0
+}
+
+from IfStmt ifStmt
+where isEmptyBlock(ifStmt.getThen())
+select ifStmt, "Empty if statement"
+```
+
+### Existential quantifiers (local variables in queries)
+Although the terminology may sound scary if you are not familiar with logic and
+logic programming, *existential quantifiers* are simply ways to introduce
+temporary variables with some associated conditions.  The syntax for them is:
+
+```ql
+exists(<variable declarations> | <formula>)
+```
+
+They have a similar structure to the `from` and `where` clauses, where the first
+part allows you to declare one or more variables, and the second formula
+("conditions") that can be applied to those variables.
+
+For example, we can use this to refactor the query 
+```ql
+from IfStmt ifStmt, Block block
+where
+  ifStmt.getThen() = block and
+  block.getNumStmt() = 0
+select ifStmt, "Empty if statement"
+```
+
+to use a temporary variable for the empty block:
+```ql
+from IfStmt ifStmt
+where
+  exists(Block block |
+    ifStmt.getThen() = block and
+    block.getNumStmt() = 0
+  )
+select ifStmt, "Empty if statement"
+```
+
+This is frequently used to convert a query into a predicate.
+
+### Classes
+Classes are a way in which you can define new types within CodeQL, as well as
+providing an easy way to reuse and structure code.
+
+Like all types in CodeQL, classes represent a set of values. For example, the
+`Block` type is, in fact, a class, and it represents the set of all blocks in the
+program. You can also think of a class as defining a set of logical conditions
+that specifies the set of values for that class.
+
+For example, we can define a new CodeQL class to represent empty blocks:
+```ql
+class EmptyBlock extends Block {
+  EmptyBlock() {
+    this.getNumStmt() = 0
+  }
+}
+```
+
+and use it in a query:
+```ql
+from IfStmt ifStmt, EmptyBlock block
+where ifStmt.getThen() = block
+select ifStmt, "Empty if statement"
+```
+
+## The Problem in Action
+Running the code is a great way to see the problem and check whether the code is
+vulnerable.
+
+This program can be compiled and linked, and a simple sqlite db created via 
+
+```sh
+# Build
+./build.sh
+
+# Prepare db
+./admin -r
+./admin -c 
+./admin -s
+```
+
+Users can be added via `stdin` in several ways; the second is a pretend "server"
+using the `echo` command.
+
+```sh
+# Add regular user interactively
+./add-user 2>> users.log
+First User
+
+# Regular user via "external" process
+echo "User Outside" | ./add-user 2>> users.log
+```
+
+Check the db and log:
+```
+# Check
+./admin -s
+
+tail -4 users.log 
+```
+
+Looks ok:
+```
+0:$ ./admin -s
+87797|First User
+87808|User Outside
+
+0:$ tail -4 users.log 
+[Tue Jul 21 14:15:46 2020] query: INSERT INTO users VALUES (87797, 'First User')
+[Tue Jul 21 14:17:07 2020] query: INSERT INTO users VALUES (87808, 'User Outside')
+```
+
+But there may be bad input; this one guesses the table name and drops it:
+```sh
+# Add Johnny Droptable 
+./add-user 2>> users.log
+Johnny'); DROP TABLE users; --
+```
+
+And then we have this:
+```sh
+# And the problem:
+./admin -s
+0:$ ./admin -s
+Error: near line 2: no such table: users
+```
+
+What happened?  The log shows that data was treated as command:
+```
+1:$ tail -4 users.log 
+[Tue Jul 21 14:15:46 2020] query: INSERT INTO users VALUES (87797, 'First User')
+[Tue Jul 21 14:17:07 2020] query: INSERT INTO users VALUES (87808, 'User Outside')
+[Tue Jul 21 14:18:25 2020] query: INSERT INTO users VALUES (87817, 'Johnny'); DROP TABLE users; --')
+```
+
+Looking ahead, we now *know* that there is unsafe external data (source)
+which reaches (flow path) a database-writing command (sink).  Thus, a query
+written against this code should find at least one taint flow path.
+
+## Problem Statement
+
+Many security problems can be phrased in terms of _information flow_:
+
+_Given a (problem-specific) set of sources and sinks, is there a path in the data
+flow graph from some source to some sink?_
+
+The example we look at is SQL injection: sources are user-input, sinks are SQL
+queries processing a string formed at runtime.
+
+When parts of the string can be specified by the user, they allow an attacker to
+insert arbitrary sql statements; these could erase a table or extract internal
+data etc.
+
+We will use CodeQL to analyze the source code constructing a SQL
+query using string concatenation and then executing that query
+string.  The following example uses the `sqlite3` library; it 
+- receives user-provided data from `stdin` and keeps it in `buf`
+- uses environment data and stores it in `id`,
+- runs a query in `sqlite3_exec`
+
+This is intentionally simple code, but it has all the elements that have to be
+considered in real code and illustrates the QL features. 
+
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+void write_log(const char* fmt, ...);
+
+void abort_on_error(int rc, sqlite3 *db);
+
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg);
+    
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    int count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+    if (count <= 0) abort();
+    /* strip trailing whitespace */
+    while (count && isspace(buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    /*
+     * show_info(id);
+     */
+}
+
+```
+
+In terms of sources, sinks, and information flow, the concrete problem for codeql is:
+1. specifying `buf` as **source**,
+2. specifying the `query` argument to `sqlite3_exec()` as **sink**, 
+3. specifying some code-specific data flow steps for the codeql library,
+3. using the codeql taint flow library find taint flow paths (if there are any)
+   between the source and the sink.
+
+In the following, we go into more concrete detail and develop codedql scripts to
+solve this problem.
+
+
+
+## Data flow overview and illustration
+In the previous sections we identified the sources of problematic strings
+(accesses of `info` etc.), and the sink that their data may flow to (the argument
+to `sqlite3_exec`).
+
+We need to see if there is data flow between the source(s) and this sink.
+
+The solution here is to use the data flow library.  Data flow is, as the name
+suggests, about tracking the flow of data through the program. It helps answers
+questions like: does this expression ever hold a value that originates from a
+particular other place in the program?
+
+We can visualize the data flow problem as one of finding paths through a directed
+graph, where the nodes of the graph are elements in program, and the edges
+represent the flow of data between those elements. If a path exists, then the data
+flows between those two nodes.
+
+This graph represents the flow of data from the tainted parameter. The nodes of
+graph represent program elements that have a value, such as function parameters
+and expressions. The edges of this graph represent flow through these nodes.
+
+There are two variants of data flow available in CodeQL:
+ - Local (“intra-procedural”) data flow models flow within one function; feasible
+   to compute for all functions in a CodeQL database.
+ - Global (“inter-procedural”) data flow models flow across function calls; not
+   feasible to compute for all functions in a CodeQL database.
+
+While local data flow is feasible to compute for all functions in a CodeQL
+database, global data flow is not. This is because the number of paths becomes
+_exponentially_ larger for global data flow.
+
+The global data flow (and taint tracking) library avoids this problem by requiring
+that the query author specifies which _sources_ and _sinks_ are applicable. This
+allows the implementation to compute paths only between the restricted set of
+nodes, rather than for the full graph.
+
+To illustrate the dataflow for this problem, we have a [collection of slides](https://drive.google.com/file/d/1eEG0eGVDVEQh0C-0_4UIMcD23AWwnGtV/view?usp=sharing)
+for this workshop.
+
+## Tutorial: Sources, Sinks and Flow Steps
+<!--
+XX:
+ !-- The complete project can be downloaded via this 
+ !-- [drive](https://drive.google.com/file/d/1-6c3S-e4FKa_IsuuzhhXupiAwCzzPgD-/view?usp=sharing)
+ !-- link.
+  -->
+
+The tutorial is split into several steps and introduces concepts as they are
+needed.  Experimentation with the presented queries is encouraged, and the
+autocomplete suggestions (Ctrl + Space) and the jump-to-definition command (F12 in
+VS Code) are good ways explore the libraries.
+
+
+### The Data Sink
+Now let's find the function `sqlite3_exec`.  In CodeQL, this uses `Function`
+and a `getName()` attribute.
+
+```ql
+from Function f
+where f.getName() = "sqlite3_exec" 
+select f
+```
+
+This should find one result, 
+```ql
+SQLITE_API int sqlite3_exec(
+  sqlite3*,                                  /* An open database */
+  const char *sql,                           /* SQL to be evaluated */
+  int (*callback)(void*,int,char**,char**),  /* Callback function */
+  void *,                                    /* 1st argument to callback */
+  char **errmsg                              /* Error msg written here */
+);
+```
+in the header `sqlite3.h`.
+
+Next, let's find the calls to `sqlite3_exec` using the `FunctionCall` type
+```ql
+from FunctionCall exec
+where exec.getTarget().getName() = "sqlite3_exec" 
+select exec
+```
+
+This finds our call in `add-user.c`, 
+
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+
+We are interested in the `query` argument, which we can get using `.getArgument`:
+```ql
+from FunctionCall exec, Expr query
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1)
+select exec, query
+```
+
+### The Data Source
+
+The external data enters through the call
+
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+
+We thus want the `buf` argument to the call of the `read` function.  Together, this is 
+
+```ql
+from FunctionCall read, Expr buf
+where
+    read.getTarget().getName() = "read" and
+    buf = read.getArgument(1)
+select read, buf
+```
+
+### The Extra Flow Step
+The codeql data flow library traverses *visible* source code fairly well, but flow
+through opaque functions requires additional support (more on this later).
+Functions for which only a headers is available are opaque, and we have one of
+these here: the call to `snprintf`.  Once we locate this call, there are *two* nodes
+to identify: the inflow and outflow.
+
+Let's start with `snprintf`.  If we try
+```ql
+from FunctionCall printf
+where printf.getTarget().getName() = "snprintf"
+select printf
+```
+we get zero results.  This is puzzling; if we visit the `add-user.c` source and
+follow the definition of `snprintf`, it turns out to be a macro on MacOS:
+```c
+#undef snprintf
+#define snprintf(str, len, ...) \
+  __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
+#endif
+```
+
+Fortunately, the underlying function `__builtin___snprintf_chk` has `snprintf` in
+the name.  So instead of working with C macros from codeql, we generalize our
+query using a name pattern with `.matches`:
+```ql
+from FunctionCall printf
+where printf.getTarget().getName().matches("%snprintf%")
+select printf
+```
+
+This identifies our call
+
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    
+and we need the inflow and outflow nodes next.  `query` is the outflow, `info` is
+the inflow.
+
+In the `snprintf` macro call, those have indices 0 and 4.  In the underlying function
+`__builtin___snprintf_chk`, the indices are 0 and 6.  Using the latter:
+```ql
+from FunctionCall printf, Expr out, Expr into
+where
+    printf.getTarget().getName().matches("%snprintf%") and
+    printf.getArgument(0) = out and
+    printf.getArgument(6) = into
+select printf, out, into
+```
+
+This correctly identifies the call and the extra flow arguments.
+
+<!-- !-- Practice exercise: !-- Very specific: shifted index for macro.
+ Generalize this to consider !-- all trailing arguments as sources.  -->
+
+
+Practice exercise: If you are using linux or windows, generalize this query for
+the `snprintf` arguments found there.  One way to do this is using `or`:
+
+```ql
+printf.getTarget().getName().matches("%snprintf%") and
+(
+  // mac version
+or
+ // linux version
+or
+ // windows version
+)
+```
+
+
+
+## The CodeQL Taint Flow Configuration
+The previous queries identify our source, sink and one additional flow step.  To
+use global data flow and taint tracking we need some additional codeql setup:
+ - a taint flow configuration 
+ - the path problem header and imports
+ - a query formatted for path problems.
+
+These are done next.
+
+### Taint Flow Configuration
+The way we configure global taint flow is by creating a custom extension of the
+`TaintTracking::Configuration` class, and speciyfing `isSource`, `isSink`, and 
+`isAdditionalTaintStep` predicates.
+
+The sources and sinks were explained earlier.  Data flow and taint tracking
+configuration classes support a number of additional features that help configure
+the process of building and exploring the data flow path.
+
+One such feature is adding additional taint steps. This is useful if you use
+libraries which are not modelled by the default taint tracking. You can implement
+this by overriding `isAdditionalTaintStep` predicate. This has two parameters, the
+`from` and the `to` node, and it essentially allows you to add extra edges into the
+taint tracking or data flow graph.
+
+A starting configuration can look like the following, with details to be filled
+in.
+
+```ql
+class SqliFlowConfig extends TaintTracking::Configuration {
+    SqliFlowConfig() { this = "SqliFlow" }
+
+    override predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+    }
+
+    override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
+
+    override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+        // Extra taint step for 
+        //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    }
+
+    override predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    }
+}
+```
+
+`TaintTracking::Configuration` is a _configuration_ class. In this case, there will be
+a single instance of the class, identified by a unique string specified in the
+characteristic predicate. We then override the `isSource` predicates to represent
+the set of possible sources in the program, and `isSink` to represent the possible
+set of sinks in the program.
+
+### Path Problem Setup
+Queries will only list sources and sinks by default.  To inspect these results and
+work with them, we also need the data paths from source to sink.  For this, the
+query needs to have the form of a _path problem_ query.
+
+This requires a modifications to the query header and an extra import: 
+ - The `@kind` comment has to be `path-problem`. This tells the CodeQL toolchain
+   to interpret the results of this query as path results. 
+ - A new import `DataFlow::PathGraph`, which will report the path data
+   alongside the query results. 
+
+Together, this looks like
+```ql
+/**
+ * @name SQLI Vulnerability
+ * @description Using untrusted strings in a sql query allows sql injection attacks.
+ * @kind path-problem
+ * @id cpp/SQLIVulnerable
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+```
+
+### Path Problem Query Format
+To use this new configuration and `PathGraph` support, we call the
+`hasFlowPath(source, sink)` predicate, which will compute a reachability table
+between the defined sources and sinks.  Behind the scenes, you can think of this as
+performing a graph search algorithm from sources to sinks.  The query will look
+like this:
+
+```ql
+from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+where conf.hasFlowPath(source, sink)
+select sink, source, sink, "Possible SQL injection"
+```
+
+## Tutorial: Taint Flow Details
+With the dataflow configuration in place, we just need to provide the details for
+source(s), sink(s), and taint step(s).
+
+Some more steps are required to convert our previous queries for use in data
+flow.  These are covered here.
+
+### The isSink Predicate
+Note that our previous queries used `Expr` nodes, but the taint query requires
+`DataFlow::Node` nodes.
+
+We have identified arguments to the call of the `sqlite3_exec` function via the
+query
+
+```ql
+from FunctionCall exec, Expr query
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1)
+select exec, query
+```
+
+First, we need to incorporate the `DataFlow::Node`.  The key to this is
+`node.asExpr()`, which yields the `node`'s expression.  Adding this we get
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+from FunctionCall exec, Expr query, DataFlow::Node sink
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    query = exec.getArgument(1) and
+    sink.asExpr() = query
+select exec, query, sink
+```
+
+Notice that `query` is now redundant, so this simplifies to 
+```ql
+from FunctionCall exec, DataFlow::Node sink
+where
+    exec.getTarget().getName() = "sqlite3_exec" and
+    sink.asExpr() = exec.getArgument(1) 
+select exec, sink
+```
+
+Second, we need this as a predicate of a single argument, `predicate
+isSink(DataFlow::Node sink)`.  For this we introduce the `exists()`
+[quantifier](https://help.semmle.com/QL/ql-handbook/formulas.html?highlight=exists#exists)
+to move the `FunctionCall exec` into the body of the query and remove it from the
+result:
+
+```ql
+from DataFlow::Node sink
+where
+    exists(FunctionCall exec |
+        exec.getTarget().getName() = "sqlite3_exec" and
+        sink.asExpr() = exec.getArgument(1)
+    )
+select sink
+```
+
+To turn this into a predicate, `from` contents become arguments, the `where`
+becomes the body, and the `select` is dropped:
+
+```ql
+predicate isSink(DataFlow::Node sink) {
+    // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    exists(FunctionCall exec |
+        exec.getTarget().getName() = "sqlite3_exec" and
+        sink.asExpr() = exec.getArgument(1)
+    )
+}
+```
+
+### The isSource Predicate
+Recall that the external data enters through the `buf` argument to the call
+
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+
+and we got this via the query
+
+```ql
+from FunctionCall read, Expr buf
+where
+    read.getTarget().getName() = "read" and
+    buf = read.getArgument(1)
+select read, buf
+```
+
+As for the `isSink` predicate in the previous section, we need to convert this to
+a predicate of a single argument, `predicate isSource(DataFlow::Node source)`.
+Following the same steps, we introduce a `DataFlow::Node` and an `exists()`:
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+from DataFlow::Node source
+where
+    exists(FunctionCall read |
+        read.getTarget().getName() = "read" and
+        read.getArgument(1) = source.asExpr()
+    )
+select source
+```
+
+There is one more adjustment needed for this to work.  The `buf` argument is both
+read by and written to by the `snprintf` function call.  Because we are specifying
+it as a *source*, the value of interest is the value *after* the call.  We get
+this value by
+[casting](https://help.semmle.com/QL/ql-handbook/expressions.html#casts) to the
+post-update node.  Instead of `source.asExpr()`, we use
+`source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()`
+
+
+Last, we incorporate this into a predicate:
+
+```ql
+predicate isSource(DataFlow::Node source) {
+    // count = read(STDIN_FILENO, buf, BUFSIZE);
+    exists(FunctionCall read |
+        read.getTarget().getName() = "read" and
+        read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+    )
+}
+```
+
+If you quick-eval this predicate, you will see that `source` is now `ref arg buf`
+instead of `buf`.
+
+
+### The isAdditionalTaintStep Predicate
+Our previous query identifies the call to `snprintf` and the extra flow arguments:
+
+```ql
+from FunctionCall printf, Expr out, Expr into
+where
+    printf.getTarget().getName().matches("%snprintf%") and
+    printf.getArgument(0) = out and
+    printf.getArgument(6) = into
+select printf, out, into
+```
+
+As for the `isSource` and `isSink` predicates, we need to
+- change from `Expr` to a `DataFlow::Node`
+- change the outflow (`out`) type to a `PostUpdateNode`
+- convert this to a predicate
+
+Put together:
+
+```ql
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+
+predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+    // Extra taint step for
+    //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    exists(FunctionCall printf |
+        printf.getTarget().getName().matches("%snprintf%") and
+        printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
+        printf.getArgument(6) = into.asExpr()
+    )
+}
+```
+
+## Appendix
+This appendix has the complete C source and codeql query.
+
+### The complete Query: SqlInjection.ql
+The full query is
+
+```ql
+/**
+ * @name SQLI Vulnerability
+ * @description Using untrusted strings in a sql query allows sql injection attacks.
+ * @kind path-problem
+ * @id cpp/SQLIVulnerable
+ * @problem.severity warning
+ */
+
+import cpp
+import semmle.code.cpp.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+class SqliFlowConfig extends TaintTracking::Configuration {
+    SqliFlowConfig() { this = "SqliFlow" }
+
+    override predicate isSource(DataFlow::Node source) {
+        // count = read(STDIN_FILENO, buf, BUFSIZE);
+        exists(FunctionCall read |
+            read.getTarget().getName() = "read" and
+            read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
+        )
+    }
+
+    override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
+
+    override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
+        // Extra taint step
+        //     snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+        // But snprintf is a macro on mac os.  The actual function's name is
+        //     #undef snprintf
+        //     #define snprintf(str, len, ...) \
+        //       __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
+        //     #endif
+        exists(FunctionCall printf |
+            printf.getTarget().getName().matches("%snprintf%") and
+            printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
+            // very specific: shifted index for macro.
+            printf.getArgument(6) = into.asExpr()
+        )
+    }
+
+    override predicate isSink(DataFlow::Node sink) {
+        // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+        exists(FunctionCall exec |
+            exec.getTarget().getName() = "sqlite3_exec" and
+            exec.getArgument(1) = sink.asExpr()
+        )
+    }
+}
+
+from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+where conf.hasFlowPath(source, sink)
+select sink, source, sink, "Possible SQL injection"
+```
+
+### The Database Writer: add-user.c
+The complete source for the sqlite database writer
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <sqlite3.h>
+#include <time.h>
+
+void write_log(const char* fmt, ...) {
+    time_t t;
+    char tstr[26];
+    va_list args;
+
+    va_start(args, fmt);
+    t = time(NULL);
+    ctime_r(&t, tstr);
+    tstr[24] = 0; /* no \n */
+    fprintf(stderr, "[%s] ", tstr);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fflush(stderr);
+}
+
+void abort_on_error(int rc, sqlite3 *db) {
+    if( rc ) {
+        fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+
+void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
+    if( rc!=SQLITE_OK ){
+        fprintf(stderr, "SQL error: %s\n", zErrMsg);
+        sqlite3_free(zErrMsg);
+        sqlite3_close(db);
+        fflush(stderr);
+        abort();
+    }
+}
+    
+char* get_user_info() {
+#define BUFSIZE 1024
+    char* buf = (char*) malloc(BUFSIZE * sizeof(char));
+    int count;
+    // Disable buffering to avoid need for fflush
+    // after printf().
+    setbuf( stdout, NULL );
+    printf("*** Welcome to sql injection ***\n");
+    printf("Please enter name: ");
+    count = read(STDIN_FILENO, buf, BUFSIZE);
+    if (count <= 0) abort();
+    /* strip trailing whitespace */
+    while (count && isspace(buf[count-1])) {
+        buf[count-1] = 0; --count;
+    }
+    return buf;
+}
+
+int get_new_id() {
+    int id = getpid();
+    return id;
+}
+
+void write_info(int id, char* info) {
+    sqlite3 *db;
+    int rc;
+    int bufsize = 1024;
+    char *zErrMsg = 0;
+    char query[bufsize];
+    
+    /* open db */
+    rc = sqlite3_open("users.sqlite", &db);
+    abort_on_error(rc, db);
+
+    /* Format query */
+    snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
+    write_log("query: %s\n", query);
+
+    /* Write info */
+    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+    abort_on_exec_error(rc, db, zErrMsg);
+
+    sqlite3_close(db);
+}
+
+int main(int argc, char* argv[]) {
+    char* info;
+    int id;
+    info = get_user_info();
+    id = get_new_id();
+    write_info(id, info);
+    /*
+     * show_info(id);
+     */
+}
+    
+```
--- a/codeql-dataflow-sql-injection/codeql-pack.lock.yml
+++ b/codeql-dataflow-sql-injection/codeql-pack.lock.yml
@@ -0,0 +1,14 @@
+---
+lockVersion: 1.0.0
+dependencies:
+  codeql/cpp-all:
+    version: 0.9.1
+  codeql/dataflow:
+    version: 0.0.2
+  codeql/ssa:
+    version: 0.1.3
+  codeql/tutorial:
+    version: 0.1.3
+  codeql/util:
+    version: 0.1.3
+compiled: false
--- a/codeql-dataflow-sql-injection/cpp-sqli-3fe610d-1.zip
+++ b/codeql-dataflow-sql-injection/cpp-sqli-3fe610d-1.zip
--- a/codeql-dataflow-sql-injection/dataflow-cropped.pdf
+++ b/codeql-dataflow-sql-injection/dataflow-cropped.pdf
--- a/codeql-dataflow-sql-injection/dataflow.key
+++ b/codeql-dataflow-sql-injection/dataflow.key
--- a/codeql-dataflow-sql-injection/dataflow.pdf
+++ b/codeql-dataflow-sql-injection/dataflow.pdf
--- a/codeql-dataflow-sql-injection/qlpack.yml
+++ b/codeql-dataflow-sql-injection/qlpack.yml
@@ -0,0 +1,4 @@
+name: codeql-workshop/cpp-sql-injection
+version: 0.0.1
+dependencies:
+  codeql/cpp-all: "*"
--- a/codeql-dataflow-sql-injection/sarif-summary.jq
+++ b/codeql-dataflow-sql-injection/sarif-summary.jq
@@ -0,0 +1,60 @@
+# -*- sh -*-
+.runs | .[] | .results | .[] |
+    ( (.ruleId, ": ", 
+       (.message.text | split("\n") | ( .[0], " [", length-1 , " more]")),
+       "\n")
+      , 
+      (if (.codeFlows != null) then
+          (.codeFlows | .[] | 
+               ("    Path\n"
+                ,
+                ( .threadFlows | .[] | .locations | .[] | .location | "        "
+                  ,
+                  ( .physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":"))
+                  ,
+                  (.message.text, " ")
+                  , 
+                  "\n"
+                )))
+          else 
+              (.locations | .[] |
+                   ( "        "
+                     ,
+                     (.physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":"))
+                   ))
+              ,
+              # .message.text, 
+              "\n"
+              end)
+    )  | tostring 
+
+# This script extracts the following parts of the sarif output:
+# 
+# # problem
+# "runs" : [ {
+#   "results" : [ {
+#     "ruleId" : "cpp/UncheckedErrorCode",
+
+# # path problem
+# "runs" : [ {
+#   "tool" : {
+#     "driver" : {
+#       "rules" : [ {
+#         "properties" : {
+#           "kind" : "path-problem",
+
+# "runs" : [ {
+#   "results" : [ {
+#     "ruleId" : "cpp/DangerousArithmetic",
+#     "ruleIndex" : 6,
+#     "message" : {
+#       "text" : "Potential overflow (conversion: int -> unsigned int)\nPotential overflow (con
+
+# "runs" : [ {
+#   "results" : [ {
+#     "codeFlows" : [ {
+#       "threadFlows" : [ {
+#         "locations" : [ {
+#           "location" : {
+#             "message" : {
+#               "text" : "buff"
--- a/codeql-dataflow-sql-injection/session.ql
+++ b/codeql-dataflow-sql-injection/session.ql
@@ -0,0 +1,29 @@
+import cpp
+
+// 1. invalid input -- source
+//    count = read(STDIN_FILENO, buf, BUFSIZE - 1);
+//
+// 2. gets to a sql statement -- flow
+//    flow config
+//
+// 3. drops table -- sink
+//    rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
+// All predicates and classes are using one of:
+// AST Abstract syntax tree
+// CFG Control flow graph
+// DFG Data flow graph
+// Type hierarchy
+class DataSource extends VariableAccess {
+  DataSource() {
+    exists(FunctionCall read |
+      read.getTarget().getName() = "read" and
+      read.getArgument(1) = this
+    )
+  }
+}
+
+from FunctionCall read, VariableAccess buf
+where
+  read.getTarget().getName() = "read" and
+  read.getArgument(1) = buf
+select buf