From 834ef46858acb118b06b7540445ec80124e7f13f Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Tue, 4 Mar 2025 19:28:11 -0800 Subject: [PATCH] reduced copy from codeql-dataflow-sql-injection --- .gitattributes | 1 + .gitignore | 3 + README.org | 229 +++++++++++++++++++++++++++++++++++ SqlInjection.ql | 55 +++++++++ add-user.c | 100 +++++++++++++++ admin | 60 +++++++++ build.sh | 2 + codeql-c-sqli.code-workspace | 11 ++ qlpack.yml | 4 + 9 files changed, 465 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 README.org create mode 100644 SqlInjection.ql create mode 100644 add-user.c create mode 100755 admin create mode 100755 build.sh create mode 100644 codeql-c-sqli.code-workspace create mode 100644 qlpack.yml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ea544b2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.psd filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9e75f35 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +add-user + +users.sqlite diff --git a/README.org b/README.org new file mode 100644 index 0000000..264acb1 --- /dev/null +++ b/README.org @@ -0,0 +1,229 @@ + +[[https://imgs.xkcd.com/comics/exploits_of_a_mom.png]] + +(from https://xkcd.com/327/) + + +* SQL injection example +** Setup and sample run + #+BEGIN_SRC sh + # Use a simple headline prompt + PS1=' + \033[32m---- SQL injection demo ----\[\033[33m\033[0m\] + $?:$ ' + + + # Build + ./build.sh + + # Prepare db + ./admin -r + ./admin -c + ./admin -s + + # Add regular user interactively + ./add-user 2>> users.log + First User + + + # Regular user via "external" process + echo "User Outside" | ./add-user 2>> users.log + + # Check + ./admin -s + + # Add Johnny Droptable + ./add-user 2>> users.log + Johnny'); DROP TABLE users; -- + + # And the problem: + ./admin -s + + # Check the log + tail users.log + #+END_SRC + +** Identify the problem + =./add-user= is reading from =STDIN=, and writing to a database; looking at the code in + [[./add-user.c]] leads to + : count = read(STDIN_FILENO, buf, BUFSIZE - 1); + for the read and + : rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + for the write. + + This problem is thus a dataflow problem; in codeql terminology we have + - a /source/ at the =read(STDIN_FILENO, buf, BUFSIZE - 1);= + - a /sink/ at the =sqlite3_exec(db, query, NULL, 0, &zErrMsg);= + + We write codeql to identify these two, and then connect them via + - a /dataflow configuration/ -- for this problem, the more general /taintflow + configuration/. + +** Build codeql database + To get started, build the codeql database (adjust paths to your setup): + #+BEGIN_SRC sh + # Build the db with source commit id. + # export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$(pwd) + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + echo $DB + test -d "$DB" && rm -fR "$DB" + mkdir -p "$DB" + + cd $SRCDIR && codeql database create --language=cpp -s . -j 8 -v $DB --command='./build.sh' + #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + + +** Build codeql database in steps + For larger projects, using a single command to build everything is costly when + any part of the build fails. + + To build a database in steps, use the following sequence, adjusting paths to + your setup: + #+BEGIN_SRC sh + # Build the db with source commit id. + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # Prepare db directory + test -d "$DB" && rm -fR "$DB" + mkdir -p "$DB" + + # Run the build + cd $SRCDIR + codeql database init --language=cpp -s . -v $DB + # Repeat trace-command as needed to cover all targets + codeql database trace-command -v $DB -- make + codeql database finalize -j4 $DB + #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + +** Develop the query bottom-up + 1. Identify the /source/ part of the + : read(STDIN_FILENO, buf, BUFSIZE - 1); + expression, the =buf= argument. + Start from a =from..where..select=, then convert to a predicate. + + 2. Identify the /sink/ part of the + : sqlite3_exec(db, query, NULL, 0, &zErrMsg); + expression, the =query= argument. Again start from =from..where..select=, + then convert to a predicate. + + 3. Fill in the /taintflow configuration/ boilerplate + #+BEGIN_SRC java + class CppSqli extends TaintTracking::Configuration { + CppSqli() { this = "CppSqli" } + + override predicate isSource(DataFlow::Node node) { + none() + } + + override predicate isSink(DataFlow::Node node) { + none() + } + } + #+END_SRC + + Note that an inout-argument in C/C++ (the =buf= pointer is passed to =read= + and points to updated data after the return) is accessed as a codeql source + via + : source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() + instead of the usual + : source.asExpr() + + The final query (without =isAdditionalTaintStep=) is + #+BEGIN_SRC java + /** + ,* @name SQLI Vulnerability + ,* @description Using untrusted strings in a sql query allows sql injection attacks. + ,* @kind path-problem + ,* @id cpp/SQLIVulnerable + ,* @problem.severity warning + ,*/ + + import cpp + import semmle.code.cpp.dataflow.TaintTracking + import DataFlow::PathGraph + + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node source) { + // count = read(STDIN_FILENO, buf, BUFSIZE); + exists(FunctionCall read | + read.getTarget().getName() = "read" and + read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() + ) + } + + override predicate isSink(DataFlow::Node sink) { + // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + exists(FunctionCall exec | + exec.getTarget().getName() = "sqlite3_exec" and + exec.getArgument(1) = sink.asExpr() + ) + } + } + + from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink + where conf.hasFlowPath(source, sink) + select sink, source, sink, "Possible SQL injection" + #+END_SRC + +** Optional: sarif file review of the results + Query results are available in several output formats using the cli. The + following produces the sarif format, a json-based result description. + + #+BEGIN_SRC sh + # The setup information from before + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # To see the help + codeql database analyze -h + + # Run a query + codeql database analyze \ + -v \ + --ram=14000 \ + -j12 \ + --rerun \ + --search-path ~/local/vmsync/ql \ + --format=sarif-latest \ + --output cpp-sqli.sarif \ + -- \ + $DB \ + $SRCDIR/SqlInjection.ql + + # Examine the file in an editor + edit cpp-sqli.sarif + #+END_SRC + + An example of using the sarif data is in the the jq script [[./sarif-summary.jq]]. + When run against the sarif input via + #+BEGIN_SRC sh + jq --raw-output --join-output -f sarif-summary.jq < cpp-sqli.sarif > cpp-sqli.txt + #+END_SRC + it produces output in a form close to that of compiler error messages: + #+BEGIN_SRC text + query-id: message line + Path + ... + Path + ... + #+END_SRC + diff --git a/SqlInjection.ql b/SqlInjection.ql new file mode 100644 index 0000000..47e8794 --- /dev/null +++ b/SqlInjection.ql @@ -0,0 +1,55 @@ +/** +* @name SQLI Vulnerability +* @description Using untrusted strings in a sql query allows sql injection attacks. +* @kind path-problem +* @id cpp/sqlivulnerable +* @problem.severity warning +*/ + +import cpp +import semmle.code.cpp.dataflow.new.TaintTracking + +module SqliFlowConfig implements DataFlow::ConfigSig { + + predicate isSource(DataFlow::Node source) { + // count = read(STDIN_FILENO, buf, BUFSIZE); + exists(FunctionCall read | + read.getTarget().getName() = "read" and + read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asIndirectArgument() + ) + } + + predicate isBarrier(DataFlow::Node sanitizer) { none() } + + // predicate isAdditionalFlowStep(DataFlow::Node into, DataFlow::Node out) { + // // Extra taint step + // // snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info); + // // But snprintf is a macro on mac os. The actual function's name is + // // #undef snprintf + // // #define snprintf(str, len, ...) \ + // // __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__) + // // #endif + // exists(FunctionCall printf | + // printf.getTarget().getName().matches("%snprintf%") and + // printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asIndirectArgument() and + // // very specific: shifted index for macro. + // printf.getArgument(6) = into.asExpr() + // ) + // } + + predicate isSink(DataFlow::Node sink) { + // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + exists(FunctionCall exec | + exec.getTarget().getName() = "sqlite3_exec" and + exec.getArgument(1) = sink.asIndirectArgument() + ) + } +} + +module MyFlow = TaintTracking::Global; +import MyFlow::PathGraph + +from MyFlow::PathNode source, MyFlow::PathNode sink +where MyFlow::flowPath(source, sink) +select sink, source, sink, "Possible SQL injection" + diff --git a/add-user.c b/add-user.c new file mode 100644 index 0000000..1e0a60d --- /dev/null +++ b/add-user.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include + +void write_log(const char* fmt, ...) { + time_t t; + char tstr[26]; + va_list args; + + va_start(args, fmt); + t = time(NULL); + ctime_r(&t, tstr); + tstr[24] = 0; /* no \n */ + fprintf(stderr, "[%s] ", tstr); + vfprintf(stderr, fmt, args); + va_end(args); + fflush(stderr); +} + +void abort_on_error(int rc, sqlite3 *db) { + if( rc ) { + fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db)); + sqlite3_close(db); + fflush(stderr); + abort(); + } +} + +void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) { + if( rc!=SQLITE_OK ){ + fprintf(stderr, "SQL error: %s\n", zErrMsg); + sqlite3_free(zErrMsg); + sqlite3_close(db); + fflush(stderr); + abort(); + } +} + +char* get_user_info() { +#define BUFSIZE 1024 + char* buf = (char*) malloc(BUFSIZE * sizeof(char)); + if(buf==NULL) abort(); + int count; + // Disable buffering to avoid need for fflush + // after printf(). + setbuf( stdout, NULL ); + printf("*** Welcome to sql injection ***\n"); + printf("Please enter name: "); + count = read(STDIN_FILENO, buf, BUFSIZE - 1); + if (count <= 0) abort(); + // ensure the buffer is zero-terminated + buf[count] = '\0'; + /* strip trailing whitespace */ + while (count && isspace(buf[count-1])) { + buf[count-1] = 0; --count; + } + return buf; +} + +int get_new_id() { + int id = getpid(); + return id; +} + +void write_info(int id, char* info) { + sqlite3 *db; + int rc; + int bufsize = 1024; + char *zErrMsg = 0; + char query[bufsize]; + + /* open db */ + rc = sqlite3_open("users.sqlite", &db); + abort_on_error(rc, db); + + /* Format query */ + snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info); + write_log("query: %s\n", query); + + /* Write info */ + rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + abort_on_exec_error(rc, db, zErrMsg); + + sqlite3_close(db); +} + +int main(int argc, char* argv[]) { + char* info; + int id; + info = get_user_info(); + id = get_new_id(); + write_info(id, info); + free(info); + /* + * show_info(id); + */ +} diff --git a/admin b/admin new file mode 100755 index 0000000..5d3b70d --- /dev/null +++ b/admin @@ -0,0 +1,60 @@ +#!/bin/bash + +set -e + +script=$(basename "$0") + +GREEN='\033[0;32m' +MAGENTA='\033[0;95m' +NC='\033[0m' +RED='\033[0;31m' +YELLOW='\033[0;33m' + +help() { + echo -e "Usage: ./${script} [options]" \ + "\n${YELLOW}Options: ${NC}" \ + "\n\t -h ${GREEN}Show Help ${NC}" \ + "\n\t -c ${MAGENTA}Creates a users table ${NC}" \ + "\n\t -s ${MAGENTA}Shows all records in the users table ${NC}" \ + "\n\t -r ${RED}Removes users table ${NC}" +} +remove-db () { + rm users.sqlite +} + +create-db () { + echo ' + CREATE TABLE users ( + user_id INTEGER not null, + name TEXT NOT NULL + ); + ' | sqlite3 users.sqlite +} + +show-db () { + echo ' + SELECT * FROM users; + ' | sqlite3 users.sqlite +} + +if [ $# == 0 ]; then + help + exit 0 +fi + +while getopts "h?csr" option +do + case "${option}" + in + h|\?) + help + exit 0 + ;; + c) create-db + ;; + s) show-db + ;; + r) remove-db + ;; + esac +done diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..7281840 --- /dev/null +++ b/build.sh @@ -0,0 +1,2 @@ +#!/bin/bash +clang -Wall add-user.c -lsqlite3 -o add-user diff --git a/codeql-c-sqli.code-workspace b/codeql-c-sqli.code-workspace new file mode 100644 index 0000000..aa98b17 --- /dev/null +++ b/codeql-c-sqli.code-workspace @@ -0,0 +1,11 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "codeQL.runningQueries.autoSave": true, + "makefile.configureOnOpen": false + } +} diff --git a/qlpack.yml b/qlpack.yml new file mode 100644 index 0000000..5a30be8 --- /dev/null +++ b/qlpack.yml @@ -0,0 +1,4 @@ +name: codeql-workshop/cpp-sql-injection +version: 0.0.1 +dependencies: + codeql/cpp-all: "*"