From dd664fe4ef6ca11eebea330c0e1548d42f3a06cf Mon Sep 17 00:00:00 2001 From: Michael Hohn Date: Wed, 8 Jun 2022 08:36:05 +0200 Subject: [PATCH] Insert updates from github.com:hohn/codeql.git --- README.org | 194 +++++++++++++++++-- add-user.c | 6 +- admin | 43 +++- codeql-dataflow-sql-injection.code-workspace | 12 +- codeql-dataflow-sql-injection.md | 20 +- qlpack.yml | 2 +- 6 files changed, 247 insertions(+), 30 deletions(-) diff --git a/README.org b/README.org index f7ddb33..f9f9a5c 100644 --- a/README.org +++ b/README.org @@ -11,9 +11,9 @@ ./build.sh # Prepare db - ./admin rm-db - ./admin create-db - ./admin show-db + ./admin -r + ./admin -c + ./admin -s # Add regular user interactively ./add-user 2>> users.log @@ -22,34 +22,202 @@ # Regular user via "external" process echo "User Outside" | ./add-user 2>> users.log - ./admin show-db # Check - ./admin show-db + ./admin -s # Add Johnny Droptable ./add-user 2>> users.log Johnny'); DROP TABLE users; -- - # And the problem: - ./admin show-db + ./admin -s # Check the log - + tail users.log #+END_SRC +** Identify the problem + =./add-user= is reading from =STDIN=, and writing to a database; looking at the code in + [[./add-user.c]] leads to + : count = read(STDIN_FILENO, buf, BUFSIZE - 1); + for the read and + : rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + for the write. + + This problem is thus a dataflow problem; in codeql terminology we have + - a /source/ at the =read(STDIN_FILENO, buf, BUFSIZE - 1);= + - a /sink/ at the =sqlite3_exec(db, query, NULL, 0, &zErrMsg);= + + We write codeql to identify these two, and then connect them via + - a /dataflow configuration/ -- for this problem, the more general /taintflow + configuration/. + ** Build codeql database + To get started, build the codeql database (adjust paths to your setup): #+BEGIN_SRC sh # Build the db with source commit id. - export PATH=$HOME/local/vmsync/codeql224:"$PATH" - SRCDIR=$HOME/local/codeql-dataflow-sql-injection/ - DB=$HOME/local/db/codeql-dataflow-sql-injection-$(cd $SRCDIR && git rev-parse --short HEAD) + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) echo $DB test -d "$DB" && rm -fR "$DB" mkdir -p "$DB" - cd $SRCDIR - codeql database create --language=cpp -s $SRCDIR -j 8 -v $DB --command='./build.sh' + cd $SRCDIR && codeql database create --language=cpp -s . -j 8 -v $DB --command='./build.sh' #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + + +** Build codeql database in steps + For larger projects, using a single command to build everything is costly when + any part of the build fails. + + To build a database in steps, use the following sequence, adjusting paths to + your setup: + #+BEGIN_SRC sh + # Build the db with source commit id. + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # Prepare db directory + test -d "$DB" && rm -fR "$DB" + mkdir -p "$DB" + + # Run the build + cd $SRCDIR + codeql database init --language=cpp -s . -v $DB + # Repeat trace-command as needed to cover all targets + codeql database trace-command -v $DB -- make + codeql database finalize -j4 $DB + #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + +** Develop the query bottom-up + 1. Identify the /source/ part of the + : read(STDIN_FILENO, buf, BUFSIZE - 1); + expression, the =buf= argument. + Start from a =from..where..select=, then convert to a predicate. + + 2. Identify the /sink/ part of the + : sqlite3_exec(db, query, NULL, 0, &zErrMsg); + expression, the =query= argument. Again start from =from..where..select=, + then convert to a predicate. + + 3. Fill in the /taintflow configuration/ boilerplate + #+BEGIN_SRC java + class CppSqli extends TaintTracking::Configuration { + CppSqli() { this = "CppSqli" } + + override predicate isSource(DataFlow::Node node) { + none() + } + + override predicate isSink(DataFlow::Node node) { + none() + } + } + #+END_SRC + + Note that an inout-argument in C/C++ (the =buf= pointer is passed to =read= + and points to updated data after the return) is accessed as a codeql source + via + : source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() + instead of the usual + : source.asExpr() + + The final query (without =isAdditionalTaintStep=) is + #+BEGIN_SRC java + /** + ,* @name SQLI Vulnerability + ,* @description Using untrusted strings in a sql query allows sql injection attacks. + ,* @kind path-problem + ,* @id cpp/SQLIVulnerable + ,* @problem.severity warning + ,*/ + + import cpp + import semmle.code.cpp.dataflow.TaintTracking + import DataFlow::PathGraph + + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node source) { + // count = read(STDIN_FILENO, buf, BUFSIZE); + exists(FunctionCall read | + read.getTarget().getName() = "read" and + read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() + ) + } + + override predicate isSink(DataFlow::Node sink) { + // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + exists(FunctionCall exec | + exec.getTarget().getName() = "sqlite3_exec" and + exec.getArgument(1) = sink.asExpr() + ) + } + } + + from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink + where conf.hasFlowPath(source, sink) + select sink, source, sink, "Possible SQL injection" + #+END_SRC + +** Optional: sarif file review of the results + Query results are available in several output formats using the cli. The + following produces the sarif format, a json-based result description. + + #+BEGIN_SRC sh + # The setup information from before + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection + DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # To see the help + codeql database analyze -h + + # Run a query + codeql database analyze \ + -v \ + --ram=14000 \ + -j12 \ + --rerun \ + --search-path ~/local/vmsync/ql \ + --format=sarif-latest \ + --output cpp-sqli.sarif \ + -- \ + $DB \ + $SRCDIR/SqlInjection.ql + + # Examine the file in an editor + edit cpp-sqli.sarif + #+END_SRC + + An example of using the sarif data is in the the jq script [[./sarif-summary.jq]]. + When run against the sarif input via + #+BEGIN_SRC sh + jq --raw-output --join-output -f sarif-summary.jq < cpp-sqli.sarif > cpp-sqli.txt + #+END_SRC + it produces output in a form close to that of compiler error messages: + #+BEGIN_SRC text + query-id: message line + Path + ... + Path + ... + #+END_SRC + diff --git a/add-user.c b/add-user.c index 8740813..6725356 100644 --- a/add-user.c +++ b/add-user.c @@ -42,14 +42,17 @@ void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) { char* get_user_info() { #define BUFSIZE 1024 char* buf = (char*) malloc(BUFSIZE * sizeof(char)); + if(buf==NULL) abort(); int count; // Disable buffering to avoid need for fflush // after printf(). setbuf( stdout, NULL ); printf("*** Welcome to sql injection ***\n"); printf("Please enter name: "); - count = read(STDIN_FILENO, buf, BUFSIZE); + count = read(STDIN_FILENO, buf, BUFSIZE - 1); if (count <= 0) abort(); + // ensure the buffer is zero-terminated + buf[count] = '\0'; /* strip trailing whitespace */ while (count && isspace(buf[count-1])) { buf[count-1] = 0; --count; @@ -90,6 +93,7 @@ int main(int argc, char* argv[]) { info = get_user_info(); id = get_new_id(); write_info(id, info); + free(info); /* * show_info(id); */ diff --git a/admin b/admin index a9b10ea..5d3b70d 100755 --- a/admin +++ b/admin @@ -1,5 +1,24 @@ #!/bin/bash -rm-db () { + +set -e + +script=$(basename "$0") + +GREEN='\033[0;32m' +MAGENTA='\033[0;95m' +NC='\033[0m' +RED='\033[0;31m' +YELLOW='\033[0;33m' + +help() { + echo -e "Usage: ./${script} [options]" \ + "\n${YELLOW}Options: ${NC}" \ + "\n\t -h ${GREEN}Show Help ${NC}" \ + "\n\t -c ${MAGENTA}Creates a users table ${NC}" \ + "\n\t -s ${MAGENTA}Shows all records in the users table ${NC}" \ + "\n\t -r ${RED}Removes users table ${NC}" +} +remove-db () { rm users.sqlite } @@ -18,4 +37,24 @@ show-db () { ' | sqlite3 users.sqlite } -eval $@ +if [ $# == 0 ]; then + help + exit 0 +fi + +while getopts "h?csr" option +do + case "${option}" + in + h|\?) + help + exit 0 + ;; + c) create-db + ;; + s) show-db + ;; + r) remove-db + ;; + esac +done diff --git a/codeql-dataflow-sql-injection.code-workspace b/codeql-dataflow-sql-injection.code-workspace index 6755577..aa1d6e9 100644 --- a/codeql-dataflow-sql-injection.code-workspace +++ b/codeql-dataflow-sql-injection.code-workspace @@ -4,8 +4,14 @@ "path": "." }, { - "name": "[codeql-dataflow-sql-injection-d5b28fb source archive]", - "uri": "codeql-zip-archive://0-66/Users/hohn/local/db/codeql-dataflow-sql-injection-d5b28fb/src.zip/" + "path": "../../../vmsync/ql" + }, + { + "name": "[cpp-sqli-89900b3 source archive]", + "uri": "codeql-zip-archive://0-110/Users/hohn/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection/cpp-sqli-89900b3/src.zip" } - ] + ], + "settings": { + "codeQL.runningQueries.autoSave": true + } } \ No newline at end of file diff --git a/codeql-dataflow-sql-injection.md b/codeql-dataflow-sql-injection.md index fa041d3..96c639c 100644 --- a/codeql-dataflow-sql-injection.md +++ b/codeql-dataflow-sql-injection.md @@ -65,7 +65,7 @@ If you get stuck, try searching our documentation and blog posts for help and id - [Using the CodeQL extension for VS Code](https://help.semmle.com/codeql/codeql-for-vscode.html) ## Codeql Recap -This is a brief review of codeql taken from the [full +This is a brief review of CodeQL taken from the [full introduction](https://git.io/JJqdS). For more details, see the [documentation links](#documentation-links). We will revisit all of this during the tutorial. @@ -89,7 +89,7 @@ select /* ... expressions ... */ The `from` clause specifies some variables that will be used in the query. The `where` clause specifies some conditions on those variables in the form of logical -formulas. The `select` clauses speciifes what the results should be, and can refer +formulas. The `select` clauses specifies what the results should be, and can refer to variables defined in the `from` clause. The `from` clause is defined as a series of variable declarations, where each @@ -206,9 +206,9 @@ This program can be compiled and linked, and a simple sqlite db created via ./build.sh # Prepare db -./admin rm-db -./admin create-db -./admin show-db +./admin -r +./admin -c +./admin -s ``` Users can be added via `stdin` in several ways; the second is a pretend "server" @@ -226,14 +226,14 @@ echo "User Outside" | ./add-user 2>> users.log Check the db and log: ``` # Check -./admin show-db +./admin -s tail -4 users.log ``` Looks ok: ``` -0:$ ./admin show-db +0:$ ./admin -s 87797|First User 87808|User Outside @@ -252,8 +252,8 @@ Johnny'); DROP TABLE users; -- And then we have this: ```sh # And the problem: -./admin show-db -0:$ ./admin show-db +./admin -s +0:$ ./admin -s Error: near line 2: no such table: users ``` @@ -580,7 +580,7 @@ the process of building and exploring the data flow path. One such feature is adding additional taint steps. This is useful if you use libraries which are not modelled by the default taint tracking. You can implement this by overriding `isAdditionalTaintStep` predicate. This has two parameters, the -`from` and the `to` node, and essentially allows you to add extra edges into the +`from` and the `to` node, and it essentially allows you to add extra edges into the taint tracking or data flow graph. A starting configuration can look like the following, with details to be filled diff --git a/qlpack.yml b/qlpack.yml index c1ab1ff..b7e5d10 100644 --- a/qlpack.yml +++ b/qlpack.yml @@ -1,3 +1,3 @@ -name: cpp_sql_injection +name: cpp-sql-injection version: 0.0.0 libraryPathDependencies: codeql-cpp