commit ddb5e545ff2eac348f59c774209d121ee9431e60 Author: Michael Hohn Date: Wed Aug 16 14:17:31 2023 -0700 Initial clone from https://github.com/advanced-security/codeql-workshops-staging/tree/master/java/codeql-dataflow-sql-injection diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4c3f7f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*~ +*.html diff --git a/README.org b/README.org new file mode 100644 index 0000000..d5585a1 --- /dev/null +++ b/README.org @@ -0,0 +1,7 @@ +* SQL injection example + This is an introductory codeql workshop for Java. + 1. The problem and its source code are in [[./src]] along with a full description in + [[./src/README.org]]. + 2. The developed queries are in [[./solutions]] and their tests in [[./tests]]. The + derivation of those queries is described in [[./solutions/README.org]] + diff --git a/codeql-overview-for-workshop.pdf b/codeql-overview-for-workshop.pdf new file mode 100644 index 0000000..467961c Binary files /dev/null and b/codeql-overview-for-workshop.pdf differ diff --git a/java-demo.code-workspace b/java-demo.code-workspace new file mode 100644 index 0000000..fd3b45f --- /dev/null +++ b/java-demo.code-workspace @@ -0,0 +1,10 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "git.ignoreLimitWarning": true + } +} \ No newline at end of file diff --git a/solutions/README.html b/solutions/README.html new file mode 100644 index 0000000..baabc37 --- /dev/null +++ b/solutions/README.html @@ -0,0 +1,366 @@ + + + + + + + + + + + + + +
+ +
+

1. Develop the query bottom-up

+
+
    +
  1. +Identify the source part of the +

    +
    +System.console().readLine();
    +
    + +

    +expression, the buf argument. +Start from a from..where..select, then convert to a predicate. +

  2. + +
  3. +Identify the sink part of the +

    +
    +conn.createStatement().executeUpdate(query);
    +
    + +

    +expression, the query argument. Again start from from..where..select, +then convert to a predicate. +

  4. + +
  5. +Fill in the taintflow configuration boilerplate +

    +
    +
    class SqliFlowConfig extends TaintTracking::Configuration {
    +    SqliFlowConfig() { this = "SqliFlow" }
    +
    +    override predicate isSource(DataFlow::Node node) {
    +        none()
    +            }
    +
    +    override predicate isSink(DataFlow::Node node) {
    +        none()
    +            }
    +}
    +
    +
  6. +
+ +

+The final query (without isAdditionalTaintStep) is +

+
+
/**
+ * @name SQLI Vulnerability
+ * @description Using untrusted strings in a sql query allows sql injection attacks.
+ * @kind path-problem
+ * @id java/SQLIVulnerable
+ * @problem.severity warning
+ */
+
+import java
+import semmle.code.java.dataflow.TaintTracking
+import DataFlow::PathGraph
+
+class SqliFlowConfig extends TaintTracking::Configuration {
+    SqliFlowConfig() { this = "SqliFlow" }
+
+    override predicate isSource(DataFlow::Node source) {
+       // System.console().readLine();
+       exists(Call read |
+           read.getCallee().getName() = "readLine" and
+           read = source.asExpr()
+       )
+   }
+
+    override predicate isSink(DataFlow::Node sink) {
+       // conn.createStatement().executeUpdate(query);
+       exists(Call exec |
+           exec.getCallee().getName() = "executeUpdate" and
+           exec.getArgument(0) = sink.asExpr()
+       )
+   }
+}
+
+from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
+where conf.hasFlowPath(source, sink)
+select sink, source, sink, "Possible SQL injection"
+
+
+
+
+ +
+

2. Optional: sarif file review of the results

+
+

+Query results are available in several output formats using the cli. The +following produces the sarif format, a json-based result description. +

+ +
+
# The setup information from before
+export PATH=$HOME/local/vmsync/codeql250:"$PATH"
+SRCDIR=$HOME/local/codeql-training-material.java-sqli/java/codeql-dataflow-sql-injection
+DB=$SRCDIR/java-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
+
+# Check paths
+echo $DB
+echo $SRCDIR
+
+# To see the help
+codeql database analyze -h
+
+# Run a query
+codeql database analyze                         \
+       -v                                       \
+       --ram=14000                              \
+       -j12                                     \
+       --rerun                                  \
+       --search-path ~/local/vmsync/ql          \
+       --format=sarif-latest                    \
+       --output java-sqli.sarif                 \
+       --                                       \
+       $DB                                      \
+       $SRCDIR/SqlInjection.ql
+
+# Examine the file in an editor
+edit java-sqli.sarif
+
+
+ +

+An example of using the sarif data is in the the jq script ./sarif-summary.jq. +When run against the sarif input via +

+
+
jq --raw-output --join-output  -f sarif-summary.jq < java-sqli.sarif > java-sqli.txt
+
+
+

+it produces output in a form close to that of compiler error messages: +

+
+
query-id: message line 
+    Path
+       ...
+    Path
+       ...
+
+
+
+
+
+
+

Author: Michael Hohn

+

Created: 2023-08-16 Wed 10:26

+

Validate

+
+ + diff --git a/solutions/README.org b/solutions/README.org new file mode 100644 index 0000000..dc9fd1d --- /dev/null +++ b/solutions/README.org @@ -0,0 +1,119 @@ +* Develop the query bottom-up + 1. Identify the /source/ part of the + : System.console().readLine(); + expression, the =buf= argument. + Start from a =from..where..select=, then convert to a predicate. + + 2. Identify the /sink/ part of the + : conn.createStatement().executeUpdate(query); + expression, the =query= argument. Again start from =from..where..select=, + then convert to a predicate. + + 3. Fill in the /taintflow configuration/ boilerplate + XX: update for new module approach; + See ~/local/codeql-workshop-dataflow-c/exercises/Exercise16.ql + [[file:~/local/codeql-workshop-dataflow-c/exercises/Exercise16.ql::module InputTypesToTypeValidation = DataFlow::Make;]] + or + [[file:~/local/codeql-workshop-dataflow-c/solutions/Exercise16.ql::module InputTypesToTypeValidation = DataFlow::Make;]] + + #+BEGIN_SRC java + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node node) { + none() + } + + override predicate isSink(DataFlow::Node node) { + none() + } + } + #+END_SRC + + The final query (without =isAdditionalTaintStep=) is + #+BEGIN_SRC java + /** + ,* @name SQLI Vulnerability + ,* @description Using untrusted strings in a sql query allows sql injection attacks. + ,* @kind path-problem + ,* @id java/SQLIVulnerable + ,* @problem.severity warning + ,*/ + + import java + import semmle.code.java.dataflow.TaintTracking + import DataFlow::PathGraph + + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node source) { + // System.console().readLine(); + exists(Call read | + read.getCallee().getName() = "readLine" and + read = source.asExpr() + ) + } + + override predicate isSink(DataFlow::Node sink) { + // conn.createStatement().executeUpdate(query); + exists(Call exec | + exec.getCallee().getName() = "executeUpdate" and + exec.getArgument(0) = sink.asExpr() + ) + } + } + + from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink + where conf.hasFlowPath(source, sink) + select sink, source, sink, "Possible SQL injection" + #+END_SRC + +* Optional: sarif file review of the results + Query results are available in several output formats using the cli. The + following produces the sarif format, a json-based result description. + + #+BEGIN_SRC sh + # The setup information from before + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.java-sqli/java/codeql-dataflow-sql-injection + DB=$SRCDIR/java-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # To see the help + codeql database analyze -h + + # Run a query + codeql database analyze \ + -v \ + --ram=14000 \ + -j12 \ + --rerun \ + --search-path ~/local/vmsync/ql \ + --format=sarif-latest \ + --output java-sqli.sarif \ + -- \ + $DB \ + $SRCDIR/SqlInjection.ql + + # Examine the file in an editor + edit java-sqli.sarif + #+END_SRC + + An example of using the sarif data is in the the jq script [[./sarif-summary.jq]]. + When run against the sarif input via + #+BEGIN_SRC sh + jq --raw-output --join-output -f sarif-summary.jq < java-sqli.sarif > java-sqli.txt + #+END_SRC + it produces output in a form close to that of compiler error messages: + #+BEGIN_SRC text + query-id: message line + Path + ... + Path + ... + #+END_SRC + diff --git a/solutions/qlpack.yml b/solutions/qlpack.yml new file mode 100644 index 0000000..7820d79 --- /dev/null +++ b/solutions/qlpack.yml @@ -0,0 +1,6 @@ +--- +library: false +name: codeql-workshop/java-sql-injection +version: 0.0.1 +dependencies: + codeql/java-all: "*" diff --git a/src/AddUser.java b/src/AddUser.java new file mode 100644 index 0000000..7c69b87 --- /dev/null +++ b/src/AddUser.java @@ -0,0 +1,45 @@ +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +public class AddUser { + public static Connection connect() { + Connection conn = null; + try { + String url = "jdbc:sqlite:users.sqlite"; + conn = DriverManager.getConnection(url); + System.out.println("Connected..."); + } catch (SQLException e) { + System.out.println(e.getMessage()); + } + return conn; + } + + static String get_user_info() { + System.out.println("Enter name:"); + return System.console().readLine(); + } + + static void write_info(int id, String info) { + try (Connection conn = connect()) { + String query = String.format("INSERT INTO users VALUES (%d, '%s')", id, info); + conn.createStatement().executeUpdate(query); + System.err.printf("Sent: %s", query); + } catch (SQLException e) { + System.out.println(e.getMessage()); + } + } + + static int get_new_id() { + return (int)(Math.random()*100000); + } + + public static void main(String[] args) { + String info; + int id; + + info = get_user_info(); + id = get_new_id(); + write_info(id, info); + } +} diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..48fcaf6 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,26 @@ +add-user: add-user.c + clang -Wall add-user.c -lsqlite3 -o add-user + +clean: + rm -f README.html add-user cpp-sqli.sarif cpp-sqli.txt users.log + rm -f users.sqlite *.bak *~ cpp-sqli-demo.zip + +ZIPLIST := \ + Makefile \ + README.org \ + SqlInjection.ql \ + add-user.c \ + add-user.sh \ + admin \ + build.sh \ + codeql-dataflow-sql-injection.md \ + codeql-overview-for-workshop.pdf \ + cpp-sqli.code-workspace \ + dataflow-cropped.pdf \ + qlpack.yml \ + sarif-summary.jq \ + session.ql + +demo-zip: + rm -f cpp-sqli-demo.zip + zip cpp-sqli-demo.zip $(ZIPLIST) diff --git a/src/README.org b/src/README.org new file mode 100644 index 0000000..f0ae073 --- /dev/null +++ b/src/README.org @@ -0,0 +1,218 @@ +* SQL injection example +** Setup and sample run + The jdbc connector at https://github.com/xerial/sqlite-jdbc, from [[https://github.com/xerial/sqlite-jdbc/releases/download/3.36.0.1/sqlite-jdbc-3.36.0.1.jar][here]] is + included in the git repository. + + #+BEGIN_SRC sh + # Use a simple headline prompt + PS1=' + \033[32m---- SQL injection demo ----\[\033[33m\033[0m\] + $?:$ ' + + + # Build + ./build.sh + + # Prepare db + ./admin -r + ./admin -c + ./admin -s + + # Add regular user interactively + ./add-user 2>> users.log + First User + + # Check + ./admin -s + + # Add Johnny Droptable + ./add-user 2>> users.log + Johnny'); DROP TABLE users; -- + + # And the problem: + ./admin -s + + # Check the log + tail users.log + #+END_SRC + +** Identify the problem + =./add-user= is reading from =STDIN=, and writing to a database; looking at the code in + [[./AddUser.java]] leads to + : System.console().readLine(); + for the read and + : conn.createStatement().executeUpdate(query); + for the write. + + This problem is thus a dataflow problem; in codeql terminology we have + - a /source/ at the =System.console().readLine();= + - a /sink/ at the =conn.createStatement().executeUpdate(query);= + + We write codeql to identify these two, and then connect them via + - a /dataflow configuration/ -- for this problem, the more general /taintflow + configuration/. + +** Build codeql database + To get started, build the codeql database (adjust paths to your setup): + #+BEGIN_SRC sh + # Build the db with source commit id. + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$(pwd) + DB=$SRCDIR/java-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + echo $DB + test -d "$DB" && rm -fR "$DB" + mkdir -p "$DB" + + cd $SRCDIR && codeql database create --language=java -s . -j 8 -v $DB --command='./build.sh' + + # Check for AddUser in the db + unzip -v $DB/src.zip | grep AddUser + #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + + +** Build codeql database in steps + For larger projects, using a single command to build everything is costly when + any part of the build fails. + + To build a database in steps, use the following sequence, adjusting paths to + your setup: + #+BEGIN_SRC sh + # Build the db with source commit id. + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.java-sqli/java/codeql-dataflow-sql-injection + DB=$SRCDIR/java-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # Prepare db directory + test -d "$DB" && rm -fR "$DB" + mkdir -p "$DB" + + # Run the build + cd $SRCDIR + codeql database init --language=java -s . -v $DB + # Repeat trace-command as needed to cover all targets + codeql database trace-command -v $DB -- make + codeql database finalize -j4 $DB + #+END_SRC + + Then add this database directory to your VS Code =DATABASES= tab. + +** Develop the query bottom-up + 1. Identify the /source/ part of the + : System.console().readLine(); + expression, the =buf= argument. + Start from a =from..where..select=, then convert to a predicate. + + 2. Identify the /sink/ part of the + : conn.createStatement().executeUpdate(query); + expression, the =query= argument. Again start from =from..where..select=, + then convert to a predicate. + + 3. Fill in the /taintflow configuration/ boilerplate + #+BEGIN_SRC java + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node node) { + none() + } + + override predicate isSink(DataFlow::Node node) { + none() + } + } + #+END_SRC + + The final query (without =isAdditionalTaintStep=) is + #+BEGIN_SRC java + /** + ,* @name SQLI Vulnerability + ,* @description Using untrusted strings in a sql query allows sql injection attacks. + ,* @kind path-problem + ,* @id java/SQLIVulnerable + ,* @problem.severity warning + ,*/ + + import java + import semmle.code.java.dataflow.TaintTracking + import DataFlow::PathGraph + + class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node source) { + // System.console().readLine(); + exists(Call read | + read.getCallee().getName() = "readLine" and + read = source.asExpr() + ) + } + + override predicate isSink(DataFlow::Node sink) { + // conn.createStatement().executeUpdate(query); + exists(Call exec | + exec.getCallee().getName() = "executeUpdate" and + exec.getArgument(0) = sink.asExpr() + ) + } + } + + from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink + where conf.hasFlowPath(source, sink) + select sink, source, sink, "Possible SQL injection" + #+END_SRC + +** Optional: sarif file review of the results + Query results are available in several output formats using the cli. The + following produces the sarif format, a json-based result description. + + #+BEGIN_SRC sh + # The setup information from before + export PATH=$HOME/local/vmsync/codeql250:"$PATH" + SRCDIR=$HOME/local/codeql-training-material.java-sqli/java/codeql-dataflow-sql-injection + DB=$SRCDIR/java-sqli-$(cd $SRCDIR && git rev-parse --short HEAD) + + # Check paths + echo $DB + echo $SRCDIR + + # To see the help + codeql database analyze -h + + # Run a query + codeql database analyze \ + -v \ + --ram=14000 \ + -j12 \ + --rerun \ + --search-path ~/local/vmsync/ql \ + --format=sarif-latest \ + --output java-sqli.sarif \ + -- \ + $DB \ + $SRCDIR/SqlInjection.ql + + # Examine the file in an editor + edit java-sqli.sarif + #+END_SRC + + An example of using the sarif data is in the the jq script [[./sarif-summary.jq]]. + When run against the sarif input via + #+BEGIN_SRC sh + jq --raw-output --join-output -f sarif-summary.jq < java-sqli.sarif > java-sqli.txt + #+END_SRC + it produces output in a form close to that of compiler error messages: + #+BEGIN_SRC text + query-id: message line + Path + ... + Path + ... + #+END_SRC + diff --git a/src/SqlInjection.ql b/src/SqlInjection.ql new file mode 100644 index 0000000..7d5fac6 --- /dev/null +++ b/src/SqlInjection.ql @@ -0,0 +1,44 @@ +/** + * @name SQLI Vulnerability + * @description Using untrusted strings in a sql query allows sql injection attacks. + * @kind path-problem + * @id cpp/SQLIVulnerable + * @problem.severity warning + */ + +import java +import semmle.code.java.dataflow.TaintTracking +import DataFlow::PathGraph + +class SqliFlowConfig extends TaintTracking::Configuration { + SqliFlowConfig() { this = "SqliFlow" } + + override predicate isSource(DataFlow::Node source) { + // System.console().readLine(); + exists(Call read | + read.getCallee().getName() = "readLine" and + read = source.asExpr() + ) + } + + override predicate isSanitizer(DataFlow::Node sanitizer) { none() } + + override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) { + // Extra taint step + // String.format("INSERT INTO users VALUES (%d, '%s')", id, info); + // Not needed here, but may be needed for larger libraries. + none() + } + + override predicate isSink(DataFlow::Node sink) { + // conn.createStatement().executeUpdate(query); + exists(Call exec | + exec.getCallee().getName() = "executeUpdate" and + exec.getArgument(0) = sink.asExpr() + ) + } +} + +from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink +where conf.hasFlowPath(source, sink) +select sink, source, sink, "Possible SQL injection" diff --git a/src/add-user b/src/add-user new file mode 100755 index 0000000..53d5f3c --- /dev/null +++ b/src/add-user @@ -0,0 +1,3 @@ +#!/bin/sh +java -cp ".:sqlite-jdbc-3.36.0.1.jar" AddUser $@ + diff --git a/src/admin b/src/admin new file mode 100755 index 0000000..5d3b70d --- /dev/null +++ b/src/admin @@ -0,0 +1,60 @@ +#!/bin/bash + +set -e + +script=$(basename "$0") + +GREEN='\033[0;32m' +MAGENTA='\033[0;95m' +NC='\033[0m' +RED='\033[0;31m' +YELLOW='\033[0;33m' + +help() { + echo -e "Usage: ./${script} [options]" \ + "\n${YELLOW}Options: ${NC}" \ + "\n\t -h ${GREEN}Show Help ${NC}" \ + "\n\t -c ${MAGENTA}Creates a users table ${NC}" \ + "\n\t -s ${MAGENTA}Shows all records in the users table ${NC}" \ + "\n\t -r ${RED}Removes users table ${NC}" +} +remove-db () { + rm users.sqlite +} + +create-db () { + echo ' + CREATE TABLE users ( + user_id INTEGER not null, + name TEXT NOT NULL + ); + ' | sqlite3 users.sqlite +} + +show-db () { + echo ' + SELECT * FROM users; + ' | sqlite3 users.sqlite +} + +if [ $# == 0 ]; then + help + exit 0 +fi + +while getopts "h?csr" option +do + case "${option}" + in + h|\?) + help + exit 0 + ;; + c) create-db + ;; + s) show-db + ;; + r) remove-db + ;; + esac +done diff --git a/src/build.sh b/src/build.sh new file mode 100755 index 0000000..239cccb --- /dev/null +++ b/src/build.sh @@ -0,0 +1,3 @@ +#!/bin/bash +javac AddUser.java + diff --git a/src/sarif-summary.jq b/src/sarif-summary.jq new file mode 100644 index 0000000..1f60353 --- /dev/null +++ b/src/sarif-summary.jq @@ -0,0 +1,60 @@ +# -*- sh -*- +.runs | .[] | .results | .[] | + ( (.ruleId, ": ", + (.message.text | split("\n") | ( .[0], " [", length-1 , " more]")), + "\n") + , + (if (.codeFlows != null) then + (.codeFlows | .[] | + (" Path\n" + , + ( .threadFlows | .[] | .locations | .[] | .location | " " + , + ( .physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":")) + , + (.message.text, " ") + , + "\n" + ))) + else + (.locations | .[] | + ( " " + , + (.physicalLocation | ( .artifactLocation.uri, ":", .region.startLine, ":")) + )) + , + # .message.text, + "\n" + end) + ) | tostring + +# This script extracts the following parts of the sarif output: +# +# # problem +# "runs" : [ { +# "results" : [ { +# "ruleId" : "cpp/UncheckedErrorCode", + +# # path problem +# "runs" : [ { +# "tool" : { +# "driver" : { +# "rules" : [ { +# "properties" : { +# "kind" : "path-problem", + +# "runs" : [ { +# "results" : [ { +# "ruleId" : "cpp/DangerousArithmetic", +# "ruleIndex" : 6, +# "message" : { +# "text" : "Potential overflow (conversion: int -> unsigned int)\nPotential overflow (con + +# "runs" : [ { +# "results" : [ { +# "codeFlows" : [ { +# "threadFlows" : [ { +# "locations" : [ { +# "location" : { +# "message" : { +# "text" : "buff" diff --git a/src/sqlite-jdbc-3.36.0.1.jar b/src/sqlite-jdbc-3.36.0.1.jar new file mode 100644 index 0000000..60a39eb Binary files /dev/null and b/src/sqlite-jdbc-3.36.0.1.jar differ