mirror of
https://github.com/hohn/codeql-dataflow-sql-injection.git
synced 2025-12-16 10:13:04 +01:00
224 lines
7.0 KiB
Org Mode
224 lines
7.0 KiB
Org Mode
* SQL injection example
|
||
** Setup and sample run
|
||
#+BEGIN_SRC sh
|
||
# Use a simple headline prompt
|
||
PS1='
|
||
\033[32m---- SQL injection demo ----\[\033[33m\033[0m\]
|
||
$?:$ '
|
||
|
||
|
||
# Build
|
||
./build.sh
|
||
|
||
# Prepare db
|
||
./admin -r
|
||
./admin -c
|
||
./admin -s
|
||
|
||
# Add regular user interactively
|
||
./add-user 2>> users.log
|
||
First User
|
||
|
||
|
||
# Regular user via "external" process
|
||
echo "User Outside" | ./add-user 2>> users.log
|
||
|
||
# Check
|
||
./admin -s
|
||
|
||
# Add Johnny Droptable
|
||
./add-user 2>> users.log
|
||
Johnny'); DROP TABLE users; --
|
||
|
||
# And the problem:
|
||
./admin -s
|
||
|
||
# Check the log
|
||
tail users.log
|
||
#+END_SRC
|
||
|
||
** Identify the problem
|
||
=./add-user= is reading from =STDIN=, and writing to a database; looking at the code in
|
||
[[./add-user.c]] leads to
|
||
: count = read(STDIN_FILENO, buf, BUFSIZE - 1);
|
||
for the read and
|
||
: rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
|
||
for the write.
|
||
|
||
This problem is thus a dataflow problem; in codeql terminology we have
|
||
- a /source/ at the =read(STDIN_FILENO, buf, BUFSIZE - 1);=
|
||
- a /sink/ at the =sqlite3_exec(db, query, NULL, 0, &zErrMsg);=
|
||
|
||
We write codeql to identify these two, and then connect them via
|
||
- a /dataflow configuration/ -- for this problem, the more general /taintflow
|
||
configuration/.
|
||
|
||
** Build codeql database
|
||
To get started, build the codeql database (adjust paths to your setup):
|
||
#+BEGIN_SRC sh
|
||
# Build the db with source commit id.
|
||
export PATH=$HOME/local/vmsync/codeql250:"$PATH"
|
||
SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection
|
||
DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
|
||
|
||
echo $DB
|
||
test -d "$DB" && rm -fR "$DB"
|
||
mkdir -p "$DB"
|
||
|
||
cd $SRCDIR && codeql database create --language=cpp -s . -j 8 -v $DB --command='./build.sh'
|
||
#+END_SRC
|
||
|
||
Then add this database directory to your VS Code =DATABASES= tab.
|
||
|
||
|
||
** Build codeql database in steps
|
||
For larger projects, using a single command to build everything is costly when
|
||
any part of the build fails.
|
||
|
||
To build a database in steps, use the following sequence, adjusting paths to
|
||
your setup:
|
||
#+BEGIN_SRC sh
|
||
# Build the db with source commit id.
|
||
export PATH=$HOME/local/vmsync/codeql250:"$PATH"
|
||
SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection
|
||
DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
|
||
|
||
# Check paths
|
||
echo $DB
|
||
echo $SRCDIR
|
||
|
||
# Prepare db directory
|
||
test -d "$DB" && rm -fR "$DB"
|
||
mkdir -p "$DB"
|
||
|
||
# Run the build
|
||
cd $SRCDIR
|
||
codeql database init --language=cpp -s . -v $DB
|
||
# Repeat trace-command as needed to cover all targets
|
||
codeql database trace-command -v $DB -- make
|
||
codeql database finalize -j4 $DB
|
||
#+END_SRC
|
||
|
||
Then add this database directory to your VS Code =DATABASES= tab.
|
||
|
||
** Develop the query bottom-up
|
||
1. Identify the /source/ part of the
|
||
: read(STDIN_FILENO, buf, BUFSIZE - 1);
|
||
expression, the =buf= argument.
|
||
Start from a =from..where..select=, then convert to a predicate.
|
||
|
||
2. Identify the /sink/ part of the
|
||
: sqlite3_exec(db, query, NULL, 0, &zErrMsg);
|
||
expression, the =query= argument. Again start from =from..where..select=,
|
||
then convert to a predicate.
|
||
|
||
3. Fill in the /taintflow configuration/ boilerplate
|
||
#+BEGIN_SRC java
|
||
class CppSqli extends TaintTracking::Configuration {
|
||
CppSqli() { this = "CppSqli" }
|
||
|
||
override predicate isSource(DataFlow::Node node) {
|
||
none()
|
||
}
|
||
|
||
override predicate isSink(DataFlow::Node node) {
|
||
none()
|
||
}
|
||
}
|
||
#+END_SRC
|
||
|
||
Note that an inout-argument in C/C++ (the =buf= pointer is passed to =read=
|
||
and points to updated data after the return) is accessed as a codeql source
|
||
via
|
||
: source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
|
||
instead of the usual
|
||
: source.asExpr()
|
||
|
||
The final query (without =isAdditionalTaintStep=) is
|
||
#+BEGIN_SRC java
|
||
/**
|
||
,* @name SQLI Vulnerability
|
||
,* @description Using untrusted strings in a sql query allows sql injection attacks.
|
||
,* @kind path-problem
|
||
,* @id cpp/SQLIVulnerable
|
||
,* @problem.severity warning
|
||
,*/
|
||
|
||
import cpp
|
||
import semmle.code.cpp.dataflow.TaintTracking
|
||
import DataFlow::PathGraph
|
||
|
||
class SqliFlowConfig extends TaintTracking::Configuration {
|
||
SqliFlowConfig() { this = "SqliFlow" }
|
||
|
||
override predicate isSource(DataFlow::Node source) {
|
||
// count = read(STDIN_FILENO, buf, BUFSIZE - 1);
|
||
exists(FunctionCall read |
|
||
read.getTarget().getName() = "read" and
|
||
read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
|
||
)
|
||
}
|
||
|
||
override predicate isSink(DataFlow::Node sink) {
|
||
// rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
|
||
exists(FunctionCall exec |
|
||
exec.getTarget().getName() = "sqlite3_exec" and
|
||
exec.getArgument(1) = sink.asExpr()
|
||
)
|
||
}
|
||
}
|
||
|
||
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
|
||
where conf.hasFlowPath(source, sink)
|
||
select sink, source, sink, "Possible SQL injection"
|
||
#+END_SRC
|
||
|
||
** Optional: sarif file review of the results
|
||
Query results are available in several output formats using the cli. The
|
||
following produces the SARIF format, a JSON-based result description.
|
||
|
||
#+BEGIN_SRC sh
|
||
# The setup information from before
|
||
export PATH=$HOME/local/vmsync/codeql250:"$PATH"
|
||
SRCDIR=$HOME/local/codeql-training-material.cpp-sqli/cpp/codeql-dataflow-sql-injection
|
||
DB=$SRCDIR/cpp-sqli-$(cd $SRCDIR && git rev-parse --short HEAD)
|
||
|
||
# Check paths
|
||
echo $DB
|
||
echo $SRCDIR
|
||
|
||
# To see the help
|
||
codeql database analyze -h
|
||
|
||
# Run a query
|
||
codeql database analyze \
|
||
-v \
|
||
--ram=14000 \
|
||
-j12 \
|
||
--rerun \
|
||
--search-path ~/local/vmsync/ql \
|
||
--format=sarif-latest \
|
||
--output cpp-sqli.sarif \
|
||
-- \
|
||
$DB \
|
||
$SRCDIR/SqlInjection.ql
|
||
|
||
# Examine the file in an editor
|
||
edit cpp-sqli.sarif
|
||
#+END_SRC
|
||
|
||
An example of using the sarif data is in the jq script [[./sarif-summary.jq]].
|
||
When run against the sarif input via
|
||
#+BEGIN_SRC sh
|
||
jq --raw-output --join-output -f sarif-summary.jq < cpp-sqli.sarif > cpp-sqli.txt
|
||
#+END_SRC
|
||
it produces output in a form close to that of compiler error messages:
|
||
#+BEGIN_SRC text
|
||
query-id: message line
|
||
Path
|
||
...
|
||
Path
|
||
...
|
||
#+END_SRC
|
||
|