mirror of
https://github.com/hohn/codeql-dataflow-sql-injection.git
synced 2025-12-16 02:03:05 +01:00
Update Appendix
This commit is contained in:
committed by
=Michael Hohn
parent
ab99c0fb44
commit
fd27af2216
@@ -439,7 +439,7 @@ or
|
||||
|
||||
|
||||
|
||||
## The CodeQL Data Flow Configuration
|
||||
## The CodeQL Taint Flow Configuration
|
||||
The previous queries identify our source, sink and one additional flow step. To
|
||||
use global data flow and taint tracking we need some additional codeql setup:
|
||||
- a taint flow configuration
|
||||
@@ -532,7 +532,7 @@ where conf.hasFlowPath(source, sink)
|
||||
select sink, source, sink, "Possible SQL injection"
|
||||
```
|
||||
|
||||
## Tutorial: Data Flow Details
|
||||
## Tutorial: Taint Flow Details
|
||||
With the dataflow configuration in place, we just need to provide the details for
|
||||
source(s), sink(s), and taint step(s).
|
||||
|
||||
@@ -664,10 +664,9 @@ If you quick-eval this predicate, you will see that `source` is now `ref arg buf
|
||||
instead of `buf`.
|
||||
|
||||
|
||||
### The Extra Flow Step
|
||||
### The isAdditionalTaintStep Predicate
|
||||
xx:
|
||||
|
||||
|
||||
In the `snprintf` macro call, those have indices 0 and 4. In the underlying function
|
||||
`__builtin___snprintf_chk`, the indices are 0 and 6. Using the latter:
|
||||
```ql
|
||||
@@ -681,368 +680,167 @@ select printf, out, into
|
||||
|
||||
This correctly identifies the call and the extra flow arguments.
|
||||
|
||||
### Complete query
|
||||
xx:
|
||||
## Appendix
|
||||
This appendix has the complete C source and codeql query.
|
||||
|
||||
Using the previous predicates
|
||||
|
||||
sqliSourceDemo(source)
|
||||
sqliSink(sink, _)
|
||||
stlBslTaintStep(n1, n2)
|
||||
|
||||
our full query is now
|
||||
### The complete Query: SqlInjection.ql
|
||||
The full query is
|
||||
|
||||
```ql
|
||||
/**
|
||||
* @name SQLI Vulnerability
|
||||
* @description Building a sql query dynamically may lead to sql injection vulnerability
|
||||
* @description Using untrusted strings in a sql query allows sql injection attacks.
|
||||
* @kind path-problem
|
||||
* @id cpp/SQLIVulnerable
|
||||
* @problem.severity warning
|
||||
*/
|
||||
|
||||
import cpp
|
||||
import semmle.code.cpp.dataflow.TaintTracking
|
||||
import semmle.code.cpp.models.implementations.Pure
|
||||
import DataFlow::PathGraph
|
||||
|
||||
/**
|
||||
* The taint tracking configuration
|
||||
*/
|
||||
class SqliFlowConfig extends TaintTracking::Configuration {
|
||||
SqliFlowConfig() { this = "SqliFlow" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
// Use sqliSourceProduction(this, source) in that case
|
||||
sqliSourceDemo(source)
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
stlBslTaintStep(n1, n2)
|
||||
// count = read(STDIN_FILENO, buf, BUFSIZE);
|
||||
exists(FunctionCall read |
|
||||
read.getTarget().getName() = "read" and
|
||||
read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) }
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
|
||||
// Extra taint step
|
||||
// snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
|
||||
// But snprintf is a macro on mac os. The actual function's name is
|
||||
// #undef snprintf
|
||||
// #define snprintf(str, len, ...) \
|
||||
// __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
|
||||
// #endif
|
||||
exists(FunctionCall printf |
|
||||
printf.getTarget().getName().matches("%snprintf%") and
|
||||
printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
|
||||
// very specific: shifted index for macro. We can generalize this to consider
|
||||
// all trailing arguments as sources.
|
||||
printf.getArgument(6) = into.asExpr()
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
// rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
|
||||
exists(FunctionCall exec |
|
||||
exec.getTarget().getName() = "sqlite3_exec" and
|
||||
exec.getArgument(1) = sink.asExpr()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The main query
|
||||
*/
|
||||
|
||||
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label
|
||||
where
|
||||
// Flow path setup
|
||||
conf.hasFlowPath(source, sink) and
|
||||
source != sink and
|
||||
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_)
|
||||
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName()
|
||||
else label = "source"
|
||||
select sink, source, sink, "Sqli flow from $@", source, label
|
||||
|
||||
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where conf.hasFlowPath(source, sink)
|
||||
select sink, source, sink, "Possible SQL injection"
|
||||
```
|
||||
|
||||
## Appendix
|
||||
This appendix has the C++ test source, the bslstrings query, and the bslstrings
|
||||
library. The latter are in one file for convenience.
|
||||
|
||||
### Test case: simple.cc
|
||||
### The Database Writer: add-user.c
|
||||
The complete source for the sqlite database writer
|
||||
```c
|
||||
#include <bslstl_string.h>
|
||||
#include <bslstl_stringstream.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <ctype.h>
|
||||
#include <sqlite3.h>
|
||||
#include <time.h>
|
||||
|
||||
void executeStatement(const bsl::string &sQuery);
|
||||
void write_log(const char* fmt, ...) {
|
||||
time_t t;
|
||||
char tstr[26];
|
||||
va_list args;
|
||||
|
||||
int checkClusterSQL(const bsl::string &sDatabase, const bsl::string &sQuery,
|
||||
const bsl::string &sObjectName);
|
||||
va_start(args, fmt);
|
||||
t = time(NULL);
|
||||
ctime_r(&t, tstr);
|
||||
tstr[24] = 0; /* no \n */
|
||||
fprintf(stderr, "[%s] ", tstr);
|
||||
vfprintf(stderr, fmt, args);
|
||||
va_end(args);
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
void abort_on_error(int rc, sqlite3 *db) {
|
||||
if( rc ) {
|
||||
fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
|
||||
sqlite3_close(db);
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
bsl::stringstream oSS;
|
||||
// Local constants
|
||||
int iUUID = 123;
|
||||
bsl::string sObjectName("HELLO");
|
||||
// User-supplied iLevel
|
||||
int iLevel = std::stol(argv[1]);
|
||||
void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
|
||||
if( rc!=SQLITE_OK ){
|
||||
fprintf(stderr, "SQL error: %s\n", zErrMsg);
|
||||
sqlite3_free(zErrMsg);
|
||||
sqlite3_close(db);
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
char* get_user_info() {
|
||||
#define BUFSIZE 1024
|
||||
char* buf = (char*) malloc(BUFSIZE * sizeof(char));
|
||||
int count;
|
||||
// Disable buffering to avoid need for fflush
|
||||
// after printf().
|
||||
setbuf( stdout, NULL );
|
||||
printf("*** Welcome to sql injection ***\n");
|
||||
printf("Please enter name: ");
|
||||
count = read(STDIN_FILENO, buf, BUFSIZE);
|
||||
if (count <= 0) abort();
|
||||
/* strip trailing whitespace */
|
||||
while (count && isspace(buf[count-1])) {
|
||||
buf[count-1] = 0; --count;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
oSS << "SELECT object_name_upper, object_value_name_upper "
|
||||
<< "FROM pvfx_privilege "
|
||||
<< "WHERE uuid=" << iUUID << " "
|
||||
<< "AND object_name_upper=\"" << sObjectName << "\" "
|
||||
<< "AND pvf_function=\"" << "sFunction" << "\" "
|
||||
<< "AND pvf_level=" << iLevel;
|
||||
bsl::string sQuery(oSS.str());
|
||||
int iErrorCode = checkClusterSQL("pvfxdb", sQuery, sObjectName);
|
||||
int get_new_id() {
|
||||
int id = getpid();
|
||||
return id;
|
||||
}
|
||||
|
||||
// a_cdb2::SqlService sqlService("default");
|
||||
// sqlService.executeStatement(sQuery);
|
||||
executeStatement(sQuery);
|
||||
void write_info(int id, char* info) {
|
||||
sqlite3 *db;
|
||||
int rc;
|
||||
int bufsize = 1024;
|
||||
char *zErrMsg = 0;
|
||||
char query[bufsize];
|
||||
|
||||
/* open db */
|
||||
rc = sqlite3_open("users.sqlite", &db);
|
||||
abort_on_error(rc, db);
|
||||
|
||||
/* Format query */
|
||||
snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
|
||||
write_log("query: %s\n", query);
|
||||
|
||||
/* Write info */
|
||||
rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
|
||||
abort_on_exec_error(rc, db, zErrMsg);
|
||||
|
||||
sqlite3_close(db);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
char* info;
|
||||
int id;
|
||||
info = get_user_info();
|
||||
id = get_new_id();
|
||||
write_info(id, info);
|
||||
/*
|
||||
* show_info(id);
|
||||
*/
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### bslstrings query and library: bslstrings.ql
|
||||
The complete query is first, followed by the library components.
|
||||
```ql
|
||||
/**
|
||||
* @name SQLI Vulnerability
|
||||
* @description Building a sql query dynamically may lead to sql injection vulnerability
|
||||
* @kind path-problem
|
||||
* @id cpp/SQLIVulnerable
|
||||
* @problem.severity warning
|
||||
*/
|
||||
|
||||
import semmle.code.cpp.dataflow.TaintTracking
|
||||
import semmle.code.cpp.models.implementations.Pure
|
||||
import DataFlow::PathGraph
|
||||
|
||||
/**
|
||||
* The taint tracking configuration
|
||||
*/
|
||||
class SqliFlowConfig extends TaintTracking::Configuration {
|
||||
SqliFlowConfig() { this = "SqliFlow" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
// Use sqliSourceProduction(this, source) in that case
|
||||
sqliSourceDemo(source)
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
stlBslTaintStep(n1, n2)
|
||||
}
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) }
|
||||
}
|
||||
|
||||
/*
|
||||
* The main query
|
||||
*/
|
||||
|
||||
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label
|
||||
where
|
||||
// Flow path setup
|
||||
conf.hasFlowPath(source, sink) and
|
||||
source != sink and
|
||||
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_)
|
||||
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName()
|
||||
else label = "source"
|
||||
select sink, source, sink, "Sqli flow from $@", source, label
|
||||
|
||||
// Identify the sink(s) for DataFlow
|
||||
predicate sqliSink(DataFlow::Node nd, FunctionCall fc) {
|
||||
fc.getTarget().getName() = "executeStatement" and
|
||||
fc.getArgument(0) = nd.asExpr()
|
||||
}
|
||||
|
||||
// Identify the source(s) for DataFlow; this version for the demonstration.
|
||||
predicate sqliSourceDemo(DataFlow::Node nd) {
|
||||
// variable use
|
||||
nd.asExpr() instanceof VariableAccess and
|
||||
nd.getLocation().getFile().getShortName() = "simple"
|
||||
}
|
||||
|
||||
// Identify the source(s) for DataFlow; this version for full applications
|
||||
predicate sqliSourceProduction(SqliFlowConfig config, DataFlow::Node source) {
|
||||
// Approximates places where we concatenate a var with a string
|
||||
source.asExpr() instanceof VariableAccess and
|
||||
config.isAdditionalTaintStep(source, _) and
|
||||
// These are the steps where we get an existing value out, so don't use as source.
|
||||
not qualifierToCallStep(source, _, _) and
|
||||
(
|
||||
// We are reading a non-local variable (field, param, etc)
|
||||
exists(Variable v |
|
||||
source.asExpr().(VariableAccess).getTarget() = v and
|
||||
not v instanceof LocalVariable
|
||||
)
|
||||
or
|
||||
// We are reading a local, but something wrote to it since definition
|
||||
exists(LocalVariable v, VariableAccess mid |
|
||||
source.asExpr().(VariableAccess).getTarget() = v and
|
||||
mid.getTarget() = v and
|
||||
mid = v.getInitializer().getASuccessor+() and
|
||||
source.asExpr() = mid.getASuccessor+()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Library routines. These are for more in-depth development.
|
||||
*/
|
||||
// argv is a Parameter and exists(DataFlow::Node n | n.asParameter() = e) holds
|
||||
predicate whatsThere(Element e, string info, int line, string file) {
|
||||
line = e.getLocation().getStartLine() and
|
||||
line = [10] and
|
||||
file = e.getLocation().getFile().getShortName() and
|
||||
file.matches("%simple%") and
|
||||
info = e.getAQlClass() and
|
||||
exists(DataFlow::Node n | n.asParameter() = e)
|
||||
}
|
||||
|
||||
// Basic type that represents a string for our purposes
|
||||
class StringLikeType extends Type {
|
||||
StringLikeType() {
|
||||
this.getName().matches("%string%") or
|
||||
this.getName().matches("%stream%") or
|
||||
this.(ReferenceType).getBaseType() instanceof StringLikeType
|
||||
}
|
||||
}
|
||||
|
||||
// Capture all types that may be used to form or append to a string.
|
||||
class AppendableToString extends Type {
|
||||
AppendableToString() {
|
||||
this instanceof StringLikeType or
|
||||
this instanceof CharPointerType or
|
||||
this instanceof IntegralType
|
||||
}
|
||||
}
|
||||
|
||||
// Identify pure, non-member functions taking a tainted value and returning a taint, of the form
|
||||
// val = func(arg). Avoids overlap with instance-modifying members and generic functions.
|
||||
predicate pureFuncArgToCallStep(Function f) {
|
||||
not f.isMember() and
|
||||
(
|
||||
f instanceof PureStrFunction or
|
||||
f.getName() = "stol"
|
||||
) and
|
||||
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
|
||||
f.getType().getUnspecifiedType() instanceof AppendableToString
|
||||
}
|
||||
|
||||
predicate pureFuncArgToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
|
||||
pureFuncArgToCallStep(fc.getTarget()) and
|
||||
(
|
||||
// argument taints call result. Note that pure functions don't have PostUpdateNodes
|
||||
n1.asExpr() = fc.getAnArgument() and
|
||||
n2.asExpr() = fc
|
||||
)
|
||||
}
|
||||
|
||||
// Identify member functions taking a tainted value and returning a taint, of the form
|
||||
// qual.func(arg).
|
||||
predicate argToCallStep(Function f) {
|
||||
f.getDeclaringType() instanceof StringLikeType and
|
||||
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
|
||||
f.getType().getUnspecifiedType() instanceof StringLikeType
|
||||
}
|
||||
|
||||
predicate argToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
|
||||
argToCallStep(fc.getTarget()) and
|
||||
(
|
||||
// argument taints call result
|
||||
n1.asExpr() = fc.getAnArgument() and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
|
||||
or
|
||||
// The argument taints the post-update node of the qualifier
|
||||
// or
|
||||
// the argument taints leftmost argument in the call chain
|
||||
n1.asExpr() = fc.getArgument(0) and
|
||||
exists(Expr found |
|
||||
leftmost(fc.getQualifier(), found) and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
// Identify functions where a tainted qualifier taints the result.
|
||||
// This includes qual.str() and stream << arg (which is stream::operator<<(arg))
|
||||
//
|
||||
// In the FunctionCall, not here, the qualifier is `this`. Here, the declaring
|
||||
// type is the later type of `this`.
|
||||
// This covers chaining of methods. e.g., foo << seek(10) << "hello" will chain
|
||||
predicate qualifierToCallStep(Function f) {
|
||||
f.getDeclaringType() instanceof StringLikeType and
|
||||
f.getType().getUnspecifiedType() instanceof StringLikeType
|
||||
}
|
||||
|
||||
// Tainted qualifier taints the call's result, e.g., qual.str()
|
||||
predicate qualifierToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
|
||||
qualifierToCallStep(fc.getTarget()) and
|
||||
(
|
||||
n1.asExpr() = fc.getQualifier() and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
|
||||
or
|
||||
// Cover cases missing the PostUpdateNode, like oSS.str()
|
||||
n1.asExpr() = fc.getQualifier() and
|
||||
n2.asExpr() = fc
|
||||
)
|
||||
}
|
||||
|
||||
// Find parameterized override of operator<<, typically of the form
|
||||
// stream& operator<<(stream&, AppendableToString)
|
||||
predicate operatorAsFunctionStep(Function f) {
|
||||
not exists(f.getDeclaringType()) and
|
||||
f.getName() = "operator<<" and
|
||||
f.getParameter(0).getType().getUnspecifiedType() instanceof StringLikeType and
|
||||
f.getParameter(1).getType().getUnspecifiedType() instanceof AppendableToString and
|
||||
f.getType().getUnspecifiedType() instanceof StringLikeType
|
||||
}
|
||||
|
||||
predicate operatorAsFunctionStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
|
||||
operatorAsFunctionStep(fc.getTarget()) and
|
||||
(
|
||||
// both arguments taint result
|
||||
n1.asExpr() = fc.getArgument(0) and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
|
||||
or
|
||||
n1.asExpr() = fc.getArgument(1) and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
|
||||
or
|
||||
// The second argument taints the post-update node of the first
|
||||
// or
|
||||
// the rightmost (second) argument also taints the leftmost argument at the
|
||||
// beginning of the call chain.
|
||||
// ( ( head << mid) << last)
|
||||
// ^________________/
|
||||
// \-left-------/
|
||||
n1.asExpr() = fc.getArgument(1) and
|
||||
exists(Expr found |
|
||||
leftmost(fc.getArgument(0), found) and
|
||||
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
// For a FunctionCall chain like (((head << n1) << n2) << last),
|
||||
// find `head` starting from `last`
|
||||
//
|
||||
// The rightmost argument also taints the leftmost argument at the
|
||||
// beginning of the call chain.
|
||||
// ( ( head << mid) << last)
|
||||
// ^________________/
|
||||
// \-left-------/
|
||||
predicate leftmost(FunctionCall fc, Expr head) {
|
||||
//
|
||||
operatorAsFunctionStep(fc.getTarget()) and
|
||||
not fc.getArgument(0) instanceof FunctionCall and
|
||||
head = fc.getArgument(0)
|
||||
or
|
||||
//
|
||||
(argToCallStep(fc.getTarget()) or qualifierToCallStep(fc.getTarget())) and
|
||||
not fc.getQualifier() instanceof FunctionCall and
|
||||
head = fc.getQualifier()
|
||||
or
|
||||
//
|
||||
leftmost(fc.getArgument(0), head)
|
||||
or
|
||||
leftmost(fc.getQualifier(), head)
|
||||
}
|
||||
|
||||
// Propagate values from an array to an element access, `a` taints `a[i]`
|
||||
predicate elementAccessStep(DataFlow::Node n1, DataFlow::Node n2, ArrayExpr a) {
|
||||
// arr -> arr[ind]
|
||||
n2.asExpr() = a and
|
||||
a.getArrayBase() = n1.asExpr()
|
||||
}
|
||||
|
||||
// All the stl and bsl taint steps in a single predicate.
|
||||
predicate stlBslTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
|
||||
operatorAsFunctionStep(n1, n2, _) or
|
||||
qualifierToCallStep(n1, n2, _) or
|
||||
argToCallStep(n1, n2, _) or
|
||||
pureFuncArgToCallStep(n1, n2, _) or
|
||||
elementAccessStep(n1, n2, _)
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user