Update Appendix

This commit is contained in:
Michael Hohn
2020-07-22 15:06:30 -07:00
committed by =Michael Hohn
parent ab99c0fb44
commit fd27af2216

View File

@@ -439,7 +439,7 @@ or
## The CodeQL Data Flow Configuration ## The CodeQL Taint Flow Configuration
The previous queries identify our source, sink and one additional flow step. To The previous queries identify our source, sink and one additional flow step. To
use global data flow and taint tracking we need some additional codeql setup: use global data flow and taint tracking we need some additional codeql setup:
- a taint flow configuration - a taint flow configuration
@@ -532,7 +532,7 @@ where conf.hasFlowPath(source, sink)
select sink, source, sink, "Possible SQL injection" select sink, source, sink, "Possible SQL injection"
``` ```
## Tutorial: Data Flow Details ## Tutorial: Taint Flow Details
With the dataflow configuration in place, we just need to provide the details for With the dataflow configuration in place, we just need to provide the details for
source(s), sink(s), and taint step(s). source(s), sink(s), and taint step(s).
@@ -664,10 +664,9 @@ If you quick-eval this predicate, you will see that `source` is now `ref arg buf
instead of `buf`. instead of `buf`.
### The Extra Flow Step ### The isAdditionalTaintStep Predicate
xx: xx:
In the `snprintf` macro call, those have indices 0 and 4. In the underlying function In the `snprintf` macro call, those have indices 0 and 4. In the underlying function
`__builtin___snprintf_chk`, the indices are 0 and 6. Using the latter: `__builtin___snprintf_chk`, the indices are 0 and 6. Using the latter:
```ql ```ql
@@ -681,368 +680,167 @@ select printf, out, into
This correctly identifies the call and the extra flow arguments. This correctly identifies the call and the extra flow arguments.
### Complete query
xx:
Using the previous predicates
sqliSourceDemo(source)
sqliSink(sink, _)
stlBslTaintStep(n1, n2)
our full query is now
```ql
/**
* @name SQLI Vulnerability
* @description Building a sql query dynamically may lead to sql injection vulnerability
* @kind path-problem
* @id cpp/SQLIVulnerable
* @problem.severity warning
*/
import semmle.code.cpp.dataflow.TaintTracking
import semmle.code.cpp.models.implementations.Pure
import DataFlow::PathGraph
/**
* The taint tracking configuration
*/
class SqliFlowConfig extends TaintTracking::Configuration {
SqliFlowConfig() { this = "SqliFlow" }
override predicate isSource(DataFlow::Node source) {
// Use sqliSourceProduction(this, source) in that case
sqliSourceDemo(source)
}
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
stlBslTaintStep(n1, n2)
}
override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) }
}
/*
* The main query
*/
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label
where
// Flow path setup
conf.hasFlowPath(source, sink) and
source != sink and
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_)
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName()
else label = "source"
select sink, source, sink, "Sqli flow from $@", source, label
```
## Appendix ## Appendix
This appendix has the C++ test source, the bslstrings query, and the bslstrings This appendix has the complete C source and codeql query.
library. The latter are in one file for convenience.
### Test case: simple.cc ### The complete Query: SqlInjection.ql
```c The full query is
#include <bslstl_string.h>
#include <bslstl_stringstream.h>
void executeStatement(const bsl::string &sQuery);
int checkClusterSQL(const bsl::string &sDatabase, const bsl::string &sQuery,
const bsl::string &sObjectName);
int main(int argc, char **argv) {
bsl::stringstream oSS;
// Local constants
int iUUID = 123;
bsl::string sObjectName("HELLO");
// User-supplied iLevel
int iLevel = std::stol(argv[1]);
oSS << "SELECT object_name_upper, object_value_name_upper "
<< "FROM pvfx_privilege "
<< "WHERE uuid=" << iUUID << " "
<< "AND object_name_upper=\"" << sObjectName << "\" "
<< "AND pvf_function=\"" << "sFunction" << "\" "
<< "AND pvf_level=" << iLevel;
bsl::string sQuery(oSS.str());
int iErrorCode = checkClusterSQL("pvfxdb", sQuery, sObjectName);
// a_cdb2::SqlService sqlService("default");
// sqlService.executeStatement(sQuery);
executeStatement(sQuery);
}
```
### bslstrings query and library: bslstrings.ql
The complete query is first, followed by the library components.
```ql ```ql
/** /**
* @name SQLI Vulnerability * @name SQLI Vulnerability
* @description Building a sql query dynamically may lead to sql injection vulnerability * @description Using untrusted strings in a sql query allows sql injection attacks.
* @kind path-problem * @kind path-problem
* @id cpp/SQLIVulnerable * @id cpp/SQLIVulnerable
* @problem.severity warning * @problem.severity warning
*/ */
import cpp
import semmle.code.cpp.dataflow.TaintTracking import semmle.code.cpp.dataflow.TaintTracking
import semmle.code.cpp.models.implementations.Pure
import DataFlow::PathGraph import DataFlow::PathGraph
/**
* The taint tracking configuration
*/
class SqliFlowConfig extends TaintTracking::Configuration { class SqliFlowConfig extends TaintTracking::Configuration {
SqliFlowConfig() { this = "SqliFlow" } SqliFlowConfig() { this = "SqliFlow" }
override predicate isSource(DataFlow::Node source) { override predicate isSource(DataFlow::Node source) {
// Use sqliSourceProduction(this, source) in that case // count = read(STDIN_FILENO, buf, BUFSIZE);
sqliSourceDemo(source) exists(FunctionCall read |
} read.getTarget().getName() = "read" and
read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { )
stlBslTaintStep(n1, n2)
} }
override predicate isSanitizer(DataFlow::Node sanitizer) { none() } override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) } override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
} // Extra taint step
// snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
/* // But snprintf is a macro on mac os. The actual function's name is
* The main query // #undef snprintf
*/ // #define snprintf(str, len, ...) \
// __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label // #endif
where exists(FunctionCall printf |
// Flow path setup printf.getTarget().getName().matches("%snprintf%") and
conf.hasFlowPath(source, sink) and printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
source != sink and // very specific: shifted index for macro. We can generalize this to consider
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_) // all trailing arguments as sources.
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName() printf.getArgument(6) = into.asExpr()
else label = "source"
select sink, source, sink, "Sqli flow from $@", source, label
// Identify the sink(s) for DataFlow
predicate sqliSink(DataFlow::Node nd, FunctionCall fc) {
fc.getTarget().getName() = "executeStatement" and
fc.getArgument(0) = nd.asExpr()
}
// Identify the source(s) for DataFlow; this version for the demonstration.
predicate sqliSourceDemo(DataFlow::Node nd) {
// variable use
nd.asExpr() instanceof VariableAccess and
nd.getLocation().getFile().getShortName() = "simple"
}
// Identify the source(s) for DataFlow; this version for full applications
predicate sqliSourceProduction(SqliFlowConfig config, DataFlow::Node source) {
// Approximates places where we concatenate a var with a string
source.asExpr() instanceof VariableAccess and
config.isAdditionalTaintStep(source, _) and
// These are the steps where we get an existing value out, so don't use as source.
not qualifierToCallStep(source, _, _) and
(
// We are reading a non-local variable (field, param, etc)
exists(Variable v |
source.asExpr().(VariableAccess).getTarget() = v and
not v instanceof LocalVariable
) )
or }
// We are reading a local, but something wrote to it since definition
exists(LocalVariable v, VariableAccess mid |
source.asExpr().(VariableAccess).getTarget() = v and
mid.getTarget() = v and
mid = v.getInitializer().getASuccessor+() and
source.asExpr() = mid.getASuccessor+()
)
)
}
/** override predicate isSink(DataFlow::Node sink) {
* Library routines. These are for more in-depth development. // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
*/ exists(FunctionCall exec |
// argv is a Parameter and exists(DataFlow::Node n | n.asParameter() = e) holds exec.getTarget().getName() = "sqlite3_exec" and
predicate whatsThere(Element e, string info, int line, string file) { exec.getArgument(1) = sink.asExpr()
line = e.getLocation().getStartLine() and )
line = [10] and
file = e.getLocation().getFile().getShortName() and
file.matches("%simple%") and
info = e.getAQlClass() and
exists(DataFlow::Node n | n.asParameter() = e)
}
// Basic type that represents a string for our purposes
class StringLikeType extends Type {
StringLikeType() {
this.getName().matches("%string%") or
this.getName().matches("%stream%") or
this.(ReferenceType).getBaseType() instanceof StringLikeType
} }
} }
// Capture all types that may be used to form or append to a string. from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
class AppendableToString extends Type { where conf.hasFlowPath(source, sink)
AppendableToString() { select sink, source, sink, "Possible SQL injection"
this instanceof StringLikeType or ```
this instanceof CharPointerType or
this instanceof IntegralType ### The Database Writer: add-user.c
The complete source for the sqlite database writer
```c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <sqlite3.h>
#include <time.h>
void write_log(const char* fmt, ...) {
time_t t;
char tstr[26];
va_list args;
va_start(args, fmt);
t = time(NULL);
ctime_r(&t, tstr);
tstr[24] = 0; /* no \n */
fprintf(stderr, "[%s] ", tstr);
vfprintf(stderr, fmt, args);
va_end(args);
fflush(stderr);
}
void abort_on_error(int rc, sqlite3 *db) {
if( rc ) {
fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
fflush(stderr);
abort();
} }
} }
// Identify pure, non-member functions taking a tainted value and returning a taint, of the form void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
// val = func(arg). Avoids overlap with instance-modifying members and generic functions. if( rc!=SQLITE_OK ){
predicate pureFuncArgToCallStep(Function f) { fprintf(stderr, "SQL error: %s\n", zErrMsg);
not f.isMember() and sqlite3_free(zErrMsg);
( sqlite3_close(db);
f instanceof PureStrFunction or fflush(stderr);
f.getName() = "stol" abort();
) and }
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof AppendableToString
} }
predicate pureFuncArgToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { char* get_user_info() {
pureFuncArgToCallStep(fc.getTarget()) and #define BUFSIZE 1024
( char* buf = (char*) malloc(BUFSIZE * sizeof(char));
// argument taints call result. Note that pure functions don't have PostUpdateNodes int count;
n1.asExpr() = fc.getAnArgument() and // Disable buffering to avoid need for fflush
n2.asExpr() = fc // after printf().
) setbuf( stdout, NULL );
printf("*** Welcome to sql injection ***\n");
printf("Please enter name: ");
count = read(STDIN_FILENO, buf, BUFSIZE);
if (count <= 0) abort();
/* strip trailing whitespace */
while (count && isspace(buf[count-1])) {
buf[count-1] = 0; --count;
}
return buf;
} }
// Identify member functions taking a tainted value and returning a taint, of the form int get_new_id() {
// qual.func(arg). int id = getpid();
predicate argToCallStep(Function f) { return id;
f.getDeclaringType() instanceof StringLikeType and
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof StringLikeType
} }
predicate argToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { void write_info(int id, char* info) {
argToCallStep(fc.getTarget()) and sqlite3 *db;
( int rc;
// argument taints call result int bufsize = 1024;
n1.asExpr() = fc.getAnArgument() and char *zErrMsg = 0;
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc char query[bufsize];
or
// The argument taints the post-update node of the qualifier /* open db */
// or rc = sqlite3_open("users.sqlite", &db);
// the argument taints leftmost argument in the call chain abort_on_error(rc, db);
n1.asExpr() = fc.getArgument(0) and
exists(Expr found | /* Format query */
leftmost(fc.getQualifier(), found) and snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found write_log("query: %s\n", query);
)
) /* Write info */
rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
abort_on_exec_error(rc, db, zErrMsg);
sqlite3_close(db);
} }
// Identify functions where a tainted qualifier taints the result. int main(int argc, char* argv[]) {
// This includes qual.str() and stream << arg (which is stream::operator<<(arg)) char* info;
// int id;
// In the FunctionCall, not here, the qualifier is `this`. Here, the declaring info = get_user_info();
// type is the later type of `this`. id = get_new_id();
// This covers chaining of methods. e.g., foo << seek(10) << "hello" will chain write_info(id, info);
predicate qualifierToCallStep(Function f) { /*
f.getDeclaringType() instanceof StringLikeType and * show_info(id);
f.getType().getUnspecifiedType() instanceof StringLikeType */
}
// Tainted qualifier taints the call's result, e.g., qual.str()
predicate qualifierToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
qualifierToCallStep(fc.getTarget()) and
(
n1.asExpr() = fc.getQualifier() and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
// Cover cases missing the PostUpdateNode, like oSS.str()
n1.asExpr() = fc.getQualifier() and
n2.asExpr() = fc
)
}
// Find parameterized override of operator<<, typically of the form
// stream& operator<<(stream&, AppendableToString)
predicate operatorAsFunctionStep(Function f) {
not exists(f.getDeclaringType()) and
f.getName() = "operator<<" and
f.getParameter(0).getType().getUnspecifiedType() instanceof StringLikeType and
f.getParameter(1).getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof StringLikeType
}
predicate operatorAsFunctionStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
operatorAsFunctionStep(fc.getTarget()) and
(
// both arguments taint result
n1.asExpr() = fc.getArgument(0) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
n1.asExpr() = fc.getArgument(1) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
// The second argument taints the post-update node of the first
// or
// the rightmost (second) argument also taints the leftmost argument at the
// beginning of the call chain.
// ( ( head << mid) << last)
// ^________________/
// \-left-------/
n1.asExpr() = fc.getArgument(1) and
exists(Expr found |
leftmost(fc.getArgument(0), found) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found
)
)
}
// For a FunctionCall chain like (((head << n1) << n2) << last),
// find `head` starting from `last`
//
// The rightmost argument also taints the leftmost argument at the
// beginning of the call chain.
// ( ( head << mid) << last)
// ^________________/
// \-left-------/
predicate leftmost(FunctionCall fc, Expr head) {
//
operatorAsFunctionStep(fc.getTarget()) and
not fc.getArgument(0) instanceof FunctionCall and
head = fc.getArgument(0)
or
//
(argToCallStep(fc.getTarget()) or qualifierToCallStep(fc.getTarget())) and
not fc.getQualifier() instanceof FunctionCall and
head = fc.getQualifier()
or
//
leftmost(fc.getArgument(0), head)
or
leftmost(fc.getQualifier(), head)
}
// Propagate values from an array to an element access, `a` taints `a[i]`
predicate elementAccessStep(DataFlow::Node n1, DataFlow::Node n2, ArrayExpr a) {
// arr -> arr[ind]
n2.asExpr() = a and
a.getArrayBase() = n1.asExpr()
}
// All the stl and bsl taint steps in a single predicate.
predicate stlBslTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
operatorAsFunctionStep(n1, n2, _) or
qualifierToCallStep(n1, n2, _) or
argToCallStep(n1, n2, _) or
pureFuncArgToCallStep(n1, n2, _) or
elementAccessStep(n1, n2, _)
} }
``` ```