Update Appendix

This commit is contained in:
Michael Hohn
2020-07-22 15:06:30 -07:00
committed by =Michael Hohn
parent ab99c0fb44
commit fd27af2216

View File

@@ -439,7 +439,7 @@ or
## The CodeQL Data Flow Configuration
## The CodeQL Taint Flow Configuration
The previous queries identify our source, sink and one additional flow step. To
use global data flow and taint tracking we need some additional codeql setup:
- a taint flow configuration
@@ -532,7 +532,7 @@ where conf.hasFlowPath(source, sink)
select sink, source, sink, "Possible SQL injection"
```
## Tutorial: Data Flow Details
## Tutorial: Taint Flow Details
With the dataflow configuration in place, we just need to provide the details for
source(s), sink(s), and taint step(s).
@@ -664,10 +664,9 @@ If you quick-eval this predicate, you will see that `source` is now `ref arg buf
instead of `buf`.
### The Extra Flow Step
### The isAdditionalTaintStep Predicate
xx:
In the `snprintf` macro call, those have indices 0 and 4. In the underlying function
`__builtin___snprintf_chk`, the indices are 0 and 6. Using the latter:
```ql
@@ -681,368 +680,167 @@ select printf, out, into
This correctly identifies the call and the extra flow arguments.
### Complete query
xx:
## Appendix
This appendix has the complete C source and codeql query.
Using the previous predicates
sqliSourceDemo(source)
sqliSink(sink, _)
stlBslTaintStep(n1, n2)
our full query is now
### The complete Query: SqlInjection.ql
The full query is
```ql
/**
* @name SQLI Vulnerability
* @description Building a sql query dynamically may lead to sql injection vulnerability
* @description Using untrusted strings in a sql query allows sql injection attacks.
* @kind path-problem
* @id cpp/SQLIVulnerable
* @problem.severity warning
*/
import cpp
import semmle.code.cpp.dataflow.TaintTracking
import semmle.code.cpp.models.implementations.Pure
import DataFlow::PathGraph
/**
* The taint tracking configuration
*/
class SqliFlowConfig extends TaintTracking::Configuration {
SqliFlowConfig() { this = "SqliFlow" }
override predicate isSource(DataFlow::Node source) {
// Use sqliSourceProduction(this, source) in that case
sqliSourceDemo(source)
}
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
stlBslTaintStep(n1, n2)
// count = read(STDIN_FILENO, buf, BUFSIZE);
exists(FunctionCall read |
read.getTarget().getName() = "read" and
read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()
)
}
override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) }
override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) {
// Extra taint step
// snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
// But snprintf is a macro on mac os. The actual function's name is
// #undef snprintf
// #define snprintf(str, len, ...) \
// __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__)
// #endif
exists(FunctionCall printf |
printf.getTarget().getName().matches("%snprintf%") and
printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and
// very specific: shifted index for macro. We can generalize this to consider
// all trailing arguments as sources.
printf.getArgument(6) = into.asExpr()
)
}
override predicate isSink(DataFlow::Node sink) {
// rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
exists(FunctionCall exec |
exec.getTarget().getName() = "sqlite3_exec" and
exec.getArgument(1) = sink.asExpr()
)
}
}
/*
* The main query
*/
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label
where
// Flow path setup
conf.hasFlowPath(source, sink) and
source != sink and
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_)
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName()
else label = "source"
select sink, source, sink, "Sqli flow from $@", source, label
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink
where conf.hasFlowPath(source, sink)
select sink, source, sink, "Possible SQL injection"
```
## Appendix
This appendix has the C++ test source, the bslstrings query, and the bslstrings
library. The latter are in one file for convenience.
### Test case: simple.cc
### The Database Writer: add-user.c
The complete source for the sqlite database writer
```c
#include <bslstl_string.h>
#include <bslstl_stringstream.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <sqlite3.h>
#include <time.h>
void executeStatement(const bsl::string &sQuery);
void write_log(const char* fmt, ...) {
time_t t;
char tstr[26];
va_list args;
int checkClusterSQL(const bsl::string &sDatabase, const bsl::string &sQuery,
const bsl::string &sObjectName);
va_start(args, fmt);
t = time(NULL);
ctime_r(&t, tstr);
tstr[24] = 0; /* no \n */
fprintf(stderr, "[%s] ", tstr);
vfprintf(stderr, fmt, args);
va_end(args);
fflush(stderr);
}
int main(int argc, char **argv) {
void abort_on_error(int rc, sqlite3 *db) {
if( rc ) {
fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
fflush(stderr);
abort();
}
}
bsl::stringstream oSS;
// Local constants
int iUUID = 123;
bsl::string sObjectName("HELLO");
// User-supplied iLevel
int iLevel = std::stol(argv[1]);
void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) {
if( rc!=SQLITE_OK ){
fprintf(stderr, "SQL error: %s\n", zErrMsg);
sqlite3_free(zErrMsg);
sqlite3_close(db);
fflush(stderr);
abort();
}
}
char* get_user_info() {
#define BUFSIZE 1024
char* buf = (char*) malloc(BUFSIZE * sizeof(char));
int count;
// Disable buffering to avoid need for fflush
// after printf().
setbuf( stdout, NULL );
printf("*** Welcome to sql injection ***\n");
printf("Please enter name: ");
count = read(STDIN_FILENO, buf, BUFSIZE);
if (count <= 0) abort();
/* strip trailing whitespace */
while (count && isspace(buf[count-1])) {
buf[count-1] = 0; --count;
}
return buf;
}
oSS << "SELECT object_name_upper, object_value_name_upper "
<< "FROM pvfx_privilege "
<< "WHERE uuid=" << iUUID << " "
<< "AND object_name_upper=\"" << sObjectName << "\" "
<< "AND pvf_function=\"" << "sFunction" << "\" "
<< "AND pvf_level=" << iLevel;
bsl::string sQuery(oSS.str());
int iErrorCode = checkClusterSQL("pvfxdb", sQuery, sObjectName);
int get_new_id() {
int id = getpid();
return id;
}
// a_cdb2::SqlService sqlService("default");
// sqlService.executeStatement(sQuery);
executeStatement(sQuery);
void write_info(int id, char* info) {
sqlite3 *db;
int rc;
int bufsize = 1024;
char *zErrMsg = 0;
char query[bufsize];
/* open db */
rc = sqlite3_open("users.sqlite", &db);
abort_on_error(rc, db);
/* Format query */
snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info);
write_log("query: %s\n", query);
/* Write info */
rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg);
abort_on_exec_error(rc, db, zErrMsg);
sqlite3_close(db);
}
int main(int argc, char* argv[]) {
char* info;
int id;
info = get_user_info();
id = get_new_id();
write_info(id, info);
/*
* show_info(id);
*/
}
```
### bslstrings query and library: bslstrings.ql
The complete query is first, followed by the library components.
```ql
/**
* @name SQLI Vulnerability
* @description Building a sql query dynamically may lead to sql injection vulnerability
* @kind path-problem
* @id cpp/SQLIVulnerable
* @problem.severity warning
*/
import semmle.code.cpp.dataflow.TaintTracking
import semmle.code.cpp.models.implementations.Pure
import DataFlow::PathGraph
/**
* The taint tracking configuration
*/
class SqliFlowConfig extends TaintTracking::Configuration {
SqliFlowConfig() { this = "SqliFlow" }
override predicate isSource(DataFlow::Node source) {
// Use sqliSourceProduction(this, source) in that case
sqliSourceDemo(source)
}
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
stlBslTaintStep(n1, n2)
}
override predicate isSanitizer(DataFlow::Node sanitizer) { none() }
override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) }
}
/*
* The main query
*/
from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label
where
// Flow path setup
conf.hasFlowPath(source, sink) and
source != sink and
if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_)
then label = source.getNode().asExpr().(VariableAccess).getTarget().getName()
else label = "source"
select sink, source, sink, "Sqli flow from $@", source, label
// Identify the sink(s) for DataFlow
predicate sqliSink(DataFlow::Node nd, FunctionCall fc) {
fc.getTarget().getName() = "executeStatement" and
fc.getArgument(0) = nd.asExpr()
}
// Identify the source(s) for DataFlow; this version for the demonstration.
predicate sqliSourceDemo(DataFlow::Node nd) {
// variable use
nd.asExpr() instanceof VariableAccess and
nd.getLocation().getFile().getShortName() = "simple"
}
// Identify the source(s) for DataFlow; this version for full applications
predicate sqliSourceProduction(SqliFlowConfig config, DataFlow::Node source) {
// Approximates places where we concatenate a var with a string
source.asExpr() instanceof VariableAccess and
config.isAdditionalTaintStep(source, _) and
// These are the steps where we get an existing value out, so don't use as source.
not qualifierToCallStep(source, _, _) and
(
// We are reading a non-local variable (field, param, etc)
exists(Variable v |
source.asExpr().(VariableAccess).getTarget() = v and
not v instanceof LocalVariable
)
or
// We are reading a local, but something wrote to it since definition
exists(LocalVariable v, VariableAccess mid |
source.asExpr().(VariableAccess).getTarget() = v and
mid.getTarget() = v and
mid = v.getInitializer().getASuccessor+() and
source.asExpr() = mid.getASuccessor+()
)
)
}
/**
* Library routines. These are for more in-depth development.
*/
// argv is a Parameter and exists(DataFlow::Node n | n.asParameter() = e) holds
predicate whatsThere(Element e, string info, int line, string file) {
line = e.getLocation().getStartLine() and
line = [10] and
file = e.getLocation().getFile().getShortName() and
file.matches("%simple%") and
info = e.getAQlClass() and
exists(DataFlow::Node n | n.asParameter() = e)
}
// Basic type that represents a string for our purposes
class StringLikeType extends Type {
StringLikeType() {
this.getName().matches("%string%") or
this.getName().matches("%stream%") or
this.(ReferenceType).getBaseType() instanceof StringLikeType
}
}
// Capture all types that may be used to form or append to a string.
class AppendableToString extends Type {
AppendableToString() {
this instanceof StringLikeType or
this instanceof CharPointerType or
this instanceof IntegralType
}
}
// Identify pure, non-member functions taking a tainted value and returning a taint, of the form
// val = func(arg). Avoids overlap with instance-modifying members and generic functions.
predicate pureFuncArgToCallStep(Function f) {
not f.isMember() and
(
f instanceof PureStrFunction or
f.getName() = "stol"
) and
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof AppendableToString
}
predicate pureFuncArgToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
pureFuncArgToCallStep(fc.getTarget()) and
(
// argument taints call result. Note that pure functions don't have PostUpdateNodes
n1.asExpr() = fc.getAnArgument() and
n2.asExpr() = fc
)
}
// Identify member functions taking a tainted value and returning a taint, of the form
// qual.func(arg).
predicate argToCallStep(Function f) {
f.getDeclaringType() instanceof StringLikeType and
f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof StringLikeType
}
predicate argToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
argToCallStep(fc.getTarget()) and
(
// argument taints call result
n1.asExpr() = fc.getAnArgument() and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
// The argument taints the post-update node of the qualifier
// or
// the argument taints leftmost argument in the call chain
n1.asExpr() = fc.getArgument(0) and
exists(Expr found |
leftmost(fc.getQualifier(), found) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found
)
)
}
// Identify functions where a tainted qualifier taints the result.
// This includes qual.str() and stream << arg (which is stream::operator<<(arg))
//
// In the FunctionCall, not here, the qualifier is `this`. Here, the declaring
// type is the later type of `this`.
// This covers chaining of methods. e.g., foo << seek(10) << "hello" will chain
predicate qualifierToCallStep(Function f) {
f.getDeclaringType() instanceof StringLikeType and
f.getType().getUnspecifiedType() instanceof StringLikeType
}
// Tainted qualifier taints the call's result, e.g., qual.str()
predicate qualifierToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
qualifierToCallStep(fc.getTarget()) and
(
n1.asExpr() = fc.getQualifier() and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
// Cover cases missing the PostUpdateNode, like oSS.str()
n1.asExpr() = fc.getQualifier() and
n2.asExpr() = fc
)
}
// Find parameterized override of operator<<, typically of the form
// stream& operator<<(stream&, AppendableToString)
predicate operatorAsFunctionStep(Function f) {
not exists(f.getDeclaringType()) and
f.getName() = "operator<<" and
f.getParameter(0).getType().getUnspecifiedType() instanceof StringLikeType and
f.getParameter(1).getType().getUnspecifiedType() instanceof AppendableToString and
f.getType().getUnspecifiedType() instanceof StringLikeType
}
predicate operatorAsFunctionStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) {
operatorAsFunctionStep(fc.getTarget()) and
(
// both arguments taint result
n1.asExpr() = fc.getArgument(0) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
n1.asExpr() = fc.getArgument(1) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc
or
// The second argument taints the post-update node of the first
// or
// the rightmost (second) argument also taints the leftmost argument at the
// beginning of the call chain.
// ( ( head << mid) << last)
// ^________________/
// \-left-------/
n1.asExpr() = fc.getArgument(1) and
exists(Expr found |
leftmost(fc.getArgument(0), found) and
n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found
)
)
}
// For a FunctionCall chain like (((head << n1) << n2) << last),
// find `head` starting from `last`
//
// The rightmost argument also taints the leftmost argument at the
// beginning of the call chain.
// ( ( head << mid) << last)
// ^________________/
// \-left-------/
predicate leftmost(FunctionCall fc, Expr head) {
//
operatorAsFunctionStep(fc.getTarget()) and
not fc.getArgument(0) instanceof FunctionCall and
head = fc.getArgument(0)
or
//
(argToCallStep(fc.getTarget()) or qualifierToCallStep(fc.getTarget())) and
not fc.getQualifier() instanceof FunctionCall and
head = fc.getQualifier()
or
//
leftmost(fc.getArgument(0), head)
or
leftmost(fc.getQualifier(), head)
}
// Propagate values from an array to an element access, `a` taints `a[i]`
predicate elementAccessStep(DataFlow::Node n1, DataFlow::Node n2, ArrayExpr a) {
// arr -> arr[ind]
n2.asExpr() = a and
a.getArrayBase() = n1.asExpr()
}
// All the stl and bsl taint steps in a single predicate.
predicate stlBslTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
operatorAsFunctionStep(n1, n2, _) or
qualifierToCallStep(n1, n2, _) or
argToCallStep(n1, n2, _) or
pureFuncArgToCallStep(n1, n2, _) or
elementAccessStep(n1, n2, _)
}
```