diff --git a/codeql-dataflow-sql-injection.md b/codeql-dataflow-sql-injection.md index 904c19d..f24dcfb 100644 --- a/codeql-dataflow-sql-injection.md +++ b/codeql-dataflow-sql-injection.md @@ -439,7 +439,7 @@ or -## The CodeQL Data Flow Configuration +## The CodeQL Taint Flow Configuration The previous queries identify our source, sink and one additional flow step. To use global data flow and taint tracking we need some additional codeql setup: - a taint flow configuration @@ -532,7 +532,7 @@ where conf.hasFlowPath(source, sink) select sink, source, sink, "Possible SQL injection" ``` -## Tutorial: Data Flow Details +## Tutorial: Taint Flow Details With the dataflow configuration in place, we just need to provide the details for source(s), sink(s), and taint step(s). @@ -664,10 +664,9 @@ If you quick-eval this predicate, you will see that `source` is now `ref arg buf instead of `buf`. -### The Extra Flow Step +### The isAdditionalTaintStep Predicate xx: - In the `snprintf` macro call, those have indices 0 and 4. In the underlying function `__builtin___snprintf_chk`, the indices are 0 and 6. Using the latter: ```ql @@ -681,368 +680,167 @@ select printf, out, into This correctly identifies the call and the extra flow arguments. -### Complete query -xx: +## Appendix +This appendix has the complete C source and codeql query. -Using the previous predicates - - sqliSourceDemo(source) - sqliSink(sink, _) - stlBslTaintStep(n1, n2) - -our full query is now +### The complete Query: SqlInjection.ql +The full query is ```ql /** * @name SQLI Vulnerability - * @description Building a sql query dynamically may lead to sql injection vulnerability + * @description Using untrusted strings in a sql query allows sql injection attacks. * @kind path-problem * @id cpp/SQLIVulnerable * @problem.severity warning */ +import cpp import semmle.code.cpp.dataflow.TaintTracking -import semmle.code.cpp.models.implementations.Pure import DataFlow::PathGraph -/** - * The taint tracking configuration - */ class SqliFlowConfig extends TaintTracking::Configuration { SqliFlowConfig() { this = "SqliFlow" } override predicate isSource(DataFlow::Node source) { - // Use sqliSourceProduction(this, source) in that case - sqliSourceDemo(source) - } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - stlBslTaintStep(n1, n2) + // count = read(STDIN_FILENO, buf, BUFSIZE); + exists(FunctionCall read | + read.getTarget().getName() = "read" and + read.getArgument(1) = source.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() + ) } override predicate isSanitizer(DataFlow::Node sanitizer) { none() } - override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) } + override predicate isAdditionalTaintStep(DataFlow::Node into, DataFlow::Node out) { + // Extra taint step + // snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info); + // But snprintf is a macro on mac os. The actual function's name is + // #undef snprintf + // #define snprintf(str, len, ...) \ + // __builtin___snprintf_chk (str, len, 0, __darwin_obsz(str), __VA_ARGS__) + // #endif + exists(FunctionCall printf | + printf.getTarget().getName().matches("%snprintf%") and + printf.getArgument(0) = out.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() and + // very specific: shifted index for macro. We can generalize this to consider + // all trailing arguments as sources. + printf.getArgument(6) = into.asExpr() + ) + } + + override predicate isSink(DataFlow::Node sink) { + // rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + exists(FunctionCall exec | + exec.getTarget().getName() = "sqlite3_exec" and + exec.getArgument(1) = sink.asExpr() + ) + } } -/* - * The main query - */ - -from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label -where - // Flow path setup - conf.hasFlowPath(source, sink) and - source != sink and - if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_) - then label = source.getNode().asExpr().(VariableAccess).getTarget().getName() - else label = "source" -select sink, source, sink, "Sqli flow from $@", source, label - +from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink +where conf.hasFlowPath(source, sink) +select sink, source, sink, "Possible SQL injection" ``` -## Appendix -This appendix has the C++ test source, the bslstrings query, and the bslstrings -library. The latter are in one file for convenience. - -### Test case: simple.cc +### The Database Writer: add-user.c +The complete source for the sqlite database writer ```c -#include -#include +#include +#include +#include +#include +#include +#include -void executeStatement(const bsl::string &sQuery); +void write_log(const char* fmt, ...) { + time_t t; + char tstr[26]; + va_list args; -int checkClusterSQL(const bsl::string &sDatabase, const bsl::string &sQuery, - const bsl::string &sObjectName); + va_start(args, fmt); + t = time(NULL); + ctime_r(&t, tstr); + tstr[24] = 0; /* no \n */ + fprintf(stderr, "[%s] ", tstr); + vfprintf(stderr, fmt, args); + va_end(args); + fflush(stderr); +} -int main(int argc, char **argv) { +void abort_on_error(int rc, sqlite3 *db) { + if( rc ) { + fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db)); + sqlite3_close(db); + fflush(stderr); + abort(); + } +} - bsl::stringstream oSS; - // Local constants - int iUUID = 123; - bsl::string sObjectName("HELLO"); - // User-supplied iLevel - int iLevel = std::stol(argv[1]); +void abort_on_exec_error(int rc, sqlite3 *db, char* zErrMsg) { + if( rc!=SQLITE_OK ){ + fprintf(stderr, "SQL error: %s\n", zErrMsg); + sqlite3_free(zErrMsg); + sqlite3_close(db); + fflush(stderr); + abort(); + } +} + +char* get_user_info() { +#define BUFSIZE 1024 + char* buf = (char*) malloc(BUFSIZE * sizeof(char)); + int count; + // Disable buffering to avoid need for fflush + // after printf(). + setbuf( stdout, NULL ); + printf("*** Welcome to sql injection ***\n"); + printf("Please enter name: "); + count = read(STDIN_FILENO, buf, BUFSIZE); + if (count <= 0) abort(); + /* strip trailing whitespace */ + while (count && isspace(buf[count-1])) { + buf[count-1] = 0; --count; + } + return buf; +} - oSS << "SELECT object_name_upper, object_value_name_upper " - << "FROM pvfx_privilege " - << "WHERE uuid=" << iUUID << " " - << "AND object_name_upper=\"" << sObjectName << "\" " - << "AND pvf_function=\"" << "sFunction" << "\" " - << "AND pvf_level=" << iLevel; - bsl::string sQuery(oSS.str()); - int iErrorCode = checkClusterSQL("pvfxdb", sQuery, sObjectName); +int get_new_id() { + int id = getpid(); + return id; +} - // a_cdb2::SqlService sqlService("default"); - // sqlService.executeStatement(sQuery); - executeStatement(sQuery); +void write_info(int id, char* info) { + sqlite3 *db; + int rc; + int bufsize = 1024; + char *zErrMsg = 0; + char query[bufsize]; + + /* open db */ + rc = sqlite3_open("users.sqlite", &db); + abort_on_error(rc, db); + + /* Format query */ + snprintf(query, bufsize, "INSERT INTO users VALUES (%d, '%s')", id, info); + write_log("query: %s\n", query); + + /* Write info */ + rc = sqlite3_exec(db, query, NULL, 0, &zErrMsg); + abort_on_exec_error(rc, db, zErrMsg); + + sqlite3_close(db); +} + +int main(int argc, char* argv[]) { + char* info; + int id; + info = get_user_info(); + id = get_new_id(); + write_info(id, info); + /* + * show_info(id); + */ } ``` - -### bslstrings query and library: bslstrings.ql -The complete query is first, followed by the library components. -```ql -/** - * @name SQLI Vulnerability - * @description Building a sql query dynamically may lead to sql injection vulnerability - * @kind path-problem - * @id cpp/SQLIVulnerable - * @problem.severity warning - */ - -import semmle.code.cpp.dataflow.TaintTracking -import semmle.code.cpp.models.implementations.Pure -import DataFlow::PathGraph - -/** - * The taint tracking configuration - */ -class SqliFlowConfig extends TaintTracking::Configuration { - SqliFlowConfig() { this = "SqliFlow" } - - override predicate isSource(DataFlow::Node source) { - // Use sqliSourceProduction(this, source) in that case - sqliSourceDemo(source) - } - - override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - stlBslTaintStep(n1, n2) - } - - override predicate isSanitizer(DataFlow::Node sanitizer) { none() } - - override predicate isSink(DataFlow::Node sink) { sqliSink(sink, _) } -} - -/* - * The main query - */ - -from SqliFlowConfig conf, DataFlow::PathNode source, DataFlow::PathNode sink, string label -where - // Flow path setup - conf.hasFlowPath(source, sink) and - source != sink and - if source.getNode().asExpr().(VariableAccess).getTarget().hasName(_) - then label = source.getNode().asExpr().(VariableAccess).getTarget().getName() - else label = "source" -select sink, source, sink, "Sqli flow from $@", source, label - -// Identify the sink(s) for DataFlow -predicate sqliSink(DataFlow::Node nd, FunctionCall fc) { - fc.getTarget().getName() = "executeStatement" and - fc.getArgument(0) = nd.asExpr() -} - -// Identify the source(s) for DataFlow; this version for the demonstration. -predicate sqliSourceDemo(DataFlow::Node nd) { - // variable use - nd.asExpr() instanceof VariableAccess and - nd.getLocation().getFile().getShortName() = "simple" -} - -// Identify the source(s) for DataFlow; this version for full applications -predicate sqliSourceProduction(SqliFlowConfig config, DataFlow::Node source) { - // Approximates places where we concatenate a var with a string - source.asExpr() instanceof VariableAccess and - config.isAdditionalTaintStep(source, _) and - // These are the steps where we get an existing value out, so don't use as source. - not qualifierToCallStep(source, _, _) and - ( - // We are reading a non-local variable (field, param, etc) - exists(Variable v | - source.asExpr().(VariableAccess).getTarget() = v and - not v instanceof LocalVariable - ) - or - // We are reading a local, but something wrote to it since definition - exists(LocalVariable v, VariableAccess mid | - source.asExpr().(VariableAccess).getTarget() = v and - mid.getTarget() = v and - mid = v.getInitializer().getASuccessor+() and - source.asExpr() = mid.getASuccessor+() - ) - ) -} - -/** - * Library routines. These are for more in-depth development. - */ -// argv is a Parameter and exists(DataFlow::Node n | n.asParameter() = e) holds -predicate whatsThere(Element e, string info, int line, string file) { - line = e.getLocation().getStartLine() and - line = [10] and - file = e.getLocation().getFile().getShortName() and - file.matches("%simple%") and - info = e.getAQlClass() and - exists(DataFlow::Node n | n.asParameter() = e) -} - -// Basic type that represents a string for our purposes -class StringLikeType extends Type { - StringLikeType() { - this.getName().matches("%string%") or - this.getName().matches("%stream%") or - this.(ReferenceType).getBaseType() instanceof StringLikeType - } -} - -// Capture all types that may be used to form or append to a string. -class AppendableToString extends Type { - AppendableToString() { - this instanceof StringLikeType or - this instanceof CharPointerType or - this instanceof IntegralType - } -} - -// Identify pure, non-member functions taking a tainted value and returning a taint, of the form -// val = func(arg). Avoids overlap with instance-modifying members and generic functions. -predicate pureFuncArgToCallStep(Function f) { - not f.isMember() and - ( - f instanceof PureStrFunction or - f.getName() = "stol" - ) and - f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and - f.getType().getUnspecifiedType() instanceof AppendableToString -} - -predicate pureFuncArgToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { - pureFuncArgToCallStep(fc.getTarget()) and - ( - // argument taints call result. Note that pure functions don't have PostUpdateNodes - n1.asExpr() = fc.getAnArgument() and - n2.asExpr() = fc - ) -} - -// Identify member functions taking a tainted value and returning a taint, of the form -// qual.func(arg). -predicate argToCallStep(Function f) { - f.getDeclaringType() instanceof StringLikeType and - f.getAParameter().getType().getUnspecifiedType() instanceof AppendableToString and - f.getType().getUnspecifiedType() instanceof StringLikeType -} - -predicate argToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { - argToCallStep(fc.getTarget()) and - ( - // argument taints call result - n1.asExpr() = fc.getAnArgument() and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc - or - // The argument taints the post-update node of the qualifier - // or - // the argument taints leftmost argument in the call chain - n1.asExpr() = fc.getArgument(0) and - exists(Expr found | - leftmost(fc.getQualifier(), found) and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found - ) - ) -} - -// Identify functions where a tainted qualifier taints the result. -// This includes qual.str() and stream << arg (which is stream::operator<<(arg)) -// -// In the FunctionCall, not here, the qualifier is `this`. Here, the declaring -// type is the later type of `this`. -// This covers chaining of methods. e.g., foo << seek(10) << "hello" will chain -predicate qualifierToCallStep(Function f) { - f.getDeclaringType() instanceof StringLikeType and - f.getType().getUnspecifiedType() instanceof StringLikeType -} - -// Tainted qualifier taints the call's result, e.g., qual.str() -predicate qualifierToCallStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { - qualifierToCallStep(fc.getTarget()) and - ( - n1.asExpr() = fc.getQualifier() and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc - or - // Cover cases missing the PostUpdateNode, like oSS.str() - n1.asExpr() = fc.getQualifier() and - n2.asExpr() = fc - ) -} - -// Find parameterized override of operator<<, typically of the form -// stream& operator<<(stream&, AppendableToString) -predicate operatorAsFunctionStep(Function f) { - not exists(f.getDeclaringType()) and - f.getName() = "operator<<" and - f.getParameter(0).getType().getUnspecifiedType() instanceof StringLikeType and - f.getParameter(1).getType().getUnspecifiedType() instanceof AppendableToString and - f.getType().getUnspecifiedType() instanceof StringLikeType -} - -predicate operatorAsFunctionStep(DataFlow::Node n1, DataFlow::Node n2, FunctionCall fc) { - operatorAsFunctionStep(fc.getTarget()) and - ( - // both arguments taint result - n1.asExpr() = fc.getArgument(0) and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc - or - n1.asExpr() = fc.getArgument(1) and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = fc - or - // The second argument taints the post-update node of the first - // or - // the rightmost (second) argument also taints the leftmost argument at the - // beginning of the call chain. - // ( ( head << mid) << last) - // ^________________/ - // \-left-------/ - n1.asExpr() = fc.getArgument(1) and - exists(Expr found | - leftmost(fc.getArgument(0), found) and - n2.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr() = found - ) - ) -} - -// For a FunctionCall chain like (((head << n1) << n2) << last), -// find `head` starting from `last` -// -// The rightmost argument also taints the leftmost argument at the -// beginning of the call chain. -// ( ( head << mid) << last) -// ^________________/ -// \-left-------/ -predicate leftmost(FunctionCall fc, Expr head) { - // - operatorAsFunctionStep(fc.getTarget()) and - not fc.getArgument(0) instanceof FunctionCall and - head = fc.getArgument(0) - or - // - (argToCallStep(fc.getTarget()) or qualifierToCallStep(fc.getTarget())) and - not fc.getQualifier() instanceof FunctionCall and - head = fc.getQualifier() - or - // - leftmost(fc.getArgument(0), head) - or - leftmost(fc.getQualifier(), head) -} - -// Propagate values from an array to an element access, `a` taints `a[i]` -predicate elementAccessStep(DataFlow::Node n1, DataFlow::Node n2, ArrayExpr a) { - // arr -> arr[ind] - n2.asExpr() = a and - a.getArrayBase() = n1.asExpr() -} - -// All the stl and bsl taint steps in a single predicate. -predicate stlBslTaintStep(DataFlow::Node n1, DataFlow::Node n2) { - operatorAsFunctionStep(n1, n2, _) or - qualifierToCallStep(n1, n2, _) or - argToCallStep(n1, n2, _) or - pureFuncArgToCallStep(n1, n2, _) or - elementAccessStep(n1, n2, _) -} - -``` -