diff --git a/README.org b/README.org index 5231d36..ae7598d 100644 --- a/README.org +++ b/README.org @@ -338,6 +338,21 @@ | 640 } #+END_SRC + Further exploration is done in [[./argv-out-of-bounds-1.ql]], starting with an + attempt at using the =SimpleRangeAnalysis= library + #+begin_src javascript + lowerBound(cmp.getLeftOperand().getFullyConverted()), "left lower bound", + lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound" + #+end_src + The bounds are correct bounds for the types, but too general -- they include + the possible results of iteration. We are only interested in initial bounds + that are statically determinate, those before any iteration happens. + + Put another way, this is not a general data flow problem; we only want to check + initial value propagation along certain execution paths. The =for= loop + complicates this, as do the operations within it. We really want to see + execution paths that bypass the loop altogether. That is done in the latter + parts of [[./argv-out-of-bounds-1.ql]], using a =SsaDefinition=. # TODO # ** Running the query from the command line diff --git a/argv-out-of-bounds-1.ql b/argv-out-of-bounds-1.ql new file mode 100644 index 0000000..d45b182 --- /dev/null +++ b/argv-out-of-bounds-1.ql @@ -0,0 +1,109 @@ +/** + * @name Argv index out-of-bounds-1 + * @ kind problem + * @id cpp/example/argv-out-of-bounds-1 + */ + +import cpp +import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis + +/* + * Use + * https://codeql.github.com/docs/codeql-language-guides/codeql-library-for-cpp/ + * to find the codeql class matching c++ syntax + * + * See https://codeql.github.com/docs/codeql-language-guides/using-range-analsis-in-cpp/ + * for the entry points to the range analysis library. Namely, + * `import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis` + * and + * `lowerBound(expr)` + */ + +from + Parameter argc, RelationalOperation cmp, Variable n, Parameter argv, ArrayExpr argvAccess, + AssignExpr argvSet, SsaDefinition invalidN, SsaDefinition invalidNDef +where + // + // consider argc and n when they occur in the same comparison + // + argc.getName() = "argc" and + argc.getAnAccess() = cmp.getAnOperand().getAChild*() and + n.getAnAccess() = cmp.getAnOperand().getAChild*() and + not n instanceof Parameter and + // + // find indexed reads of argv using n + // + argvAccess.getArrayBase() = argv.getAnAccess() and + argvAccess.getArrayOffset() = n.getAnAccess() and + // + // find the indexed writes of argv + // + exists(ArrayExpr aa | + aa.getArrayBase() = argv.getAnAccess() and + aa.getArrayOffset() = n.getAnAccess() and + argvSet.getLValue() = aa and + /* Separate the read */ + aa != argvAccess + ) and + // + // ignore argv use when it's an argument of strcmp + // + not exists(FunctionCall strcmp | + strcmp.getTarget().getName() = "strcmp" and + strcmp.getAnArgument*() = argvAccess + ) and + // // ---------------------- + // // + // // The first thing to try is the range library via these predicates: + // select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison", + // argvAccess, "argv indexed", argvSet, "argv assigned", + // , lowerBound(cmp.getLeftOperand()), "left lower bound" + // , lowerBound(cmp.getRightOperand()), "right lower bound" + // // ---------------------- + // // + // // Both bounds are 0, which is somewhat surprising. The reason is that + // // results include possible increments from loop; with overflow and cast, + // // this makes 0 the minimum. + // // These are correct bounds for the types, but too general -- they include the possible results + // // of iteration. We are only interested in initial bounds that are statically determinate, + // // those before any iteration happens. + // // Just to check: + // select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison", + // argvAccess, "argv indexed", argvSet, "argv assigned", + // lowerBound(cmp.getLeftOperand().getFullyConverted()), "left lower bound", + // lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound" + // ---------------------- + // + // Step back from the general range library and note a few details about this + // problem: + // - The range of argc is set by the OS, so it's known at compile time + // - The first value of n is set in the code, it's also statically determined + // - We are concerned about the lowest possible value of the argv index, no other + // + // We can rephrase the problem: + // + // Find an execution path (if any), using statically known values, that reaches + // an argv assignment with invalid index. + // + // To track only values of the argv index that are too low, we need to stay on + // certain branches of the CFG, namely those matching a SSA defition of the + // index variable. + // + // A first attempt, starting from `cmp` is the following. This finds two + // invalidNDef locations, n++ and n = 1, and several `argvAccess`s + cmp.getLesserOperand() = invalidN.getAUse(n) and + cmp.getGreaterOperand() = argc.getAnAccess() and + invalidN.getAnUltimateDefiningValue(n).getValue().toInt() > 0 and + invalidNDef = invalidN.getAnUltimateSsaDefinition(n) + // + // We still find an access inside the loop, + // opt_user = g_strdup (argv[n]); + // so let's narrow: + and + invalidN.getAUse(n) = argvAccess.getArrayOffset() and + argvSet = cmp.getAFalseSuccessor().getASuccessor*() and + argvAccess = cmp.getAFalseSuccessor().getASuccessor*() + +select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison", + argvAccess, "argv indexed", argvSet, "argv assigned", invalidNDef, "invalid n definition" +