diff --git a/README.org b/README.org index ae7598d..c92bb4d 100644 --- a/README.org +++ b/README.org @@ -338,8 +338,8 @@ | 640 } #+END_SRC - Further exploration is done in [[./argv-out-of-bounds-1.ql]], starting with an - attempt at using the =SimpleRangeAnalysis= library + Exploration of values starts in [[./argv-out-of-bounds-1.ql]] with an attempt at + using the =SimpleRangeAnalysis= library via #+begin_src javascript lowerBound(cmp.getLeftOperand().getFullyConverted()), "left lower bound", lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound" @@ -350,9 +350,18 @@ Put another way, this is not a general data flow problem; we only want to check initial value propagation along certain execution paths. The =for= loop - complicates this, as do the operations within it. We really want to see - execution paths that bypass the loop altogether. That is done in the latter - parts of [[./argv-out-of-bounds-1.ql]], using a =SsaDefinition=. + complicates this, as do the operations within it. + + We really want to see execution paths that bypass the loop altogether. That is + done in the latter parts of [[./argv-out-of-bounds-1.ql]], using a =SsaDefinition=. + + The query [[./argv-out-of-bounds-2.ql]] cleans up the exploration from + [[./argv-out-of-bounds-1.ql]] and correctly identifies all the locations using =n= + with a known index value =n > 0=. + + This query reports results on the vulnerable version of the code, + =polkit-0.119.db=. Next, it needs to be checked and enhanced so it reports + nothing on the patched version, =polkit-a2bf5c9c.db=. # TODO # ** Running the query from the command line diff --git a/argv-out-of-bounds-1.ql b/argv-out-of-bounds-1.ql index 3bf5525..e40b20c 100644 --- a/argv-out-of-bounds-1.ql +++ b/argv-out-of-bounds-1.ql @@ -85,7 +85,7 @@ from // Find an execution path (if any), using statically known values, that reaches // an argv assignment with invalid index. // - // To track only values of the argv index that are too low, we need to stay on + // To track only values of the argv index that are too high, we need to stay on // certain branches of the CFG, namely those matching a SSA defition of the // index variable. // diff --git a/argv-out-of-bounds-2.ql b/argv-out-of-bounds-2.ql new file mode 100644 index 0000000..64c7e9c --- /dev/null +++ b/argv-out-of-bounds-2.ql @@ -0,0 +1,75 @@ +/** + * @name Argv index out-of-bounds-2 + * @ kind problem + * @id cpp/example/argv-out-of-bounds-2 + */ + +import cpp +import semmle.code.cpp.controlflow.SSA + +// +// Use https://codeql.github.com/docs/codeql-language-guides/codeql-library-for-cpp/ +// to find the codeql class matching c++ syntax +// +// See https://codeql.github.com/docs/codeql-language-guides/using-range-analsis-in-cpp/ +// for the entry points to the range analysis library. Namely, +// `import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis` +// and +// `lowerBound(expr)` +// +// There is a short example of SSA use at +// https://codeql.github.com/docs/codeql-language-guides/detecting-a-potential-buffer-overflow/#improving-the-query-using-the-ssa-library +// + +from + Parameter argc, RelationalOperation cmp, Variable n, Parameter argv, ArrayExpr argvAccess, + AssignExpr argvSet, SsaDefinition invalidN, SsaDefinition invalidNDef +where + // + // consider argc and n when they occur in the same comparison + // + argc.getName() = "argc" and + argc.getAnAccess() = cmp.getAnOperand().getAChild*() and + n.getAnAccess() = cmp.getAnOperand().getAChild*() and + not n instanceof Parameter and + // + // find indexed reads of argv using n + // + argvAccess.getArrayBase() = argv.getAnAccess() and + argvAccess.getArrayOffset() = n.getAnAccess() and + // + // find the indexed writes of argv + // + exists(ArrayExpr aa | + aa.getArrayBase() = argv.getAnAccess() and + aa.getArrayOffset() = n.getAnAccess() and + argvSet.getLValue() = aa and + /* Separate the read */ + aa != argvAccess + ) and + // + // Ignore argv use when it's an argument of strcmp + // + not exists(FunctionCall strcmp | + strcmp.getTarget().getName() = "strcmp" and + strcmp.getAnArgument*() = argvAccess + ) and + // + // To track only values of the argv index that are too high, we need to + // stay on branches of the CFG matching a SSA definition of the index + // variable with known high value. + // + cmp.getLesserOperand() = invalidN.getAUse(n) and + cmp.getGreaterOperand() = argc.getAnAccess() and + invalidNDef = invalidN.getAnUltimateSsaDefinition(n) and + invalidNDef.getDefiningValue(n).getValue().toInt() > 0 and + // + // We still find an access inside the loop, + // opt_user = g_strdup (argv[n]); + // so let's narrow to branches outside the loop. + // + invalidN.getAUse(n) = argvAccess.getArrayOffset() and + argvSet = cmp.getAFalseSuccessor().getASuccessor*() and + argvAccess = cmp.getAFalseSuccessor().getASuccessor*() + +select argc, cmp, n, argvAccess, argvSet, invalidNDef, invalidN