mirror of
https://github.com/hohn/codeql-sample-polkit.git
synced 2025-12-16 22:03:03 +01:00
argv-out-of-bounds-1.ql: ssa query identifying the out-of-bound access
This commit is contained in:
committed by
=Michael Hohn
parent
010254d3d4
commit
ae4e677dec
15
README.org
15
README.org
@@ -338,6 +338,21 @@
|
|||||||
| 640 }
|
| 640 }
|
||||||
#+END_SRC
|
#+END_SRC
|
||||||
|
|
||||||
|
Further exploration is done in [[./argv-out-of-bounds-1.ql]], starting with an
|
||||||
|
attempt at using the =SimpleRangeAnalysis= library
|
||||||
|
#+begin_src javascript
|
||||||
|
lowerBound(cmp.getLeftOperand().getFullyConverted()), "left lower bound",
|
||||||
|
lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound"
|
||||||
|
#+end_src
|
||||||
|
The bounds are correct bounds for the types, but too general -- they include
|
||||||
|
the possible results of iteration. We are only interested in initial bounds
|
||||||
|
that are statically determinate, those before any iteration happens.
|
||||||
|
|
||||||
|
Put another way, this is not a general data flow problem; we only want to check
|
||||||
|
initial value propagation along certain execution paths. The =for= loop
|
||||||
|
complicates this, as do the operations within it. We really want to see
|
||||||
|
execution paths that bypass the loop altogether. That is done in the latter
|
||||||
|
parts of [[./argv-out-of-bounds-1.ql]], using a =SsaDefinition=.
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# ** Running the query from the command line
|
# ** Running the query from the command line
|
||||||
|
|||||||
109
argv-out-of-bounds-1.ql
Normal file
109
argv-out-of-bounds-1.ql
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
/**
|
||||||
|
* @name Argv index out-of-bounds-1
|
||||||
|
* @ kind problem
|
||||||
|
* @id cpp/example/argv-out-of-bounds-1
|
||||||
|
*/
|
||||||
|
|
||||||
|
import cpp
|
||||||
|
import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use
|
||||||
|
* https://codeql.github.com/docs/codeql-language-guides/codeql-library-for-cpp/
|
||||||
|
* to find the codeql class matching c++ syntax
|
||||||
|
*
|
||||||
|
* See https://codeql.github.com/docs/codeql-language-guides/using-range-analsis-in-cpp/
|
||||||
|
* for the entry points to the range analysis library. Namely,
|
||||||
|
* `import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis`
|
||||||
|
* and
|
||||||
|
* `lowerBound(expr)`
|
||||||
|
*/
|
||||||
|
|
||||||
|
from
|
||||||
|
Parameter argc, RelationalOperation cmp, Variable n, Parameter argv, ArrayExpr argvAccess,
|
||||||
|
AssignExpr argvSet, SsaDefinition invalidN, SsaDefinition invalidNDef
|
||||||
|
where
|
||||||
|
//
|
||||||
|
// consider argc and n when they occur in the same comparison
|
||||||
|
//
|
||||||
|
argc.getName() = "argc" and
|
||||||
|
argc.getAnAccess() = cmp.getAnOperand().getAChild*() and
|
||||||
|
n.getAnAccess() = cmp.getAnOperand().getAChild*() and
|
||||||
|
not n instanceof Parameter and
|
||||||
|
//
|
||||||
|
// find indexed reads of argv using n
|
||||||
|
//
|
||||||
|
argvAccess.getArrayBase() = argv.getAnAccess() and
|
||||||
|
argvAccess.getArrayOffset() = n.getAnAccess() and
|
||||||
|
//
|
||||||
|
// find the indexed writes of argv
|
||||||
|
//
|
||||||
|
exists(ArrayExpr aa |
|
||||||
|
aa.getArrayBase() = argv.getAnAccess() and
|
||||||
|
aa.getArrayOffset() = n.getAnAccess() and
|
||||||
|
argvSet.getLValue() = aa and
|
||||||
|
/* Separate the read */
|
||||||
|
aa != argvAccess
|
||||||
|
) and
|
||||||
|
//
|
||||||
|
// ignore argv use when it's an argument of strcmp
|
||||||
|
//
|
||||||
|
not exists(FunctionCall strcmp |
|
||||||
|
strcmp.getTarget().getName() = "strcmp" and
|
||||||
|
strcmp.getAnArgument*() = argvAccess
|
||||||
|
) and
|
||||||
|
// // ----------------------
|
||||||
|
// //
|
||||||
|
// // The first thing to try is the range library via these predicates:
|
||||||
|
// select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
|
||||||
|
// argvAccess, "argv indexed", argvSet, "argv assigned",
|
||||||
|
// , lowerBound(cmp.getLeftOperand()), "left lower bound"
|
||||||
|
// , lowerBound(cmp.getRightOperand()), "right lower bound"
|
||||||
|
// // ----------------------
|
||||||
|
// //
|
||||||
|
// // Both bounds are 0, which is somewhat surprising. The reason is that
|
||||||
|
// // results include possible increments from loop; with overflow and cast,
|
||||||
|
// // this makes 0 the minimum.
|
||||||
|
// // These are correct bounds for the types, but too general -- they include the possible results
|
||||||
|
// // of iteration. We are only interested in initial bounds that are statically determinate,
|
||||||
|
// // those before any iteration happens.
|
||||||
|
// // Just to check:
|
||||||
|
// select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
|
||||||
|
// argvAccess, "argv indexed", argvSet, "argv assigned",
|
||||||
|
// lowerBound(cmp.getLeftOperand().getFullyConverted()), "left lower bound",
|
||||||
|
// lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound"
|
||||||
|
// ----------------------
|
||||||
|
//
|
||||||
|
// Step back from the general range library and note a few details about this
|
||||||
|
// problem:
|
||||||
|
// - The range of argc is set by the OS, so it's known at compile time
|
||||||
|
// - The first value of n is set in the code, it's also statically determined
|
||||||
|
// - We are concerned about the lowest possible value of the argv index, no other
|
||||||
|
//
|
||||||
|
// We can rephrase the problem:
|
||||||
|
//
|
||||||
|
// Find an execution path (if any), using statically known values, that reaches
|
||||||
|
// an argv assignment with invalid index.
|
||||||
|
//
|
||||||
|
// To track only values of the argv index that are too low, we need to stay on
|
||||||
|
// certain branches of the CFG, namely those matching a SSA defition of the
|
||||||
|
// index variable.
|
||||||
|
//
|
||||||
|
// A first attempt, starting from `cmp` is the following. This finds two
|
||||||
|
// invalidNDef locations, n++ and n = 1, and several `argvAccess`s
|
||||||
|
cmp.getLesserOperand() = invalidN.getAUse(n) and
|
||||||
|
cmp.getGreaterOperand() = argc.getAnAccess() and
|
||||||
|
invalidN.getAnUltimateDefiningValue(n).getValue().toInt() > 0 and
|
||||||
|
invalidNDef = invalidN.getAnUltimateSsaDefinition(n)
|
||||||
|
//
|
||||||
|
// We still find an access inside the loop,
|
||||||
|
// opt_user = g_strdup (argv[n]);
|
||||||
|
// so let's narrow:
|
||||||
|
and
|
||||||
|
invalidN.getAUse(n) = argvAccess.getArrayOffset() and
|
||||||
|
argvSet = cmp.getAFalseSuccessor().getASuccessor*() and
|
||||||
|
argvAccess = cmp.getAFalseSuccessor().getASuccessor*()
|
||||||
|
|
||||||
|
select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
|
||||||
|
argvAccess, "argv indexed", argvSet, "argv assigned", invalidNDef, "invalid n definition"
|
||||||
|
|
||||||
Reference in New Issue
Block a user