Refined argv-out-of-bounds-1 to precise result set

2025-12-16 13:53:04 +01:00 · 2022-03-04 12:11:05 -08:00
parent bc6b32c4c8
commit 3660be0eeb
1 changed files with 53 additions and 41 deletions
--- a/argv-out-of-bounds-1.ql
+++ b/argv-out-of-bounds-1.ql
@@ -6,23 +6,23 @@

 import cpp
 import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis
+import semmle.code.cpp.controlflow.SSA

-/*
- * Use
- * https://codeql.github.com/docs/codeql-language-guides/codeql-library-for-cpp/
- * to find the codeql class matching c++ syntax
- *
- * See https://codeql.github.com/docs/codeql-language-guides/using-range-analsis-in-cpp/
- * for the entry points to the range analysis library.  Namely,
- * `import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis`
- * and
- * `lowerBound(expr)`
- */
+// 
+//    Use https://codeql.github.com/docs/codeql-language-guides/codeql-library-for-cpp/
+//    to find the codeql class matching c++ syntax
+// 
+//    See https://codeql.github.com/docs/codeql-language-guides/using-range-analsis-in-cpp/
+//    for the entry points to the range analysis library.  Namely,
+//    `import semmle.code.cpp.rangeanalysis.SimpleRangeAnalysis`
+//    and
+//    `lowerBound(expr)`
+// 

 from
  Parameter argc, RelationalOperation cmp, Variable n, Parameter argv, ArrayExpr argvAccess,
  AssignExpr argvSet, SsaDefinition invalidN, SsaDefinition invalidNDef
-where
+ where
  //
  //     consider argc and n when they occur in the same comparison
  //
@@ -54,56 +54,68 @@ where
  ) and
  // // ----------------------
  // //
-  // // The first thing to try is the range library via these predicates:
+  // //     The first thing to try is the range library via these predicates:
  // select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
  //   argvAccess, "argv indexed", argvSet, "argv assigned",
  //   , lowerBound(cmp.getLeftOperand()), "left lower bound"
  //   , lowerBound(cmp.getRightOperand()), "right lower bound"
  // // ----------------------
  // //
-  // // Both bounds are 0, which is somewhat surprising.  The reason is that
-  // // results include possible increments from loop; with overflow and cast,
-  // // this makes 0 the minimum.
-  // // These are correct bounds for the types, but too general -- they include the possible results
-  // // of iteration.  We are only interested in initial bounds that are statically determinate,
-  // // those before any iteration happens.
-  // // Just to check:
+  // //     Both bounds are 0, which is somewhat surprising.  The reason is that
+  // //     results include possible increments from loop; with overflow and cast,
+  // //     this makes 0 the minimum.
+  // //     These are correct bounds for the types, but too general -- they include the possible results
+  // //     of iteration.  We are only interested in initial bounds that are statically determinate,
+  // //     those before any iteration happens.
+  // //     Just to check:
  // select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
  //   argvAccess, "argv indexed", argvSet, "argv assigned",
  //   lowerBound(cmp.getLeftOperand().getFullyConverted()),  "left lower bound",
  //   lowerBound(cmp.getRightOperand().getFullyConverted()), "right lower bound"
  // ----------------------
  //
-  // Step back from the general range library and note a few details about this
-  // problem:
-  // - The range of argc is set by the OS, so it's known at compile time
-  // - The first value of n is set in the code, it's also statically determined
-  // - We are concerned about the lowest possible value of the argv index, no other
+  //      Step back from the general range library and note a few details about this
+  //      problem:
+  //      - The range of argc is set by the OS, so it's known at compile time
+  //      - The first value of n is set in the code, it's also statically determined
+  //      - We are concerned about the lowest possible value of the argv index, no other
  //
-  // We can rephrase the problem:
+  //      We can rephrase the problem:
  //
-  // Find an execution path (if any), using statically known values, that reaches
-  // an argv assignment with invalid index.
+  //      Find an execution path (if any), using statically known values, that reaches
+  //      an argv assignment with invalid index.
  //
-  // To track only values of the argv index that are too low, we need to stay on
-  // certain branches of the CFG, namely those matching a SSA defition of the
-  // index variable.
+  //      To track only values of the argv index that are too low, we need to stay on
+  //      certain branches of the CFG, namely those matching a SSA defition of the
+  //      index variable.
  //
-  // A first attempt, starting from `cmp` is the following.  This finds two
-  // invalidNDef locations, n++ and n = 1, and several `argvAccess`s
-  cmp.getLesserOperand() = invalidN.getAUse(n) and 
+  //      A first attempt, starting from `cmp` is the following.  This finds two
+  //      invalidNDef locations, n++ and n = 1, and several `argvAccess`s
+  cmp.getLesserOperand() = invalidN.getAUse(n) and
  cmp.getGreaterOperand() = argc.getAnAccess() and
  invalidN.getAnUltimateDefiningValue(n).getValue().toInt() > 0 and
-  invalidNDef = invalidN.getAnUltimateSsaDefinition(n)
+  invalidNDef = invalidN.getAnUltimateSsaDefinition(n) and
  //
-  // We still find an access inside the loop,
-  //    opt_user = g_strdup (argv[n]);
-  // so let's narrow:
-  and 
+  //     We still find an access inside the loop,
+  //        opt_user = g_strdup (argv[n]);
+  //     so let's narrow:
  invalidN.getAUse(n) = argvAccess.getArrayOffset() and
  argvSet = cmp.getAFalseSuccessor().getASuccessor*() and
-  argvAccess = cmp.getAFalseSuccessor().getASuccessor*() 
-
+  argvAccess = cmp.getAFalseSuccessor().getASuccessor*() and
+  // 
+  //    Reduce to a single invalidNDef location, the n = 1 case.
+  // // 
+  // //     Try one; still get `n = 1` and `n++`
+  // and  exists( Expr t| t =  invalidNDef.getDefiningValue(n))
+  //
+  //    Try two, require an explicit value:
+  exists(string t | t = invalidNDef.getDefiningValue(n).getValue())
+  // 
+  //    This works!  Some clean up/simplification, to be used in the next iteration.
+  and
+  invalidNDef = invalidN.getAnUltimateSsaDefinition(n) and 
+  invalidNDef.getDefiningValue(n).getValue().toInt() > 0
+  
 select argc, "definition of argc", cmp, "use in comparison", n, "other var in comparison",
  argvAccess, "argv indexed", argvSet, "argv assigned", invalidNDef, "invalid n definition"