Python: Model shelve.open

2026-05-01 19:55:15 +02:00 · 2021-10-08 09:07:05 +02:00
parent a81d359669
commit 42980a1ab4
2 changed files with 38 additions and 2 deletions
--- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll
+++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll
@@ -498,6 +498,42 @@ private module StdlibPrivate {
    override string getFormat() { result = "pickle" }
  }

+  // ---------------------------------------------------------------------------
+  // shelve
+  // ---------------------------------------------------------------------------
+  /**
+   * A call to `shelve.open`, modeled as both a decoding step and a file system access.
+   * See https://docs.python.org/3/library/shelve.html#shelve.open
+   *
+   * Claiming that the input to `shelve.open` is decoded is a bit questionable, since
+   * it is not the filename, but the contents of the named file, that are decoded.
+   *
+   * However, we definitely want to be able to alert if a user is able to control which
+   * file is used, since that can lead to code execution (even if that file is free of
+   * path injection).
+   *
+   * So right now the best way we have of modeling this seems to be to treat the filename
+   * argument as the data being deserialized.
+   */
+  private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
+    DataFlow::CallCfgNode {
+    ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
+
+    override predicate mayExecuteInput() { any() } // holds for every `shelve.open` call
+
+    override DataFlow::Node getAnInput() {
+      result in [this.getArg(0), this.getArgByName("filename")]
+    }
+
+    override DataFlow::Node getAPathArgument() {
+      result in [this.getArg(0), this.getArgByName("filename")]
+    }
+
+    override DataFlow::Node getOutput() { result = this } // the call itself is the output
+
+    override string getFormat() { result = "pickle" }
+  }
+
  // ---------------------------------------------------------------------------
  // popen2
  // ---------------------------------------------------------------------------
--- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py
+++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py
@@ -15,8 +15,8 @@ marshal.loads(payload)  # $ decodeInput=payload decodeOutput=marshal.loads(..) d

 # if the file opened has been controlled by an attacker, this can lead to code
 # execution. (underlying file format is pickle)
-shelve.open(filepath)  # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
-shelve.open(filename=filepath)  # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
+shelve.open(filepath)  # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
+shelve.open(filename=filepath)  # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath

 # TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
 base64.b64decode(payload)  # $ decodeInput=payload decodeOutput=base64.b64decode(..) decodeFormat=Base64