From a31bf75169ce47400d71d86fa98a7ebd53853567 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Oct 2021 20:28:30 +0200 Subject: [PATCH 01/12] Python: Refactor `pickle.loads()` modeling --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 539f0dcabb0..0c4c49aa504 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -447,23 +447,15 @@ private module StdlibPrivate { // --------------------------------------------------------------------------- // pickle // --------------------------------------------------------------------------- - /** Gets a reference to the `pickle` module. */ - DataFlow::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getAUse() } - - /** Provides models for the `pickle` module. */ - module pickle { - /** Gets a reference to the `pickle.loads` function. */ - DataFlow::Node loads() { - result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getMember("loads").getAUse() - } - } + /** Gets a reference to any of the `pickle` modules. */ + API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) } /** * A call to `pickle.loads` * See https://docs.python.org/3/library/pickle.html#pickle.loads */ private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode { - PickleLoadsCall() { this.getFunction() = pickle::loads() } + PickleLoadsCall() { this = pickle().getMember("loads").getACall() } override predicate mayExecuteInput() { any() } From 3592b09d56285770563693cc6071158139c482fd Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Oct 2021 21:11:51 +0200 Subject: [PATCH 02/12] Python: Expand stdlib decoding tests The part about claiming there is decoding of the input to `shelve.open` is sort of an odd one, since it's not the filename, but the contents of the file that is decoded. However, trying to only handle this problem through path injection is not enough -- if a user is able to upload and access files through `shelve.open` in a path injection safe manner, that still leads to code execution. So right now the best way we have of modeling this is to treat the filename argument as being deserialized... --- .../library-tests/frameworks/stdlib/Decoding.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index 5d157a61f6e..7455ff85856 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -1,10 +1,23 @@ import pickle import marshal +import shelve import base64 +pickle.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput +pickle.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput +# using this keyword argument is disallowed from Python 3.9 +pickle.loads(data=payload) # $ decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput MISSING: decodeInput=payload + +marshal.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput + +# if the file opened has been controlled by an attacker, this can lead to code +# execution. (underlying file format is pickle) +shelve.open(filepath) # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath +shelve.open(filename=filepath) # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath + # TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py base64.b64decode(payload) # $ decodeInput=payload decodeOutput=base64.b64decode(..) decodeFormat=Base64 base64.standard_b64decode(payload) # $ decodeInput=payload decodeOutput=base64.standard_b64decode(..) decodeFormat=Base64 From 27c368a444bcfaeea023f7ddf7b1917d5223496b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Oct 2021 21:24:12 +0200 Subject: [PATCH 03/12] Python: Model keyword arguments to `pickle.loads` --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 2 +- python/ql/test/library-tests/frameworks/stdlib/Decoding.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 0c4c49aa504..b0807a52c99 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -459,7 +459,7 @@ private module StdlibPrivate { override predicate mayExecuteInput() { any() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } override DataFlow::Node getOutput() { result = this } diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index 7455ff85856..f5f1f035e0d 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -7,7 +7,7 @@ pickle.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) pickle.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput # using this keyword argument is disallowed from Python 3.9 -pickle.loads(data=payload) # $ decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput MISSING: decodeInput=payload +pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput marshal.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput From 1b61296ea59f829ab6f7d376c84fcfc9c17eb254 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Oct 2021 21:25:48 +0200 Subject: [PATCH 04/12] Python: Model `pickle.load` --- .../ql/lib/semmle/python/frameworks/Stdlib.qll | 16 ++++++++++++++++ .../library-tests/frameworks/stdlib/Decoding.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index b0807a52c99..0f74eea8618 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -450,6 +450,22 @@ private module StdlibPrivate { /** Gets a reference to any of the `pickle` modules. */ API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) } + /** + * A call to `pickle.load` + * See https://docs.python.org/3/library/pickle.html#pickle.load + */ + private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + PickleLoadCall() { this = pickle().getMember("load").getACall() } + + override predicate mayExecuteInput() { any() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "pickle" } + } + /** * A call to `pickle.loads` * See https://docs.python.org/3/library/pickle.html#pickle.loads diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index f5f1f035e0d..a57685f9b3c 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -3,8 +3,8 @@ import marshal import shelve import base64 -pickle.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput -pickle.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput +pickle.load(file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput +pickle.load(file=file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput # using this keyword argument is disallowed from Python 3.9 pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput From a81d359669468d17ed1a86bebab5b03a221bf439 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Oct 2021 21:27:35 +0200 Subject: [PATCH 05/12] Python: Model `marshal.load` --- .../ql/lib/semmle/python/frameworks/Stdlib.qll | 16 ++++++++++++++++ .../library-tests/frameworks/stdlib/Decoding.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 0f74eea8618..c045e8fdb7f 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -428,6 +428,22 @@ private module StdlibPrivate { // --------------------------------------------------------------------------- // marshal // --------------------------------------------------------------------------- + /** + * A call to `marshal.load` + * See https://docs.python.org/3/library/marshal.html#marshal.load + */ + private class MarshalLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + MarshalLoadCall() { this = API::moduleImport("marshal").getMember("load").getACall() } + + override predicate mayExecuteInput() { any() } + + override DataFlow::Node getAnInput() { result = this.getArg(0) } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "marshal" } + } + /** * A call to `marshal.loads` * See https://docs.python.org/3/library/marshal.html#marshal.loads diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index a57685f9b3c..e468bf51c21 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -9,7 +9,7 @@ pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) dec # using this keyword argument is disallowed from Python 3.9 pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput -marshal.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput +marshal.load(file_) # $ decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput From 42980a1ab4646ff8d9e380c83f0270a2c5a3f148 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 09:07:05 +0200 Subject: [PATCH 06/12] Python: Model `shelve.open` --- .../lib/semmle/python/frameworks/Stdlib.qll | 36 +++++++++++++++++++ .../frameworks/stdlib/Decoding.py | 4 +-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index c045e8fdb7f..6e04e06aefb 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -498,6 +498,42 @@ private module StdlibPrivate { override string getFormat() { result = "pickle" } } + // --------------------------------------------------------------------------- + // shelve + // --------------------------------------------------------------------------- + /** + * A call to `shelve.open` + * See https://docs.python.org/3/library/shelve.html#shelve.open + * + * Claiming there is decoding of the input to `shelve.open` is a bit questionable, since + * it's not the filename, but the contents of the file that is decoded. + * + * However, we definitely want to be able to alert if a user is able to control what + * file is used, since that can lead to code execution (even if that file is free of + * path injection). + * + * So right now the best way we have of modeling this seems to be to treat the filename + * argument as being deserialized... + */ + private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range, + DataFlow::CallCfgNode { + ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() } + + override predicate mayExecuteInput() { any() } + + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("filename")] + } + + override DataFlow::Node getAPathArgument() { + result in [this.getArg(0), this.getArgByName("filename")] + } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "pickle" } + } + // --------------------------------------------------------------------------- // popen2 // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index e468bf51c21..0044de07add 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -15,8 +15,8 @@ marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) d # if the file opened has been controlled by an attacker, this can lead to code # execution. (underlying file format is pickle) -shelve.open(filepath) # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath -shelve.open(filename=filepath) # $ MISSING: decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath +shelve.open(filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath +shelve.open(filename=filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath # TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py base64.b64decode(payload) # $ decodeInput=payload decodeOutput=base64.b64decode(..) decodeFormat=Base64 From f9333fc5513b87f56703064ff4e0d24732abfeea Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 09:32:34 +0200 Subject: [PATCH 07/12] Python: Expand `dill` tests --- python/ql/test/library-tests/frameworks/dill/Decoding.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ql/test/library-tests/frameworks/dill/Decoding.py b/python/ql/test/library-tests/frameworks/dill/Decoding.py index 49eb551af04..3a8d4dc8da3 100644 --- a/python/ql/test/library-tests/frameworks/dill/Decoding.py +++ b/python/ql/test/library-tests/frameworks/dill/Decoding.py @@ -1,3 +1,6 @@ import dill -dill.loads(payload) # $decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput +dill.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput +dill.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput +dill.loads(payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput +dill.loads(str=payload) # $ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput MISSING: decodeInput=payload From 9180257afec24710fa2f177f20536a6d3414678c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 09:37:39 +0200 Subject: [PATCH 08/12] Python: Refactor `Dill.qll` So it matches the layout of all our other qll modules modeling a PyPI package. --- .../ql/lib/semmle/python/frameworks/Dill.qll | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Dill.qll b/python/ql/lib/semmle/python/frameworks/Dill.qll index 94af905756b..2ee66046e36 100644 --- a/python/ql/lib/semmle/python/frameworks/Dill.qll +++ b/python/ql/lib/semmle/python/frameworks/Dill.qll @@ -1,5 +1,5 @@ /** - * Provides classes modeling security-relevant aspects of the 'dill' package. + * Provides classes modeling security-relevant aspects of the `dill` PyPI package. * See https://pypi.org/project/dill/. */ @@ -10,18 +10,24 @@ private import semmle.python.Concepts private import semmle.python.ApiGraphs /** - * A call to `dill.loads` - * See https://pypi.org/project/dill/ (which currently refers you - * to https://docs.python.org/3/library/pickle.html#pickle.loads) + * Provides models for the `dill` PyPI package. + * See https://pypi.org/project/dill/. */ -private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode { - DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() } +private module Dill { + /** + * A call to `dill.loads` + * See https://pypi.org/project/dill/ (which currently refers you + * to https://docs.python.org/3/library/pickle.html#pickle.loads) + */ + private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode { + DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() } - override predicate mayExecuteInput() { any() } + override predicate mayExecuteInput() { any() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result = this.getArg(0) } - override DataFlow::Node getOutput() { result = this } + override DataFlow::Node getOutput() { result = this } - override string getFormat() { result = "dill" } + override string getFormat() { result = "dill" } + } } From 4820be3b10729ab87b6cbe20d433ed4aafb6d1c4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 09:38:34 +0200 Subject: [PATCH 09/12] Python: Model keyword arguments to `dill.loads` --- python/ql/lib/semmle/python/frameworks/Dill.qll | 2 +- python/ql/test/library-tests/frameworks/dill/Decoding.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Dill.qll b/python/ql/lib/semmle/python/frameworks/Dill.qll index 2ee66046e36..1b07da7aac9 100644 --- a/python/ql/lib/semmle/python/frameworks/Dill.qll +++ b/python/ql/lib/semmle/python/frameworks/Dill.qll @@ -24,7 +24,7 @@ private module Dill { override predicate mayExecuteInput() { any() } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("str")] } override DataFlow::Node getOutput() { result = this } diff --git a/python/ql/test/library-tests/frameworks/dill/Decoding.py b/python/ql/test/library-tests/frameworks/dill/Decoding.py index 3a8d4dc8da3..87ad1fa6e33 100644 --- a/python/ql/test/library-tests/frameworks/dill/Decoding.py +++ b/python/ql/test/library-tests/frameworks/dill/Decoding.py @@ -3,4 +3,4 @@ import dill dill.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput dill.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput dill.loads(payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput -dill.loads(str=payload) # $ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput MISSING: decodeInput=payload +dill.loads(str=payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput From 75b06d8a256bb0427982ebeb1913920d05363487 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 09:40:21 +0200 Subject: [PATCH 10/12] Python: Model `dill.load` --- python/ql/lib/semmle/python/frameworks/Dill.qll | 17 +++++++++++++++++ .../library-tests/frameworks/dill/Decoding.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Dill.qll b/python/ql/lib/semmle/python/frameworks/Dill.qll index 1b07da7aac9..168542272f9 100644 --- a/python/ql/lib/semmle/python/frameworks/Dill.qll +++ b/python/ql/lib/semmle/python/frameworks/Dill.qll @@ -14,6 +14,23 @@ private import semmle.python.ApiGraphs * See https://pypi.org/project/dill/. */ private module Dill { + /** + * A call to `dill.load` + * See https://pypi.org/project/dill/ (which currently refers you + * to https://docs.python.org/3/library/pickle.html#pickle.load) + */ + private class DillLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + DillLoadCall() { this = API::moduleImport("dill").getMember("load").getACall() } + + override predicate mayExecuteInput() { any() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] } + + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "dill" } + } + /** * A call to `dill.loads` * See https://pypi.org/project/dill/ (which currently refers you diff --git a/python/ql/test/library-tests/frameworks/dill/Decoding.py b/python/ql/test/library-tests/frameworks/dill/Decoding.py index 87ad1fa6e33..bcbd6ff4b35 100644 --- a/python/ql/test/library-tests/frameworks/dill/Decoding.py +++ b/python/ql/test/library-tests/frameworks/dill/Decoding.py @@ -1,6 +1,6 @@ import dill -dill.load(file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput -dill.load(file=file_) # $ MISSING: decodeInput=file_ decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput +dill.load(file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput +dill.load(file=file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput dill.loads(payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput dill.loads(str=payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput From 5e6f042f6eadf1c7d523d966c57aabdc9d740a1c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 11:51:43 +0200 Subject: [PATCH 11/12] Python: Model `pickle.Unpickler` --- .../ql/lib/semmle/python/frameworks/Stdlib.qll | 16 ++++++++++++++++ .../library-tests/frameworks/stdlib/Decoding.py | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 6e04e06aefb..f89f74f3c34 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -498,6 +498,22 @@ private module StdlibPrivate { override string getFormat() { result = "pickle" } } + /** + * A construction of a `pickle.Unpickler` + * See https://docs.python.org/3/library/pickle.html#pickle.Unpickler + */ + private class PickleUnpicklerCall extends Decoding::Range, DataFlow::CallCfgNode { + PickleUnpicklerCall() { this = pickle().getMember("Unpickler").getACall() } + + override predicate mayExecuteInput() { any() } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] } + + override DataFlow::Node getOutput() { result = this.getAMethodCall("load") } + + override string getFormat() { result = "pickle" } + } + // --------------------------------------------------------------------------- // shelve // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py index 0044de07add..e9e811ffc97 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/Decoding.py +++ b/python/ql/test/library-tests/frameworks/stdlib/Decoding.py @@ -9,6 +9,14 @@ pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) dec # using this keyword argument is disallowed from Python 3.9 pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput +# We don't really have a good way to model a decode happening over multiple statements +# like this. Since the important bit for `py/unsafe-deserialization` is the input, that +# is the main focus. We do a best effort to model the output though (but that will only +# work in local scope). +unpickler = pickle.Unpickler(file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput +unpickler.load() # $ decodeOutput=unpickler.load() +unpickler = pickle.Unpickler(file=file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput + marshal.load(file_) # $ decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput From fd0c386a4cacee21e106b4d4bca5f9ed57ef0cf8 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 8 Oct 2021 12:06:18 +0200 Subject: [PATCH 12/12] Python: Add change-note --- .../2021-10-08-improve-pickle-dill-shelve-modeling.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 python/change-notes/2021-10-08-improve-pickle-dill-shelve-modeling.md diff --git a/python/change-notes/2021-10-08-improve-pickle-dill-shelve-modeling.md b/python/change-notes/2021-10-08-improve-pickle-dill-shelve-modeling.md new file mode 100644 index 00000000000..48c868ee416 --- /dev/null +++ b/python/change-notes/2021-10-08-improve-pickle-dill-shelve-modeling.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.