mirror of
https://github.com/github/codeql.git
synced 2026-04-30 11:15:13 +02:00
Merge pull request #6837 from RasmusWL/more-unsafe-deserialization-sinks
Python: More unsafe deserialization sinks
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
lgtm,codescanning
|
||||
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Now we fully support `pickle.load`, `pickle.loads`, `pickle.Unpickler`, `marshal.load`, `marshal.loads`, `dill.load`, `dill.loads`, `shelve.open`.
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the 'dill' package.
|
||||
* Provides classes modeling security-relevant aspects of the `dill` PyPI package.
|
||||
* See https://pypi.org/project/dill/.
|
||||
*/
|
||||
|
||||
@@ -10,18 +10,41 @@ private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* A call to `dill.loads`
|
||||
* See https://pypi.org/project/dill/ (which currently refers you
|
||||
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
|
||||
* Provides models for the `dill` PyPI package.
|
||||
* See https://pypi.org/project/dill/.
|
||||
*/
|
||||
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
|
||||
private module Dill {
|
||||
/**
|
||||
* A call to `dill.load`
|
||||
* See https://pypi.org/project/dill/ (which currently refers you
|
||||
* to https://docs.python.org/3/library/pickle.html#pickle.load)
|
||||
*/
|
||||
private class DillLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
DillLoadCall() { this = API::moduleImport("dill").getMember("load").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "dill" }
|
||||
override string getFormat() { result = "dill" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `dill.loads`
|
||||
* See https://pypi.org/project/dill/ (which currently refers you
|
||||
* to https://docs.python.org/3/library/pickle.html#pickle.loads)
|
||||
*/
|
||||
private class DillLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
DillLoadsCall() { this = API::moduleImport("dill").getMember("loads").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("str")] }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "dill" }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,6 +428,22 @@ private module StdlibPrivate {
|
||||
// ---------------------------------------------------------------------------
|
||||
// marshal
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* A call to `marshal.load`
|
||||
* See https://docs.python.org/3/library/marshal.html#marshal.load
|
||||
*/
|
||||
private class MarshalLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
MarshalLoadCall() { this = API::moduleImport("marshal").getMember("load").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "marshal" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A call to `marshal.loads`
|
||||
* See https://docs.python.org/3/library/marshal.html#marshal.loads
|
||||
@@ -447,15 +463,23 @@ private module StdlibPrivate {
|
||||
// ---------------------------------------------------------------------------
|
||||
// pickle
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Gets a reference to the `pickle` module. */
|
||||
DataFlow::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getAUse() }
|
||||
/** Gets a reference to any of the `pickle` modules. */
|
||||
API::Node pickle() { result = API::moduleImport(["pickle", "cPickle", "_pickle"]) }
|
||||
|
||||
/** Provides models for the `pickle` module. */
|
||||
module pickle {
|
||||
/** Gets a reference to the `pickle.loads` function. */
|
||||
DataFlow::Node loads() {
|
||||
result = API::moduleImport(["pickle", "cPickle", "_pickle"]).getMember("loads").getAUse()
|
||||
}
|
||||
/**
|
||||
* A call to `pickle.load`
|
||||
* See https://docs.python.org/3/library/pickle.html#pickle.load
|
||||
*/
|
||||
private class PickleLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
PickleLoadCall() { this = pickle().getMember("load").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "pickle" }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -463,11 +487,63 @@ private module StdlibPrivate {
|
||||
* See https://docs.python.org/3/library/pickle.html#pickle.loads
|
||||
*/
|
||||
private class PickleLoadsCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
PickleLoadsCall() { this.getFunction() = pickle::loads() }
|
||||
PickleLoadsCall() { this = pickle().getMember("loads").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result = this.getArg(0) }
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
override string getFormat() { result = "pickle" }
|
||||
}
|
||||
|
||||
/**
|
||||
* A construction of a `pickle.Unpickler`
|
||||
* See https://docs.python.org/3/library/pickle.html#pickle.Unpickler
|
||||
*/
|
||||
private class PickleUnpicklerCall extends Decoding::Range, DataFlow::CallCfgNode {
|
||||
PickleUnpicklerCall() { this = pickle().getMember("Unpickler").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("file")] }
|
||||
|
||||
override DataFlow::Node getOutput() { result = this.getAMethodCall("load") }
|
||||
|
||||
override string getFormat() { result = "pickle" }
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// shelve
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
|
||||
* A call to `shelve.open`
|
||||
* See https://docs.python.org/3/library/shelve.html#shelve.open
|
||||
*
|
||||
* Claiming there is decoding of the input to `shelve.open` is a bit questionable, since
|
||||
* it's not the filename, but the contents of the file that is decoded.
|
||||
*
|
||||
* However, we definitely want to be able to alert if a user is able to control what
|
||||
* file is used, since that can lead to code execution (even if that file is free of
|
||||
* path injection).
|
||||
*
|
||||
* So right now the best way we have of modeling this seems to be to treat the filename
|
||||
* argument as being deserialized...
|
||||
*/
|
||||
private class ShelveOpenCall extends Decoding::Range, FileSystemAccess::Range,
|
||||
DataFlow::CallCfgNode {
|
||||
ShelveOpenCall() { this = API::moduleImport("shelve").getMember("open").getACall() }
|
||||
|
||||
override predicate mayExecuteInput() { any() }
|
||||
|
||||
override DataFlow::Node getAnInput() {
|
||||
result in [this.getArg(0), this.getArgByName("filename")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getAPathArgument() {
|
||||
result in [this.getArg(0), this.getArgByName("filename")]
|
||||
}
|
||||
|
||||
override DataFlow::Node getOutput() { result = this }
|
||||
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
import dill
|
||||
|
||||
dill.loads(payload) # $decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput
|
||||
dill.load(file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput
|
||||
dill.load(file=file_) # $ decodeInput=file_ decodeOutput=dill.load(..) decodeFormat=dill decodeMayExecuteInput
|
||||
dill.loads(payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput
|
||||
dill.loads(str=payload) # $ decodeInput=payload decodeOutput=dill.loads(..) decodeFormat=dill decodeMayExecuteInput
|
||||
|
||||
@@ -1,10 +1,31 @@
|
||||
import pickle
|
||||
import marshal
|
||||
import shelve
|
||||
import base64
|
||||
|
||||
pickle.load(file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput
|
||||
pickle.load(file=file_) # $ decodeInput=file_ decodeOutput=pickle.load(..) decodeFormat=pickle decodeMayExecuteInput
|
||||
pickle.loads(payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput
|
||||
# using this keyword argument is disallowed from Python 3.9
|
||||
pickle.loads(data=payload) # $ decodeInput=payload decodeOutput=pickle.loads(..) decodeFormat=pickle decodeMayExecuteInput
|
||||
|
||||
# We don't really have a good way to model a decode happening over multiple statements
|
||||
# like this. Since the important bit for `py/unsafe-deserialization` is the input, that
|
||||
# is the main focus. We do a best effort to model the output though (but that will only
|
||||
# work in local scope).
|
||||
unpickler = pickle.Unpickler(file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput
|
||||
unpickler.load() # $ decodeOutput=unpickler.load()
|
||||
unpickler = pickle.Unpickler(file=file_) # $ decodeInput=file_ decodeFormat=pickle decodeMayExecuteInput
|
||||
|
||||
marshal.load(file_) # $ decodeInput=file_ decodeOutput=marshal.load(..) decodeFormat=marshal decodeMayExecuteInput
|
||||
marshal.loads(payload) # $ decodeInput=payload decodeOutput=marshal.loads(..) decodeFormat=marshal decodeMayExecuteInput
|
||||
|
||||
|
||||
# if the file opened has been controlled by an attacker, this can lead to code
|
||||
# execution. (underlying file format is pickle)
|
||||
shelve.open(filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
|
||||
shelve.open(filename=filepath) # $ decodeInput=filepath decodeOutput=shelve.open(..) decodeFormat=pickle decodeMayExecuteInput getAPathArgument=filepath
|
||||
|
||||
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
|
||||
base64.b64decode(payload) # $ decodeInput=payload decodeOutput=base64.b64decode(..) decodeFormat=Base64
|
||||
base64.standard_b64decode(payload) # $ decodeInput=payload decodeOutput=base64.standard_b64decode(..) decodeFormat=Base64
|
||||
|
||||
Reference in New Issue
Block a user