Python: Add unsafe deserialization sinks (CWE-502)

This commit is contained in:
Maiky
2023-07-20 03:26:22 +02:00
parent 9b0d7f3515
commit a1782182dd
15 changed files with 161 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
category: minorAnalysis
---
* Improved modeling of decoding through pickle-related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load` and `joblib.load`.

View File

@@ -62,3 +62,6 @@ private import semmle.python.frameworks.Urllib3
private import semmle.python.frameworks.Xmltodict
private import semmle.python.frameworks.Yaml
private import semmle.python.frameworks.Yarl
private import semmle.python.frameworks.Pandas
private import semmle.python.frameworks.Numpy
private import semmle.python.frameworks.Joblib

View File

@@ -0,0 +1,44 @@
/**
* Provides classes modeling security-relevant aspects of the `joblib` PyPI package.
* See https://pypi.org/project/joblib/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Provides models for the `joblib` PyPI package.
 * See https://pypi.org/project/joblib/.
 */
private module Joblib {
  /**
   * A call to `joblib.load`.
   * See https://joblib.readthedocs.io/en/latest/generated/joblib.load.html.
   *
   * Claiming there is decoding of the input to `joblib.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the filename
   * argument as being deserialized...
   */
  private class JoblibLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    JoblibLoadCall() { this = API::moduleImport("joblib").getMember("load").getACall() }

    /** Holds unconditionally: every modeled `joblib.load` call is treated as able to execute its input. */
    override predicate mayExecuteInput() { any() }

    /** Gets the file argument, passed either positionally or as the `filename` keyword. */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("filename")]
    }

    /** Gets the decoded output, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "joblib" }
  }
}

View File

@@ -0,0 +1,47 @@
/**
* Provides classes modeling security-relevant aspects of the `numpy` PyPI package.
* See https://pypi.org/project/numpy/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Provides models for the `numpy` PyPI package.
 * See https://pypi.org/project/numpy/.
 */
private module Numpy {
  /**
   * A call to `numpy.load` that explicitly enables unpickling with `allow_pickle=True`.
   * See https://numpy.org/doc/stable/reference/generated/numpy.load.html.
   *
   * Claiming there is decoding of the input to `numpy.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the file
   * argument as being deserialized...
   */
  private class NumpyLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    NumpyLoadCall() {
      this = API::moduleImport("numpy").getMember("load").getACall() and
      // `allow_pickle` is the third parameter of `numpy.load`, so it may be given
      // positionally as well as by keyword.
      exists(DataFlow::Node allowPickle |
        allowPickle in [this.getArg(2), this.getArgByName("allow_pickle")]
      |
        allowPickle.asExpr() instanceof True
      )
    }

    /**
     * Holds unconditionally: the characteristic predicate already restricts this class
     * to calls where unpickling is enabled.
     */
    override predicate mayExecuteInput() { any() }

    /**
     * Gets the file argument, passed either positionally or by keyword.
     *
     * The documented parameter name is `file`; `filename` is still accepted here so
     * that results reported by the previous version of this model are preserved.
     */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("file"), this.getArgByName("filename")]
    }

    /** Gets the decoded output, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "numpy" }
  }
}

View File

@@ -0,0 +1,37 @@
/**
* Provides classes modeling security-relevant aspects of the `pandas` PyPI package.
* See https://pypi.org/project/pandas/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
 * Models for the `pandas` PyPI package.
 * See https://pypi.org/project/pandas/.
 */
private module Pandas {
  /**
   * A call to `pandas.read_pickle`, modeled as a decoding step whose input is the
   * path-or-buffer argument and whose output is the value returned by the call.
   *
   * See https://pypi.org/project/pandas/ (which currently refers you
   * to https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html)
   */
  private class PandasReadPickleCall extends Decoding::Range, DataFlow::CallCfgNode {
    PandasReadPickleCall() {
      exists(API::Node readPickle |
        readPickle = API::moduleImport("pandas").getMember("read_pickle")
      |
        this = readPickle.getACall()
      )
    }

    /** Holds unconditionally: every modeled call is treated as able to execute its input. */
    override predicate mayExecuteInput() { any() }

    /** Gets the first positional argument or the `filepath_or_buffer` keyword argument. */
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("filepath_or_buffer")
    }

    /** Gets the decoded output, which is the call node itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "pandas" }
  }
}

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,4 @@
# Exercises `joblib.load` with the file given positionally and via the `filename` keyword.
# The trailing `# $ ...` annotations state the expected decoding results for each call;
# presumably they are checked by the accompanying ConceptsTest query -- confirm.
# NOTE(review): `file_` is never defined in this script, so it looks like it is only
# meant to be analyzed, not executed.
import joblib
joblib.load(file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
joblib.load(filename=file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,4 @@
# Exercises the `numpy.load` model. The trailing `# $ ...` annotations state the
# expected decoding results for each call.
# NOTE(review): `file_` is never defined in this script, so it looks like it is only
# meant to be analyzed, not executed.
import numpy

# The model only matches calls that pass `allow_pickle=True`, so a plain call must
# produce no decoding results and therefore carries no annotation.
numpy.load(file_)

numpy.load(file_, allow_pickle=True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeMayExecuteInput
numpy.load(filename=file_, allow_pickle=True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeMayExecuteInput

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,4 @@
# Exercises `pandas.read_pickle` with the input given positionally and via the
# `filepath_or_buffer` keyword.
# The trailing `# $ ...` annotations state the expected decoding results for each call;
# presumably they are checked by the accompanying ConceptsTest query -- confirm.
# NOTE(review): `file_` is never defined in this script, so it looks like it is only
# meant to be analyzed, not executed.
import pandas
pandas.read_pickle(file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pandas decodeMayExecuteInput
pandas.read_pickle(filepath_or_buffer=file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pandas decodeMayExecuteInput

View File

@@ -19,3 +19,6 @@ def hello():
import dill
dill.loads(payload) # NOT OK
import pandas
pandas.read_pickle(payload) # NOT OK