Merge pull request #13781 from maikypedia/maikypedia/python-unsafe-deserialization

Python: Add unsafe deserialization sinks (CWE-502)
This commit is contained in:
Rasmus Wriedt Larsen
2023-10-10 13:30:38 +02:00
committed by GitHub
16 changed files with 153 additions and 2 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Improved modeling of decoding through pickle-related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load`, and `joblib.load`.

View File

@@ -7,8 +7,8 @@
private import semmle.python.frameworks.Aioch
private import semmle.python.frameworks.Aiohttp
private import semmle.python.frameworks.Aiomysql
private import semmle.python.frameworks.Aiosqlite
private import semmle.python.frameworks.Aiopg
private import semmle.python.frameworks.Aiosqlite
private import semmle.python.frameworks.Asyncpg
private import semmle.python.frameworks.BSon
private import semmle.python.frameworks.CassandraDriver
@@ -28,6 +28,7 @@ private import semmle.python.frameworks.Httpx
private import semmle.python.frameworks.Idna
private import semmle.python.frameworks.Invoke
private import semmle.python.frameworks.Jmespath
private import semmle.python.frameworks.Joblib
private import semmle.python.frameworks.Ldap
private import semmle.python.frameworks.Ldap3
private import semmle.python.frameworks.Libtaxii
@@ -37,7 +38,9 @@ private import semmle.python.frameworks.MarkupSafe
private import semmle.python.frameworks.Multidict
private import semmle.python.frameworks.Mysql
private import semmle.python.frameworks.MySQLdb
private import semmle.python.frameworks.Numpy
private import semmle.python.frameworks.Oracledb
private import semmle.python.frameworks.Pandas
private import semmle.python.frameworks.Peewee
private import semmle.python.frameworks.Phoenixdb
private import semmle.python.frameworks.Psycopg2
@@ -52,11 +55,11 @@ private import semmle.python.frameworks.RestFramework
private import semmle.python.frameworks.Rsa
private import semmle.python.frameworks.RuamelYaml
private import semmle.python.frameworks.ServerLess
private import semmle.python.frameworks.Setuptools
private import semmle.python.frameworks.Simplejson
private import semmle.python.frameworks.SqlAlchemy
private import semmle.python.frameworks.Starlette
private import semmle.python.frameworks.Stdlib
private import semmle.python.frameworks.Setuptools
private import semmle.python.frameworks.Toml
private import semmle.python.frameworks.Tornado
private import semmle.python.frameworks.Twisted

View File

@@ -0,0 +1,33 @@
/**
* Provides classes modeling security-relevant aspects of the `joblib` PyPI package.
* See https://pypi.org/project/joblib/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `joblib` PyPI package.
* See https://pypi.org/project/joblib/.
*/
private module Joblib {
  /**
   * A call to `joblib.load`, modeled as a decoding operation.
   *
   * The call is always treated as able to execute its input; the decoded
   * input is the first positional argument or the `filename` keyword argument.
   * See https://pypi.org/project/joblib/
   */
  private class JoblibLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    JoblibLoadCall() {
      exists(API::Node loadFunction |
        loadFunction = API::moduleImport("joblib").getMember("load") and
        this = loadFunction.getACall()
      )
    }

    // The format label reported for every `joblib.load` call.
    override string getFormat() { result = "joblib" }

    // The call node itself is the decoded value.
    override DataFlow::Node getOutput() { result = this }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("filename")
    }

    // Holds unconditionally: every modeled call is flagged as possibly executing its input.
    override predicate mayExecuteInput() { any() }
  }
}

View File

@@ -0,0 +1,42 @@
/**
* Provides classes modeling security-relevant aspects of the `numpy` PyPI package.
* See https://pypi.org/project/numpy/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `numpy` PyPI package.
* See https://pypi.org/project/numpy/.
*/
private module Numpy {
  /**
   * A call to `numpy.load`, modeled as a decoding operation.
   *
   * The call is only treated as able to execute its input when a literal
   * `True` reaches the `allow_pickle` argument (positional index 2).
   * See https://numpy.org/doc/stable/reference/generated/numpy.load.html
   */
  private class NumpyLoadCall extends Decoding::Range, API::CallNode {
    NumpyLoadCall() { this = API::moduleImport("numpy").getMember("load").getACall() }

    override predicate mayExecuteInput() {
      this.getParameter(2, "allow_pickle")
          .getAValueReachingSink()
          .asExpr()
          .(ImmutableLiteral)
          .booleanValue() = true
    }

    override DataFlow::Node getAnInput() {
      // The first parameter of `numpy.load` is named `file`, so that keyword must be
      // recognized. `filename` was the only keyword this model matched before and is
      // kept so that existing results do not change.
      result = this.getParameter(0, ["file", "filename"]).asSink()
    }

    // The call node itself is the decoded value.
    override DataFlow::Node getOutput() { result = this }

    override string getFormat() {
      result = "numpy"
      or
      // "pickle" is reported in addition to "numpy" only when pickling is enabled.
      this.mayExecuteInput() and result = "pickle"
    }
  }
}

View File

@@ -0,0 +1,37 @@
/**
* Provides classes modeling security-relevant aspects of the `pandas` PyPI package.
* See https://pypi.org/project/pandas/.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs
/**
* Provides models for the `pandas` PyPI package.
* See https://pypi.org/project/pandas/.
*/
private module Pandas {
  /**
   * A call to `pandas.read_pickle`, modeled as a decoding operation.
   *
   * The call is always treated as able to execute its input; the decoded input
   * is the first positional argument or the `filepath_or_buffer` keyword argument.
   * See https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html
   */
  private class PandasReadPickleCall extends Decoding::Range, DataFlow::CallCfgNode {
    PandasReadPickleCall() {
      exists(API::Node readPickleFunction |
        readPickleFunction = API::moduleImport("pandas").getMember("read_pickle") and
        this = readPickleFunction.getACall()
      )
    }

    // The format label reported for every `pandas.read_pickle` call.
    override string getFormat() { result = "pickle" }

    // The call node itself is the decoded value.
    override DataFlow::Node getOutput() { result = this }

    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("filepath_or_buffer")
    }

    // Holds unconditionally: every modeled call is flagged as possibly executing its input.
    override predicate mayExecuteInput() { any() }
  }
}

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,4 @@
# Fixture for the `joblib.load` model: the file argument is passed once
# positionally and once through the `filename` keyword. The trailing annotation
# on each call lists the decoding results expected for that line (input, output,
# format, and whether the call may execute its input).
# NOTE(review): `file_` is never defined in this file; presumably the file is
# only analyzed and never executed -- confirm.
import joblib
joblib.load(file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
joblib.load(filename=file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,6 @@
# Fixture for the `numpy.load` model. The first two calls leave pickling
# disabled and expect only the "numpy" format; the last two enable it (by keyword
# and as the third positional argument) and additionally expect the "pickle"
# format together with the may-execute-input marker.
# NOTE(review): `file_` is never defined in this file; presumably the file is
# only analyzed and never executed -- confirm.
import numpy
numpy.load(file_) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy
numpy.load(filename=file_) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy
numpy.load(file_, allow_pickle=True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeFormat=pickle decodeMayExecuteInput
numpy.load(file_, None, True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeFormat=pickle decodeMayExecuteInput

View File

@@ -0,0 +1,2 @@
failures
testFailures

View File

@@ -0,0 +1,2 @@
import python
import experimental.meta.ConceptsTest

View File

@@ -0,0 +1,4 @@
# Fixture for the `pandas.read_pickle` model: the file argument is passed once
# positionally and once through the `filepath_or_buffer` keyword. The trailing
# annotation on each call lists the decoding results expected for that line
# (input, output, format, and whether the call may execute its input).
# NOTE(review): `file_` is never defined in this file; presumably the file is
# only analyzed and never executed -- confirm.
import pandas
pandas.read_pickle(file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pickle decodeMayExecuteInput
pandas.read_pickle(filepath_or_buffer=file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pickle decodeMayExecuteInput

View File

@@ -5,6 +5,7 @@ edges
| unsafe_deserialization.py:14:5:14:11 | SSA variable payload | unsafe_deserialization.py:16:15:16:21 | ControlFlowNode for payload |
| unsafe_deserialization.py:14:5:14:11 | SSA variable payload | unsafe_deserialization.py:18:19:18:25 | ControlFlowNode for payload |
| unsafe_deserialization.py:14:5:14:11 | SSA variable payload | unsafe_deserialization.py:21:16:21:22 | ControlFlowNode for payload |
| unsafe_deserialization.py:14:5:14:11 | SSA variable payload | unsafe_deserialization.py:24:24:24:30 | ControlFlowNode for payload |
| unsafe_deserialization.py:14:15:14:21 | ControlFlowNode for request | unsafe_deserialization.py:14:15:14:26 | ControlFlowNode for Attribute |
| unsafe_deserialization.py:14:15:14:26 | ControlFlowNode for Attribute | unsafe_deserialization.py:14:15:14:41 | ControlFlowNode for Attribute() |
| unsafe_deserialization.py:14:15:14:41 | ControlFlowNode for Attribute() | unsafe_deserialization.py:14:5:14:11 | SSA variable payload |
@@ -19,9 +20,11 @@ nodes
| unsafe_deserialization.py:16:15:16:21 | ControlFlowNode for payload | semmle.label | ControlFlowNode for payload |
| unsafe_deserialization.py:18:19:18:25 | ControlFlowNode for payload | semmle.label | ControlFlowNode for payload |
| unsafe_deserialization.py:21:16:21:22 | ControlFlowNode for payload | semmle.label | ControlFlowNode for payload |
| unsafe_deserialization.py:24:24:24:30 | ControlFlowNode for payload | semmle.label | ControlFlowNode for payload |
subpaths
#select
| unsafe_deserialization.py:15:18:15:24 | ControlFlowNode for payload | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | unsafe_deserialization.py:15:18:15:24 | ControlFlowNode for payload | Unsafe deserialization depends on a $@. | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | user-provided value |
| unsafe_deserialization.py:16:15:16:21 | ControlFlowNode for payload | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | unsafe_deserialization.py:16:15:16:21 | ControlFlowNode for payload | Unsafe deserialization depends on a $@. | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | user-provided value |
| unsafe_deserialization.py:18:19:18:25 | ControlFlowNode for payload | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | unsafe_deserialization.py:18:19:18:25 | ControlFlowNode for payload | Unsafe deserialization depends on a $@. | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | user-provided value |
| unsafe_deserialization.py:21:16:21:22 | ControlFlowNode for payload | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | unsafe_deserialization.py:21:16:21:22 | ControlFlowNode for payload | Unsafe deserialization depends on a $@. | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | user-provided value |
| unsafe_deserialization.py:24:24:24:30 | ControlFlowNode for payload | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | unsafe_deserialization.py:24:24:24:30 | ControlFlowNode for payload | Unsafe deserialization depends on a $@. | unsafe_deserialization.py:8:26:8:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -19,3 +19,6 @@ def hello():
import dill
dill.loads(payload) # NOT OK
import pandas
pandas.read_pickle(payload) # NOT OK