mirror of
https://github.com/github/codeql.git
synced 2026-04-25 16:55:19 +02:00
Python: Add unsafe deserialization sinks (CWE-502)
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
category: minorAnalysis
|
||||
---
|
||||
* Improved modeling of decoding through pickle related functions (which can lead to code execution), resulting in additional sinks for the _Deserializing untrusted input_ query (`py/unsafe-deserialization`). Added support for `pandas.read_pickle`, `numpy.load` and `joblib.load`.
|
||||
@@ -62,3 +62,6 @@ private import semmle.python.frameworks.Urllib3
|
||||
private import semmle.python.frameworks.Xmltodict
|
||||
private import semmle.python.frameworks.Yaml
|
||||
private import semmle.python.frameworks.Yarl
|
||||
private import semmle.python.frameworks.Pandas
|
||||
private import semmle.python.frameworks.Numpy
|
||||
private import semmle.python.frameworks.Joblib
|
||||
|
||||
44
python/ql/lib/semmle/python/frameworks/Joblib.qll
Normal file
44
python/ql/lib/semmle/python/frameworks/Joblib.qll
Normal file
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `joblib` PyPI package.
|
||||
* See https://pypi.org/project/joblib/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `joblib` PyPI package.
|
||||
* See https://pypi.org/project/joblib/.
|
||||
*/
|
||||
private module Joblib {
  /**
   * A call to `joblib.load`
   * See https://pypi.org/project/joblib/
   *
   * Claiming there is decoding of the input to `joblib.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the filename
   * argument as being deserialized...
   */
  private class JoblibLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    JoblibLoadCall() { this = API::moduleImport("joblib").getMember("load").getACall() }

    /** Holds always: loading the file can lead to code execution. */
    override predicate mayExecuteInput() { any() }

    /** Gets the filename argument, passed either positionally or as `filename=`. */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("filename")]
    }

    /** Gets the decoded output, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "joblib" }
  }
}
|
||||
47
python/ql/lib/semmle/python/frameworks/Numpy.qll
Normal file
47
python/ql/lib/semmle/python/frameworks/Numpy.qll
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `numpy` PyPI package.
|
||||
* See https://pypi.org/project/numpy/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `numpy` PyPI package.
|
||||
* See https://pypi.org/project/numpy/.
|
||||
*/
|
||||
private module Numpy {
  /**
   * A call to `numpy.load` where `allow_pickle` is explicitly set to `True`
   * See https://pypi.org/project/numpy/
   *
   * Only calls that pass a literal `True` for `allow_pickle` are modeled; calls that
   * omit the argument are not treated as decoding by this class.
   *
   * Claiming there is decoding of the input to `numpy.load` is a bit questionable, since
   * it's not the filename, but the contents of the file that is decoded.
   *
   * However, we definitely want to be able to alert if a user is able to control what
   * file is used, since that can lead to code execution (even if that file is free of
   * path injection).
   *
   * So right now the best way we have of modeling this seems to be to treat the filename
   * argument as being deserialized...
   */
  private class NumpyLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
    NumpyLoadCall() {
      this = API::moduleImport("numpy").getMember("load").getACall() and
      // `allow_pickle` is the third parameter of `numpy.load`, so accept it both
      // positionally and by keyword.
      exists(DataFlow::Node allowPickle |
        allowPickle in [this.getArg(2), this.getArgByName("allow_pickle")] and
        allowPickle.asExpr() = any(True t)
      )
    }

    /** Holds always: loading with pickle support enabled can lead to code execution. */
    override predicate mayExecuteInput() { any() }

    /**
     * Gets the file argument, passed either positionally or by keyword.
     *
     * The parameter is called `file` in numpy; `filename` is still accepted so that
     * code already matched by the previous version of this model keeps matching.
     */
    override DataFlow::Node getAnInput() {
      result in [this.getArg(0), this.getArgByName("file"), this.getArgByName("filename")]
    }

    /** Gets the decoded output, which is the result of the call itself. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the name of the format being decoded. */
    override string getFormat() { result = "numpy" }
  }
}
|
||||
37
python/ql/lib/semmle/python/frameworks/Pandas.qll
Normal file
37
python/ql/lib/semmle/python/frameworks/Pandas.qll
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Provides classes modeling security-relevant aspects of the `pandas` PyPI package.
|
||||
* See https://pypi.org/project/pandas/.
|
||||
*/
|
||||
|
||||
private import python
|
||||
private import semmle.python.dataflow.new.DataFlow
|
||||
private import semmle.python.dataflow.new.RemoteFlowSources
|
||||
private import semmle.python.Concepts
|
||||
private import semmle.python.ApiGraphs
|
||||
|
||||
/**
|
||||
* Provides models for the `pandas` PyPI package.
|
||||
* See https://pypi.org/project/pandas/.
|
||||
*/
|
||||
private module Pandas {
  /**
   * Models `pandas.read_pickle(...)` as a decoding step that may execute its input.
   *
   * The first positional argument, or the `filepath_or_buffer` keyword argument, is
   * treated as the data being decoded, and the call itself as the decoded result.
   *
   * See https://pypi.org/project/pandas/ (which currently refers you
   * to https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_pickle.html)
   */
  private class PandasReadPickleCall extends Decoding::Range, DataFlow::CallCfgNode {
    PandasReadPickleCall() { this = API::moduleImport("pandas").getMember("read_pickle").getACall() }

    /** Holds unconditionally for every such call. */
    override predicate mayExecuteInput() { any() }

    /** Gets the argument that names (or is) the pickled data. */
    override DataFlow::Node getAnInput() {
      result = this.getArg(0)
      or
      result = this.getArgByName("filepath_or_buffer")
    }

    /** Gets the node holding the decoded value, i.e. this call. */
    override DataFlow::Node getOutput() { result = this }

    /** Gets the format label reported for this decoding. */
    override string getFormat() { result = "pandas" }
  }
}
|
||||
@@ -0,0 +1,2 @@
|
||||
failures
|
||||
testFailures
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1,4 @@
|
||||
import joblib
|
||||
|
||||
joblib.load(file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
|
||||
joblib.load(filename=file_) # $ decodeInput=file_ decodeOutput=joblib.load(..) decodeFormat=joblib decodeMayExecuteInput
|
||||
@@ -0,0 +1,2 @@
|
||||
failures
|
||||
testFailures
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1,4 @@
|
||||
import numpy

# `numpy.load` is only modeled as unsafe decoding when `allow_pickle=True` is passed,
# so both annotated calls must set it for the inline expectations below to hold.
numpy.load(file_, allow_pickle=True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeMayExecuteInput
numpy.load(filename=file_, allow_pickle=True) # $ decodeInput=file_ decodeOutput=numpy.load(..) decodeFormat=numpy decodeMayExecuteInput
|
||||
@@ -0,0 +1,2 @@
|
||||
failures
|
||||
testFailures
|
||||
@@ -0,0 +1,2 @@
|
||||
import python
|
||||
import experimental.meta.ConceptsTest
|
||||
@@ -0,0 +1,4 @@
|
||||
import pandas
|
||||
|
||||
pandas.read_pickle(file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pandas decodeMayExecuteInput
|
||||
pandas.read_pickle(filepath_or_buffer=file_) # $ decodeInput=file_ decodeOutput=pandas.read_pickle(..) decodeFormat=pandas decodeMayExecuteInput
|
||||
@@ -19,3 +19,6 @@ def hello():
|
||||
|
||||
import dill
|
||||
dill.loads(payload) # NOT OK
|
||||
|
||||
import pandas
|
||||
pandas.read_pickle(payload) # NOT OK
|
||||
Reference in New Issue
Block a user