fix some qldocs, change Sink extenstion model, deduct some not necessarily checks :)

This commit is contained in:
amammad
2023-12-07 13:45:28 +01:00
parent 4283bb7d48
commit 2d0067d618
4 changed files with 161 additions and 201 deletions

View File

@@ -18,6 +18,7 @@ import semmle.python.ApiGraphs
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.internal.DataFlowPublic
import experimental.semmle.python.security.DecompressionBomb
import FileAndFormRemoteFlowSource::FileAndFormRemoteFlowSource
/**
* `io.TextIOWrapper(ip, encoding='utf-8')` like following:
@@ -39,59 +40,12 @@ predicate isAdditionalTaintStepTextIOWrapper(DataFlow::Node nodeFrom, DataFlow::
)
}
module FileAndFormRemoteFlowSource {
class FastAPI extends RemoteFlowSource::Range {
FastAPI() {
exists(API::Node fastApiParam, Expr fastApiUploadFile |
fastApiParam =
API::moduleImport("fastapi")
.getMember("FastAPI")
.getReturn()
.getMember("post")
.getReturn()
.getParameter(0)
.getKeywordParameter(_) and
fastApiUploadFile =
API::moduleImport("fastapi")
.getMember("UploadFile")
.getASubclass*()
.getAValueReachableFromSource()
.asExpr()
|
fastApiUploadFile =
fastApiParam.asSource().asExpr().(Parameter).getAnnotation().getASubExpression*() and
// Multiple Uploaded files as list of fastapi.UploadFile
exists(For f, Attribute attr |
fastApiParam.getAValueReachableFromSource().asExpr() = f.getIter().getASubExpression*()
|
TaintTracking::localExprTaint(f.getIter(), attr.getObject()) and
attr.getName() = ["filename", "content_type", "headers", "file", "read"] and
this.asExpr() = attr
)
or
// one Uploaded file as fastapi.UploadFile
this =
[
fastApiParam.getMember(["filename", "content_type", "headers"]).asSource(),
fastApiParam
.getMember("file")
.getMember(["readlines", "readline", "read"])
.getReturn()
.asSource(), fastApiParam.getMember("read").getReturn().asSource()
]
)
}
override string getSourceType() { result = "fastapi HTTP FORM files" }
}
}
module BombsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
(
source instanceof RemoteFlowSource
or
source instanceof FileAndFormRemoteFlowSource::FastAPI
source instanceof FastAPI
) and
not source.getLocation().getFile().inStdlib() and
not source.getLocation().getFile().getRelativePath().matches("%venv%")
@@ -113,11 +67,11 @@ module BombsConfig implements DataFlow::ConfigSig {
}
}
module Bombs = TaintTracking::Global<BombsConfig>;
module BombsFlow = TaintTracking::Global<BombsConfig>;
import Bombs::PathGraph
import BombsFlow::PathGraph
from Bombs::PathNode source, Bombs::PathNode sink
where Bombs::flowPath(source, sink)
from BombsFlow::PathNode source, BombsFlow::PathNode sink
where BombsFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "This uncontrolled file extraction is $@.", source.getNode(),
"depends on this user controlled data"

View File

@@ -0,0 +1,63 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.ApiGraphs
/**
* Provides user-controllable Remote sources for file(s) upload and Multipart-Form
*/
module FileAndFormRemoteFlowSource {
/**
* A
*/
class FastAPI extends DataFlow::Node {
FastAPI() {
exists(API::Node fastApiParam, Expr fastApiUploadFile |
fastApiParam =
API::moduleImport("fastapi")
.getMember("FastAPI")
.getReturn()
.getMember("post")
.getReturn()
.getParameter(0)
.getKeywordParameter(_) and
fastApiUploadFile =
API::moduleImport("fastapi")
.getMember("UploadFile")
.getASubclass*()
.getAValueReachableFromSource()
.asExpr()
|
// Multiple uploaded files as list of fastapi.UploadFile
// @app.post("/")
// def upload(files: List[UploadFile] = File(...)):
// for file in files:
fastApiUploadFile =
fastApiParam.asSource().asExpr().(Parameter).getAnnotation().getASubExpression*() and
exists(For f, Attribute attr |
fastApiParam.getAValueReachableFromSource().asExpr() = f.getIter().getASubExpression*()
|
TaintTracking::localExprTaint(f.getIter(), attr.getObject()) and
attr.getName() = ["filename", "content_type", "headers", "file", "read"] and
this.asExpr() = attr
)
or
// One uploaded file as fastapi.UploadFile
// @app.post("/zipbomb2")
// async def zipbomb2(file: UploadFile):
// print(file.filename)
this =
[
fastApiParam.getMember(["filename", "content_type", "headers"]).asSource(),
fastApiParam
.getMember("file")
.getMember(["readlines", "readline", "read"])
.getReturn()
.asSource(), fastApiParam.getMember("read").getReturn().asSource()
]
)
}
string getSourceType() { result = "fastapi HTTP FORM files" }
}
}

View File

@@ -2,20 +2,33 @@ import zipfile
def safeUnzip(zipFileName):
buffer_size = 2 ** 10 * 8
'''
safeUnzip reads each file inside the zipfile 1 MB by 1 MB
and during extraction or reading of these files it checks the total decompressed size
doesn't exceed the SIZE_THRESHOLD
'''
buffer_size = 1024 * 1024 * 1 # 1 MB
total_size = 0
SIZE_THRESHOLD = 2 ** 10 * 100
SIZE_THRESHOLD = 1024 * 1024 * 10 # 10 MB
with zipfile.ZipFile(zipFileName) as myzip:
for fileinfo in myzip.infolist():
content = b''
with myzip.open(fileinfo.filename, mode="r") as myfile:
content = b''
chunk = myfile.read(buffer_size)
total_size += buffer_size
if total_size > SIZE_THRESHOLD:
print("Bomb detected")
return False # it isn't a successful extract or read
content += chunk
# reading next bytes of uncompressed data
while chunk:
chunk = myfile.read(buffer_size)
total_size += buffer_size
if total_size > SIZE_THRESHOLD:
print("Bomb detected")
return
return False # it isn't a successful extract or read
content += chunk
# An example of extracting or reading each decompressed file here
print(bytes.decode(content, 'utf-8'))
return "No Bombs!"
return True # it is a successful extract or read

View File

@@ -6,13 +6,6 @@ import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.internal.DataFlowPublic
module DecompressionBomb {
/**
* The Sinks of uncontrolled data decompression, use this class in your queries
*/
class Sink extends DataFlow::Node {
Sink() { this = any(Range r).sink() }
}
/**
* The additional taint steps that need for creating taint tracking or dataflow.
*/
@@ -28,15 +21,7 @@ module DecompressionBomb {
/**
* A abstract class responsible for extending new decompression sinks
*/
abstract class Range extends API::Node {
/**
* Gets the sink of responsible for decompression node
*
* it can be a path, stream of compressed data,
* or a call to function that use pipe
*/
abstract DataFlow::Node sink();
}
abstract class Sink extends DataFlow::Node { }
}
module ZipFile {
@@ -56,68 +41,26 @@ module ZipFile {
}
/**
* The Decompression Sinks of `zipfile` library
*
* ```python
* myzip = zipfile.ZipFile("zipfileName.zip")
* myzip.open('eggs.txt',"r").read()
* myzip.extractall()
* ```
* A Call to `extractall`, `extract()`, or `extractfile()` on an open ZipFile.
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = zipFileClass() }
/**
* An function call of tarfile for extracting compressed data
*
* `tarfile.open(filepath).extractall()` or `tarfile.open(filepath).extract()`or `tarfile.open(filepath).extractfile()`
* or `tarfile.Tarfile.xzopen()` or `tarfile.Tarfile.gzopen()` or `tarfile.Tarfile.bz2open()`
*/
override DataFlow::Node sink() {
(
result = this.getReturn().getMember(["extractall", "read", "extract", "testzip"]).getACall()
or
exists(API::Node openInstance |
openInstance = this.getReturn().getMember("open") and
(
not exists(
openInstance
.getACall()
.getParameter(1, "mode")
.getAValueReachingSink()
.asExpr()
.(StrConst)
.getText()
) or
openInstance
.getACall()
.getParameter(1, "mode")
.getAValueReachingSink()
.asExpr()
.(StrConst)
.getText() = "r"
) and
(
not exists(
this.getACall()
.getParameter(1, "mode")
.getAValueReachingSink()
.asExpr()
.(StrConst)
.getText()
) or
this.getACall()
.getParameter(1, "mode")
.getAValueReachingSink()
.asExpr()
.(StrConst)
.getText() = "r"
) and
not zipFileDecompressionBombSanitizer(this) and
result = openInstance.getACall()
)
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
this =
zipFileClass()
.getReturn()
.getMember(["extractall", "read", "extract", "testzip"])
.getACall()
or
exists(API::Node zipOpen | zipOpen = zipFileClass().getReturn().getMember("open") |
// this open function must reads uncompressed data with buffer
// and checks the accumulated size at the end of each read to be called safe
not TaintTracking::localExprTaint(zipOpen
.getReturn()
.getMember("read")
.getParameter(0)
.asSink()
.asExpr(), any(Compare i).getASubExpression*()) and
this = zipOpen.getACall()
)
}
}
@@ -157,17 +100,16 @@ module ZipFile {
DecompressionAdditionalTaintStep() { this = "AdditionalTaintStep" }
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DecompressionSink zipFileInstance |
exists(API::Node zipFileInstance | zipFileInstance = zipFileClass() |
nodeFrom =
[zipFileInstance.getACall().getParameter(0, "file").asSink(), zipFileInstance.getACall()] and
nodeTo =
[
zipFileInstance.sink(),
zipFileInstance
.getACall()
.getReturn()
.getMember(["extractall", "read", "extract", "testzip"])
.getACall()
.getACall(), zipFileInstance.getReturn().getMember("open").getACall()
]
)
}
@@ -179,20 +121,17 @@ module ZipFile {
*/
module TarFile {
/**
* The Decompression Sinks of `tarfile` library
* A Call to `extractall`, `extract()`, or `extractfile()` on an open tarfile,
* or
* A Call to `gzopen`, `xzopen()`, or `bz2open()` on a tarfile.Tarfile.
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = tarfileExtractMember() }
/**
* An function call of tarfile for extracting compressed data
* `tarfile.open(filepath).extractall()` or `tarfile.open(filepath).extract()`or `tarfile.open(filepath).extractfile()`
* or `tarfile.Tarfile.xzopen()` or `tarfile.Tarfile.gzopen()` or `tarfile.Tarfile.bz2open()`
*/
override DataFlow::Node sink() {
result = this.getReturn().getMember(["extractall", "extract", "extractfile"]).getACall()
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
this =
tarfileExtractMember()
.getReturn()
.getMember(["extractall", "extract", "extractfile"])
.getACall()
}
}
@@ -251,34 +190,34 @@ module Pandas {
/**
* The Decompression Sinks of `pandas` library
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = API::moduleImport("pandas") }
override DataFlow::Node sink() {
exists(API::CallNode calltoPandasMethods |
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
exists(API::CallNode callToPandasMethods |
(
calltoPandasMethods =
this.getMember([
"read_csv", "read_json", "read_sas", "read_stata", "read_table", "read_xml"
]).getACall() and
result = calltoPandasMethods.getArg(0)
callToPandasMethods =
API::moduleImport("pandas")
.getMember([
"read_csv", "read_json", "read_sas", "read_stata", "read_table", "read_xml"
])
.getACall() and
this = callToPandasMethods.getArg(0)
or
calltoPandasMethods =
this.getMember(["read_csv", "read_sas", "read_stata", "read_table"]).getACall() and
result = calltoPandasMethods.getArgByName("filepath_or_buffer")
callToPandasMethods =
API::moduleImport("pandas")
.getMember(["read_csv", "read_sas", "read_stata", "read_table"])
.getACall() and
this = callToPandasMethods.getArgByName("filepath_or_buffer")
or
calltoPandasMethods = this.getMember("read_json").getACall() and
result = calltoPandasMethods.getArgByName("path_or_buf")
callToPandasMethods = API::moduleImport("pandas").getMember("read_json").getACall() and
this = callToPandasMethods.getArgByName("path_or_buf")
or
calltoPandasMethods = this.getMember("read_xml").getACall() and
result = calltoPandasMethods.getArgByName("path_or_buffer")
callToPandasMethods = API::moduleImport("pandas").getMember("read_xml").getACall() and
this = callToPandasMethods.getArgByName("path_or_buffer")
) and
(
not exists(calltoPandasMethods.getArgByName("compression"))
not exists(callToPandasMethods.getArgByName("compression"))
or
not calltoPandasMethods
not callToPandasMethods
.getKeywordParameter("compression")
.getAValueReachingSink()
.asExpr()
@@ -297,12 +236,15 @@ module Shutil {
/**
* The Decompression Sinks of `shutil` library
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = API::moduleImport("shutil").getMember("unpack_archive") }
override DataFlow::Node sink() { result = this.getACall().getParameter(0, "filename").asSink() }
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
this =
API::moduleImport("shutil")
.getMember("unpack_archive")
.getACall()
.getParameter(0, "filename")
.asSink()
}
}
}
@@ -322,14 +264,10 @@ module Gzip {
*
* only read mode is sink
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = gzipInstance() }
override DataFlow::Node sink() {
exists(API::CallNode gzipCall | gzipCall = this.getACall() |
result = gzipCall.getParameter(0, "filename").asSink() and
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
exists(API::CallNode gzipCall | gzipCall = gzipInstance().getACall() |
this = gzipCall.getParameter(0, "filename").asSink() and
(
not exists(
gzipCall.getParameter(1, "mode").getAValueReachingSink().asExpr().(StrConst).getText()
@@ -363,14 +301,10 @@ module Bz2 {
*
* only read mode is sink
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = bz2Instance() }
override DataFlow::Node sink() {
exists(API::CallNode bz2Call | bz2Call = this.getACall() |
result = bz2Call.getParameter(0, "filename").asSink() and
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
exists(API::CallNode bz2Call | bz2Call = bz2Instance().getACall() |
this = bz2Call.getParameter(0, "filename").asSink() and
(
not exists(
bz2Call.getParameter(1, "mode").getAValueReachingSink().asExpr().(StrConst).getText()
@@ -404,14 +338,10 @@ module Lzma {
*
* only read mode is sink
*/
class DecompressionSink extends DecompressionBomb::Range {
override string toString() { result = "DecompressionSink" }
DecompressionSink() { this = lzmaInstance() }
override DataFlow::Node sink() {
exists(API::CallNode lzmaCall | lzmaCall = this.getACall() |
result = lzmaCall.getParameter(0, "filename").asSink() and
class DecompressionSink extends DecompressionBomb::Sink {
DecompressionSink() {
exists(API::CallNode lzmaCall | lzmaCall = lzmaInstance().getACall() |
this = lzmaCall.getParameter(0, "filename").asSink() and
(
not exists(
lzmaCall.getParameter(1, "mode").getAValueReachingSink().asExpr().(StrConst).getText()