Merge pull request #11570 from Sim4n6/UnsafeUnpack

Python: Unsafe unpacking using `shutil.unpack_archive()` query and tests
This commit is contained in:
yoff
2023-02-17 09:48:05 +01:00
committed by GitHub
17 changed files with 612 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tarball without validating that the destination file path
is within the destination directory using <code>shutil.unpack_archive()</code> can cause files outside the
destination directory to be overwritten, due to the possible presence of directory traversal elements
(<code>..</code>) in archive path names.</p>
<p>Tarball contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tarball contains a file entry <code>../sneaky-file.txt</code>, and the tarball
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
in an output file path of <code>/tmp/tmp123/../sneaky-file.txt</code>, which would cause the file to be
written to <code>/tmp/</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tarball entries are validated
to prevent writing files to unexpected locations.</p>
<p>Consider using a safer module, such as: <code>zipfile</code></p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
</p>
<sample src="examples/HIT_UnsafeUnpack.py" />
<p>To fix this vulnerability, we need to call the function <code>tarfile.extract()</code>
on each <code>member</code> after verifying that it does not contain either <code>..</code> or startswith <code>/</code>.
</p>
<sample src="examples/NoHIT_UnsafeUnpack.py" />
</example>
<references>
<li>
Shutil official documentation
<a href="https://docs.python.org/3/library/shutil.html?highlight=unpack_archive#shutil.unpack_archive">shutil.unpack_archive() warning.</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,24 @@
/**
* @name Arbitrary file write during a tarball extraction from a user controlled source
* @description Extracting files from a potentially malicious tarball using `shutil.unpack_archive()` without validating
* that the destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten. More precisely, if the tarball comes from a user controlled
* location either a remote one or cli argument.
* @kind path-problem
* @id py/unsafe-unpacking
* @problem.severity error
* @security-severity 7.5
* @precision high
* @tags security
* experimental
* external/cwe/cwe-022
*/
import python
import experimental.Security.UnsafeUnpackQuery
import DataFlow::PathGraph
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"Unsafe extraction from a malicious tarball retrieved from a remote location."

View File

@@ -0,0 +1,12 @@
import requests
import shutil
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
shutil.unpack_archive(tarpath, untarredpath)

View File

@@ -0,0 +1,17 @@
import requests
import tarfile
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
with tarfile.open(tarpath) as tar:
for member in tar.getmembers():
if member.name.startswith("/") or ".." in member.name:
raise Exception("Path traversal identified in tarball")
tar.extract(untarredpath, member)

View File

@@ -0,0 +1,213 @@
/**
* Provides a taint-tracking configuration for detecting "UnsafeUnpacking" vulnerabilities.
*/
import python
import semmle.python.Concepts
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.frameworks.Stdlib
import semmle.python.dataflow.new.RemoteFlowSources
/**
* Handle those three cases of Tarfile opens:
* - `tarfile.open()`
* - `tarfile.TarFile()`
* - `MKtarfile.Tarfile.open()`
*/
API::Node tarfileOpen() {
result in [
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
]
}
/**
* A class for handling the previous three cases, plus the use of `closing` in with the previous cases
*/
class AllTarfileOpens extends API::CallNode {
AllTarfileOpens() {
this = tarfileOpen().getACall()
or
exists(API::Node closing, Node arg |
closing = API::moduleImport("contextlib").getMember("closing") and
this = closing.getACall() and
arg = this.getArg(0) and
arg = tarfileOpen().getACall()
)
}
}
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
source instanceof RemoteFlowSource
or
// A source coming from a CLI argparse module
// see argparse: https://docs.python.org/3/library/argparse.html
exists(MethodCallNode args |
args = source.(AttrRead).getObject().getALocalSource() and
args =
API::moduleImport("argparse")
.getMember("ArgumentParser")
.getReturn()
.getMember("parse_args")
.getACall()
)
or
// A source catching an S3 file download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
source =
API::moduleImport("boto3")
.getMember("client")
.getReturn()
.getMember(["download_file", "download_fileobj"])
.getACall()
.getArg(2)
or
// A source catching an S3 file download
// see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
source =
API::moduleImport("boto3")
.getMember("Session")
.getReturn()
.getMember("client")
.getReturn()
.getMember(["download_file", "download_fileobj"])
.getACall()
.getArg(2)
or
// A source download a file using wget
// see wget: https://pypi.org/project/wget/
exists(API::CallNode mcn |
mcn = API::moduleImport("wget").getMember("download").getACall() and
if exists(mcn.getArg(1)) then source = mcn.getArg(1) else source = mcn.getReturn().asSource()
)
or
// catch the Django uploaded files as a source
// see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
source.(AttrRead).getAttributeName() = "FILES"
}
override predicate isSink(DataFlow::Node sink) {
(
// A sink capturing method calls to `unpack_archive`.
sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
or
// A sink capturing method calls to `extractall` without `members` argument.
// For a call to `file.extractall` without `members` argument, `file` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
not exists(call.getArgByName("members")) and
sink = call.getObject()
)
or
// A sink capturing method calls to `extractall` with `members` argument.
// For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
// a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
// Otherwise, the argument of `members` is considered a sink.
exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("extractall").getACall() and
arg = call.getArgByName("members") and
if
arg.asCfgNode() instanceof NameConstantNode or
arg.asCfgNode() instanceof ListNode
then sink = call.getObject()
else
if arg.(MethodCallNode).getMethodName() = "getmembers"
then sink = arg.(MethodCallNode).getObject()
else sink = call.getArgByName("members")
)
or
// An argument to `extract` is considered a sink.
exists(AllTarfileOpens atfo |
sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
)
or
//An argument to `_extract_member` is considered a sink.
exists(MethodCallNode call, AllTarfileOpens atfo |
call = atfo.getReturn().getMember("_extract_member").getACall() and
call.getArg(1).(AttrRead).accesses(sink, "name")
)
) and
not sink.getScope().getLocation().getFile().inStdlib()
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Reading the response
nodeTo.(MethodCallNode).calls(nodeFrom, "read")
or
// Open a file for access
exists(MethodCallNode cn |
cn.calls(nodeTo, "open") and
cn.flowsTo(nodeFrom)
)
or
// Open a file for access using builtin
exists(API::CallNode cn |
cn = API::builtin("open").getACall() and
nodeTo = cn.getArg(0) and
cn.flowsTo(nodeFrom)
)
or
// Write access
exists(MethodCallNode cn |
cn.calls(nodeTo, "write") and
nodeFrom = cn.getArg(0)
)
or
// Retrieve Django uploaded files
// see getlist(): https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.QueryDict.getlist
// see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
nodeTo.(MethodCallNode).calls(nodeFrom, ["getlist", "get", "chunks"])
or
// Considering the use of "fs"
// see fs: https://docs.djangoproject.com/en/4.1/ref/files/storage/#the-filesystemstorage-class
nodeTo =
API::moduleImport("django")
.getMember("core")
.getMember("files")
.getMember("storage")
.getMember("FileSystemStorage")
.getReturn()
.getMember(["save", "path"])
.getACall() and
nodeFrom = nodeTo.(MethodCallNode).getArg(0)
or
// Accessing the name or raw content
nodeTo.(AttrRead).accesses(nodeFrom, ["name", "raw"])
or
// Join the base_dir to the filename
nodeTo = API::moduleImport("os").getMember("path").getMember("join").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(1)
or
// Go through an Open for a Tarfile
nodeTo = tarfileOpen().getACall() and nodeFrom = nodeTo.(MethodCallNode).getArg(0)
or
// Handle the case where the getmembers is used.
nodeTo.(MethodCallNode).calls(nodeFrom, "getmembers") and
nodeFrom instanceof AllTarfileOpens
or
// To handle the case of `with closing(tarfile.open()) as file:`
// we add a step from the first argument of `closing` to the call to `closing`,
// whenever that first argument is a return of `tarfile.open()`.
nodeTo = API::moduleImport("contextlib").getMember("closing").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(0) and
nodeFrom = tarfileOpen().getReturn().getAValueReachableFromSource()
or
// see Path : https://docs.python.org/3/library/pathlib.html#pathlib.Path
nodeTo = API::moduleImport("pathlib").getMember("Path").getACall() and
nodeFrom = nodeTo.(API::CallNode).getArg(0)
or
// Use of absolutepath
// see absolute : https://docs.python.org/3/library/pathlib.html#pathlib.Path.absolute
exists(API::CallNode mcn |
mcn = API::moduleImport("pathlib").getMember("Path").getACall() and
nodeTo = mcn.getAMethodCall("absolute") and
nodeFrom = mcn.getArg(0)
)
}
}

View File

@@ -0,0 +1,2 @@
missingAnnotationOnSink
failures

View File

@@ -0,0 +1,3 @@
import python
import experimental.dataflow.TestUtil.DataflowQueryTest
import experimental.Security.UnsafeUnpackQuery

View File

@@ -0,0 +1,83 @@
edges
| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request |
| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:5:26:5:32 | GSSA Variable request |
| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request |
| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath |
| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path |
| UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath |
| UnsafeUnpack.py:103:23:103:27 | SSA variable chunk | UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath |
| UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript |
| UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript | UnsafeUnpack.py:103:23:103:27 | SSA variable chunk |
| UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path |
| UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:116:17:116:21 | SSA variable ufile |
| UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path |
| UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar |
| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | UnsafeUnpack.py:163:23:163:28 | SSA variable member |
| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript |
| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | UnsafeUnpack.py:158:23:158:27 | SSA variable chunk |
| UnsafeUnpack.py:163:23:163:28 | SSA variable member | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result |
| UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute |
nodes
| UnsafeUnpack.py:0:0:0:0 | ModuleVariableNode for UnsafeUnpack.request | semmle.label | ModuleVariableNode for UnsafeUnpack.request |
| UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| UnsafeUnpack.py:5:26:5:32 | GSSA Variable request | semmle.label | GSSA Variable request |
| UnsafeUnpack.py:11:18:11:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| UnsafeUnpack.py:11:18:11:29 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:17:27:17:38 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
| UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
| UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | semmle.label | ControlFlowNode for local_ziped_path |
| UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | semmle.label | ControlFlowNode for compressed_file |
| UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:85:15:85:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
| UnsafeUnpack.py:103:23:103:27 | SSA variable chunk | semmle.label | SSA variable chunk |
| UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:103:32:103:54 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | semmle.label | ControlFlowNode for savepath |
| UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | semmle.label | ControlFlowNode for file_path |
| UnsafeUnpack.py:116:17:116:21 | SSA variable ufile | semmle.label | SSA variable ufile |
| UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:118:38:118:47 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | semmle.label | ControlFlowNode for uploaded_file_path |
| UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| UnsafeUnpack.py:158:23:158:27 | SSA variable chunk | semmle.label | SSA variable chunk |
| UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:158:32:158:54 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript |
| UnsafeUnpack.py:163:23:163:28 | SSA variable member | semmle.label | SSA variable member |
| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | semmle.label | ControlFlowNode for result |
| UnsafeUnpack.py:174:15:174:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | semmle.label | ControlFlowNode for tmp |
| UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
subpaths
#select
| UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | UnsafeUnpack.py:5:26:5:32 | ControlFlowNode for ImportMember | UnsafeUnpack.py:19:35:19:41 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:33:50:33:65 | ControlFlowNode for local_ziped_path | UnsafeUnpack.py:34:23:34:38 | ControlFlowNode for local_ziped_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:47:20:47:34 | ControlFlowNode for compressed_file | UnsafeUnpack.py:48:23:48:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:51:19:51:36 | ControlFlowNode for Attribute() | UnsafeUnpack.py:52:23:52:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | UnsafeUnpack.py:65:19:65:31 | ControlFlowNode for Attribute | UnsafeUnpack.py:66:23:66:37 | ControlFlowNode for compressed_file | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:87:23:87:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | UnsafeUnpack.py:103:32:103:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:105:35:105:42 | ControlFlowNode for savepath | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | UnsafeUnpack.py:108:22:108:34 | ControlFlowNode for Attribute | UnsafeUnpack.py:112:35:112:43 | ControlFlowNode for file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | UnsafeUnpack.py:116:27:116:39 | ControlFlowNode for Attribute | UnsafeUnpack.py:120:41:120:58 | ControlFlowNode for uploaded_file_path | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | UnsafeUnpack.py:140:23:140:35 | ControlFlowNode for Attribute | UnsafeUnpack.py:142:49:142:51 | ControlFlowNode for tar | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | UnsafeUnpack.py:158:32:158:44 | ControlFlowNode for Attribute | UnsafeUnpack.py:167:67:167:72 | ControlFlowNode for result | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | UnsafeUnpack.py:79:16:79:28 | ControlFlowNode for Attribute | UnsafeUnpack.py:176:1:176:34 | ControlFlowNode for Attribute() | Unsafe extraction from a malicious tarball retrieved from a remote location. |
| UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | UnsafeUnpack.py:194:53:194:55 | ControlFlowNode for tmp | UnsafeUnpack.py:201:29:201:36 | ControlFlowNode for Attribute | Unsafe extraction from a malicious tarball retrieved from a remote location. |

View File

@@ -0,0 +1,201 @@
import requests
import shutil
import os
from flask import Flask, request
app = Flask(__name__)
# Consider any RemoteFlowSource as a source
@app.route("/download_from_url")
def download_from_url():
filename = request.args.get('filename', '')
if not filename:
response = requests.get(filename, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
shutil.unpack_archive(tarpath, untarredpath) # $result=BAD
# A source catching an S3 filename download
# see boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_file
import boto3
import os
remote_ziped_name = "remote_name.tar.gz"
base_dir = "/tmp/basedir"
local_ziped_path = os.path.join(base_dir, remote_ziped_name)
bucket_name = "mybucket"
s3 = boto3.client('s3')
s3.download_file(bucket_name, remote_ziped_name, local_ziped_path)
shutil.unpack_archive(local_ziped_path, base_dir) # $result=BAD
# wget
# see wget: https://pypi.org/project/wget/
import wget
import os
url = "https://some.remote/location/remote_name.tar.xz"
compressed_file = "/tmp/basedir/local_name.tar.xz"
base_dir = "/tmp/basedir"
# download(url, out, bar) contains out parameter
wget.download(url, compressed_file)
shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
# download(url) returns filename
compressed_file = wget.download(url)
shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
# A source coming from a CLI argparse module
# see argparse: https://docs.python.org/3/library/argparse.html
import argparse
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('integers', metavar='N', type=int, nargs='+',
help='an integer for the accumulator')
parser.add_argument('filename', help='filename to be provided')
args = parser.parse_args()
compressed_file = args.filename
shutil.unpack_archive(compressed_file, base_dir) # $result=BAD
# A source coming from a CLI and downloaded
import argparse
import requests
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('integers', metavar='N', type=int, nargs='+',
help='an integer for the accumulator')
parser.add_argument('filename', help='url to filename to be provided')
args = parser.parse_args()
url_filename = args.filename
response = requests.get(url_filename, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
shutil.unpack_archive(tarpath, base_dir) # $result=BAD
# the django upload functionality
# see HttpRequest.FILES: https://docs.djangoproject.com/en/4.1/ref/request-response/#django.http.HttpRequest.FILES
from django.shortcuts import render
from django.core.files.storage import FileSystemStorage
import shutil
def simple_upload(request):
base_dir = "/tmp/baase_dir"
if request.method == 'POST':
# Read uploaded files by chunks of data
# see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
savepath = os.path.join(base_dir, "tarball_compressed.tar.gz")
with open(savepath, 'wb+') as wfile:
for chunk in request.FILES["ufile1"].chunks():
wfile.write(chunk)
shutil.unpack_archive(savepath, base_dir) # $result=BAD
# Write in binary the uploaded tarball
myfile = request.FILES.get("ufile1")
file_path = os.path.join(base_dir, "tarball.tar")
with file_path.open('wb') as f:
f.write(myfile.read())
shutil.unpack_archive(file_path, base_dir) # $result=BAD
# Save uploaded files using FileSystemStorage Django API
# see FileSystemStorage: https://docs.djangoproject.com/en/4.1/ref/files/storage/#django.core.files.storage.FileSystemStorage
for ufile in request.FILES.getlist():
fs = FileSystemStorage()
filename = fs.save(ufile.name, ufile)
uploaded_file_path = fs.path(filename)
shutil.unpack_archive(uploaded_file_path, base_dir) # $result=BAD
return render(request, 'simple_upload.html')
elif request.method == 'GET':
return render(request, 'simple_upload.html')
import shutil
import os
import tarfile
import tempfile
import argparse
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('integers', metavar='N', type=int, nargs='+',
help='an integer for the accumulator')
parser.add_argument('filename', help='filename to be provided')
args = parser.parse_args()
unsafe_filename_tar = args.filename
with tarfile.TarFile(unsafe_filename_tar, mode="r") as tar:
tar.extractall(path="/tmp/unpack/", members=tar) # $result=BAD
tar = tarfile.open(unsafe_filename_tar)
from django.shortcuts import render
from django.core.files.storage import FileSystemStorage
import shutil
def simple_upload(request):
base_dir = "/tmp/baase_dir"
if request.method == 'POST':
# Read uploaded files by chunks of data
# see chunks(): https://docs.djangoproject.com/en/4.1/ref/files/uploads/#django.core.files.uploadedfile.UploadedFile.chunks
savepath = os.path.join(base_dir, "tarball_compressed.tar.gz")
with open(savepath, 'wb+') as wfile:
for chunk in request.FILES["ufile1"].chunks():
wfile.write(chunk)
tar = tarfile.open(savepath)
result = []
for member in tar:
if member.issym():
raise ValueError("But it is a symlink")
result.append(member)
tar.extractall(path=tempfile.mkdtemp(), members=result) # $result=BAD
tar.close()
response = requests.get(url_filename, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
target_dir = "/tmp/unpack"
tarfile.TarFile(tarpath, mode="r").extractall(path=target_dir) # $result=BAD
from pathlib import Path
import tempfile
import boto3
def default_session() -> boto3.Session:
_SESSION = None
if _SESSION is None:
_SESSION = boto3.Session()
return _SESSION
cache = False
cache_dir = "/tmp/artifacts"
object_path = "/objects/obj1"
s3 = default_session().client("s3")
with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp:
s3.download_fileobj(bucket_name, object_path, tmp)
tmp.seek(0)
if cache:
cache_dir.mkdir(exist_ok=True, parents=True)
target = cache_dir
else:
target = Path(tempfile.mkdtemp())
shutil.unpack_archive(tmp.name, target) # $result=BAD

View File

@@ -0,0 +1 @@
experimental/Security/CWE-022bis/UnsafeUnpack.ql