UnpackUnsafe query and tests

This commit is contained in:
ALJI Mohamed
2022-12-05 17:20:22 +01:00
parent 5b31da44e1
commit 68fd75ca34
7 changed files with 164 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tarball without validating that the destination file path
is within the destination directory using `shutil.unpack_archive()` can cause files outside the
destination directory to be overwritten, due to the possible presence of directory traversal elements
(<code>..</code>) in archive path names.</p>
<p>Tarball contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tarball contains a file entry <code>../sim4n6.txt</code>, and the tarball
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
in an output file path of <code>/tmp/tmp123/../sim4n6.txt</code>, which would cause the file to be
written to <code>/tmp/</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tarball entries are validated
to prevent writing files to unexpected locations.</p>
<p>Consider using a safer module, such as: <code>zipfile</code></p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
</p>
<sample src="examples/HIT_UnsafeUnpack.py" />
<p>To fix this vulnerability, we need to call the function <code>tarfile.extract()</code>
on each <code>member</code> after verifying that it does not contain either `..` or startswith `/`.
</p>
<sample src="examples/NoHIT_UnsafeUnpack.py" />
</example>
<references>
<li>
Shutil official documentation
<a href="https://docs.python.org/3/library/shutil.html?highlight=unpack_archive#shutil.unpack_archive">shutil.unpack_archive() warning.</a>
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,56 @@
/**
* @name Arbitrary file write during a remotely downloaded tarball extraction
* @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating
* that the destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten. More precisely, if the tarball comes from a remote location.
* @kind path-problem
* @id py/unsafe-unpacking
* @problem.severity error
* @security-severity 7.5
* @precision high
* @tags security
* external/cwe/cwe-022bis
*/
import python
import experimental.semmle.python.Concepts
import DataFlow::PathGraph
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.internal.Attributes
import semmle.python.dataflow.new.DataFlow
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.TaintTracking
import semmle.python.Concepts
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
override predicate isSource(DataFlow::Node source) {
// A source coming from a remote location
exists(Http::Client::Request request | source = request) and
not source.getScope().getLocation().getFile().inStdlib()
}
override predicate isSink(DataFlow::Node sink) {
// A sink capturing method calls to `unpack_archive`.
sink =
API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and
not sink.getScope().getLocation().getFile().inStdlib()
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Writing the response data to the archive
exists(API::CallNode call, MethodCallNode w |
nodeTo = call.getArg(0) and
call = API::builtin("open").getACall() and
w.getMethodName() = "write" and
w.getObject() = call.getReturn().getAValueReachableFromSource() and
nodeFrom = w.getArg(0)
)
}
}
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@",
source.getAQlClass(), "during archive unpacking."

View File

@@ -0,0 +1,12 @@
import requests
import shutil
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
shutil.unpack_archive(tarpath, untarredpath)

View File

@@ -0,0 +1,17 @@
import requests
import tarfile
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
with tarfile.open(tarpath) as tar:
for member in tar.getmembers():
if member.name.startswith("/") or ".." in member.name:
raise Exception("Path traversal identified in tarball")
tar.extract(untarredpath, member)

View File

@@ -0,0 +1,10 @@
edges
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
nodes
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
subpaths
#select
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. |

View File

@@ -0,0 +1,12 @@
import requests
import shutil
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
f.write(response.raw.read())
untarredpath = "/tmp/tmp123"
shutil.unpack_archive(tarpath, untarredpath)

View File

@@ -0,0 +1 @@
experimental/Security/CWE-022bis/UnsafeUnpack.ql