mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
UnpackUnsafe query and tests
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>Extracting files from a malicious tarball without validating that the destination file path
|
||||
is within the destination directory using `shutil.unpack_archive()` can cause files outside the
|
||||
destination directory to be overwritten, due to the possible presence of directory traversal elements
|
||||
(<code>..</code>) in archive path names.</p>
|
||||
|
||||
<p>Tarball contain archive entries representing each file in the archive. These entries
|
||||
include a file path for the entry, but these file paths are not restricted and may contain
|
||||
unexpected special elements such as the directory traversal element (<code>..</code>). If these
|
||||
file paths are used to determine an output file to write the contents of the archive item to, then
|
||||
the file may be written to an unexpected location. This can result in sensitive information being
|
||||
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
|
||||
files.</p>
|
||||
|
||||
<p>For example, if a tarball contains a file entry <code>../sim4n6.txt</code>, and the tarball
|
||||
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
|
||||
in an output file path of <code>/tmp/tmp123/../sim4n6.txt</code>, which would cause the file to be
|
||||
written to <code>/tmp/</code>.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>Ensure that output paths constructed from tarball entries are validated
|
||||
to prevent writing files to unexpected locations.</p>
|
||||
|
||||
<p>Consider using a safer module, such as: <code>zipfile</code></p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
In this example an archive is extracted without validating file paths.
|
||||
</p>
|
||||
|
||||
<sample src="examples/HIT_UnsafeUnpack.py" />
|
||||
|
||||
<p>To fix this vulnerability, we need to call the function <code>tarfile.extract()</code>
|
||||
on each <code>member</code> after verifying that it does not contain either `..` or startswith `/`.
|
||||
</p>
|
||||
|
||||
<sample src="examples/NoHIT_UnsafeUnpack.py" />
|
||||
|
||||
</example>
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Shutil official documentation
|
||||
<a href="https://docs.python.org/3/library/shutil.html?highlight=unpack_archive#shutil.unpack_archive">shutil.unpack_archive() warning.</a>
|
||||
</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @name Arbitrary file write during a remotely downloaded tarball extraction
|
||||
* @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating
|
||||
* that the destination file path is within the destination directory can cause files outside
|
||||
* the destination directory to be overwritten. More precisely, if the tarball comes from a remote location.
|
||||
* @kind path-problem
|
||||
* @id py/unsafe-unpacking
|
||||
* @problem.severity error
|
||||
* @security-severity 7.5
|
||||
* @precision high
|
||||
* @tags security
|
||||
* external/cwe/cwe-022bis
|
||||
*/
|
||||
|
||||
import python
|
||||
import experimental.semmle.python.Concepts
|
||||
import DataFlow::PathGraph
|
||||
import semmle.python.ApiGraphs
|
||||
import semmle.python.dataflow.new.internal.Attributes
|
||||
import semmle.python.dataflow.new.DataFlow
|
||||
import semmle.python.ApiGraphs
|
||||
import semmle.python.dataflow.new.TaintTracking
|
||||
import semmle.python.Concepts
|
||||
|
||||
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
|
||||
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
// A source coming from a remote location
|
||||
exists(Http::Client::Request request | source = request) and
|
||||
not source.getScope().getLocation().getFile().inStdlib()
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
// A sink capturing method calls to `unpack_archive`.
|
||||
sink =
|
||||
API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and
|
||||
not sink.getScope().getLocation().getFile().inStdlib()
|
||||
}
|
||||
|
||||
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
|
||||
// Writing the response data to the archive
|
||||
exists(API::CallNode call, MethodCallNode w |
|
||||
nodeTo = call.getArg(0) and
|
||||
call = API::builtin("open").getACall() and
|
||||
w.getMethodName() = "write" and
|
||||
w.getObject() = call.getReturn().getAValueReachableFromSource() and
|
||||
nodeFrom = w.getArg(0)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@",
|
||||
source.getAQlClass(), "during archive unpacking."
|
||||
@@ -0,0 +1,12 @@
|
||||
import requests
|
||||
import shutil
|
||||
|
||||
url = "https://www.someremote.location/tarball.tar.gz"
|
||||
response = requests.get(url, stream=True)
|
||||
|
||||
tarpath = "/tmp/tmp456/tarball.tar.gz"
|
||||
with open(tarpath, "wb") as f:
|
||||
f.write(response.raw.read())
|
||||
|
||||
untarredpath = "/tmp/tmp123"
|
||||
shutil.unpack_archive(tarpath, untarredpath)
|
||||
@@ -0,0 +1,17 @@
|
||||
import requests
|
||||
import tarfile
|
||||
|
||||
url = "https://www.someremote.location/tarball.tar.gz"
|
||||
response = requests.get(url, stream=True)
|
||||
|
||||
tarpath = "/tmp/tmp456/tarball.tar.gz"
|
||||
with open(tarpath, "wb") as f:
|
||||
f.write(response.raw.read())
|
||||
|
||||
untarredpath = "/tmp/tmp123"
|
||||
with tarfile.open(tarpath) as tar:
|
||||
for member in tar.getmembers():
|
||||
if member.name.startswith("/") or ".." in member.name:
|
||||
raise Exception("Path traversal identified in tarball")
|
||||
|
||||
tar.extract(untarredpath, member)
|
||||
@@ -0,0 +1,10 @@
|
||||
edges
|
||||
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
|
||||
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
|
||||
nodes
|
||||
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
|
||||
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
|
||||
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
|
||||
subpaths
|
||||
#select
|
||||
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. |
|
||||
@@ -0,0 +1,12 @@
|
||||
import requests
|
||||
import shutil
|
||||
|
||||
url = "https://www.someremote.location/tarball.tar.gz"
|
||||
response = requests.get(url, stream=True)
|
||||
|
||||
tarpath = "/tmp/tmp456/tarball.tar.gz"
|
||||
with open(tarpath, "wb") as f:
|
||||
f.write(response.raw.read())
|
||||
|
||||
untarredpath = "/tmp/tmp123"
|
||||
shutil.unpack_archive(tarpath, untarredpath)
|
||||
@@ -0,0 +1 @@
|
||||
experimental/Security/CWE-022bis/UnsafeUnpack.ql
|
||||
Reference in New Issue
Block a user