Python: Initial version and help of tar-slip (CWE-022) query.

This commit is contained in:
Mark Shannon
2019-05-21 14:43:35 +01:00
parent cb43d27344
commit ea4e263060
7 changed files with 205 additions and 0 deletions

View File

@@ -0,0 +1,75 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tar archive without validating that the destination file path
is within the destination directory can cause files outside the destination directory to be
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
archive paths.</p>
<p>Tar archives contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
written to <code>c:\sneaky-file</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tar archive entries are validated
to prevent writing files to unexpected locations.</p>
<p>The recommended way of writing an output file from a tar archive entry is to check that
<code>".."</code> does not occur in the path.
</p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
If <code>archive.tar</code> contained relative paths (for
instance, if it were created by something like <code>tar -cf archive.tar
../file.txt</code>) then executing this code could write to locations
outside the destination directory.
</p>
<sample src="examples/tarslip_bad.py" />
<p>To fix this vulnerability, we need to check that the path does not
contain any <code>".."</code> elements in it.
</p>
<sample src="examples/tarslip_good.py" />
</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Path_traversal">Path Traversal</a>.
</li>
<li>
Python Library Reference:
< href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
</li>
<li>
Python Library Reference:
< href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,81 @@
/**
* @name Arbitrary file write during tarfile extraction
* @description Extracting files from a malicious tar archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @precision medium
* @tags security
* external/cwe/cwe-022
*/
import python
import semmle.python.security.Paths
import semmle.python.security.TaintTracking
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
class OpenTarFile extends TaintKind {
OpenTarFile() {
this = "tarfile.open"
}
}
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
class TarfileOpen extends TaintSource {
TarfileOpen() {
Module::named("tarfile").attr("open").getACall() = this
and
/* If argument refers to a string object, then it's a hardcoded path and
* this tarfile is safe.
*/
not this.(CallNode).getAnArg().refersTo(any(StringObject str))
}
override predicate isSourceOf(TaintKind kind) {
kind instanceof OpenTarFile
}
}
/* Any call to an extract method */
class ExtractionSink extends TaintSink {
CallNode call;
ExtractionSink() {
this = call.getFunction().(AttrNode).getObject(extract())
}
override predicate sinks(TaintKind kind) {
kind instanceof OpenTarFile
}
}
private string extract() {
result = "extract" or result = "extractall"
}
//evil = [e for e in members if os.path.relpath(e).startswith(('/', '..'))]
class TarSlipConfiguration extends TaintTracking::Configuration {
TarSlipConfiguration() { this = "TarSlip configuration" }
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
override predicate isSink(TaintTracking::Sink sink) { sink instanceof ExtractionSink }
}
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(), "a potentially untrusted source"

View File

@@ -0,0 +1,7 @@
import tarfile
with tarfile.open('archive.zip') as tar:
#BAD : This could write any file on the filesystem.
for entry in tar:
tar.extract(entry, "/tmp/unpack/")

View File

@@ -0,0 +1,10 @@
import tarfile
import os.path
with tarfile.open('archive.zip') as tar:
for entry in tar:
#GOOD: Check that entry is safe
if os.path.isabs(entry.name) or ".." in entry.name:
raise ValueError("Illegal tar archive entry")
tar.extract(entry, "/tmp/unpack/")

View File

@@ -0,0 +1,9 @@
edges
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open |
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:14:1:14:3 | tarfile.open |
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:17:14:17:16 | tarfile.open |
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:18:5:18:7 | tarfile.open |
parents
#select
| tarslip.py:13:1:13:3 | Taint sink | tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:12:7:12:39 | Taint source | a potentially untrusted source |
| tarslip.py:18:5:18:7 | Taint sink | tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:18:5:18:7 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:16:7:16:39 | Taint source | a potentially untrusted source |

View File

@@ -0,0 +1 @@
Security/CWE-022/TarSlip.ql

View File

@@ -0,0 +1,22 @@
#!/usr/bin/python
import tarfile
unsafe_filename_tar = sys.argv[1]
safe_filename_tar = "safe_path.tar"
tar = tarfile.open(safe_filename_tar)
for entry in tar:
tar.extract(entry)
tar = tarfile.open(unsafe_filename_tar)
tar.extractall()
tar.close()
tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry)
tar = tarfile.open(safe_filename_tar)
tar.extractall()
tar.close()