Merge pull request #1470 from markshannon/python-tarslip

Python: "TarSlip" query
This commit is contained in:
Taus
2019-07-15 12:43:47 +02:00
committed by GitHub
10 changed files with 403 additions and 2 deletions

View File

@@ -0,0 +1,75 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>Extracting files from a malicious tar archive without validating that the destination file path
is within the destination directory can cause files outside the destination directory to be
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
archive paths.</p>
<p>Tar archives contain archive entries representing each file in the archive. These entries
include a file path for the entry, but these file paths are not restricted and may contain
unexpected special elements such as the directory traversal element (<code>..</code>). If these
file paths are used to determine an output file to write the contents of the archive item to, then
the file may be written to an unexpected location. This can result in sensitive information being
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
files.</p>
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
written to <code>c:\sneaky-file</code>.</p>
</overview>
<recommendation>
<p>Ensure that output paths constructed from tar archive entries are validated
to prevent writing files to unexpected locations.</p>
<p>The recommended way of writing an output file from a tar archive entry is to check that
the entry's path is not absolute and that <code>".."</code> does not occur in it before
extracting the entry.
</p>
</recommendation>
<example>
<p>
In this example an archive is extracted without validating file paths.
If <code>archive.tar</code> contained relative paths (for
instance, if it were created by something like <code>tar -cf archive.tar
../file.txt</code>) then executing this code could write to locations
outside the destination directory.
</p>
<sample src="examples/tarslip_bad.py" />
<p>To fix this vulnerability, we need to check that the path is not absolute
and does not contain any <code>".."</code> elements.
</p>
<sample src="examples/tarslip_good.py" />
</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Path_traversal">Path Traversal</a>.
</li>
<li>
Python Library Reference:
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
</li>
<li>
Python Library Reference:
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,196 @@
/**
* @name Arbitrary file write during tarfile extraction
* @description Extracting files from a malicious tar archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @precision medium
* @tags security
* external/cwe/cwe-022
*/
import python
import semmle.python.security.Paths
import semmle.python.security.TaintTracking
import semmle.python.security.strings.Basic
/**
 * A taint kind for open tarfile objects, that is, the value produced by `tarfile.open(...)`.
 * Entries obtained through `getmember`, `getmembers` or by iterating over the
 * object carry the `TarFileInfo` kind.
 */
class OpenTarFile extends TaintKind {

    OpenTarFile() {
        "tarfile.open" = this
    }

    override ClassValue getType() {
        Module::named("tarfile").attr("TarFile") = result
    }

    /* Iterating over an open tarfile yields its entries */
    override TaintKind getTaintForIteration() {
        result instanceof TarFileInfo
    }

    override TaintKind getTaintOfMethodResult(string name) {
        /* `getmembers` gives a sequence of entries */
        name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
        or
        /* `getmember` gives a single entry */
        name = "getmember" and result instanceof TarFileInfo
    }

}
/**
 * The source of open tarfile objects. That is, any call to `tarfile.open(...)`
 * whose archive path is not a hardcoded string and which is not located in
 * `tarfile.py` itself.
 */
class TarfileOpen extends TaintSource {

    TarfileOpen() {
        Module::named("tarfile").attr("open").getACall() = this
        and
        /* If the archive path refers to a string object, then it's a hardcoded
         * path and this tarfile is safe.
         * Only the path argument (first positional argument, or the `name` keyword)
         * is inspected: a literal in another position, such as the mode in
         * `tarfile.open(untrusted, "r")`, says nothing about where the archive
         * comes from and must not suppress the source.
         */
        not exists(ControlFlowNode path |
            path = this.(CallNode).getArg(0) or
            path = this.(CallNode).getArgByName("name")
        |
            path.refersTo(any(StringObject str))
        )
        and
        /* Ignore opens within the tarfile module itself */
        not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
    }

    override predicate isSourceOf(TaintKind kind) {
        kind instanceof OpenTarFile
    }

}
/**
 * A taint kind for entries of a tarfile, labelled "tarfile.entry".
 * The result of calling `next` keeps this kind, and the `name` attribute
 * of an entry is tracked with the `TarFileInfo` kind as well.
 */
class TarFileInfo extends TaintKind {

    TarFileInfo() {
        "tarfile.entry" = this
    }

    override TaintKind getTaintOfAttribute(string name) {
        result instanceof TarFileInfo and name = "name"
    }

    override TaintKind getTaintOfMethodResult(string name) {
        result = this and name = "next"
    }

}
/* A sanitizer that stops the tarfile-related taint kinds at every node located
 * in `tarfile.py`; for efficiency we don't want to track the flow of taint
 * around the tarfile module. */
class ExcludeTarFilePy extends Sanitizer {

    ExcludeTarFilePy() {
        "Tar sanitizer" = this
    }

    override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
        (
            taint.(SequenceKind).getItem() instanceof TarFileInfo
            or
            taint instanceof TarFileInfo
            or
            taint instanceof OpenTarFile
        )
        and
        node.getLocation().getFile().getBaseName() = "tarfile.py"
    }

}
/* The object on which an `extractall` method is called, when the call has
 * no arguments (as reported by `getAnArg()`). */
class ExtractAllSink extends TaintSink {

    CallNode call;

    ExtractAllSink() {
        not exists(call.getAnArg()) and
        call.getFunction().(AttrNode).getObject("extractall") = this
    }

    override predicate sinks(TaintKind kind) {
        kind instanceof OpenTarFile
    }

}
/* The first positional argument of a call to a method named `extract` */
class ExtractSink extends TaintSink {

    CallNode call;

    ExtractSink() {
        call.getArg(0) = this and
        call.getFunction().(AttrNode).getName() = "extract"
    }

    override predicate sinks(TaintKind kind) {
        kind instanceof TarFileInfo
    }

}
/* The members argument of a call to a method named `extractall`, given either
 * as the first positional argument or as the `members` keyword argument */
class ExtractMembersSink extends TaintSink {

    CallNode call;

    ExtractMembersSink() {
        (call.getArgByName("members") = this or call.getArg(0) = this)
        and
        call.getFunction().(AttrNode).getName() = "extractall"
    }

    override predicate sinks(TaintKind kind) {
        /* An open tarfile passed as the members argument */
        kind instanceof OpenTarFile
        or
        /* A sequence of tarfile entries */
        kind.(SequenceKind).getItem() instanceof TarFileInfo
    }

}
/**
 * A sanitizer that removes `TarFileInfo` taint on edges guarded by a test
 * accepted by `path_sanitizing_test`.
 */
class TarFileInfoSanitizer extends Sanitizer {

    TarFileInfoSanitizer() {
        "TarInfo sanitizer" = this
    }

    override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
        taint instanceof TarFileInfo and
        path_sanitizing_test(test.getTest())
    }

}
/* Holds if `test` has a descendant node that is an attribute or a name whose
 * identifier ends with "path" (the pattern `%path`); any such test is assumed
 * to be a sanitizer. */
private predicate path_sanitizing_test(ControlFlowNode test) {
    exists(ControlFlowNode descendant | descendant = test.getAChild+() |
        descendant.(NameNode).getId().matches("%path")
        or
        descendant.(AttrNode).getName().matches("%path")
    )
}
/**
 * The taint-tracking configuration for this query: sources are `TarfileOpen`
 * calls, sinks are the `extract`/`extractall` sinks, and the two tarfile
 * sanitizers are enabled.
 */
class TarSlipConfiguration extends TaintTracking::Configuration {

    TarSlipConfiguration() {
        "TarSlip configuration" = this
    }

    override predicate isSource(TaintTracking::Source source) {
        source instanceof TarfileOpen
    }

    override predicate isSink(TaintTracking::Sink sink) {
        sink instanceof ExtractMembersSink
        or
        sink instanceof ExtractAllSink
        or
        sink instanceof ExtractSink
    }

    override predicate isSanitizer(Sanitizer sanitizer) {
        sanitizer instanceof ExcludeTarFilePy
        or
        sanitizer instanceof TarFileInfoSanitizer
    }

}
/* Report every flow path from a tarfile source to an extraction sink */
from
    TarSlipConfiguration config,
    TaintedPathSource src,
    TaintedPathSink sink
where
    config.hasFlowPath(src, sink)
select
    sink.getSink(), src, sink,
    "Extraction of tarfile from $@", src.getSource(), "a potentially untrusted source"

View File

@@ -0,0 +1,7 @@
import tarfile
# Extract every entry of the archive without validating the entry paths.
with tarfile.open('archive.tar') as tar:
    #BAD : This could write any file on the filesystem.
    for entry in tar:
        tar.extract(entry, "/tmp/unpack/")

View File

@@ -0,0 +1,10 @@
import tarfile
import os.path
# Extract the archive, rejecting entries whose path is absolute or contains "..".
with tarfile.open('archive.tar') as tar:
    for entry in tar:
        #GOOD: Check that entry is safe
        if os.path.isabs(entry.name) or ".." in entry.name:
            raise ValueError("Illegal tar archive entry")
        tar.extract(entry, "/tmp/unpack/")

View File

@@ -5,7 +5,7 @@ import semmle.python.security.TaintTracking
query predicate edges(TaintedNode fromnode, TaintedNode tonode) {
fromnode.getASuccessor() = tonode and
/* Don't record flow past sinks */
not fromnode.isVulnerableSink()
not fromnode.isSink()
}
private TaintedNode first_child(TaintedNode parent) {

View File

@@ -703,7 +703,7 @@ class TaintedNode extends TTaintedNode {
/** Holds if the underlying CFG node for this node is a vulnerable node
* and is vulnerable to this node's taint.
*/
predicate isVulnerableSink() {
predicate isSink() {
exists(TaintedNode src, TaintSink vuln |
src.isSource() and
src.getASuccessor*() = this and
@@ -712,6 +712,13 @@ class TaintedNode extends TTaintedNode {
)
}
/** DEPRECATED: Use `TaintedNode.isSink()` instead.
* Holds exactly when `isSink()` holds for this node.
* Sinks are not necessarily vulnerable.
* For removal 2020-07-01. */
deprecated predicate isVulnerableSink() {
this.isSink()
}
/** Gets the result of `fromAttribute(name)` on this node's tracked value,
* provided that the tracked value is a `TrackedAttribute`. */
TaintFlowImplementation::TrackedTaint fromAttribute(string name) {
result = this.getTrackedValue().(TaintFlowImplementation::TrackedAttribute).fromAttribute(name)
}