Merge pull request #1470 from markshannon/python-tarslip

Python: "TarSlip" query
2025-12-18 01:33:15 +01:00 · 2019-07-15 12:43:47 +02:00
parent 41e46f6686 fbe20a96dc
commit f12c057826
10 changed files with 403 additions and 2 deletions
--- a/python/ql/src/Security/CWE-022/TarSlip.qhelp
+++ b/python/ql/src/Security/CWE-022/TarSlip.qhelp
@@ -0,0 +1,75 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>Extracting files from a malicious tar archive without validating that the destination file path
+is within the destination directory can cause files outside the destination directory to be
+overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
+archive paths.</p>
+
+<p>Tar archives contain archive entries representing each file in the archive. These entries
+include a file path for the entry, but these file paths are not restricted and may contain
+unexpected special elements such as the directory traversal element (<code>..</code>). If these
+file paths are used to determine an output file to write the contents of the archive item to, then
+the file may be written to an unexpected location. This can result in sensitive information being
+revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
+files.</p>
+
+<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
+is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
+in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
+written to <code>c:\sneaky-file</code>.</p>
+
+</overview>
+<recommendation>
+
+<p>Ensure that output paths constructed from tar archive entries are validated
+to prevent writing files to unexpected locations.</p>
+
+<p>The recommended way of writing an output file from a tar archive entry is to check that the
+entry's path is not absolute and that <code>".."</code> does not occur in it.
+</p>
+
+</recommendation>
+
+<example>
+<p>
+In this example an archive is extracted without validating file paths.
+If <code>archive.tar</code> contained relative paths (for
+instance, if it were created by something like <code>tar -cf archive.tar
+../file.txt</code>) then executing this code could write to locations
+outside the destination directory.
+</p>
+
+<sample src="examples/tarslip_bad.py" />
+
+<p>To fix this vulnerability, we need to check that the path is not absolute
+and does not contain any <code>".."</code> elements.
+</p>
+
+<sample src="examples/tarslip_good.py" />
+
+</example>
+<references>
+
+<li>
+Snyk:
+<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
+</li>
+<li>
+OWASP:
+<a href="https://www.owasp.org/index.php/Path_traversal">Path Traversal</a>.
+</li>
+<li>
+Python Library Reference:
+<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
+</li>
+<li>
+Python Library Reference:
+<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
+</li>
+
+</references>
+</qhelp>
--- a/python/ql/src/Security/CWE-022/TarSlip.ql
+++ b/python/ql/src/Security/CWE-022/TarSlip.ql
@@ -0,0 +1,196 @@
+/**
+ * @name Arbitrary file write during tarfile extraction
+ * @description Extracting files from a malicious tar archive without validating that the
+ *              destination file path is within the destination directory can cause files outside
+ *              the destination directory to be overwritten.
+ * @kind path-problem
+ * @id py/tarslip
+ * @problem.severity error
+ * @precision medium
+ * @tags security
+ *       external/cwe/cwe-022
+ */
+
+import python
+import semmle.python.security.Paths
+
+import semmle.python.security.TaintTracking
+import semmle.python.security.strings.Basic
+
+/** A TaintKind to represent open tarfile objects, that is, the result of calling `tarfile.open(...)`. */
+class OpenTarFile extends TaintKind {
+    OpenTarFile() {
+        this = "tarfile.open"
+    }
+    /* `getmember` yields a tarfile entry; `getmembers` yields a sequence of tarfile entries. */
+    override TaintKind getTaintOfMethodResult(string name) {
+        name = "getmember" and result instanceof TarFileInfo
+        or
+        name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
+    }
+    /* Values of this kind have the type `tarfile.TarFile`. */
+    override ClassValue getType() {
+        result = Module::named("tarfile").attr("TarFile")
+    }
+    /* Iterating over an open tarfile yields tarfile entries. */
+    override TaintKind getTaintForIteration() {
+        result instanceof TarFileInfo
+    }
+
+}
+
+/** A source of open tarfile objects: a call to `tarfile.open(...)`, outside `tarfile.py`, none of whose arguments refers to a string object. */
+class TarfileOpen extends TaintSource {
+
+    TarfileOpen() {
+        Module::named("tarfile").attr("open").getACall() = this
+        and
+        /* If argument refers to a string object, then it's a hardcoded path and
+         * this tarfile is safe.
+         */
+        not this.(CallNode).getAnArg().refersTo(any(StringObject str))
+        and
+        /* Ignore opens within the tarfile module itself */
+        not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
+    }
+
+    override predicate isSourceOf(TaintKind kind) {
+        kind instanceof OpenTarFile
+    }
+
+}
+
+class TarFileInfo extends TaintKind {
+    /* The taint kind for tarfile entries, as produced by `OpenTarFile` via `getmember` or iteration. */
+    TarFileInfo() {
+        this = "tarfile.entry"
+    }
+
+    override TaintKind getTaintOfMethodResult(string name) {
+        name = "next" and result = this
+    }
+
+    override TaintKind getTaintOfAttribute(string name) {
+        name = "name" and result instanceof TarFileInfo
+    }
+}
+
+
+/* Sanitizer that removes tarfile-related taint at every node located in `tarfile.py`:
+ * for efficiency we don't want to track the flow of taint around the tarfile module. */
+class ExcludeTarFilePy extends Sanitizer {
+
+    ExcludeTarFilePy() {
+        this = "Tar sanitizer"
+    }
+
+    override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
+        node.getLocation().getFile().getBaseName() = "tarfile.py" and
+        (
+            taint instanceof OpenTarFile
+            or
+            taint instanceof TarFileInfo
+            or
+            taint.(SequenceKind).getItem() instanceof TarFileInfo
+        )
+    }
+
+}
+
+/* The receiver object of a call to an `extractall` method for which `getAnArg()` yields no arguments; sinks open tarfiles. */
+class ExtractAllSink extends TaintSink {
+
+    CallNode call;
+
+    ExtractAllSink() {
+        this = call.getFunction().(AttrNode).getObject("extractall") and
+        count(call.getAnArg()) = 0
+    }
+
+    override predicate sinks(TaintKind kind) {
+        kind instanceof OpenTarFile
+    }
+
+}
+
+/* The first positional argument of a call to an `extract` method; sinks tarfile entries. */
+class ExtractSink extends TaintSink {
+
+    CallNode call;
+
+    ExtractSink() {
+        call.getFunction().(AttrNode).getName() = "extract" and
+        this = call.getArg(0)
+    }
+
+    override predicate sinks(TaintKind kind) {
+        kind instanceof TarFileInfo
+    }
+
+}
+
+
+/* The members argument (first positional argument or `members` keyword) of a call to an `extractall` method. */
+class ExtractMembersSink extends TaintSink {
+
+    CallNode call;
+
+    ExtractMembersSink() {
+        call.getFunction().(AttrNode).getName() = "extractall" and
+        (this = call.getArg(0) or this = call.getArgByName("members"))
+    }
+
+    override predicate sinks(TaintKind kind) {
+        kind.(SequenceKind).getItem() instanceof TarFileInfo
+        or
+        kind instanceof OpenTarFile
+    }
+
+}
+
+class TarFileInfoSanitizer extends Sanitizer {
+    /* Sanitizes tarfile entries on edges guarded by a test that `path_sanitizing_test` accepts. */
+    TarFileInfoSanitizer() {
+        this = "TarInfo sanitizer"
+    }
+
+    override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
+        path_sanitizing_test(test.getTest()) and
+        taint instanceof TarFileInfo
+    }
+
+
+}
+
+private predicate path_sanitizing_test(ControlFlowNode test) {
+    /* Assume that any test involving an attribute or variable whose name ends in "path" is a sanitizer */
+    test.getAChild+().(AttrNode).getName().matches("%path")
+    or
+    test.getAChild+().(NameNode).getId().matches("%path")
+}
+
+class TarSlipConfiguration extends TaintTracking::Configuration {
+    /* Tracks flow from `TarfileOpen` sources to the extract/extractall sinks, subject to the two sanitizers. */
+    TarSlipConfiguration() { this = "TarSlip configuration" }
+
+    override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
+
+    override predicate isSink(TaintTracking::Sink sink) {
+        sink instanceof ExtractSink or
+        sink instanceof ExtractAllSink or
+        sink instanceof ExtractMembersSink
+    }
+
+    override predicate isSanitizer(Sanitizer sanitizer) {
+        sanitizer instanceof TarFileInfoSanitizer
+        or
+        sanitizer instanceof ExcludeTarFilePy
+    }
+
+}
+
+/* Report every flow path that the TarSlip configuration finds from a source to a sink. */
+from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
+where config.hasFlowPath(src, sink)
+select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(), "a potentially untrusted source"
+
--- a/python/ql/src/Security/CWE-022/examples/tarslip_bad.py
+++ b/python/ql/src/Security/CWE-022/examples/tarslip_bad.py
@@ -0,0 +1,7 @@
+
+import tarfile
+
+with tarfile.open('archive.zip') as tar:
+    #BAD : This could write any file on the filesystem.
+    for entry in tar:
+        tar.extract(entry, "/tmp/unpack/")
--- a/python/ql/src/Security/CWE-022/examples/tarslip_good.py
+++ b/python/ql/src/Security/CWE-022/examples/tarslip_good.py
@@ -0,0 +1,10 @@
+
+import tarfile
+import os.path
+
+with tarfile.open('archive.zip') as tar:
+    for entry in tar:
+        #GOOD: Check that entry is safe
+        if os.path.isabs(entry.name) or ".." in entry.name:
+            raise ValueError("Illegal tar archive entry")
+        tar.extract(entry, "/tmp/unpack/")
--- a/python/ql/src/semmle/python/security/Paths.qll
+++ b/python/ql/src/semmle/python/security/Paths.qll
@@ -5,7 +5,7 @@ import semmle.python.security.TaintTracking
 query predicate edges(TaintedNode fromnode, TaintedNode tonode) {
    fromnode.getASuccessor() = tonode and
    /* Don't record flow past sinks */
-    not fromnode.isVulnerableSink()
+    not fromnode.isSink()
 }

 private TaintedNode first_child(TaintedNode parent) {
--- a/python/ql/src/semmle/python/security/TaintTracking.qll
+++ b/python/ql/src/semmle/python/security/TaintTracking.qll
@@ -703,7 +703,7 @@ class TaintedNode extends TTaintedNode {
    /** Holds if the underlying CFG node for this node is a vulnerable node
     * and is vulnerable to this node's taint.
     */
-    predicate isVulnerableSink() {
+    predicate isSink() {
        exists(TaintedNode src, TaintSink vuln |
            src.isSource() and
            src.getASuccessor*() = this and
@@ -712,6 +712,13 @@ class TaintedNode extends TTaintedNode {
        )
    }

+    /** DEPRECATED: Use `TaintedNode.isSink()` instead,
+     * as sinks are not necessarily vulnerable.
+     * For removal 2020-07-01. */
+    deprecated predicate isVulnerableSink() {
+        this.isSink()
+    }
+
    TaintFlowImplementation::TrackedTaint fromAttribute(string name) {
        result = this.getTrackedValue().(TaintFlowImplementation::TrackedAttribute).fromAttribute(name)
    }