mirror of
https://github.com/github/codeql.git
synced 2025-12-18 01:33:15 +01:00
Merge pull request #1470 from markshannon/python-tarslip
Python: "TarSlip" query
This commit is contained in:
75
python/ql/src/Security/CWE-022/TarSlip.qhelp
Normal file
75
python/ql/src/Security/CWE-022/TarSlip.qhelp
Normal file
@@ -0,0 +1,75 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>Extracting files from a malicious tar archive without validating that the destination file path
|
||||
is within the destination directory can cause files outside the destination directory to be
|
||||
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
|
||||
archive paths.</p>
|
||||
|
||||
<p>Tar archives contain archive entries representing each file in the archive. These entries
|
||||
include a file path for the entry, but these file paths are not restricted and may contain
|
||||
unexpected special elements such as the directory traversal element (<code>..</code>). If these
|
||||
file paths are used to determine an output file to write the contents of the archive item to, then
|
||||
the file may be written to an unexpected location. This can result in sensitive information being
|
||||
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
|
||||
files.</p>
|
||||
|
||||
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
|
||||
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
|
||||
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
|
||||
written to <code>c:\sneaky-file</code>.</p>
|
||||
|
||||
</overview>
|
||||
<recommendation>
|
||||
|
||||
<p>Ensure that output paths constructed from tar archive entries are validated
|
||||
to prevent writing files to unexpected locations.</p>
|
||||
|
||||
<p>The recommended way of writing an output file from a tar archive entry is to check that
|
||||
<code>".."</code> does not occur in the path.
|
||||
</p>
|
||||
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
In this example an archive is extracted without validating file paths.
|
||||
If <code>archive.tar</code> contained relative paths (for
|
||||
instance, if it were created by something like <code>tar -cf archive.tar
|
||||
../file.txt</code>) then executing this code could write to locations
|
||||
outside the destination directory.
|
||||
</p>
|
||||
|
||||
<sample src="examples/tarslip_bad.py" />
|
||||
|
||||
<p>To fix this vulnerability, we need to check that the path does not
|
||||
contain any <code>".."</code> elements in it.
|
||||
</p>
|
||||
|
||||
<sample src="examples/tarslip_good.py" />
|
||||
|
||||
</example>
|
||||
<references>
|
||||
|
||||
<li>
|
||||
Snyk:
|
||||
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
|
||||
</li>
|
||||
<li>
|
||||
OWASP:
|
||||
<a href="https://www.owasp.org/index.php/Path_traversal">Path Traversal</a>.
|
||||
</li>
|
||||
<li>
|
||||
Python Library Reference:
|
||||
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extract">TarFile.extract</a>.
|
||||
</li>
|
||||
<li>
|
||||
Python Library Reference:
|
||||
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">TarFile.extractall</a>.
|
||||
</li>
|
||||
|
||||
</references>
|
||||
</qhelp>
|
||||
196
python/ql/src/Security/CWE-022/TarSlip.ql
Normal file
196
python/ql/src/Security/CWE-022/TarSlip.ql
Normal file
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* @name Arbitrary file write during tarfile extraction
|
||||
* @description Extracting files from a malicious tar archive without validating that the
|
||||
* destination file path is within the destination directory can cause files outside
|
||||
* the destination directory to be overwritten.
|
||||
* @kind path-problem
|
||||
* @id py/tarslip
|
||||
* @problem.severity error
|
||||
* @precision medium
|
||||
* @tags security
|
||||
* external/cwe/cwe-022
|
||||
*/
|
||||
|
||||
import python
|
||||
import semmle.python.security.Paths
|
||||
|
||||
import semmle.python.security.TaintTracking
|
||||
import semmle.python.security.strings.Basic
|
||||
|
||||
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
|
||||
class OpenTarFile extends TaintKind {
|
||||
OpenTarFile() {
|
||||
this = "tarfile.open"
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "getmember" and result instanceof TarFileInfo
|
||||
or
|
||||
name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
|
||||
}
|
||||
|
||||
override ClassValue getType() {
|
||||
result = Module::named("tarfile").attr("TarFile")
|
||||
}
|
||||
|
||||
override TaintKind getTaintForIteration() {
|
||||
result instanceof TarFileInfo
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
|
||||
class TarfileOpen extends TaintSource {
|
||||
|
||||
TarfileOpen() {
|
||||
Module::named("tarfile").attr("open").getACall() = this
|
||||
and
|
||||
/* If argument refers to a string object, then it's a hardcoded path and
|
||||
* this tarfile is safe.
|
||||
*/
|
||||
not this.(CallNode).getAnArg().refersTo(any(StringObject str))
|
||||
and
|
||||
/* Ignore opens within the tarfile module itself */
|
||||
not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
|
||||
}
|
||||
|
||||
override predicate isSourceOf(TaintKind kind) {
|
||||
kind instanceof OpenTarFile
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TarFileInfo extends TaintKind {
|
||||
|
||||
TarFileInfo() {
|
||||
this = "tarfile.entry"
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfMethodResult(string name) {
|
||||
name = "next" and result = this
|
||||
}
|
||||
|
||||
override TaintKind getTaintOfAttribute(string name) {
|
||||
name = "name" and result instanceof TarFileInfo
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* For efficiency we don't want to track the flow of taint
|
||||
* around the tarfile module. */
|
||||
class ExcludeTarFilePy extends Sanitizer {
|
||||
|
||||
ExcludeTarFilePy() {
|
||||
this = "Tar sanitizer"
|
||||
}
|
||||
|
||||
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
|
||||
node.getLocation().getFile().getBaseName() = "tarfile.py" and
|
||||
(
|
||||
taint instanceof OpenTarFile
|
||||
or
|
||||
taint instanceof TarFileInfo
|
||||
or
|
||||
taint.(SequenceKind).getItem() instanceof TarFileInfo
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Any call to an extractall method */
|
||||
class ExtractAllSink extends TaintSink {
|
||||
|
||||
CallNode call;
|
||||
|
||||
ExtractAllSink() {
|
||||
this = call.getFunction().(AttrNode).getObject("extractall") and
|
||||
count(call.getAnArg()) = 0
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) {
|
||||
kind instanceof OpenTarFile
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Argument to extract method */
|
||||
class ExtractSink extends TaintSink {
|
||||
|
||||
CallNode call;
|
||||
|
||||
ExtractSink() {
|
||||
call.getFunction().(AttrNode).getName() = "extract" and
|
||||
this = call.getArg(0)
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) {
|
||||
kind instanceof TarFileInfo
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Members argument to extract method */
|
||||
class ExtractMembersSink extends TaintSink {
|
||||
|
||||
CallNode call;
|
||||
|
||||
ExtractMembersSink() {
|
||||
call.getFunction().(AttrNode).getName() = "extractall" and
|
||||
(this = call.getArg(0) or this = call.getArgByName("members"))
|
||||
}
|
||||
|
||||
override predicate sinks(TaintKind kind) {
|
||||
kind.(SequenceKind).getItem() instanceof TarFileInfo
|
||||
or
|
||||
kind instanceof OpenTarFile
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TarFileInfoSanitizer extends Sanitizer {
|
||||
|
||||
TarFileInfoSanitizer() {
|
||||
this = "TarInfo sanitizer"
|
||||
}
|
||||
|
||||
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
|
||||
path_sanitizing_test(test.getTest()) and
|
||||
taint instanceof TarFileInfo
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private predicate path_sanitizing_test(ControlFlowNode test) {
|
||||
/* Assume that any test with "path" in it is a sanitizer */
|
||||
test.getAChild+().(AttrNode).getName().matches("%path")
|
||||
or
|
||||
test.getAChild+().(NameNode).getId().matches("%path")
|
||||
}
|
||||
|
||||
class TarSlipConfiguration extends TaintTracking::Configuration {
|
||||
|
||||
TarSlipConfiguration() { this = "TarSlip configuration" }
|
||||
|
||||
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
|
||||
|
||||
override predicate isSink(TaintTracking::Sink sink) {
|
||||
sink instanceof ExtractSink or
|
||||
sink instanceof ExtractAllSink or
|
||||
sink instanceof ExtractMembersSink
|
||||
}
|
||||
|
||||
override predicate isSanitizer(Sanitizer sanitizer) {
|
||||
sanitizer instanceof TarFileInfoSanitizer
|
||||
or
|
||||
sanitizer instanceof ExcludeTarFilePy
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
|
||||
where config.hasFlowPath(src, sink)
|
||||
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(), "a potentially untrusted source"
|
||||
|
||||
7
python/ql/src/Security/CWE-022/examples/tarslip_bad.py
Normal file
7
python/ql/src/Security/CWE-022/examples/tarslip_bad.py
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
import tarfile
|
||||
|
||||
with tarfile.open('archive.zip') as tar:
|
||||
#BAD : This could write any file on the filesystem.
|
||||
for entry in tar:
|
||||
tar.extract(entry, "/tmp/unpack/")
|
||||
10
python/ql/src/Security/CWE-022/examples/tarslip_good.py
Normal file
10
python/ql/src/Security/CWE-022/examples/tarslip_good.py
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
import tarfile
|
||||
import os.path
|
||||
|
||||
with tarfile.open('archive.zip') as tar:
|
||||
for entry in tar:
|
||||
#GOOD: Check that entry is safe
|
||||
if os.path.isabs(entry.name) or ".." in entry.name:
|
||||
raise ValueError("Illegal tar archive entry")
|
||||
tar.extract(entry, "/tmp/unpack/")
|
||||
@@ -5,7 +5,7 @@ import semmle.python.security.TaintTracking
|
||||
query predicate edges(TaintedNode fromnode, TaintedNode tonode) {
|
||||
fromnode.getASuccessor() = tonode and
|
||||
/* Don't record flow past sinks */
|
||||
not fromnode.isVulnerableSink()
|
||||
not fromnode.isSink()
|
||||
}
|
||||
|
||||
private TaintedNode first_child(TaintedNode parent) {
|
||||
|
||||
@@ -703,7 +703,7 @@ class TaintedNode extends TTaintedNode {
|
||||
/** Holds if the underlying CFG node for this node is a vulnerable node
|
||||
* and is vulnerable to this node's taint.
|
||||
*/
|
||||
predicate isVulnerableSink() {
|
||||
predicate isSink() {
|
||||
exists(TaintedNode src, TaintSink vuln |
|
||||
src.isSource() and
|
||||
src.getASuccessor*() = this and
|
||||
@@ -712,6 +712,13 @@ class TaintedNode extends TTaintedNode {
|
||||
)
|
||||
}
|
||||
|
||||
/** DEPRECATED -- Use `TaintedNode.isSink()` instead
|
||||
* Sinks are not necessarily vulnerable
|
||||
* For removal 2020-07-01 */
|
||||
deprecated predicate isVulnerableSink() {
|
||||
this.isSink()
|
||||
}
|
||||
|
||||
TaintFlowImplementation::TrackedTaint fromAttribute(string name) {
|
||||
result = this.getTrackedValue().(TaintFlowImplementation::TrackedAttribute).fromAttribute(name)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user