Add query checking for unpacking of symlinks without using EvalSymlinks to spot existing ones.

This is usually dangerous because (if the archive is untrusted) the intent is usually to permit within-archive symlinks, e.g. dest/a/parent -> .. -> dest/a is an acceptable link to unpack. However if EvalSymlinks is not used to take already-unpacked symlinks into account, it becomes possible to sneak tricks like dest/escapes -> dest/a/parent/.. through, which create links leading out of the archive for later abuse.
This commit is contained in:
Chris Smowton
2020-11-05 11:00:37 +00:00
parent 82a5b5f264
commit 1a2c209259
12 changed files with 382 additions and 2 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* A new query `go/unsafe-unzip-symlink` has been added. The query checks for extracting symbolic links from an archive without using `filepath.EvalSymlinks`. This could lead to a file being written outside the destination directory.

View File

@@ -0,0 +1,32 @@
package main
import (
"archive/tar"
"io"
"os"
"path/filepath"
"strings"
)
func isRel(candidate, target string) bool {
// BAD: purely syntactic means are used to check
// that `candidate` does not escape from `target`
if filepath.IsAbs(candidate) {
return false
}
relpath, err := filepath.Rel(target, filepath.Join(target, candidate))
return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..")
}
func unzipSymlink(f io.Reader, target string) {
r := tar.NewReader(f)
for {
header, err := r.Next()
if err != nil {
break
}
if isRel(header.Linkname, target) && isRel(header.Name, target) {
os.Symlink(header.Linkname, header.Name)
}
}
}

View File

@@ -0,0 +1,58 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Extracting symbolic links from a malicious zip archive, without validating that the destination file path
is within the destination directory, can cause files outside the destination directory to be overwritten.
This can happen if there are previously-extracted symbolic links or
directory traversal elements and links (<code>..</code>) in archive paths.
</p>
<p>
This problem is related to the ZipSlip vulnerability which is detected by the <code>go/zipslip</code> query;
please see that query's help for more general information about malicious archive file vulnerabilities.
This query considers the specific case where symbolic links are extracted from an archive, in which case
the extraction code must be aware of existing symbolic links when checking whether it is about to extract
a link pointing to a location outside the target extraction directory.
</p>
</overview>
<recommendation>
<p>
Ensure that output paths constructed from zip archive entries are validated. This includes resolving any
previously extracted symbolic links, for example using <code>path/filepath.EvalSymlinks</code>, to prevent writing
files or links to unexpected locations.
</p>
</recommendation>
<example>
<p>
In this example, links are extracted from an archive using the syntactic <code>filepath.Rel</code>
function to check whether the link and its target fall within the destination directory.
However, the extraction code doesn't resolve previously-extracted links, so a pair of links like
<code>subdir/parent -> ..</code> followed by <code>escape -> subdir/parent/.. -> subdir/../..</code>
leaves a link pointing to the parent of the archive root. The syntactic <code>Rel</code> is ineffective
because it equates <code>subdir/parent/..</code> with <code>subdir/</code>, but this is not the case
when <code>subdir/parent</code> is a symbolic link.
</p>
<sample src="UnsafeUnzipSymlink.go" />
<p>To fix this vulnerability, resolve pre-existing symbolic links before checking
that the link's target is acceptable:
</p>
<sample src="UnsafeUnzipSymlinkGood.go" />
</example>
<references>
<li>
Snyk:
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
</li>
<li>
OWASP:
<a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,106 @@
/**
* @name Arbitrary file write extracting an archive containing symbolic links
* @description Extracting files from a malicious zip archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten. Extracting symbolic links in particular
* requires resolving previously extracted links to ensure the destination directory
* is not escaped.
* @kind path-problem
* @id go/unsafe-unzip-symlink
* @problem.severity error
* @precision high
* @tags security
* external/cwe/cwe-022
*/
import go
import DataFlow::PathGraph
/** A file name from a zip or tar entry, as a source for unsafe unzipping of symlinks. */
class FileNameSource extends DataFlow::FieldReadNode {
FileNameSource() {
getField().hasQualifiedName("archive/zip", "File", ["Name", "Data"]) or
getField().hasQualifiedName("archive/tar", "Header", ["Name", "Linkname"])
}
}
/**
* Gets a read from `archive/zip.Reader.File` or a call to `archive/tar.Reader.Next()`.
*/
Expr getAnArchiveEntryAccess() {
result =
any(DataFlow::FieldReadNode frn | frn.readsField(_, "archive/zip", "Reader", "File")).asExpr() or
result =
any(DataFlow::MethodCallNode mcn |
mcn.getTarget().hasQualifiedName("archive/tar", "Reader", "Next")
).asExpr()
}
/**
* Gets a loop statement that contains either a read from `archive/zip.Reader.File` or a call to `archive/tar.Reader.Next()`.
*/
LoopStmt getAnExtractionLoop() { result = getAnArchiveEntryAccess().getParent*() }
/**
* An argument to a call to `os.Symlink` within a loop that extracts a zip or tar archive,
* taken as a sink for unsafe unzipping of symlinks.
*/
class SymlinkSink extends DataFlow::Node {
SymlinkSink() {
this =
any(DataFlow::CallNode n | n.getTarget().hasQualifiedName("os", "Symlink"))
.getArgument([0, 1]) and
this.asExpr().getParent*() = getAnExtractionLoop()
}
}
/**
* An argument to `path/filepath.EvalSymlinks`, taken as a sink for detecting target paths
* that are likely safe to extract to.
*/
class EvalSymlinksSink extends DataFlow::Node {
EvalSymlinksSink() {
this =
any(DataFlow::CallNode n | n.getTarget().hasQualifiedName("path/filepath", "EvalSymlinks"))
.getArgument(0)
}
}
/**
* Taint-flow configuration tracking archive header fields flowing to a `path/filepath.EvalSymlinks` call.
*/
class EvalSymlinksConfiguration extends TaintTracking2::Configuration {
EvalSymlinksConfiguration() { this = "Archive header field flow to `path/filepath.EvalSymlinks`" }
override predicate isSource(DataFlow::Node source) { source instanceof FileNameSource }
override predicate isSink(DataFlow::Node sink) { sink instanceof EvalSymlinksSink }
}
/**
* Holds if `node` is an archive header field read that flows to a `path/filepath.EvalSymlinks` call.
*/
predicate symlinksEvald(DataFlow::Node node) {
exists(EvalSymlinksConfiguration c | c.hasFlow(getASimilarReadNode(node), _))
}
/**
* Taint-flow configuration tracking archive header fields flowing to an `os.Symlink` call,
* which never flow to a `path/filepath.EvalSymlinks` call.
*/
class SymlinkConfiguration extends TaintTracking::Configuration {
SymlinkConfiguration() { this = "Unsafe unzipping of symlinks" }
override predicate isSource(DataFlow::Node source) {
source instanceof FileNameSource and
not symlinksEvald(source)
}
override predicate isSink(DataFlow::Node sink) { sink instanceof SymlinkSink }
}
from SymlinkConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select source.getNode(), source, sink,
"Unresolved path from an archive header, which may point outside the archive root, is used in $@.",
sink.getNode(), "symlink creation"

View File

@@ -0,0 +1,15 @@
package main
func isRel(candidate, target string) bool {
// GOOD: resolves all symbolic links before checking
// that `candidate` does not escape from `target`
if filepath.IsAbs(candidate) {
return false
}
realpath, err := filepath.EvalSymlinks(filepath.Join(target, candidate))
if err != nil {
return false
}
relpath, err := filepath.Rel(target, realpath)
return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..")
}

View File

@@ -396,3 +396,12 @@ class SsaWithFields extends TSsaWithFields {
this.getBaseVariable().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
}
}
/**
* Gets a read similar to `node`, according to the same rules as `SsaWithFields.similar()`.
*/
DataFlow::Node getASimilarReadNode(DataFlow::Node node) {
exists(SsaWithFields readFields | node = readFields.getAUse() |
result = readFields.similar().getAUse()
)
}

View File

@@ -615,11 +615,23 @@ class ReadNode extends InstructionNode {
insn.readsField(IR::implicitDerefInstruction(base.asExpr()), f)
}
/**
* Holds if this data-flow node reads the value of field `package.type.field` on the value of `base` or its
* implicit dereference.
*
* For example, for the field read `x.width`, `base` is either the data-flow node corresponding
* to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference
* `*x`, and `x` has the type `package.type`.
*/
predicate readsField(Node base, string package, string type, string field) {
exists(Field f | f.hasQualifiedName(package, type, field) | this.readsField(base, f))
}
/**
* Holds if this data-flow node looks up method `m` on the value of `receiver` or its implicit
* dereference.
*
* For example, for the method read `x.area`, `base` is either the data-flow node corresponding
* For example, for the method read `x.area`, `receiver` is either the data-flow node corresponding
* to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference
* `*x`, and `m` is the method referenced by `area`.
*/
@@ -629,6 +641,18 @@ class ReadNode extends InstructionNode {
insn.readsMethod(IR::implicitDerefInstruction(receiver.asExpr()), m)
}
/**
* Holds if this data-flow node looks up method `package.type.name` on the value of `receiver`
* or its implicit dereference.
*
* For example, for the method read `x.name`, `receiver` is either the data-flow node corresponding
* to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference
* `*x`, and `package.type` is a type of `x` that defines a method named `name`.
*/
predicate readsMethod(Node receiver, string package, string type, string name) {
exists(Method m | m.hasQualifiedName(package, type, name) | this.readsMethod(receiver, m))
}
/**
* Holds if this data-flow node reads the value of element `index` on the value of `base` or its
* implicit dereference.

View File

@@ -42,7 +42,13 @@ module ZipSlip {
/** A path-traversal sink, considered as a taint sink for zip slip. */
class TaintedPathSinkAsSink extends Sink {
TaintedPathSinkAsSink() { this instanceof TaintedPath::Sink }
TaintedPathSinkAsSink() {
this instanceof TaintedPath::Sink and
// Exclude `os.Symlink`, which is treated specifically in query `go/unsafe-unzip-symlink`.
not exists(DataFlow::CallNode c | c.getTarget().hasQualifiedName("os", "Symlink") |
this = c.getAnArgument()
)
}
}
/** A path-traversal sanitizer, considered as a sanitizer for zip slip. */

View File

@@ -0,0 +1,9 @@
edges
nodes
| UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | semmle.label | selection of Linkname |
| UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | semmle.label | selection of Name |
| UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | semmle.label | selection of Name |
#select
| UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | symlink creation |
| UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | symlink creation |
| UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | symlink creation |

View File

@@ -0,0 +1,68 @@
package main
import (
"archive/tar"
"archive/zip"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
func isRel(candidate, target string) bool {
// BAD: purely syntactic means are used to check
// that `candidate` does not escape from `target`
if filepath.IsAbs(candidate) {
return false
}
relpath, err := filepath.Rel(target, filepath.Join(target, candidate))
return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..")
}
func unzipSymlinkBad(f io.Reader, target string) {
r := tar.NewReader(f)
for {
header, err := r.Next()
if err != nil {
break
}
if isRel(header.Linkname, target) && isRel(header.Name, target) {
os.Symlink(header.Linkname, header.Name)
}
}
}
func unzipSymlinkBadZip(f io.ReaderAt, target string) {
r, _ := zip.NewReader(f, 100)
for _, header := range r.File {
linkData, _ := header.Open()
linkNameBytes, _ := ioutil.ReadAll(linkData)
linkName := string(linkNameBytes)
if isRel(linkName, target) && isRel(header.Name, target) {
os.Symlink(linkName, header.Name)
}
}
}
// BAD (but not detected): some (notably Kubernetes) have solved the symlink
// problem with two loops: one that creates directory structure and resolves
// paths without creating links that could trip this process, then another
// that creates links. We approximate that by looking for the most intuitive
// implementation where `os.Symlink` is called directly from the same loop,
// and so mistake this for a safe implementation.
func unzipSymlinkOtherLoop(f io.Reader, target string) {
r := tar.NewReader(f)
links := map[string]string{}
for {
header, err := r.Next()
if err != nil {
break
}
links[header.Linkname] = header.Name
}
for linkName, name := range links {
os.Symlink(linkName, name)
}
}

View File

@@ -0,0 +1 @@
Security/CWE-022/UnsafeUnzipSymlink.ql

View File

@@ -0,0 +1,50 @@
package main
import (
"archive/tar"
"archive/zip"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
func isRelGood(candidate, target string) bool {
// GOOD: resolves all symbolic links before checking
// that `candidate` does not escape from `target`
if filepath.IsAbs(candidate) {
return false
}
realpath, err := filepath.EvalSymlinks(filepath.Join(target, candidate))
if err != nil {
return false
}
relpath, err := filepath.Rel(target, realpath)
return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..")
}
func unzipSymlinkGood(f io.Reader, target string) {
r := tar.NewReader(f)
for {
header, err := r.Next()
if err != nil {
break
}
if isRelGood(header.Linkname, target) && isRelGood(header.Name, target) {
os.Symlink(header.Linkname, header.Name)
}
}
}
func unzipSymlinkGoodZip(f io.ReaderAt, target string) {
r, _ := zip.NewReader(f, 100)
for _, header := range r.File {
linkData, _ := header.Open()
linkNameBytes, _ := ioutil.ReadAll(linkData)
linkName := string(linkNameBytes)
if isRelGood(linkName, target) && isRelGood(header.Name, target) {
os.Symlink(linkName, header.Name)
}
}
}