diff --git a/change-notes/2020-11-04-unsafe-unzip-symlink.md b/change-notes/2020-11-04-unsafe-unzip-symlink.md new file mode 100644 index 00000000000..88a13019dc5 --- /dev/null +++ b/change-notes/2020-11-04-unsafe-unzip-symlink.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* A new query `go/unsafe-unzip-symlink` has been added. The query checks for extracting symbolic links from an archive without using `filepath.EvalSymlinks`. This could lead to a file being written outside the destination directory. diff --git a/ql/src/Security/CWE-022/UnsafeUnzipSymlink.go b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.go new file mode 100644 index 00000000000..30291b930d0 --- /dev/null +++ b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.go @@ -0,0 +1,32 @@ +package main + +import ( + "archive/tar" + "io" + "os" + "path/filepath" + "strings" +) + +func isRel(candidate, target string) bool { + // BAD: purely syntactic means are used to check + // that `candidate` does not escape from `target` + if filepath.IsAbs(candidate) { + return false + } + relpath, err := filepath.Rel(target, filepath.Join(target, candidate)) + return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..") +} + +func unzipSymlink(f io.Reader, target string) { + r := tar.NewReader(f) + for { + header, err := r.Next() + if err != nil { + break + } + if isRel(header.Linkname, target) && isRel(header.Name, target) { + os.Symlink(header.Linkname, header.Name) + } + } +} diff --git a/ql/src/Security/CWE-022/UnsafeUnzipSymlink.qhelp b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.qhelp new file mode 100644 index 00000000000..69072f16ed1 --- /dev/null +++ b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.qhelp @@ -0,0 +1,58 @@ + + + + +

+Extracting symbolic links from a malicious zip archive, without validating that the destination file path +is within the destination directory, can cause files outside the destination directory to be overwritten. +This can happen if there are previously-extracted symbolic links or +directory traversal elements and links (..) in archive paths. +

+ +

+This problem is related to the ZipSlip vulnerability which is detected by the go/zipslip query; +please see that query's help for more general information about malicious archive file vulnerabilities. +This query considers the specific case where symbolic links are extracted from an archive, in which case +the extraction code must be aware of existing symbolic links when checking whether it is about to extract +a link pointing to a location outside the target extraction directory. +

+
+ + +

+Ensure that output paths constructed from zip archive entries are validated. This includes resolving any +previously extracted symbolic links, for example using path/filepath.EvalSymlinks, to prevent writing +files or links to unexpected locations. +

+
+ + +

+In this example, links are extracted from an archive using the syntactic filepath.Rel +function to check whether the link and its target fall within the destination directory. +However, the extraction code doesn't resolve previously-extracted links, so a pair of links like +subdir/parent -> .. followed by escape -> subdir/parent/.. -> subdir/../.. +leaves a link pointing to the parent of the archive root. The syntactic Rel is ineffective +because it equates subdir/parent/.. with subdir/, but this is not the case +when subdir/parent is a symbolic link. +

+ +

To fix this vulnerability, resolve pre-existing symbolic links before checking +that the link's target is acceptable: +

+ +
+ + +
  • +Snyk: +Zip Slip Vulnerability. +
  • +
  • +OWASP: +Path Traversal. +
  • +
    +
    diff --git a/ql/src/Security/CWE-022/UnsafeUnzipSymlink.ql b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.ql new file mode 100644 index 00000000000..05d2f405514 --- /dev/null +++ b/ql/src/Security/CWE-022/UnsafeUnzipSymlink.ql @@ -0,0 +1,106 @@ +/** + * @name Arbitrary file write extracting an archive containing symbolic links + * @description Extracting files from a malicious zip archive without validating that the + * destination file path is within the destination directory can cause files outside + * the destination directory to be overwritten. Extracting symbolic links in particular + * requires resolving previously extracted links to ensure the destination directory + * is not escaped. + * @kind path-problem + * @id go/unsafe-unzip-symlink + * @problem.severity error + * @precision high + * @tags security + * external/cwe/cwe-022 + */ + +import go +import DataFlow::PathGraph + +/** A file name from a zip or tar entry, as a source for unsafe unzipping of symlinks. */ +class FileNameSource extends DataFlow::FieldReadNode { + FileNameSource() { + getField().hasQualifiedName("archive/zip", "File", ["Name", "Data"]) or + getField().hasQualifiedName("archive/tar", "Header", ["Name", "Linkname"]) + } +} + +/** + * Gets a read from `archive/zip.Reader.File` or a call to `archive/tar.Reader.Next()`. + */ +Expr getAnArchiveEntryAccess() { + result = + any(DataFlow::FieldReadNode frn | frn.readsField(_, "archive/zip", "Reader", "File")).asExpr() or + result = + any(DataFlow::MethodCallNode mcn | + mcn.getTarget().hasQualifiedName("archive/tar", "Reader", "Next") + ).asExpr() +} + +/** + * Gets a loop statement that contains either a read from `archive/zip.Reader.File` or a call to `archive/tar.Reader.Next()`. + */ +LoopStmt getAnExtractionLoop() { result = getAnArchiveEntryAccess().getParent*() } + +/** + * An argument to a call to `os.Symlink` within a loop that extracts a zip or tar archive, + * taken as a sink for unsafe unzipping of symlinks. + */ +class SymlinkSink extends DataFlow::Node { + SymlinkSink() { + this = + any(DataFlow::CallNode n | n.getTarget().hasQualifiedName("os", "Symlink")) + .getArgument([0, 1]) and + this.asExpr().getParent*() = getAnExtractionLoop() + } +} + +/** + * An argument to `path/filepath.EvalSymlinks`, taken as a sink for detecting target paths + * that are likely safe to extract to. + */ +class EvalSymlinksSink extends DataFlow::Node { + EvalSymlinksSink() { + this = + any(DataFlow::CallNode n | n.getTarget().hasQualifiedName("path/filepath", "EvalSymlinks")) + .getArgument(0) + } +} + +/** + * Taint-flow configuration tracking archive header fields flowing to a `path/filepath.EvalSymlinks` call. + */ +class EvalSymlinksConfiguration extends TaintTracking2::Configuration { + EvalSymlinksConfiguration() { this = "Archive header field flow to `path/filepath.EvalSymlinks`" } + + override predicate isSource(DataFlow::Node source) { source instanceof FileNameSource } + + override predicate isSink(DataFlow::Node sink) { sink instanceof EvalSymlinksSink } +} + +/** + * Holds if `node` is an archive header field read that flows to a `path/filepath.EvalSymlinks` call. + */ +predicate symlinksEvald(DataFlow::Node node) { + exists(EvalSymlinksConfiguration c | c.hasFlow(getASimilarReadNode(node), _)) +} + +/** + * Taint-flow configuration tracking archive header fields flowing to an `os.Symlink` call, + * which never flow to a `path/filepath.EvalSymlinks` call. + */ +class SymlinkConfiguration extends TaintTracking::Configuration { + SymlinkConfiguration() { this = "Unsafe unzipping of symlinks" } + + override predicate isSource(DataFlow::Node source) { + source instanceof FileNameSource and + not symlinksEvald(source) + } + + override predicate isSink(DataFlow::Node sink) { sink instanceof SymlinkSink } +} + +from SymlinkConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select source.getNode(), source, sink, + "Unresolved path from an archive header, which may point outside the archive root, is used in $@.", + sink.getNode(), "symlink creation" diff --git a/ql/src/Security/CWE-022/UnsafeUnzipSymlinkGood.go b/ql/src/Security/CWE-022/UnsafeUnzipSymlinkGood.go new file mode 100644 index 00000000000..feea347cf78 --- /dev/null +++ b/ql/src/Security/CWE-022/UnsafeUnzipSymlinkGood.go @@ -0,0 +1,15 @@ +package main + +func isRel(candidate, target string) bool { + // GOOD: resolves all symbolic links before checking + // that `candidate` does not escape from `target` + if filepath.IsAbs(candidate) { + return false + } + realpath, err := filepath.EvalSymlinks(filepath.Join(target, candidate)) + if err != nil { + return false + } + relpath, err := filepath.Rel(target, realpath) + return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..") +} diff --git a/ql/src/semmle/go/dataflow/SSA.qll b/ql/src/semmle/go/dataflow/SSA.qll index 519b281893a..88381133c84 100644 --- a/ql/src/semmle/go/dataflow/SSA.qll +++ b/ql/src/semmle/go/dataflow/SSA.qll @@ -396,3 +396,12 @@ class SsaWithFields extends TSsaWithFields { this.getBaseVariable().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) } } + +/** + * Gets a read similar to `node`, according to the same rules as `SsaWithFields.similar()`. + */ +DataFlow::Node getASimilarReadNode(DataFlow::Node node) { + exists(SsaWithFields readFields | node = readFields.getAUse() | + result = readFields.similar().getAUse() + ) +} diff --git a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll index da941b3efa4..034e0f60910 100644 --- a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -615,11 +615,23 @@ class ReadNode extends InstructionNode { insn.readsField(IR::implicitDerefInstruction(base.asExpr()), f) } + /** + * Holds if this data-flow node reads the value of field `package.type.field` on the value of `base` or its + * implicit dereference. + * + * For example, for the field read `x.width`, `base` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `x` has the type `package.type`. + */ + predicate readsField(Node base, string package, string type, string field) { + exists(Field f | f.hasQualifiedName(package, type, field) | this.readsField(base, f)) + } + /** * Holds if this data-flow node looks up method `m` on the value of `receiver` or its implicit * dereference. * - * For example, for the method read `x.area`, `base` is either the data-flow node corresponding + * For example, for the method read `x.area`, `receiver` is either the data-flow node corresponding * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference * `*x`, and `m` is the method referenced by `area`. */ @@ -629,6 +641,18 @@ class ReadNode extends InstructionNode { insn.readsMethod(IR::implicitDerefInstruction(receiver.asExpr()), m) } + /** + * Holds if this data-flow node looks up method `package.type.name` on the value of `receiver` + * or its implicit dereference. + * + * For example, for the method read `x.name`, `receiver` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `package.type` is a type of `x` that defines a method named `name`. + */ + predicate readsMethod(Node receiver, string package, string type, string name) { + exists(Method m | m.hasQualifiedName(package, type, name) | this.readsMethod(receiver, m)) + } + /** * Holds if this data-flow node reads the value of element `index` on the value of `base` or its * implicit dereference. diff --git a/ql/src/semmle/go/security/ZipSlipCustomizations.qll b/ql/src/semmle/go/security/ZipSlipCustomizations.qll index 7b6544ad1f5..1f97ca14a6d 100644 --- a/ql/src/semmle/go/security/ZipSlipCustomizations.qll +++ b/ql/src/semmle/go/security/ZipSlipCustomizations.qll @@ -42,7 +42,13 @@ module ZipSlip { /** A path-traversal sink, considered as a taint sink for zip slip. */ class TaintedPathSinkAsSink extends Sink { - TaintedPathSinkAsSink() { this instanceof TaintedPath::Sink } + TaintedPathSinkAsSink() { + this instanceof TaintedPath::Sink and + // Exclude `os.Symlink`, which is treated specifically in query `go/unsafe-unzip-symlink`. + not exists(DataFlow::CallNode c | c.getTarget().hasQualifiedName("os", "Symlink") | + this = c.getAnArgument() + ) + } } /** A path-traversal sanitizer, considered as a sanitizer for zip slip. */ diff --git a/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.expected b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.expected new file mode 100644 index 00000000000..bd960ad80fe --- /dev/null +++ b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.expected @@ -0,0 +1,9 @@ +edges +nodes +| UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | semmle.label | selection of Linkname | +| UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | semmle.label | selection of Name | +| UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | semmle.label | selection of Name | +#select +| UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:31:15:31:29 | selection of Linkname | symlink creation | +| UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:31:32:31:42 | selection of Name | symlink creation | +| UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | Unresolved path from an archive header, which may point outside the archive root, is used in $@. | UnsafeUnzipSymlink.go:43:25:43:35 | selection of Name | symlink creation | diff --git a/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.go b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.go new file mode 100644 index 00000000000..239a3492c00 --- /dev/null +++ b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.go @@ -0,0 +1,68 @@ +package main + +import ( + "archive/tar" + "archive/zip" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" +) + +func isRel(candidate, target string) bool { + // BAD: purely syntactic means are used to check + // that `candidate` does not escape from `target` + if filepath.IsAbs(candidate) { + return false + } + relpath, err := filepath.Rel(target, filepath.Join(target, candidate)) + return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..") +} + +func unzipSymlinkBad(f io.Reader, target string) { + r := tar.NewReader(f) + for { + header, err := r.Next() + if err != nil { + break + } + if isRel(header.Linkname, target) && isRel(header.Name, target) { + os.Symlink(header.Linkname, header.Name) + } + } +} + +func unzipSymlinkBadZip(f io.ReaderAt, target string) { + r, _ := zip.NewReader(f, 100) + for _, header := range r.File { + linkData, _ := header.Open() + linkNameBytes, _ := ioutil.ReadAll(linkData) + linkName := string(linkNameBytes) + if isRel(linkName, target) && isRel(header.Name, target) { + os.Symlink(linkName, header.Name) + } + } +} + +// BAD (but not detected): some (notably Kubernetes) have solved the symlink +// problem with two loops: one that creates directory structure and resolves +// paths without creating links that could trip this process, then another +// that creates links. We approximate that by looking for the most intuitive +// implementation where `os.Symlink` is called directly from the same loop, +// and so mistake this for a safe implementation. +func unzipSymlinkOtherLoop(f io.Reader, target string) { + r := tar.NewReader(f) + links := map[string]string{} + for { + header, err := r.Next() + if err != nil { + break + } + links[header.Linkname] = header.Name + } + + for linkName, name := range links { + os.Symlink(linkName, name) + } +} diff --git a/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.qlref b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.qlref new file mode 100644 index 00000000000..d25a3dc696b --- /dev/null +++ b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlink.qlref @@ -0,0 +1 @@ +Security/CWE-022/UnsafeUnzipSymlink.ql diff --git a/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlinkGood.go b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlinkGood.go new file mode 100644 index 00000000000..7d69eb75362 --- /dev/null +++ b/ql/test/query-tests/Security/CWE-022/UnsafeUnzipSymlinkGood.go @@ -0,0 +1,50 @@ +package main + +import ( + "archive/tar" + "archive/zip" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" +) + +func isRelGood(candidate, target string) bool { + // GOOD: resolves all symbolic links before checking + // that `candidate` does not escape from `target` + if filepath.IsAbs(candidate) { + return false + } + realpath, err := filepath.EvalSymlinks(filepath.Join(target, candidate)) + if err != nil { + return false + } + relpath, err := filepath.Rel(target, realpath) + return err == nil && !strings.HasPrefix(filepath.Clean(relpath), "..") +} + +func unzipSymlinkGood(f io.Reader, target string) { + r := tar.NewReader(f) + for { + header, err := r.Next() + if err != nil { + break + } + if isRelGood(header.Linkname, target) && isRelGood(header.Name, target) { + os.Symlink(header.Linkname, header.Name) + } + } +} + +func unzipSymlinkGoodZip(f io.ReaderAt, target string) { + r, _ := zip.NewReader(f, 100) + for _, header := range r.File { + linkData, _ := header.Open() + linkNameBytes, _ := ioutil.ReadAll(linkData) + linkName := string(linkNameBytes) + if isRelGood(linkName, target) && isRelGood(header.Name, target) { + os.Symlink(linkName, header.Name) + } + } +}