From 68fd75ca34dd9d792cdce7f400a72acd8b473b54 Mon Sep 17 00:00:00 2001 From: ALJI Mohamed Date: Mon, 5 Dec 2022 17:20:22 +0100 Subject: [PATCH] UnpackUnsafe query and tests --- .../Security/CWE-022bis/UnsafeUnpack.qhelp | 56 +++++++++++++++++++ .../Security/CWE-022bis/UnsafeUnpack.ql | 56 +++++++++++++++++++ .../CWE-022bis/examples/HIT_UnsafeUnpack.py | 12 ++++ .../CWE-022bis/examples/NoHIT_UnsafeUnpack.py | 17 ++++++ .../Security/CWE-022/UnsafeUnpack.expected | 10 ++++ .../Security/CWE-022/UnsafeUnpack.py | 12 ++++ .../Security/CWE-022/UnsafeUnpack.qlref | 1 + 7 files changed, 164 insertions(+) create mode 100644 python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp create mode 100644 python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql create mode 100644 python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py create mode 100644 python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp new file mode 100644 index 00000000000..c1115e819b9 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.qhelp @@ -0,0 +1,56 @@ + + + + +

Extracting files from a malicious tarball without validating that the destination file path +is within the destination directory using `shutil.unpack_archive()` can cause files outside the +destination directory to be overwritten, due to the possible presence of directory traversal elements + (..) in archive path names.

+ +

Tarball contain archive entries representing each file in the archive. These entries +include a file path for the entry, but these file paths are not restricted and may contain +unexpected special elements such as the directory traversal element (..). If these +file paths are used to determine an output file to write the contents of the archive item to, then +the file may be written to an unexpected location. This can result in sensitive information being +revealed or deleted, or an attacker being able to influence behavior by modifying unexpected +files.

+ +

For example, if a tarball contains a file entry ../sim4n6.txt, and the tarball +is extracted to the directory /tmp/tmp123, then naively combining the paths would result +in an output file path of /tmp/tmp123/../sim4n6.txt, which would cause the file to be +written to /tmp/.

+ +
+ + +

Ensure that output paths constructed from tarball entries are validated +to prevent writing files to unexpected locations.

+ +

Consider using a safer module, such as: zipfile

+ +
+ + +

+In this example an archive is extracted without validating file paths. +

+ + + +

To fix this vulnerability, we need to call the function tarfile.extract() + on each member after verifying that it does not contain either `..` or startswith `/`. +

+ + + +
+ + +
  • + Shutil official documentation + shutil.unpack_archive() warning. +
  • +
    +
    diff --git a/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql new file mode 100644 index 00000000000..eb78b13cd0a --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-022bis/UnsafeUnpack.ql @@ -0,0 +1,56 @@ +/** + * @name Arbitrary file write during a remotely downloaded tarball extraction + * @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating + * that the destination file path is within the destination directory can cause files outside + * the destination directory to be overwritten. More precisely, if the tarball comes from a remote location. + * @kind path-problem + * @id py/unsafe-unpacking + * @problem.severity error + * @security-severity 7.5 + * @precision high + * @tags security + * external/cwe/cwe-022bis + */ + +import python +import experimental.semmle.python.Concepts +import DataFlow::PathGraph +import semmle.python.ApiGraphs +import semmle.python.dataflow.new.internal.Attributes +import semmle.python.dataflow.new.DataFlow +import semmle.python.ApiGraphs +import semmle.python.dataflow.new.TaintTracking +import semmle.python.Concepts + +class UnsafeUnpackingConfig extends TaintTracking::Configuration { + UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" } + + override predicate isSource(DataFlow::Node source) { + // A source coming from a remote location + exists(Http::Client::Request request | source = request) and + not source.getScope().getLocation().getFile().inStdlib() + } + + override predicate isSink(DataFlow::Node sink) { + // A sink capturing method calls to `unpack_archive`. + sink = + API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and + not sink.getScope().getLocation().getFile().inStdlib() + } + + override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + // Writing the response data to the archive + exists(API::CallNode call, MethodCallNode w | + nodeTo = call.getArg(0) and + call = API::builtin("open").getACall() and + w.getMethodName() = "write" and + w.getObject() = call.getReturn().getAValueReachableFromSource() and + nodeFrom = w.getArg(0) + ) + } +} + +from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@", + source.getAQlClass(), "during archive unpacking." \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py b/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py new file mode 100644 index 00000000000..cc0f857f8c0 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-022bis/examples/HIT_UnsafeUnpack.py @@ -0,0 +1,12 @@ +import requests +import shutil + +url = "https://www.someremote.location/tarball.tar.gz" +response = requests.get(url, stream=True) + +tarpath = "/tmp/tmp456/tarball.tar.gz" +with open(tarpath, "wb") as f: + f.write(response.raw.read()) + +untarredpath = "/tmp/tmp123" +shutil.unpack_archive(tarpath, untarredpath) \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py b/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py new file mode 100644 index 00000000000..426bcd71481 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-022bis/examples/NoHIT_UnsafeUnpack.py @@ -0,0 +1,17 @@ +import requests +import tarfile + +url = "https://www.someremote.location/tarball.tar.gz" +response = requests.get(url, stream=True) + +tarpath = "/tmp/tmp456/tarball.tar.gz" +with open(tarpath, "wb") as f: + f.write(response.raw.read()) + +untarredpath = "/tmp/tmp123" +with tarfile.open(tarpath) as tar: + for member in tar.getmembers(): + if member.name.startswith("/") or ".." in member.name: + raise Exception("Path traversal identified in tarball") + + tar.extract(untarredpath, member) \ No newline at end of file diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected new file mode 100644 index 00000000000..96d25c65a30 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.expected @@ -0,0 +1,10 @@ +edges +| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | +| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | +nodes +| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath | +subpaths +#select +| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py new file mode 100644 index 00000000000..cc0f857f8c0 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.py @@ -0,0 +1,12 @@ +import requests +import shutil + +url = "https://www.someremote.location/tarball.tar.gz" +response = requests.get(url, stream=True) + +tarpath = "/tmp/tmp456/tarball.tar.gz" +with open(tarpath, "wb") as f: + f.write(response.raw.read()) + +untarredpath = "/tmp/tmp123" +shutil.unpack_archive(tarpath, untarredpath) \ No newline at end of file diff --git a/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref new file mode 100644 index 00000000000..90e5db651a0 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-022/UnsafeUnpack.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-022bis/UnsafeUnpack.ql \ No newline at end of file