From 88b36c44df9bd9e7c0403090e186e71310c19c71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:34:36 +0000 Subject: [PATCH] Add shutil.unpack_archive and subprocess tar extraction as TarSlip sources and sinks - Add test cases for shutil.unpack_archive and subprocess.run(["tar", ...]) to tarslip.py - Add ShutilUnpackArchiveSource/Sink for shutil.unpack_archive calls with non-literal filenames - Add SubprocessTarExtractionSource/Sink for subprocess calls invoking tar with extraction flags - Update TarSlip.expected with expected test output for new cases Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com> --- .../dataflow/TarSlipCustomizations.qll | 69 +++++++++++++++++++ .../Security/CWE-022-TarSlip/TarSlip.expected | 6 ++ .../Security/CWE-022-TarSlip/tarslip.py | 13 ++++ 3 files changed, 88 insertions(+) diff --git a/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll index 2dbe2c542ae..4a0068990dc 100644 --- a/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll @@ -132,6 +132,75 @@ module TarSlip { } } + /** + * A call to `shutil.unpack_archive`, considered as a flow source. + * + * The archive filename is not hardcoded, so it may come from user input. + */ + class ShutilUnpackArchiveSource extends Source { + ShutilUnpackArchiveSource() { + this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and + not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral + } + } + + /** + * A call to `shutil.unpack_archive`, considered as a flow sink. + * + * The archive filename is not hardcoded, so it may come from user input. + */ + class ShutilUnpackArchiveSink extends Sink { + ShutilUnpackArchiveSink() { + this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and + not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral + } + } + + /** + * Holds if `call` is a subprocess call that invokes `tar` for archive extraction + * with at least one non-literal argument (the archive filename). + * + * Detects patterns like `subprocess.run(["tar", "-xf", untrusted_filename])`. + */ + private predicate isSubprocessTarExtraction(DataFlow::CallCfgNode call) { + exists(SequenceNode cmdList | + call = + API::moduleImport("subprocess") + .getMember(["run", "call", "check_call", "check_output", "Popen"]) + .getACall() and + cmdList = call.getArg(0).asCfgNode() and + // Command must be "tar" (possibly with a full path like "/usr/bin/tar") + cmdList.getElement(0).getNode().(StringLiteral).getText().matches("%tar") and + // At least one extraction-related flag must be present + exists(string flag | + flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and + (flag.matches("%-x%") or flag = "--extract") + ) and + // At least one non-literal argument (the archive filename) + exists(int i | + i > 0 and + exists(cmdList.getElement(i)) and + not cmdList.getElement(i).getNode() instanceof StringLiteral + ) + ) + } + + /** + * A call to `subprocess` functions that invokes `tar` for archive extraction, + * considered as a flow source. + */ + class SubprocessTarExtractionSource extends Source { + SubprocessTarExtractionSource() { isSubprocessTarExtraction(this) } + } + + /** + * A call to `subprocess` functions that invokes `tar` for archive extraction, + * considered as a flow sink. + */ + class SubprocessTarExtractionSink extends Sink { + SubprocessTarExtractionSink() { isSubprocessTarExtraction(this) } + } + /** * Holds if `g` clears taint for `tarInfo`. * diff --git a/python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected b/python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected index 6f98ea1aae2..73a257d13cb 100644 --- a/python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected +++ b/python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected @@ -53,6 +53,9 @@ nodes | tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar | +| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | +| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | subpaths #select | tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source | @@ -64,3 +67,6 @@ subpaths | tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source | +| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | potentially untrusted source | +| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | potentially untrusted source | +| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | potentially untrusted source | diff --git a/python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py b/python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py index 2c06d01adfd..bf8e2d0e264 100644 --- a/python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py +++ b/python/ql/test/query-tests/Security/CWE-022-TarSlip/tarslip.py @@ -114,3 +114,16 @@ tar.extractall(members=tar, filter=extraction_filter) # unsafe tar = tarfile.open(unsafe_filename_tar) tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter + +import shutil +import subprocess + +# shutil.unpack_archive +shutil.unpack_archive(unsafe_filename_tar, "out") # unsafe +shutil.unpack_archive("safe.tar", "out") # safe + +# subprocess tar extraction +subprocess.run(["tar", "-xf", unsafe_filename_tar]) # unsafe +subprocess.check_call(["tar", "-xf", unsafe_filename_tar]) # unsafe +subprocess.run(["tar", "-xf", "safe.tar"]) # safe +subprocess.run(["echo", unsafe_filename_tar]) # safe - not a tar extraction