Add shutil.unpack_archive and subprocess tar extraction as TarSlip sources and sinks

- Add test cases for shutil.unpack_archive and subprocess.run(["tar", ...]) to tarslip.py
- Add ShutilUnpackArchiveSource/Sink for shutil.unpack_archive calls with non-literal filenames
- Add SubprocessTarExtractionSource/Sink for subprocess calls invoking tar with extraction flags
- Update TarSlip.expected with expected test output for new cases

Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-16 08:34:36 +00:00
committed by GitHub
parent c1ecca3ca6
commit 88b36c44df
3 changed files with 88 additions and 0 deletions

View File

@@ -132,6 +132,75 @@ module TarSlip {
}
}
/**
* A call to `shutil.unpack_archive`, considered as a flow source.
*
* The archive filename is not hardcoded, so it may come from user input.
*/
class ShutilUnpackArchiveSource extends Source {
ShutilUnpackArchiveSource() {
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
}
}
/**
* A call to `shutil.unpack_archive`, considered as a flow sink.
*
* The archive filename is not hardcoded, so it may come from user input.
*/
class ShutilUnpackArchiveSink extends Sink {
ShutilUnpackArchiveSink() {
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
}
}
/**
* Holds if `call` is a subprocess call that invokes `tar` for archive extraction
* with at least one non-literal argument (the archive filename).
*
* Detects patterns like `subprocess.run(["tar", "-xf", untrusted_filename])`.
*/
private predicate isSubprocessTarExtraction(DataFlow::CallCfgNode call) {
exists(SequenceNode cmdList |
call =
API::moduleImport("subprocess")
.getMember(["run", "call", "check_call", "check_output", "Popen"])
.getACall() and
cmdList = call.getArg(0).asCfgNode() and
// Command must be "tar" (possibly with a full path like "/usr/bin/tar")
cmdList.getElement(0).getNode().(StringLiteral).getText().matches("%tar") and
// At least one extraction-related flag must be present
exists(string flag |
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
(flag.matches("%-x%") or flag = "--extract")
) and
// At least one non-literal argument (the archive filename)
exists(int i |
i > 0 and
exists(cmdList.getElement(i)) and
not cmdList.getElement(i).getNode() instanceof StringLiteral
)
)
}
/**
* A call to `subprocess` functions that invokes `tar` for archive extraction,
* considered as a flow source.
*/
class SubprocessTarExtractionSource extends Source {
SubprocessTarExtractionSource() { isSubprocessTarExtraction(this) }
}
/**
* A call to `subprocess` functions that invokes `tar` for archive extraction,
* considered as a flow sink.
*/
class SubprocessTarExtractionSink extends Sink {
SubprocessTarExtractionSink() { isSubprocessTarExtraction(this) }
}
/**
* Holds if `g` clears taint for `tarInfo`.
*

View File

@@ -53,6 +53,9 @@ nodes
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
subpaths
#select
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
@@ -64,3 +67,6 @@ subpaths
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | potentially untrusted source |

View File

@@ -114,3 +114,16 @@ tar.extractall(members=tar, filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter
import shutil
import subprocess
# shutil.unpack_archive
shutil.unpack_archive(unsafe_filename_tar, "out") # unsafe
shutil.unpack_archive("safe.tar", "out") # safe
# subprocess tar extraction
subprocess.run(["tar", "-xf", unsafe_filename_tar]) # unsafe
subprocess.check_call(["tar", "-xf", unsafe_filename_tar]) # unsafe
subprocess.run(["tar", "-xf", "safe.tar"]) # safe
subprocess.run(["echo", unsafe_filename_tar]) # safe - not a tar extraction