Compare commits

...

5 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
50848834a8 Address reviewer feedback: fix sink modeling for shutil and subprocess
- Remove ShutilUnpackArchiveSource (should not be a source)
- Change ShutilUnpackArchiveSink to target getArg(0) (the filename arg, not the whole call); removes the now-redundant literal check
- Remove SubprocessTarExtractionSource (should not be a source)
- Change SubprocessTarExtractionSink to target the specific non-literal element in the command list (the filename), not the call itself
- Remove private isSubprocessTarExtraction predicate (inlined into the sink)
- Revert TarSlip.expected to its pre-PR state (the new sinks require proper source taint flow to fire)

Agent-Logs-Url: https://github.com/github/codeql/sessions/833673da-f868-4c3b-8bff-62364ee0ed19

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
2026-04-16 10:11:37 +00:00
copilot-swe-agent[bot]
6d03548d9d Improve clarity of subprocess tar extraction detection patterns
Replace regexpMatch for command name with explicit equality check:
- cmd = \"tar\" or cmd.matches(\"%/tar\") - clearly matches only exact \"tar\" or paths ending with \"/tar\"
Keep flag check as regexpMatch since it naturally excludes double-dash flags

Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
2026-04-16 08:42:03 +00:00
copilot-swe-agent[bot]
8efaa5daf1 Fix imprecise patterns in isSubprocessTarExtraction predicate
Use regexpMatch instead of matches to avoid false positives:
- Command name: regexpMatch(\"(.*/)?tar\") to match only \"tar\" or paths ending in \"/tar\"
- Extraction flag: regexpMatch(\"-[a-zA-Z]*x[a-zA-Z]*\") to match only single-dash flags containing x

Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
2026-04-16 08:38:54 +00:00
copilot-swe-agent[bot]
88b36c44df Add shutil.unpack_archive and subprocess tar extraction as TarSlip sources and sinks
- Add test cases for shutil.unpack_archive and subprocess.run(["tar", ...]) to tarslip.py
- Add ShutilUnpackArchiveSource/Sink for shutil.unpack_archive calls with non-literal filenames
- Add SubprocessTarExtractionSource/Sink for subprocess calls invoking tar with extraction flags
- Update TarSlip.expected with expected test output for new cases

Agent-Logs-Url: https://github.com/github/codeql/sessions/f31a3622-9b18-415f-85f1-62ec14a8319f

Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
2026-04-16 08:34:36 +00:00
copilot-swe-agent[bot]
c1ecca3ca6 Initial plan 2026-04-16 07:56:48 +00:00
2 changed files with 59 additions and 0 deletions

View File

@@ -132,6 +132,52 @@ module TarSlip {
}
}
/**
* A call to `shutil.unpack_archive`, considered as a flow sink.
*
* The first argument (the archive filename) is the sink.
*/
class ShutilUnpackArchiveSink extends Sink {
ShutilUnpackArchiveSink() {
this = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
}
}
/**
* A call to `subprocess` functions that invokes `tar` for archive extraction,
* considered as a flow sink.
*
* The sink is the non-literal element in the command list that corresponds
* to the archive filename (e.g. the `unsafe_filename` in
* `subprocess.run(["tar", "-xf", unsafe_filename])`).
*/
class SubprocessTarExtractionSink extends Sink {
SubprocessTarExtractionSink() {
exists(SequenceNode cmdList, DataFlow::CallCfgNode call, int i |
call =
API::moduleImport("subprocess")
.getMember(["run", "call", "check_call", "check_output", "Popen"])
.getACall() and
cmdList = call.getArg(0).asCfgNode() and
// Command must be "tar" exactly or a path ending in "/tar" (e.g. "/usr/bin/tar")
exists(string cmd |
cmd = cmdList.getElement(0).getNode().(StringLiteral).getText() and
(cmd = "tar" or cmd.matches("%/tar"))
) and
// At least one extraction-related flag must be present:
// single-dash flags containing 'x' (like -x, -xf, -xvf) or the long option --extract
exists(string flag |
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
(flag.regexpMatch("-[a-zA-Z]*x[a-zA-Z]*") or flag = "--extract")
) and
// The sink is the specific non-literal argument (the archive filename)
i > 0 and
not cmdList.getElement(i).getNode() instanceof StringLiteral and
this.asCfgNode() = cmdList.getElement(i)
)
}
}
/**
* Holds if `g` clears taint for `tarInfo`.
*

View File

@@ -114,3 +114,16 @@ tar.extractall(members=tar, filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter
import shutil
import subprocess
# shutil.unpack_archive
shutil.unpack_archive(unsafe_filename_tar, "out") # unsafe
shutil.unpack_archive("safe.tar", "out") # safe
# subprocess tar extraction
subprocess.run(["tar", "-xf", unsafe_filename_tar]) # unsafe
subprocess.check_call(["tar", "-xf", unsafe_filename_tar]) # unsafe
subprocess.run(["tar", "-xf", "safe.tar"]) # safe
subprocess.run(["echo", unsafe_filename_tar]) # safe - not a tar extraction