mirror of
https://github.com/github/codeql.git
synced 2026-06-03 04:40:14 +02:00
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll) and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade (semmle.python.controlflow.internal.Cfg) and the new SSA adapter (semmle.python.dataflow.new.internal.SsaImpl), both introduced additively in the preceding PRs in this stack.

This is the trunk-flip equivalent of the original draft PR #21894 (kept around as documentation), rebased on top of the four preparatory PRs:
P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919).
P2: Qualify Flow.qll's AST references with Py:: prefix (#21920).
P3: Add new shared-CFG-backed control flow graph (#21921).
P4: Add new shared-SSA-backed SSA adapter (#21923).

The Python dataflow library (semmle/python/dataflow/new/) now imports the new CFG facade and SSA adapter. All CFG-typed predicates (ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are qualified with the Cfg:: prefix; SSA references switch from EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable.

GuardNode is redesigned to use the new CFG's outcome-node model (isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock + flipped indirection. Only BarrierGuard<...> is preserved as public API.

Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib, ...) are updated to take CFG nodes from the new facade.

A handful of dataflow consistency tweaks for the new CFG:
- Augmented-assignment targets are treated as both load and store.
- 'from X import *' produces uncertain SSA writes for unknown names.
- CFG nodes are canonicalised so dataflow does not see equivalent pre/post-order pairs as distinct nodes.

Two AST tweaks for the new CFG:
- AstNodeImpl: omit PEP 695 type-parameter names from FunctionDefExpr / ClassDefExpr children.
- ImportResolution: drop the legacy essa import.
Test churn (~175 files): reblessed library- and query-test .expected files reflect slightly different CFG granularity, different toString output, and a handful of true alert deltas in security queries. Verification: all 367 lib + src + consistency-queries compile clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
172 lines
5.2 KiB
Plaintext
172 lines
5.2 KiB
Plaintext
/**
|
|
* Provides default sources, sinks and sanitizers for detecting
|
|
* "tar slip"
|
|
* vulnerabilities, as well as extension points for adding your own.
|
|
*/
|
|
|
|
private import python
|
|
private import semmle.python.dataflow.new.DataFlow
|
|
private import semmle.python.Concepts
|
|
private import semmle.python.dataflow.new.BarrierGuards
|
|
private import semmle.python.ApiGraphs
|
|
private import semmle.python.controlflow.internal.Cfg as Cfg
|
|
|
|
/**
|
|
* Provides default sources, sinks and sanitizers for detecting
|
|
* "tar slip"
|
|
* vulnerabilities, as well as extension points for adding your own.
|
|
*/
|
|
module TarSlip {
|
|
/**
 * A data-flow node at which a "tar slip" flow may start.
 *
 * This class is abstract: extend it to contribute additional sources.
 */
abstract class Source extends DataFlow::Node { }
|
|
|
|
/**
 * A data-flow node at which a "tar slip" flow may end.
 *
 * This class is abstract: extend it to contribute additional sinks.
 */
abstract class Sink extends DataFlow::Node { }
|
|
|
|
/**
 * A data-flow node that acts as a sanitizer for "tar slip" flows.
 *
 * This class is abstract: extend it to contribute additional sanitizers.
 */
abstract class Sanitizer extends DataFlow::Node { }
|
|
|
|
/**
 * A call to `tarfile.open`, treated as a flow source.
 *
 * Two kinds of calls are excluded:
 * - calls whose first positional argument has a string literal as a local source
 *   (a hardcoded path, so the archive is considered safe), and
 * - calls located in a file named `tarfile.py` (the tarfile module itself).
 */
class TarfileOpen extends Source {
  TarfileOpen() {
    exists(DataFlow::CallCfgNode openCall |
      openCall = API::moduleImport("tarfile").getMember("open").getACall() and
      this = openCall
    |
      // Ignore opens within the tarfile module itself.
      not openCall.getLocation().getFile().getBaseName() = "tarfile.py" and
      // If the argument refers to a string object, then it's a hardcoded path and
      // this tarfile is safe.
      not openCall.getArg(0).getALocalSource().asExpr() instanceof StringLiteral
    )
  }
}
|
|
|
|
/**
 * A sanitizer based on file name: every node located in a file whose base name
 * is `tarfile.py` is a sanitizer.
 *
 * This is because we extract the standard library, and for efficiency we don't
 * want to track the flow of taint around the tarfile module.
 */
class ExcludeTarFilePy extends Sanitizer {
  ExcludeTarFilePy() {
    exists(File containingFile | containingFile = this.getLocation().getFile() |
      containingFile.getBaseName() = "tarfile.py"
    )
  }
}
|
|
|
|
/**
 * Holds if `call` is an `extract` or `extractall` call on the result of
 * `tarfile.open` whose extraction filter is unsafe.
 *
 * The filter is unsafe either by default (no `filter` argument is given, and the
 * default is unsafe), or because an explicitly unsafe value reaches the `filter`
 * argument: the string `"fully_trusted"`, or `None`.
 */
private predicate hasUnsafeFilter(API::CallNode call) {
  call =
    API::moduleImport("tarfile")
        .getMember("open")
        .getReturn()
        .getMember(["extract", "extractall"])
        .getACall() and
  (
    // No `filter` argument at all, so the default applies.
    not exists(call.getParameter(4, "filter"))
    or
    // An explicitly unsafe value reaches the `filter` argument.
    exists(Expr unsafeValue |
      unsafeValue = call.getParameter(4, "filter").getAValueReachingSink().asExpr()
    |
      unsafeValue instanceof None
      or
      unsafeValue.(StringLiteral).getText() = "fully_trusted"
    )
  )
}
|
|
|
|
/**
 * A sink capturing method calls to `extractall`.
 *
 * For a call to `file.extractall`, `file` is considered a sink if
 * there is no `members` argument (positional or keyword) and the
 * extraction filter is unsafe.
 */
class ExtractAllSink extends Sink {
  ExtractAllSink() {
    exists(API::CallNode call |
      call =
        API::moduleImport("tarfile")
            .getMember("open")
            .getReturn()
            .getMember("extractall")
            .getACall() and
      hasUnsafeFilter(call) and
      // Signature: `extractall(path=".", members=None, *, numeric_owner=False, filter=None)`.
      // Argument indices do not count the receiver, so `members` is positional index 1.
      not exists(call.getParameter(1, "members")) and
      this = call.(DataFlow::MethodCallNode).getObject()
    )
  }
}

/**
 * The `member` argument of `extract` is considered a sink, when the
 * extraction filter of the call is unsafe.
 */
class ExtractSink extends Sink {
  ExtractSink() {
    exists(DataFlow::CallCfgNode call |
      call =
        API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
      // Signature: `extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None)`.
      // `member` may be passed positionally (index 0) or by keyword.
      this in [call.getArg(0), call.getArgByName("member")] and
      hasUnsafeFilter(call)
    )
  }
}

/**
 * The `members` argument of `extractall` is considered a sink, when the
 * extraction filter of the call is unsafe.
 */
class ExtractMembersSink extends Sink {
  ExtractMembersSink() {
    exists(DataFlow::CallCfgNode call |
      call =
        API::moduleImport("tarfile")
            .getMember("open")
            .getReturn()
            .getMember("extractall")
            .getACall() and
      // `members` is the second positional parameter of `extractall` (index 1);
      // index 0 is `path`, which is not the member list.
      this in [call.getArg(1), call.getArgByName("members")] and
      hasUnsafeFilter(call)
    )
  }
}
|
|
|
|
/**
 * Holds if the guard `g` clears taint for `tarInfo` on branch `branch`.
 *
 * `g` must be a call that has `tarInfo.name` as one of its arguments, and that
 * contains (at any depth, the call itself included) an attribute or name node
 * whose name matches `"%path"`. `branch` is always `false`.
 *
 * `tarInfo` is assumed to be a `TarInfo` instance; this is not checked here.
 */
predicate tarFileInfoSanitizer(DataFlow::GuardNode g, Cfg::ControlFlowNode tarInfo, boolean branch) {
  branch = false and
  exists(Cfg::CallNode check, Cfg::AttrNode nameAttr |
    check = g and
    // We must test the name of the tar info object.
    nameAttr.getObject() = tarInfo and
    nameAttr.getName() = "name" and
    nameAttr = check.getAnArg()
  |
    // The assumption that any test that matches %path is a sanitizer might be too broad.
    check.getAChild*().(Cfg::NameNode).getId().matches("%path")
    or
    check.getAChild*().(Cfg::AttrNode).getName().matches("%path")
  )
}
|
|
|
|
/**
 * A sanitizer guard heuristic: any barrier node produced by instantiating
 * `DataFlow::BarrierGuard` with the `tarFileInfoSanitizer` predicate.
 *
 * The test `if <check_path>(info.name)` should clear taint for `info`,
 * where `<check_path>` is any function matching `"%path"`.
 * `info` is assumed to be a `TarInfo` instance.
 */
class TarFileInfoSanitizer extends Sanitizer {
  TarFileInfoSanitizer() {
    DataFlow::BarrierGuard<tarFileInfoSanitizer/3>::getABarrierNode() = this
  }
}
|
|
}
|