Merge pull request #9551 from yoff/python/port-tarslip

Approved by RasmusWL
This commit is contained in:
CodeQL CI
2022-07-01 12:58:25 +01:00
committed by GitHub
5 changed files with 206 additions and 193 deletions

View File

@@ -0,0 +1,142 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "tar slip"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.Concepts
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting
* "tar slip"
* vulnerabilities, as well as extension points for adding your own.
*/
module TarSlip {
/**
* A data flow source for "tar slip" vulnerabilities.
*
* This is an extension point: extend this class to add further sources.
* `TarfileOpen` in this module is one concrete source.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "tar slip" vulnerabilities.
*
* This is an extension point: extend this class to add further sinks.
* `ExtractAllSink`, `ExtractSink` and `ExtractMembersSink` in this module
* are the concrete sinks.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "tar slip" vulnerabilities.
*
* This is an extension point: extend this class to add further sanitizers.
* `ExcludeTarFilePy` and `TarFileInfoSanitizer` in this module are the
* concrete sanitizers.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
 * A call to `tarfile.open`, considered as a flow source.
 *
 * Calls whose archive name is a string literal are not sources, and neither
 * are calls located in a file named `tarfile.py`.
 */
class TarfileOpen extends Source {
  TarfileOpen() {
    this = API::moduleImport("tarfile").getMember("open").getACall() and
    // If the archive name refers to a string object, then it's a hardcoded path and
    // this tarfile is safe. The name is the first parameter of `tarfile.open`, so it
    // may be given either positionally or with the `name` keyword.
    not exists(DataFlow::Node nameArg |
      nameArg in [
          this.(DataFlow::CallCfgNode).getArg(0),
          this.(DataFlow::CallCfgNode).getArgByName("name")
        ]
    |
      nameArg.getALocalSource().asExpr() instanceof StrConst
    ) and
    // Ignore opens within the tarfile module itself
    not this.getLocation().getFile().getBaseName() = "tarfile.py"
  }
}
/**
 * A sanitizer based on file name: every node located in a file named
 * `tarfile.py` is a sanitizer.
 *
 * This is needed because we extract the standard library; for efficiency
 * we don't want to track the flow of taint around the tarfile module.
 */
class ExcludeTarFilePy extends Sanitizer {
  ExcludeTarFilePy() {
    exists(File containingFile |
      containingFile = this.getLocation().getFile() and
      containingFile.getBaseName() = "tarfile.py"
    )
  }
}
/**
 * A sink capturing method calls to `extractall`.
 *
 * For a call `file.extractall()` that passes neither positional nor keyword
 * arguments, the receiver `file` is the sink.
 */
class ExtractAllSink extends Sink {
  ExtractAllSink() {
    exists(API::Node extractAll, DataFlow::CallCfgNode bareCall |
      extractAll =
        API::moduleImport("tarfile").getMember("open").getReturn().getMember("extractall") and
      bareCall = extractAll.getACall() and
      // "without arguments" means no positional and no keyword argument at all
      not exists(DataFlow::Node anyArg |
        anyArg = bareCall.getArg(_) or anyArg = bareCall.getArgByName(_)
      ) and
      this = bareCall.(DataFlow::MethodCallNode).getObject()
    )
  }
}
/**
 * The first positional argument of a call to `extract` on the result of
 * `tarfile.open` is considered a sink.
 */
class ExtractSink extends Sink {
  ExtractSink() {
    this =
      any(DataFlow::CallCfgNode extractCall |
        extractCall =
          API::moduleImport("tarfile")
              .getMember("open")
              .getReturn()
              .getMember("extract")
              .getACall()
      ).getArg(0)
  }
}
/**
 * The `members` argument to `extractall` is considered a sink.
 *
 * The Python signature is `TarFile.extractall(path=".", members=None, ...)`,
 * so `members` is either the second positional argument or the `members`
 * keyword argument.
 */
class ExtractMembersSink extends Sink {
  ExtractMembersSink() {
    exists(DataFlow::CallCfgNode call |
      call =
        API::moduleImport("tarfile")
            .getMember("open")
            .getReturn()
            .getMember("extractall")
            .getACall() and
      // Positional index 0 is `path`; `members` is at index 1.
      this in [call.getArg(1), call.getArgByName("members")]
    )
  }
}
/**
 * Holds if the guard `g` clears taint for `tarInfo` when it evaluates to `branch`.
 *
 * The guard must be a call that receives `tarInfo.name` as an argument, and
 * some attribute or name within the call must match the pattern `"%path"`.
 * Taint is cleared on the `false` branch only.
 * `tarInfo` is assumed to be a `TarInfo` instance.
 */
predicate tarFileInfoSanitizer(DataFlow::GuardNode g, ControlFlowNode tarInfo, boolean branch) {
  branch = false and
  exists(CallNode check, AttrNode nameAttr |
    check = g and
    // We must test the name of the tar info object.
    nameAttr = check.getAnArg() and
    nameAttr.getName() = "name" and
    nameAttr.getObject() = tarInfo and
    // The assumption that any test that matches %path is a sanitizer might be too broad.
    (
      check.getAChild*().(AttrNode).getName().matches("%path")
      or
      check.getAChild*().(NameNode).getId().matches("%path")
    )
  )
}
/**
* A sanitizer guard heuristic.
*
* The test `if <check_path>(info.name)` should clear taint for `info`,
* where `<check_path>` is any function matching `"%path"`.
* `info` is assumed to be a `TarInfo` instance.
*
* The sanitized nodes are the barrier nodes produced by instantiating
* `DataFlow::BarrierGuard` with the `tarFileInfoSanitizer` predicate.
*/
class TarFileInfoSanitizer extends Sanitizer {
TarFileInfoSanitizer() {
this = DataFlow::BarrierGuard<tarFileInfoSanitizer/3>::getABarrierNode()
}
}
}

View File

@@ -0,0 +1,25 @@
/**
* Provides a taint-tracking configuration for detecting "tar slip" vulnerabilities.
*
* Note, for performance reasons: only import this file if
* `TarSlip::Configuration` is needed, otherwise
* `TarSlipCustomizations` should be imported instead.
*/
private import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import TarSlipCustomizations::TarSlip
/**
 * A taint-tracking configuration for detecting "tar slip" vulnerabilities.
 *
 * Sources, sinks and sanitizers are the members of the `Source`, `Sink` and
 * `Sanitizer` classes imported from `TarSlipCustomizations::TarSlip`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "TarSlip" }

  /** Holds if `source` is a `Source`. */
  override predicate isSource(DataFlow::Node source) {
    source instanceof Source
  }

  /** Holds if `sink` is a `Sink`. */
  override predicate isSink(DataFlow::Node sink) {
    sink instanceof Sink
  }

  /** Holds if `node` is a `Sanitizer`. */
  override predicate isSanitizer(DataFlow::Node node) {
    node instanceof Sanitizer
  }
}

View File

@@ -13,170 +13,10 @@
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Basic
import semmle.python.security.dataflow.TarSlipQuery
import DataFlow::PathGraph
/**
 * A taint kind representing open tarfile objects, that is, the result of
 * calling `tarfile.open(...)`.
 */
class OpenTarFile extends TaintKind {
  OpenTarFile() { this = "tarfile.open" }

  /** Gets the type of values of this kind: `tarfile.TarFile`. */
  override ClassValue getType() { result = Value::named("tarfile.TarFile") }

  /** Iterating over an open tarfile yields `TarFileInfo` taint. */
  override TaintKind getTaintForIteration() { result instanceof TarFileInfo }

  /**
   * `getmember` yields a `TarFileInfo`; `getmembers` yields a sequence
   * whose items are `TarFileInfo`.
   */
  override TaintKind getTaintOfMethodResult(string name) {
    result.(SequenceKind).getItem() instanceof TarFileInfo and name = "getmembers"
    or
    result instanceof TarFileInfo and name = "getmember"
  }
}
/**
 * The source of open tarfile objects. That is, any call to `tarfile.open(...)`
 * whose first argument does not point to a string object and which is not
 * located in a file named `tarfile.py`.
 */
class TarfileOpen extends TaintSource {
  TarfileOpen() {
    Value::named("tarfile.open").getACall() = this and
    // If the first argument (the archive name) refers to a string object, then it's a
    // hardcoded path and this tarfile is safe. Only the first argument is inspected, so
    // that a string literal passed for another parameter (for example the mode `"r"`)
    // does not make an untrusted archive look safe.
    not this.(CallNode).getArg(0).pointsTo(any(StringValue str)) and
    // Ignore opens within the tarfile module itself
    not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
  }

  /** Holds if `kind` is the `OpenTarFile` taint kind. */
  override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
}
/**
 * A taint kind labelled `"tarfile.entry"`; `OpenTarFile` produces it for the
 * results of `getmember`, for the items of `getmembers`, and for iteration.
 */
class TarFileInfo extends TaintKind {
  TarFileInfo() { this = "tarfile.entry" }

  /** The `name` attribute of a tainted entry carries `TarFileInfo` taint. */
  override TaintKind getTaintOfAttribute(string name) {
    result instanceof TarFileInfo and name = "name"
  }

  /** The result of calling `next` keeps this same taint kind. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = this and name = "next"
  }
}
/**
 * A sanitizer that stops the tarfile-related taint kinds at any node located
 * in a file named `tarfile.py`.
 *
 * For efficiency we don't want to track the flow of taint
 * around the tarfile module.
 */
class ExcludeTarFilePy extends Sanitizer {
  ExcludeTarFilePy() { this = "Tar sanitizer" }

  override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
    // Only the taint kinds introduced by this query are affected.
    (
      taint.(SequenceKind).getItem() instanceof TarFileInfo
      or
      taint instanceof TarFileInfo
      or
      taint instanceof OpenTarFile
    ) and
    node.getLocation().getFile().getBaseName() = "tarfile.py"
  }
}
/**
 * The receiver of a call to a method named `extractall` that is given no
 * arguments; it is a sink for `OpenTarFile` taint.
 */
class ExtractAllSink extends TaintSink {
  ExtractAllSink() {
    exists(CallNode extractAllCall, AttrNode method |
      method = extractAllCall.getFunction() and
      this = method.getObject("extractall") and
      not exists(extractAllCall.getAnArg())
    )
  }

  override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
}
/**
 * The first positional argument of a call to a method named `extract`;
 * it is a sink for `TarFileInfo` taint.
 */
class ExtractSink extends TaintSink {
  CallNode call;

  ExtractSink() {
    this = call.getArg(0) and
    exists(AttrNode callee | callee = call.getFunction() | callee.getName() = "extract")
  }

  override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
}
/**
 * The `members` argument of a call to a method named `extractall`.
 *
 * The Python signature is `TarFile.extractall(path=".", members=None, ...)`,
 * so `members` is either the second positional argument or the `members`
 * keyword argument.
 */
class ExtractMembersSink extends TaintSink {
  CallNode call;

  ExtractMembersSink() {
    call.getFunction().(AttrNode).getName() = "extractall" and
    // Positional index 0 is `path`; `members` is at index 1.
    (this = call.getArg(1) or this = call.getArgByName("members"))
  }

  /** Holds for a sequence of `TarFileInfo` items and for an `OpenTarFile`. */
  override predicate sinks(TaintKind kind) {
    kind.(SequenceKind).getItem() instanceof TarFileInfo
    or
    kind instanceof OpenTarFile
  }
}
/**
* A sanitizer that clears `TarFileInfo` taint on an edge of a test accepted by
* `path_sanitizing_test`, also when that test is wrapped in `not`.
*/
class TarFileInfoSanitizer extends Sanitizer {
TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
/* The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
taint instanceof TarFileInfo and
clears_taint_on_false_edge(test.getTest(), test.getSense())
}
/**
* Holds if taint is cleared on the `sense` edge of `test`: either `test` is
* itself a path-sanitizing test and `sense` is `false`, or `test` is a `not`
* whose operand clears taint on the opposite edge.
*/
private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) {
path_sanitizing_test(test) and
sense = false
or
// handle `not` (also nested)
test.(UnaryExprNode).getNode().getOp() instanceof Not and
clears_taint_on_false_edge(test.(UnaryExprNode).getOperand(), sense.booleanNot())
}
}
/**
 * Holds if `test` has a (transitive) child that is a name or an attribute
 * access matching the pattern `"%path"`.
 *
 * Any such test is assumed to be a sanitizer.
 */
private predicate path_sanitizing_test(ControlFlowNode test) {
  exists(ControlFlowNode descendant | descendant = test.getAChild+() |
    descendant.(NameNode).getId().matches("%path")
    or
    descendant.(AttrNode).getName().matches("%path")
  )
}
/**
* The taint-tracking configuration for this query: flow from `TarfileOpen`
* sources to the `ExtractSink`, `ExtractAllSink` and `ExtractMembersSink`
* sinks, with `TarFileInfoSanitizer` and `ExcludeTarFilePy` as sanitizers.
*/
class TarSlipConfiguration extends TaintTracking::Configuration {
TarSlipConfiguration() { this = "TarSlip configuration" }
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof ExtractSink or
sink instanceof ExtractAllSink or
sink instanceof ExtractMembersSink
}
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof TarFileInfoSanitizer
or
sanitizer instanceof ExcludeTarFilePy
}
override predicate isBarrier(DataFlow::Node node) {
// Avoid flow into the tarfile module
exists(ParameterDefinition def |
// `node` is the parameter, either as an SSA variable or as its defining CFG node
node.asVariable().getDefinition() = def
or
node.asCfgNode() = def.getDefiningNode()
|
// a parameter of the callable that `tarfile.open` refers to
def.getScope() = Value::named("tarfile.open").(CallableValue).getScope()
or
// a `self` parameter whose enclosing module is named `tarfile`
def.isSelf() and def.getScope().getEnclosingModule().getName() = "tarfile"
)
}
}
// NOTE(review): the first `select` below ends in a trailing comma and is followed directly by
// a second `from` clause; this looks like the pre-change and post-change versions of the query
// shown together by the diff view, sharing the final message line - confirm before treating
// these lines as one query.
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
// Reports every flow path that `Configuration` finds from `source` to `sink`, placing the
// alert on the sink node and linking the `$@` placeholder to the source node.
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "Extraction of tarfile from $@", source.getNode(),
"a potentially untrusted source"

View File

@@ -1,29 +1,36 @@
edges
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open |
| tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open |
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:17:14:17:16 | tarfile.open |
| tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:17:14:17:16 | tarfile.open |
| tarslip.py:17:1:17:17 | tarfile.entry | tarslip.py:18:17:18:21 | tarfile.entry |
| tarslip.py:17:1:17:17 | tarfile.entry | tarslip.py:18:17:18:21 | tarfile.entry |
| tarslip.py:17:14:17:16 | tarfile.open | tarslip.py:17:1:17:17 | tarfile.entry |
| tarslip.py:17:14:17:16 | tarfile.open | tarslip.py:17:1:17:17 | tarfile.entry |
| tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:34:14:34:16 | tarfile.open |
| tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:34:14:34:16 | tarfile.open |
| tarslip.py:34:1:34:17 | tarfile.entry | tarslip.py:37:17:37:21 | tarfile.entry |
| tarslip.py:34:1:34:17 | tarfile.entry | tarslip.py:37:17:37:21 | tarfile.entry |
| tarslip.py:34:14:34:16 | tarfile.open | tarslip.py:34:1:34:17 | tarfile.entry |
| tarslip.py:34:14:34:16 | tarfile.open | tarslip.py:34:1:34:17 | tarfile.entry |
| tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:24:41:26 | tarfile.open |
| tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:24:41:26 | tarfile.open |
| tarslip.py:56:7:56:39 | tarfile.open | tarslip.py:57:14:57:16 | tarfile.open |
| tarslip.py:56:7:56:39 | tarfile.open | tarslip.py:57:14:57:16 | tarfile.open |
| tarslip.py:57:1:57:17 | tarfile.entry | tarslip.py:59:21:59:25 | tarfile.entry |
| tarslip.py:57:1:57:17 | tarfile.entry | tarslip.py:59:21:59:25 | tarfile.entry |
| tarslip.py:57:14:57:16 | tarfile.open | tarslip.py:57:1:57:17 | tarfile.entry |
| tarslip.py:57:14:57:16 | tarfile.open | tarslip.py:57:1:57:17 | tarfile.entry |
| tarslip.py:12:7:12:39 | ControlFlowNode for Attribute() | tarslip.py:13:1:13:3 | ControlFlowNode for tar |
| tarslip.py:16:7:16:39 | ControlFlowNode for Attribute() | tarslip.py:17:5:17:9 | GSSA Variable entry |
| tarslip.py:17:5:17:9 | GSSA Variable entry | tarslip.py:18:17:18:21 | ControlFlowNode for entry |
| tarslip.py:33:7:33:39 | ControlFlowNode for Attribute() | tarslip.py:34:5:34:9 | GSSA Variable entry |
| tarslip.py:34:5:34:9 | GSSA Variable entry | tarslip.py:37:17:37:21 | ControlFlowNode for entry |
| tarslip.py:40:7:40:39 | ControlFlowNode for Attribute() | tarslip.py:41:24:41:26 | ControlFlowNode for tar |
| tarslip.py:56:7:56:39 | ControlFlowNode for Attribute() | tarslip.py:57:5:57:9 | GSSA Variable entry |
| tarslip.py:57:5:57:9 | GSSA Variable entry | tarslip.py:59:21:59:25 | ControlFlowNode for entry |
| tarslip.py:79:7:79:39 | ControlFlowNode for Attribute() | tarslip.py:80:5:80:9 | GSSA Variable entry |
| tarslip.py:80:5:80:9 | GSSA Variable entry | tarslip.py:82:21:82:25 | ControlFlowNode for entry |
nodes
| tarslip.py:12:7:12:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:13:1:13:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:16:7:16:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:17:5:17:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:18:17:18:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:33:7:33:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:34:5:34:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:37:17:37:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:40:7:40:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:41:24:41:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:56:7:56:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:57:5:57:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:59:21:59:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:79:7:79:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:80:5:80:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:82:21:82:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
subpaths
#select
| tarslip.py:13:1:13:3 | tar | tarslip.py:12:7:12:39 | tarfile.open | tarslip.py:13:1:13:3 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:12:7:12:39 | Attribute() | a potentially untrusted source |
| tarslip.py:18:17:18:21 | entry | tarslip.py:16:7:16:39 | tarfile.open | tarslip.py:18:17:18:21 | tarfile.entry | Extraction of tarfile from $@ | tarslip.py:16:7:16:39 | Attribute() | a potentially untrusted source |
| tarslip.py:37:17:37:21 | entry | tarslip.py:33:7:33:39 | tarfile.open | tarslip.py:37:17:37:21 | tarfile.entry | Extraction of tarfile from $@ | tarslip.py:33:7:33:39 | Attribute() | a potentially untrusted source |
| tarslip.py:41:24:41:26 | tar | tarslip.py:40:7:40:39 | tarfile.open | tarslip.py:41:24:41:26 | tarfile.open | Extraction of tarfile from $@ | tarslip.py:40:7:40:39 | Attribute() | a potentially untrusted source |
| tarslip.py:59:21:59:25 | entry | tarslip.py:56:7:56:39 | tarfile.open | tarslip.py:59:21:59:25 | tarfile.entry | Extraction of tarfile from $@ | tarslip.py:56:7:56:39 | Attribute() | a potentially untrusted source |
| tarslip.py:13:1:13:3 | ControlFlowNode for tar | tarslip.py:12:7:12:39 | ControlFlowNode for Attribute() | tarslip.py:13:1:13:3 | ControlFlowNode for tar | Extraction of tarfile from $@ | tarslip.py:12:7:12:39 | ControlFlowNode for Attribute() | a potentially untrusted source |
| tarslip.py:18:17:18:21 | ControlFlowNode for entry | tarslip.py:16:7:16:39 | ControlFlowNode for Attribute() | tarslip.py:18:17:18:21 | ControlFlowNode for entry | Extraction of tarfile from $@ | tarslip.py:16:7:16:39 | ControlFlowNode for Attribute() | a potentially untrusted source |
| tarslip.py:37:17:37:21 | ControlFlowNode for entry | tarslip.py:33:7:33:39 | ControlFlowNode for Attribute() | tarslip.py:37:17:37:21 | ControlFlowNode for entry | Extraction of tarfile from $@ | tarslip.py:33:7:33:39 | ControlFlowNode for Attribute() | a potentially untrusted source |
| tarslip.py:41:24:41:26 | ControlFlowNode for tar | tarslip.py:40:7:40:39 | ControlFlowNode for Attribute() | tarslip.py:41:24:41:26 | ControlFlowNode for tar | Extraction of tarfile from $@ | tarslip.py:40:7:40:39 | ControlFlowNode for Attribute() | a potentially untrusted source |
| tarslip.py:59:21:59:25 | ControlFlowNode for entry | tarslip.py:56:7:56:39 | ControlFlowNode for Attribute() | tarslip.py:59:21:59:25 | ControlFlowNode for entry | Extraction of tarfile from $@ | tarslip.py:56:7:56:39 | ControlFlowNode for Attribute() | a potentially untrusted source |
| tarslip.py:82:21:82:25 | ControlFlowNode for entry | tarslip.py:79:7:79:39 | ControlFlowNode for Attribute() | tarslip.py:82:21:82:25 | ControlFlowNode for entry | Extraction of tarfile from $@ | tarslip.py:79:7:79:39 | ControlFlowNode for Attribute() | a potentially untrusted source |

View File

@@ -1 +0,0 @@
semmle-extractor-options: -p ../lib/ --max-import-depth=3