Python: Add support for extraction filters

Adds support for extraction filters as defined in
https://peps.python.org/pep-0706/
and implemented in Python 3.12.

By my reading, setting the filter to `'data'` or `'tar'` is probably
safe, whereas `'fully_trusted'` or the default (which is the same as
`None`) is not.

For now, I have just added this modelling to the tarslip query. We could
also share it with the modelling of `shutil.unpack_archive` (which has also
gained a `filter` argument), but it was unclear to me where we should put
this modelling in that case. Perhaps the best solution would be to merge
the experimental `py/tarslip-extended` query into the existing query (in
which case the current location is perhaps not too bad).
This commit is contained in:
Taus
2023-11-27 14:11:17 +00:00
parent f05c86239f
commit 95e9284d08
3 changed files with 108 additions and 5 deletions

View File

@@ -55,10 +55,48 @@ module TarSlip {
ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" } ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" }
} }
/**
 * Gets a reference to an unsafe extraction filter value, tracked using type tracker `t`.
 *
 * The tracked origins are the string constant `"fully_trusted"` and the `None` literal.
 */
private DataFlow::TypeTrackingNode unsafeFilter(DataFlow::TypeTracker t) {
  // Base case: the node is itself one of the unsafe literal values.
  t.start() and
  exists(Expr origin | origin = result.asExpr() |
    origin instanceof None
    or
    origin.(StrConst).getS() = "fully_trusted"
  )
  or
  // Recursive case: extend an already tracked reference by one type-tracking step.
  exists(DataFlow::TypeTracker prev | result = unsafeFilter(prev).track(prev, t))
}
/** Gets a data-flow node that an unsafe extraction filter value (see `unsafeFilter/1`) flows to. */
private DataFlow::Node unsafeFilter() {
  exists(DataFlow::TypeTrackingNode tracked |
    tracked = unsafeFilter(DataFlow::TypeTracker::end()) and
    tracked.flowsTo(result)
  )
}
/**
 * Holds if `call` is a call to `extract` or `extractall` on the result of `tarfile.open`
 * whose extraction filter is unsafe. This is the case when
 *
 * - the filter argument (positional index 4 or keyword `filter`) is an explicitly unsafe
 *   value, that is `"fully_trusted"` or `None`, or
 * - no filter argument is supplied at all (the default is treated as unsafe).
 *
 * NOTE(review): CPython documents `filter` as keyword-only for both methods, so the
 * positional lookup at index 4 is presumably only defensive -- confirm before relying on it.
 */
private predicate hasUnsafeFilter(DataFlow::CallCfgNode call) {
  exists(API::Node tarFile |
    tarFile = API::moduleImport("tarfile").getMember("open").getReturn() and
    call = tarFile.getMember(["extract", "extractall"]).getACall()
  ) and
  (
    // An explicitly unsafe filter value reaches the filter argument.
    [call.getArg(4), call.getArgByName("filter")] = unsafeFilter()
    or
    // No filter argument in either form: the default applies.
    not (exists(call.getArg(4)) or exists(call.getArgByName("filter")))
  )
}
/** /**
* A sink capturing method calls to `extractall`. * A sink capturing method calls to `extractall`.
* *
* For a call to `file.extractall` without arguments, `file` is considered a sink. * For a call to `file.extractall`, `file` is considered a sink if
*
* - there are no other arguments, or
* - there are other arguments (except `members`), and the extraction filter is unsafe.
*/ */
class ExtractAllSink extends Sink { class ExtractAllSink extends Sink {
ExtractAllSink() { ExtractAllSink() {
@@ -69,8 +107,13 @@ module TarSlip {
.getReturn() .getReturn()
.getMember("extractall") .getMember("extractall")
.getACall() and .getACall() and
(
not exists(call.getArg(_)) and not exists(call.getArg(_)) and
not exists(call.getArgByName(_)) and not exists(call.getArgByName(_))
or
hasUnsafeFilter(call)
) and
not exists(call.getArgByName("members")) and
this = call.(DataFlow::MethodCallNode).getObject() this = call.(DataFlow::MethodCallNode).getObject()
) )
} }
@@ -84,7 +127,8 @@ module TarSlip {
exists(DataFlow::CallCfgNode call | exists(DataFlow::CallCfgNode call |
call = call =
API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and
this = call.getArg(0) this = call.getArg(0) and
hasUnsafeFilter(call)
) )
} }
} }
@@ -99,7 +143,8 @@ module TarSlip {
.getReturn() .getReturn()
.getMember("extractall") .getMember("extractall")
.getACall() and .getACall() and
this in [call.getArg(0), call.getArgByName("members")] this in [call.getArg(0), call.getArgByName("members")] and
hasUnsafeFilter(call)
) )
} }
} }

View File

@@ -12,6 +12,15 @@ edges
| tarslip.py:58:1:58:3 | GSSA Variable tar | tarslip.py:59:5:59:9 | GSSA Variable entry | | tarslip.py:58:1:58:3 | GSSA Variable tar | tarslip.py:59:5:59:9 | GSSA Variable entry |
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:58:1:58:3 | GSSA Variable tar | | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:58:1:58:3 | GSSA Variable tar |
| tarslip.py:59:5:59:9 | GSSA Variable entry | tarslip.py:61:21:61:25 | ControlFlowNode for entry | | tarslip.py:59:5:59:9 | GSSA Variable entry | tarslip.py:61:21:61:25 | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | GSSA Variable tar | tarslip.py:91:1:91:3 | ControlFlowNode for tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:90:1:90:3 | GSSA Variable tar |
| tarslip.py:94:1:94:3 | GSSA Variable tar | tarslip.py:95:5:95:9 | GSSA Variable entry |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:94:1:94:3 | GSSA Variable tar |
| tarslip.py:95:5:95:9 | GSSA Variable entry | tarslip.py:96:17:96:21 | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | GSSA Variable tar | tarslip.py:110:1:110:3 | ControlFlowNode for tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:109:1:109:3 | GSSA Variable tar |
| tarslip.py:112:1:112:3 | GSSA Variable tar | tarslip.py:113:24:113:26 | ControlFlowNode for tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:112:1:112:3 | GSSA Variable tar |
nodes nodes
| tarslip.py:14:1:14:3 | GSSA Variable tar | semmle.label | GSSA Variable tar | | tarslip.py:14:1:14:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
@@ -31,6 +40,19 @@ nodes
| tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:59:5:59:9 | GSSA Variable entry | semmle.label | GSSA Variable entry | | tarslip.py:59:5:59:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry | | tarslip.py:61:21:61:25 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:90:1:90:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:94:1:94:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:95:5:95:9 | GSSA Variable entry | semmle.label | GSSA Variable entry |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | semmle.label | ControlFlowNode for entry |
| tarslip.py:109:1:109:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
| tarslip.py:112:1:112:3 | GSSA Variable tar | semmle.label | GSSA Variable tar |
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
subpaths subpaths
#select #select
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
@@ -38,3 +60,7 @@ subpaths
| tarslip.py:39:17:39:21 | ControlFlowNode for entry | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | tarslip.py:39:17:39:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:39:17:39:21 | ControlFlowNode for entry | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | tarslip.py:39:17:39:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:35:7:35:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:43:24:43:26 | ControlFlowNode for tar | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | tarslip.py:43:24:43:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:43:24:43:26 | ControlFlowNode for tar | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | tarslip.py:43:24:43:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:42:7:42:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:61:21:61:25 | ControlFlowNode for entry | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:61:21:61:25 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | potentially untrusted source | | tarslip.py:61:21:61:25 | ControlFlowNode for entry | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | tarslip.py:61:21:61:25 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:58:7:58:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:91:1:91:3 | ControlFlowNode for tar | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | tarslip.py:91:1:91:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:90:7:90:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |

View File

@@ -82,3 +82,35 @@ tar = tarfile.open(unsafe_filename_tar)
for entry in tar: for entry in tar:
if not os.path.isabs(entry.name): if not os.path.isabs(entry.name):
tar.extract(entry, "/tmp/unpack/") tar.extract(entry, "/tmp/unpack/")
# Extraction filters (PEP 706): each call below is annotated with the expected verdict
extraction_filter = "fully_trusted"
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe
tar.close()
tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # unsafe
extraction_filter = "data"
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # safe
tar.close()
tar = tarfile.open(unsafe_filename_tar)
for entry in tar:
tar.extract(entry, filter=extraction_filter) # safe
extraction_filter = None
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=tar, filter=extraction_filter) # unsafe
tar = tarfile.open(unsafe_filename_tar)
tar.extractall(members=safemembers(tar), filter=extraction_filter) # safe -- we assume `safemembers` makes up for the unsafe filter