Python: Fix problems with sinks in pathlib

This must mean that we did not have this flow with the old call-graph,
which means the new call-graph is doing a better job (yay).
This commit is contained in:
Rasmus Wriedt Larsen
2022-08-24 14:14:41 +02:00
parent edcaff26af
commit 39ce50fadc
7 changed files with 71 additions and 18 deletions

View File

@@ -1462,7 +1462,19 @@ private module StdlibPrivate {
t.start() and
result = openCall and
(
openCall instanceof OpenCall
openCall instanceof OpenCall and
// don't include the open call inside of Path.open in pathlib.py since
// the call to `path_obj.open` is covered by `PathLibOpenCall`.
not exists(Module mod, Class cls, Function func |
openCall.(OpenCall).asCfgNode().getScope() = func and
func.getName() = "open" and
func.getScope() = cls and
cls.getName() = "Path" and
cls.getScope() = mod and
mod.getName() = "pathlib" and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(mod.getFile().getRelativePath())
)
or
openCall instanceof PathLibOpenCall
)

View File

@@ -50,7 +50,34 @@ module CleartextStorage {
/** The data written to a file, considered as a flow sink. */
class FileWriteDataAsSink extends Sink {
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
FileWriteDataAsSink() {
this = any(FileSystemWriteAccess write).getADataNode() and
// since implementation of Path.write_bytes in pathlib.py is like
// ```py
// def write_bytes(self, data):
// with self.open(mode='wb') as f:
// return f.write(data)
// ```
// any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
// the flow from the `data` parameter to the `f.write` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.write` calls in the following example
// due to use-use flow
// ```py
// p.write(user_controlled)
// p.write(user_controlled)
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
/** The data written to a cookie on a HTTP response, considered as a flow sink. */

View File

@@ -76,6 +76,8 @@ module CommandInjection {
// `subprocess`. See:
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
//
// The same approach is used in the path-injection and cleartext-storage queries.
not this.getScope().getEnclosingModule().getName() in [
"os", "subprocess", "platform", "popen2"
]

View File

@@ -58,7 +58,33 @@ module PathInjection {
* A file system access, considered as a flow sink.
*/
class FileSystemAccessAsSink extends Sink {
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
FileSystemAccessAsSink() {
this = any(FileSystemAccess e).getAPathArgument() and
// since implementation of Path.open in pathlib.py is like
// ```py
// def open(self, ...):
// return io.open(self, ...)
// ```
// any time we would report flow to the `path_obj.open` sink, we can ALSO report
// the flow from the `self` parameter to the `io.open` sink -- obviously we
// don't want that.
//
// However, simply removing taint edges out of a sink is not a good enough solution,
// since we would only flag one of the `p.open` calls in the following example
// due to use-use flow
// ```py
// p.open()
// p.open()
// ```
//
// The same approach is used in the command injection query.
not exists(Module pathlib |
pathlib.getName() = "pathlib" and
this.getScope().getEnclosingModule() = pathlib and
// do allow this call if we're analyzing pathlib.py as part of CPython though
not exists(pathlib.getFile().getRelativePath())
)
}
}
private import semmle.python.frameworks.data.ModelsAsData

View File

@@ -13,7 +13,7 @@ with p.open() as f: # $ getAPathArgument=p
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello" SPURIOUS: getAPathArgument=self
p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
name = windows.parent.name
o = open

View File

@@ -1,5 +1,4 @@
edges
| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | GSSA Variable request |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request |
@@ -56,8 +55,6 @@ edges
| pathlib_use.py:12:16:12:22 | ControlFlowNode for request | pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute |
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:14:5:14:5 | ControlFlowNode for p |
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:9:12:9:18 | ControlFlowNode for request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:3:26:3:32 | GSSA Variable request |
| test.py:3:26:3:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
@@ -80,8 +77,6 @@ edges
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:48:13:48:24 | ControlFlowNode for normalize() |
nodes
| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | semmle.label | ModuleVariableNode for flask_path_injection.request |
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -143,7 +138,6 @@ nodes
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | semmle.label | ControlFlowNode for p |
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:3:26:3:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -169,7 +163,6 @@ subpaths
| test.py:25:19:25:19 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:25:9:25:20 | ControlFlowNode for normalize() |
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:48:13:48:24 | ControlFlowNode for normalize() |
#select
| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |

View File

@@ -1,21 +1,14 @@
edges
| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
| test.py:12:21:12:24 | ControlFlowNode for cert | file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data |
nodes
| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
subpaths
#select
| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | test.py:9:12:9:21 | ControlFlowNode for get_cert() | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |