From 81adb7dd2a73b4e1fd3ac54bf10e9330de258b7e Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 23 Sep 2021 15:28:05 +0200 Subject: [PATCH 1/6] Python: Add tests for `os.path`-functions --- .../library-tests/frameworks/stdlib/FileSystemAccess.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py index bcf6589ef85..ccc2dd76f2c 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py +++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py @@ -27,3 +27,10 @@ def through_function(open_file): open_file.write("foo") # $ fileWriteData="foo" getAPathArgument="path" through_function(f) + +from os import path +path.exists("filepath") # $ MISSING: getAPathArgument="filepath" +path.isfile("filepath") # $ MISSING: getAPathArgument="filepath" +path.isdir("filepath") # $ MISSING: getAPathArgument="filepath" +path.islink("filepath") # $ MISSING: getAPathArgument="filepath" +path.ismount("filepath") # $ MISSING: getAPathArgument="filepath" From f2fbeed4903522dcb4f415c18504672006ad46de Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 23 Sep 2021 15:28:33 +0200 Subject: [PATCH 2/6] Python: Model `os.path`-functions --- .../lib/semmle/python/frameworks/Stdlib.qll | 26 +++++++++++++++++++ .../frameworks/stdlib/FileSystemAccess.py | 10 +++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 539f0dcabb0..46f0349de77 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -195,6 +195,32 @@ private module StdlibPrivate { } } + /** + * A call to `os.path.exists` or `os.path.lexists` will check if a file exists on the file system. + * The `os.path` module offers e number of methods for checking if a file exists and/or has certain + * properties, leading to a file system access. + * (Although, on some platforms, the check may return `false` due to missing permissions.) + * See: + * - https://docs.python.org/3/library/os.path.html#os.path.exists + * - https://docs.python.org/3/library/os.path.html#os.path.lexists + * - https://docs.python.org/3/library/os.path.html#os.path.isfile + * - https://docs.python.org/3/library/os.path.html#os.path.isdir + * - https://docs.python.org/3/library/os.path.html#os.path.islink + * - https://docs.python.org/3/library/os.path.html#os.path.ismount + */ + private class OsPathProbingCall extends FileSystemAccess::Range, DataFlow::CallCfgNode { + OsPathProbingCall() { + this = + os::path() + .getMember(["exists", "lexists", "isfile", "isdir", "islink", "ismount"]) + .getACall() + } + + override DataFlow::Node getAPathArgument() { + result in [this.getArg(0), this.getArgByName("path")] + } + } + /** * A call to `os.path.normpath`. * See https://docs.python.org/3/library/os.path.html#os.path.normpath diff --git a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py index ccc2dd76f2c..64f8ad5010d 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py +++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py @@ -29,8 +29,8 @@ def through_function(open_file): through_function(f) from os import path -path.exists("filepath") # $ MISSING: getAPathArgument="filepath" -path.isfile("filepath") # $ MISSING: getAPathArgument="filepath" -path.isdir("filepath") # $ MISSING: getAPathArgument="filepath" -path.islink("filepath") # $ MISSING: getAPathArgument="filepath" -path.ismount("filepath") # $ MISSING: getAPathArgument="filepath" +path.exists("filepath") # $ getAPathArgument="filepath" +path.isfile("filepath") # $ getAPathArgument="filepath" +path.isdir("filepath") # $ getAPathArgument="filepath" +path.islink("filepath") # $ getAPathArgument="filepath" +path.ismount("filepath") # $ getAPathArgument="filepath" From 02e91b3902476d7d049a7462c2e6171e67be83eb Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 30 Sep 2021 13:36:24 +0200 Subject: [PATCH 3/6] Python: Model functions that will raise on non-existing files. --- .../lib/semmle/python/frameworks/Stdlib.qll | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 46f0349de77..0239b726cef 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -196,23 +196,36 @@ private module StdlibPrivate { } /** - * A call to `os.path.exists` or `os.path.lexists` will check if a file exists on the file system. - * The `os.path` module offers e number of methods for checking if a file exists and/or has certain + * The `os.path` module offers a number of methods for checking if a file exists and/or has certain * properties, leading to a file system access. + * A call to `os.path.exists` or `os.path.lexists` will check if a file exists on the file system. * (Although, on some platforms, the check may return `false` due to missing permissions.) + * A call to `os.path.getatime` will raise `OSError` if the file does not exist or is inaccessible. * See: - * - https://docs.python.org/3/library/os.path.html#os.path.exists - * - https://docs.python.org/3/library/os.path.html#os.path.lexists - * - https://docs.python.org/3/library/os.path.html#os.path.isfile - * - https://docs.python.org/3/library/os.path.html#os.path.isdir - * - https://docs.python.org/3/library/os.path.html#os.path.islink - * - https://docs.python.org/3/library/os.path.html#os.path.ismount + * - https://docs.python.org/3/library/os.path.html#os.path.exists + * - https://docs.python.org/3/library/os.path.html#os.path.lexists + * - https://docs.python.org/3/library/os.path.html#os.path.isfile + * - https://docs.python.org/3/library/os.path.html#os.path.isdir + * - https://docs.python.org/3/library/os.path.html#os.path.islink + * - https://docs.python.org/3/library/os.path.html#os.path.ismount + * - https://docs.python.org/3/library/os.path.html#os.path.getatime + * - https://docs.python.org/3/library/os.path.html#os.path.getmtime + * - https://docs.python.org/3/library/os.path.html#os.path.getctime + * - https://docs.python.org/3/library/os.path.html#os.path.getsize + * - https://docs.python.org/3/library/os.path.html#os.path.realpath */ private class OsPathProbingCall extends FileSystemAccess::Range, DataFlow::CallCfgNode { OsPathProbingCall() { this = os::path() - .getMember(["exists", "lexists", "isfile", "isdir", "islink", "ismount"]) + .getMember([ + // these check if the file exists + "exists", "lexists", "isfile", "isdir", "islink", "ismount", + // these raise errors if the file does not exist + "getatime", "getmtime", "getctime", "getsize", + // this will resolve symlinks + "realpath" + ]) .getACall() } @@ -221,6 +234,17 @@ private module StdlibPrivate { } } + /** A call to `os.path.samefile` will raise an exception if an `os.stat()` call on either pathname fails. */ + private class OsPathSamefileCall extends FileSystemAccess::Range, DataFlow::CallCfgNode { + OsPathSamefileCall() { this = os::path().getMember("samefile").getACall() } + + override DataFlow::Node getAPathArgument() { + result in [ + this.getArg(0), this.getArgByName("path1"), this.getArg(1), this.getArgByName("path2") + ] + } + } + /** * A call to `os.path.normpath`. * See https://docs.python.org/3/library/os.path.html#os.path.normpath From 4521a9fdf0af2aa74b9fe85834cd82b437288d13 Mon Sep 17 00:00:00 2001 From: yoff Date: Mon, 4 Oct 2021 11:36:53 +0200 Subject: [PATCH 4/6] Update python/ql/lib/semmle/python/frameworks/Stdlib.qll Co-authored-by: Rasmus Wriedt Larsen --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 0239b726cef..61b307bf51b 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -222,9 +222,7 @@ private module StdlibPrivate { // these check if the file exists "exists", "lexists", "isfile", "isdir", "islink", "ismount", // these raise errors if the file does not exist - "getatime", "getmtime", "getctime", "getsize", - // this will resolve symlinks - "realpath" + "getatime", "getmtime", "getctime", "getsize" ]) .getACall() } From aa91c267925868f852fc1b39772bffbf9d884b89 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 4 Oct 2021 12:12:07 +0200 Subject: [PATCH 5/6] Python: Add missing taint steps --- .../lib/semmle/python/frameworks/Stdlib.qll | 76 +++++++++++-------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 61b307bf51b..04f3bdf8d7b 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -243,6 +243,52 @@ private module StdlibPrivate { } } + // Functions with non-standard arguments: + // - os.path.join(path, *paths) + // - os.path.relpath(path, start=os.curdir) + // Functions that need summaries: + // - os.path.commonpath(paths): takes a sequence + // - os.path.commonprefix(list): takes a list argument + // - os.path.splitdrive: retunrs a tuple + // - os.path.splittext: returns a tuple + private string pathComputation() { + result in [ + "abspath", "basename", "commonpath", "dirname", "expanduser", "expandvars", "join", + "normcase", "normpath", "realpath", "relpath", "split" + ] + } + + /** + * The `os.path` module offers a number of methods for computing new paths from existing paths. + * These should all propagate taint. + */ + private class OsPathComputation extends DataFlow::CallCfgNode { + string methodName; + + OsPathComputation() { + methodName = pathComputation() and + this = os::path().getMember(methodName).getACall() + } + + DataFlow::Node getPathArg() { + result in [this.getArg(0), this.getArgByName("path")] + or + methodName = "join" and result = this.getArg(_) + or + methodName = "relpath" and result in [this.getArg(1), this.getArgByName("start")] + } + } + + /** An additional taint step for path computations. */ + private class OsPathComputationAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(OsPathComputation call | + nodeTo = call and + nodeFrom = call.getPathArg() + ) + } + } + /** * A call to `os.path.normpath`. * See https://docs.python.org/3/library/os.path.html#os.path.normpath @@ -253,16 +299,6 @@ private module StdlibPrivate { DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] } } - /** An additional taint step for calls to `os.path.normpath` */ - private class OsPathNormpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(OsPathNormpathCall call | - nodeTo = call and - nodeFrom = call.getPathArg() - ) - } - } - /** * A call to `os.path.abspath`. * See https://docs.python.org/3/library/os.path.html#os.path.abspath @@ -273,16 +309,6 @@ private module StdlibPrivate { DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] } } - /** An additional taint step for calls to `os.path.abspath` */ - private class OsPathAbspathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(OsPathAbspathCall call | - nodeTo = call and - nodeFrom = call.getPathArg() - ) - } - } - /** * A call to `os.path.realpath`. * See https://docs.python.org/3/library/os.path.html#os.path.realpath @@ -293,16 +319,6 @@ private module StdlibPrivate { DataFlow::Node getPathArg() { result in [this.getArg(0), this.getArgByName("path")] } } - /** An additional taint step for calls to `os.path.realpath` */ - private class OsPathRealpathCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(OsPathRealpathCall call | - nodeTo = call and - nodeFrom = call.getPathArg() - ) - } - } - /** * A call to `os.system`. * See https://docs.python.org/3/library/os.html#os.system From 6c108e43d988afb0d08572a43eb95b7ab37d1d4e Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 12 Oct 2021 15:16:48 +0200 Subject: [PATCH 6/6] Python: address review --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 04f3bdf8d7b..e6a85b62b72 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -246,15 +246,16 @@ private module StdlibPrivate { // Functions with non-standard arguments: // - os.path.join(path, *paths) // - os.path.relpath(path, start=os.curdir) - // Functions that need summaries: + // these functions need special treatment when computing `getPathArg`. + // + // Functions that excluded because they can act as sanitizers: // - os.path.commonpath(paths): takes a sequence // - os.path.commonprefix(list): takes a list argument - // - os.path.splitdrive: retunrs a tuple - // - os.path.splittext: returns a tuple + // unless the user control all arguments, we are comparing with a known value. private string pathComputation() { result in [ "abspath", "basename", "commonpath", "dirname", "expanduser", "expandvars", "join", - "normcase", "normpath", "realpath", "relpath", "split" + "normcase", "normpath", "realpath", "relpath", "split", "splitdrive", "splitext" ] }