Merge pull request #5681 from yoff/python-support-pathlib

Approved by tausbn
This commit is contained in:
CodeQL CI
2021-05-04 09:20:24 -07:00
committed by GitHub
6 changed files with 226 additions and 15 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* Added modeling of `pathlib` from the standard library to recognize `Path` objects constructed in various ways and resulting file accesses. This can lead to new results for `py/path-injection`.

View File

@@ -864,6 +864,194 @@ private module Stdlib {
class Sqlite3 extends PEP249ModuleApiNode {
Sqlite3() { this = API::moduleImport("sqlite3") }
}
// ---------------------------------------------------------------------------
// pathlib
// ---------------------------------------------------------------------------
/** Gets a reference to the `pathlib` module. */
private API::Node pathlib() { result = API::moduleImport("pathlib") }
/**
* Gets a name of a constructor for a `pathlib.Path` object.
* We include the pure paths, as they can be "exported" (say with `as_posix`) and then used to acces the underlying file system.
*/
private string pathlibPathConstructor() {
result in ["Path", "PurePath", "PurePosixPath", "PureWindowsPath", "PosixPath", "WindowsPath"]
}
/**
* Gets a name of an attribute of a `pathlib.Path` object that is also a `pathlib.Path` object.
*/
private string pathlibPathAttribute() { result in ["parent"] }
/**
* Gets a name of a method of a `pathlib.Path` object that returns a `pathlib.Path` object.
*/
private string pathlibPathMethod() {
result in ["absolute", "relative_to", "rename", "replace", "resolve"]
}
/**
* Gets a name of a method of a `pathlib.Path` object that modifies a `pathlib.Path` object based on new data.
*/
private string pathlibPathInjection() {
result in ["joinpath", "with_name", "with_stem", "with_suffix"]
}
/**
* Gets a name of an attribute of a `pathlib.Path` object that exports information about the `pathlib.Path` object.
*/
private string pathlibPathAttributeExport() {
result in ["drive", "root", "anchor", "name", "suffix", "stem"]
}
/**
* Gets a name of a method of a `pathlib.Path` object that exports information about the `pathlib.Path` object.
*/
private string pathlibPathMethodExport() { result in ["as_posix", "as_uri"] }
/**
* Flow for attributes and methods that return a `pathlib.Path` object.
*/
private predicate pathlibPathStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::AttrRead returnsPath |
(
// attribute access
returnsPath.getAttributeName() = pathlibPathAttribute() and
nodeTo = returnsPath
or
// method call
returnsPath.getAttributeName() = pathlibPathMethod() and
returnsPath
.(DataFlow::LocalSourceNode)
.flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction())
) and
nodeFrom = returnsPath.getObject()
)
}
/**
* Gets a reference to a `pathlib.Path` object.
* This type tracker makes the monomorphic API use assumption.
*/
private DataFlow::LocalSourceNode pathlibPath(DataFlow::TypeTracker t) {
// Type construction
t.start() and
result = pathlib().getMember(pathlibPathConstructor()).getACall()
or
// Type-preserving step
exists(DataFlow::Node nodeFrom, DataFlow::TypeTracker t2 |
pathlibPath(t2).flowsTo(nodeFrom) and
t2.end()
|
t.start() and
pathlibPathStep(nodeFrom, result)
)
or
// Data injection
// Special handling of the `/` operator
exists(BinaryExprNode slash, DataFlow::Node pathOperand, DataFlow::TypeTracker t2 |
slash.getOp() instanceof Div and
pathOperand.asCfgNode() = slash.getAnOperand() and
pathlibPath(t2).flowsTo(pathOperand) and
t2.end()
|
t.start() and
result.asCfgNode() = slash
)
or
// standard case
exists(DataFlow::AttrRead returnsPath, DataFlow::TypeTracker t2 |
returnsPath.getAttributeName() = pathlibPathInjection() and
pathlibPath(t2).flowsTo(returnsPath.getObject()) and
t2.end()
|
t.start() and
result.(DataFlow::CallCfgNode).getFunction() = returnsPath
)
or
// Track further
exists(DataFlow::TypeTracker t2 | result = pathlibPath(t2).track(t2, t))
}
/** Gets a reference to a `pathlib.Path` object. */
DataFlow::LocalSourceNode pathlibPath() { result = pathlibPath(DataFlow::TypeTracker::end()) }
private class PathlibFileAccess extends FileSystemAccess::Range, DataFlow::CallCfgNode {
DataFlow::AttrRead fileAccess;
PathlibFileAccess() {
fileAccess.getAttributeName() in [
"stat", "chmod", "exists", "expanduser", "glob", "group", "is_dir", "is_file", "is_mount",
"is_symlink", "is_socket", "is_fifo", "is_block_device", "is_char_device", "iter_dir",
"lchmod", "lstat", "mkdir", "open", "owner", "read_bytes", "read_text", "readlink",
"rename", "replace", "resolve", "rglob", "rmdir", "samefile", "symlink_to", "touch",
"unlink", "link_to", "write_bytes", "write_text"
] and
pathlibPath().flowsTo(fileAccess.getObject()) and
fileAccess.(DataFlow::LocalSourceNode).flowsTo(this.getFunction())
}
override DataFlow::Node getAPathArgument() { result = fileAccess.getObject() }
}
/** An additional taint steps for objects of type `pathlib.Path` */
private class PathlibPathTaintStep extends TaintTracking::AdditionalTaintStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// Type construction
nodeTo = pathlib().getMember(pathlibPathConstructor()).getACall() and
nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
or
// Type preservation
pathlibPath().flowsTo(nodeFrom) and
pathlibPathStep(nodeFrom, nodeTo)
or
// Data injection
pathlibPath().flowsTo(nodeTo) and
(
// Special handling of the `/` operator
exists(BinaryExprNode slash, DataFlow::Node pathOperand |
slash.getOp() instanceof Div and
pathOperand.asCfgNode() = slash.getAnOperand() and
pathlibPath().flowsTo(pathOperand)
|
nodeTo.asCfgNode() = slash and
// Taint can flow either from the left or the right operand as long as one of them is a path.
nodeFrom.asCfgNode() = slash.getAnOperand()
)
or
// standard case
exists(DataFlow::AttrRead augmentsPath |
augmentsPath.getAttributeName() = pathlibPathInjection()
|
augmentsPath
.(DataFlow::LocalSourceNode)
.flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction()) and
(
// type-preserving call
nodeFrom = augmentsPath.getObject()
or
// data injection
nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(_)
)
)
)
or
// Export data from type
pathlibPath().flowsTo(nodeFrom) and
exists(DataFlow::AttrRead exportPath |
// exporting attribute
exportPath.getAttributeName() = pathlibPathAttributeExport() and
nodeTo = exportPath
or
// exporting method
exportPath.getAttributeName() = pathlibPathMethodExport() and
exportPath.(DataFlow::LocalSourceNode).flowsTo(nodeTo.(DataFlow::CallCfgNode).getFunction())
|
nodeFrom = exportPath.getObject()
)
}
}
}
// ---------------------------------------------------------------------------

View File

@@ -23,20 +23,20 @@ def test_basic():
tainted_pure_windows_path = pathlib.PureWindowsPath(ts)
ensure_tainted(
tainted_path, # $ MISSING: tainted
tainted_path, # $ tainted
tainted_pure_path, # $ MISSING: tainted
tainted_pure_posix_path, # $ MISSING: tainted
tainted_pure_windows_path, # $ MISSING: tainted
tainted_pure_path, # $ tainted
tainted_pure_posix_path, # $ tainted
tainted_pure_windows_path, # $ tainted
pathlib.Path("foo") / ts, # $ MISSING: tainted
ts / pathlib.Path("foo"), # $ MISSING: tainted
pathlib.Path("foo") / ts, # $ tainted
ts / pathlib.Path("foo"), # $ tainted
tainted_path.joinpath("foo", "bar"), # $ MISSING: tainted
pathlib.Path("foo").joinpath(tainted_path, "bar"), # $ MISSING: tainted
pathlib.Path("foo").joinpath("bar", tainted_path), # $ MISSING: tainted
tainted_path.joinpath("foo", "bar"), # $ tainted
pathlib.Path("foo").joinpath(tainted_path, "bar"), # $ tainted
pathlib.Path("foo").joinpath("bar", tainted_path), # $ tainted
str(tainted_path), # $ MISSING: tainted
str(tainted_path), # $ tainted
# TODO: Tainted methods and attributes
# https://docs.python.org/3.8/library/pathlib.html#methods-and-properties
@@ -46,13 +46,13 @@ def test_basic():
tainted_posix_path = pathlib.PosixPath(ts)
ensure_tainted(
tainted_posix_path, # $ MISSING: tainted
tainted_posix_path, # $ tainted
)
if os.name == "nt":
tainted_windows_path = pathlib.WindowsPath(ts)
ensure_tainted(
tainted_windows_path, # $ MISSING: tainted
tainted_windows_path, # $ tainted
)
# Make tests runable

View File

@@ -13,7 +13,7 @@ https://docs.djangoproject.com/en/3.1/ref/settings/
from pathlib import Path
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent
BASE_DIR = Path(__file__).resolve().parent.parent #$ getAPathArgument=Path()
# Quick-start development settings - unsuitable for production

View File

@@ -16,3 +16,24 @@ builtins.open(file="filepath") # $getAPathArgument="filepath"
io.open("filepath") # $getAPathArgument="filepath"
io.open(file="filepath") # $getAPathArgument="filepath"
from pathlib import Path, PosixPath, WindowsPath
p = Path("filepath")
posix = PosixPath("posix/filepath")
windows = WindowsPath("windows/filepath")
p.chmod(0o777) # $getAPathArgument=p
posix.chmod(0o777) # $getAPathArgument=posix
windows.chmod(0o777) # $getAPathArgument=windows
with p.open() as f: # $getAPathArgument=p
f.read()
p.write_bytes(b"hello") # $getAPathArgument=p
name = windows.parent.name
o(name) # $getAPathArgument=name
wb = p.write_bytes
wb(b"hello") # $getAPathArgument=p

View File

@@ -3,6 +3,6 @@ s = "taintedString"
if s.startswith("tainted"): # $checks=s branch=true
pass
sw = s.startswith # $ MISSING: checks=s branch=true
if sw("safe"):
sw = s.startswith
if sw("safe"): # $ MISSING: checks=s branch=true
pass