diff --git a/python/ql/lib/change-notes/2023-09-29-allow-namespace-packages.md b/python/ql/lib/change-notes/2023-09-29-allow-namespace-packages.md new file mode 100644 index 00000000000..6c064920b51 --- /dev/null +++ b/python/ql/lib/change-notes/2023-09-29-allow-namespace-packages.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Namespace packages in the form of regular packages with missing `__init__.py`-files are now allowed. This enables the analysis to resolve modules and functions inside such packages. diff --git a/python/ql/lib/semmle/python/Module.qll b/python/ql/lib/semmle/python/Module.qll index 56376155108..fa756fc9655 100644 --- a/python/ql/lib/semmle/python/Module.qll +++ b/python/ql/lib/semmle/python/Module.qll @@ -179,21 +179,6 @@ private predicate legalDottedName(string name) { bindingset[name] private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") } -/** - * Holds if `f` is potentially a source package. - * Does it have an __init__.py file (or --respect-init=False for Python 2) and is it within the source archive? - */ -private predicate isPotentialSourcePackage(Folder f) { - f.getRelativePath() != "" and - isPotentialPackage(f) -} - -private predicate isPotentialPackage(Folder f) { - exists(f.getFile("__init__.py")) - or - py_flags_versioned("options.respect_init", "False", _) and major_version() = 2 and exists(f) -} - private string moduleNameFromBase(Container file) { // We used to also require `isPotentialPackage(f)` to hold in this case, // but we saw modules not getting resolved because their folder did not @@ -236,31 +221,114 @@ private predicate transitively_imported_from_entry_point(File file) { ) } +/** + * Holds if the folder `f` is a regular Python package, + * containing an `__init__.py` file. + */ +private predicate isRegularPackage(Folder f, string name) { + legalShortName(name) and + name = f.getStem() and + exists(f.getFile("__init__.py")) +} + +/** Gets the name of a module imported in package `c`. */ +private string moduleImportedInPackage(Container c) { + legalShortName(result) and + // it has to be imported in this folder + result = + any(ImportExpr i | i.getLocation().getFile().getParent() = c) + .getName() + // strip everything after the first `.` + .regexpReplaceAll("\\..*", "") and + result != "" +} + +/** Holds if the file `f` could be resolved to a module named `name`. */ +private predicate isPotentialModuleFile(File file, string name) { + legalShortName(name) and + name = file.getStem() and + file.getExtension() = ["py", "pyc", "so", "pyd"] and + // it has to be imported in this folder + name = moduleImportedInPackage(file.getParent()) +} + +/** + * Holds if the folder `f` is a namespace package named `name`. + * + * See https://peps.python.org/pep-0420/#specification + * for details on namespace packages. + */ +private predicate isNameSpacePackage(Folder f, string name) { + legalShortName(name) and + name = f.getStem() and + not isRegularPackage(f, name) and + // it has to be imported in a file + // either in this folder or next to this folder + name = moduleImportedInPackage([f, f.getParent()]) and + // no sibling regular package + // and no sibling module + not exists(Folder sibling | sibling.getParent() = f.getParent() | + isRegularPackage(sibling.getFolder(name), name) + or + isPotentialModuleFile(sibling.getAFile(), name) + ) +} + +/** + * Holds if the folder `f` is a package (either a regular package + * or a namespace package) named `name`. + */ +private predicate isPackage(Folder f, string name) { + isRegularPackage(f, name) + or + isNameSpacePackage(f, name) +} + +/** + * Holds if the file `f` is a module named `name`. + */ +private predicate isModuleFile(File file, string name) { + isPotentialModuleFile(file, name) and + not isPackage(file.getParent(), _) +} + +/** + * Holds if the folder `f` is a package named `name` + * and does reside inside another package. + */ +private predicate isOutermostPackage(Folder f, string name) { + isPackage(f, name) and + not isPackage(f.getParent(), _) +} + +/** Gets the name of the module that `c` resolves to, if any. */ cached -string moduleNameFromFile(Container file) { +string moduleNameFromFile(Container c) { + // package + isOutermostPackage(c, result) + or + // module + isModuleFile(c, result) + or Stages::AST::ref() and exists(string basename | - basename = moduleNameFromBase(file) and + basename = moduleNameFromBase(c) and legalShortName(basename) | - result = moduleNameFromFile(file.getParent()) + "." + basename + // recursive case + result = moduleNameFromFile(c.getParent()) + "." + basename or // If `file` is a transitive import of a file that's executed directly, we allow references // to it by its `basename`. - transitively_imported_from_entry_point(file) and + transitively_imported_from_entry_point(c) and result = basename ) or - isPotentialSourcePackage(file) and - result = file.getStem() and - ( - not isPotentialSourcePackage(file.getParent()) or - not legalShortName(file.getParent().getBaseName()) - ) + // + // standard library + result = c.getStem() and c.getParent() = c.getImportRoot() or - result = file.getStem() and file.getParent() = file.getImportRoot() - or - result = file.getStem() and isStubRoot(file.getParent()) + result = c.getStem() and isStubRoot(c.getParent()) } private predicate isStubRoot(Folder f) { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index cc6fb2766fa..0456278bc66 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -400,7 +400,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode { override Scope getScope() { result = mod } override string toString() { - result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId() + result = "ModuleVariableNode in " + concat( | | mod.toString(), ",") + " for " + var.getId() } /** Gets the module in which this variable appears. */ diff --git a/python/ql/test/2/library-tests/six/pointsto.expected b/python/ql/test/2/library-tests/six/pointsto.expected index b2ad9ae96f4..ca063a000e3 100644 --- a/python/ql/test/2/library-tests/six/pointsto.expected +++ b/python/ql/test/2/library-tests/six/pointsto.expected @@ -5,5 +5,4 @@ | six.moves.range | builtin-class xrange | | six.moves.urllib | Package six.moves.urllib | | six.moves.urllib.parse | Module six.moves.urllib_parse | -| six.moves.urllib.parse.urlsplit | Function urlsplit | | six.moves.zip | Builtin-function zip | diff --git a/python/ql/test/3/library-tests/modules/entry_point/modules.expected b/python/ql/test/3/library-tests/modules/entry_point/modules.expected index cdc743a360d..60edda057d8 100644 --- a/python/ql/test/3/library-tests/modules/entry_point/modules.expected +++ b/python/ql/test/3/library-tests/modules/entry_point/modules.expected @@ -1,5 +1,14 @@ | module | hash_bang/module.py:0:0:0:0 | Module module | | module | name_main/module.py:0:0:0:0 | Module module | +| namespace_package | hash_bang/namespace_package:0:0:0:0 | Package namespace_package | +| namespace_package | name_main/namespace_package:0:0:0:0 | Package namespace_package | +| namespace_package | no_py_extension/namespace_package:0:0:0:0 | Package namespace_package | +| namespace_package.namespace_package_main | hash_bang/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main | +| namespace_package.namespace_package_main | name_main/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main | +| namespace_package.namespace_package_main | no_py_extension/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main | +| namespace_package.namespace_package_module | hash_bang/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module | +| namespace_package.namespace_package_module | name_main/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module | +| namespace_package.namespace_package_module | no_py_extension/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module | | package | hash_bang/package:0:0:0:0 | Package package | | package | name_main/package:0:0:0:0 | Package package | | package | no_py_extension/package:0:0:0:0 | Package package | diff --git a/python/ql/test/3/library-tests/six/pointsto.expected b/python/ql/test/3/library-tests/six/pointsto.expected index 6d1a9181e82..83f61848b7c 100644 --- a/python/ql/test/3/library-tests/six/pointsto.expected +++ b/python/ql/test/3/library-tests/six/pointsto.expected @@ -5,5 +5,4 @@ | six.moves.range | builtin-class range | | six.moves.urllib | Package six.moves.urllib | | six.moves.urllib.parse | Module six.moves.urllib_parse | -| six.moves.urllib.parse.urlsplit | Function urlsplit | | six.moves.zip | builtin-class zip | diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py index 12d3a013e8d..3ef8784e9e2 100644 --- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/example.py @@ -16,7 +16,13 @@ Since PEP 420 was accepted in Python 3, this test is Python 3 only. from foo.bar.a import afunc from foo_explicit.bar.a import explicit_afunc +from not_root.baz.foo import foo_func +from not_root.baz.bar.a import afunc as afunc2 afunc() # $ pt,tt="foo/bar/a.py:afunc" explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc" + +foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func" + +afunc2() # $ pt,tt="not_root/baz/bar/a.py:afunc" diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/bar/a.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/bar/a.py index bc639f6c537..d08154af275 100644 --- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/bar/a.py +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/bar/a.py @@ -1,3 +1,6 @@ def afunc(): - print("afunc called") - return 1 + print("afunc called") + return 1 + +from foo.foo import foo_func +foo_func() # $ pt,tt="foo/foo.py:foo_func" diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/foo.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/foo.py new file mode 100644 index 00000000000..cdb22e00908 --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo/foo.py @@ -0,0 +1,2 @@ +def foo_func(): + print("foo_func called") diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/bar/a.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/bar/a.py index c84d63cfce2..16285e38780 100644 --- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/bar/a.py +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/bar/a.py @@ -1,3 +1,6 @@ def explicit_afunc(): - print("explicit_afunc called") - return 1 + print("explicit_afunc called") + return 1 + +from foo_explicit.foo_explicit import foo_explicit_func +foo_explicit_func() # $ pt,tt="foo_explicit/foo_explicit.py:foo_explicit_func" diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/foo_explicit.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/foo_explicit.py new file mode 100644 index 00000000000..f6514b76072 --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/foo_explicit/foo_explicit.py @@ -0,0 +1,2 @@ +def foo_explicit_func(): + print("foo_explicit_func called") diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/bar/a.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/bar/a.py new file mode 100644 index 00000000000..69aae305874 --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/bar/a.py @@ -0,0 +1,6 @@ +def afunc(): + print("afunc called") + return 1 + +from not_root.baz.foo import foo_func +foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func" diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/foo.py b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/foo.py new file mode 100644 index 00000000000..cdb22e00908 --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/not_root/baz/foo.py @@ -0,0 +1,2 @@ +def foo_func(): + print("foo_func called") diff --git a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/options b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/options index cfef58cf2b2..f2d60a4bc3c 100644 --- a/python/ql/test/experimental/library-tests/CallGraph-implicit-init/options +++ b/python/ql/test/experimental/library-tests/CallGraph-implicit-init/options @@ -1 +1 @@ -semmle-extractor-options: --max-import-depth=1 --lang=3 +semmle-extractor-options: --max-import-depth=3 --lang=3 diff --git a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModuleWithRef.expected b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModuleWithRef.expected index e69de29bb2d..92e569f09e6 100644 --- a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModuleWithRef.expected +++ b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModuleWithRef.expected @@ -0,0 +1,3 @@ +| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in external file called | pdb.py | +| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in local file called | test_ok.py | +| Local module | code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name | referenced in local file called | unique_name_use.py | diff --git a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModules.expected b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModules.expected index 7d18240655f..ceca3a3713d 100644 --- a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModules.expected +++ b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/LocalModules.expected @@ -1,5 +1,5 @@ -| code-invalid-package-name/cmd.py:0:0:0:0 | Script cmd.py | +| code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | | code-invalid-package-name/test_fail.py:0:0:0:0 | Script test_fail.py | | code-invalid-package-name/test_ok.py:0:0:0:0 | Script test_ok.py | -| code-invalid-package-name/unique_name.py:0:0:0:0 | Script unique_name.py | +| code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name | | code-invalid-package-name/unique_name_use.py:0:0:0:0 | Script unique_name_use.py | diff --git a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/ModuleWithLocalRef.expected b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/ModuleWithLocalRef.expected index 2c66cfdbc99..cf5d7560b9a 100644 --- a/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/ModuleWithLocalRef.expected +++ b/python/ql/test/library-tests/PointsTo/regressions/wrong/module-imports/conflict-stdlib/ModuleWithLocalRef.expected @@ -1,3 +1,3 @@ -| Module 'cmd' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 | +| Module 'cmd' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 | | Module 'pdb' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_fail.py:3 | -| Module 'unique_name' (external, not in stdlib, missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 | +| Module 'unique_name' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 |