Merge pull request #14114 from yoff/python/allow-namespace-packages

Python: Allow namespace packages
This commit is contained in:
yoff
2023-10-26 16:56:05 +02:00
committed by GitHub
17 changed files with 146 additions and 40 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Namespace packages in the form of regular packages with missing `__init__.py` files are now allowed. This enables the analysis to resolve modules and functions inside such packages.

View File

@@ -179,21 +179,6 @@ private predicate legalDottedName(string name) {
/**
 * Holds if `name` is a single letter or underscore followed by any number of
 * letters, digits, or underscores (and nothing else).
 */
bindingset[name]
private predicate legalShortName(string name) { name.regexpMatch("(\\p{L}|_)(\\p{L}|\\d|_)*") }
/**
 * Holds if `f` is potentially a source package.
 *
 * That is, `f` satisfies `isPotentialPackage` and has a non-empty relative
 * path (presumably meaning it lies within the source archive).
 */
private predicate isPotentialSourcePackage(Folder f) {
  isPotentialPackage(f) and
  f.getRelativePath() != ""
}
/**
 * Holds if `f` is potentially a package.
 *
 * This is the case when `f` contains an `__init__.py` file, or, for major
 * version 2, when the `options.respect_init` flag is recorded as `False`
 * (in which case any existing folder qualifies).
 */
private predicate isPotentialPackage(Folder f) {
  major_version() = 2 and
  py_flags_versioned("options.respect_init", "False", _) and
  exists(f)
  or
  exists(f.getFile("__init__.py"))
}
private string moduleNameFromBase(Container file) {
// We used to also require `isPotentialPackage(f)` to hold in this case,
// but we saw modules not getting resolved because their folder did not
@@ -236,31 +221,114 @@ private predicate transitively_imported_from_entry_point(File file) {
)
}
/**
 * Holds if `f` is a regular Python package named `name`: a folder whose
 * stem is the legal short name `name` and which directly contains an
 * `__init__.py` file.
 */
private predicate isRegularPackage(Folder f, string name) {
  exists(f.getFile("__init__.py")) and
  f.getStem() = name and
  legalShortName(name)
}
/**
 * Gets the name of a module imported in package `c`.
 *
 * The result is the first dotted component of the name of some import
 * expression located in a file directly inside `c`, provided that
 * component is a non-empty legal short name.
 */
private string moduleImportedInPackage(Container c) {
  exists(ImportExpr imp |
    // the import has to occur in a file of this folder
    imp.getLocation().getFile().getParent() = c and
    // keep only what precedes the first `.`
    result = imp.getName().regexpReplaceAll("\\..*", "")
  ) and
  result != "" and
  legalShortName(result)
}
/**
 * Holds if the file `file` could be resolved to a module named `name`.
 *
 * The file must have one of the extensions `py`, `pyc`, `so` or `pyd`, its
 * stem must be the legal short name `name`, and `name` must be imported by
 * some file in the same folder.
 */
private predicate isPotentialModuleFile(File file, string name) {
  file.getExtension() = ["py", "pyc", "so", "pyd"] and
  file.getStem() = name and
  legalShortName(name) and
  // only names actually imported from within the file's own folder qualify
  moduleImportedInPackage(file.getParent()) = name
}
/**
 * Holds if the folder `f` is a namespace package named `name`.
 *
 * A folder qualifies when its stem is the legal short name `name`, it is
 * not a regular package (it has no `__init__.py`), `name` is imported from
 * a file in `f` or in `f`'s parent folder, and no competing regular package
 * or potential module file of the same name is found by the final check.
 *
 * See https://peps.python.org/pep-0420/#specification
 * for details on namespace packages.
 */
private predicate isNameSpacePackage(Folder f, string name) {
legalShortName(name) and
name = f.getStem() and
not isRegularPackage(f, name) and
// it has to be imported in a file
// either in this folder or next to this folder
name = moduleImportedInPackage([f, f.getParent()]) and
// no sibling regular package
// and no sibling module
// NOTE(review): `sibling` ranges over folders sharing `f`'s parent (which
// appears to include `f` itself), so `sibling.getFolder(name)` and
// `sibling.getAFile()` presumably look one level below those folders rather
// than directly next to `f` - confirm this matches the intent stated above.
not exists(Folder sibling | sibling.getParent() = f.getParent() |
isRegularPackage(sibling.getFolder(name), name)
or
isPotentialModuleFile(sibling.getAFile(), name)
)
}
/**
 * Holds if the folder `f` is a package named `name`, where a package is
 * either a namespace package or a regular package.
 */
private predicate isPackage(Folder f, string name) {
  isNameSpacePackage(f, name)
  or
  isRegularPackage(f, name)
}
/**
 * Holds if the file `file` is a module named `name`: it is a potential
 * module file for `name` and its containing folder is not a package.
 */
private predicate isModuleFile(File file, string name) {
  not isPackage(file.getParent(), _) and
  isPotentialModuleFile(file, name)
}
/**
 * Holds if the folder `f` is a package named `name`
 * and does not reside inside another package, that is,
 * the parent folder of `f` is not itself a package.
 */
private predicate isOutermostPackage(Folder f, string name) {
isPackage(f, name) and
not isPackage(f.getParent(), _)
}
/** Gets the name of the module that `c` resolves to, if any. */
cached
string moduleNameFromFile(Container file) {
string moduleNameFromFile(Container c) {
// package
isOutermostPackage(c, result)
or
// module
isModuleFile(c, result)
or
Stages::AST::ref() and
exists(string basename |
basename = moduleNameFromBase(file) and
basename = moduleNameFromBase(c) and
legalShortName(basename)
|
result = moduleNameFromFile(file.getParent()) + "." + basename
// recursive case
result = moduleNameFromFile(c.getParent()) + "." + basename
or
// If `file` is a transitive import of a file that's executed directly, we allow references
// to it by its `basename`.
transitively_imported_from_entry_point(file) and
transitively_imported_from_entry_point(c) and
result = basename
)
or
isPotentialSourcePackage(file) and
result = file.getStem() and
(
not isPotentialSourcePackage(file.getParent()) or
not legalShortName(file.getParent().getBaseName())
)
//
// standard library
result = c.getStem() and c.getParent() = c.getImportRoot()
or
result = file.getStem() and file.getParent() = file.getImportRoot()
or
result = file.getStem() and isStubRoot(file.getParent())
result = c.getStem() and isStubRoot(c.getParent())
}
private predicate isStubRoot(Folder f) {

View File

@@ -400,7 +400,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
override Scope getScope() { result = mod }
override string toString() {
result = "ModuleVariableNode in " + mod.toString() + " for " + var.getId()
result = "ModuleVariableNode in " + concat( | | mod.toString(), ",") + " for " + var.getId()
}
/** Gets the module in which this variable appears. */

View File

@@ -5,5 +5,4 @@
| six.moves.range | builtin-class xrange |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
| six.moves.urllib.parse.urlsplit | Function urlsplit |
| six.moves.zip | Builtin-function zip |

View File

@@ -1,5 +1,14 @@
| module | hash_bang/module.py:0:0:0:0 | Module module |
| module | name_main/module.py:0:0:0:0 | Module module |
| namespace_package | hash_bang/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package | name_main/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package | no_py_extension/namespace_package:0:0:0:0 | Package namespace_package |
| namespace_package.namespace_package_main | hash_bang/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_main | name_main/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_main | no_py_extension/namespace_package/namespace_package_main.py:0:0:0:0 | Module namespace_package.namespace_package_main |
| namespace_package.namespace_package_module | hash_bang/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| namespace_package.namespace_package_module | name_main/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| namespace_package.namespace_package_module | no_py_extension/namespace_package/namespace_package_module.py:0:0:0:0 | Module namespace_package.namespace_package_module |
| package | hash_bang/package:0:0:0:0 | Package package |
| package | name_main/package:0:0:0:0 | Package package |
| package | no_py_extension/package:0:0:0:0 | Package package |

View File

@@ -5,5 +5,4 @@
| six.moves.range | builtin-class range |
| six.moves.urllib | Package six.moves.urllib |
| six.moves.urllib.parse | Module six.moves.urllib_parse |
| six.moves.urllib.parse.urlsplit | Function urlsplit |
| six.moves.zip | builtin-class zip |

View File

@@ -16,7 +16,13 @@ Since PEP 420 was accepted in Python 3, this test is Python 3 only.
from foo.bar.a import afunc
from foo_explicit.bar.a import explicit_afunc
from not_root.baz.foo import foo_func
from not_root.baz.bar.a import afunc as afunc2
afunc() # $ pt,tt="foo/bar/a.py:afunc"
explicit_afunc() # $ pt,tt="foo_explicit/bar/a.py:explicit_afunc"
foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func"
afunc2() # $ pt,tt="not_root/baz/bar/a.py:afunc"

View File

@@ -1,3 +1,6 @@
def afunc():
print("afunc called")
return 1
print("afunc called")
return 1
from foo.foo import foo_func
foo_func() # $ pt,tt="foo/foo.py:foo_func"

View File

@@ -0,0 +1,2 @@
def foo_func():
print("foo_func called")

View File

@@ -1,3 +1,6 @@
def explicit_afunc():
print("explicit_afunc called")
return 1
print("explicit_afunc called")
return 1
from foo_explicit.foo_explicit import foo_explicit_func
foo_explicit_func() # $ pt,tt="foo_explicit/foo_explicit.py:foo_explicit_func"

View File

@@ -0,0 +1,2 @@
def foo_explicit_func():
print("foo_explicit_func called")

View File

@@ -0,0 +1,6 @@
def afunc():
print("afunc called")
return 1
from not_root.baz.foo import foo_func
foo_func() # $ pt,tt="not_root/baz/foo.py:foo_func"

View File

@@ -0,0 +1,2 @@
def foo_func():
print("foo_func called")

View File

@@ -1 +1 @@
semmle-extractor-options: --max-import-depth=1 --lang=3
semmle-extractor-options: --max-import-depth=3 --lang=3

View File

@@ -0,0 +1,3 @@
| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in external file called | pdb.py |
| Local module | code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd | referenced in local file called | test_ok.py |
| Local module | code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name | referenced in local file called | unique_name_use.py |

View File

@@ -1,5 +1,5 @@
| code-invalid-package-name/cmd.py:0:0:0:0 | Script cmd.py |
| code-invalid-package-name/cmd.py:0:0:0:0 | Module cmd |
| code-invalid-package-name/test_fail.py:0:0:0:0 | Script test_fail.py |
| code-invalid-package-name/test_ok.py:0:0:0:0 | Script test_ok.py |
| code-invalid-package-name/unique_name.py:0:0:0:0 | Script unique_name.py |
| code-invalid-package-name/unique_name.py:0:0:0:0 | Module unique_name |
| code-invalid-package-name/unique_name_use.py:0:0:0:0 | Script unique_name_use.py |

View File

@@ -1,3 +1,3 @@
| Module 'cmd' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 |
| Module 'cmd' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/test_ok.py:1 |
| Module 'pdb' (external, in stdlib, not missing) referenced in local file | code-invalid-package-name/test_fail.py:3 |
| Module 'unique_name' (external, not in stdlib, missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 |
| Module 'unique_name' (local, not in stdlib, not missing) referenced in local file | code-invalid-package-name/unique_name_use.py:1 |