mirror of
https://github.com/github/codeql.git
synced 2026-01-14 15:04:56 +01:00
378 lines
12 KiB
Python
378 lines
12 KiB
Python
'''
|
|
Classes and functions for converting module names into paths and Extractables.
|
|
Implements standard Python import semantics, and is designed to be extensible
|
|
to handle additional features like stub and template files.
|
|
'''
|
|
|
|
import sys
|
|
import imp
|
|
import os.path
|
|
from semmle.util import FileExtractable, FolderExtractable, BuiltinModuleExtractable, PY_EXTENSIONS, get_analysis_major_version
|
|
from semmle.python.modules import PythonSourceModule, is_script
|
|
|
|
class Module(object):
    '''Lightweight stand-in for a Python module object.

    Instances only record a name and the enclosing package; they exist so
    that imports can be analysed. Concrete subclasses provide the lookup
    and extraction behaviour.'''

    # Overridden with True by subclasses that represent packages.
    IS_PACKAGE = False
    # Class-level default; subclasses may assign a location per instance.
    path = None
    # Class-level default; subclasses may override per instance.
    respect_init = True

    def __init__(self, name, package):
        self.package = package
        self.name = name

    def get_sub_module(self, name):
        '''Return the direct child module called `name` (abstract).'''
        raise NotImplementedError()

    def all_sub_modules(self):
        '''Return an iterable over all direct child modules (abstract).'''
        raise NotImplementedError()

    def get_extractable(self):
        '''Return the Extractable for this module (abstract).'''
        raise NotImplementedError()

    def find(self, name):
        '''Resolve a possibly dotted `name` relative to this module.

        The first component is looked up with `get_sub_module`; any
        remaining components are resolved recursively on that result.
        Returns `None` as soon as a component cannot be resolved.'''
        head, dot, tail = name.partition(".")
        child = self.get_sub_module(head)
        if not dot:
            # Simple (dot-less) name: the direct child is the answer.
            return child
        if not child:
            return None
        return child.find(tail)

    def is_package(self):
        '''Tell whether this module is a package (see `IS_PACKAGE`).'''
        return self.IS_PACKAGE
|
|
|
|
class PyModule(Module):
    '''A module backed by a single Python source file.'''

    def __init__(self, name, package, path):
        super(PyModule, self).__init__(name, package)
        assert isinstance(path, str)
        self.path = path

    def get_sub_module(self, name):
        '''A plain module has no sub-modules, so this is always `None`.'''
        return None

    def all_sub_modules(self):
        '''A plain module has no sub-modules: returns an empty tuple.'''
        return ()

    def get_extractable(self):
        '''Return a `FileExtractable` for the source file.'''
        extractable = FileExtractable(self.path)
        return extractable

    def load(self, logger=None):
        '''Return a `PythonSourceModule` built from this module's name and
        path; `logger` is passed through to it.'''
        return PythonSourceModule(self.name, self.path, logger=logger)

    def __str__(self):
        return "Python module at %s" % (self.path,)
|
|
|
|
class BuiltinModule(Module):
    '''A built-in module, identified by name only (it has no path).'''

    def __init__(self, name, package):
        super(BuiltinModule, self).__init__(name, package)

    def get_sub_module(self, name):
        '''Built-in modules expose no sub-modules: always `None`.'''
        return None

    def all_sub_modules(self):
        '''Built-in modules expose no sub-modules: returns an empty tuple.'''
        return ()

    def get_extractable(self):
        '''Return a `BuiltinModuleExtractable` for this module's name.'''
        extractable = BuiltinModuleExtractable(self.name)
        return extractable

    def __str__(self):
        return "Builtin module %s" % (self.name,)
|
|
|
|
class FilePackage(Module):
    '''A normal package. That is a folder with an __init__.py

    `respect_init` is stored on the instance and forwarded to the helpers
    that discover sub-modules (`_from_base` and `_from_folder`).'''

    IS_PACKAGE = True

    def __init__(self, name, package, path, respect_init=True):
        Module.__init__(self, name, package)
        assert isinstance(path, str), type(path)
        self.path = path
        self.respect_init = respect_init

    def get_sub_module(self, name):
        '''Return the sub-module or sub-package called `name` located
        directly inside this package's folder, or `None` if there is none.'''
        # A package without a name yields children without a name.
        modname = self.name + "." + name if self.name else None
        basepath = os.path.join(self.path, name)
        return _from_base(modname, basepath, self, self.respect_init)

    def all_sub_modules(self):
        '''Return an iterable of the modules and packages found directly
        inside this package's folder.'''
        return _from_folder(self.name, self.path, self, self.respect_init)

    def load(self, logger=None):
        '''Always returns `None`.

        `logger` is accepted and ignored so that the signature matches
        `PyModule.load` and callers can pass a logger to either kind of
        module.'''
        return None

    def get_extractable(self):
        '''Return a `FolderExtractable` for the package folder.'''
        return FolderExtractable(self.path)

    def __str__(self):
        return "Package at %s" % self.path
|
|
|
|
class PthPackage(Module):
    '''A built-in package object generated from a '.pth' file.

    Sub-modules are looked up in each entry of `search_path`, in order.'''

    IS_PACKAGE = True

    def __init__(self, name, package, search_path):
        Module.__init__(self, name, package)
        self.search_path = search_path

    def get_sub_module(self, name):
        '''Return the first sub-module called `name` found along
        `search_path`, or `None` when no entry provides one.'''
        mname = self.name + "." + name
        for path in self.search_path:
            mod = _from_base(mname, os.path.join(path, name), self)
            if mod is not None:
                return mod
        return None

    def all_sub_modules(self):
        '''Generate the sub-modules found in every `search_path` entry.'''
        for path in self.search_path:
            for mod in _from_folder(self.name, path, self):
                yield mod

    def load(self, logger=None):
        '''Always returns `None`.

        `logger` is accepted and ignored so that the signature matches
        `PyModule.load` and callers can pass a logger to either kind of
        module.'''
        return None

    def __str__(self):
        return "Builtin package (.pth) %s %s" % (self.name, self.search_path)

    def get_extractable(self):
        '''Always returns `None`: this package has no extractable.'''
        return None
|
|
|
|
#Helper functions
|
|
|
|
def _from_base(name, basepath, pkg, respect_init=True):
    '''Build the module object located at `basepath`, if there is one.

    A directory becomes a `FilePackage` when it contains an `__init__.py`
    or when `respect_init` is false; any other directory gives `None`.
    Otherwise each suffix in `PY_EXTENSIONS` is appended to `basepath` in
    turn and the first existing file becomes a `PyModule`. Returns `None`
    when nothing matches.'''
    if os.path.isdir(basepath):
        has_init = os.path.exists(os.path.join(basepath, "__init__.py"))
        if has_init or not respect_init:
            return FilePackage(name, pkg, basepath, respect_init)
        return None
    candidates = (basepath + suffix for suffix in PY_EXTENSIONS)
    for candidate in candidates:
        if os.path.isfile(candidate):
            return PyModule(name, pkg, candidate)
    return None
|
|
|
|
def _from_folder(name, path, pkg, respect_init=True):
    '''Generate module objects for the entries directly inside `path`.

    Sub-directories yield a `FilePackage` when they contain an
    `__init__.py` or when `respect_init` is false. Regular files whose
    extension is in `PY_EXTENSIONS` yield a `PyModule`. Children are named
    `<name>.<entry>`, or `None` when `name` is empty.'''

    def qualified(part):
        # No parent name means the child has no name either.
        return name + "." + part if name else None

    for entry in os.listdir(path):
        fullpath = os.path.join(path, entry)
        if os.path.isdir(fullpath):
            has_init = os.path.exists(os.path.join(fullpath, "__init__.py"))
            if has_init or not respect_init:
                yield FilePackage(qualified(entry), pkg, fullpath, respect_init)
        stem, suffix = os.path.splitext(entry)
        if suffix in PY_EXTENSIONS and os.path.isfile(fullpath):
            yield PyModule(qualified(stem), pkg, fullpath)
|
|
|
|
class AbstractFinder(object):
    '''Base class for finders: dotted-name resolution lives here, while
    subclasses supply `find_top` and `name_from_path`.'''

    def find(self, mod_name):
        '''Find an extractable object given a (possibly dotted) module name.

        The first component is resolved with `find_top`; any remainder is
        delegated to the `find` method of that result. Returns `None` when
        the first component cannot be resolved.'''
        head, dot, tail = mod_name.partition(".")
        top = self.find_top(head)
        if not dot:
            return top
        if not top:
            return None
        return top.find(tail)

    def find_top(self, name):
        '''Find module or package object given a simple (dot-less) name'''
        raise NotImplementedError()

    def name_from_path(self, path, extensions):
        '''Find module or package object given a path'''
        raise NotImplementedError()
|
|
|
|
class PyFinder(AbstractFinder):
    '''Finder that resolves module names against one root directory.'''

    __slots__ = ['path', 'respect_init', 'logger']

    def __init__(self, path, respect_init, logger):
        assert isinstance(path, str), path
        self.logger = logger
        self.respect_init = respect_init
        # The root is stored in absolute form.
        self.path = os.path.abspath(path)

    def find_top(self, mod_name):
        '''Return the module or package called `mod_name` located directly
        under the root directory, or `None`.'''
        candidate = os.path.join(self.path, mod_name)
        return _from_base(mod_name, candidate, None, self.respect_init)

    def name_from_path(self, path, extensions):
        '''Return the dotted module name for `path`, or `None`.

        `None` is returned when `path` is not under the root directory, or
        when it has an extension that is not listed in `extensions`.'''
        relative = _relative_subpath(path, self.path)
        if relative is None:
            return None
        stem, suffix = os.path.splitext(relative)
        if suffix and suffix not in extensions:
            return None
        # Each path separator becomes a dot in the module name.
        return stem.replace(os.path.sep, ".")
|
|
|
|
def _relative_subpath(subpath, root):
|
|
'Returns the relative path if `subpath` is within `root` or `None` otherwise'
|
|
try:
|
|
relpath = os.path.relpath(subpath, root)
|
|
except ValueError:
|
|
#No relative path possible
|
|
return None
|
|
if relpath.startswith(os.pardir):
|
|
#Not in root:
|
|
return None
|
|
return relpath
|
|
|
|
class BuiltinFinder(AbstractFinder):
    '''Finder for builtin modules that are already present in the VM
    or can be guaranteed to load successfully'''

    # NOTE: this class relies on the `imp` module, which is deprecated and
    # was removed from the standard library in Python 3.12.

    def __init__(self, logger):
        # Snapshot of the modules loaded at construction time.
        self.modules = dict(sys.modules)
        try:
            handle, pathname, _ = imp.find_module("_json")
            try:
                # Kept inside the outer `try`: any failure here (for
                # instance a missing pathname) must also fall back to
                # `dynload_path = None`.
                self.dynload_path = os.path.dirname(pathname)
            finally:
                # `imp.find_module` may return an open file which the
                # caller is responsible for closing.
                if handle is not None:
                    handle.close()
        except Exception:
            if os.name != "nt":
                logger.warning("Failed to find dynload path")
            self.dynload_path = None

    def builtin_module(self, name):
        '''Return a `BuiltinModule` for the (possibly dotted) `name`; each
        leading component becomes the `BuiltinModule` package of the next.'''
        if "." in name:
            pname, name = name.rsplit(".", 1)
            return BuiltinModule(name, self.builtin_module(pname))
        return BuiltinModule(name, None)

    def find(self, mod_name):
        '''Find the builtin module called `mod_name`.

        The regular dotted-name lookup is tried first. Failing that, the
        module is accepted as builtin only if `imp.find_module` locates it
        in the same directory as `_json` (`dynload_path`). Returns `None`
        otherwise.'''
        mod = super(BuiltinFinder, self).find(mod_name)
        if mod is not None:
            return mod
        #Use `imp` module to find module
        try:
            handle, filepath, _ = imp.find_module(mod_name)
        except ImportError:
            return None
        # Only the location is needed; release the file handle straight away.
        if handle is not None:
            handle.close()
        #Accept builtin dynamically loaded modules like _ctypes or _json
        if filepath and os.path.dirname(filepath) == self.dynload_path:
            return BuiltinModule(mod_name, None)
        return None

    def find_top(self, mod_name):
        '''Classify a simple (dot-less) module name.

        For a module in the snapshot of loaded modules: `None` if it has a
        `__file__`, a `PthPackage` if it has a `__path__`, otherwise a
        `BuiltinModule`. A name that is not in the snapshot but is listed
        in `sys.builtin_module_names` gives a `BuiltinModule`; anything
        else gives `None`.'''
        if mod_name in self.modules:
            mod = self.modules[mod_name]
            if hasattr(mod, "__file__"):
                return None
            if hasattr(mod, "__path__"):
                return PthPackage(mod_name, None, mod.__path__)
            return BuiltinModule(mod_name, None)
        if mod_name in sys.builtin_module_names:
            return BuiltinModule(mod_name, None)
        return None

    def name_from_path(self, path, extensions):
        '''Always returns `None`: this finder never maps a path to a name.'''
        return None
|
|
|
|
#Stub file handling
|
|
|
|
class StubFinder(PyFinder):
    '''A `PyFinder` rooted at the `data/python/stubs` folder of the tools
    directory.'''

    def __init__(self, logger):
        # The tools directory comes from the ODASA_TOOLS environment
        # variable; when it is unset, the second entry of `sys.path` is
        # used instead.
        tools = os.environ.get('ODASA_TOOLS')
        if tools is None:
            tools = sys.path[1]
            logger.debug("StubFinder: can't find ODASA_TOOLS, using '%s' instead", tools)
        stubs_root = os.path.join(tools, "data", "python", "stubs")
        PyFinder.__init__(self, stubs_root, True, logger)
|
|
|
|
|
|
def _finders_for_path(path, respect_init, logger):
    '''Return the ordered finder list for the search path `path`.

    The list starts with a `StubFinder`, continues with one `PyFinder`
    per non-empty entry of `path` (in order) and ends with a
    `BuiltinFinder`.'''
    chain = [StubFinder(logger)]
    chain.extend(PyFinder(entry, respect_init, logger) for entry in path if entry)
    chain.append(BuiltinFinder(logger))
    return chain
|
|
|
|
|
|
def finders_from_options_and_env(options, logger):
    '''Return a list of finders from the given command line options.

    The search path is `options.path` (when non-empty) followed by
    `options.sys_path`, with every entry made absolute. Entries that start
    with any string in `options.exclude` are dropped.'''
    roots = options.path + options.sys_path if options.path else options.sys_path
    roots = [os.path.abspath(root) for root in roots]
    if options.exclude:
        excluded = set(options.exclude)
        # Plain string-prefix comparison against each excluded entry.
        roots = [
            root for root in roots
            if not any(root.startswith(prefix) for prefix in excluded)
        ]
    logger.debug("Finder path: %s", roots)
    logger.debug("sys path: %s", sys.path)
    return _finders_for_path(roots, options.respect_init, logger)
|
|
|
|
|
|
class Finder(object):
    '''Front end over an ordered list of finders.

    Lookups by name are delegated to each finder in turn; lookups by path
    build module objects directly and cache them in `path_map`.'''

    def __init__(self, finders, options, logger):
        self.finders = finders
        # Cache for `from_path`: path -> module object (or None).
        self.path_map = {}
        self.logger = logger
        self.respect_init = options.respect_init

    def find(self, mod_name):
        '''Return the first non-`None` result of `finder.find(mod_name)`
        over the finders, in order. Logs a debug message and returns
        `None` when no finder knows the module.'''
        for finder in self.finders:
            mod = finder.find(mod_name)
            if mod is not None:
                return mod
        self.logger.debug("Cannot find module '%s'", mod_name)
        return None

    @staticmethod
    def from_options_and_env(options, logger):
        '''Build a `Finder` whose finder list comes from
        `finders_from_options_and_env`.'''
        return Finder(finders_from_options_and_env(options, logger), options, logger)

    def from_extractable(self, unit):
        '''Return the module object for a file or folder extractable, or
        `None` for any other kind of extractable.'''
        if isinstance(unit, (FolderExtractable, FileExtractable)):
            return self.from_path(unit.path)
        return None

    def from_path(self, path, extensions=PY_EXTENSIONS):
        '''Return the module object for the file or folder at `path`.

        Folders become a `FilePackage` and files whose extension is in
        `extensions` become a `PyModule`, each parented on the module
        object of the enclosing folder. A file for which `is_script` is
        true becomes a `PyModule` without a package. Results are cached
        in `path_map`.

        Returns `None` for an empty path, for a file-system root, for a
        folder without an `__init__.py` (unless `respect_init` is false
        and the analysis major version is 2), and for files that are
        neither recognised by extension nor scripts.'''
        if path in self.path_map:
            return self.path_map[path]
        if not path:
            return None
        parent = os.path.dirname(path)
        if parent == path:
            # A file-system root is its own parent ("/" or a drive root);
            # stop here, otherwise the recursion below never terminates.
            return None
        is_python_2 = (get_analysis_major_version() == 2)
        if os.path.isdir(path) and not os.path.exists(os.path.join(path, "__init__.py")) and (self.respect_init or not is_python_2):
            return None
        pkg = self.from_path(parent)
        mod = None
        if os.path.isdir(path):
            mod = FilePackage(None, pkg, path)
        if os.path.isfile(path):
            base, ext = os.path.splitext(path)
            if ext in extensions:
                mod = PyModule(None, pkg, path)
            if is_script(path):
                # A script takes precedence and never belongs to a package.
                mod = PyModule(None, None, path)
        self.path_map[path] = mod
        return mod

    def name_from_path(self, path, extensions=PY_EXTENSIONS):
        '''Return the first non-`None` module name that any finder derives
        from `path`, or `None` when none of them can.'''
        for finder in self.finders:
            name = finder.name_from_path(path, extensions)
            if name is not None:
                return name
        return None
|