''' Classes and functions for converting module names into paths and Extractables. Implements standard Python import semantics, and is designed to be extensible to handle additional features like stub and template files. ''' import sys import imp import os.path from semmle.util import FileExtractable, FolderExtractable, BuiltinModuleExtractable, PY_EXTENSIONS, get_analysis_major_version from semmle.python.modules import PythonSourceModule, is_script class Module(object): '''A module. Modules are approximations to Python module objects and are used for analyzing imports.''' IS_PACKAGE = False path = None respect_init = True def __init__(self, name, package): self.name = name self.package = package def get_sub_module(self, name): '''gets the (immediate) sub-module with the given name''' raise NotImplementedError() def all_sub_modules(self): '''returns an iterable of all the sub-modules of this module''' raise NotImplementedError() def get_extractable(self): '''gets the Extractable for this module''' raise NotImplementedError() def find(self, name): '''Returns the named sub-module of this module if this module is a package, otherwise returns `None`''' if '.' in name: top, rest = name.split(".", 1) pkg = self.get_sub_module(top) return pkg.find(rest) if pkg else None else: return self.get_sub_module(name) def is_package(self): return self.IS_PACKAGE class PyModule(Module): ' A Python source code module' def __init__(self, name, package, path): Module.__init__(self, name, package) assert isinstance(path, str) self.path = path def get_sub_module(self, name): return None def all_sub_modules(self): return () def get_extractable(self): return FileExtractable(self.path) def load(self, logger=None): return PythonSourceModule(self.name, self.path, logger=logger) def __str__(self): return "Python module at %s" % self.path class BuiltinModule(Module): ' A built-in module' def __init__(self, name, package): Module.__init__(self, name, package) def get_sub_module(self, name): return None def all_sub_modules(self): return () def get_extractable(self): return BuiltinModuleExtractable(self.name) def __str__(self): return "Builtin module %s" % self.name class FilePackage(Module): ' A normal package. That is a folder with an __init__.py' IS_PACKAGE = True def __init__(self, name, package, path, respect_init=True): Module.__init__(self, name, package) assert isinstance(path, str), type(path) self.path = path self.respect_init = respect_init def get_sub_module(self, name): modname = self.name + "." + name if self.name else None basepath = os.path.join(self.path, name) return _from_base(modname, basepath, self, self.respect_init) def all_sub_modules(self): return _from_folder(self.name, self.path, self, self.respect_init) def load(self): return None def get_extractable(self): return FolderExtractable(self.path) def __str__(self): return "Package at %s" % self.path class PthPackage(Module): "A built-in package object generated from a '.pth' file" IS_PACKAGE = True def __init__(self, name, package, search_path): Module.__init__(self, name, package) self.search_path = search_path def get_sub_module(self, name): mname = self.name + "." + name for path in self.search_path: mod = _from_base(mname, os.path.join(path, name), self) if mod is not None: return mod return None def all_sub_modules(self): for path in self.search_path: for mod in _from_folder(self.name, path, self): yield mod def load(self): return None def __str__(self): return "Builtin package (.pth) %s %s" % (self.name, self.search_path) def get_extractable(self): return None #Helper functions def _from_base(name, basepath, pkg, respect_init=True): if os.path.isdir(basepath): if os.path.exists(os.path.join(basepath, "__init__.py")) or not respect_init: return FilePackage(name, pkg, basepath, respect_init) else: return None for ext in PY_EXTENSIONS: filepath = basepath + ext if os.path.isfile(filepath): return PyModule(name, pkg, filepath) return None def _from_folder(name, path, pkg, respect_init=True): for file in os.listdir(path): fullpath = os.path.join(path, file) if os.path.isdir(fullpath): if os.path.exists(os.path.join(fullpath, "__init__.py")) or not respect_init: yield FilePackage(name + "." + file if name else None, pkg, fullpath, respect_init) base, ext = os.path.splitext(file) if ext not in PY_EXTENSIONS: continue if os.path.isfile(fullpath): yield PyModule(name + "." + base if name else None, pkg, fullpath) class AbstractFinder(object): def find(self, mod_name): '''Find an extractable object given a module name''' if '.' in mod_name: top, rest = mod_name.split(".", 1) pkg = self.find_top(top) return pkg.find(rest) if pkg else None else: return self.find_top(mod_name) def find_top(self, name): '''Find module or package object given a simple (dot-less) name''' raise NotImplementedError() def name_from_path(self, path, extensions): '''Find module or package object given a path''' raise NotImplementedError() class PyFinder(AbstractFinder): __slots__ = [ 'path', 'respect_init', 'logger' ] def __init__(self, path, respect_init, logger): assert isinstance(path, str), path self.path = os.path.abspath(path) self.respect_init = respect_init self.logger = logger def find_top(self, mod_name): basepath = os.path.join(self.path, mod_name) return _from_base(mod_name, basepath, None, self.respect_init) def name_from_path(self, path, extensions): rel_path = _relative_subpath(path, self.path) if rel_path is None: return None base, ext = os.path.splitext(rel_path) if ext and ext not in extensions: return None return ".".join(base.split(os.path.sep)) def _relative_subpath(subpath, root): 'Returns the relative path if `subpath` is within `root` or `None` otherwise' try: relpath = os.path.relpath(subpath, root) except ValueError: #No relative path possible return None if relpath.startswith(os.pardir): #Not in root: return None return relpath class BuiltinFinder(AbstractFinder): '''Finder for builtin modules that are already present in the VM or can be guaranteed to load successfully''' def __init__(self, logger): self.modules = {} for name, module in sys.modules.items(): self.modules[name] = module try: self.dynload_path = os.path.dirname(imp.find_module("_json")[1]) except Exception: if os.name != "nt": logger.warning("Failed to find dynload path") self.dynload_path = None def builtin_module(self, name): if "." in name: pname, name = name.rsplit(".", 1) return BuiltinModule(name, self.builtin_module(pname)) return BuiltinModule(name, None) def find(self, mod_name): mod = super(BuiltinFinder, self).find(mod_name) if mod is not None: return mod #Use `imp` module to find module try: _, filepath, mod_t = imp.find_module(mod_name) except ImportError: return None #Accept builtin dynamically loaded modules like _ctypes or _json if filepath and os.path.dirname(filepath) == self.dynload_path: return BuiltinModule(mod_name, None) return None def find_top(self, mod_name): if mod_name in self.modules: mod = self.modules[mod_name] if hasattr(mod, "__file__"): return None if hasattr(mod, "__path__"): return PthPackage(mod_name, None, mod.__path__) return BuiltinModule(mod_name, None) if mod_name in sys.builtin_module_names: return BuiltinModule(mod_name, None) return None def name_from_path(self, path, extensions): return None #Stub file handling class StubFinder(PyFinder): def __init__(self, logger): try: tools = os.environ['ODASA_TOOLS'] except KeyError: tools = sys.path[1] logger.debug("StubFinder: can't find ODASA_TOOLS, using '%s' instead", tools) path = os.path.join(tools, "data", "python", "stubs") super(StubFinder, self).__init__(path, True, logger) def _finders_for_path(path, respect_init, logger): finders = [ StubFinder(logger) ] for p in path: if p: finders.append(PyFinder(p, respect_init, logger)) finders.append(BuiltinFinder(logger)) return finders def finders_from_options_and_env(options, logger): '''Return a list of finders from the given command line options''' if options.path: path = options.path + options.sys_path else: path = options.sys_path path = [os.path.abspath(p) for p in path] if options.exclude: exclude = set(options.exclude) trimmed_path = [] for p in path: for x in exclude: if p.startswith(x): break else: trimmed_path.append(p) path = trimmed_path logger.debug("Finder path: %s", path) logger.debug("sys path: %s", sys.path) return _finders_for_path(path, options.respect_init, logger) class Finder(object): def __init__(self, finders, options, logger): self.finders = finders self.path_map = {} self.logger = logger self.respect_init = options.respect_init def find(self, mod_name): for finder in self.finders: mod = finder.find(mod_name) if mod is not None: return mod self.logger.debug("Cannot find module '%s'", mod_name) return None @staticmethod def from_options_and_env(options, logger): return Finder(finders_from_options_and_env(options, logger), options, logger) def from_extractable(self, unit): if isinstance(unit, FolderExtractable) or isinstance(unit, FileExtractable): return self.from_path(unit.path) return None def from_path(self, path, extensions=PY_EXTENSIONS): if path in self.path_map: return self.path_map[path] if not path or path == "/": return None is_python_2 = (get_analysis_major_version() == 2) if os.path.isdir(path) and not os.path.exists(os.path.join(path, "__init__.py")) and (self.respect_init or not is_python_2): return None pkg = self.from_path(os.path.dirname(path)) mod = None if os.path.isdir(path): mod = FilePackage(None, pkg, path) if os.path.isfile(path): base, ext = os.path.splitext(path) if ext in extensions: mod = PyModule(None, pkg, path) if is_script(path): mod = PyModule(None, None, path) self.path_map[path] = mod return mod def name_from_path(self, path, extensions=PY_EXTENSIONS): for finder in self.finders: name = finder.name_from_path(path, extensions) if name is not None: return name return None