Files
codeql/python/extractor/semmle/populator.py

149 lines
6.2 KiB
Python

import sys
import os
import subprocess
from ast import literal_eval
from semmle import logging
from semmle import traverser
from semmle import cmdline
from semmle import worker
from semmle.util import VERSION, update_analysis_version, get_analysis_major_version
from buildtools.version import executable
'''The populator generates trap files from a Python project.
The populator consists of two parts: a traverser front end which traverses the file
system and multiple worker back ends which extract information from the modules.
'''
# NOTE: The front end is simply an iterable of "extractables", so it should be easy to
# plug in new front ends if needed.
def cleanup_sys_path(path):
    '''Clean up sys.path removing duplicates and
    current working directory, making it safe for analysis.

    Each entry is kept at most once, at the position of its first occurrence,
    and every entry equal to the current working directory is dropped. The
    list passed in is not modified; a new list is returned.
    '''
    cwd = os.getcwd()
    # dict.fromkeys() de-duplicates while preserving first-occurrence order, so
    # repeated entries are removed even when they are not adjacent.
    # Filtering (rather than a single list.remove) drops the working directory
    # however many times it was listed.
    return [entry for entry in dict.fromkeys(path) if entry != cwd]
def get_py2_sys_path(logger, py3_sys_path):
    '''Ask a Python 2 interpreter for its sys.path, falling back to the Python 3 one.

    Returns a `(sys_path, found_py2)` tuple. When the Python 2 interpreter could be
    queried, `sys_path` is its sys.path with the first entry replaced by the first
    entry of `py3_sys_path`, and `found_py2` is True. Otherwise the problem is
    logged and `(py3_sys_path, False)` is returned.
    '''
    snippet = '-c "import sys; print(sys.path)"'
    try:
        command = " ".join(executable(2) + [snippet])
        # `shell=True` is required for the test framework to function correctly: for
        # whatever reason, `PATH` is ignored when `shell=False`. That in turn means the
        # command must be passed as one string rather than a list; otherwise the Python
        # interpreter is started _as a REPL_ instead of running the given snippet.
        raw_output = subprocess.check_output(command, shell=True)
        py2_sys_path = literal_eval(raw_output.decode(sys.getfilesystemencoding()))
        # Make the first entry match the Python 3 sys.path -- a reference to our local
        # `tools` directory -- so that the `six` stubs are picked up from there. The
        # entry being overwritten is '', which would be cleaned up later anyway.
        py2_sys_path[0] = py3_sys_path[0]
    except (subprocess.CalledProcessError, ValueError, SyntaxError) as e:
        logger.error("Error while getting Python 2 sys.path:")
        logger.error(e)
        logger.info("No Python 2 found. Using Python 3 sys.path.")
        return py3_sys_path, False
    else:
        return py2_sys_path, True
def main(sys_path = sys.path[:]):
    '''Command-line entry point of the populator.

    Parses `sys.argv`, sets up the logger, selects the sys.path to analyse with
    (Python 2 or Python 3 standard library), builds the traverser front end and
    hands everything over to `run`. Exits the process with status 1 when running
    under Python 2 or when the traverser cannot be constructed.

    `sys_path` defaults to a copy of `sys.path` taken when this module is loaded.
    '''
    options, args = cmdline.parse(sys.argv[1:])
    logger = logging.Logger(options.verbosity, options.colorize)

    # Not the prettiest approach, but when running tests the `--lang` flag must
    # influence the analysis version (e.g. so that the correct stdlib TRAP file is
    # included). The values that would otherwise come from
    # `CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION` are therefore overwritten here.
    requested_versions = options.language_version
    if requested_versions:
        update_analysis_version(requested_versions[-1])

    if get_analysis_major_version() == 2:
        # Switch `sys_path` over to the Python 2 standard library.
        sys_path, found_py2 = get_py2_sys_path(logger, sys_path)
    else:
        found_py2 = False

    # Use utf-8 for stdout/stderr so that logging/printing works on systems with
    # bad default encodings (windows).
    for stream in (sys.stdout, sys.stderr):
        stream.reconfigure(encoding='utf-8')

    sys.setrecursionlimit(2000)
    sys_path = cleanup_sys_path(sys_path)
    # The first entry is left out of the path handed to the extractor options.
    options.sys_path = sys_path[1:]

    if sys.version_info.major == 2:
        logger.error("Extraction using Python 2 is not supported.")
        for hint in (
            "To use the Python extractor, please ensure that Python 3 is available on your system.",
            "For more information, see https://codeql.github.com/docs/codeql-overview/system-requirements/#additional-software-requirements",
            "and https://codeql.github.com/docs/codeql-overview/supported-languages-and-frameworks/#languages-and-compilers",
        ):
            logger.warning(hint)
        logger.close()
        logging.stop()
        sys.exit(1)

    if found_py2:
        logger.info("Extraction will use the Python 2 standard library.")
    else:
        logger.info("Extraction will use the Python 3 standard library.")
    logger.info("sys_path is: %s", sys_path)

    try:
        front_end = traverser.Traverser(options, args, logger)
    except Exception as ex:
        logger.error("%s", ex)
        logger.close()
        logging.stop()
        sys.exit(1)
    run(options, args, front_end, logger)
def run(options, args, the_traverser, logger: logging.Logger):
    '''Extract everything yielded by `the_traverser` using a pool of workers.

    The source archive location is read from the environment
    (`CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR`, then `SOURCE_ARCHIVE`), and is
    None when neither variable is set.

    Returns normally when extraction succeeds. Otherwise the process exits with:
      1 -- the pool could not be created (ValueError) or `worker.ExtractorFailure`,
      2 -- keyboard interrupt,
      3 -- any other exception.

    `args` is accepted but not used here.
    '''
    logger.info("Python version %s", sys.version.split()[0])
    logger.info("Python extractor version %s", VERSION)

    # The first of these variables that is present wins, even if its value is empty.
    archive = None
    for variable in ('CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR', 'SOURCE_ARCHIVE'):
        if variable in os.environ:
            archive = os.environ[variable]
            break

    trap_dir = cmdline.output_dir_from_options_and_env(options)
    try:
        pool = worker.ExtractorPool.from_options(options, trap_dir, archive, logger)
    except ValueError as ve:
        logger.error("%s", ve)
        logger.close()
        sys.exit(1)

    try:
        exitcode = 0
        pool.extract(the_traverser)
    except worker.ExtractorFailure:
        exitcode = 1
    except KeyboardInterrupt:
        exitcode = 2
        logger.info("Keyboard interrupt")
    except BaseException as ex:
        exitcode = 3
        logger.error("Unexpected exception: %s ", ex)
        logger.traceback(logging.WARN)
    finally:
        # Shut the pool down first, then the logger, whatever the outcome was.
        if not exitcode:
            logger.debug("Writing interpreter trap")
            pool.close()
        else:
            logger.debug("Stopping...")
            pool.stop()
        logger.close()
        logging.stop()
        final_message = "Stopped." if exitcode else "Done."
        logger.write_message(logging.DEBUG, final_message)
        if exitcode:
            sys.exit(exitcode)
# Script entry point: run the populator only when this file is executed directly.
if __name__ == "__main__":
    main()