mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Improve parser logging/timing/customisability
Does a bunch of things, unfortunately all in the same place, so my apologies in advance for a slightly complicated commit. As for the changes themselves, this commit - Adds timers for the old and new parsers. This means we get the overall time spent on these parts of the extractor if the extractor is run with `DEBUG` output shown. - Adds logging information (at the `DEBUG` level) to show which invocations of the parsers happen when, and whether they succeed or not. - Adds support for using an environment variable named `CODEQL_PYTHON_DISABLE_OLD_PARSER` to disable using the old parser entirely. This makes it easier to test the new parser in isolation. - Fixes a bug where we did not check whether a parse with the new parser had already succeeded, and so would do a superfluous second parse.
This commit is contained in:
@@ -98,6 +98,7 @@ class ModuleImporter(object):
|
||||
def get_import_nodes(self, loaded_module):
|
||||
'Return list of AST nodes representing imports'
|
||||
try:
|
||||
self.logger.debug("Looking for imports in %s", loaded_module.path)
|
||||
return imports_from_ast(loaded_module.py_ast)
|
||||
except Exception as ex:
|
||||
if isinstance(ex, SyntaxError):
|
||||
|
||||
@@ -109,14 +109,25 @@ class PythonSourceModule(object):
|
||||
def old_py_ast(self):
|
||||
# The py_ast is the raw ast from the Python parser.
|
||||
if self._py_ast is None:
|
||||
self._py_ast = semmle.python.parser.parse(self.tokens, self.logger)
|
||||
with timers["old_py_ast"]:
|
||||
self.logger.debug("Trying old parser on %s", self.path)
|
||||
self._py_ast = semmle.python.parser.parse(self.tokens, self.logger)
|
||||
self.logger.debug("Old parser successful on %s", self.path)
|
||||
else:
|
||||
self.logger.debug("Found (during old_py_ast) parse tree for %s in cache", self.path)
|
||||
return self._py_ast
|
||||
|
||||
@property
|
||||
def py_ast(self):
|
||||
try:
|
||||
# First, try to parse the source with the old Python parser.
|
||||
return self.old_py_ast
|
||||
# If the `CODEQL_PYTHON_DISABLE_OLD_PARSER` flag is present, we do not try to use the
|
||||
# old parser, and instead jump straight to the exception handler.
|
||||
if os.environ.get("CODEQL_PYTHON_DISABLE_OLD_PARSER"):
|
||||
self.logger.debug("Old parser disabled, skipping old parse attempt for %s", self.path)
|
||||
raise Exception("Skipping old parser")
|
||||
# Otherwise, we first try to parse the source with the old Python parser.
|
||||
self._py_ast = self.old_py_ast
|
||||
return self._py_ast
|
||||
except Exception as ex:
|
||||
# If that fails, try to parse the source with the new Python parser (unless it has been
|
||||
# explicitly disabled).
|
||||
@@ -131,7 +142,13 @@ class PythonSourceModule(object):
|
||||
raise SyntaxError("Exception %s while parsing %s" % (ex, self.path))
|
||||
else:
|
||||
try:
|
||||
self._py_ast = semmle.python.parser.tsg_parser.parse(self.path, self.logger)
|
||||
with timers["tsg_py_ast"]:
|
||||
if self._py_ast is None:
|
||||
self.logger.debug("Trying tsg-python on %s", self.path)
|
||||
self._py_ast = semmle.python.parser.tsg_parser.parse(self.path, self.logger)
|
||||
self.logger.debug("tsg-python successful on %s", self.path)
|
||||
else:
|
||||
self.logger.debug("Found (during py_ast) parse tree for %s in cache", self.path)
|
||||
return self._py_ast
|
||||
except SyntaxError as ex:
|
||||
raise ex
|
||||
|
||||
@@ -168,7 +168,7 @@ def read_tsg_python_output(path, logger):
|
||||
p.stdout.close()
|
||||
p.terminate()
|
||||
p.wait()
|
||||
logger.info("Read {} nodes and {} edges from TSG output".format(len(node_attr), len(edge_attr)))
|
||||
logger.debug("Read {} nodes and {} edges from TSG output".format(len(node_attr), len(edge_attr)))
|
||||
return node_attr, edge_attr
|
||||
|
||||
def evaluate_string(s):
|
||||
|
||||
@@ -10,7 +10,7 @@ from io import BytesIO
|
||||
|
||||
#Semantic version of extractor.
|
||||
#Update this if any changes are made
|
||||
VERSION = "7.1.0"
|
||||
VERSION = "7.1.1"
|
||||
|
||||
PY_EXTENSIONS = ".py", ".pyw"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user