mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #20337 from d10c/d10c/python-overlay-compilation-plus-extractor
Python: enable overlay compilation + extractor overlay support
This commit is contained in:
@@ -40,6 +40,22 @@ externalData(
|
||||
string value : string ref
|
||||
);
|
||||
|
||||
/*- Overlay support -*/
|
||||
|
||||
/**
|
||||
* The CLI will automatically emit the tuple `databaseMetadata("isOverlay", "true")`,
|
||||
* along with an `overlayChangedFiles` tuple for each new/modified/deleted file,
|
||||
* when building an overlay database, and these can be used by the discard predicates.
|
||||
*/
|
||||
databaseMetadata(
|
||||
string metadataKey : string ref,
|
||||
string value : string ref
|
||||
);
|
||||
|
||||
overlayChangedFiles(
|
||||
string path : string ref
|
||||
);
|
||||
|
||||
/*- DEPRECATED: Snapshot date -*/
|
||||
|
||||
snapshotDate(unique date snapshotDate : date ref);
|
||||
@@ -420,3 +436,12 @@ py_decorated_object(int object : @py_object ref,
|
||||
@py_object = @py_cobject | @py_flow_node;
|
||||
|
||||
@py_source_element = @py_ast_node | @container;
|
||||
|
||||
/** The union of all Python database entities */
|
||||
@top =
|
||||
@py_source_element | @py_object | @py_base_var | @location | @py_line | @py_comment |
|
||||
@py_expr_parent | @py_expr_context |
|
||||
@py_operator | @py_boolop | @py_cmpop | @py_unaryop |
|
||||
@py_cmpop_list | @py_alias_list | @py_StringPart_list | @py_comprehension_list | @py_dict_item_list | @py_pattern_list | @py_stmt_list | @py_str_list | @py_type_parameter_list |
|
||||
@externalDefect | @externalMetric | @externalDataElement | @duplication_or_similarity | @svnentry |
|
||||
@xmllocatable | @yaml_locatable;
|
||||
|
||||
@@ -25,7 +25,7 @@ def renamer_from_options_and_env(options, logger):
|
||||
except (AttributeError, ImportError):
|
||||
raise SemmleError("Cannot get renamer from module " + options.renamer)
|
||||
else:
|
||||
path_transformer = os.environ.get("SEMMLE_PATH_TRANSFORMER", None)
|
||||
path_transformer = os.environ.get("CODEQL_PATH_TRANSFORMER", None) or os.environ.get("SEMMLE_PATH_TRANSFORMER", None)
|
||||
if path_transformer:
|
||||
logger.info("Using path transformer '%s'", path_transformer)
|
||||
rename = projectlayout.get_renamer(path_transformer)
|
||||
|
||||
@@ -12,12 +12,16 @@ import collections
|
||||
import re
|
||||
from functools import total_ordering
|
||||
import sys
|
||||
from pathlib import PureWindowsPath
|
||||
import os
|
||||
|
||||
def get_renamer(filename):
    """Build a path-renaming callable from the project layout in *filename*.

    The layout is obtained with ``load(filename)``. The returned callable
    takes a path and returns ``layout.artificial_path(path)``, or the path
    itself when that lookup yields ``None``.

    When ``os.name`` is ``"nt"`` the incoming path is first converted with
    ``PureWindowsPath(path).as_posix()`` before it is looked up.
    """
    project_layout = load(filename)

    def rename(path):
        mapped = project_layout.artificial_path(path)
        if mapped is None:
            # The layout has no mapping for this path: pass it through.
            return path
        return mapped

    if os.name != "nt":
        return rename

    def rename_windows_path(path):
        # Look the path up in its forward-slash form.
        return rename(PureWindowsPath(path).as_posix())

    return rename_windows_path
|
||||
|
||||
def load(filename):
|
||||
@@ -257,7 +261,7 @@ class _Rewrite(object):
|
||||
exclude = path
|
||||
self._line = line;
|
||||
self._original = u'-' + exclude;
|
||||
if not exclude.startswith(u"/"):
|
||||
if os.name != 'nt' and not exclude.startswith(u"/"):
|
||||
exclude = u'/' + exclude
|
||||
if exclude.find(u"//") != -1:
|
||||
raise _error(u"Illegal '//' in exclude path", line)
|
||||
@@ -274,14 +278,14 @@ class _Rewrite(object):
|
||||
include = path
|
||||
self._line = line;
|
||||
self._original = include;
|
||||
if not include.startswith(u"/"):
|
||||
if os.name != 'nt' and not include.startswith(u"/"):
|
||||
include = u'/' + include
|
||||
doubleslash = include.find(u"//")
|
||||
if doubleslash != include.find(u"//"):
|
||||
raise _error(u"More than one '//' in include path (project-layout)", line)
|
||||
if self._verify_stars.match(include):
|
||||
raise _error(u"Illegal use of '**' in include path (project-layout)", line)
|
||||
if not virtual.startswith(u"/"):
|
||||
if os.name != 'nt' and not virtual.startswith(u"/"):
|
||||
virtual = u"/" + virtual
|
||||
if virtual.endswith(u"/"):
|
||||
virtual = virtual[0 : -1]
|
||||
|
||||
@@ -11,6 +11,7 @@ from semmle.extractors import SuperExtractor, ModulePrinter, SkippedBuiltin
|
||||
from semmle.profiling import get_profiler
|
||||
from semmle.path_rename import renamer_from_options_and_env
|
||||
from semmle.logging import WARN, recursion_error_message, internal_error_message, Logger
|
||||
from semmle.util import FileExtractable, FolderExtractable
|
||||
|
||||
class ExtractorFailure(Exception):
|
||||
'Generic exception representing the failure of an extractor.'
|
||||
@@ -19,17 +20,32 @@ class ExtractorFailure(Exception):
|
||||
|
||||
class ModuleImportGraph(object):
|
||||
|
||||
def __init__(self, max_depth):
|
||||
def __init__(self, max_depth, logger: Logger):
|
||||
self.modules = {}
|
||||
self.succ = defaultdict(set)
|
||||
self.todo = set()
|
||||
self.done = set()
|
||||
self.max_depth = max_depth
|
||||
self.logger = logger
|
||||
|
||||
# During overlay extraction, only traverse the files that were changed.
|
||||
self.overlay_changes = None
|
||||
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os.environ:
|
||||
overlay_changes_file = os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES']
|
||||
logger.info("Overlay extraction mode: only extracting files changed according to '%s'", overlay_changes_file)
|
||||
try:
|
||||
with open(overlay_changes_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
changed_paths = data.get('changes', [])
|
||||
self.overlay_changes = { os.path.abspath(p) for p in changed_paths }
|
||||
except (IOError, ValueError) as e:
|
||||
logger.warn("Failed to read overlay changes from '%s' (falling back to full extraction): %s", overlay_changes_file, e)
|
||||
self.overlay_changes = None
|
||||
|
||||
def add_root(self, mod):
    """Record *mod* at depth 0 and queue it unless it is already done.

    Queuing goes through ``add_todo`` only, so the overlay filter applied
    there is respected; adding to ``self.todo`` directly would bypass it.
    """
    self.modules[mod] = 0
    if mod not in self.done:
        self.add_todo(mod)
|
||||
|
||||
def add_import(self, mod, imported):
|
||||
assert mod in self.modules
|
||||
@@ -39,7 +55,7 @@ class ModuleImportGraph(object):
|
||||
self._reduce_depth(imported, self.modules[mod] + 1)
|
||||
else:
|
||||
if self.modules[mod] < self.max_depth and imported not in self.done:
|
||||
self.todo.add(imported)
|
||||
self.add_todo(imported)
|
||||
self.modules[imported] = self.modules[mod] + 1
|
||||
|
||||
def _reduce_depth(self, mod, depth):
|
||||
@@ -48,7 +64,7 @@ class ModuleImportGraph(object):
|
||||
if depth > self.max_depth:
|
||||
return
|
||||
if mod not in self.done:
|
||||
self.todo.add(mod)
|
||||
self.add_todo(mod)
|
||||
self.modules[mod] = depth
|
||||
for imp in self.succ[mod]:
|
||||
self._reduce_depth(imp, depth+1)
|
||||
@@ -61,11 +77,25 @@ class ModuleImportGraph(object):
|
||||
|
||||
def push_back(self, mod):
    """Move *mod* out of ``self.done`` and offer it for processing again.

    ``self.done.remove`` raises ``KeyError`` if *mod* is not in ``done``.
    Re-queuing goes through ``add_todo`` only, so the overlay filter applied
    there is respected; adding to ``self.todo`` directly would bypass it.
    """
    self.done.remove(mod)
    self.add_todo(mod)
|
||||
|
||||
def empty(self):
    """Return True when ``self.todo`` holds no modules, False otherwise."""
    has_pending = bool(self.todo)
    return not has_pending
|
||||
|
||||
def add_todo(self, mod):
    """Add *mod* to ``self.todo`` if the overlay filter accepts it.

    When ``_module_in_overlay_changes(mod)`` is false, the module is not
    queued and a debug message is logged instead.
    """
    if self._module_in_overlay_changes(mod):
        self.todo.add(mod)
        return
    self.logger.debug("Skipping module '%s' as it was not changed in overlay extraction.", mod)
|
||||
|
||||
def _module_in_overlay_changes(self, mod):
    """Return whether *mod* passes the overlay filter.

    Returns True when ``self.overlay_changes`` is None (no filter), and for
    any *mod* that is neither a ``FileExtractable`` nor a
    ``FolderExtractable``. A ``FileExtractable`` passes when ``mod.path`` is
    in ``self.overlay_changes``; a ``FolderExtractable`` passes when the
    ``__init__.py`` directly under ``mod.path`` is.
    """
    changed = self.overlay_changes
    if changed is None:
        # No overlay restriction is active: every module is eligible.
        return True
    if isinstance(mod, FileExtractable):
        # NOTE(review): assumes mod.path is already absolute and normalised
        # the same way os.path.abspath would produce it -- confirm.
        return mod.path in changed
    if isinstance(mod, FolderExtractable):
        # The entries in overlay_changes come from os.path.abspath, which
        # uses the platform's separator. Join with os.path.join rather than
        # a literal '/' so the candidate can match on Windows as well.
        return os.path.join(mod.path, '__init__.py') in changed
    return True
|
||||
|
||||
class ExtractorPool(object):
|
||||
'''Pool of worker processes running extractors'''
|
||||
|
||||
@@ -90,7 +120,7 @@ class ExtractorPool(object):
|
||||
self.enqueued = set()
|
||||
self.done = set()
|
||||
self.requirements = {}
|
||||
self.import_graph = ModuleImportGraph(options.max_import_depth)
|
||||
self.import_graph = ModuleImportGraph(options.max_import_depth, logger)
|
||||
logger.debug("Source archive: %s", archive)
|
||||
self.logger = logger
|
||||
DiagnosticsWriter.create_output_dir()
|
||||
@@ -162,6 +192,10 @@ class ExtractorPool(object):
|
||||
self.module_queue.put(None)
|
||||
for p in self.procs:
|
||||
p.join()
|
||||
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_BASE_METADATA_OUT' in os.environ:
|
||||
with open(os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_BASE_METADATA_OUT'], 'w', encoding='utf-8') as f:
|
||||
metadata = {}
|
||||
json.dump(metadata, f)
|
||||
self.logger.info("Processed %d modules in %0.2fs", len(self.import_graph.done), time.time() - self.start_time)
|
||||
|
||||
def stop(self, timeout=2.0):
|
||||
|
||||
@@ -19,7 +19,7 @@ class ProjectLayoutUseTest(ExtractorTest):
|
||||
|
||||
def test_invalid_layout(self):
|
||||
try:
|
||||
with environment("SEMMLE_PATH_TRANSFORMER", "nonsuch/project-layout"):
|
||||
with environment("CODEQL_PATH_TRANSFORMER", "nonsuch/project-layout"):
|
||||
self.run_extractor("-R", self.src_path)
|
||||
except subprocess.CalledProcessError as ex:
|
||||
self.assertEqual(ex.returncode, 2)
|
||||
|
||||
Reference in New Issue
Block a user