Merge pull request #20337 from d10c/d10c/python-overlay-compilation-plus-extractor

Python: enable overlay compilation + extractor overlay support
This commit is contained in:
Nora Dimitrijević
2025-10-16 14:49:01 +02:00
committed by GitHub
41 changed files with 10814 additions and 11 deletions

View File

@@ -9,6 +9,7 @@
"fragments": [ "fragments": [
"/*- Compilations -*/", "/*- Compilations -*/",
"/*- External data -*/", "/*- External data -*/",
"/*- Overlay support -*/",
"/*- Files and folders -*/", "/*- Files and folders -*/",
"/*- Diagnostic messages -*/", "/*- Diagnostic messages -*/",
"/*- Diagnostic messages: severity -*/", "/*- Diagnostic messages: severity -*/",

View File

@@ -2,6 +2,7 @@ name: "python"
display_name: "Python" display_name: "Python"
version: 1.22.1 version: 1.22.1
column_kind: utf32 column_kind: utf32
overlay_support_version: 20250626
build_modes: build_modes:
- none - none
default_queries: default_queries:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,4 @@
description: Add databaseMetadata and overlayChangedFiles relations
compatibility: full
databaseMetadata.rel: delete
overlayChangedFiles.rel: delete

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
description: Add @top type
compatibility: full

View File

@@ -40,6 +40,22 @@ externalData(
string value : string ref string value : string ref
); );
/*- Overlay support -*/
/**
* The CLI will automatically emit the tuple `databaseMetadata("isOverlay", "true")`,
* along with an `overlayChangedFiles` tuple for each new/modified/deleted file,
* when building an overlay database, and these can be used by the discard predicates.
*/
databaseMetadata(
string metadataKey : string ref,
string value : string ref
);
overlayChangedFiles(
string path : string ref
);
/*- DEPRECATED: Snapshot date -*/ /*- DEPRECATED: Snapshot date -*/
snapshotDate(unique date snapshotDate : date ref); snapshotDate(unique date snapshotDate : date ref);
@@ -420,3 +436,12 @@ py_decorated_object(int object : @py_object ref,
@py_object = @py_cobject | @py_flow_node; @py_object = @py_cobject | @py_flow_node;
@py_source_element = @py_ast_node | @container; @py_source_element = @py_ast_node | @container;
/** The union of all Python database entities */
@top =
@py_source_element | @py_object | @py_base_var | @location | @py_line | @py_comment |
@py_expr_parent | @py_expr_context |
@py_operator | @py_boolop | @py_cmpop | @py_unaryop |
@py_cmpop_list | @py_alias_list | @py_StringPart_list | @py_comprehension_list | @py_dict_item_list | @py_pattern_list | @py_stmt_list | @py_str_list | @py_type_parameter_list |
@externalDefect | @externalMetric | @externalDataElement | @duplication_or_similarity | @svnentry |
@xmllocatable | @yaml_locatable;

View File

@@ -25,7 +25,7 @@ def renamer_from_options_and_env(options, logger):
except (AttributeError, ImportError): except (AttributeError, ImportError):
raise SemmleError("Cannot get renamer from module " + options.renamer) raise SemmleError("Cannot get renamer from module " + options.renamer)
else: else:
path_transformer = os.environ.get("SEMMLE_PATH_TRANSFORMER", None) path_transformer = os.environ.get("CODEQL_PATH_TRANSFORMER", None) or os.environ.get("SEMMLE_PATH_TRANSFORMER", None)
if path_transformer: if path_transformer:
logger.info("Using path transformer '%s'", path_transformer) logger.info("Using path transformer '%s'", path_transformer)
rename = projectlayout.get_renamer(path_transformer) rename = projectlayout.get_renamer(path_transformer)

View File

@@ -12,12 +12,16 @@ import collections
import re import re
from functools import total_ordering from functools import total_ordering
import sys import sys
from pathlib import PureWindowsPath
import os
def get_renamer(filename): def get_renamer(filename):
layout = load(filename) layout = load(filename)
def rename(path): def rename(path):
renamed = layout.artificial_path(path) renamed = layout.artificial_path(path)
return path if renamed is None else renamed return path if renamed is None else renamed
if os.name == "nt":
return lambda path: rename(PureWindowsPath(path).as_posix())
return rename return rename
def load(filename): def load(filename):
@@ -257,7 +261,7 @@ class _Rewrite(object):
exclude = path exclude = path
self._line = line; self._line = line;
self._original = u'-' + exclude; self._original = u'-' + exclude;
if not exclude.startswith(u"/"): if os.name != 'nt' and not exclude.startswith(u"/"):
exclude = u'/' + exclude exclude = u'/' + exclude
if exclude.find(u"//") != -1: if exclude.find(u"//") != -1:
raise _error(u"Illegal '//' in exclude path", line) raise _error(u"Illegal '//' in exclude path", line)
@@ -274,14 +278,14 @@ class _Rewrite(object):
include = path include = path
self._line = line; self._line = line;
self._original = include; self._original = include;
if not include.startswith(u"/"): if os.name != 'nt' and not include.startswith(u"/"):
include = u'/' + include include = u'/' + include
doubleslash = include.find(u"//") doubleslash = include.find(u"//")
if doubleslash != include.find(u"//"): if doubleslash != include.find(u"//"):
raise _error(u"More than one '//' in include path (project-layout)", line) raise _error(u"More than one '//' in include path (project-layout)", line)
if self._verify_stars.match(include): if self._verify_stars.match(include):
raise _error(u"Illegal use of '**' in include path (project-layout)", line) raise _error(u"Illegal use of '**' in include path (project-layout)", line)
if not virtual.startswith(u"/"): if os.name != 'nt' and not virtual.startswith(u"/"):
virtual = u"/" + virtual virtual = u"/" + virtual
if virtual.endswith(u"/"): if virtual.endswith(u"/"):
virtual = virtual[0 : -1] virtual = virtual[0 : -1]

View File

@@ -11,6 +11,7 @@ from semmle.extractors import SuperExtractor, ModulePrinter, SkippedBuiltin
from semmle.profiling import get_profiler from semmle.profiling import get_profiler
from semmle.path_rename import renamer_from_options_and_env from semmle.path_rename import renamer_from_options_and_env
from semmle.logging import WARN, recursion_error_message, internal_error_message, Logger from semmle.logging import WARN, recursion_error_message, internal_error_message, Logger
from semmle.util import FileExtractable, FolderExtractable
class ExtractorFailure(Exception): class ExtractorFailure(Exception):
'Generic exception representing the failure of an extractor.' 'Generic exception representing the failure of an extractor.'
@@ -19,17 +20,32 @@ class ExtractorFailure(Exception):
class ModuleImportGraph(object): class ModuleImportGraph(object):
def __init__(self, max_depth): def __init__(self, max_depth, logger: Logger):
self.modules = {} self.modules = {}
self.succ = defaultdict(set) self.succ = defaultdict(set)
self.todo = set() self.todo = set()
self.done = set() self.done = set()
self.max_depth = max_depth self.max_depth = max_depth
self.logger = logger
# During overlay extraction, only traverse the files that were changed.
self.overlay_changes = None
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os.environ:
overlay_changes_file = os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES']
logger.info("Overlay extraction mode: only extracting files changed according to '%s'", overlay_changes_file)
try:
with open(overlay_changes_file, 'r', encoding='utf-8') as f:
data = json.load(f)
changed_paths = data.get('changes', [])
self.overlay_changes = { os.path.abspath(p) for p in changed_paths }
except (IOError, ValueError) as e:
logger.warn("Failed to read overlay changes from '%s' (falling back to full extraction): %s", overlay_changes_file, e)
self.overlay_changes = None
def add_root(self, mod): def add_root(self, mod):
self.modules[mod] = 0 self.modules[mod] = 0
if mod not in self.done: if mod not in self.done:
self.todo.add(mod) self.add_todo(mod)
def add_import(self, mod, imported): def add_import(self, mod, imported):
assert mod in self.modules assert mod in self.modules
@@ -39,7 +55,7 @@ class ModuleImportGraph(object):
self._reduce_depth(imported, self.modules[mod] + 1) self._reduce_depth(imported, self.modules[mod] + 1)
else: else:
if self.modules[mod] < self.max_depth and imported not in self.done: if self.modules[mod] < self.max_depth and imported not in self.done:
self.todo.add(imported) self.add_todo(imported)
self.modules[imported] = self.modules[mod] + 1 self.modules[imported] = self.modules[mod] + 1
def _reduce_depth(self, mod, depth): def _reduce_depth(self, mod, depth):
@@ -48,7 +64,7 @@ class ModuleImportGraph(object):
if depth > self.max_depth: if depth > self.max_depth:
return return
if mod not in self.done: if mod not in self.done:
self.todo.add(mod) self.add_todo(mod)
self.modules[mod] = depth self.modules[mod] = depth
for imp in self.succ[mod]: for imp in self.succ[mod]:
self._reduce_depth(imp, depth+1) self._reduce_depth(imp, depth+1)
@@ -61,11 +77,25 @@ class ModuleImportGraph(object):
def push_back(self, mod): def push_back(self, mod):
self.done.remove(mod) self.done.remove(mod)
self.todo.add(mod) self.add_todo(mod)
def empty(self): def empty(self):
return not self.todo return not self.todo
def add_todo(self, mod):
    """Queue `mod` for extraction unless overlay filtering rejects it.

    A module for which `_module_in_overlay_changes` returns a falsy value is
    not added to `self.todo`; a debug message is logged for it instead.
    """
    if self._module_in_overlay_changes(mod):
        self.todo.add(mod)
    else:
        self.logger.debug("Skipping module '%s' as it was not changed in overlay extraction.", mod)
def _module_in_overlay_changes(self, mod):
    """Return whether `mod` passes the overlay-changes filter.

    When `self.overlay_changes` is None, every module is accepted. Otherwise
    a `FileExtractable` is accepted if its path is in the set, and a
    `FolderExtractable` is accepted if the path of its `__init__.py` is in
    the set. Any other kind of module is accepted unconditionally.
    """
    if self.overlay_changes is None:
        return True
    if isinstance(mod, FileExtractable):
        return mod.path in self.overlay_changes
    if isinstance(mod, FolderExtractable):
        # Join with the platform separator instead of a literal '/': a
        # hard-coded '/' produces mixed separators on Windows, which can never
        # match a normalised native path.
        # NOTE(review): assumes `mod.path` and the entries of
        # `overlay_changes` are both native-style paths -- confirm.
        return os.path.join(mod.path, '__init__.py') in self.overlay_changes
    return True
class ExtractorPool(object): class ExtractorPool(object):
'''Pool of worker processes running extractors''' '''Pool of worker processes running extractors'''
@@ -90,7 +120,7 @@ class ExtractorPool(object):
self.enqueued = set() self.enqueued = set()
self.done = set() self.done = set()
self.requirements = {} self.requirements = {}
self.import_graph = ModuleImportGraph(options.max_import_depth) self.import_graph = ModuleImportGraph(options.max_import_depth, logger)
logger.debug("Source archive: %s", archive) logger.debug("Source archive: %s", archive)
self.logger = logger self.logger = logger
DiagnosticsWriter.create_output_dir() DiagnosticsWriter.create_output_dir()
@@ -162,6 +192,10 @@ class ExtractorPool(object):
self.module_queue.put(None) self.module_queue.put(None)
for p in self.procs: for p in self.procs:
p.join() p.join()
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_BASE_METADATA_OUT' in os.environ:
with open(os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_BASE_METADATA_OUT'], 'w', encoding='utf-8') as f:
metadata = {}
json.dump(metadata, f)
self.logger.info("Processed %d modules in %0.2fs", len(self.import_graph.done), time.time() - self.start_time) self.logger.info("Processed %d modules in %0.2fs", len(self.import_graph.done), time.time() - self.start_time)
def stop(self, timeout=2.0): def stop(self, timeout=2.0):

View File

@@ -19,7 +19,7 @@ class ProjectLayoutUseTest(ExtractorTest):
def test_invalid_layout(self): def test_invalid_layout(self):
try: try:
with environment("SEMMLE_PATH_TRANSFORMER", "nonsuch/project-layout"): with environment("CODEQL_PATH_TRANSFORMER", "nonsuch/project-layout"):
self.run_extractor("-R", self.src_path) self.run_extractor("-R", self.src_path)
except subprocess.CalledProcessError as ex: except subprocess.CalledProcessError as ex:
self.assertEqual(ex.returncode, 2) self.assertEqual(ex.returncode, 2)

View File

@@ -0,0 +1 @@
import semmle.python.internal.OverlayDiscardConsistencyQuery

View File

@@ -0,0 +1,5 @@
---
category: feature
---
* Initial support for incremental Python databases via `codeql database create --overlay-base`/`--overlay-changes`.

View File

@@ -37,6 +37,7 @@ import semmle.python.pointsto.CallGraph
import semmle.python.objects.ObjectAPI import semmle.python.objects.ObjectAPI
import semmle.python.Unit import semmle.python.Unit
import site import site
private import semmle.python.Overlay
// Removing this import perturbs the compilation process enough that the points-to analysis gets // Removing this import perturbs the compilation process enough that the points-to analysis gets
// compiled -- and cached -- differently depending on whether the data flow library is imported. By // compiled -- and cached -- differently depending on whether the data flow library is imported. By
// importing it privately here, we ensure that the points-to analysis is compiled the same way. // importing it privately here, we ensure that the points-to analysis is compiled the same way.

View File

@@ -19,3 +19,4 @@ dataExtensions:
- semmle/python/frameworks/**/*.model.yml - semmle/python/frameworks/**/*.model.yml
- ext/*.model.yml - ext/*.model.yml
warnOnImplicitThis: true warnOnImplicitThis: true
compileForOverlayEval: true

View File

@@ -0,0 +1,355 @@
/**
* Defines entity discard predicates for Python overlay analysis.
*/
/*- Predicates -*/
/**
* Holds always for the overlay variant and never for the base variant.
* This local predicate is used to define local predicates that behave
* differently for the base and overlay variant.
*/
overlay[local]
predicate isOverlay() { databaseMetadata("isOverlay", "true") }
/**
* Gets the path associated with location `loc`: the path of its `@file`
* (via `locations_default`), or the path of its `@py_Module` (via `locations_ast`).
*/
overlay[local]
private string getPathForLocation(@location loc) {
exists(@file file | locations_default(loc, file, _, _, _, _) | files(file, result))
or
exists(@py_Module mod | locations_ast(loc, mod, _, _, _, _) | result = getPathForModule(mod))
}
/**
* Gets the path of the file or folder that `py_module_path` associates with module `mod`.
*/
overlay[local]
private string getPathForModule(@py_Module mod) {
exists(@container fileOrFolder | py_module_path(mod, fileOrFolder) |
result = getPathForContainer(fileOrFolder)
)
}
/** Gets the path recorded for `fileOrFolder` in the `files` or `folders` relation. */
overlay[local]
private string getPathForContainer(@container fileOrFolder) {
files(fileOrFolder, result) or folders(fileOrFolder, result)
}
/*- Discardable entities and their discard predicates -*/
/** Python database entities that use named TRAP IDs; the rest use *-ids. */
overlay[local]
private class NamedEntity = @py_Module or @container or @py_cobject;
/**
* Holds if `el` is a `NamedEntity` whose path is listed in `overlayChangedFiles`
* and which does not exist in the overlay variant.
*/
overlay[discard_entity]
private predicate discardNamedEntity(@top el) {
el instanceof NamedEntity and
// Entities with named IDs can exist in the base, in the overlay, or in both.
exists(Discardable d | d = el |
overlayChangedFiles(d.getPath()) and
not d.existsInOverlay()
)
}
/**
* Holds if `el` is not a `NamedEntity`, its path is listed in `overlayChangedFiles`,
* and it exists in the base variant.
*/
overlay[discard_entity]
private predicate discardStarEntity(@top el) {
not el instanceof NamedEntity and
// Entities with *-ids can exist either in base or overlay, but not both.
exists(Discardable d | d = el |
overlayChangedFiles(d.getPath()) and
d.existsInBase()
)
}
/**
* An abstract base class for all elements that can be discarded from the base.
*/
overlay[local]
abstract class Discardable extends @top {
/** Gets the path to the file in which this element occurs. */
abstract string getPath();
/** Holds if this element exists in the base variant. */
predicate existsInBase() { not isOverlay() and exists(this) }
/** Holds if this element exists in the overlay variant. */
predicate existsInOverlay() { isOverlay() and exists(this) }
/** Gets a textual representation of this discardable element. */
string toString() { none() }
}
/**
* Discardable locatable AST nodes (`@py_location_parent`).
*/
overlay[local]
final private class DiscardableLocatable extends Discardable instanceof @py_location_parent {
override string getPath() {
exists(@location loc | py_locations(loc, this) | result = getPathForLocation(loc))
}
}
/**
* Discardable scopes (classes, functions, modules).
*/
overlay[local]
final private class DiscardableScope extends Discardable instanceof @py_scope {
override string getPath() {
exists(@location loc | py_scope_location(loc, this) | result = getPathForLocation(loc))
or
result = getPathForModule(this)
}
}
/**
* Discardable files and folders.
*/
overlay[local]
final private class DiscardableContainer extends Discardable instanceof @container {
override string getPath() { result = getPathForContainer(this) }
}
/** Discardable control flow nodes */
overlay[local]
final private class DiscardableCfgNode extends Discardable instanceof @py_flow_node {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_flow_bb_node(this, d.(@py_ast_node), _, _))
}
}
/** Discardable Python variables. */
overlay[local]
final private class DiscardableVar extends Discardable instanceof @py_variable {
override string getPath() {
exists(Discardable parent | result = parent.getPath() | variable(this, parent.(@py_scope), _))
}
}
/** Discardable SSA variables. */
overlay[local]
final private class DiscardableSsaVar extends Discardable instanceof @py_ssa_var {
override string getPath() {
exists(DiscardableVar other | result = other.getPath() | py_ssa_var(this, other))
}
}
/** Discardable locations. */
overlay[local]
final private class DiscardableLocation extends Discardable instanceof @location {
override string getPath() { result = getPathForLocation(this) }
}
/** Discardable lines. */
overlay[local]
final private class DiscardableLine extends Discardable instanceof @py_line {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_line_lengths(this, d.(@py_Module), _, _))
}
}
/** Discardable string part lists. */
overlay[local]
final private class DiscardableStringPartList extends Discardable instanceof @py_StringPart_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_StringPart_lists(this, d.(@py_Bytes_or_Str)))
}
}
/** Discardable alias */
overlay[local]
final private class DiscardableAlias extends Discardable instanceof @py_alias {
override string getPath() {
exists(DiscardableAliasList d | result = d.getPath() | py_aliases(this, d, _))
}
}
/** Discardable alias list */
overlay[local]
final private class DiscardableAliasList extends Discardable instanceof @py_alias_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_alias_lists(this, d.(@py_Import)))
}
}
/** Discardable arguments */
overlay[local]
final private class DiscardableArguments extends Discardable instanceof @py_arguments {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_arguments(this, d.(@py_arguments_parent)))
}
}
/** Discardable boolop */
overlay[local]
final private class DiscardableBoolOp extends Discardable instanceof @py_boolop {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_boolops(this, _, d.(@py_BoolExpr)))
}
}
/** Discardable cmpop */
overlay[local]
final private class DiscardableCmpOp extends Discardable instanceof @py_cmpop {
override string getPath() {
exists(DiscardableCmpOpList d | result = d.getPath() | py_cmpops(this, _, d, _))
}
}
/** Discardable cmpop list */
overlay[local]
final private class DiscardableCmpOpList extends Discardable instanceof @py_cmpop_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_cmpop_lists(this, d.(@py_Compare)))
}
}
/** Discardable comprehension list */
overlay[local]
final private class DiscardableComprehensionList extends Discardable instanceof @py_comprehension_list
{
override string getPath() {
exists(Discardable d | result = d.getPath() | py_comprehension_lists(this, d.(@py_ListComp)))
}
}
/** Discardable dict item list */
overlay[local]
final private class DiscardableDictItemList extends Discardable instanceof @py_dict_item_list {
override string getPath() {
exists(Discardable d | result = d.getPath() |
py_dict_item_lists(this, d.(@py_dict_item_list_parent))
)
}
}
/** Discardable expr context */
overlay[local]
final private class DiscardableExprContext extends Discardable instanceof @py_expr_context {
override string getPath() {
exists(Discardable d | result = d.getPath() |
py_expr_contexts(this, _, d.(@py_expr_context_parent))
)
}
}
/** Discardable expr list */
overlay[local]
final private class DiscardableExprList extends Discardable instanceof @py_expr_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_expr_lists(this, d.(@py_expr_list_parent), _))
}
}
/** Discardable operator */
overlay[local]
final private class DiscardableOperator extends Discardable instanceof @py_operator {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_operators(this, _, d.(@py_BinaryExpr)))
}
}
/** Discardable parameter list */
overlay[local]
final private class DiscardableParameterList extends Discardable instanceof @py_parameter_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_parameter_lists(this, d.(@py_Function)))
}
}
/** Discardable pattern list */
overlay[local]
final private class DiscardablePatternList extends Discardable instanceof @py_pattern_list {
override string getPath() {
exists(Discardable d | result = d.getPath() |
py_pattern_lists(this, d.(@py_pattern_list_parent), _)
)
}
}
/** Discardable stmt list */
overlay[local]
final private class DiscardableStmtList extends Discardable instanceof @py_stmt_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_stmt_lists(this, d.(@py_stmt_list_parent), _))
}
}
/** Discardable str list */
overlay[local]
final private class DiscardableStrList extends Discardable instanceof @py_str_list {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_str_lists(this, d.(@py_str_list_parent)))
}
}
/** Discardable type parameter list */
overlay[local]
final private class DiscardableTypeParameterList extends Discardable instanceof @py_type_parameter_list
{
override string getPath() {
exists(Discardable d | result = d.getPath() |
py_type_parameter_lists(this, d.(@py_type_parameter_list_parent))
)
}
}
/** Discardable unaryop */
overlay[local]
final private class DiscardableUnaryOp extends Discardable instanceof @py_unaryop {
override string getPath() {
exists(Discardable d | result = d.getPath() | py_unaryops(this, _, d.(@py_UnaryExpr)))
}
}
/** Discardable comment */
overlay[local]
final private class DiscardableComment extends Discardable instanceof @py_comment {
override string getPath() {
exists(DiscardableLocation d | result = d.getPath() | py_comments(this, _, d))
}
}
/*- XML -*/
overlay[local]
final private class DiscardableXmlLocatable extends Discardable instanceof @xmllocatable {
override string getPath() {
exists(@location loc | xmllocations(this, loc) | result = getPathForLocation(loc))
}
}
/**
* Holds if the overlay variant contains an XML locatable with path `path`,
* not counting entities that are files or XML namespaces.
*/
overlay[local]
private predicate overlayXmlExtracted(string path) {
exists(DiscardableXmlLocatable d | not files(d, _) and not xmlNs(d, _, _, _) |
d.existsInOverlay() and
path = d.getPath()
)
}
/**
* Holds if `el` is an XML locatable that exists in the base variant and whose
* path satisfies `overlayXmlExtracted`.
*/
overlay[discard_entity]
private predicate discardXmlLocatable(@xmllocatable el) {
exists(DiscardableXmlLocatable d | d = el |
// The XML extractor is currently not incremental and may extract more
// XML files than those included in `overlayChangedFiles`, so this discard predicate
// handles those files alongside the normal `discardStarEntity` logic.
overlayXmlExtracted(d.getPath()) and
d.existsInBase()
)
}
/*- YAML -*/
overlay[local]
final private class DiscardableYamlLocatable extends Discardable instanceof @yaml_locatable {
override string getPath() {
exists(@location loc | yaml_locations(this, loc) | result = getPathForLocation(loc))
}
}
/** Holds if the overlay variant contains a YAML locatable with path `path`. */
overlay[local]
private predicate overlayYamlExtracted(string path) {
exists(DiscardableYamlLocatable l | l.existsInOverlay() | path = l.getPath())
}
/**
* Holds if `el` is a YAML locatable that exists in the base variant and whose
* path satisfies `overlayYamlExtracted`.
*/
overlay[discard_entity]
private predicate discardBaseYamlLocatable(@yaml_locatable el) {
exists(DiscardableYamlLocatable d | d = el |
// The YAML extractor is currently not incremental and may extract more
// YAML files than those included in `overlayChangedFiles`, so this discard predicate
// handles those files alongside the normal `discardStarEntity` logic.
overlayYamlExtracted(d.getPath()) and
d.existsInBase()
)
}

View File

@@ -0,0 +1,90 @@
/**
* Provides consistency queries for checking that every database entity
* that can be discarded (i.e. everything but `@py_cobject` and
* `@externalDataElement`, neither of which can be linked to a path) in an overlay
* database is indeed discarded, by proxy of having exactly one `Discardable.getPath()`.
*/
import python
import semmle.python.Overlay
/**
 * A database entity of type `@top`, with a `toString` that reports which
 * member type of the `@top` union the entity belongs to.
 */
class TopWithToString instanceof @top {
  /**
   * Gets the name of a member type of the `@top` union that this entity
   * belongs to. Each result is the dbscheme type name, spelled exactly.
   */
  string getDbType() {
    // Previously reported as "@source_element", which is not the name of the
    // dbscheme type being tested; every other branch echoes the exact name.
    this instanceof @py_source_element and result = "@py_source_element"
    or
    this instanceof @py_object and result = "@py_object"
    or
    this instanceof @py_base_var and result = "@py_base_var"
    or
    this instanceof @location and result = "@location"
    or
    this instanceof @py_line and result = "@py_line"
    or
    this instanceof @py_comment and result = "@py_comment"
    or
    this instanceof @py_expr_parent and result = "@py_expr_parent"
    or
    this instanceof @py_expr_context and result = "@py_expr_context"
    or
    this instanceof @py_operator and result = "@py_operator"
    or
    this instanceof @py_boolop and result = "@py_boolop"
    or
    this instanceof @py_cmpop and result = "@py_cmpop"
    or
    this instanceof @py_unaryop and result = "@py_unaryop"
    or
    this instanceof @py_cmpop_list and result = "@py_cmpop_list"
    or
    this instanceof @py_alias_list and result = "@py_alias_list"
    or
    this instanceof @py_StringPart_list and result = "@py_StringPart_list"
    or
    this instanceof @py_comprehension_list and result = "@py_comprehension_list"
    or
    this instanceof @py_dict_item_list and result = "@py_dict_item_list"
    or
    this instanceof @py_pattern_list and result = "@py_pattern_list"
    or
    this instanceof @py_stmt_list and result = "@py_stmt_list"
    or
    this instanceof @py_str_list and result = "@py_str_list"
    or
    this instanceof @py_type_parameter_list and result = "@py_type_parameter_list"
    or
    this instanceof @externalDefect and result = "@externalDefect"
    or
    this instanceof @externalMetric and result = "@externalMetric"
    or
    this instanceof @externalDataElement and result = "@externalDataElement"
    or
    this instanceof @duplication_or_similarity and result = "@duplication_or_similarity"
    or
    this instanceof @svnentry and result = "@svnentry"
    or
    this instanceof @xmllocatable and result = "@xmllocatable"
    or
    this instanceof @yaml_locatable and result = "@yaml_locatable"
  }

  /**
   * Gets the database type name of this entity as given by `getDbType`,
   * or "Unknown type" if `getDbType` has no result.
   */
  string toString() {
    result = this.getDbType()
    or
    not exists(this.getDbType()) and
    result = "Unknown type"
  }
}
/**
* Holds if `el` violates the overlay discard invariants, with `message` describing how:
* either `el` is not `Discardable` (and is neither a `@py_cobject` nor an
* `@externalDataElement`), or `el` is `Discardable` but its `getPath()` has
* zero results or more than one result.
*/
query predicate consistencyTest(TopWithToString el, string message) {
not el instanceof Discardable and
not el instanceof @py_cobject and // cannot be linked to a path
not el instanceof @externalDataElement and // cannot be linked to a path
message = "Not Discardable"
or
exists(Discardable d, int numPaths | d = el and numPaths = count(d.getPath()) |
numPaths = 0 and
message = "Discardable but no path found"
or
numPaths > 1 and
message = "Discardable but multiple paths found (" + concat(d.getPath(), ", ") + ")"
)
}

View File

@@ -47,6 +47,22 @@ externalData(
string value : string ref string value : string ref
); );
/*- Overlay support -*/
/**
* The CLI will automatically emit the tuple `databaseMetadata("isOverlay", "true")`,
* along with an `overlayChangedFiles` tuple for each new/modified/deleted file,
* when building an overlay database, and these can be used by the discard predicates.
*/
databaseMetadata(
string metadataKey : string ref,
string value : string ref
);
overlayChangedFiles(
string path : string ref
);
/*- DEPRECATED: Snapshot date -*/ /*- DEPRECATED: Snapshot date -*/
snapshotDate(unique date snapshotDate : date ref); snapshotDate(unique date snapshotDate : date ref);
@@ -1234,3 +1250,12 @@ py_decorated_object(int object : @py_object ref,
@py_object = @py_cobject | @py_flow_node; @py_object = @py_cobject | @py_flow_node;
@py_source_element = @py_ast_node | @container; @py_source_element = @py_ast_node | @container;
/** The union of all Python database entities */
@top =
@py_source_element | @py_object | @py_base_var | @location | @py_line | @py_comment |
@py_expr_parent | @py_expr_context |
@py_operator | @py_boolop | @py_cmpop | @py_unaryop |
@py_cmpop_list | @py_alias_list | @py_StringPart_list | @py_comprehension_list | @py_dict_item_list | @py_pattern_list | @py_stmt_list | @py_str_list | @py_type_parameter_list |
@externalDefect | @externalMetric | @externalDataElement | @duplication_or_similarity | @svnentry |
@xmllocatable | @yaml_locatable;

View File

@@ -1269,6 +1269,53 @@
</dependencies> </dependencies>
</relation> </relation>
<relation> <relation>
<name>databaseMetadata</name>
<cardinality>1</cardinality>
<columnsizes>
<e>
<k>metadataKey</k>
<v>1</v>
</e>
<e>
<k>value</k>
<v>1</v>
</e>
</columnsizes>
<dependencies>
<dep>
<src>metadataKey</src>
<trg>value</trg>
<val>
<hist>
<budget>12</budget>
<bs/>
</hist>
</val>
</dep>
<dep>
<src>value</src>
<trg>metadataKey</trg>
<val>
<hist>
<budget>12</budget>
<bs/>
</hist>
</val>
</dep>
</dependencies>
</relation>
<relation>
<name>overlayChangedFiles</name>
<cardinality>50</cardinality>
<columnsizes>
<e>
<k>path</k>
<v>50</v>
</e>
</columnsizes>
<dependencies/>
</relation>
<relation>
<name>snapshotDate</name> <name>snapshotDate</name>
<cardinality>2</cardinality> <cardinality>2</cardinality>
<columnsizes> <columnsizes>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
description: Add databaseMetadata and overlayChangedFiles relations
compatibility: full

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
description: Add @top type
compatibility: full

View File

@@ -0,0 +1,5 @@
from new import *
from lib import *
def get_greeting():
return combine("Goodbye", get_new_target())

View File

@@ -0,0 +1,4 @@
from greeting import *
if __name__ == "__main__":
print(get_greeting())

View File

@@ -0,0 +1 @@
def combine(a, b): return f"{a}, {b}!"

View File

@@ -0,0 +1,2 @@
def get_new_target():
return "世界"

View File

@@ -0,0 +1 @@
semmle-extractor-options: -R . -m hello.py --filter exclude:**/*.testproj/**

View File

@@ -0,0 +1,152 @@
testStringLiterals
| greeting.py:5:20:5:28 | StringLiteral | Goodbye |
| hello.py:3:16:3:25 | StringLiteral | __main__ |
| lib/__init__.py:1:27:1:29 | StringLiteral | |
| lib/__init__.py:1:31:1:34 | StringLiteral | , |
| lib/__init__.py:1:36:1:38 | StringLiteral | ! |
| new.py:2:12:2:15 | StringLiteral | \u4e16\u754c |
testModules
| greeting.py:0:0:0:0 | Module greeting |
| hello.py:0:0:0:0 | Module hello |
| lib/__init__.py:0:0:0:0 | Module lib.__init__ |
| lib:0:0:0:0 | Package lib |
| new.py:0:0:0:0 | Module new |
testFunctions
| greeting.py:4:1:4:19 | Function get_greeting |
| lib/__init__.py:1:1:1:18 | Function combine |
| new.py:1:1:1:21 | Function get_new_target |
testClasses
testLocations
| greeting.py:0:0:0:0 | greeting.py:0 |
| greeting.py:1:1:1:17 | greeting.py:1 |
| greeting.py:1:6:1:8 | greeting.py:1 |
| greeting.py:2:1:2:17 | greeting.py:2 |
| greeting.py:2:6:2:8 | greeting.py:2 |
| greeting.py:4:1:4:19 | greeting.py:4 |
| greeting.py:4:1:4:19 | greeting.py:4 |
| greeting.py:4:1:4:19 | greeting.py:4 |
| greeting.py:4:5:4:16 | greeting.py:4 |
| greeting.py:5:5:5:47 | greeting.py:5 |
| greeting.py:5:12:5:18 | greeting.py:5 |
| greeting.py:5:12:5:47 | greeting.py:5 |
| greeting.py:5:20:5:28 | greeting.py:5 |
| greeting.py:5:31:5:44 | greeting.py:5 |
| greeting.py:5:31:5:46 | greeting.py:5 |
| hello.py:0:0:0:0 | hello.py:0 |
| hello.py:1:1:1:22 | hello.py:1 |
| hello.py:1:6:1:13 | hello.py:1 |
| hello.py:3:1:3:26 | hello.py:3 |
| hello.py:3:4:3:11 | hello.py:3 |
| hello.py:3:4:3:25 | hello.py:3 |
| hello.py:3:16:3:25 | hello.py:3 |
| hello.py:4:5:4:9 | hello.py:4 |
| hello.py:4:5:4:25 | hello.py:4 |
| hello.py:4:5:4:25 | hello.py:4 |
| hello.py:4:11:4:22 | hello.py:4 |
| hello.py:4:11:4:24 | hello.py:4 |
| lib/__init__.py:0:0:0:0 | lib/__init__.py:0 |
| lib/__init__.py:1:1:1:18 | lib/__init__.py:1 |
| lib/__init__.py:1:1:1:18 | lib/__init__.py:1 |
| lib/__init__.py:1:1:1:18 | lib/__init__.py:1 |
| lib/__init__.py:1:5:1:11 | lib/__init__.py:1 |
| lib/__init__.py:1:13:1:13 | lib/__init__.py:1 |
| lib/__init__.py:1:16:1:16 | lib/__init__.py:1 |
| lib/__init__.py:1:20:1:38 | lib/__init__.py:1 |
| lib/__init__.py:1:27:1:29 | lib/__init__.py:1 |
| lib/__init__.py:1:27:1:38 | lib/__init__.py:1 |
| lib/__init__.py:1:30:1:30 | lib/__init__.py:1 |
| lib/__init__.py:1:31:1:34 | lib/__init__.py:1 |
| lib/__init__.py:1:35:1:35 | lib/__init__.py:1 |
| lib/__init__.py:1:36:1:38 | lib/__init__.py:1 |
| lib:0:0:0:0 | lib:0 |
| new.py:0:0:0:0 | new.py:0 |
| new.py:1:1:1:21 | new.py:1 |
| new.py:1:1:1:21 | new.py:1 |
| new.py:1:1:1:21 | new.py:1 |
| new.py:1:5:1:18 | new.py:1 |
| new.py:2:5:2:15 | new.py:2 |
| new.py:2:12:2:15 | new.py:2 |
testFiles
| greeting.py:0:0:0:0 | greeting.py |
| hello.py:0:0:0:0 | hello.py |
| lib/__init__.py:0:0:0:0 | lib/__init__.py |
| new.py:0:0:0:0 | new.py |
testCfgNodes
| greeting.py:0:0:0:0 | Entry node for Module greeting |
| greeting.py:0:0:0:0 | Exit node for Module greeting |
| greeting.py:1:1:1:17 | ControlFlowNode for from new import * |
| greeting.py:1:6:1:8 | ControlFlowNode for ImportExpr |
| greeting.py:2:1:2:17 | ControlFlowNode for from lib import * |
| greeting.py:2:6:2:8 | ControlFlowNode for ImportExpr |
| greeting.py:4:1:4:19 | ControlFlowNode for FunctionExpr |
| greeting.py:4:1:4:19 | Entry node for Function get_greeting |
| greeting.py:4:1:4:19 | Exit node for Function get_greeting |
| greeting.py:4:5:4:16 | ControlFlowNode for get_greeting |
| greeting.py:5:5:5:47 | ControlFlowNode for Return |
| greeting.py:5:12:5:18 | ControlFlowNode for combine |
| greeting.py:5:12:5:47 | ControlFlowNode for combine() |
| greeting.py:5:20:5:28 | ControlFlowNode for StringLiteral |
| greeting.py:5:31:5:44 | ControlFlowNode for get_new_target |
| greeting.py:5:31:5:46 | ControlFlowNode for get_new_target() |
| hello.py:0:0:0:0 | Entry node for Module hello |
| hello.py:0:0:0:0 | Exit node for Module hello |
| hello.py:1:1:1:22 | ControlFlowNode for from greeting import * |
| hello.py:1:6:1:13 | ControlFlowNode for ImportExpr |
| hello.py:3:4:3:11 | ControlFlowNode for __name__ |
| hello.py:3:4:3:25 | ControlFlowNode for Compare |
| hello.py:3:16:3:25 | ControlFlowNode for StringLiteral |
| hello.py:4:5:4:9 | ControlFlowNode for print |
| hello.py:4:5:4:25 | ControlFlowNode for print() |
| hello.py:4:11:4:22 | ControlFlowNode for get_greeting |
| hello.py:4:11:4:24 | ControlFlowNode for get_greeting() |
| lib/__init__.py:0:0:0:0 | Entry node for Module lib.__init__ |
| lib/__init__.py:0:0:0:0 | Exit node for Module lib.__init__ |
| lib/__init__.py:1:1:1:18 | ControlFlowNode for FunctionExpr |
| lib/__init__.py:1:1:1:18 | Entry node for Function combine |
| lib/__init__.py:1:1:1:18 | Exit node for Function combine |
| lib/__init__.py:1:5:1:11 | ControlFlowNode for combine |
| lib/__init__.py:1:13:1:13 | ControlFlowNode for a |
| lib/__init__.py:1:16:1:16 | ControlFlowNode for b |
| lib/__init__.py:1:20:1:38 | ControlFlowNode for Return |
| lib/__init__.py:1:27:1:29 | ControlFlowNode for StringLiteral |
| lib/__init__.py:1:27:1:38 | ControlFlowNode for Fstring |
| lib/__init__.py:1:30:1:30 | ControlFlowNode for a |
| lib/__init__.py:1:31:1:34 | ControlFlowNode for StringLiteral |
| lib/__init__.py:1:35:1:35 | ControlFlowNode for b |
| lib/__init__.py:1:36:1:38 | ControlFlowNode for StringLiteral |
| lib:0:0:0:0 | Entry node for Package lib |
| new.py:0:0:0:0 | Entry node for Module new |
| new.py:0:0:0:0 | Exit node for Module new |
| new.py:1:1:1:21 | ControlFlowNode for FunctionExpr |
| new.py:1:1:1:21 | Entry node for Function get_new_target |
| new.py:1:1:1:21 | Exit node for Function get_new_target |
| new.py:1:5:1:18 | ControlFlowNode for get_new_target |
| new.py:2:5:2:15 | ControlFlowNode for Return |
| new.py:2:12:2:15 | ControlFlowNode for StringLiteral |
testSsaVars
| file://:0:0:0:0 | SSA Variable __name__ |
| file://:0:0:0:0 | SSA Variable get_greeting |
| file://:0:0:0:0 | SSA Variable print |
| greeting.py:4:5:4:16 | SSA Variable get_greeting |
| lib/__init__.py:1:5:1:11 | SSA Variable combine |
| lib/__init__.py:1:13:1:13 | SSA Variable a |
| lib/__init__.py:1:16:1:16 | SSA Variable b |
| new.py:1:5:1:18 | SSA Variable get_new_target |
testVars
| Global Variable __name__ | greeting.py:0:0:0:0 | Module greeting |
| Global Variable __name__ | hello.py:0:0:0:0 | Module hello |
| Global Variable __name__ | lib/__init__.py:0:0:0:0 | Module lib.__init__ |
| Global Variable __name__ | new.py:0:0:0:0 | Module new |
| Global Variable __package__ | greeting.py:0:0:0:0 | Module greeting |
| Global Variable __package__ | hello.py:0:0:0:0 | Module hello |
| Global Variable __package__ | lib/__init__.py:0:0:0:0 | Module lib.__init__ |
| Global Variable __package__ | new.py:0:0:0:0 | Module new |
| Global Variable combine | greeting.py:0:0:0:0 | Module greeting |
| Global Variable combine | lib/__init__.py:0:0:0:0 | Module lib.__init__ |
| Global Variable get_greeting | greeting.py:0:0:0:0 | Module greeting |
| Global Variable get_greeting | hello.py:0:0:0:0 | Module hello |
| Global Variable get_new_target | greeting.py:0:0:0:0 | Module greeting |
| Global Variable get_new_target | new.py:0:0:0:0 | Module new |
| Global Variable print | hello.py:0:0:0:0 | Module hello |
| Local Variable a | lib/__init__.py:1:1:1:18 | Function combine |
| Local Variable b | lib/__init__.py:1:1:1:18 | Function combine |

View File

@@ -0,0 +1,19 @@
import python
/** Holds if `text` is the value of `l.getText()` for the string literal `l`. */
query predicate testStringLiterals(StringLiteral l, string text) { l.getText() = text }
/** Holds for every `Module` value `m`; the body is `any()`, so nothing is filtered out. */
query predicate testModules(Module m) { any() }
/** Holds for every `Function` value `f`; the body is `any()`, so nothing is filtered out. */
query predicate testFunctions(Function f) { any() }
/** Holds for every `Class` value `c`; the body is `any()`, so nothing is filtered out. */
query predicate testClasses(Class c) { any() }
/** Holds for every `Location` value `l`; the body is `any()`, so nothing is filtered out. */
query predicate testLocations(Location l) { any() }
/** Holds for every `File` value `f`; the body is `any()`, so nothing is filtered out. */
query predicate testFiles(File f) { any() }
/** Holds for every `ControlFlowNode` value `n`; the body is `any()`, so nothing is filtered out. */
query predicate testCfgNodes(ControlFlowNode n) { any() }
/** Holds for every `SsaVariable` value `var`; the body is `any()`, so nothing is filtered out. */
query predicate testSsaVars(SsaVariable var) { any() }
/** Holds if `s` is the scope returned by `var.getScope()` for the variable `var`. */
query predicate testVars(Variable var, Scope s) { s = var.getScope() }

View File

@@ -0,0 +1,3 @@
overlay:
base: "orig_src"
overlay: "../basic-full-eval"

View File

@@ -0,0 +1 @@
semmle-extractor-options: -R . -m hello.py --filter exclude:**/*.testproj/**

View File

@@ -0,0 +1,5 @@
from old import *
from lib import *
def get_greeting():
return combine("Hello", get_old_target())

View File

@@ -0,0 +1,4 @@
from greeting import *
if __name__ == "__main__":
print(get_greeting())

View File

@@ -0,0 +1 @@
def combine(a, b): return f"{a}, {b}!"

View File

@@ -0,0 +1,2 @@
def get_old_target():
return "World"

View File

@@ -0,0 +1 @@
../basic-full-eval/test.ql