mirror of
https://github.com/github/codeql.git
synced 2025-12-17 09:13:20 +01:00
This makes the result of code generation independent of the order in which classes are defined in the schema, and makes additional topological sorting unnecessary. Being independent of schema order will be important when reviewing the move to a pure Python schema, as generated code will be left untouched.
185 lines
5.4 KiB
Python
185 lines
5.4 KiB
Python
""" schema.yml format representation """
|
|
|
|
import pathlib
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from typing import List, Set, Union, Dict, ClassVar, Optional
|
|
from toposort import toposort_flatten
|
|
|
|
import yaml
|
|
|
|
|
|
class Error(Exception):
    """ Exception raised for problems found in the schema file """

    def __str__(self):
        """ Return the first constructor argument as the message.

        Only the first argument is rendered, even if more were passed. An
        exception built without arguments renders as the empty string, and a
        non-string first argument is converted with `str`, so that printing
        the error never raises on its own.
        """
        if not self.args:
            return ""
        return str(self.args[0])
|
|
|
|
|
|
# Name of the implicit root of the class hierarchy: `load` always creates a class with this name
# and makes it the base of every other class that does not declare `_extends`.
root_class_name = "Element"
|
|
|
|
|
|
@dataclass
class Property:
    """ Base representation of a property of a schema class.

    The `is_*` class-level flags describe the kind of the property. They are
    all `False` here, and each subclass switches on the ones that apply.
    """
    is_single: ClassVar = False
    is_optional: ClassVar = False
    is_repeated: ClassVar = False
    is_predicate: ClassVar = False

    # property name as written in the schema
    name: str
    # type name stripped of any `?`/`*` modifier; left as `None` when no type is given
    type: Optional[str] = None
    # whether the property was declared under a `_children` entry
    is_child: bool = False
    # pragmas attached to the property through `_pragma`
    pragmas: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class SingleProperty(Property):
    """ Property whose type carries no `?` or `*` modifier and is not `predicate` """
    is_single: ClassVar = True
|
|
|
|
|
|
@dataclass
class OptionalProperty(Property):
    """ Property declared with a trailing `?` on its type """
    is_optional: ClassVar = True
|
|
|
|
|
|
@dataclass
class RepeatedProperty(Property):
    """ Property declared with a trailing `*` on its type """
    is_repeated: ClassVar = True
|
|
|
|
|
|
@dataclass
class RepeatedOptionalProperty(Property):
    """ Property declared with a trailing `?*` on its type: both optional and repeated flags are set """
    is_optional: ClassVar = True
    is_repeated: ClassVar = True
|
|
|
|
|
|
@dataclass
class PredicateProperty(Property):
    """ Property declared with the special type `predicate`; its `type` field is left unset """
    is_predicate: ClassVar = True
|
|
|
|
|
|
@dataclass
class IpaInfo:
    """ Data attached to a class through its `_synth` schema entry """
    # value of the `from` key of the `_synth` entry, if any
    from_class: Optional[str] = None
    # value of the `on` key of the `_synth` entry, if any
    on_arguments: Optional[Dict[str, str]] = None
|
|
|
|
|
|
@dataclass
class Class:
    """ Representation of a class defined in the schema """
    # class name, i.e. the key of the class in the schema file
    name: str
    # names of the direct base classes; a list (not a set) so that the default
    # agrees with the annotation and with the lists assigned while loading
    bases: List[str] = field(default_factory=list)
    # names of the classes directly deriving from this one
    derived: Set[str] = field(default_factory=set)
    # properties in declaration order
    properties: List[Property] = field(default_factory=list)
    # output subdirectory for this class
    dir: pathlib.Path = pathlib.Path()
    # pragmas attached to the class through `_pragma`
    pragmas: List[str] = field(default_factory=list)
    # data from the `_synth` entry, if the class has one
    ipa: Optional[IpaInfo] = None

    @property
    def final(self):
        """ Whether no other class derives from this one """
        return not self.derived
|
|
|
|
|
|
@dataclass
class Schema:
    """ Result of loading a schema file """
    # class definitions keyed by class name
    classes: Dict[str, Class]
    # entries of the top-level `_includes` key of the schema file
    includes: Set[str] = field(default_factory=set)
|
|
|
|
|
|
# A schema value that may be written either as a single string or as a list of strings
_StrOrList = Union[str, List[str]]


def _auto_list(data: _StrOrList) -> List[str]:
    """ Normalize `data` to a list: lists are returned as they are, anything else is wrapped in one """
    return data if isinstance(data, list) else [data]
|
|
|
|
|
|
def _parse_property(name: str, data: Union[str, Dict[str, _StrOrList]], is_child: bool = False):
    """ Build the `Property` object described by a schema entry.

    `data` is either the type string itself, or a dictionary with a mandatory
    `type` key and an optional `_pragma` key. The suffix of the type string
    selects the property kind: `?*` repeated optional, `*` repeated, `?`
    optional, the literal `predicate` a predicate, anything else a single
    property. The dictionary passed in `data` is left untouched.

    Raises `Error` if the type is missing, empty or not a string, if the
    dictionary contains unknown keys, or if `is_child` is set and the type
    starts with a lowercase letter.
    """
    if isinstance(data, dict):
        if "type" not in data:
            raise Error(f"property {name} has no type")
        # read instead of pop, so that the caller's mapping is not modified
        pragmas = _auto_list(data.get("_pragma", []))
        type_spec = data["type"]
        unknown = [key for key in data if key not in ("type", "_pragma")]
        if unknown:
            raise Error(f"unknown metadata {', '.join(unknown)} in property {name}")
    else:
        pragmas = []
        type_spec = data
    # report malformed entries (e.g. an empty yaml value) as schema errors rather than
    # letting them surface as AttributeError/IndexError from the string operations below
    if not isinstance(type_spec, str) or not type_spec:
        raise Error(f"property {name} must have a non-empty string type, got {type_spec!r}")
    if is_child and type_spec[0].islower():
        raise Error(f"children must have class type, got {type_spec} for {name}")
    # `?*` must be tested before `*`, as it also ends with `*`
    if type_spec.endswith("?*"):
        return RepeatedOptionalProperty(name, type_spec[:-2], is_child=is_child, pragmas=pragmas)
    if type_spec.endswith("*"):
        return RepeatedProperty(name, type_spec[:-1], is_child=is_child, pragmas=pragmas)
    if type_spec.endswith("?"):
        return OptionalProperty(name, type_spec[:-1], is_child=is_child, pragmas=pragmas)
    if type_spec == "predicate":
        return PredicateProperty(name, pragmas=pragmas)
    return SingleProperty(name, type_spec, is_child=is_child, pragmas=pragmas)
|
|
|
|
|
|
def _parse_ipa(data: Dict[str, Union[str, Dict[str, str]]]):
    """ Build an `IpaInfo` from the `from` and `on` entries of `data`; missing entries become `None` """
    source_class = data.get("from")
    # 'on' is parsed as boolean True in yaml, so that is the key to look up
    arguments = data.get(True)
    return IpaInfo(from_class=source_class, on_arguments=arguments)
|
|
|
|
|
|
class _DirSelector:
    """ Default output subdirectory selector for generated QL files, based on the `_directories` global field"""

    def __init__(self, dir_to_patterns):
        """ Compile the `(directory, pattern)` pairs of `dir_to_patterns`, keeping their order """
        compiled = []
        for directory, pattern in dir_to_patterns:
            compiled.append((re.compile(pattern), pathlib.Path(directory)))
        # catch-all entry: the empty pattern matches any name and maps it to the empty path
        compiled.append((re.compile(""), pathlib.Path()))
        self.selector = compiled

    def get(self, name):
        """ Return the directory of the first pattern found anywhere in `name` """
        candidates = (directory for regex, directory in self.selector if regex.search(name))
        return next(candidates)
|
|
|
|
|
|
def load(path):
    """ Parse the schema from the file at `path`

    Top-level keys starting with `_` are metadata (`_directories` and
    `_includes` are read here), every other key must be a capitalized class
    name. The returned `Schema` lists classes grouped by directory, with
    directories in sorted order and each group ordered by `toposort_flatten`
    over the classes' bases, so the result does not depend on the order of the
    definitions in the file.

    Raises `Error` on a non-capitalized class key or on unknown class metadata.
    """
    with open(path) as stream:
        data = yaml.load(stream, Loader=yaml.SafeLoader)

    dir_selector = _DirSelector(data.get("_directories", {}).items())

    # create all classes up front, so that `_extends` may refer to classes defined later in the file
    classes = {root_class_name: Class(root_class_name)}
    for key in data:
        if not key.startswith("_"):
            classes[key] = Class(key, dir=dir_selector.get(key))

    for name, info in data.items():
        if name.startswith("_"):
            continue
        if not name[0].isupper():
            raise Error(f"keys in the schema file must be capitalized class names or metadata, got {name}")
        cls = classes[name]
        for key, value in info.items():
            if not key.startswith("_"):
                # plain keys are properties, underscore keys are class metadata
                cls.properties.append(_parse_property(key, value))
            elif key == "_extends":
                cls.bases = _auto_list(value)
                for base in cls.bases:
                    classes[base].derived.add(name)
            elif key == "_dir":
                cls.dir = pathlib.Path(value)
            elif key == "_children":
                for child_name, child_data in value.items():
                    cls.properties.append(_parse_property(child_name, child_data, is_child=True))
            elif key == "_pragma":
                cls.pragmas = _auto_list(value)
            elif key == "_synth":
                cls.ipa = _parse_ipa(value)
            else:
                raise Error(f"unknown metadata {key} for class {name}")
        # classes without explicit bases implicitly derive from the root class
        if cls.name != root_class_name and not cls.bases:
            cls.bases = [root_class_name]
            classes[root_class_name].derived.add(name)

    names_by_dir = {}
    for name, cls in classes.items():
        names_by_dir.setdefault(cls.dir, []).append(name)

    ordered_classes = {}
    for directory in sorted(names_by_dir):
        dependencies = {name: classes[name].bases for name in names_by_dir[directory]}
        for name in toposort_flatten(dependencies):
            ordered_classes[name] = classes[name]

    return Schema(classes=ordered_classes, includes=set(data.get("_includes", [])))
|