Swift: cleanup and some docstrings for codegen

Also added code generation and clang formatting to the pre-commit
configuration.
This commit is contained in:
Paolo Tranquilli
2022-04-14 11:15:19 +02:00
parent 91fd83a554
commit 64496b4c97
12 changed files with 186 additions and 92 deletions

View File

@@ -2,28 +2,41 @@
# See https://pre-commit.com/hooks.html for more hooks
exclude: /test/.*$(?<!\.ql)(?<!\.qll)(?<!\.qlref)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: local
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v13.0.1
hooks:
- id: codeql-format
- id: clang-format
files: ^swift/.*\.(h|c|cpp)$
- repo: local
hooks:
- id: codeql-format
name: Fix QL file formatting
files: \.qll?$
language: system
entry: codeql query format --in-place
- id: sync-files
- id: sync-files
name: Fix files required to be identical
language: system
entry: python3 config/sync-files.py --latest
pass_filenames: false
- id: qhelp
- id: qhelp
name: Check query help generation
files: \.qhelp$
language: system
entry: python3 misc/scripts/check-qhelp.py
- id: swift-codegen
name: Run Swift checked in code generation
files: ^swift/(codegen/|.*/generated/|ql/lib/swift\.dbscheme$)
language: system
entry: bazel run //swift/codegen
pass_filenames: false

View File

@@ -1,8 +1,4 @@
py_binary(
name = "codegen",
srcs = glob(["**/*.py"]),
data = glob(["**/*.mustache"]) + [
"schema.yml",
"prefix.dbscheme",
],
)

View File

@@ -1,4 +1,5 @@
#!/usr/bin/env python3
""" Driver script to run all checked in code generation """
from lib import generator
import dbschemegen

View File

@@ -1,24 +1,30 @@
#!/usr/bin/env python3
import pathlib
import inflection
from lib.renderer import Renderer
from lib.dbscheme import *
from lib import paths, schema, generator
from lib.dbscheme import *
log = logging.getLogger(__name__)
def dbtype(typename):
    """ translate a type to a dbscheme counterpart, using `@lower_underscore` format for classes

    Names starting with an uppercase letter are treated as classes and turned into `@` types.
    Anything else (including an empty string) is returned unchanged.
    """
    # slice rather than index, so that an empty name does not raise `IndexError`
    if typename[:1].isupper():
        return "@" + inflection.underscore(typename)
    return typename
def cls_to_dbscheme(cls: schema.Class):
""" Yield all dbscheme entities needed to model class `cls` """
if cls.derived:
yield DbUnion(dbtype(cls.name), (dbtype(c) for c in cls.derived))
if not cls.derived or any(f.is_single() for f in cls.fields):
# output a table specific to a class only if it is a leaf class or it has 1-to-1 properties
# Leaf classes need a table to bind the `@` ids
# 1-to-1 properties are added to a class specific table
# in other cases, separate tables are used for the properties, and a class specific table is unneeded
if not cls.derived or any(f.is_single for f in cls.properties):
binding = not cls.derived
keyset = DbKeySet(["id"]) if cls.derived else None
yield DbTable(
@@ -27,11 +33,12 @@ def cls_to_dbscheme(cls: schema.Class):
columns=[
DbColumn("id", type=dbtype(cls.name), binding=binding),
] + [
DbColumn(f.name, dbtype(f.type)) for f in cls.fields if f.is_single()
DbColumn(f.name, dbtype(f.type)) for f in cls.properties if f.is_single
]
)
for f in cls.fields:
if f.is_optional():
# use property-specific tables for 1-to-many and 1-to-at-most-1 properties
for f in cls.properties:
if f.is_optional:
yield DbTable(
keyset=DbKeySet(["id"]),
name=inflection.tableize(f"{cls.name}_{f.name}"),
@@ -40,7 +47,7 @@ def cls_to_dbscheme(cls: schema.Class):
DbColumn(f.name, dbtype(f.type)),
],
)
elif f.is_repeated():
elif f.is_repeated:
yield DbTable(
keyset=DbKeySet(["id", "index"]),
name=inflection.tableize(f"{cls.name}_{f.name}"),
@@ -52,24 +59,31 @@ def cls_to_dbscheme(cls: schema.Class):
)
def generate(opts):
def get_declarations(data: schema.Schema):
    """ Return the list of all dbscheme entities yielded by `cls_to_dbscheme` over the classes in `data` """
    declarations = []
    for cls in data.classes.values():
        declarations.extend(cls_to_dbscheme(cls))
    return declarations
def get_includes(data: schema.Schema, include_dir: pathlib.Path):
    """ Read every file listed in `data.includes`, resolved relative to `include_dir`

    Return a list of `DbSchemeInclude`, each carrying the path of the included file relative to
    `paths.swift_dir` and the text read from it.
    """
    includes = []
    # `data.includes` is a set of strings, whose iteration order can change from one interpreter run
    # to the next because of hash randomization: sort it so that the generated output is reproducible
    for name in sorted(data.includes):
        path = include_dir / name
        with open(path) as inclusion:
            includes.append(DbSchemeInclude(src=path.relative_to(paths.swift_dir), data=inclusion.read()))
    return includes
def generate(opts, renderer):
input = opts.schema.resolve()
out = opts.dbscheme.resolve()
renderer = Renderer(opts.check)
with open(input) as src:
data = schema.load(src)
declarations = [d for cls in data.classes.values() for d in cls_to_dbscheme(cls)]
dbscheme = DbScheme(src=input.relative_to(paths.swift_dir),
includes=get_includes(data, include_dir=input.parent),
declarations=get_declarations(data))
includes = []
for inc in data.includes:
inc = input.parent / inc
with open(inc) as inclusion:
includes.append({"src": inc.relative_to(paths.swift_dir), "data": inclusion.read()})
renderer.render("dbscheme", out, includes=includes, src=input.relative_to(paths.swift_dir),
declarations=declarations)
return renderer.written
renderer.render("dbscheme", out, dbscheme)
if __name__ == "__main__":

View File

@@ -1,5 +1,6 @@
""" dbscheme format representation """
import logging
import re
from dataclasses import dataclass
from typing import ClassVar, List
@@ -83,3 +84,16 @@ class DbUnion(DbDecl):
self.rhs = [DbUnionCase(x) for x in self.rhs]
self.rhs.sort(key=lambda c: c.type)
self.rhs[0].first = True
@dataclass
class DbSchemeInclude:
    """ The content of a file to be included in the generated dbscheme """
    # where the included content was read from
    src: str
    # text of the included file
    data: str
@dataclass
class DbScheme:
    """ A whole dbscheme: the source it is generated from, its includes and its declarations """
    # where the declarations were generated from
    src: str
    # contents of included files
    includes: List[DbSchemeInclude]
    # dbscheme entities
    declarations: List[DbDecl]

View File

@@ -1,44 +1,15 @@
""" generator script scaffolding """
import argparse
import collections
import logging
import pathlib
import sys
from . import paths
options = collections.defaultdict(list)
from . import options, render
class Option:
def __init__(self, *args, tags=None, **kwargs):
tags = tags or []
self.args = args
self.kwargs = kwargs
if tags:
for t in tags:
options[t].append(self)
else:
options["*"].append(self)
def add_to(self, parser: argparse.ArgumentParser):
parser.add_argument(*self.args, **self.kwargs)
Option("--check", "-c", action="store_true")
Option("--verbose", "-v", action="store_true")
Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
def _parse(*tags):
def _parse(tags):
parser = argparse.ArgumentParser()
if not tags:
opts = [o for os in options.values() for o in os]
else:
opts = options["*"]
for t in tags:
opts.extend(options[t])
for opt in opts:
for opt in options.get(tags):
opt.add_to(parser)
ret = parser.parse_args()
log_level = logging.DEBUG if ret.verbose else logging.INFO
@@ -46,10 +17,13 @@ def _parse(*tags):
return ret
def run(*generate, tags=()):
opts = _parse(*tags)
done_something = False
for g in generate:
if g(opts):
done_something = True
sys.exit(1 if opts.check and done_something else 0)
def run(*generators, tags=None):
    """ Parse the command line and call each function in `generators` in turn

    Only options tagged with `tags` are parsed (all of them if `tags` is unspecified). Every generator is
    called with the parsed option namespace and one shared `render.Renderer` instance, which is created in
    dry run mode when `--check` is given.

    This function does not return: it exits with status 1 if `--check` was given and the renderer reports
    having done something, and with status 0 otherwise.
    """
    opts = _parse(tags)
    renderer = render.Renderer(dryrun=opts.check)
    for generator in generators:
        generator(opts, renderer)
    check_failed = opts.check and renderer.done_something
    sys.exit(1 if check_failed else 0)

View File

@@ -0,0 +1,48 @@
""" generator options, categorized by tags """
import argparse
import collections
import pathlib
from typing import Tuple
from . import paths
def _init_options():
    """ Create all the options available to generators

    `--check` and `--verbose` carry no tag, while `--schema` and `--dbscheme` are tagged
    with `schema` and `dbscheme` respectively.
    """
    schema_default = paths.swift_dir / "codegen/schema.yml"
    dbscheme_default = paths.swift_dir / "ql/lib/swift.dbscheme"

    Option("--check", "-c", action="store_true")
    Option("--verbose", "-v", action="store_true")
    Option("--schema", tags=["schema"], type=pathlib.Path, default=schema_default)
    Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=dbscheme_default)
_options = collections.defaultdict(list)
class Option:
    """ A command line option, registered on construction under each of its tags

    Positional and keyword arguments other than `tags` are stored and later forwarded as they are
    to `argparse.ArgumentParser.add_argument`. An option created without tags is registered under
    the wildcard tag `*`.
    """

    def __init__(self, *args, tags=None, **kwargs):
        self.args = args
        self.kwargs = kwargs
        # no tags (or an empty collection of them) means the wildcard tag
        for tag in tags or ["*"]:
            _options[tag].append(self)

    def add_to(self, parser: argparse.ArgumentParser):
        """ Add this option to `parser` """
        parser.add_argument(*self.args, **self.kwargs)
_init_options()
def get(tags: Tuple[str, ...]):
    """ get options marked by `tags`

    Return all options if tags is falsy. Options tagged by wildcard '*' are always returned.
    `tags` can be any iterable of tag names (a tuple, a list, ...) or a single tag name as a string.
    Repeated tags (including an explicit '*') are considered only once, and tags for which no option
    was registered contribute nothing.
    """
    if not tags:
        return (o for tagged_opts in _options.values() for o in tagged_opts)
    if isinstance(tags, str):
        # a lone string would otherwise be iterated character by character
        tags = (tags,)
    # use specifically tagged options + those tagged with wildcard *
    # unpacking (rather than `('*',) + tags`) accepts any iterable, and `dict.fromkeys` drops
    # repeated tags while keeping their order
    selected = dict.fromkeys(('*', *tags))
    # `.get` avoids inserting an empty entry in the `defaultdict` for each unknown tag
    return (o for tag in selected for o in _options.get(tag, ()))

View File

@@ -1,9 +1,11 @@
""" module providing useful filesystem paths """
import pathlib
import sys
import os
try:
_workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])
_workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY']) # <- means we are using bazel run
swift_dir = _workspace_dir / 'swift'
lib_dir = swift_dir / 'codegen' / 'lib'
except KeyError:

View File

@@ -1,3 +1,10 @@
""" template renderer module, wrapping around `pystache.Renderer`
`pystache` is a python mustache engine, and mustache is a template language. More information on
https://mustache.github.io/
"""
import hashlib
import logging
@@ -8,15 +15,18 @@ from . import paths
log = logging.getLogger(__name__)
def md5(data):
def _md5(data):
    """ Return the raw MD5 digest of the bytes in `data` """
    hasher = hashlib.md5()
    hasher.update(data)
    return hasher.digest()
class Renderer:
def __init__(self, check=False):
""" Template renderer using mustache templates in the `templates` directory """
def __init__(self, dryrun=False):
""" Construct the renderer, which will not write anything if `dryrun` is `True` """
self.r = pystache.Renderer(search_dirs=str(paths.lib_dir / "templates"), escape=lambda u: u)
self.generator = paths.exe_file.relative_to(paths.swift_dir)
self.check = check
self.dryrun = dryrun
self.written = set()
self.skipped = set()
self.erased = set()
@@ -29,18 +39,21 @@ class Renderer:
def rendered(self):
return self.written | self.skipped
def render(self, name, output, **data):
def render(self, name, output, data):
""" Render the template called `name` in the template directory, writing to `output` using `data` as context
If the file is unchanged, then no write is performed (and `done_something` remains unchanged)
"""
mnemonic, _, _ = name.lower().partition(".")
output.parent.mkdir(parents=True, exist_ok=True)
data["generator"] = self.generator
data = self.r.render_name(name, data)
data = self.r.render_name(name, data, generator=self.generator)
if output.is_file():
with open(output, "rb") as file:
if md5(data.encode()) == md5(file.read()):
if _md5(data.encode()) == _md5(file.read()):
log.debug(f"skipped {output.name}")
self.skipped.add(output)
return
if self.check:
if self.dryrun:
log.error(f"would have generated {mnemonic} {output.name}")
else:
with open(output, "w") as out:
@@ -49,9 +62,10 @@ class Renderer:
self.written.add(output)
def cleanup(self, existing):
""" Remove files in `existing` for which no `render` has been called """
for f in existing - self.written - self.skipped:
if f.is_file():
if self.check:
if self.dryrun:
log.error(f"would have removed {f.name}")
else:
f.unlink()

View File

@@ -1,30 +1,41 @@
""" schema.yml format representation """
import pathlib
import re
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import List, Set, Dict
import re
import yaml
root_class_name = "Element"
class Cardinality(Enum):
    """ The cardinality of a property

    `ONE` is the default, `OPTIONAL` is for properties whose type is denoted by `?`, `MANY` is for
    those whose type is denoted by `*`
    """
    ONE = auto()
    OPTIONAL = auto()
    MANY = auto()
@dataclass
class Field:
class Property:
name: str
type: str
cardinality: Cardinality = Cardinality.ONE
@property
def is_single(self):
return self.cardinality == Cardinality.ONE
@property
def is_optional(self):
return self.cardinality == Cardinality.OPTIONAL
@property
def is_repeated(self):
return self.cardinality == Cardinality.MANY
@@ -34,7 +45,7 @@ class Class:
name: str
bases: Set[str] = field(default_factory=set)
derived: Set[str] = field(default_factory=set)
fields: List[Field] = field(default_factory=list)
properties: List[Property] = field(default_factory=list)
dir: pathlib.Path = pathlib.Path()
@@ -44,7 +55,7 @@ class Schema:
includes: Set[str] = field(default_factory=set)
def _parse_field(name, type):
def _parse_property(name, type):
if type.endswith("*"):
cardinality = Cardinality.MANY
type = type[:-1]
@@ -53,13 +64,11 @@ def _parse_field(name, type):
type = type[:-1]
else:
cardinality = Cardinality.ONE
return Field(name, type, cardinality)
return Property(name, type, cardinality)
root_class_name = "Element"
class DirSelector:
class _DirSelector:
""" Default output subdirectory selector for generated QL files, based on the `_directories` global field"""
def __init__(self, dir_to_patterns):
self.selector = [(re.compile(p), pathlib.Path(d)) for d, p in dir_to_patterns]
self.selector.append((re.compile(""), pathlib.Path()))
@@ -69,8 +78,9 @@ class DirSelector:
def load(file):
""" Parse the schema from `file` """
data = yaml.load(file, Loader=yaml.SafeLoader)
grouper = DirSelector(data.get("_directories", {}).items())
grouper = _DirSelector(data.get("_directories", {}).items())
ret = Schema(classes={cls: Class(cls, dir=grouper.get(cls)) for cls in data if not cls.startswith("_")},
includes=set(data.get("_includes", [])))
assert root_class_name not in ret.classes
@@ -82,7 +92,7 @@ def load(file):
cls = ret.classes[name]
for k, v in info.items():
if not k.startswith("_"):
cls.fields.append(_parse_field(k, v))
cls.properties.append(_parse_property(k, v))
elif k == "_extends":
if not isinstance(v, list):
v = [v]

View File

@@ -4,3 +4,7 @@
sourceLocationPrefix(
string prefix: string ref
);
answer_to_life_the_universe_and_everything(
int answer: int ref
)

View File

@@ -8,6 +8,10 @@ sourceLocationPrefix(
string prefix: string ref
);
answer_to_life_the_universe_and_everything(
int answer: int ref
)
// from codegen/schema.yml