Swift: cleanup and some docstrings for codegen

Also added code generation and clang formatting to the pre-commit configuration.
2025-12-17 01:03:14 +01:00 · 2022-04-14 11:15:19 +02:00
parent 91fd83a554
commit 64496b4c97
12 changed files with 186 additions and 92 deletions
--- a/swift/codegen/lib/dbscheme.py
+++ b/swift/codegen/lib/dbscheme.py
@@ -1,5 +1,6 @@
+""" dbscheme format representation """
+
 import logging
-import re
 from dataclasses import dataclass
 from typing import ClassVar, List

@@ -83,3 +84,16 @@ class DbUnion(DbDecl):
        self.rhs = [DbUnionCase(x) for x in self.rhs]
        self.rhs.sort(key=lambda c: c.type)
        self.rhs[0].first = True
+
+
+@dataclass
+class DbSchemeInclude:
+    src: str
+    data: str
+
+
+@dataclass
+class DbScheme:
+    src: str
+    includes: List[DbSchemeInclude]
+    declarations: List[DbDecl]
--- a/swift/codegen/lib/generator.py
+++ b/swift/codegen/lib/generator.py
@@ -1,44 +1,15 @@
+""" generator script scaffolding """
+
 import argparse
-import collections
 import logging
-import pathlib
 import sys

-from . import paths
-
-options = collections.defaultdict(list)
+from . import options, render


-class Option:
-    def __init__(self, *args, tags=None, **kwargs):
-        tags = tags or []
-        self.args = args
-        self.kwargs = kwargs
-        if tags:
-            for t in tags:
-                options[t].append(self)
-        else:
-            options["*"].append(self)
-
-    def add_to(self, parser: argparse.ArgumentParser):
-        parser.add_argument(*self.args, **self.kwargs)
-
-
-Option("--check", "-c", action="store_true")
-Option("--verbose", "-v", action="store_true")
-Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
-Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
-
-
-def _parse(*tags):
+def _parse(tags):
    parser = argparse.ArgumentParser()
-    if not tags:
-        opts = [o for os in options.values() for o in os]
-    else:
-        opts = options["*"]
-        for t in tags:
-            opts.extend(options[t])
-    for opt in opts:
+    for opt in options.get(tags):
        opt.add_to(parser)
    ret = parser.parse_args()
    log_level = logging.DEBUG if ret.verbose else logging.INFO
@@ -46,10 +17,13 @@ def _parse(*tags):
    return ret


-def run(*generate, tags=()):
-    opts = _parse(*tags)
-    done_something = False
-    for g in generate:
-        if g(opts):
-            done_something = True
-    sys.exit(1 if opts.check and done_something else 0)
+def run(*generators, tags=None):
+    """ run generation functions in `generators`, parsing options tagged with `tags` (all if unspecified)
+
+    `generators` should be callables taking as input an option namespace and a `render.Renderer` instance
+    """
+    opts = _parse(tags)
+    renderer = render.Renderer(dryrun=opts.check)
+    for g in generators:
+        g(opts, renderer)
+    sys.exit(1 if opts.check and renderer.done_something else 0)
--- a/swift/codegen/lib/options.py
+++ b/swift/codegen/lib/options.py
@@ -0,0 +1,48 @@
+""" generator options, categorized by tags """
+
+import argparse
+import collections
+import pathlib
+from typing import Tuple
+
+from . import paths
+
+
+def _init_options():
+    Option("--check", "-c", action="store_true")
+    Option("--verbose", "-v", action="store_true")
+    Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
+    Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
+
+
+_options = collections.defaultdict(list)
+
+
+class Option:
+    def __init__(self, *args, tags=None, **kwargs):
+        tags = tags or []
+        self.args = args
+        self.kwargs = kwargs
+        if tags:
+            for t in tags:
+                _options[t].append(self)
+        else:
+            _options["*"].append(self)
+
+    def add_to(self, parser: argparse.ArgumentParser):
+        parser.add_argument(*self.args, **self.kwargs)
+
+
+_init_options()
+
+
+def get(tags: Tuple[str, ...]):
+    """ get options marked by `tags` (any iterable of tag names, e.g. a tuple or a list)
+
+    Return all options if tags is falsy. Options tagged by wildcard '*' are always returned
+    """
+    if not tags:
+        return (o for tagged_opts in _options.values() for o in tagged_opts)
+    else:
+        # wildcard options first, then the specifically tagged ones; unpacking avoids tuple + list TypeError
+        return (o for tag in ('*', *tags) for o in _options[tag])
--- a/swift/codegen/lib/paths.py
+++ b/swift/codegen/lib/paths.py
@@ -1,9 +1,11 @@
+""" module providing useful filesystem paths """
+
 import pathlib
 import sys
 import os

 try:
-    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])
+    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])  # <- means we are using bazel run
    swift_dir = _workspace_dir / 'swift'
    lib_dir = swift_dir / 'codegen' / 'lib'
 except KeyError:
--- a/swift/codegen/lib/renderer.py
+++ b/swift/codegen/lib/renderer.py
@@ -1,3 +1,10 @@
+""" template renderer module, wrapping around `pystache.Renderer`
+
+`pystache` is a Python mustache engine, and mustache is a template language. More information at
+
+https://mustache.github.io/
+"""
+
 import hashlib
 import logging

@@ -8,15 +15,18 @@ from . import paths
 log = logging.getLogger(__name__)


-def md5(data):
+def _md5(data):
    return hashlib.md5(data).digest()


 class Renderer:
-    def __init__(self, check=False):
+    """ Template renderer using mustache templates in the `templates` directory """
+
+    def __init__(self, dryrun=False):
+        """ Construct the renderer, which will not write anything if `dryrun` is `True` """
        self.r = pystache.Renderer(search_dirs=str(paths.lib_dir / "templates"), escape=lambda u: u)
        self.generator = paths.exe_file.relative_to(paths.swift_dir)
-        self.check = check
+        self.dryrun = dryrun
        self.written = set()
        self.skipped = set()
        self.erased = set()
@@ -29,18 +39,21 @@ class Renderer:
    def rendered(self):
        return self.written | self.skipped

-    def render(self, name, output, **data):
+    def render(self, name, output, data):
+        """ Render the template called `name` in the template directory, writing to `output` using `data` as context
+
+        If the file is unchanged, then no write is performed (and `done_something` remains unchanged)
+        """
        mnemonic, _, _ = name.lower().partition(".")
        output.parent.mkdir(parents=True, exist_ok=True)
-        data["generator"] = self.generator
-        data = self.r.render_name(name, data)
+        data = self.r.render_name(name, data, generator=self.generator)
        if output.is_file():
            with open(output, "rb") as file:
-                if md5(data.encode()) == md5(file.read()):
+                if _md5(data.encode()) == _md5(file.read()):
                    log.debug(f"skipped {output.name}")
                    self.skipped.add(output)
                    return
-        if self.check:
+        if self.dryrun:
            log.error(f"would have generated {mnemonic} {output.name}")
        else:
            with open(output, "w") as out:
@@ -49,9 +62,10 @@ class Renderer:
        self.written.add(output)

    def cleanup(self, existing):
+        """ Remove files in `existing` for which no `render` has been called """
        for f in existing - self.written - self.skipped:
            if f.is_file():
-                if self.check:
+                if self.dryrun:
                    log.error(f"would have removed {f.name}")
                else:
                    f.unlink()
--- a/swift/codegen/lib/schema.py
+++ b/swift/codegen/lib/schema.py
@@ -1,30 +1,41 @@
+""" schema.yml format representation """
+
 import pathlib
+import re
 from dataclasses import dataclass, field
 from enum import Enum, auto
 from typing import List, Set, Dict
-import re

 import yaml

+root_class_name = "Element"
+

 class Cardinality(Enum):
+    """ The cardinality of a property
+
+    `ONE` is the default, `OPTIONAL` are the properties denoted by `?`, `MANY` are those denoted by `*`
+    """
    ONE = auto()
    OPTIONAL = auto()
    MANY = auto()


@dataclass
-class Field:
+class Property:
    name: str
    type: str
    cardinality: Cardinality = Cardinality.ONE

+    @property
    def is_single(self):
        return self.cardinality == Cardinality.ONE

+    @property
    def is_optional(self):
        return self.cardinality == Cardinality.OPTIONAL

+    @property
    def is_repeated(self):
        return self.cardinality == Cardinality.MANY

@@ -34,7 +45,7 @@ class Class:
    name: str
    bases: Set[str] = field(default_factory=set)
    derived: Set[str] = field(default_factory=set)
-    fields: List[Field] = field(default_factory=list)
+    properties: List[Property] = field(default_factory=list)
    dir: pathlib.Path = pathlib.Path()


@@ -44,7 +55,7 @@ class Schema:
    includes: Set[str] = field(default_factory=set)


-def _parse_field(name, type):
+def _parse_property(name, type):
    if type.endswith("*"):
        cardinality = Cardinality.MANY
        type = type[:-1]
@@ -53,13 +64,11 @@ def _parse_field(name, type):
        type = type[:-1]
    else:
        cardinality = Cardinality.ONE
-    return Field(name, type, cardinality)
+    return Property(name, type, cardinality)


-root_class_name = "Element"
-
-
-class DirSelector:
+class _DirSelector:
+    """ Default output subdirectory selector for generated QL files, based on the `_directories` global field"""
    def __init__(self, dir_to_patterns):
        self.selector = [(re.compile(p), pathlib.Path(d)) for d, p in dir_to_patterns]
        self.selector.append((re.compile(""), pathlib.Path()))
@@ -69,8 +78,9 @@ class DirSelector:


 def load(file):
+    """ Parse the schema from `file` """
    data = yaml.load(file, Loader=yaml.SafeLoader)
-    grouper = DirSelector(data.get("_directories", {}).items())
+    grouper = _DirSelector(data.get("_directories", {}).items())
    ret = Schema(classes={cls: Class(cls, dir=grouper.get(cls)) for cls in data if not cls.startswith("_")},
                 includes=set(data.get("_includes", [])))
    assert root_class_name not in ret.classes
@@ -82,7 +92,7 @@ def load(file):
        cls = ret.classes[name]
        for k, v in info.items():
            if not k.startswith("_"):
-                cls.fields.append(_parse_field(k, v))
+                cls.properties.append(_parse_property(k, v))
            elif k == "_extends":
                if not isinstance(v, list):
                    v = [v]