Swift: cleanup and some docstrings for codegen

Also added code generation and clang formatting to the pre-commit configuration.
2025-12-16 16:53:25 +01:00 · 2022-04-14 11:15:19 +02:00
parent 91fd83a554
commit 64496b4c97
12 changed files with 186 additions and 92 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,28 +2,41 @@
 # See https://pre-commit.com/hooks.html for more hooks
 exclude: /test/.*$(?<!\.ql)(?<!\.qll)(?<!\.qlref)
 repos:
-   repo: https://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.2.0
    hooks:
-    -   id: trailing-whitespace
-    -   id: end-of-file-fixer
+      - id: trailing-whitespace
+      - id: end-of-file-fixer

-   repo: local
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: v13.0.1
    hooks:
-    -   id: codeql-format
+      - id: clang-format
+        files: ^swift/.*\.(h|c|cpp)$
+
+  - repo: local
+    hooks:
+      - id: codeql-format
        name: Fix QL file formatting
        files: \.qll?$
        language: system
        entry: codeql query format --in-place

-    -   id: sync-files
+      - id: sync-files
        name: Fix files required to be identical
        language: system
        entry: python3 config/sync-files.py --latest
        pass_filenames: false

-    -   id: qhelp
+      - id: qhelp
        name: Check query help generation
        files: \.qhelp$
        language: system
        entry: python3 misc/scripts/check-qhelp.py
+
+      - id: swift-codegen
+        name: Run Swift checked in code generation
+        files: ^swift/(codegen/|.*/generated/|ql/lib/swift\.dbscheme$)
+        language: system
+        entry: bazel run //swift/codegen
+        pass_filenames: false
--- a/swift/codegen/BUILD.bazel
+++ b/swift/codegen/BUILD.bazel
@@ -1,8 +1,4 @@
 py_binary(
    name = "codegen",
    srcs = glob(["**/*.py"]),
-    data = glob(["**/*.mustache"]) + [
-        "schema.yml",
-        "prefix.dbscheme",
-    ],
 )
--- a/swift/codegen/codegen.py
+++ b/swift/codegen/codegen.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+""" Driver script to run all checked-in code generation """

 from lib import generator
 import dbschemegen
--- a/swift/codegen/dbschemegen.py
+++ b/swift/codegen/dbschemegen.py
@@ -1,24 +1,30 @@
 #!/usr/bin/env python3
+import pathlib

 import inflection

-from lib.renderer import Renderer
-from lib.dbscheme import *
 from lib import paths, schema, generator
+from lib.dbscheme import *

 log = logging.getLogger(__name__)


 def dbtype(typename):
+    """ translate a type to a dbscheme counterpart, using `@lower_underscore` format for classes """
    if typename[0].isupper():
        return "@" + inflection.underscore(typename)
    return typename


 def cls_to_dbscheme(cls: schema.Class):
+    """ Yield all dbscheme entities needed to model class `cls` """
    if cls.derived:
        yield DbUnion(dbtype(cls.name), (dbtype(c) for c in cls.derived))
-    if not cls.derived or any(f.is_single() for f in cls.fields):
+    # Output a class-specific table only if the class is a leaf class or has 1-to-1 properties:
+    # * leaf classes need a table to bind the `@` ids
+    # * 1-to-1 properties are added as columns of the class-specific table
+    # In all other cases, separate tables are used for the properties, and a class-specific table is unneeded
+    if not cls.derived or any(f.is_single for f in cls.properties):
        binding = not cls.derived
        keyset = DbKeySet(["id"]) if cls.derived else None
        yield DbTable(
@@ -27,11 +33,12 @@ def cls_to_dbscheme(cls: schema.Class):
            columns=[
                        DbColumn("id", type=dbtype(cls.name), binding=binding),
                    ] + [
-                        DbColumn(f.name, dbtype(f.type)) for f in cls.fields if f.is_single()
+                        DbColumn(f.name, dbtype(f.type)) for f in cls.properties if f.is_single
                    ]
        )
-    for f in cls.fields:
-        if f.is_optional():
+    # use property-specific tables for 1-to-many and 1-to-at-most-1 properties
+    for f in cls.properties:
+        if f.is_optional:
            yield DbTable(
                keyset=DbKeySet(["id"]),
                name=inflection.tableize(f"{cls.name}_{f.name}"),
@@ -40,7 +47,7 @@ def cls_to_dbscheme(cls: schema.Class):
                    DbColumn(f.name, dbtype(f.type)),
                ],
            )
-        elif f.is_repeated():
+        elif f.is_repeated:
            yield DbTable(
                keyset=DbKeySet(["id", "index"]),
                name=inflection.tableize(f"{cls.name}_{f.name}"),
@@ -52,24 +59,31 @@ def cls_to_dbscheme(cls: schema.Class):
            )


-def generate(opts):
+def get_declarations(data: schema.Schema):
+    return [d for cls in data.classes.values() for d in cls_to_dbscheme(cls)]
+
+
+def get_includes(data: schema.Schema, include_dir: pathlib.Path):
+    includes = []
+    for inc in data.includes:
+        inc = include_dir / inc
+        with open(inc) as inclusion:
+            includes.append(DbSchemeInclude(src=inc.relative_to(paths.swift_dir), data=inclusion.read()))
+    return includes
+
+
+def generate(opts, renderer):
    input = opts.schema.resolve()
    out = opts.dbscheme.resolve()
-    renderer = Renderer(opts.check)

    with open(input) as src:
        data = schema.load(src)

-    declarations = [d for cls in data.classes.values() for d in cls_to_dbscheme(cls)]
+    dbscheme = DbScheme(src=input.relative_to(paths.swift_dir),
+                        includes=get_includes(data, include_dir=input.parent),
+                        declarations=get_declarations(data))

-    includes = []
-    for inc in data.includes:
-        inc = input.parent / inc
-        with open(inc) as inclusion:
-            includes.append({"src": inc.relative_to(paths.swift_dir), "data": inclusion.read()})
-    renderer.render("dbscheme", out, includes=includes, src=input.relative_to(paths.swift_dir),
-                    declarations=declarations)
-    return renderer.written
+    renderer.render("dbscheme", out, dbscheme)


 if __name__ == "__main__":
--- a/swift/codegen/lib/dbscheme.py
+++ b/swift/codegen/lib/dbscheme.py
@@ -1,5 +1,6 @@
+""" dbscheme format representation """
+
 import logging
-import re
 from dataclasses import dataclass
 from typing import ClassVar, List

@@ -83,3 +84,16 @@ class DbUnion(DbDecl):
        self.rhs = [DbUnionCase(x) for x in self.rhs]
        self.rhs.sort(key=lambda c: c.type)
        self.rhs[0].first = True
+
+
+@dataclass
+class DbSchemeInclude:
+    src: str
+    data: str
+
+
+@dataclass
+class DbScheme:
+    src: str
+    includes: List[DbSchemeInclude]
+    declarations: List[DbDecl]
--- a/swift/codegen/lib/generator.py
+++ b/swift/codegen/lib/generator.py
@@ -1,44 +1,15 @@
+""" generator script scaffolding """
+
 import argparse
-import collections
 import logging
-import pathlib
 import sys

-from . import paths
-
-options = collections.defaultdict(list)
+from . import options, render


-class Option:
-    def __init__(self, *args, tags=None, **kwargs):
-        tags = tags or []
-        self.args = args
-        self.kwargs = kwargs
-        if tags:
-            for t in tags:
-                options[t].append(self)
-        else:
-            options["*"].append(self)
-
-    def add_to(self, parser: argparse.ArgumentParser):
-        parser.add_argument(*self.args, **self.kwargs)
-
-
-Option("--check", "-c", action="store_true")
-Option("--verbose", "-v", action="store_true")
-Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
-Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
-
-
-def _parse(*tags):
+def _parse(tags):
    parser = argparse.ArgumentParser()
-    if not tags:
-        opts = [o for os in options.values() for o in os]
-    else:
-        opts = options["*"]
-        for t in tags:
-            opts.extend(options[t])
-    for opt in opts:
+    for opt in options.get(tags):
        opt.add_to(parser)
    ret = parser.parse_args()
    log_level = logging.DEBUG if ret.verbose else logging.INFO
@@ -46,10 +17,13 @@ def _parse(*tags):
    return ret


-def run(*generate, tags=()):
-    opts = _parse(*tags)
-    done_something = False
-    for g in generate:
-        if g(opts):
-            done_something = True
-    sys.exit(1 if opts.check and done_something else 0)
+def run(*generators, tags=None):
+    """ run generation functions in `generators`, parsing options tagged with `tags` (all if unspecified)
+
+    `generators` should be callables taking as input an option namespace and a `render.Renderer` instance
+    """
+    opts = _parse(tags)
+    renderer = render.Renderer(dryrun=opts.check)
+    for g in generators:
+        g(opts, renderer)
+    sys.exit(1 if opts.check and renderer.done_something else 0)
--- a/swift/codegen/lib/options.py
+++ b/swift/codegen/lib/options.py
@@ -0,0 +1,48 @@
+""" generator options, categorized by tags """
+
+import argparse
+import collections
+import pathlib
+from typing import Tuple
+
+from . import paths
+
+
+def _init_options():
+    Option("--check", "-c", action="store_true")
+    Option("--verbose", "-v", action="store_true")
+    Option("--schema", tags=["schema"], type=pathlib.Path, default=paths.swift_dir / "codegen/schema.yml")
+    Option("--dbscheme", tags=["dbscheme"], type=pathlib.Path, default=paths.swift_dir / "ql/lib/swift.dbscheme")
+
+
+_options = collections.defaultdict(list)
+
+
+class Option:
+    def __init__(self, *args, tags=None, **kwargs):
+        tags = tags or []
+        self.args = args
+        self.kwargs = kwargs
+        if tags:
+            for t in tags:
+                _options[t].append(self)
+        else:
+            _options["*"].append(self)
+
+    def add_to(self, parser: argparse.ArgumentParser):
+        parser.add_argument(*self.args, **self.kwargs)
+
+
+_init_options()
+
+
+def get(tags: Tuple[str]):
+    """ get options marked by `tags`
+
+    Return all options if tags is falsy. Options tagged by wildcard '*' are always returned
+    """
+    if not tags:
+        return (o for tagged_opts in _options.values() for o in tagged_opts)
+    else:
+        # use specifically tagged options + those tagged with wildcard *
+        return (o for tag in ('*',) + tags for o in _options[tag])
--- a/swift/codegen/lib/paths.py
+++ b/swift/codegen/lib/paths.py
@@ -1,9 +1,11 @@
+""" module providing useful filesystem paths """
+
 import pathlib
 import sys
 import os

 try:
-    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])
+    _workspace_dir = pathlib.Path(os.environ['BUILD_WORKSPACE_DIRECTORY'])  # <- means we are using bazel run
    swift_dir = _workspace_dir / 'swift'
    lib_dir = swift_dir / 'codegen' / 'lib'
 except KeyError:
--- a/swift/codegen/lib/renderer.py
+++ b/swift/codegen/lib/renderer.py
@@ -1,3 +1,10 @@
+""" template renderer module, wrapping around `pystache.Renderer`
+
+`pystache` is a Python mustache engine, and mustache is a template language. More information at
+
+https://mustache.github.io/
+"""
+
 import hashlib
 import logging

@@ -8,15 +15,18 @@ from . import paths
 log = logging.getLogger(__name__)


-def md5(data):
+def _md5(data):
    return hashlib.md5(data).digest()


 class Renderer:
-    def __init__(self, check=False):
+    """ Template renderer using mustache templates in the `templates` directory """
+
+    def __init__(self, dryrun=False):
+        """ Construct the renderer, which will not write anything if `dryrun` is `True` """
        self.r = pystache.Renderer(search_dirs=str(paths.lib_dir / "templates"), escape=lambda u: u)
        self.generator = paths.exe_file.relative_to(paths.swift_dir)
-        self.check = check
+        self.dryrun = dryrun
        self.written = set()
        self.skipped = set()
        self.erased = set()
@@ -29,18 +39,21 @@ class Renderer:
    def rendered(self):
        return self.written | self.skipped

-    def render(self, name, output, **data):
+    def render(self, name, output, data):
+        """ Render the template called `name` in the template directory, writing to `output` using `data` as context
+
+        If the file is unchanged, then no write is performed (and `done_something` remains unchanged)
+        """
        mnemonic, _, _ = name.lower().partition(".")
        output.parent.mkdir(parents=True, exist_ok=True)
-        data["generator"] = self.generator
-        data = self.r.render_name(name, data)
+        data = self.r.render_name(name, data, generator=self.generator)
        if output.is_file():
            with open(output, "rb") as file:
-                if md5(data.encode()) == md5(file.read()):
+                if _md5(data.encode()) == _md5(file.read()):
                    log.debug(f"skipped {output.name}")
                    self.skipped.add(output)
                    return
-        if self.check:
+        if self.dryrun:
            log.error(f"would have generated {mnemonic} {output.name}")
        else:
            with open(output, "w") as out:
@@ -49,9 +62,10 @@ class Renderer:
        self.written.add(output)

    def cleanup(self, existing):
+        """ Remove files in `existing` for which no `render` has been called """
        for f in existing - self.written - self.skipped:
            if f.is_file():
-                if self.check:
+                if self.dryrun:
                    log.error(f"would have removed {f.name}")
                else:
                    f.unlink()
--- a/swift/codegen/lib/schema.py
+++ b/swift/codegen/lib/schema.py
@@ -1,30 +1,41 @@
+""" schema.yml format representation """
+
 import pathlib
+import re
 from dataclasses import dataclass, field
 from enum import Enum, auto
 from typing import List, Set, Dict
-import re

 import yaml

+root_class_name = "Element"
+

 class Cardinality(Enum):
+    """ The cardinality of a property
+
+    `ONE` is the default, `OPTIONAL` marks the properties denoted by `?`, `MANY` those denoted by `*`
+    """
    ONE = auto()
    OPTIONAL = auto()
    MANY = auto()


@dataclass
-class Field:
+class Property:
    name: str
    type: str
    cardinality: Cardinality = Cardinality.ONE

+    @property
    def is_single(self):
        return self.cardinality == Cardinality.ONE

+    @property
    def is_optional(self):
        return self.cardinality == Cardinality.OPTIONAL

+    @property
    def is_repeated(self):
        return self.cardinality == Cardinality.MANY

@@ -34,7 +45,7 @@ class Class:
    name: str
    bases: Set[str] = field(default_factory=set)
    derived: Set[str] = field(default_factory=set)
-    fields: List[Field] = field(default_factory=list)
+    properties: List[Property] = field(default_factory=list)
    dir: pathlib.Path = pathlib.Path()


@@ -44,7 +55,7 @@ class Schema:
    includes: Set[str] = field(default_factory=set)


-def _parse_field(name, type):
+def _parse_property(name, type):
    if type.endswith("*"):
        cardinality = Cardinality.MANY
        type = type[:-1]
@@ -53,13 +64,11 @@ def _parse_field(name, type):
        type = type[:-1]
    else:
        cardinality = Cardinality.ONE
-    return Field(name, type, cardinality)
+    return Property(name, type, cardinality)


-root_class_name = "Element"
-
-
-class DirSelector:
+class _DirSelector:
+    """ Default output subdirectory selector for generated QL files, based on the `_directories` global field """
    def __init__(self, dir_to_patterns):
        self.selector = [(re.compile(p), pathlib.Path(d)) for d, p in dir_to_patterns]
        self.selector.append((re.compile(""), pathlib.Path()))
@@ -69,8 +78,9 @@ class DirSelector:


 def load(file):
+    """ Parse the schema from `file` """
    data = yaml.load(file, Loader=yaml.SafeLoader)
-    grouper = DirSelector(data.get("_directories", {}).items())
+    grouper = _DirSelector(data.get("_directories", {}).items())
    ret = Schema(classes={cls: Class(cls, dir=grouper.get(cls)) for cls in data if not cls.startswith("_")},
                 includes=set(data.get("_includes", [])))
    assert root_class_name not in ret.classes
@@ -82,7 +92,7 @@ def load(file):
        cls = ret.classes[name]
        for k, v in info.items():
            if not k.startswith("_"):
-                cls.fields.append(_parse_field(k, v))
+                cls.properties.append(_parse_property(k, v))
            elif k == "_extends":
                if not isinstance(v, list):
                    v = [v]
--- a/swift/codegen/prefix.dbscheme
+++ b/swift/codegen/prefix.dbscheme
@@ -4,3 +4,7 @@
 sourceLocationPrefix(
  string prefix: string ref
 );
+
+answer_to_life_the_universe_and_everything(
+   int answer: int ref
+)
--- a/swift/ql/lib/swift.dbscheme
+++ b/swift/ql/lib/swift.dbscheme
@@ -8,6 +8,10 @@ sourceLocationPrefix(
  string prefix: string ref
 );

+answer_to_life_the_universe_and_everything(
+   int answer: int ref
+)
+

 // from codegen/schema.yml