mirror of
https://github.com/github/codeql.git
synced 2026-05-04 13:15:21 +02:00
Move swift/codegen to misc/codegen
This commit is contained in:
11
misc/codegen/generators/BUILD.bazel
Normal file
11
misc/codegen/generators/BUILD.bazel
Normal file
@@ -0,0 +1,11 @@
|
||||
# Bazel package bundling all code generators (dbscheme, QL, trap, C++).
load("@codegen_deps//:requirements.bzl", "requirement")

py_library(
    name = "generators",
    # all generator modules in this directory
    srcs = glob(["*.py"]),
    # only other codegen packages may depend on the generators
    visibility = ["//misc/codegen:__subpackages__"],
    deps = [
        "//misc/codegen/lib",
        "//misc/codegen/loaders",
    ],
)
|
||||
6
misc/codegen/generators/__init__.py
Normal file
6
misc/codegen/generators/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from . import dbschemegen, qlgen, trapgen, cppgen
|
||||
|
||||
|
||||
def generate(target, opts, renderer):
    """Dispatch to the `<target>gen` generator module (e.g. "dbscheme" -> dbschemegen)."""
    generator = globals()[f"{target}gen"]
    generator.generate(opts, renderer)
|
||||
99
misc/codegen/generators/cppgen.py
Normal file
99
misc/codegen/generators/cppgen.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
C++ trap class generation
|
||||
|
||||
`generate(opts, renderer)` will generate `TrapClasses.h` out of a `yml` schema file.
|
||||
|
||||
Each class in the schema gets a corresponding `struct` in `TrapClasses.h`, where:
|
||||
* inheritance is preserved
|
||||
* each property will be a corresponding field in the `struct` (with repeated properties mapping to `std::vector` and
|
||||
optional ones to `std::optional`)
|
||||
* final classes get a streaming operator that serializes the whole class into the corresponding trap emissions (using
|
||||
`TrapEntries.h` from `trapgen`).
|
||||
"""
|
||||
|
||||
import functools
|
||||
import typing
|
||||
|
||||
import inflection
|
||||
|
||||
from misc.codegen.lib import cpp, schema
|
||||
from misc.codegen.loaders import schemaloader
|
||||
|
||||
|
||||
def _get_type(t: str, add_or_none_except: typing.Optional[str] = None) -> str:
|
||||
if t is None:
|
||||
# this is a predicate
|
||||
return "bool"
|
||||
if t == "string":
|
||||
return "std::string"
|
||||
if t == "boolean":
|
||||
return "bool"
|
||||
if t[0].isupper():
|
||||
if add_or_none_except is not None and t != add_or_none_except:
|
||||
suffix = "OrNone"
|
||||
else:
|
||||
suffix = ""
|
||||
return f"TrapLabel<{t}{suffix}Tag>"
|
||||
return t
|
||||
|
||||
|
||||
def _get_field(cls: schema.Class, p: schema.Property, add_or_none_except: typing.Optional[str] = None) -> cpp.Field:
    """Build the `cpp.Field` corresponding to property `p` of class `cls`."""
    trap_name = None
    if not p.is_single:
        # non-single properties get their own trap entry, named after class and property
        trap_name = inflection.camelize(f"{cls.name}_{p.name}")
        if not p.is_predicate:
            trap_name = inflection.pluralize(trap_name)
    field_args = dict(
        # append an underscore to dodge C++ keyword clashes
        field_name=p.name + ("_" if p.name in cpp.cpp_keywords else ""),
        base_type=_get_type(p.type, add_or_none_except),
        is_optional=p.is_optional,
        is_repeated=p.is_repeated,
        is_predicate=p.is_predicate,
        trap_name=trap_name,
    )
    # per-name overrides (if any) take precedence over the computed values
    field_args.update(cpp.get_field_override(p.name))
    return cpp.Field(**field_args)
|
||||
|
||||
|
||||
class Processor:
    """Converts a loaded schema into `cpp.Class` objects, grouped by directory."""

    def __init__(self, data: schema.Schema):
        self._classmap = data.classes
        # Per-instance memoization for _get_class. The original used
        # functools.lru_cache on the method, which keys on `self` and keeps
        # the instance alive for the lifetime of the cache (ruff B019);
        # a plain dict gives the same sharing semantics without the leak.
        self._class_cache: typing.Dict[str, cpp.Class] = {}
        if data.null:
            # with a null class present, labels of other class types get an
            # `OrNone` variant, except for the root class (first in the map)
            root_type = next(iter(data.classes))
            self._add_or_none_except = root_type
        else:
            self._add_or_none_except = None

    def _get_class(self, name: str) -> cpp.Class:
        """Return the (shared, memoized) `cpp.Class` for schema class `name`."""
        cached = self._class_cache.get(name)
        if cached is not None:
            return cached
        cls = self._classmap[name]
        trap_name = None
        if not cls.derived or any(p.is_single for p in cls.properties):
            # a class gets its own trap entry if it is a leaf or has single properties
            trap_name = inflection.pluralize(cls.name)
        ret = cpp.Class(
            name=name,
            bases=[self._get_class(b) for b in cls.bases],
            fields=[
                _get_field(cls, p, self._add_or_none_except)
                for p in cls.properties if "cpp_skip" not in p.pragmas
            ],
            final=not cls.derived,
            trap_name=trap_name,
        )
        self._class_cache[name] = ret
        return ret

    def get_classes(self):
        """Return a `group dir -> [cpp.Class]` mapping of all non-synthesized classes."""
        ret = {'': []}
        for k, cls in self._classmap.items():
            if not cls.ipa:
                ret.setdefault(cls.group, []).append(self._get_class(cls.name))
        return ret
|
||||
|
||||
|
||||
def generate(opts, renderer):
    """Render a `TrapClasses` header per class group from the schema in `opts`."""
    assert opts.cpp_output
    processor = Processor(schemaloader.load_file(opts.schema))
    output_dir = opts.cpp_output
    for group_dir, classes in processor.get_classes().items():
        class_list = cpp.ClassList(classes, opts.schema,
                                   include_parent=bool(group_dir),
                                   trap_library=opts.trap_library)
        renderer.render(class_list, output_dir / group_dir / "TrapClasses")
|
||||
132
misc/codegen/generators/dbschemegen.py
Executable file
132
misc/codegen/generators/dbschemegen.py
Executable file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
dbscheme file generation
|
||||
|
||||
`generate(opts, renderer)` will generate a `dbscheme` file out of a `yml` schema file.
|
||||
|
||||
Each final class in the schema file will get a corresponding defining DB table with the id and single properties as
|
||||
columns.
|
||||
Moreover:
|
||||
* single properties in non-final classes will also trigger generation of a table with an id reference and all single
|
||||
properties as columns
|
||||
* each optional property will trigger generation of a table with an id reference and the property value as columns
|
||||
* each repeated property will trigger generation of a table with an id reference, an `int` index and the property value
|
||||
as columns
|
||||
The type hierarchy will be translated to corresponding `union` declarations.
|
||||
"""
|
||||
import logging
import pathlib
import typing

import inflection

from misc.codegen.lib import schema
from misc.codegen.loaders import schemaloader
from misc.codegen.lib.dbscheme import *
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def dbtype(typename: str, add_or_none_except: typing.Optional[str] = None) -> str:
    """ translate a type to a dbscheme counterpart, using `@lower_underscore` format for classes.

    For class types, appends `_or_none` when `add_or_none_except` is set and
    the type differs from it.
    """
    if not typename[0].isupper():
        # builtin types (string, int, boolean, ...) pass through unchanged
        return typename
    or_none = "_or_none" if add_or_none_except is not None and typename != add_or_none_except else ""
    return f"@{inflection.underscore(typename)}{or_none}"
|
||||
|
||||
|
||||
def cls_to_dbscheme(cls: schema.Class, lookup: typing.Dict[str, schema.Class], add_or_none_except: typing.Optional[str] = None):
    """ Yield all dbscheme entities needed to model class `cls` """
    if cls.ipa:
        # synthesized classes have no db representation
        return
    if cls.derived:
        # the class type is the union of its non-synthesized derived types
        yield Union(dbtype(cls.name), (dbtype(c) for c in cls.derived if not lookup[c].ipa))
    dir = pathlib.Path(cls.group) if cls.group else None
    # output a table specific to a class only if it is a leaf class or it has 1-to-1 properties
    # Leaf classes need a table to bind the `@` ids
    # 1-to-1 properties are added to a class specific table
    # in other cases, separate tables are used for the properties, and a class specific table is unneeded
    if not cls.derived or any(f.is_single for f in cls.properties):
        binding = not cls.derived
        keyset = KeySet(["id"]) if cls.derived else None
        yield Table(
            keyset=keyset,
            name=inflection.tableize(cls.name),
            columns=[
                Column("id", type=dbtype(cls.name), binding=binding),
            ] + [
                Column(f.name, dbtype(f.type, add_or_none_except)) for f in cls.properties if f.is_single
            ],
            dir=dir,
        )
    # use property-specific tables for 1-to-many and 1-to-at-most-1 properties
    for f in cls.properties:
        if f.is_repeated:
            # repeated property: (id, index, value) rows, keyed on (id, index)
            yield Table(
                keyset=KeySet(["id", "index"]),
                name=inflection.tableize(f"{cls.name}_{f.name}"),
                columns=[
                    Column("id", type=dbtype(cls.name)),
                    Column("index", type="int"),
                    Column(inflection.singularize(f.name), dbtype(f.type, add_or_none_except)),
                ],
                dir=dir,
            )
        elif f.is_optional:
            # optional property: at most one (id, value) row per id
            yield Table(
                keyset=KeySet(["id"]),
                name=inflection.tableize(f"{cls.name}_{f.name}"),
                columns=[
                    Column("id", type=dbtype(cls.name)),
                    Column(f.name, dbtype(f.type, add_or_none_except)),
                ],
                dir=dir,
            )
        elif f.is_predicate:
            # predicate: presence of the id row encodes True
            yield Table(
                keyset=KeySet(["id"]),
                name=inflection.underscore(f"{cls.name}_{f.name}"),
                columns=[
                    Column("id", type=dbtype(cls.name)),
                ],
                dir=dir,
            )
|
||||
|
||||
|
||||
def get_declarations(data: schema.Schema):
    """Compute all dbscheme declarations (unions and tables) for the schema."""
    add_or_none_except = data.root_class.name if data.null else None
    declarations = [d for cls in data.classes.values() for d in cls_to_dbscheme(cls, data.classes, add_or_none_except)]
    if data.null:
        # every class type used as a property type (except by the null class
        # itself) needs a `<type>_or_none` union of the type and the null class
        property_classes = {
            prop.type for cls in data.classes.values() for prop in cls.properties
            if cls.name != data.null and prop.type and prop.type[0].isupper()
        }
        declarations += [
            Union(dbtype(t, data.null), [dbtype(t), dbtype(data.null)]) for t in sorted(property_classes)
        ]
    return declarations
|
||||
|
||||
|
||||
def get_includes(data: schema.Schema, include_dir: pathlib.Path, root_dir: pathlib.Path):
    """Read the schema's include files, recording their paths relative to `root_dir`."""
    includes = []
    for name in data.includes:
        path = include_dir / name
        with open(path) as inclusion:
            includes.append(SchemeInclude(src=path.relative_to(root_dir), data=inclusion.read()))
    return includes
|
||||
|
||||
|
||||
def generate(opts, renderer):
    """Render the full dbscheme out of the schema file given in `opts`."""
    schema_path = opts.schema
    data = schemaloader.load_file(schema_path)

    scheme = Scheme(src=schema_path.name,
                    includes=get_includes(data, include_dir=schema_path.parent, root_dir=schema_path.parent),
                    declarations=get_declarations(data))

    renderer.render(scheme, opts.dbscheme)
|
||||
427
misc/codegen/generators/qlgen.py
Executable file
427
misc/codegen/generators/qlgen.py
Executable file
@@ -0,0 +1,427 @@
|
||||
"""
|
||||
QL code generation
|
||||
|
||||
`generate(opts, renderer)` will generate in the library directory:
|
||||
* generated/Raw.qll with thin class wrappers around DB types
|
||||
* generated/Synth.qll with the base algebraic datatypes for AST entities
|
||||
* generated/<group>/<Class>.qll with generated properties for each class
|
||||
* if not already modified, a elements/<group>/<Class>.qll stub to customize the above classes
|
||||
* elements.qll importing all the above stubs
|
||||
* if not already modified, a elements/<group>/<Class>Constructor.qll stub to customize the algebraic datatype
|
||||
characteristic predicate
|
||||
* generated/SynthConstructors.qll importing all the above constructor stubs
|
||||
* generated/PureSynthConstructors.qll importing constructor stubs for pure synthesized types (that is, not
|
||||
corresponding to raw types)
|
||||
Moreover in the test directory for each <Class> in <group> it will generate beneath the
|
||||
extractor-tests/generated/<group>/<Class> directory either
|
||||
* a `MISSING_SOURCE.txt` explanation file if no source is present, or
|
||||
* one `<Class>.ql` test query for all single properties and on `<Class>_<property>.ql` test query for each optional or
|
||||
repeated property
|
||||
"""
|
||||
# TODO this should probably be split in different generators now: ql, qltest, maybe qlsynth
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import subprocess
|
||||
import typing
|
||||
import itertools
|
||||
|
||||
import inflection
|
||||
|
||||
from misc.codegen.lib import schema, ql
|
||||
from misc.codegen.loaders import schemaloader
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for all QL generation errors."""

    def __str__(self):
        # the message is the first constructor argument
        return self.args[0]
|
||||
|
||||
|
||||
class FormatError(Error):
    """Raised when `codeql query format` fails on the generated files."""
    pass
|
||||
|
||||
|
||||
class RootElementHasChildren(Error):
    """Raised when the root schema class unexpectedly has child properties."""
    pass
|
||||
|
||||
|
||||
class NoClasses(Error):
    """Raised when the schema contains no classes at all."""
    pass
|
||||
|
||||
|
||||
# abbreviation -> expansion mapping, used when deriving human-readable
# documentation phrases from property and class names
abbreviations = {
    "expr": "expression",
    "arg": "argument",
    "stmt": "statement",
    "decl": "declaration",
    "repr": "representation",
    "param": "parameter",
    "int": "integer",
    "var": "variable",
    "ref": "reference",
}

# also expand plural forms ("exprs" -> "expressions")
abbreviations.update({f"{k}s": f"{v}s" for k, v in abbreviations.items()})

# matches any known abbreviation as a whole word
_abbreviations_re = re.compile("|".join(fr"\b{abbr}\b" for abbr in abbreviations))
|
||||
|
||||
|
||||
def _humanize(s: str) -> str:
    """Turn identifier `s` into a lowercase human phrase, expanding abbreviations."""
    humanized = inflection.humanize(s)
    humanized = humanized[0].lower() + humanized[1:]
    return _abbreviations_re.sub(lambda m: abbreviations[m[0]], humanized)
|
||||
|
||||
|
||||
_format_re = re.compile(r"\{(\w+)\}")
|
||||
|
||||
|
||||
def _get_doc(cls: schema.Class, prop: schema.Property, plural=None):
    """Build the documentation phrase for `prop` of `cls`.

    When `prop.doc` is set it is used directly; `{noun}` placeholders are
    pluralized/singularized according to `plural` for repeated properties,
    and stripped for non-repeated ones. Otherwise a phrase is derived from
    the humanized property and class names.
    """
    if prop.doc:
        if plural is None:
            # for consistency, ignore format in non repeated properties
            return _format_re.sub(lambda m: m[1], prop.doc)
        format = prop.doc
        nouns = [m[1] for m in _format_re.finditer(prop.doc)]
        if not nouns:
            # no explicit placeholder: treat the first word as the noun
            noun, _, rest = prop.doc.partition(" ")
            format = f"{{{noun}}} {rest}"
            nouns = [noun]
        transform = inflection.pluralize if plural else inflection.singularize
        return format.format(**{noun: transform(noun) for noun in nouns})

    prop_name = _humanize(prop.name)
    class_name = cls.default_doc_name or _humanize(inflection.underscore(cls.name))
    if prop.is_predicate:
        return f"this {class_name} {prop_name}"
    if plural is not None:
        prop_name = inflection.pluralize(prop_name) if plural else inflection.singularize(prop_name)
    return f"{prop_name} of this {class_name}"
|
||||
|
||||
|
||||
def get_ql_property(cls: schema.Class, prop: schema.Property, prev_child: str = "") -> ql.Property:
    """Build the `ql.Property` for `prop` of class `cls`.

    `prev_child` is the accessor name of the previous child property and is
    only used for child properties (to chain child ordering).
    """
    args = dict(
        type=prop.type if not prop.is_predicate else "predicate",
        qltest_skip="qltest_skip" in prop.pragmas,
        prev_child=prev_child if prop.is_child else None,
        is_optional=prop.is_optional,
        is_predicate=prop.is_predicate,
        description=prop.description
    )
    if prop.is_single:
        args.update(
            singular=inflection.camelize(prop.name),
            tablename=inflection.tableize(cls.name),
            # the class table has one column per single property; bind ours to result
            tableparams=["this"] + ["result" if p is prop else "_" for p in cls.properties if p.is_single],
            doc=_get_doc(cls, prop),
        )
    elif prop.is_repeated:
        args.update(
            singular=inflection.singularize(inflection.camelize(prop.name)),
            plural=inflection.pluralize(inflection.camelize(prop.name)),
            tablename=inflection.tableize(f"{cls.name}_{prop.name}"),
            # property table rows are (id, index, value)
            tableparams=["this", "index", "result"],
            doc=_get_doc(cls, prop, plural=False),
            doc_plural=_get_doc(cls, prop, plural=True),
        )
    elif prop.is_optional:
        args.update(
            singular=inflection.camelize(prop.name),
            tablename=inflection.tableize(f"{cls.name}_{prop.name}"),
            tableparams=["this", "result"],
            doc=_get_doc(cls, prop),
        )
    elif prop.is_predicate:
        args.update(
            singular=inflection.camelize(prop.name, uppercase_first_letter=False),
            tablename=inflection.underscore(f"{cls.name}_{prop.name}"),
            # predicates are encoded by the presence of the id row
            tableparams=["this"],
            doc=_get_doc(cls, prop),
        )
    else:
        raise ValueError(f"unknown property kind for {prop.name} from {cls.name}")
    return ql.Property(**args)
|
||||
|
||||
|
||||
def get_ql_class(cls: schema.Class) -> ql.Class:
    """Build the `ql.Class` for schema class `cls`."""
    # forward all `ql*` pragmas as boolean class flags
    pragmas = {k: True for k in cls.pragmas if k.startswith("ql")}
    prev_child = ""
    properties = []
    for p in cls.properties:
        prop = get_ql_property(cls, p, prev_child)
        if prop.is_child:
            # chain children so each one knows its predecessor
            prev_child = prop.singular
        properties.append(prop)
    return ql.Class(
        name=cls.name,
        bases=cls.bases,
        final=not cls.derived,
        properties=properties,
        dir=pathlib.Path(cls.group or ""),
        ipa=bool(cls.ipa),
        doc=cls.doc,
        **pragmas,
    )
|
||||
|
||||
|
||||
def _to_db_type(x: str) -> str:
|
||||
if x[0].isupper():
|
||||
return "Raw::" + x
|
||||
return x
|
||||
|
||||
|
||||
# cache of db-backed final synthesized classes, keyed by class name, so that
# type subtractions accumulate on one shared instance per name
_final_db_class_lookup = {}


def get_ql_ipa_class_db(name: str) -> ql.Synth.FinalClassDb:
    """Get (or create) the synthesized final class backed by db type `name`."""
    return _final_db_class_lookup.setdefault(name, ql.Synth.FinalClassDb(name=name,
                                                                         params=[
                                                                             ql.Synth.Param("id", _to_db_type(name))]))
|
||||
|
||||
|
||||
def get_ql_ipa_class(cls: schema.Class):
    """Map schema class `cls` to its synthesized (ipa) type description."""
    if cls.derived:
        # non-final classes are the union of their derived classes
        return ql.Synth.NonFinalClass(name=cls.name, derived=sorted(cls.derived),
                                      root=not cls.bases)
    if cls.ipa and cls.ipa.from_class is not None:
        # synthesized 1-to-1 from another class: carve it out of the source's db class
        source = cls.ipa.from_class
        get_ql_ipa_class_db(source).subtract_type(cls.name)
        return ql.Synth.FinalClassDerivedIpa(name=cls.name,
                                             params=[ql.Synth.Param("id", _to_db_type(source))])
    if cls.ipa and cls.ipa.on_arguments is not None:
        # synthesized fresh from a tuple of constructor arguments
        return ql.Synth.FinalClassFreshIpa(name=cls.name,
                                           params=[ql.Synth.Param(k, _to_db_type(v))
                                                   for k, v in cls.ipa.on_arguments.items()])
    # plain final class backed by its own db table
    return get_ql_ipa_class_db(cls.name)
|
||||
|
||||
|
||||
def get_import(file: pathlib.Path, root_dir: pathlib.Path):
    """Translate a `.qll` file path into a QL import string.

    The path is taken relative to `<root_dir>/ql/lib` and its components are
    joined with dots. Joining `parts` (instead of `str(...).replace("/", ".")`)
    keeps this correct on platforms whose path separator is not "/".
    """
    stem = file.relative_to(root_dir / "ql/lib").with_suffix("")
    return ".".join(stem.parts)
|
||||
|
||||
|
||||
def get_types_used_by(cls: ql.Class) -> typing.Iterable[str]:
    """Yield every type name referenced by `cls`: base names first, then property types."""
    yield from (base.base for base in cls.bases)
    yield from (prop.type for prop in cls.properties)
|
||||
|
||||
|
||||
def get_classes_used_by(cls: ql.Class) -> typing.List[str]:
    """Sorted, deduplicated class names referenced by `cls`, excluding itself."""
    return sorted({t for t in get_types_used_by(cls) if t[0].isupper() and t != cls.name})
|
||||
|
||||
|
||||
def format(codeql, files):
    """Run `codeql query format --in-place` over the QL files among `files`.

    Raises FormatError if the formatter exits with a non-zero status.
    """
    ql_files = [str(f) for f in files if f.suffix in (".qll", ".ql")]
    if not ql_files:
        return
    res = subprocess.run([codeql, "query", "format", "--in-place", "--"] + ql_files,
                         stderr=subprocess.PIPE, text=True)
    if res.returncode:
        # surface the formatter's complaints before failing
        for line in res.stderr.splitlines():
            log.error(line.strip())
        raise FormatError("QL format failed")
    for line in res.stderr.splitlines():
        log.debug(line.strip())
|
||||
|
||||
|
||||
def _get_path(cls: schema.Class) -> pathlib.Path:
|
||||
return pathlib.Path(cls.group or "", cls.name).with_suffix(".qll")
|
||||
|
||||
|
||||
def _get_all_properties(cls: schema.Class, lookup: typing.Dict[str, schema.Class],
                        already_seen: typing.Optional[typing.Set[int]] = None) -> \
        typing.Iterable[typing.Tuple[schema.Class, schema.Property]]:
    """Yield `(declaring class, property)` pairs for `cls` and all its bases,
    base properties first, each property at most once."""
    # deduplicate using ids
    if already_seen is None:
        already_seen = set()
    # visit bases in a stable (sorted) order so output is deterministic
    for b in sorted(cls.bases):
        base = lookup[b]
        for item in _get_all_properties(base, lookup, already_seen):
            yield item
    for p in cls.properties:
        if id(p) not in already_seen:
            already_seen.add(id(p))
            yield cls, p
||||
|
||||
|
||||
def _get_all_properties_to_be_tested(cls: schema.Class, lookup: typing.Dict[str, schema.Class]) -> \
        typing.Iterable[ql.PropertyForTest]:
    """Yield a `ql.PropertyForTest` for each inherited/own property of `cls`
    not marked with `qltest_skip`, plus count/presence accessors for repeated
    and optional properties respectively."""
    for c, p in _get_all_properties(cls, lookup):
        if not ("qltest_skip" in c.pragmas or "qltest_skip" in p.pragmas):
            # TODO here operations are duplicated, but should be better if we split ql and qltest generation
            p = get_ql_property(c, p)
            yield ql.PropertyForTest(p.getter, is_total=p.is_single or p.is_predicate,
                                     type=p.type if not p.is_predicate else None, is_repeated=p.is_repeated)
            if p.is_repeated and not p.is_optional:
                # repeated: also test the element count
                yield ql.PropertyForTest(f"getNumberOf{p.plural}", type="int")
            elif p.is_optional and not p.is_repeated:
                # optional: also test presence
                yield ql.PropertyForTest(f"has{p.singular}")
||||
|
||||
|
||||
def _partition_iter(x, pred):
|
||||
x1, x2 = itertools.tee(x)
|
||||
return filter(pred, x1), itertools.filterfalse(pred, x2)
|
||||
|
||||
|
||||
def _partition(l, pred):
    """ partitions a list according to boolean predicate """
    # returns a lazy pair-like map object; callers unpack it as (matching, rest)
    return map(list, _partition_iter(l, pred))
|
||||
|
||||
|
||||
def _is_in_qltest_collapsed_hierarchy(cls: schema.Class, lookup: typing.Dict[str, schema.Class]):
    """Whether `cls` is the root of, or contained in, a collapsed test hierarchy."""
    return "qltest_collapse_hierarchy" in cls.pragmas or _is_under_qltest_collapsed_hierarchy(cls, lookup)
||||
|
||||
|
||||
def _is_under_qltest_collapsed_hierarchy(cls: schema.Class, lookup: typing.Dict[str, schema.Class]):
    """Whether some base of `cls` is in a collapsed test hierarchy
    (unless `cls` explicitly opts out with `qltest_uncollapse_hierarchy`)."""
    return "qltest_uncollapse_hierarchy" not in cls.pragmas and any(
        _is_in_qltest_collapsed_hierarchy(lookup[b], lookup) for b in cls.bases)
|
||||
|
||||
|
||||
def _should_skip_qltest(cls: schema.Class, lookup: typing.Dict[str, schema.Class]):
    """Whether no test should be generated for `cls`: skipped explicitly,
    neither final nor a collapsed-hierarchy root, or inside a collapsed
    hierarchy."""
    return "qltest_skip" in cls.pragmas or not (
        cls.final or "qltest_collapse_hierarchy" in cls.pragmas) or _is_under_qltest_collapsed_hierarchy(
        cls, lookup)
|
||||
|
||||
|
||||
def _get_stub(cls: schema.Class, base_import: str, generated_import_prefix: str) -> ql.Stub:
    """Build the customizable stub for `cls`, with underlying-entity accessors
    for synthesized (ipa) classes."""
    if isinstance(cls.ipa, schema.IpaInfo):
        # NOTE(review): assumes IpaInfo always has exactly one of from_class /
        # on_arguments set, otherwise `accessors` stays unbound — confirm
        if cls.ipa.from_class is not None:
            # synthesized from another class: single accessor to the source entity
            accessors = [
                ql.IpaUnderlyingAccessor(
                    argument="Entity",
                    type=_to_db_type(cls.ipa.from_class),
                    constructorparams=["result"]
                )
            ]
        elif cls.ipa.on_arguments is not None:
            # synthesized from constructor arguments: one accessor per argument
            accessors = [
                ql.IpaUnderlyingAccessor(
                    argument=inflection.camelize(arg),
                    type=_to_db_type(type),
                    constructorparams=["result" if a == arg else "_" for a in cls.ipa.on_arguments]
                ) for arg, type in cls.ipa.on_arguments.items()
            ]
    else:
        # plain db class: no underlying accessors
        accessors = []
    return ql.Stub(name=cls.name, base_import=base_import, import_prefix=generated_import_prefix, ipa_accessors=accessors)
|
||||
|
||||
|
||||
def generate(opts, renderer):
    """Generate all QL artifacts from the schema: Raw/Synth libraries, one
    class file and stub per schema class, the ParentChild implementation,
    constructor stubs and import lists, plus qltest queries."""
    input = opts.schema
    out = opts.ql_output
    stub_out = opts.ql_stub_output
    test_out = opts.ql_test_output
    missing_test_source_filename = "MISSING_SOURCE.txt"
    include_file = stub_out.with_suffix(".qll")

    # collect everything previously generated, so the renderer can clean up stale files
    generated = {q for q in out.rglob("*.qll")}
    generated.add(include_file)
    generated.update(q for q in test_out.rglob("*.ql"))
    generated.update(q for q in test_out.rglob(missing_test_source_filename))

    # existing stub files (may be customized by hand, in which case they are kept)
    stubs = {q for q in stub_out.rglob("*.qll")}

    data = schemaloader.load_file(input)

    classes = {name: get_ql_class(cls) for name, cls in data.classes.items()}
    if not classes:
        raise NoClasses
    # the first class in the schema is the root element
    root = next(iter(classes.values()))
    if root.has_children:
        raise RootElementHasChildren(root)

    imports = {}
    generated_import_prefix = get_import(out, opts.root_dir)

    with renderer.manage(generated=generated, stubs=stubs, registry=opts.generated_registry,
                         force=opts.force) as renderer:

        # thin wrappers around db types, for non-synthesized classes only
        db_classes = [cls for cls in classes.values() if not cls.ipa]
        renderer.render(ql.DbClasses(db_classes), out / "Raw.qll")

        classes_by_dir_and_name = sorted(classes.values(), key=lambda cls: (cls.dir, cls.name))
        for c in classes_by_dir_and_name:
            imports[c.name] = get_import(stub_out / c.path, opts.root_dir)

        # one generated .qll per class, importing what it references
        for c in classes.values():
            qll = out / c.path.with_suffix(".qll")
            c.imports = [imports[t] for t in get_classes_used_by(c)]
            c.import_prefix = generated_import_prefix
            renderer.render(c, qll)

        # customization stubs, only (re)written if not hand-modified
        for c in data.classes.values():
            path = _get_path(c)
            stub_file = stub_out / path
            if not renderer.is_customized_stub(stub_file):
                base_import = get_import(out / path, opts.root_dir)
                renderer.render(_get_stub(c, base_import, generated_import_prefix), stub_file)

        # for example path/to/elements -> path/to/elements.qll
        renderer.render(ql.ImportList([i for name, i in imports.items() if not classes[name].ql_internal]),
                        include_file)

        elements_module = get_import(include_file, opts.root_dir)

        renderer.render(
            ql.GetParentImplementation(
                classes=list(classes.values()),
                # internal classes are not exported by the include file, import them directly
                imports=[elements_module] + [i for name, i in imports.items() if classes[name].ql_internal],
            ),
            out / 'ParentChild.qll')

        # qltest generation: one class tester plus one query per partial property
        for c in data.classes.values():
            if _should_skip_qltest(c, data.classes):
                continue
            test_dir = test_out / c.group / c.name
            test_dir.mkdir(parents=True, exist_ok=True)
            # only generated artifacts present means no test source was provided
            if all(f.suffix in (".txt", ".ql", ".actual", ".expected") for f in test_dir.glob("*.*")):
                log.warning(f"no test source in {test_dir.relative_to(test_out)}")
                renderer.render(ql.MissingTestInstructions(),
                                test_dir / missing_test_source_filename)
                continue
            total_props, partial_props = _partition(_get_all_properties_to_be_tested(c, data.classes),
                                                    lambda p: p.is_total)
            renderer.render(ql.ClassTester(class_name=c.name,
                                           properties=total_props,
                                           elements_module=elements_module,
                                           # in case of collapsed hierarchies we want to see the actual QL class in results
                                           show_ql_class="qltest_collapse_hierarchy" in c.pragmas),
                            test_dir / f"{c.name}.ql")
            for p in partial_props:
                renderer.render(ql.PropertyTester(class_name=c.name,
                                                  elements_module=elements_module,
                                                  property=p), test_dir / f"{c.name}_{p.getter}.ql")

        # synthesized type hierarchy and constructor stubs
        final_ipa_types = []
        non_final_ipa_types = []
        constructor_imports = []
        ipa_constructor_imports = []
        stubs = {}
        for cls in sorted(data.classes.values(), key=lambda cls: (cls.group, cls.name)):
            ipa_type = get_ql_ipa_class(cls)
            if ipa_type.is_final:
                final_ipa_types.append(ipa_type)
                if ipa_type.has_params:
                    stub_file = stub_out / cls.group / f"{cls.name}Constructor.qll"
                    if not renderer.is_customized_stub(stub_file):
                        # stub rendering must be postponed as we might not have yet all subtracted ipa types in `ipa_type`
                        stubs[stub_file] = ql.Synth.ConstructorStub(ipa_type, import_prefix=generated_import_prefix)
                    constructor_import = get_import(stub_file, opts.root_dir)
                    constructor_imports.append(constructor_import)
                    if ipa_type.is_ipa:
                        ipa_constructor_imports.append(constructor_import)
            else:
                non_final_ipa_types.append(ipa_type)

        # now all type subtractions have been registered; render the postponed stubs
        for stub_file, data in stubs.items():
            renderer.render(data, stub_file)
        renderer.render(ql.Synth.Types(root.name, generated_import_prefix,
                                       final_ipa_types, non_final_ipa_types), out / "Synth.qll")
        renderer.render(ql.ImportList(constructor_imports), out / "SynthConstructors.qll")
        renderer.render(ql.ImportList(ipa_constructor_imports), out / "PureSynthConstructors.qll")
        if opts.ql_format:
            format(opts.codeql_binary, renderer.written)
|
||||
98
misc/codegen/generators/trapgen.py
Executable file
98
misc/codegen/generators/trapgen.py
Executable file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
C++ trap entry generation
|
||||
|
||||
`generate(opts, renderer)` will generate `TrapTags.h` (for types of labels) and `TrapEntries.h` (for trap emission) out
|
||||
of a dbscheme file.
|
||||
|
||||
Each table in the `dbscheme` gets a corresponding `struct` defined in `TrapEntries.h` with a field for each column and
|
||||
an appropriate streaming operator for the trap emission.
|
||||
|
||||
Unions in the `dbscheme` are used to populate a hierarchy of tags (empty structs) in `TrapTags.h` that is used to
|
||||
enforce a type system on trap labels (see `TrapLabel.h`).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
|
||||
import inflection
|
||||
from toposort import toposort_flatten
|
||||
|
||||
from misc.codegen.lib import dbscheme, cpp
|
||||
from misc.codegen.loaders import dbschemeloader
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_tag_name(s):
    """Turn a dbscheme `@lower_underscore` type into a CamelCase tag name."""
    assert s.startswith("@")
    return inflection.camelize(s[1:])
|
||||
|
||||
|
||||
def get_cpp_type(schema_type: str):
    """Map a dbscheme column type to the C++ type used in trap entry structs."""
    if schema_type.startswith("@"):
        # dbscheme id types become typed trap labels
        return f"TrapLabel<{get_tag_name(schema_type)}Tag>"
    builtin_types = {"string": "std::string", "boolean": "bool"}
    return builtin_types.get(schema_type, schema_type)
|
||||
|
||||
|
||||
def get_field(c: dbscheme.Column):
    """Build a `cpp.Field` from a dbscheme column, applying per-name overrides."""
    field_args = {
        "field_name": c.schema_name,
        "base_type": c.type,
    }
    field_args.update(cpp.get_field_override(c.schema_name))
    # translate the (possibly overridden) dbscheme type into its C++ type
    field_args["base_type"] = get_cpp_type(field_args["base_type"])
    return cpp.Field(**field_args)
|
||||
|
||||
|
||||
def get_binding_column(t: dbscheme.Table):
    """Return the first binding column of table `t`, or None if it has none."""
    return next((c for c in t.columns if c.binding), None)
|
||||
|
||||
|
||||
def get_trap(t: dbscheme.Table):
    """Build the `cpp.Trap` struct description for table `t`."""
    binding = get_binding_column(t)
    return cpp.Trap(
        table_name=t.name,
        name=inflection.camelize(t.name),
        fields=[get_field(c) for c in t.columns],
        # the binding column (if any) identifies the trap entry
        id=get_field(binding) if binding else None,
    )
|
||||
|
||||
|
||||
def generate(opts, renderer):
    """Render `TrapEntries.h` (one per dbscheme directory) and `TrapTags.h`."""
    assert opts.cpp_output
    tag_graph = {}
    out = opts.cpp_output
    trap_library = opts.trap_library

    traps = {pathlib.Path(): []}
    for e in dbschemeloader.iterload(opts.dbscheme):
        if e.is_table:
            traps.setdefault(e.dir, []).append(get_trap(e))
        elif e.is_union:
            # unions define the tag hierarchy: each rhs type gets the lhs as a base
            tag_graph.setdefault(e.lhs, set())
            for d in e.rhs:
                tag_graph.setdefault(d.type, set()).add(e.lhs)

    for dir, entries in traps.items():
        dir = dir or pathlib.Path()
        # relative path from this directory's output back to the generated root
        relative_gen_dir = pathlib.Path(*[".." for _ in dir.parents])
        renderer.render(cpp.TrapList(entries, opts.dbscheme, trap_library, relative_gen_dir), out / dir / "TrapEntries")

    tags = []
    # topological order ensures base tags are declared before derived ones
    for tag in toposort_flatten(tag_graph):
        tags.append(cpp.Tag(
            name=get_tag_name(tag),
            bases=[get_tag_name(b) for b in sorted(tag_graph[tag])],
            id=tag,
        ))
    renderer.render(cpp.TagList(tags, opts.dbscheme), out / "TrapTags")
|
||||
Reference in New Issue
Block a user