diff --git a/swift/codegen/README.md b/swift/codegen/README.md new file mode 100644 index 00000000000..d7efe304de7 --- /dev/null +++ b/swift/codegen/README.md @@ -0,0 +1,37 @@ +# Code generation suite + +This directory contains the code generation suite used by the Swift extractor and the QL library. This suite will use +the abstract class specification of [`schema.yml`](schema.yml) to generate: + +* [the `dbscheme` file](../ql/lib/swift.dbscheme) (see [`dbschemegen.py`](generators/dbschemegen.py)) +* [the QL generated code](../ql/lib/codeql/swift/generated) and when + appropriate [the corresponding stubs](../ql/lib/codeql/swift/elements) (see [`qlgen.py`](generators/qlgen.py)) +* C++ tags and trap entries (see [`trapgen.py`](generators/trapgen.py)) +* C++ structured classes (see [`cppgen.py`](generators/cppgen.py)) + +## Usage + +By default `bazel run //swift/codegen` will update all checked-in generated files (`dbscheme` and QL sources). You can +append `--` followed by other options to tweak the behaviour, which is mainly intended for debugging. +See `bazel run //swift/codegen -- --help` for a list of all options. In particular `--generate` can be used with a comma +separated list to select what to generate (choosing among `dbscheme`, `ql`, `trap` and `cpp`). + +C++ code is generated during build (see [`swift/extractor/trap/BUILD.bazel`](../extractor/trap/BUILD.bazel)). After a +build you can browse the generated code in `bazel-bin/swift/extractor/trap/generated`. + +## Implementation notes + +The suite uses [mustache templating](https://mustache.github.io/) for generation. Templates are +in [the `templates` directory](templates), prefixed with the generation target they are used for. + +Rather than passing dictionaries to the templating engine, python dataclasses are used as defined +in [the `lib` directory](lib). 
For each of the four generation targets the entry point for the implementation is +specified as the `generate` function in the modules within [the `generators` directory](generators). + +Finally, [`codegen.py`](codegen.py) is the driver script gluing everything together and specifying the command line +options. + +Unit tests are in [the `test` directory](test) and can be run via `bazel test //swift/codegen/test`. + +For more details about each specific generation target, please refer to the module docstrings +in [the `generators` directory](generators). diff --git a/swift/codegen/codegen.py b/swift/codegen/codegen.py index 9b3f21eaf11..c9a812b7d80 100755 --- a/swift/codegen/codegen.py +++ b/swift/codegen/codegen.py @@ -14,21 +14,29 @@ from swift.codegen.generators import generate def _parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser() - p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"]) - p.add_argument("--verbose", "-v", action="store_true") - p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir) - p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml") - p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme") - p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated") - p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements") - p.add_argument("--ql-format", action="store_true", default=True) - p.add_argument("--no-ql-format", action="store_false", dest="ql_format") - p.add_argument("--codeql-binary", default="codeql") - p.add_argument("--cpp-output", type=_abspath) - p.add_argument("--cpp-namespace", default="codeql") - p.add_argument("--trap-affix", default="Trap") - p.add_argument("--cpp-include-dir", default="swift/extractor/trap") + p = argparse.ArgumentParser(description="Code generation suite") + 
p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"], + help="specify what targets to generate as a comma separated list, choosing among dbscheme, ql, trap " + "and cpp") + p.add_argument("--verbose", "-v", action="store_true", help="print more information") + p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir, + help="the directory that should be regarded as the root of the swift codebase. Used to compute QL " + "imports and in some comments (default %(default)s)") + p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml", + help="input schema file (default %(default)s)") + p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme", + help="output file for dbscheme generation, input file for trap generation (default %(default)s)") + p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated", + help="output directory for generated QL files (default %(default)s)") + p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements", + help="output directory for QL stub/customization files (default %(default)s). 
Defines also the " + "generated qll file importing every class file") + p.add_argument("--ql-format", action="store_true", default=True, + help="use codeql to autoformat QL files (which is the default)") + p.add_argument("--no-ql-format", action="store_false", dest="ql_format", help="do not format QL files") + p.add_argument("--codeql-binary", default="codeql", help="command to use for QL formatting (default %(default)s)") + p.add_argument("--cpp-output", type=_abspath, + help="output directory for generated C++ files, required if trap or cpp is provided to --generate") return p.parse_args() diff --git a/swift/codegen/generators/cppgen.py b/swift/codegen/generators/cppgen.py index 629c971523d..5368383748f 100644 --- a/swift/codegen/generators/cppgen.py +++ b/swift/codegen/generators/cppgen.py @@ -1,3 +1,16 @@ +""" +C++ trap class generation + +`generate(opts, renderer)` will generate `TrapClasses.h` out of a `yml` schema file. + +Each class in the schema gets a corresponding `struct` in `TrapClasses.h`, where: +* inheritance is preserved +* each property will be a corresponding field in the `struct` (with repeated properties mapping to `std::vector` and + optional ones to `std::optional`) +* final classes get a streaming operator that serializes the whole class into the corresponding trap emissions (using + `TrapEntries.h` from `trapgen`). 
+""" + import functools from typing import Dict @@ -7,7 +20,7 @@ from toposort import toposort_flatten from swift.codegen.lib import cpp, schema -def _get_type(t: str, trap_affix: str) -> str: +def _get_type(t: str) -> str: if t is None: # this is a predicate return "bool" @@ -16,11 +29,11 @@ def _get_type(t: str, trap_affix: str) -> str: if t == "boolean": return "bool" if t[0].isupper(): - return f"{trap_affix}Label<{t}Tag>" + return f"TrapLabel<{t}Tag>" return t -def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Field: +def _get_field(cls: schema.Class, p: schema.Property) -> cpp.Field: trap_name = None if not p.is_single: trap_name = inflection.camelize(f"{cls.name}_{p.name}") @@ -28,7 +41,7 @@ def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Fi trap_name = inflection.pluralize(trap_name) args = dict( field_name=p.name + ("_" if p.name in cpp.cpp_keywords else ""), - type=_get_type(p.type, trap_affix), + type=_get_type(p.type), is_optional=p.is_optional, is_repeated=p.is_repeated, is_predicate=p.is_predicate, @@ -39,9 +52,8 @@ def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Fi class Processor: - def __init__(self, data: Dict[str, schema.Class], trap_affix: str): + def __init__(self, data: Dict[str, schema.Class]): self._classmap = data - self._trap_affix = trap_affix @functools.lru_cache(maxsize=None) def _get_class(self, name: str) -> cpp.Class: @@ -52,7 +64,7 @@ class Processor: return cpp.Class( name=name, bases=[self._get_class(b) for b in cls.bases], - fields=[_get_field(cls, p, self._trap_affix) for p in cls.properties], + fields=[_get_field(cls, p) for p in cls.properties], final=not cls.derived, trap_name=trap_name, ) @@ -64,7 +76,6 @@ class Processor: def generate(opts, renderer): assert opts.cpp_output - processor = Processor({cls.name: cls for cls in schema.load(opts.schema).classes}, opts.trap_affix) + processor = Processor({cls.name: cls for cls in 
schema.load(opts.schema).classes}) out = opts.cpp_output - renderer.render(cpp.ClassList(processor.get_classes(), opts.cpp_namespace, opts.trap_affix, - opts.cpp_include_dir, opts.schema), out / f"{opts.trap_affix}Classes.h") + renderer.render(cpp.ClassList(processor.get_classes(), opts.schema), out / f"TrapClasses.h") diff --git a/swift/codegen/generators/dbschemegen.py b/swift/codegen/generators/dbschemegen.py index 0408be55dc2..650c5a7e40b 100755 --- a/swift/codegen/generators/dbschemegen.py +++ b/swift/codegen/generators/dbschemegen.py @@ -1,4 +1,19 @@ -#!/usr/bin/env python3 +""" +dbscheme file generation + +`generate(opts, renderer)` will generate a `dbscheme` file out of a `yml` schema file. + +Each final class in the schema file will get a corresponding defining DB table with the id and single properties as +columns. +Moreover: +* single properties in non-final classes will also trigger generation of a table with an id reference and all single + properties as columns +* each optional property will trigger generation of a table with an id reference and the property value as columns +* each repeated property will trigger generation of a table with an id reference, an `int` index and the property value + as columns +The type hierarchy will be translated to corresponding `union` declarations. 
+""" + import pathlib import inflection @@ -63,11 +78,10 @@ def cls_to_dbscheme(cls: schema.Class): name=inflection.underscore(f"{cls.name}_{f.name}"), columns=[ Column("id", type=dbtype(cls.name)), - ], + ], ) - def get_declarations(data: schema.Schema): return [d for cls in data.classes for d in cls_to_dbscheme(cls)] diff --git a/swift/codegen/generators/trapgen.py b/swift/codegen/generators/trapgen.py index f3da3be7cfe..38ca3986905 100755 --- a/swift/codegen/generators/trapgen.py +++ b/swift/codegen/generators/trapgen.py @@ -1,4 +1,15 @@ -#!/usr/bin/env python3 +""" +C++ trap entry generation + +`generate(opts, renderer)` will generate `TrapTags.h` (for types of labels) and `TrapEntries.h` (for trap emission) out +of a dbscheme file. + +Each table in the `dbscheme` gets a corresponding `struct` defined in `TrapEntries.h` with a field for each column and +an appropriate streaming operator for the trap emission. + +Unions in the `dbscheme` are used to populate a hierarchy of tags (empty structs) in `TrapTags.h` that is used to +enforce a type system on trap labels (see `TrapLabel.h`). 
+""" import logging @@ -15,10 +26,10 @@ def get_tag_name(s): return inflection.camelize(s[1:]) -def get_cpp_type(schema_type: str, trap_affix: str): +def get_cpp_type(schema_type: str): if schema_type.startswith("@"): tag = get_tag_name(schema_type) - return f"{trap_affix}Label<{tag}Tag>" + return f"TrapLabel<{tag}Tag>" if schema_type == "string": return "std::string" if schema_type == "boolean": @@ -26,13 +37,13 @@ def get_cpp_type(schema_type: str, trap_affix: str): return schema_type -def get_field(c: dbscheme.Column, trap_affix: str): +def get_field(c: dbscheme.Column): args = { "field_name": c.schema_name, "type": c.type, } args.update(cpp.get_field_override(c.schema_name)) - args["type"] = get_cpp_type(args["type"], trap_affix) + args["type"] = get_cpp_type(args["type"]) return cpp.Field(**args) @@ -43,14 +54,14 @@ def get_binding_column(t: dbscheme.Table): return None -def get_trap(t: dbscheme.Table, trap_affix: str): +def get_trap(t: dbscheme.Table): id = get_binding_column(t) if id: - id = get_field(id, trap_affix) + id = get_field(id) return cpp.Trap( table_name=t.name, name=inflection.camelize(t.name), - fields=[get_field(c, trap_affix) for c in t.columns], + fields=[get_field(c) for c in t.columns], id=id, ) @@ -63,14 +74,14 @@ def generate(opts, renderer): traps = [] for e in dbscheme.iterload(opts.dbscheme): if e.is_table: - traps.append(get_trap(e, opts.trap_affix)) + traps.append(get_trap(e)) elif e.is_union: tag_graph.setdefault(e.lhs, set()) for d in e.rhs: tag_graph.setdefault(d.type, set()).add(e.lhs) - renderer.render(cpp.TrapList(traps, opts.cpp_namespace, opts.trap_affix, opts.cpp_include_dir, opts.dbscheme), - out / f"{opts.trap_affix}Entries.h") + renderer.render(cpp.TrapList(traps, opts.dbscheme), + out / f"TrapEntries.h") tags = [] for index, tag in enumerate(toposort_flatten(tag_graph)): @@ -80,4 +91,4 @@ def generate(opts, renderer): index=index, id=tag, )) - renderer.render(cpp.TagList(tags, opts.cpp_namespace, opts.dbscheme), out / 
f"{opts.trap_affix}Tags.h") + renderer.render(cpp.TagList(tags, opts.dbscheme), out / f"TrapTags.h") diff --git a/swift/codegen/lib/cpp.py b/swift/codegen/lib/cpp.py index f93ff00b027..cd39fc2afba 100644 --- a/swift/codegen/lib/cpp.py +++ b/swift/codegen/lib/cpp.py @@ -101,9 +101,6 @@ class TrapList: template: ClassVar = 'trap_traps' traps: List[Trap] - namespace: str - trap_affix: str - include_dir: str source: str @@ -112,7 +109,6 @@ class TagList: template: ClassVar = 'trap_tags' tags: List[Tag] - namespace: str source: str @@ -149,7 +145,4 @@ class ClassList: template: ClassVar = "cpp_classes" classes: List[Class] - namespace: str - trap_affix: str - include_dir: str source: str diff --git a/swift/codegen/lib/ql.py b/swift/codegen/lib/ql.py index ec667b6ade8..9efdb163e4d 100644 --- a/swift/codegen/lib/ql.py +++ b/swift/codegen/lib/ql.py @@ -1,3 +1,17 @@ +""" +QL files generation + +`generate(opts, renderer)` will generate QL classes and manage stub files out of a `yml` schema file. + +Each class (for example, `Foo`) in the schema triggers: +* generation of a `FooBase` class implementation translating all properties into appropriate getters +* if not created or already customized, generation of a stub file which defines `Foo` as extending `FooBase`. This can + be used to add hand-written code to `Foo`, which requires removal of the `// generated` header comment in that file. + All generated base classes actually import these customizations when referencing other classes. +Generated files that do not correspond any more to any class in the schema are deleted. Customized stubs are however +left behind and must be dealt with by hand. 
+""" + import pathlib from dataclasses import dataclass, field from typing import List, ClassVar diff --git a/swift/codegen/templates/cpp_classes.mustache b/swift/codegen/templates/cpp_classes.mustache index 758febe966f..2b81ab6e3f8 100644 --- a/swift/codegen/templates/cpp_classes.mustache +++ b/swift/codegen/templates/cpp_classes.mustache @@ -6,10 +6,10 @@ #include #include -#include "{{include_dir}}/{{trap_affix}}Label.h" -#include "./{{trap_affix}}Entries.h" +#include "swift/extractor/trap/TrapLabel.h" +#include "./TrapEntries.h" -namespace {{namespace}} { +namespace codeql { {{#classes}} struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases}}{{/final}}{{^final}}{{#has_bases}}: {{#bases}}{{^first}}, {{/first}}{{ref.name}}{{/bases}}{{/has_bases}}{{/final}} { @@ -25,29 +25,29 @@ struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases {{/final}} protected: - void emit({{^final}}{{trap_affix}}Label<{{name}}Tag> id, {{/final}}std::ostream& out) const { + void emit({{^final}}TrapLabel<{{name}}Tag> id, {{/final}}std::ostream& out) const { {{#trap_name}} - out << {{.}}{{trap_affix}}{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n'; + out << {{.}}Trap{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n'; {{/trap_name}} {{#bases}} {{ref.name}}::emit(id, out); {{/bases}} {{#fields}} {{#is_predicate}} - if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id} << '\n'; + if ({{field_name}}) out << {{trap_name}}Trap{id} << '\n'; {{/is_predicate}} {{#is_optional}} {{^is_repeated}} - if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id, *{{field_name}}} << '\n'; + if ({{field_name}}) out << {{trap_name}}Trap{id, *{{field_name}}} << '\n'; {{/is_repeated}} {{/is_optional}} {{#is_repeated}} for (auto i = 0u; i < {{field_name}}.size(); ++i) { {{^is_optional}} - out << {{trap_name}}{{trap_affix}}{id, i, {{field_name}}[i]}; + out << {{trap_name}}Trap{id, i, {{field_name}}[i]}; {{/is_optional}} 
{{#is_optional}} - if ({{field_name}}[i]) out << {{trap_name}}{{trap_affix}}{id, i, *{{field_name}}[i]}; + if ({{field_name}}[i]) out << {{trap_name}}Trap{id, i, *{{field_name}}[i]}; {{/is_optional}} } {{/is_repeated}} diff --git a/swift/codegen/templates/trap_tags.mustache b/swift/codegen/templates/trap_tags.mustache index 48bae6bb153..feac64ff92a 100644 --- a/swift/codegen/templates/trap_tags.mustache +++ b/swift/codegen/templates/trap_tags.mustache @@ -2,7 +2,7 @@ // clang-format off #pragma once -namespace {{namespace}} { +namespace codeql { {{#tags}} // {{id}} diff --git a/swift/codegen/templates/trap_traps.mustache b/swift/codegen/templates/trap_traps.mustache index e5a576f0a33..5e5f2b68e93 100644 --- a/swift/codegen/templates/trap_traps.mustache +++ b/swift/codegen/templates/trap_traps.mustache @@ -5,15 +5,15 @@ #include #include -#include "{{include_dir}}/{{trap_affix}}Label.h" -#include "{{include_dir}}/{{trap_affix}}TagTraits.h" -#include "./{{trap_affix}}Tags.h" +#include "swift/extractor/trap/TrapLabel.h" +#include "swift/extractor/trap/TrapTagTraits.h" +#include "./TrapTags.h" -namespace {{namespace}} { +namespace codeql { {{#traps}} // {{table_name}} -struct {{name}}{{trap_affix}} { +struct {{name}}Trap { static constexpr bool is_binding = {{#id}}true{{/id}}{{^id}}false{{/id}}; {{#id}} {{type}} getBoundLabel() const { return {{field_name}}; } @@ -24,7 +24,7 @@ struct {{name}}{{trap_affix}} { {{/fields}} }; -inline std::ostream &operator<<(std::ostream &out, const {{name}}{{trap_affix}} &e) { +inline std::ostream &operator<<(std::ostream &out, const {{name}}Trap &e) { out << "{{table_name}}("{{#fields}}{{^first}} << ", "{{/first}} << {{#get_streamer}}e.{{field_name}}{{/get_streamer}}{{/fields}} << ")"; return out; @@ -34,7 +34,7 @@ inline std::ostream &operator<<(std::ostream &out, const {{name}}{{trap_affix}} namespace detail { template<> struct ToBindingTrapFunctor<{{type}}> { - using type = {{name}}{{trap_affix}}; + using type = {{name}}Trap; }; } 
{{/id}} diff --git a/swift/codegen/test/test_cppgen.py b/swift/codegen/test/test_cppgen.py index ea0255f4254..c1f205eaa3b 100644 --- a/swift/codegen/test/test_cppgen.py +++ b/swift/codegen/test/test_cppgen.py @@ -10,19 +10,13 @@ output_dir = pathlib.Path("path", "to", "output") @pytest.fixture def generate(opts, renderer, input): opts.cpp_output = output_dir - opts.cpp_namespace = "test_namespace" - opts.trap_affix = "TestTrapAffix" - opts.cpp_include_dir = "my/include/dir" def ret(classes): input.classes = classes generated = run_generation(cppgen.generate, opts, renderer) - assert set(generated) == {output_dir / "TestTrapAffixClasses.h"} - generated = generated[output_dir / "TestTrapAffixClasses.h"] + assert set(generated) == {output_dir / "TrapClasses.h"} + generated = generated[output_dir / "TrapClasses.h"] assert isinstance(generated, cpp.ClassList) - assert generated.namespace == opts.cpp_namespace - assert generated.trap_affix == opts.trap_affix - assert generated.include_dir == opts.cpp_include_dir return generated.classes return ret @@ -72,7 +66,7 @@ def test_complex_hierarchy_topologically_ordered(generate): ("a", "a"), ("string", "std::string"), ("boolean", "bool"), - ("MyClass", "TestTrapAffixLabel"), + ("MyClass", "TrapLabel"), ]) @pytest.mark.parametrize("property_cls,optional,repeated,trap_name", [ (schema.SingleProperty, False, False, None), diff --git a/swift/codegen/test/test_trapgen.py b/swift/codegen/test/test_trapgen.py index d3d0dbfcaba..1241ce0b863 100644 --- a/swift/codegen/test/test_trapgen.py +++ b/swift/codegen/test/test_trapgen.py @@ -10,16 +10,13 @@ output_dir = pathlib.Path("path", "to", "output") @pytest.fixture def generate(opts, renderer, dbscheme_input): opts.cpp_output = output_dir - opts.cpp_namespace = "test_namespace" - opts.trap_affix = "TrapAffix" - opts.cpp_include_dir = "my/include/dir" def ret(entities): dbscheme_input.entities = entities generated = run_generation(trapgen.generate, opts, renderer) assert set(generated) == 
{output_dir / - "TrapAffixEntries.h", output_dir / "TrapAffixTags.h"} - return generated[output_dir / "TrapAffixEntries.h"], generated[output_dir / "TrapAffixTags.h"] + "TrapEntries.h", output_dir / "TrapTags.h"} + return generated[output_dir / "TrapEntries.h"], generated[output_dir / "TrapTags.h"] return ret @@ -29,9 +26,6 @@ def generate_traps(opts, generate): def ret(entities): traps, _ = generate(entities) assert isinstance(traps, cpp.TrapList) - assert traps.namespace == opts.cpp_namespace - assert traps.trap_affix == opts.trap_affix - assert traps.include_dir == opts.cpp_include_dir return traps.traps return ret @@ -42,7 +36,6 @@ def generate_tags(opts, generate): def ret(entities): _, tags = generate(entities) assert isinstance(tags, cpp.TagList) - assert tags.namespace == opts.cpp_namespace return tags.tags return ret @@ -106,7 +99,7 @@ def test_one_table_with_two_binding_first_is_id(generate_traps): @pytest.mark.parametrize("column,field", [ (dbscheme.Column("x", "string"), cpp.Field("x", "std::string")), (dbscheme.Column("y", "boolean"), cpp.Field("y", "bool")), - (dbscheme.Column("z", "@db_type"), cpp.Field("z", "TrapAffixLabel")), + (dbscheme.Column("z", "@db_type"), cpp.Field("z", "TrapLabel")), ]) def test_one_table_special_types(generate_traps, column, field): assert generate_traps([ diff --git a/swift/extractor/trap/BUILD.bazel b/swift/extractor/trap/BUILD.bazel index d2702726074..3b057800382 100644 --- a/swift/extractor/trap/BUILD.bazel +++ b/swift/extractor/trap/BUILD.bazel @@ -15,7 +15,6 @@ genrule( "--generate=dbscheme,trap,cpp", "--schema $(location //swift/codegen:schema)", "--dbscheme $(RULEDIR)/generated/swift.dbscheme", - "--cpp-include-dir " + package_name(), "--cpp-output $(RULEDIR)/generated", ]), exec_tools = ["//swift/codegen"],