Rust: fetch ungram and rust-analyzer code instead of checking it in

* The ungram file is now taken from the rust-analyzer dependencies
  pulled in by bazel
* the grammar parsing code is not published, so it must be taken
  directly from rust-analyzer code. That part should be less prone to
  updates than the ungram file, so it does not necessarily need to be
  in sync with the rust-analyzer version that is used elsewhere.
* both need some patches. The former is patched during build, the latter
  during loading in `MODULE.bazel`.
This commit is contained in:
Paolo Tranquilli
2024-12-18 16:25:30 +01:00
parent 023f48ff1c
commit 290a1043b1
141 changed files with 968 additions and 2902 deletions

View File

@@ -1,2 +1,4 @@
/target
/.idea
/src/codegen/grammar.rs
/src/codegen/grammar/

View File

@@ -1,10 +1,62 @@
load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("@rules_shell//shell:sh_binary.bzl", "sh_binary")
load("//misc/bazel:rust.bzl", "codeql_rust_binary")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps", "crate_deps")
(ra_ap_syntax_label,) = crate_deps(
["ra_ap_syntax"],
"rust/extractor",
)
ra_ap_syntax_workspace, _, _ = str(ra_ap_syntax_label).partition("//")
ungram_source = "%s//:rust.ungram" % ra_ap_syntax_workspace
genrule(
name = "ungram",
srcs = [
ungram_source,
"patches/rust.ungram.patch",
],
outs = ["rust.ungram"],
cmd = "\n".join([
"cp $(location %s) $@" % ungram_source,
"patch $@ $(location patches/rust.ungram.patch)",
]),
visibility = ["//rust/codegen:__pkg__"],
)
_codegen = [
"grammar.rs",
"grammar/ast_src.rs",
]
_codegen_srcs = ["@rust-analyzer-src//:xtask/src/codegen/%s" % f for f in _codegen]
_codegen_outs = ["src/codegen/%s" % f for f in _codegen]
genrule(
name = "codegen",
srcs = _codegen_srcs,
outs = _codegen_outs,
cmd = "\n".join(
["mkdir -p $(RULEDIR)/src/codegen/grammar"] +
[
"cp $(location %s) $(RULEDIR)/%s" % item
for item in zip(_codegen_srcs, _codegen_outs)
],
),
)
codeql_rust_binary(
name = "ast-generator",
srcs = glob(["src/**/*.rs"]),
srcs = glob(
["src/**/*.rs"],
exclude = ["src/codegen/**"],
) + [":codegen"],
aliases = aliases(),
args = ["$(rlocationpath :ungram)"],
data = [":ungram"],
proc_macro_deps = all_crate_deps(
proc_macro = True,
),
@@ -14,4 +66,27 @@ codeql_rust_binary(
),
)
write_file(
name = "update",
out = "update.sh",
content = [
"#!/bin/bash",
". misc/bazel/runfiles.sh",
'DST_DIR="$(dirname "$(rlocation "$1")")"',
'mkdir -p "$DST_DIR/src/codegen/grammar"',
] + [
'cp "$(rlocation "$%s")" "$DST_DIR/%s"' % item
for item in enumerate(_codegen_outs, 2)
],
is_executable = True,
)
sh_binary(
name = "inject-sources",
srcs = [":update"],
args = ["$(rlocationpath Cargo.toml)"] + ["$(rlocationpath %s)" % f for f in _codegen_outs],
data = ["Cargo.toml"] + _codegen_outs,
deps = ["//misc/bazel:sh_runfiles"],
)
exports_files(["Cargo.toml"])

View File

@@ -0,0 +1,4 @@
exports_files([
"xtask/src/codegen/grammar.rs",
"xtask/src/codegen/grammar/ast_src.rs",
])

View File

@@ -9,5 +9,5 @@ ungrammar = "1.16.1"
proc-macro2 = "1.0.47"
quote = "1.0.20"
either = "1.9.0"
ra_ap_stdx = "0.0.248"
stdx = {package = "ra_ap_stdx", version = "0.0.248"}
itertools = "0.12.0"

View File

@@ -0,0 +1,15 @@
This crate takes care of generating `ast.py` in the schema and `translate/generate.rs`
in the extractor.
It uses:
* `rust.ungram` from `ra_ap_syntax`
* a couple of slightly modified sources from `rust-analyzer` that are not published.
Both are fetched by bazel while building. In order to have proper IDE support and be
able to run cargo tooling in this crate, you can run
```bash
bazel run //rust/ast-generator:inject-sources
```
which will create the missing sources. Be aware that bazel will still use the sources taken
directly from `rust-analyzer`, not the ones in your working copy. These should not need to be
updated often though.

View File

@@ -0,0 +1,57 @@
diff --git a/xtask/src/codegen/grammar.rs b/xtask/src/codegen/grammar.rs
index e7534582f2..8bc9237737 100644
--- a/xtask/src/codegen/grammar.rs
+++ b/xtask/src/codegen/grammar.rs
@@ -3,6 +3,7 @@
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
+#![allow(warnings)]
#![allow(clippy::disallowed_types)]
use std::{
@@ -23,7 +24,7 @@ use crate::{
project_root,
};
-mod ast_src;
+pub mod ast_src;
use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
pub(crate) fn generate(check: bool) {
@@ -624,7 +625,7 @@ fn pluralize(s: &str) -> String {
}
impl Field {
- fn is_many(&self) -> bool {
+ pub fn is_many(&self) -> bool {
matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
}
fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
@@ -636,7 +637,7 @@ impl Field {
_ => None,
}
}
- fn method_name(&self) -> String {
+ pub fn method_name(&self) -> String {
match self {
Field::Token(name) => {
let name = match name.as_str() {
@@ -682,7 +683,7 @@ impl Field {
}
}
}
- fn ty(&self) -> proc_macro2::Ident {
+ pub fn ty(&self) -> proc_macro2::Ident {
match self {
Field::Token(_) => format_ident!("SyntaxToken"),
Field::Node { ty, .. } => format_ident!("{}", ty),
@@ -699,7 +700,7 @@ fn clean_token_name(name: &str) -> String {
}
}
-fn lower(grammar: &Grammar) -> AstSrc {
+pub fn lower(grammar: &Grammar) -> AstSrc {
let mut res = AstSrc {
tokens:
"Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"

View File

@@ -0,0 +1,11 @@
--- rust.ungram 2006-07-24 03:21:28.000000000 +0200
+++ rust.ungram 2024-12-18 14:13:01.191592682 +0100
@@ -414,7 +414,7 @@
// option := "pure" / "nomem" / "readonly" / "preserves_flags" / "noreturn" / "nostack" / "att_syntax" / "raw"
AsmOption = 'pure' | 'nomem' | 'readonly' | 'preserves_flags' | 'noreturn' | 'nostack' | 'att_syntax' | 'raw' | 'may_unwind'
// options := "options(" option *("," option) [","] ")"
-AsmOptions = 'options' '(' AsmOption *(',' AsmOption) ','? ')'
+AsmOptions = 'options' '(' (AsmOption (',' AsmOption)* ','?) ')'
AsmLabel = 'label' BlockExpr
AsmSym = 'sym' Path
AsmConst = 'const' Expr

View File

@@ -1,754 +0,0 @@
// Rust Un-Grammar.
//
// This grammar specifies the structure of Rust's concrete syntax tree.
// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
// Tokens are processed -- contextual keywords are recognised, compound operators glued.
//
// Legend:
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- keyword or punct token (terminal)
// '#ident' -- generic token (terminal)
// '@ident' -- literal token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
// A? -- zero or one repetition
// (A) -- same as A
// label:A -- suggested name for field of AST node
//*************************//
// Paths //
//*************************//
Name =
'#ident' | 'self'
NameRef =
'#ident' | '@int_number' | 'self' | 'super' | 'crate' | 'Self'
Lifetime =
'#lifetime_ident'
Path =
(qualifier:Path '::')? segment:PathSegment
PathSegment =
'::'? NameRef
| NameRef GenericArgList?
| NameRef ParenthesizedArgList RetType?
| NameRef ReturnTypeSyntax
| '<' Type ('as' PathType)? '>'
ReturnTypeSyntax =
'(' '..' ')'
//*************************//
// Generics //
//*************************//
ParenthesizedArgList =
'::'? '(' (TypeArg (',' TypeArg)* ','?)? ')'
GenericArgList =
'::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
GenericArg =
TypeArg
| AssocTypeArg
| LifetimeArg
| ConstArg
TypeArg =
Type
AssocTypeArg =
NameRef
(GenericArgList | ParamList RetType? | ReturnTypeSyntax)?
(':' TypeBoundList | ('=' Type | ConstArg))
LifetimeArg =
Lifetime
ConstArg =
Expr
GenericParamList =
'<' (GenericParam (',' GenericParam)* ','?)? '>'
GenericParam =
ConstParam
| LifetimeParam
| TypeParam
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:Type)?
ConstParam =
Attr* 'const' Name ':' Type
('=' default_val:ConstArg)?
LifetimeParam =
Attr* Lifetime (':' TypeBoundList?)?
WhereClause =
'where' predicates:(WherePred (',' WherePred)* ','?)
WherePred =
('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
//*************************//
// Macro //
//*************************//
MacroCall =
Attr* Path '!' TokenTree ';'?
TokenTree =
'(' ')'
| '{' '}'
| '[' ']'
MacroItems =
Item*
MacroStmts =
statements:Stmt*
Expr?
Attr =
'#' '!'? '[' Meta ']'
Meta =
'unsafe' '(' Path ('=' Expr | TokenTree)? ')'
| Path ('=' Expr | TokenTree)?
//*************************//
// Items //
//*************************//
SourceFile =
'#shebang'?
Attr*
Item*
Item =
Const
| Enum
| ExternBlock
| ExternCrate
| Fn
| Impl
| MacroCall
| MacroRules
| MacroDef
| Module
| Static
| Struct
| Trait
| TraitAlias
| TypeAlias
| Union
| Use
MacroRules =
Attr* Visibility?
'macro_rules' '!' Name
TokenTree
MacroDef =
Attr* Visibility?
'macro' Name args:TokenTree?
body:TokenTree
Module =
Attr* Visibility?
'mod' Name
(ItemList | ';')
ItemList =
'{' Attr* Item* '}'
ExternCrate =
Attr* Visibility?
'extern' 'crate' NameRef Rename? ';'
Rename =
'as' (Name | '_')
Use =
Attr* Visibility?
'use' UseTree ';'
UseTree =
(Path? '::')? ('*' | UseTreeList)
| Path Rename?
UseTreeList =
'{' (UseTree (',' UseTree)* ','?)? '}'
Fn =
Attr* Visibility?
'default'? 'const'? 'async'? 'gen'? 'unsafe'? 'safe'? Abi?
'fn' Name GenericParamList? ParamList RetType? WhereClause?
(body:BlockExpr | ';')
Abi =
'extern' '@string'?
ParamList =
'('(
SelfParam
| (SelfParam ',')? (Param (',' Param)* ','?)?
)')'
| '|' (Param (',' Param)* ','?)? '|'
SelfParam =
Attr* (
('&' Lifetime?)? 'mut'? Name
| 'mut'? Name ':' Type
)
Param =
Attr* (
Pat (':' Type)?
| Type
| '...'
)
RetType =
'->' Type
TypeAlias =
Attr* Visibility?
'default'?
'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
('=' Type)? ';'
Struct =
Attr* Visibility?
'struct' Name GenericParamList? (
WhereClause? (RecordFieldList | ';')
| TupleFieldList WhereClause? ';'
)
RecordFieldList =
'{' fields:(RecordField (',' RecordField)* ','?)? '}'
RecordField =
Attr* Visibility?
Name ':' Type
TupleFieldList =
'(' fields:(TupleField (',' TupleField)* ','?)? ')'
TupleField =
Attr* Visibility?
Type
FieldList =
RecordFieldList
| TupleFieldList
Enum =
Attr* Visibility?
'enum' Name GenericParamList? WhereClause?
VariantList
VariantList =
'{' (Variant (',' Variant)* ','?)? '}'
Variant =
Attr* Visibility?
Name FieldList? ('=' Expr)?
Union =
Attr* Visibility?
'union' Name GenericParamList? WhereClause?
RecordFieldList
// A Data Type.
//
// Not used directly in the grammar, but handy to have anyway.
Adt =
Enum
| Struct
| Union
Const =
Attr* Visibility?
'default'?
'const' (Name | '_') ':' Type
('=' body:Expr)? ';'
Static =
Attr* Visibility?
'unsafe'? 'safe'?
'static' 'mut'? Name ':' Type
('=' body:Expr)? ';'
Trait =
Attr* Visibility?
'unsafe'? 'auto'?
'trait' Name GenericParamList?
(':' TypeBoundList?)? WhereClause? AssocItemList
TraitAlias =
Attr* Visibility?
'trait' Name GenericParamList? '=' TypeBoundList? WhereClause? ';'
AssocItemList =
'{' Attr* AssocItem* '}'
AssocItem =
Const
| Fn
| MacroCall
| TypeAlias
Impl =
Attr* Visibility?
'default'? 'unsafe'?
'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
AssocItemList
ExternBlock =
Attr* 'unsafe'? Abi ExternItemList
ExternItemList =
'{' Attr* ExternItem* '}'
ExternItem =
Fn
| MacroCall
| Static
| TypeAlias
Visibility =
'pub' ('(' 'in'? Path ')')?
//****************************//
// Statements and Expressions //
//****************************//
Stmt =
';'
| ExprStmt
| Item
| LetStmt
LetStmt =
Attr* 'let' Pat (':' Type)?
'=' initializer:Expr
LetElse?
';'
LetElse =
'else' BlockExpr
ExprStmt =
Expr ';'?
Expr =
ArrayExpr
| AsmExpr
| AwaitExpr
| BinExpr
| BlockExpr
| BreakExpr
| CallExpr
| CastExpr
| ClosureExpr
| ContinueExpr
| FieldExpr
| ForExpr
| FormatArgsExpr
| IfExpr
| IndexExpr
| Literal
| LoopExpr
| MacroExpr
| MatchExpr
| MethodCallExpr
| OffsetOfExpr
| ParenExpr
| PathExpr
| PrefixExpr
| RangeExpr
| RecordExpr
| RefExpr
| ReturnExpr
| BecomeExpr
| TryExpr
| TupleExpr
| WhileExpr
| YieldExpr
| YeetExpr
| LetExpr
| UnderscoreExpr
OffsetOfExpr =
Attr* 'builtin' '#' 'offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')'
// asm := "asm!(" format_string *("," format_string) *("," operand) [","] ")"
// global_asm := "global_asm!(" format_string *("," format_string) *("," operand) [","] ")"
// format_string := STRING_LITERAL / RAW_STRING_LITERAL
AsmExpr =
Attr* 'builtin' '#' 'asm' '(' template:(Expr (',' Expr)*) (AsmPiece (',' AsmPiece)*)? ','? ')'
// operand_expr := expr / "_" / expr "=>" expr / expr "=>" "_"
AsmOperandExpr = in_expr:Expr ('=>' out_expr:Expr)?
// dir_spec := "in" / "out" / "lateout" / "inout" / "inlateout"
AsmDirSpec = 'in' | 'out' | 'lateout' | 'inout' | 'inlateout'
// reg_spec := <register class> / "\"" <explicit register> "\""
AsmRegSpec = '@string' | NameRef
// reg_operand := [ident "="] dir_spec "(" reg_spec ")" operand_expr
AsmRegOperand = AsmDirSpec '(' AsmRegSpec ')' AsmOperandExpr
// clobber_abi := "clobber_abi(" <abi> *("," <abi>) [","] ")"
AsmClobberAbi = 'clobber_abi' '(' ('@string' (',' '@string')* ','?) ')'
// option := "pure" / "nomem" / "readonly" / "preserves_flags" / "noreturn" / "nostack" / "att_syntax" / "raw"
AsmOption = 'pure' | 'nomem' | 'readonly' | 'preserves_flags' | 'noreturn' | 'nostack' | 'att_syntax' | 'raw' | 'may_unwind'
// options := "options(" option *("," option) [","] ")"
AsmOptions = 'options' '(' (AsmOption (',' AsmOption)* ','?) ')'
AsmLabel = 'label' BlockExpr
AsmSym = 'sym' Path
AsmConst = 'const' Expr
// operand := reg_operand / clobber_abi / options
AsmOperand = AsmRegOperand | AsmLabel | AsmSym | AsmConst
AsmOperandNamed = (Name '=')? AsmOperand
AsmPiece = AsmOperandNamed | AsmClobberAbi | AsmOptions
FormatArgsExpr =
Attr* 'builtin' '#' 'format_args' '('
template:Expr
(',' args:(FormatArgsArg (',' FormatArgsArg)* ','?)? )?
')'
FormatArgsArg =
(Name '=')? Expr
MacroExpr =
MacroCall
Literal =
Attr* value:(
'@int_number' | '@float_number'
| '@string' | '@raw_string'
| '@byte_string' | '@raw_byte_string'
| '@c_string' | '@raw_c_string'
| '@char' | '@byte'
| 'true' | 'false'
)
PathExpr =
Attr* Path
StmtList =
'{'
Attr*
statements:Stmt*
tail_expr:Expr?
'}'
RefExpr =
Attr* '&' (('raw' 'const'?)| ('raw'? 'mut') ) Expr
TryExpr =
Attr* Expr '?'
BlockExpr =
Attr* Label? ('try' | 'unsafe' | ('async' 'move'?) | ('gen' 'move'?) | 'const') StmtList
PrefixExpr =
Attr* op:('-' | '!' | '*') Expr
BinExpr =
Attr*
lhs:Expr
op:(
'||' | '&&'
| '==' | '!=' | '<=' | '>=' | '<' | '>'
| '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&'
| '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^='
)
rhs:Expr
CastExpr =
Attr* Expr 'as' Type
ParenExpr =
Attr* '(' Attr* Expr ')'
ArrayExpr =
Attr* '[' Attr* (
(Expr (',' Expr)* ','?)?
| Expr ';' Expr
) ']'
IndexExpr =
Attr* base:Expr '[' index:Expr ']'
TupleExpr =
Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')'
RecordExpr =
Path RecordExprFieldList
RecordExprFieldList =
'{'
Attr*
fields:(RecordExprField (',' RecordExprField)* ','?)?
('..' spread:Expr?)?
'}'
RecordExprField =
Attr* (NameRef ':')? Expr
CallExpr =
Attr* Expr ArgList
ArgList =
'(' args:(Expr (',' Expr)* ','?)? ')'
MethodCallExpr =
Attr* receiver:Expr '.' NameRef GenericArgList? ArgList
FieldExpr =
Attr* Expr '.' NameRef
ClosureExpr =
Attr* ClosureBinder? 'const'? 'static'? 'async'? 'gen'? 'move'? ParamList RetType?
body:Expr
ClosureBinder =
'for' GenericParamList
IfExpr =
Attr* 'if' condition:Expr then_branch:BlockExpr
('else' else_branch:(IfExpr | BlockExpr))?
LoopExpr =
Attr* Label? 'loop'
loop_body:BlockExpr
ForExpr =
Attr* Label? 'for' Pat 'in' iterable:Expr
loop_body:BlockExpr
WhileExpr =
Attr* Label? 'while' condition:Expr
loop_body:BlockExpr
Label =
Lifetime ':'
BreakExpr =
Attr* 'break' Lifetime? Expr?
ContinueExpr =
Attr* 'continue' Lifetime?
RangeExpr =
Attr* start:Expr? op:('..' | '..=') end:Expr?
MatchExpr =
Attr* 'match' Expr MatchArmList
MatchArmList =
'{'
Attr*
arms:MatchArm*
'}'
MatchArm =
Attr* Pat guard:MatchGuard? '=>' Expr ','?
MatchGuard =
'if' condition:Expr
ReturnExpr =
Attr* 'return' Expr?
BecomeExpr =
Attr* 'become' Expr
YieldExpr =
Attr* 'yield' Expr?
YeetExpr =
Attr* 'do' 'yeet' Expr?
LetExpr =
Attr* 'let' Pat '=' Expr
UnderscoreExpr =
Attr* '_'
AwaitExpr =
Attr* Expr '.' 'await'
//*************************//
// Types //
//*************************//
Type =
ArrayType
| DynTraitType
| FnPtrType
| ForType
| ImplTraitType
| InferType
| MacroType
| NeverType
| ParenType
| PathType
| PtrType
| RefType
| SliceType
| TupleType
ParenType =
'(' Type ')'
NeverType =
'!'
MacroType =
MacroCall
PathType =
Path
TupleType =
'(' fields:(Type (',' Type)* ','?)? ')'
PtrType =
'*' ('const' | 'mut') Type
RefType =
'&' Lifetime? 'mut'? Type
ArrayType =
'[' Type ';' ConstArg ']'
SliceType =
'[' Type ']'
InferType =
'_'
FnPtrType =
'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType?
ForType =
'for' GenericParamList Type
ImplTraitType =
'impl' TypeBoundList
DynTraitType =
'dyn'? TypeBoundList
TypeBoundList =
bounds:(TypeBound ('+' TypeBound)* '+'?)
TypeBound =
Lifetime
| ('~' 'const' | 'const')? 'async'? '?'? Type
| 'use' UseBoundGenericArgs
UseBoundGenericArgs =
'<' (UseBoundGenericArg (',' UseBoundGenericArg)* ','?)? '>'
UseBoundGenericArg =
Lifetime
| NameRef
//************************//
// Patterns //
//************************//
Pat =
IdentPat
| BoxPat
| RestPat
| LiteralPat
| MacroPat
| OrPat
| ParenPat
| PathPat
| WildcardPat
| RangePat
| RecordPat
| RefPat
| SlicePat
| TuplePat
| TupleStructPat
| ConstBlockPat
LiteralPat =
'-'? Literal
IdentPat =
Attr* 'ref'? 'mut'? Name ('@' Pat)?
WildcardPat =
'_'
RangePat =
// 1..
start:Pat op:('..' | '..=')
// 1..2
| start:Pat op:('..' | '..=') end:Pat
// ..2
| op:('..' | '..=') end:Pat
RefPat =
'&' 'mut'? Pat
RecordPat =
Path RecordPatFieldList
RecordPatFieldList =
'{'
fields:(RecordPatField (',' RecordPatField)* ','?)?
RestPat?
'}'
RecordPatField =
Attr* (NameRef ':')? Pat
TupleStructPat =
Path '(' fields:(Pat (',' Pat)* ','?)? ')'
TuplePat =
'(' fields:(Pat (',' Pat)* ','?)? ')'
ParenPat =
'(' Pat ')'
SlicePat =
'[' (Pat (',' Pat)* ','?)? ']'
PathPat =
Path
OrPat =
'|'? (Pat ('|' Pat)*)
BoxPat =
'box' Pat
RestPat =
Attr* '..'
MacroPat =
MacroCall
ConstBlockPat =
'const' BlockExpr

File diff suppressed because it is too large Load Diff

View File

@@ -1,287 +0,0 @@
//! Defines input for code generation process.
use quote::ToTokens;
use crate::codegen::grammar::to_upper_snake_case;
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
pub(crate) punct: &'static [(&'static str, &'static str)],
pub(crate) keywords: &'static [&'static str],
pub(crate) contextual_keywords: &'static [&'static str],
pub(crate) literals: &'static [&'static str],
pub(crate) tokens: &'static [&'static str],
pub(crate) nodes: &'static [&'static str],
pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)],
}
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(super) enum Edition {
Edition2015,
Edition2018,
Edition2021,
Edition2024,
}
impl ToTokens for Edition {
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
match self {
Edition::Edition2015 => {
tokens.extend(quote::quote! { Edition::Edition2015 });
}
Edition::Edition2018 => {
tokens.extend(quote::quote! { Edition::Edition2018 });
}
Edition::Edition2021 => {
tokens.extend(quote::quote! { Edition::Edition2021 });
}
Edition::Edition2024 => {
tokens.extend(quote::quote! { Edition::Edition2024 });
}
}
}
}
/// The punctuations of the language.
const PUNCT: &[(&str, &str)] = &[
// KEEP THE DOLLAR AT THE TOP ITS SPECIAL
("$", "DOLLAR"),
(";", "SEMICOLON"),
(",", "COMMA"),
("(", "L_PAREN"),
(")", "R_PAREN"),
("{", "L_CURLY"),
("}", "R_CURLY"),
("[", "L_BRACK"),
("]", "R_BRACK"),
("<", "L_ANGLE"),
(">", "R_ANGLE"),
("@", "AT"),
("#", "POUND"),
("~", "TILDE"),
("?", "QUESTION"),
("&", "AMP"),
("|", "PIPE"),
("+", "PLUS"),
("*", "STAR"),
("/", "SLASH"),
("^", "CARET"),
("%", "PERCENT"),
("_", "UNDERSCORE"),
(".", "DOT"),
("..", "DOT2"),
("...", "DOT3"),
("..=", "DOT2EQ"),
(":", "COLON"),
("::", "COLON2"),
("=", "EQ"),
("==", "EQ2"),
("=>", "FAT_ARROW"),
("!", "BANG"),
("!=", "NEQ"),
("-", "MINUS"),
("->", "THIN_ARROW"),
("<=", "LTEQ"),
(">=", "GTEQ"),
("+=", "PLUSEQ"),
("-=", "MINUSEQ"),
("|=", "PIPEEQ"),
("&=", "AMPEQ"),
("^=", "CARETEQ"),
("/=", "SLASHEQ"),
("*=", "STAREQ"),
("%=", "PERCENTEQ"),
("&&", "AMP2"),
("||", "PIPE2"),
("<<", "SHL"),
(">>", "SHR"),
("<<=", "SHLEQ"),
(">>=", "SHREQ"),
];
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],;
const EOF: &str = "EOF";
const RESERVED: &[&str] = &[
"abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
"virtual", "yield",
];
// keywords that are keywords only in specific parse contexts
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] = &[
"macro_rules",
"union",
"default",
"raw",
"dyn",
"auto",
"yeet",
"safe",
];
// keywords we use for special macro expansions
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
"asm",
"att_syntax",
"builtin",
"clobber_abi",
"format_args",
// "in",
"inlateout",
"inout",
"label",
"lateout",
"may_unwind",
"nomem",
"noreturn",
"nostack",
"offset_of",
"options",
"out",
"preserves_flags",
"pure",
// "raw",
"readonly",
"sym",
];
// keywords that are keywords depending on the edition
const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[
("try", Edition::Edition2018),
("dyn", Edition::Edition2018),
("async", Edition::Edition2018),
("await", Edition::Edition2018),
("gen", Edition::Edition2024),
];
pub(crate) fn generate_kind_src(
nodes: &[AstNodeSrc],
enums: &[AstEnumSrc],
grammar: &ungrammar::Grammar,
) -> KindsSrc {
let mut contextual_keywords: Vec<&_> = CONTEXTUAL_KEYWORDS
.iter()
.chain(CONTEXTUAL_BUILTIN_KEYWORDS)
.copied()
.collect();
let mut keywords: Vec<&_> = Vec::new();
let mut tokens: Vec<&_> = TOKENS.to_vec();
let mut literals: Vec<&_> = Vec::new();
let mut used_puncts = vec![false; PUNCT.len()];
// Mark $ as used
used_puncts[0] = true;
grammar.tokens().for_each(|token| {
let name = &*grammar[token].name;
if name == EOF {
return;
}
match name.split_at(1) {
("@", lit) if !lit.is_empty() => {
literals.push(String::leak(to_upper_snake_case(lit)));
}
("#", token) if !token.is_empty() => {
tokens.push(String::leak(to_upper_snake_case(token)));
}
_ if contextual_keywords.contains(&name) => {}
_ if name.chars().all(char::is_alphabetic) => {
keywords.push(String::leak(name.to_owned()));
}
_ => {
let idx = PUNCT
.iter()
.position(|(punct, _)| punct == &name)
.unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
used_puncts[idx] = true;
}
}
});
PUNCT
.iter()
.zip(used_puncts)
.filter(|(_, used)| !used)
.for_each(|((punct, _), _)| {
panic!("Punctuation {punct:?} is not used in grammar");
});
keywords.extend(RESERVED.iter().copied());
keywords.sort();
keywords.dedup();
contextual_keywords.sort();
contextual_keywords.dedup();
let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec();
edition_dependent_keywords.sort();
edition_dependent_keywords.dedup();
keywords.retain(|&it| !contextual_keywords.contains(&it));
keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it));
// we leak things here for simplicity, that way we don't have to deal with lifetimes
// The execution is a one shot job so thats fine
let nodes = nodes
.iter()
.map(|it| &it.name)
.chain(enums.iter().map(|it| &it.name))
.map(|it| to_upper_snake_case(it))
.map(String::leak)
.map(|it| &*it)
.collect();
let nodes = Vec::leak(nodes);
nodes.sort();
let keywords = Vec::leak(keywords);
let contextual_keywords = Vec::leak(contextual_keywords);
let edition_dependent_keywords = Vec::leak(edition_dependent_keywords);
let literals = Vec::leak(literals);
literals.sort();
let tokens = Vec::leak(tokens);
tokens.sort();
KindsSrc {
punct: PUNCT,
nodes,
keywords,
contextual_keywords,
edition_dependent_keywords,
literals,
tokens,
}
}
#[derive(Default, Debug)]
pub(crate) struct AstSrc {
pub(crate) tokens: Vec<String>,
pub(crate) nodes: Vec<AstNodeSrc>,
pub(crate) enums: Vec<AstEnumSrc>,
}
#[derive(Debug)]
pub(crate) struct AstNodeSrc {
pub(crate) doc: Vec<String>,
pub(crate) name: String,
pub(crate) traits: Vec<String>,
pub(crate) fields: Vec<Field>,
}
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Field {
Token(String),
Node {
name: String,
ty: String,
cardinality: Cardinality,
},
}
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
Optional,
Many,
}
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
pub(crate) doc: Vec<String>,
pub(crate) name: String,
pub(crate) traits: Vec<String>,
pub(crate) variants: Vec<String>,
}

View File

@@ -10,7 +10,7 @@ use ungrammar::Grammar;
fn project_root() -> PathBuf {
let dir =
env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned());
env::var("CARGO_MANIFEST_DIR").unwrap().to_owned();
PathBuf::from(dir).parent().unwrap().to_owned()
}
@@ -591,10 +591,11 @@ impl Translator<'_> {{
}
fn main() -> std::io::Result<()> {
let grammar: Grammar = fs::read_to_string(project_root().join("ast-generator/rust.ungram"))
.unwrap()
let grammar = PathBuf::from("..").join(env::args().nth(1).expect("grammar file path required"));
let grammar: Grammar = fs::read_to_string(&grammar)
.expect(&format!("Failed to parse grammar file: {}", grammar.display()))
.parse()
.unwrap();
.expect("Failed to parse grammar");
let mut grammar = codegen::grammar::lower(&grammar);
grammar.enums.retain(|x| x.name != "Adt");