Rust: fetch ungram and rust-analyzer code instead of checking it in

* The ungram file is now taken from the rust-analyzer dependencies
  pulled in by bazel
* the grammar parsing code is not published, so it must be taken
  directly from rust-analyzer code. That part should be less prone to
  updates than the ungram file, so it does not necessarily need to be
  in sync with the rust-analyzer version that is used elsewhere.
* both need some patches. The former is patched during build, the latter
  during loading in `MODULE.bazel`.
This commit is contained in:
Paolo Tranquilli
2024-12-18 16:25:30 +01:00
parent 023f48ff1c
commit 290a1043b1
141 changed files with 968 additions and 2902 deletions

View File

@@ -1,2 +1,4 @@
/target
/.idea
/src/codegen/grammar.rs
/src/codegen/grammar/

View File

@@ -1,10 +1,62 @@
load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("@rules_shell//shell:sh_binary.bzl", "sh_binary")
load("//misc/bazel:rust.bzl", "codeql_rust_binary")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps", "crate_deps")
(ra_ap_syntax_label,) = crate_deps(
["ra_ap_syntax"],
"rust/extractor",
)
ra_ap_syntax_workspace, _, _ = str(ra_ap_syntax_label).partition("//")
ungram_source = "%s//:rust.ungram" % ra_ap_syntax_workspace
genrule(
name = "ungram",
srcs = [
ungram_source,
"patches/rust.ungram.patch",
],
outs = ["rust.ungram"],
cmd = "\n".join([
"cp $(location %s) $@" % ungram_source,
"patch $@ $(location patches/rust.ungram.patch)",
]),
visibility = ["//rust/codegen:__pkg__"],
)
_codegen = [
"grammar.rs",
"grammar/ast_src.rs",
]
_codegen_srcs = ["@rust-analyzer-src//:xtask/src/codegen/%s" % f for f in _codegen]
_codegen_outs = ["src/codegen/%s" % f for f in _codegen]
genrule(
name = "codegen",
srcs = _codegen_srcs,
outs = _codegen_outs,
cmd = "\n".join(
["mkdir -p $(RULEDIR)/src/codegen/grammar"] +
[
"cp $(location %s) $(RULEDIR)/%s" % item
for item in zip(_codegen_srcs, _codegen_outs)
],
),
)
codeql_rust_binary(
name = "ast-generator",
srcs = glob(["src/**/*.rs"]),
srcs = glob(
["src/**/*.rs"],
exclude = ["src/codegen/**"],
) + [":codegen"],
aliases = aliases(),
args = ["$(rlocationpath :ungram)"],
data = [":ungram"],
proc_macro_deps = all_crate_deps(
proc_macro = True,
),
@@ -14,4 +66,27 @@ codeql_rust_binary(
),
)
write_file(
name = "update",
out = "update.sh",
content = [
"#!/bin/bash",
". misc/bazel/runfiles.sh",
'DST_DIR="$(dirname "$(rlocation "$1")")"',
'mkdir -p "$DST_DIR/src/codegen/grammar"',
] + [
'cp "$(rlocation "$%s")" "$DST_DIR/%s"' % item
for item in enumerate(_codegen_outs, 2)
],
is_executable = True,
)
sh_binary(
name = "inject-sources",
srcs = [":update"],
args = ["$(rlocationpath Cargo.toml)"] + ["$(rlocationpath %s)" % f for f in _codegen_outs],
data = ["Cargo.toml"] + _codegen_outs,
deps = ["//misc/bazel:sh_runfiles"],
)
exports_files(["Cargo.toml"])

View File

@@ -0,0 +1,4 @@
exports_files([
"xtask/src/codegen/grammar.rs",
"xtask/src/codegen/grammar/ast_src.rs",
])

View File

@@ -9,5 +9,5 @@ ungrammar = "1.16.1"
proc-macro2 = "1.0.47"
quote = "1.0.20"
either = "1.9.0"
ra_ap_stdx = "0.0.248"
stdx = {package = "ra_ap_stdx", version = "0.0.248"}
itertools = "0.12.0"

View File

@@ -0,0 +1,15 @@
This crate takes care of generating `ast.py` in the schema and `translate/generate.rs`
in the extractor.
It uses:
* `rust.ungram` from `ra_ap_syntax`
* a couple of slightly modified sources from `rust-analyzer` that are not published.
Both are fetched by bazel while building. In order to have proper IDE support and be
able to run cargo tooling in this crate, you can run
```bash
bazel run //rust/ast-generator:inject-sources
```
which will create the missing sources. Be aware that bazel will still use the sources taken
directly from `rust-analyzer`, not the ones in your working copy. These should not need to be
updated often though.

View File

@@ -0,0 +1,57 @@
diff --git a/xtask/src/codegen/grammar.rs b/xtask/src/codegen/grammar.rs
index e7534582f2..8bc9237737 100644
--- a/xtask/src/codegen/grammar.rs
+++ b/xtask/src/codegen/grammar.rs
@@ -3,6 +3,7 @@
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `syntax::AstNode`.
+#![allow(warnings)]
#![allow(clippy::disallowed_types)]
use std::{
@@ -23,7 +24,7 @@ use crate::{
project_root,
};
-mod ast_src;
+pub mod ast_src;
use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc};
pub(crate) fn generate(check: bool) {
@@ -624,7 +625,7 @@ fn pluralize(s: &str) -> String {
}
impl Field {
- fn is_many(&self) -> bool {
+ pub fn is_many(&self) -> bool {
matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
}
fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
@@ -636,7 +637,7 @@ impl Field {
_ => None,
}
}
- fn method_name(&self) -> String {
+ pub fn method_name(&self) -> String {
match self {
Field::Token(name) => {
let name = match name.as_str() {
@@ -682,7 +683,7 @@ impl Field {
}
}
}
- fn ty(&self) -> proc_macro2::Ident {
+ pub fn ty(&self) -> proc_macro2::Ident {
match self {
Field::Token(_) => format_ident!("SyntaxToken"),
Field::Node { ty, .. } => format_ident!("{}", ty),
@@ -699,7 +700,7 @@ fn clean_token_name(name: &str) -> String {
}
}
-fn lower(grammar: &Grammar) -> AstSrc {
+pub fn lower(grammar: &Grammar) -> AstSrc {
let mut res = AstSrc {
tokens:
"Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident"

View File

@@ -0,0 +1,11 @@
--- rust.ungram 2006-07-24 03:21:28.000000000 +0200
+++ rust.ungram 2024-12-18 14:13:01.191592682 +0100
@@ -414,7 +414,7 @@
// option := "pure" / "nomem" / "readonly" / "preserves_flags" / "noreturn" / "nostack" / "att_syntax" / "raw"
AsmOption = 'pure' | 'nomem' | 'readonly' | 'preserves_flags' | 'noreturn' | 'nostack' | 'att_syntax' | 'raw' | 'may_unwind'
// options := "options(" option *("," option) [","] ")"
-AsmOptions = 'options' '(' AsmOption *(',' AsmOption) ','? ')'
+AsmOptions = 'options' '(' (AsmOption (',' AsmOption)* ','?) ')'
AsmLabel = 'label' BlockExpr
AsmSym = 'sym' Path
AsmConst = 'const' Expr

View File

@@ -1,754 +0,0 @@
// Rust Un-Grammar.
//
// This grammar specifies the structure of Rust's concrete syntax tree.
// It does not specify parsing rules (ambiguities, precedence, etc are out of scope).
// Tokens are processed -- contextual keywords are recognised, compound operators glued.
//
// Legend:
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- keyword or punct token (terminal)
// '#ident' -- generic token (terminal)
// '@ident' -- literal token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
// A? -- zero or one repetition
// (A) -- same as A
// label:A -- suggested name for field of AST node
//*************************//
// Paths //
//*************************//
Name =
'#ident' | 'self'
NameRef =
'#ident' | '@int_number' | 'self' | 'super' | 'crate' | 'Self'
Lifetime =
'#lifetime_ident'
Path =
(qualifier:Path '::')? segment:PathSegment
PathSegment =
'::'? NameRef
| NameRef GenericArgList?
| NameRef ParenthesizedArgList RetType?
| NameRef ReturnTypeSyntax
| '<' Type ('as' PathType)? '>'
ReturnTypeSyntax =
'(' '..' ')'
//*************************//
// Generics //
//*************************//
ParenthesizedArgList =
'::'? '(' (TypeArg (',' TypeArg)* ','?)? ')'
GenericArgList =
'::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
GenericArg =
TypeArg
| AssocTypeArg
| LifetimeArg
| ConstArg
TypeArg =
Type
AssocTypeArg =
NameRef
(GenericArgList | ParamList RetType? | ReturnTypeSyntax)?
(':' TypeBoundList | ('=' Type | ConstArg))
LifetimeArg =
Lifetime
ConstArg =
Expr
GenericParamList =
'<' (GenericParam (',' GenericParam)* ','?)? '>'
GenericParam =
ConstParam
| LifetimeParam
| TypeParam
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:Type)?
ConstParam =
Attr* 'const' Name ':' Type
('=' default_val:ConstArg)?
LifetimeParam =
Attr* Lifetime (':' TypeBoundList?)?
WhereClause =
'where' predicates:(WherePred (',' WherePred)* ','?)
WherePred =
('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
//*************************//
// Macro //
//*************************//
MacroCall =
Attr* Path '!' TokenTree ';'?
TokenTree =
'(' ')'
| '{' '}'
| '[' ']'
MacroItems =
Item*
MacroStmts =
statements:Stmt*
Expr?
Attr =
'#' '!'? '[' Meta ']'
Meta =
'unsafe' '(' Path ('=' Expr | TokenTree)? ')'
| Path ('=' Expr | TokenTree)?
//*************************//
// Items //
//*************************//
SourceFile =
'#shebang'?
Attr*
Item*
Item =
Const
| Enum
| ExternBlock
| ExternCrate
| Fn
| Impl
| MacroCall
| MacroRules
| MacroDef
| Module
| Static
| Struct
| Trait
| TraitAlias
| TypeAlias
| Union
| Use
MacroRules =
Attr* Visibility?
'macro_rules' '!' Name
TokenTree
MacroDef =
Attr* Visibility?
'macro' Name args:TokenTree?
body:TokenTree
Module =
Attr* Visibility?
'mod' Name
(ItemList | ';')
ItemList =
'{' Attr* Item* '}'
ExternCrate =
Attr* Visibility?
'extern' 'crate' NameRef Rename? ';'
Rename =
'as' (Name | '_')
Use =
Attr* Visibility?
'use' UseTree ';'
UseTree =
(Path? '::')? ('*' | UseTreeList)
| Path Rename?
UseTreeList =
'{' (UseTree (',' UseTree)* ','?)? '}'
Fn =
Attr* Visibility?
'default'? 'const'? 'async'? 'gen'? 'unsafe'? 'safe'? Abi?
'fn' Name GenericParamList? ParamList RetType? WhereClause?
(body:BlockExpr | ';')
Abi =
'extern' '@string'?
ParamList =
'('(
SelfParam
| (SelfParam ',')? (Param (',' Param)* ','?)?
)')'
| '|' (Param (',' Param)* ','?)? '|'
SelfParam =
Attr* (
('&' Lifetime?)? 'mut'? Name
| 'mut'? Name ':' Type
)
Param =
Attr* (
Pat (':' Type)?
| Type
| '...'
)
RetType =
'->' Type
TypeAlias =
Attr* Visibility?
'default'?
'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
('=' Type)? ';'
Struct =
Attr* Visibility?
'struct' Name GenericParamList? (
WhereClause? (RecordFieldList | ';')
| TupleFieldList WhereClause? ';'
)
RecordFieldList =
'{' fields:(RecordField (',' RecordField)* ','?)? '}'
RecordField =
Attr* Visibility?
Name ':' Type
TupleFieldList =
'(' fields:(TupleField (',' TupleField)* ','?)? ')'
TupleField =
Attr* Visibility?
Type
FieldList =
RecordFieldList
| TupleFieldList
Enum =
Attr* Visibility?
'enum' Name GenericParamList? WhereClause?
VariantList
VariantList =
'{' (Variant (',' Variant)* ','?)? '}'
Variant =
Attr* Visibility?
Name FieldList? ('=' Expr)?
Union =
Attr* Visibility?
'union' Name GenericParamList? WhereClause?
RecordFieldList
// A Data Type.
//
// Not used directly in the grammar, but handy to have anyway.
Adt =
Enum
| Struct
| Union
Const =
Attr* Visibility?
'default'?
'const' (Name | '_') ':' Type
('=' body:Expr)? ';'
Static =
Attr* Visibility?
'unsafe'? 'safe'?
'static' 'mut'? Name ':' Type
('=' body:Expr)? ';'
Trait =
Attr* Visibility?
'unsafe'? 'auto'?
'trait' Name GenericParamList?
(':' TypeBoundList?)? WhereClause? AssocItemList
TraitAlias =
Attr* Visibility?
'trait' Name GenericParamList? '=' TypeBoundList? WhereClause? ';'
AssocItemList =
'{' Attr* AssocItem* '}'
AssocItem =
Const
| Fn
| MacroCall
| TypeAlias
Impl =
Attr* Visibility?
'default'? 'unsafe'?
'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
AssocItemList
ExternBlock =
Attr* 'unsafe'? Abi ExternItemList
ExternItemList =
'{' Attr* ExternItem* '}'
ExternItem =
Fn
| MacroCall
| Static
| TypeAlias
Visibility =
'pub' ('(' 'in'? Path ')')?
//****************************//
// Statements and Expressions //
//****************************//
Stmt =
';'
| ExprStmt
| Item
| LetStmt
LetStmt =
Attr* 'let' Pat (':' Type)?
'=' initializer:Expr
LetElse?
';'
LetElse =
'else' BlockExpr
ExprStmt =
Expr ';'?
Expr =
ArrayExpr
| AsmExpr
| AwaitExpr
| BinExpr
| BlockExpr
| BreakExpr
| CallExpr
| CastExpr
| ClosureExpr
| ContinueExpr
| FieldExpr
| ForExpr
| FormatArgsExpr
| IfExpr
| IndexExpr
| Literal
| LoopExpr
| MacroExpr
| MatchExpr
| MethodCallExpr
| OffsetOfExpr
| ParenExpr
| PathExpr
| PrefixExpr
| RangeExpr
| RecordExpr
| RefExpr
| ReturnExpr
| BecomeExpr
| TryExpr
| TupleExpr
| WhileExpr
| YieldExpr
| YeetExpr
| LetExpr
| UnderscoreExpr
OffsetOfExpr =
Attr* 'builtin' '#' 'offset_of' '(' Type ',' fields:(NameRef ('.' NameRef)* ) ')'
// asm := "asm!(" format_string *("," format_string) *("," operand) [","] ")"
// global_asm := "global_asm!(" format_string *("," format_string) *("," operand) [","] ")"
// format_string := STRING_LITERAL / RAW_STRING_LITERAL
AsmExpr =
Attr* 'builtin' '#' 'asm' '(' template:(Expr (',' Expr)*) (AsmPiece (',' AsmPiece)*)? ','? ')'
// operand_expr := expr / "_" / expr "=>" expr / expr "=>" "_"
AsmOperandExpr = in_expr:Expr ('=>' out_expr:Expr)?
// dir_spec := "in" / "out" / "lateout" / "inout" / "inlateout"
AsmDirSpec = 'in' | 'out' | 'lateout' | 'inout' | 'inlateout'
// reg_spec := <register class> / "\"" <explicit register> "\""
AsmRegSpec = '@string' | NameRef
// reg_operand := [ident "="] dir_spec "(" reg_spec ")" operand_expr
AsmRegOperand = AsmDirSpec '(' AsmRegSpec ')' AsmOperandExpr
// clobber_abi := "clobber_abi(" <abi> *("," <abi>) [","] ")"
AsmClobberAbi = 'clobber_abi' '(' ('@string' (',' '@string')* ','?) ')'
// option := "pure" / "nomem" / "readonly" / "preserves_flags" / "noreturn" / "nostack" / "att_syntax" / "raw"
AsmOption = 'pure' | 'nomem' | 'readonly' | 'preserves_flags' | 'noreturn' | 'nostack' | 'att_syntax' | 'raw' | 'may_unwind'
// options := "options(" option *("," option) [","] ")"
AsmOptions = 'options' '(' (AsmOption (',' AsmOption)* ','?) ')'
AsmLabel = 'label' BlockExpr
AsmSym = 'sym' Path
AsmConst = 'const' Expr
// operand := reg_operand / clobber_abi / options
AsmOperand = AsmRegOperand | AsmLabel | AsmSym | AsmConst
AsmOperandNamed = (Name '=')? AsmOperand
AsmPiece = AsmOperandNamed | AsmClobberAbi | AsmOptions
FormatArgsExpr =
Attr* 'builtin' '#' 'format_args' '('
template:Expr
(',' args:(FormatArgsArg (',' FormatArgsArg)* ','?)? )?
')'
FormatArgsArg =
(Name '=')? Expr
MacroExpr =
MacroCall
Literal =
Attr* value:(
'@int_number' | '@float_number'
| '@string' | '@raw_string'
| '@byte_string' | '@raw_byte_string'
| '@c_string' | '@raw_c_string'
| '@char' | '@byte'
| 'true' | 'false'
)
PathExpr =
Attr* Path
StmtList =
'{'
Attr*
statements:Stmt*
tail_expr:Expr?
'}'
RefExpr =
Attr* '&' (('raw' 'const'?)| ('raw'? 'mut') ) Expr
TryExpr =
Attr* Expr '?'
BlockExpr =
Attr* Label? ('try' | 'unsafe' | ('async' 'move'?) | ('gen' 'move'?) | 'const') StmtList
PrefixExpr =
Attr* op:('-' | '!' | '*') Expr
BinExpr =
Attr*
lhs:Expr
op:(
'||' | '&&'
| '==' | '!=' | '<=' | '>=' | '<' | '>'
| '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&'
| '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^='
)
rhs:Expr
CastExpr =
Attr* Expr 'as' Type
ParenExpr =
Attr* '(' Attr* Expr ')'
ArrayExpr =
Attr* '[' Attr* (
(Expr (',' Expr)* ','?)?
| Expr ';' Expr
) ']'
IndexExpr =
Attr* base:Expr '[' index:Expr ']'
TupleExpr =
Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')'
RecordExpr =
Path RecordExprFieldList
RecordExprFieldList =
'{'
Attr*
fields:(RecordExprField (',' RecordExprField)* ','?)?
('..' spread:Expr?)?
'}'
RecordExprField =
Attr* (NameRef ':')? Expr
CallExpr =
Attr* Expr ArgList
ArgList =
'(' args:(Expr (',' Expr)* ','?)? ')'
MethodCallExpr =
Attr* receiver:Expr '.' NameRef GenericArgList? ArgList
FieldExpr =
Attr* Expr '.' NameRef
ClosureExpr =
Attr* ClosureBinder? 'const'? 'static'? 'async'? 'gen'? 'move'? ParamList RetType?
body:Expr
ClosureBinder =
'for' GenericParamList
IfExpr =
Attr* 'if' condition:Expr then_branch:BlockExpr
('else' else_branch:(IfExpr | BlockExpr))?
LoopExpr =
Attr* Label? 'loop'
loop_body:BlockExpr
ForExpr =
Attr* Label? 'for' Pat 'in' iterable:Expr
loop_body:BlockExpr
WhileExpr =
Attr* Label? 'while' condition:Expr
loop_body:BlockExpr
Label =
Lifetime ':'
BreakExpr =
Attr* 'break' Lifetime? Expr?
ContinueExpr =
Attr* 'continue' Lifetime?
RangeExpr =
Attr* start:Expr? op:('..' | '..=') end:Expr?
MatchExpr =
Attr* 'match' Expr MatchArmList
MatchArmList =
'{'
Attr*
arms:MatchArm*
'}'
MatchArm =
Attr* Pat guard:MatchGuard? '=>' Expr ','?
MatchGuard =
'if' condition:Expr
ReturnExpr =
Attr* 'return' Expr?
BecomeExpr =
Attr* 'become' Expr
YieldExpr =
Attr* 'yield' Expr?
YeetExpr =
Attr* 'do' 'yeet' Expr?
LetExpr =
Attr* 'let' Pat '=' Expr
UnderscoreExpr =
Attr* '_'
AwaitExpr =
Attr* Expr '.' 'await'
//*************************//
// Types //
//*************************//
Type =
ArrayType
| DynTraitType
| FnPtrType
| ForType
| ImplTraitType
| InferType
| MacroType
| NeverType
| ParenType
| PathType
| PtrType
| RefType
| SliceType
| TupleType
ParenType =
'(' Type ')'
NeverType =
'!'
MacroType =
MacroCall
PathType =
Path
TupleType =
'(' fields:(Type (',' Type)* ','?)? ')'
PtrType =
'*' ('const' | 'mut') Type
RefType =
'&' Lifetime? 'mut'? Type
ArrayType =
'[' Type ';' ConstArg ']'
SliceType =
'[' Type ']'
InferType =
'_'
FnPtrType =
'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType?
ForType =
'for' GenericParamList Type
ImplTraitType =
'impl' TypeBoundList
DynTraitType =
'dyn'? TypeBoundList
TypeBoundList =
bounds:(TypeBound ('+' TypeBound)* '+'?)
TypeBound =
Lifetime
| ('~' 'const' | 'const')? 'async'? '?'? Type
| 'use' UseBoundGenericArgs
UseBoundGenericArgs =
'<' (UseBoundGenericArg (',' UseBoundGenericArg)* ','?)? '>'
UseBoundGenericArg =
Lifetime
| NameRef
//************************//
// Patterns //
//************************//
Pat =
IdentPat
| BoxPat
| RestPat
| LiteralPat
| MacroPat
| OrPat
| ParenPat
| PathPat
| WildcardPat
| RangePat
| RecordPat
| RefPat
| SlicePat
| TuplePat
| TupleStructPat
| ConstBlockPat
LiteralPat =
'-'? Literal
IdentPat =
Attr* 'ref'? 'mut'? Name ('@' Pat)?
WildcardPat =
'_'
RangePat =
// 1..
start:Pat op:('..' | '..=')
// 1..2
| start:Pat op:('..' | '..=') end:Pat
// ..2
| op:('..' | '..=') end:Pat
RefPat =
'&' 'mut'? Pat
RecordPat =
Path RecordPatFieldList
RecordPatFieldList =
'{'
fields:(RecordPatField (',' RecordPatField)* ','?)?
RestPat?
'}'
RecordPatField =
Attr* (NameRef ':')? Pat
TupleStructPat =
Path '(' fields:(Pat (',' Pat)* ','?)? ')'
TuplePat =
'(' fields:(Pat (',' Pat)* ','?)? ')'
ParenPat =
'(' Pat ')'
SlicePat =
'[' (Pat (',' Pat)* ','?)? ']'
PathPat =
Path
OrPat =
'|'? (Pat ('|' Pat)*)
BoxPat =
'box' Pat
RestPat =
Attr* '..'
MacroPat =
MacroCall
ConstBlockPat =
'const' BlockExpr

File diff suppressed because it is too large Load Diff

View File

@@ -1,287 +0,0 @@
//! Defines input for code generation process.
use quote::ToTokens;
use crate::codegen::grammar::to_upper_snake_case;
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
pub(crate) punct: &'static [(&'static str, &'static str)],
pub(crate) keywords: &'static [&'static str],
pub(crate) contextual_keywords: &'static [&'static str],
pub(crate) literals: &'static [&'static str],
pub(crate) tokens: &'static [&'static str],
pub(crate) nodes: &'static [&'static str],
pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)],
}
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(super) enum Edition {
Edition2015,
Edition2018,
Edition2021,
Edition2024,
}
impl ToTokens for Edition {
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
match self {
Edition::Edition2015 => {
tokens.extend(quote::quote! { Edition::Edition2015 });
}
Edition::Edition2018 => {
tokens.extend(quote::quote! { Edition::Edition2018 });
}
Edition::Edition2021 => {
tokens.extend(quote::quote! { Edition::Edition2021 });
}
Edition::Edition2024 => {
tokens.extend(quote::quote! { Edition::Edition2024 });
}
}
}
}
/// The punctuations of the language.
const PUNCT: &[(&str, &str)] = &[
// KEEP THE DOLLAR AT THE TOP ITS SPECIAL
("$", "DOLLAR"),
(";", "SEMICOLON"),
(",", "COMMA"),
("(", "L_PAREN"),
(")", "R_PAREN"),
("{", "L_CURLY"),
("}", "R_CURLY"),
("[", "L_BRACK"),
("]", "R_BRACK"),
("<", "L_ANGLE"),
(">", "R_ANGLE"),
("@", "AT"),
("#", "POUND"),
("~", "TILDE"),
("?", "QUESTION"),
("&", "AMP"),
("|", "PIPE"),
("+", "PLUS"),
("*", "STAR"),
("/", "SLASH"),
("^", "CARET"),
("%", "PERCENT"),
("_", "UNDERSCORE"),
(".", "DOT"),
("..", "DOT2"),
("...", "DOT3"),
("..=", "DOT2EQ"),
(":", "COLON"),
("::", "COLON2"),
("=", "EQ"),
("==", "EQ2"),
("=>", "FAT_ARROW"),
("!", "BANG"),
("!=", "NEQ"),
("-", "MINUS"),
("->", "THIN_ARROW"),
("<=", "LTEQ"),
(">=", "GTEQ"),
("+=", "PLUSEQ"),
("-=", "MINUSEQ"),
("|=", "PIPEEQ"),
("&=", "AMPEQ"),
("^=", "CARETEQ"),
("/=", "SLASHEQ"),
("*=", "STAREQ"),
("%=", "PERCENTEQ"),
("&&", "AMP2"),
("||", "PIPE2"),
("<<", "SHL"),
(">>", "SHR"),
("<<=", "SHLEQ"),
(">>=", "SHREQ"),
];
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],;
const EOF: &str = "EOF";
const RESERVED: &[&str] = &[
"abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
"virtual", "yield",
];
// keywords that are keywords only in specific parse contexts
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] = &[
"macro_rules",
"union",
"default",
"raw",
"dyn",
"auto",
"yeet",
"safe",
];
// keywords we use for special macro expansions
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
"asm",
"att_syntax",
"builtin",
"clobber_abi",
"format_args",
// "in",
"inlateout",
"inout",
"label",
"lateout",
"may_unwind",
"nomem",
"noreturn",
"nostack",
"offset_of",
"options",
"out",
"preserves_flags",
"pure",
// "raw",
"readonly",
"sym",
];
// keywords that are keywords depending on the edition
const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[
("try", Edition::Edition2018),
("dyn", Edition::Edition2018),
("async", Edition::Edition2018),
("await", Edition::Edition2018),
("gen", Edition::Edition2024),
];
pub(crate) fn generate_kind_src(
nodes: &[AstNodeSrc],
enums: &[AstEnumSrc],
grammar: &ungrammar::Grammar,
) -> KindsSrc {
let mut contextual_keywords: Vec<&_> = CONTEXTUAL_KEYWORDS
.iter()
.chain(CONTEXTUAL_BUILTIN_KEYWORDS)
.copied()
.collect();
let mut keywords: Vec<&_> = Vec::new();
let mut tokens: Vec<&_> = TOKENS.to_vec();
let mut literals: Vec<&_> = Vec::new();
let mut used_puncts = vec![false; PUNCT.len()];
// Mark $ as used
used_puncts[0] = true;
grammar.tokens().for_each(|token| {
let name = &*grammar[token].name;
if name == EOF {
return;
}
match name.split_at(1) {
("@", lit) if !lit.is_empty() => {
literals.push(String::leak(to_upper_snake_case(lit)));
}
("#", token) if !token.is_empty() => {
tokens.push(String::leak(to_upper_snake_case(token)));
}
_ if contextual_keywords.contains(&name) => {}
_ if name.chars().all(char::is_alphabetic) => {
keywords.push(String::leak(name.to_owned()));
}
_ => {
let idx = PUNCT
.iter()
.position(|(punct, _)| punct == &name)
.unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
used_puncts[idx] = true;
}
}
});
PUNCT
.iter()
.zip(used_puncts)
.filter(|(_, used)| !used)
.for_each(|((punct, _), _)| {
panic!("Punctuation {punct:?} is not used in grammar");
});
keywords.extend(RESERVED.iter().copied());
keywords.sort();
keywords.dedup();
contextual_keywords.sort();
contextual_keywords.dedup();
let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec();
edition_dependent_keywords.sort();
edition_dependent_keywords.dedup();
keywords.retain(|&it| !contextual_keywords.contains(&it));
keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it));
// we leak things here for simplicity, that way we don't have to deal with lifetimes
// The execution is a one shot job so thats fine
let nodes = nodes
.iter()
.map(|it| &it.name)
.chain(enums.iter().map(|it| &it.name))
.map(|it| to_upper_snake_case(it))
.map(String::leak)
.map(|it| &*it)
.collect();
let nodes = Vec::leak(nodes);
nodes.sort();
let keywords = Vec::leak(keywords);
let contextual_keywords = Vec::leak(contextual_keywords);
let edition_dependent_keywords = Vec::leak(edition_dependent_keywords);
let literals = Vec::leak(literals);
literals.sort();
let tokens = Vec::leak(tokens);
tokens.sort();
KindsSrc {
punct: PUNCT,
nodes,
keywords,
contextual_keywords,
edition_dependent_keywords,
literals,
tokens,
}
}
#[derive(Default, Debug)]
pub(crate) struct AstSrc {
pub(crate) tokens: Vec<String>,
pub(crate) nodes: Vec<AstNodeSrc>,
pub(crate) enums: Vec<AstEnumSrc>,
}
#[derive(Debug)]
pub(crate) struct AstNodeSrc {
pub(crate) doc: Vec<String>,
pub(crate) name: String,
pub(crate) traits: Vec<String>,
pub(crate) fields: Vec<Field>,
}
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Field {
Token(String),
Node {
name: String,
ty: String,
cardinality: Cardinality,
},
}
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
Optional,
Many,
}
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
pub(crate) doc: Vec<String>,
pub(crate) name: String,
pub(crate) traits: Vec<String>,
pub(crate) variants: Vec<String>,
}

View File

@@ -10,7 +10,7 @@ use ungrammar::Grammar;
fn project_root() -> PathBuf {
let dir =
env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned());
env::var("CARGO_MANIFEST_DIR").unwrap().to_owned();
PathBuf::from(dir).parent().unwrap().to_owned()
}
@@ -591,10 +591,11 @@ impl Translator<'_> {{
}
fn main() -> std::io::Result<()> {
let grammar: Grammar = fs::read_to_string(project_root().join("ast-generator/rust.ungram"))
.unwrap()
let grammar = PathBuf::from("..").join(env::args().nth(1).expect("grammar file path required"));
let grammar: Grammar = fs::read_to_string(&grammar)
.expect(&format!("Failed to parse grammar file: {}", grammar.display()))
.parse()
.unwrap();
.expect("Failed to parse grammar");
let mut grammar = codegen::grammar::lower(&grammar);
grammar.enums.retain(|x| x.name != "Adt");