Merge pull request #22054 from github/tausbn/yeast-context-reification

This commit is contained in:
Taus
2026-06-26 11:01:19 +02:00
committed by GitHub
18 changed files with 1410 additions and 431 deletions

View File

@@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
/// through the supplied yeast `Runner` before TRAP extraction. Building the
/// `Runner` (which parses YAML and constructs the schema) is the caller's
/// responsibility, allowing it to be done once and shared across files.
/// When `desugarer` is `Some`, the parsed tree is first transformed
/// through the supplied yeast desugarer before TRAP extraction. Building
/// the desugarer (which parses YAML and constructs the schema) is the
/// caller's responsibility, allowing it to be done once and shared across
/// files.
#[allow(clippy::too_many_arguments)]
pub fn extract(
language: &Language,
@@ -295,7 +296,7 @@ pub fn extract(
path: &Path,
source: &[u8],
ranges: &[Range],
yeast_runner: Option<&yeast::Runner<'_>>,
desugarer: Option<&dyn yeast::Desugarer>,
) {
let path_str = file_paths::normalize_and_transform_path(path, transformer);
let source_root = std::env::current_dir()
@@ -328,8 +329,8 @@ pub fn extract(
schema,
);
if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
if let Some(desugarer) = desugarer {
let ast = desugarer
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);

View File

@@ -13,11 +13,14 @@ pub struct LanguageSpec {
pub prefix: &'static str,
pub ts_language: tree_sitter::Language,
pub node_types: &'static str,
/// Optional yeast desugaring configuration. When set, the parsed
/// tree is rewritten through yeast before TRAP extraction. The
/// config's `output_node_types_yaml` (if set) provides the schema
/// used both at runtime (for the rewriter) and for TRAP validation.
pub desugar: Option<yeast::DesugaringConfig>,
/// Optional desugarer. When set, the parsed tree is rewritten through
/// the desugarer before TRAP extraction. The desugarer's
/// `output_node_types_yaml()` (if set) provides the schema used both
/// at runtime (for the rewriter) and for TRAP validation.
///
/// Stored as a `Box<dyn yeast::Desugarer>` so that the shared extractor
/// stays agnostic to the user-defined context type the desugarer uses
/// internally.
pub desugar: Option<Box<dyn yeast::Desugarer>>,
pub file_globs: Vec<String>,
}
@@ -91,35 +94,22 @@ impl Extractor {
.collect();
let mut schemas = vec![];
let mut yeast_runners = Vec::new();
for lang in &self.languages {
let effective_node_types: String =
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);
// Build the yeast runner once per language so the YAML schema
// isn't re-parsed for every file.
let yeast_runner = lang
let effective_node_types: String = match lang
.desugar
.as_ref()
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
.transpose()
.map_err(|e| {
.and_then(|d| d.output_node_types_yaml())
{
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to build desugaring runner for {}: {e}",
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?;
yeast_runners.push(yeast_runner);
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);
}
// Construct a single globset containing all language globs,
@@ -194,7 +184,7 @@ impl Extractor {
&path,
&source,
&[],
yeast_runners[i].as_ref(),
lang.desugar.as_deref(),
);
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;

View File

@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
Err(err) => err.to_compile_error().into(),
}
}
/// Define a desugaring rule whose transform is a hand-written Rust block.
///
/// Use `manual_rule!` when the transform needs control over capture
/// translation timing — for example, when an outer rule needs to set
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
/// translation reaches inner rules that read that state.
///
/// ```text
/// manual_rule!(
/// (query_pattern field: (_) @name)
/// {
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
/// // (`name: NodeRef`, etc.) are bound from the query.
/// let translated = ctx.translate(name)?;
/// Ok(translated)
/// }
/// )
/// ```
///
/// Differences from [`rule!`]:
/// - Captures are **not** auto-translated before the body runs; they
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
/// tree template, no `Ok(...)` wrap.
#[proc_macro]
pub fn manual_rule(input: TokenStream) -> TokenStream {
let input2: TokenStream2 = input.into();
match parse::parse_manual_rule_top(input2) {
Ok(output) => output.into(),
Err(err) => err.to_compile_error().into(),
}
}

View File

@@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
let push_field_elem = |order: &mut Vec<String>,
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
if !map.contains_key(&name) {
order.push(name.clone());
map.insert(name, vec![elem]);
@@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
@@ -296,10 +295,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
// tree! / trees! parsing — direct code generation against BuildCtx
// ---------------------------------------------------------------------------
const IMPLICIT_CTX: &str = "__yeast_ctx";
const IMPLICIT_CTX: &str = "ctx";
/// Determine the context identifier: either explicit `ctx,` or the implicit
/// `__yeast_ctx` from an enclosing `rule!`.
/// `ctx` from an enclosing `rule!`.
fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident {
// Check if first token is an ident followed by a comma
let mut lookahead = tokens.clone();
@@ -359,7 +358,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
Ok(quote! { ::std::convert::Into::<usize>::into({ #expr }) })
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
let group = expect_group(tokens, Delimiter::Parenthesis)?;
@@ -396,7 +395,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
let expr = group.stream();
return Ok(quote! {
{
let __expr = (#expr);
let __expr = { #expr };
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
@@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Named fields — compute each value into a temp, then reference it
while peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
let field_str = field_name
.to_string()
.strip_prefix("r#")
.unwrap_or(&field_name.to_string())
.to_string();
expect_punct(tokens, ':', "expected `:` after field name")?;
let temp = Ident::new(
&format!("__field_{field_str}_{field_counter}"),
@@ -438,7 +441,8 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Determine if a chain (.map(..)) follows the `{}` group.
let mut after = tokens.clone();
after.next(); // skip the brace group
let has_chain = matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain =
matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
if is_splice || has_chain {
let group = expect_group(tokens, Delimiter::Brace)?;
@@ -448,11 +452,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
quote! {
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
}
} else {
let expr = group.stream();
quote! { (#expr).into_iter() }
quote! { { #expr }.into_iter() }
};
let chained = parse_chain_suffix(tokens, ctx, base)?;
stmts.push(quote! {
@@ -506,11 +510,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
/// Each call expects the receiver to be an iterator. The `base` argument
/// should therefore already be an iterator (use `.into_iter()` on it before
/// calling this function).
fn parse_chain_suffix(
tokens: &mut Tokens,
ctx: &Ident,
base: TokenStream,
) -> Result<TokenStream> {
fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result<TokenStream> {
let mut current = base;
while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') {
tokens.next(); // consume .
@@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
// {expr} or {..expr} (with optional .chain) — single node or splice
if peek_is_group(tokens, Delimiter::Brace) {
let group = expect_group(tokens, Delimiter::Brace)?;
let has_chain = matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain =
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let mut inner = group.stream().into_iter().peekable();
let is_splice = peek_is_dotdot(&inner);
if is_splice || has_chain {
@@ -617,11 +618,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
quote! {
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
}
} else {
let expr = group.stream();
quote! { (#expr).into_iter() }
quote! { { #expr }.into_iter() }
};
let chained = parse_chain_suffix(tokens, ctx, base)?;
items.push(quote! {
@@ -630,7 +631,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
} else {
let expr = group.stream();
items.push(quote! {
__nodes.push(::std::convert::Into::<usize>::into(#expr));
__nodes.push(::std::convert::Into::<usize>::into({ #expr }));
});
}
continue;
@@ -888,10 +889,117 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
Ok(quote! {
{
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>| {
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// Auto-translation prefix: recursively translate every
// captured node before invoking the user's transform body.
// For OneShot rules this preserves the legacy behaviour
// (input-schema captures translated to output-schema
// nodes); for Repeating rules it is a no-op.
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range);
#transform_body
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
let __result: Vec<usize> = { #transform_body };
Ok(__result)
}))
}
})
}
/// Parse `manual_rule!( query { body } )`.
///
/// Like [`parse_rule_top`] but:
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
/// - Generates code that does NOT auto-translate captures before
/// running the body. Capture variables refer to raw (input-schema)
/// nodes; the body is responsible for explicit translation via
/// `ctx.translate(...)`.
/// - The body is included verbatim and must evaluate to
/// `Result<Vec<usize>, String>`.
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
let mut tokens = input.into_iter().peekable();
// Collect query tokens up to the body block `{ ... }`.
let mut query_tokens = Vec::new();
loop {
match tokens.peek() {
None => {
return Err(syn::Error::new(
Span::call_site(),
"expected a Rust block `{ ... }` after the query in manual_rule!",
))
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
_ => {
query_tokens.push(tokens.next().unwrap());
}
}
}
let query_stream: TokenStream = query_tokens.into_iter().collect();
// Extract captures from the query (same as in `rule!`).
let captures = extract_captures(&query_stream);
// Parse the query into the QueryNode-building expression.
let query_code = parse_query_top(query_stream)?;
// Generate capture bindings (same as in `rule!`).
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
let bindings: Vec<TokenStream> = captures
.iter()
.map(|cap| {
let name = Ident::new(&cap.name, Span::call_site());
let name_str = &cap.name;
match cap.multiplicity {
CaptureMultiplicity::Repeated => quote! {
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
.into_iter()
.map(yeast::NodeRef)
.collect();
},
CaptureMultiplicity::Optional => quote! {
let #name: Option<yeast::NodeRef> =
__captures.get_opt(#name_str).map(yeast::NodeRef);
},
CaptureMultiplicity::Single => quote! {
let #name: yeast::NodeRef =
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
},
}
})
.collect();
// Consume the body block.
let body_group = match tokens.next() {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
other => {
return Err(syn::Error::new(
Span::call_site(),
format!(
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
),
))
}
};
let body_stream = body_group.stream();
// No tokens should follow the body.
if let Some(tok) = tokens.next() {
return Err(syn::Error::new_spanned(
tok,
"unexpected token after manual_rule! body",
));
}
Ok(quote! {
{
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// No auto-translate prefix for manual rules — the body
// is responsible for translating captures explicitly.
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
#body_stream
}))
}
})

View File

@@ -265,7 +265,21 @@ occurrences of the same `$name` within one `BuildCtx` share the same value:
)
```
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`):
The contents of `{…}` are treated as a Rust block, so multi-statement
expressions (with `let` bindings) work too:
```rust
(assignment
left: {tmp}
right: {
let lit = ctx.literal("integer", "0");
tree!((binary_expr op: (operator "+") left: {tmp} right: {lit}))
})
```
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`); the contents
are likewise a Rust block, so the splice can be the result of arbitrary
computation:
```rust
yeast::trees!(ctx,

View File

@@ -20,7 +20,7 @@ fn main() {
let args = Cli::parse();
let language = get_language(&args.language);
let source = std::fs::read_to_string(&args.file).unwrap();
let runner = yeast::Runner::new(language, &[]);
let runner: yeast::Runner = yeast::Runner::new(language, &[]);
let ast = runner.run(&source).unwrap();
println!("{}", ast.print(&source, ast.get_root()));
}

View File

@@ -2,28 +2,60 @@ use std::collections::BTreeMap;
use crate::captures::Captures;
use crate::tree_builder::FreshScope;
use crate::{Ast, FieldId, Id, NodeContent};
use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle};
/// Context for building new AST nodes during a transformation.
///
/// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the
/// AST, a reference to the captures from a query match, and a `FreshScope` for
/// generating unique identifiers.
pub struct BuildCtx<'a> {
/// AST, a reference to the captures from a query match, a `FreshScope` for
/// generating unique identifiers, and a mutable reference to a user-defined
/// context of type `C`.
///
/// The user context `C` is shared across rules via the framework's driver:
/// outer rules can write to it before recursive translation, and inner rules
/// can read (or further mutate) it during their transforms. The framework
/// snapshots and restores the user context around each rule application, so
/// mutations made by a rule are visible to its descendants (via recursive
/// translation) but not to its parent's siblings.
///
/// `BuildCtx` implements [`Deref`](std::ops::Deref) and
/// [`DerefMut`](std::ops::DerefMut) targeting `C`, so user context fields are
/// accessible as `ctx.my_field` directly (provided they don't collide with
/// `BuildCtx`'s own fields like `ast`, `captures`, etc.).
///
/// The default `C = ()` means rules that don't need any user context don't
/// pay any cost.
///
/// When constructed by the framework (via the `rule!` or `manual_rule!`
/// macros), `BuildCtx` also carries a [`TranslatorHandle`] that the
/// [`translate`](Self::translate) method delegates to. When constructed by
/// hand (e.g. in tests), the translator is `None` and
/// [`translate`](Self::translate) returns an error.
pub struct BuildCtx<'a, C: 'a = ()> {
pub ast: &'a mut Ast,
pub captures: &'a Captures,
pub fresh: &'a FreshScope,
/// Source range of the matched node, inherited by synthetic nodes.
pub source_range: Option<tree_sitter::Range>,
/// User-supplied context, accessible directly via `ctx.field` (via Deref).
pub user_ctx: &'a mut C,
/// Optional translator handle, populated when the context is built by
/// the framework's rule driver. None when the context is built by hand.
pub(crate) translator: Option<TranslatorHandle<'a, C>>,
}
impl<'a> BuildCtx<'a> {
pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self {
impl<'a, C> BuildCtx<'a, C> {
/// Builds a context with no matched-node source range and no translator
/// handle.
///
/// Because no translator is attached, calling `translate` on the result
/// returns an error.
pub fn new(
    ast: &'a mut Ast,
    captures: &'a Captures,
    fresh: &'a FreshScope,
    user_ctx: &'a mut C,
) -> Self {
    // Neither optional piece is available when constructing by hand.
    let source_range = None;
    let translator = None;
    Self {
        ast,
        captures,
        fresh,
        source_range,
        user_ctx,
        translator,
    }
}
@@ -32,12 +64,35 @@ impl<'a> BuildCtx<'a> {
captures: &'a Captures,
fresh: &'a FreshScope,
source_range: Option<tree_sitter::Range>,
user_ctx: &'a mut C,
) -> Self {
Self {
ast,
captures,
fresh,
source_range,
user_ctx,
translator: None,
}
}
/// Builds a `BuildCtx` that carries a translator handle, which is what
/// makes `translate` usable inside a rule transform. The code generated
/// by the `rule!` macro constructs its context through this function.
pub fn with_translator(
    ast: &'a mut Ast,
    captures: &'a Captures,
    fresh: &'a FreshScope,
    source_range: Option<tree_sitter::Range>,
    user_ctx: &'a mut C,
    translator: TranslatorHandle<'a, C>,
) -> Self {
    // The field is optional; this constructor is the one that fills it.
    let translator = Some(translator);
    Self {
        ast,
        captures,
        fresh,
        source_range,
        user_ctx,
        translator,
    }
}
@@ -113,3 +168,52 @@ impl<'a> BuildCtx<'a> {
self.ast.prepend_field_child(node_id, field_id, value_id);
}
}
impl<C: Clone> BuildCtx<'_, C> {
    /// Translates `id` by delegating to the translator handle this context
    /// carries, passing along the AST and the user context.
    ///
    /// Any value convertible to [`Id`] is accepted (including
    /// [`crate::NodeRef`]), so capture bindings can be handed over without
    /// unwrapping.
    ///
    /// # Errors
    ///
    /// Returns an error when this `BuildCtx` has no translator handle
    /// (i.e. it was built by hand rather than by the rule driver), and
    /// forwards any error reported by the handle itself — the handle
    /// rejects translation in a Repeating phase.
    pub fn translate<I: Into<Id>>(&mut self, id: I) -> Result<Vec<Id>, String> {
        let node_id: Id = id.into();
        let Some(handle) = &self.translator else {
            return Err("translate() called on a BuildCtx without a translator handle".into());
        };
        handle.translate(self.ast, self.user_ctx, node_id)
    }

    /// Translates an optional capture (`Option<NodeRef>` from a
    /// `?`-quantified capture), yielding `None` when there is nothing to
    /// translate.
    ///
    /// Only the first id produced by the translation is kept; when a
    /// single input may expand to several ids and all of them are needed,
    /// call [`BuildCtx::translate`] instead.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`BuildCtx::translate`].
    pub fn translate_opt<I: Into<Id>>(&mut self, id: Option<I>) -> Result<Option<Id>, String> {
        let Some(present) = id else {
            return Ok(None);
        };
        let translated = self.translate(present)?;
        Ok(translated.into_iter().next())
    }
}
/// Read access to the user context: `&BuildCtx` dereferences to `&C`.
impl<C> std::ops::Deref for BuildCtx<'_, C> {
    type Target = C;

    fn deref(&self) -> &Self::Target {
        // The `&mut C` field is implicitly reborrowed as a shared reference.
        self.user_ctx
    }
}
/// Write access to the user context: `&mut BuildCtx` dereferences to `&mut C`.
impl<C> std::ops::DerefMut for BuildCtx<'_, C> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Implicit reborrow of the stored `&mut C` for the duration of `self`.
        self.user_ctx
    }
}

View File

@@ -53,12 +53,7 @@ pub fn dump_ast_with_options(
///
/// Any node that does not match the expected type set for its parent field is
/// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line.
pub fn dump_ast_with_type_errors(
ast: &Ast,
root: usize,
source: &str,
schema: &Schema,
) -> String {
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default())
}
@@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options(
options: &DumpOptions,
) -> String {
let mut out = String::new();
dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out);
dump_node(
ast,
root,
source,
options,
0,
Some((schema, None, None)),
&mut out,
);
out
}
@@ -232,8 +235,8 @@ fn dump_node(
}
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), field_id)
.or(Some(EMPTY_NODE_TYPES));
let expected =
expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), field_name));
(schema, expected, parent_field)
});

View File

@@ -16,7 +16,7 @@ pub mod schema;
pub mod tree_builder;
mod visitor;
pub use yeast_macros::{query, rule, tree, trees};
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
use captures::Captures;
pub use cursor::Cursor;
@@ -297,7 +297,9 @@ impl Ast {
/// Returns the source text for `id`, resolving `NodeContent::Range`
/// against the stored source bytes when available.
pub fn source_text(&self, id: Id) -> String {
let Some(node) = self.get_node(id) else { return String::new(); };
let Some(node) = self.get_node(id) else {
return String::new();
};
let read_range = |range: &tree_sitter::Range| {
let start = range.start_byte;
let end = range.end_byte;
@@ -488,7 +490,10 @@ impl Ast {
/// Prepend a child id to the given field of the given node.
pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) {
let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id");
let node = self
.nodes
.get_mut(node_id)
.expect("prepend_field_child: invalid node id");
node.fields.entry(field_id).or_default().insert(0, value_id);
}
@@ -700,18 +705,118 @@ impl From<tree_sitter::Range> for NodeContent {
}
}
/// The transform function for a rule: takes the AST, captured variables, a
/// fresh-name scope, and the source range of the matched node, and returns
/// the IDs of the replacement nodes.
pub type Transform = Box<
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> Vec<Id>
/// A handle that lets a rule transform recursively translate AST nodes via
/// the framework's rule machinery. Constructed by the driver and passed as
/// the last argument of every [`Transform`] invocation.
///
/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in
/// its generated prefix to translate captures before running the user's
/// transform body. Manually-written transforms (using [`Rule::new`]
/// directly) can call [`TranslatorHandle::translate`] selectively on
/// specific node ids to control when translation happens.
///
/// The type parameter `C` is the user context type that is threaded through
/// translation alongside the AST.
pub struct TranslatorHandle<'a, C> {
// Phase-specific state; private, so callers only ever go through the
// handle's methods.
inner: TranslatorImpl<'a, C>,
}
/// Internal phase-specific translation state. Kept private — callers
/// interact with [`TranslatorHandle`] only.
enum TranslatorImpl<'a, C> {
/// OneShot phase translator: recursively applies OneShot rules.
OneShot {
/// Rule index handed to the recursive rule application.
index: &'a RuleIndex<'a, C>,
/// Fresh-name scope handed to the recursive rule application.
fresh: &'a tree_builder::FreshScope,
/// Rewrite depth at which this handle was created;
/// [`TranslatorHandle::translate`] recurses with this value plus one.
rewrite_depth: usize,
/// The id of the node the current rule is matching. Used by
/// [`TranslatorHandle::auto_translate_captures`] to avoid infinite
/// recursion when a rule captures its own match root (e.g. via
/// `(_) @_`).
matched_root: Id,
},
/// Repeating phase translator: translation is not meaningful here
/// (input and output schemas are the same).
/// [`TranslatorHandle::translate`] errors;
/// [`TranslatorHandle::auto_translate_captures`] is a no-op so the
/// macro's auto-prefix works unchanged for Repeating rules.
Repeating,
}
impl<'a, C: Clone> TranslatorHandle<'a, C> {
    /// Runs the OneShot rules on `id`, one rewrite level deeper than the
    /// level this handle was created at, and returns the resulting node
    /// ids.
    ///
    /// # Errors
    ///
    /// Fails in a Repeating phase, where translation is not meaningful,
    /// and forwards any error raised by the recursive rule application.
    pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result<Vec<Id>, String> {
        match &self.inner {
            TranslatorImpl::Repeating => {
                Err("translate() is not available in a Repeating phase".into())
            }
            TranslatorImpl::OneShot {
                index: rules,
                fresh: names,
                rewrite_depth: depth,
                ..
            } => apply_one_shot_rules_inner(rules, ast, user_ctx, id, names, depth + 1),
        }
    }

    /// Replaces every captured node in `captures` with its translation
    /// (OneShot phase). In a Repeating phase nothing happens, so Repeating
    /// rules keep their raw captures.
    ///
    /// This is the prefix the `rule!` macro injects so that captures are
    /// translated before the transform body runs. Hand-written transforms
    /// usually call [`TranslatorHandle::translate`] on selected ids
    /// instead.
    ///
    /// A capture whose id equals the rule's matched root (e.g. one coming
    /// from a `(_) @_` pattern) is kept as is; translating it would recurse
    /// without end.
    ///
    /// # Errors
    ///
    /// Propagates the first translation error.
    pub fn auto_translate_captures(
        &self,
        captures: &mut Captures,
        ast: &mut Ast,
        user_ctx: &mut C,
    ) -> Result<(), String> {
        let root = match &self.inner {
            TranslatorImpl::Repeating => return Ok(()),
            TranslatorImpl::OneShot { matched_root, .. } => *matched_root,
        };
        captures.try_map_all_captures(|capture_id| {
            if capture_id != root {
                self.translate(ast, user_ctx, capture_id)
            } else {
                // The rule's own match root stays untranslated.
                Ok(vec![capture_id])
            }
        })
    }
}
/// The transform function for a rule.
///
/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name
/// scope, the source range of the matched node, a mutable reference to the
/// user context of type `C`, and a [`TranslatorHandle`] for recursively
/// translating nodes. Returns the IDs of the replacement nodes, or an
/// error message if the transform could not be completed.
///
/// Transforms produced by [`Rule::new`] receive **raw** captures and must
/// translate them themselves (via the handle). Transforms produced by the
/// `rule!` macro have an auto-translation prefix injected for backward
/// compatibility.
///
/// The user context type `C` defaults to `()`, and the boxed closure must
/// be `Send + Sync`.
pub type Transform<C = ()> = Box<
dyn Fn(
&mut Ast,
Captures,
&tree_builder::FreshScope,
Option<tree_sitter::Range>,
&mut C,
TranslatorHandle<'_, C>,
) -> Result<Vec<Id>, String>
+ Send
+ Sync,
>;
pub struct Rule {
pub struct Rule<C = ()> {
query: QueryNode,
transform: Transform,
transform: Transform<C>,
/// If true, after this rule fires on a node the engine will try to
/// re-apply this same rule on the result root. Defaults to false:
/// each rule fires at most once on a given node, which prevents
@@ -719,8 +824,8 @@ pub struct Rule {
repeated: bool,
}
impl Rule {
pub fn new(query: QueryNode, transform: Transform) -> Self {
impl<C> Rule<C> {
pub fn new(query: QueryNode, transform: Transform<C>) -> Self {
Self {
query,
transform,
@@ -742,9 +847,13 @@ impl Rule {
ast: &mut Ast,
node: Id,
fresh: &tree_builder::FreshScope,
user_ctx: &mut C,
translator: TranslatorHandle<'_, C>,
) -> Result<Option<Vec<Id>>, String> {
match self.try_match(ast, node)? {
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
Some(captures) => Ok(Some(
self.run_transform(ast, captures, node, fresh, user_ctx, translator)?,
)),
None => Ok(None),
}
}
@@ -768,29 +877,31 @@ impl Rule {
captures: Captures,
node: Id,
fresh: &tree_builder::FreshScope,
) -> Vec<Id> {
user_ctx: &mut C,
translator: TranslatorHandle<'_, C>,
) -> Result<Vec<Id>, String> {
fresh.next_scope();
let source_range = ast.get_node(node).and_then(|n| match n.content {
NodeContent::Range(r) => Some(r),
_ => n.source_range,
});
(self.transform)(ast, captures, fresh, source_range)
(self.transform)(ast, captures, fresh, source_range, user_ctx, translator)
}
}
const MAX_REWRITE_DEPTH: usize = 100;
/// Index of rules by their root query kind for fast lookup.
struct RuleIndex<'a> {
struct RuleIndex<'a, C> {
/// Rules indexed by root node kind name.
by_kind: BTreeMap<&'static str, Vec<&'a Rule>>,
by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>>,
/// Rules with wildcard queries (Any) that apply to all nodes.
wildcard: Vec<&'a Rule>,
wildcard: Vec<&'a Rule<C>>,
}
impl<'a> RuleIndex<'a> {
fn new(rules: &'a [Rule]) -> Self {
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new();
impl<'a, C> RuleIndex<'a, C> {
fn new(rules: &'a [Rule<C>]) -> Self {
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>> = BTreeMap::new();
let mut wildcard = Vec::new();
for rule in rules {
match rule.query.root_kind() {
@@ -801,7 +912,7 @@ impl<'a> RuleIndex<'a> {
Self { by_kind, wildcard }
}
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule> {
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule<C>> {
self.by_kind
.get(kind)
.into_iter()
@@ -810,23 +921,25 @@ impl<'a> RuleIndex<'a> {
}
}
fn apply_repeating_rules(
rules: &[Rule],
fn apply_repeating_rules<C: Clone>(
rules: &[Rule<C>],
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None)
}
fn apply_repeating_rules_inner(
index: &RuleIndex,
fn apply_repeating_rules_inner<C: Clone>(
index: &RuleIndex<C>,
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
skip_rule: Option<*const Rule>,
skip_rule: Option<*const Rule<C>>,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
@@ -837,11 +950,23 @@ fn apply_repeating_rules_inner(
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
for rule in index.rules_for_kind(node_kind) {
let rule_ptr = *rule as *const Rule;
let rule_ptr = *rule as *const Rule<C>;
if Some(rule_ptr) == skip_rule {
continue;
}
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
// Snapshot the user context before invoking the rule so that any
// mutations the rule makes are visible during recursive translation
// of its result, but not leaked to the parent's siblings.
let snapshot = user_ctx.clone();
// Repeating rules don't need a real translator: their captures
// aren't auto-translated (Repeating preserves the input schema),
// and `ctx.translate(id)` errors if invoked from a Repeating
// transform.
let translator = TranslatorHandle {
inner: TranslatorImpl::Repeating,
};
let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?;
if let Some(result_node) = try_result {
// For non-repeated rules, suppress further application of *this*
// rule on the result root, so a rule whose output matches its own
// query doesn't loop. Other rules and child traversal are
@@ -852,14 +977,19 @@ fn apply_repeating_rules_inner(
results.extend(apply_repeating_rules_inner(
index,
ast,
user_ctx,
node,
fresh,
rewrite_depth + 1,
next_skip,
)?);
}
*user_ctx = snapshot;
return Ok(results);
}
// Rule didn't match; restore any speculative changes (none expected
// since try_rule only mutates on match, but be defensive).
*user_ctx = snapshot;
}
// Take the parent's fields by ownership: the recursion will rewrite
@@ -874,7 +1004,15 @@ fn apply_repeating_rules_inner(
for children in fields.values_mut() {
let mut new_children: Option<Vec<Id>> = None;
for (i, &child_id) in children.iter().enumerate() {
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let result = apply_repeating_rules_inner(
index,
ast,
user_ctx,
child_id,
fresh,
rewrite_depth,
None,
)?;
let unchanged = result.len() == 1 && result[0] == child_id;
match (&mut new_children, unchanged) {
(None, true) => {} // unchanged so far, no allocation needed
@@ -903,24 +1041,25 @@ fn apply_repeating_rules_inner(
/// each visited node, recursion proceeds only through captured nodes (not
/// through the input node's children directly), and an error is returned if
/// no rule matches a visited node.
fn apply_one_shot_rules(
rules: &[Rule],
fn apply_one_shot_rules<C: Clone>(
rules: &[Rule<C>],
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_one_shot_rules_inner(&index, ast, id, fresh, 0)
apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0)
}
fn apply_one_shot_rules_inner(
index: &RuleIndex,
fn apply_one_shot_rules_inner<C: Clone>(
index: &RuleIndex<C>,
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
@@ -931,22 +1070,27 @@ fn apply_one_shot_rules_inner(
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
for rule in index.rules_for_kind(node_kind) {
if let Some(mut captures) = rule.try_match(ast, id)? {
// Recursively translate every captured node before invoking the
// transform. The transform's output uses output-schema kinds, so
// we must translate captured input-schema nodes to their
// output-schema equivalents first.
captures.try_map_all_captures(|captured_id| {
// Avoid infinite recursion when a capture refers to the root
// node of the matched tree (e.g. an `@_` capture on the
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(vec![captured_id]);
}
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
})?;
return Ok(rule.run_transform(ast, captures, id, fresh));
if let Some(captures) = rule.try_match(ast, id)? {
// Snapshot the user context before invoking the rule so that any
// mutations the rule (or its transitively-translated captures)
// make are visible during this rule's transform, but not leaked
// to the parent's siblings.
let snapshot = user_ctx.clone();
// Build the translator handle the transform will use to
// recursively translate captures (or, for macro-generated
// rules, the auto-translate prefix uses it to translate every
// capture up front, preserving the legacy behavior).
let translator = TranslatorHandle {
inner: TranslatorImpl::OneShot {
index,
fresh,
rewrite_depth,
matched_root: id,
},
};
let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?;
*user_ctx = snapshot;
return Ok(result);
}
}
@@ -974,15 +1118,15 @@ pub enum PhaseKind {
/// starts. Rules within a phase compete for matches as usual; rules in
/// different phases never compete because each traversal only considers the
/// current phase's rules.
pub struct Phase {
pub struct Phase<C = ()> {
/// Name used in error messages.
pub name: String,
pub rules: Vec<Rule>,
pub rules: Vec<Rule<C>>,
pub kind: PhaseKind,
}
impl Phase {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
impl<C> Phase<C> {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule<C>>) -> Self {
Self {
name: name.into(),
rules,
@@ -1008,17 +1152,30 @@ impl Phase {
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
/// .with_output_node_types_yaml(yaml);
/// ```
#[derive(Default)]
pub struct DesugaringConfig {
///
/// The optional type parameter `C` is the user context type threaded through
/// rule transforms. Defaults to `()` (no user context).
pub struct DesugaringConfig<C = ()> {
/// Phases of rule application, applied in order.
pub phases: Vec<Phase>,
pub phases: Vec<Phase<C>>,
/// Output node-types in YAML format. If `None`, the input grammar's
/// node types are used (i.e. the desugared AST has the same node types
/// as the tree-sitter grammar).
pub output_node_types_yaml: Option<&'static str>,
}
impl DesugaringConfig {
// `Default` is written by hand instead of derived: `#[derive(Default)]`
// would require `C: Default`, and an empty config must be constructible
// for any user context type `C`.
impl<C> Default for DesugaringConfig<C> {
    /// Returns a configuration with no phases and no output node-types YAML.
    fn default() -> Self {
        let phases = Vec::new();
        let output_node_types_yaml = None;
        Self {
            phases,
            output_node_types_yaml,
        }
    }
}
impl<C> DesugaringConfig<C> {
/// Create an empty configuration. Add phases via [`add_phase`] and an
/// optional output schema via [`with_output_node_types_yaml`].
pub fn new() -> Self {
@@ -1030,7 +1187,7 @@ impl DesugaringConfig {
mut self,
name: impl Into<String>,
kind: PhaseKind,
rules: Vec<Rule>,
rules: Vec<Rule<C>>,
) -> Self {
self.phases.push(Phase::new(name, kind, rules));
self
@@ -1052,15 +1209,15 @@ impl DesugaringConfig {
}
}
pub struct Runner<'a> {
pub struct Runner<'a, C = ()> {
language: tree_sitter::Language,
schema: schema::Schema,
phases: &'a [Phase],
phases: &'a [Phase<C>],
}
impl<'a> Runner<'a> {
impl<'a, C> Runner<'a, C> {
/// Create a runner using the input grammar's schema for output.
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
pub fn new(language: tree_sitter::Language, phases: &'a [Phase<C>]) -> Self {
let schema = schema::Schema::from_language(&language);
Self {
language,
@@ -1073,7 +1230,7 @@ impl<'a> Runner<'a> {
pub fn with_schema(
language: tree_sitter::Language,
schema: &schema::Schema,
phases: &'a [Phase],
phases: &'a [Phase<C>],
) -> Self {
Self {
language,
@@ -1085,7 +1242,7 @@ impl<'a> Runner<'a> {
/// Create a runner from a [`DesugaringConfig`].
pub fn from_config(
language: tree_sitter::Language,
config: &'a DesugaringConfig,
config: &'a DesugaringConfig<C>,
) -> Result<Self, String> {
let schema = config.build_schema(&language)?;
Ok(Self {
@@ -1094,11 +1251,17 @@ impl<'a> Runner<'a> {
phases: &config.phases,
})
}
}
pub fn run_from_tree(
impl<'a, C: Clone> Runner<'a, C> {
/// Build the AST from the already-parsed `tree` (whose text is `source`)
/// and run all phases, threading `user_ctx` through every rule
/// transform. The caller owns the initial context state.
pub fn run_from_tree_with_ctx(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
user_ctx: &mut C,
) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
@@ -1106,11 +1269,13 @@ impl<'a> Runner<'a> {
&self.language,
source.to_vec(),
);
self.run_phases(&mut ast)?;
self.run_phases(&mut ast, user_ctx)?;
Ok(ast)
}
pub fn run(&self, input: &str) -> Result<Ast, String> {
/// Parse `input` and run all phases, threading `user_ctx` through
/// every rule transform. The caller owns the initial context state.
pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result<Ast, String> {
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&self.language)
@@ -1124,20 +1289,24 @@ impl<'a> Runner<'a> {
&self.language,
input.as_bytes().to_vec(),
);
self.run_phases(&mut ast)?;
self.run_phases(&mut ast, user_ctx)?;
Ok(ast)
}
/// Apply each phase in turn to the AST, threading the root through.
/// A single `FreshScope` is shared across phases so that fresh
/// identifiers generated in different phases don't collide.
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> {
let fresh = tree_builder::FreshScope::new();
let mut root = ast.get_root();
for phase in self.phases {
let res = match phase.kind {
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
PhaseKind::Repeating => {
apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
PhaseKind::OneShot => {
apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
}
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
if res.len() != 1 {
@@ -1153,3 +1322,78 @@ impl<'a> Runner<'a> {
Ok(())
}
}
impl<'a, C: Clone + Default> Runner<'a, C> {
    /// Run all phases over the already-parsed `tree` (whose text is
    /// `source`), starting from a fresh `C::default()` context.
    ///
    /// Thin wrapper around `run_from_tree_with_ctx`; the context is
    /// discarded once the call returns.
    pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
        self.run_from_tree_with_ctx(tree, source, &mut C::default())
    }

    /// Parse `input` and run all phases, starting from a fresh
    /// `C::default()` context.
    ///
    /// Thin wrapper around `run_with_ctx`; the context is discarded once
    /// the call returns.
    pub fn run(&self, input: &str) -> Result<Ast, String> {
        self.run_with_ctx(input, &mut C::default())
    }
}
// ---------------------------------------------------------------------------
// Desugarer: type-erased view of a DesugaringConfig + Runner
// ---------------------------------------------------------------------------
/// Type-erased interface to a desugaring pipeline for a single language.
///
/// Consumers (e.g. a generic tree-sitter extractor) hold
/// `Box<dyn Desugarer>` or borrow a `&dyn Desugarer`, so they can dispatch
/// through the trait without knowing the user context type `C` that's
/// internal to yeast.
///
/// The `Send + Sync` supertraits require every implementor to be safe to
/// move to, and share between, threads.
///
/// Construct one via [`ConcreteDesugarer::new`] from a
/// [`DesugaringConfig<C>`] and a [`tree_sitter::Language`].
pub trait Desugarer: Send + Sync {
    /// The output AST schema (in YAML format), or `None` if the input
    /// grammar's schema should be used.
    fn output_node_types_yaml(&self) -> Option<&'static str>;

    /// Run the desugaring pipeline over the already-parsed `tree`, whose
    /// text is `source`, and return the resulting AST.
    /// Each call constructs a fresh default user context internally.
    ///
    /// # Errors
    ///
    /// Returns `Err` carrying a human-readable message if the pipeline
    /// fails.
    fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String>;
}
/// A concrete [`Desugarer`] backed by a [`DesugaringConfig<C>`] for a
/// specific user context type `C`.
///
/// Stores the language and a pre-built schema so that per-call cost is
/// limited to constructing a transient [`Runner`] and cloning the schema
/// (no YAML re-parsing).
///
/// The struct itself places no bounds on `C`: the
/// `Default + Clone + Send + Sync + 'static` requirements are stated on the
/// `impl` blocks that actually need them, so the type can be named without
/// repeating those bounds.
pub struct ConcreteDesugarer<C> {
    /// Grammar handed to each per-call `Runner`.
    language: tree_sitter::Language,
    /// Output schema, built once at construction time.
    schema: schema::Schema,
    /// Phases and optional output node-types YAML supplied by the user.
    config: DesugaringConfig<C>,
}
impl<C: Default + Clone + Send + Sync + 'static> ConcreteDesugarer<C> {
    /// Build a desugarer for `language` from `config`.
    ///
    /// The schema is built here, once, via `config.build_schema`, and kept
    /// for reuse across files.
    ///
    /// # Errors
    ///
    /// Propagates the error from `config.build_schema` if the schema
    /// cannot be built.
    pub fn new(
        language: tree_sitter::Language,
        config: DesugaringConfig<C>,
    ) -> Result<Self, String> {
        Ok(Self {
            // Fields are evaluated in written order, so the schema is built
            // from borrows of `config` and `language` before both are moved
            // into the struct below.
            schema: config.build_schema(&language)?,
            language,
            config,
        })
    }
}
impl<C: Default + Clone + Send + Sync + 'static> Desugarer for ConcreteDesugarer<C> {
    /// Forwards the YAML stored on the wrapped config.
    fn output_node_types_yaml(&self) -> Option<&'static str> {
        let Self { config, .. } = self;
        config.output_node_types_yaml
    }

    /// Builds a transient `Runner` over the stored language, schema and
    /// phases, then runs it on `tree` with a default user context.
    fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
        let Self {
            language,
            schema,
            config,
        } = self;
        Runner::with_schema(language.clone(), schema, &config.phases).run_from_tree(tree, source)
    }
}

View File

@@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
/// Apply YAML node-type definitions to a mutable Schema.
/// Registers all types, fields, and allowed types from the YAML into the schema.
fn apply_yaml_to_schema(
yaml: &YamlNodeTypes,
schema: &mut crate::schema::Schema,
) {
fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) {
// Register all supertypes as node kinds
for name in yaml.supertypes.keys() {
schema.register_kind(name);
@@ -307,7 +304,8 @@ fn apply_yaml_to_schema(
.into_vec()
.into_iter()
.map(|type_ref| {
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
let (kind, named) =
resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect::<Vec<_>>();

View File

@@ -198,13 +198,8 @@ impl Schema {
.insert((parent_kind.to_string(), field_id), node_types);
}
pub fn field_types(
&self,
parent_kind: &str,
field_id: FieldId,
) -> Option<&Vec<NodeType>> {
self.field_types
.get(&(parent_kind.to_string(), field_id))
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
self.field_types.get(&(parent_kind.to_string(), field_id))
}
pub fn set_field_cardinality(

View File

@@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
/// Helper: parse Ruby source with no rules, return dump.
fn parse_and_dump(input: &str) -> String {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
dump_ast(&ast, ast.get_root(), input)
}
@@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
runner.run(input).unwrap()
}
@@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let ast = runner.run(input).unwrap();
dump_ast(&ast, ast.get_root(), input)
}
@@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
runner
.run(input)
.expect_err("expected runner to return an error")
@@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
/// Helper: parse Ruby source with no rules and dump with schema type errors.
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
@@ -64,10 +64,10 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
/// building schema with language IDs so field checks align with parser fields.
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let runner = Runner::new(lang.clone(), &[]);
let runner: Runner = Runner::new(lang.clone(), &[]);
let ast = runner.run(input).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
.unwrap();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
@@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> Strin
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let ast = runner.run(input).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
@@ -166,7 +166,7 @@ fn test_parse_for_loop() {
#[test]
fn test_dump_highlights_type_errors_inline() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -176,13 +176,13 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
}
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -192,25 +192,25 @@ named:
identifier:
"#;
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules: Vec<Rule> = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
}
#[test]
fn test_dump_reports_undeclared_field_on_node() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -219,14 +219,14 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
}
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -237,17 +237,17 @@ named:
integer:
"#;
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
}
// ---- Query tests ----
#[test]
fn test_query_match() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -268,7 +268,7 @@ fn test_query_match() {
#[test]
fn test_query_no_match() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() {
// captured comment to nothing (a common idiom, e.g.
// `(comment) => ()` in Swift) leaves the capture's match-list empty
// and causes the transform to fail with "Variable X has 0 matches".
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("[1, # comment\n2]").unwrap();
// Navigate to the `array` node: program -> array.
@@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() {
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
assert!(matched);
assert_eq!(
ast.get_node(captures.get_var("a").unwrap())
.unwrap()
.kind(),
ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(),
"integer"
);
assert_eq!(
ast.get_node(captures.get_var("b").unwrap())
.unwrap()
.kind(),
ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(),
"integer"
);
}
@@ -325,14 +321,14 @@ fn test_query_skips_extras_in_positional_match() {
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
.unwrap();
let phases = vec![Phase::new(
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases: Vec<Phase> = vec![Phase::new(
"test",
PhaseKind::Repeating,
vec![yeast::rule!((integer) => (identifier "replaced"))],
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -350,7 +346,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
#[test]
fn test_query_repeated_capture() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x, y, z = 1").unwrap();
let query = yeast::query!(
@@ -375,7 +371,7 @@ fn test_query_repeated_capture() {
#[test]
fn test_capture_unnamed_node_parenthesized() {
// `("=") @op` captures the unnamed `=` token between left and right.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -403,7 +399,7 @@ fn test_capture_unnamed_node_parenthesized() {
fn test_capture_bare_underscore_repeated() {
// `_` matches named and unnamed nodes in bare-child position. On this
// assignment shape, bare children correspond to unnamed tokens (the `=`).
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!((assignment _* @all));
@@ -425,7 +421,7 @@ fn test_capture_bare_underscore_repeated() {
#[test]
fn test_capture_unnamed_node_bare_literal() {
// `"=" @op` (without surrounding parens) is the same as `("=") @op`.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let query = yeast::query!(
@@ -454,7 +450,7 @@ fn test_bare_underscore_matches_unnamed() {
// Bare `_` matches any node, including unnamed tokens, while `(_)`
// matches only named nodes. Demonstrate by matching the unnamed `=`
// token in the implicit `child` field of an `assignment`.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -493,7 +489,7 @@ fn test_bare_forms_in_field_position() {
// field's value, not just in the bare-children position. This is
// syntactic sugar for `(_)` / `("…")` and goes through the same
// code paths.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -532,7 +528,7 @@ fn test_forward_scan_finds_unnamed_token_late() {
// query for `("end")` skip past the first two and match the third.
// Without forward-scan, the matcher took the first child unconditionally
// and failed.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("for x in list do\n y\nend").unwrap();
// Navigate: program > for > do (the body wrapper).
@@ -559,7 +555,7 @@ fn test_forward_scan_preserves_order() {
// order. A query for ("end") then ("do") should fail because `do`
// appears before `end` in the source order; once forward-scan has
// consumed `end`, the iterator is exhausted.
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("for x in list do\n y\nend").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -580,7 +576,7 @@ fn test_forward_scan_preserves_order() {
#[test]
fn test_tree_builder() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let mut ast = runner.run("x = 1").unwrap();
let input = "x = 1";
@@ -598,7 +594,8 @@ fn test_tree_builder() {
// Swap left and right
let fresh = yeast::tree_builder::FreshScope::new();
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh);
let mut user_ctx = ();
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx);
let new_id = yeast::tree!(ctx,
(program
child: (assignment
@@ -626,7 +623,7 @@ fn test_tree_builder() {
// tree-sitter-ruby grammar with named fields for nodes that only have
// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter).
fn ruby_rules() -> Vec<Rule> {
let assign_rule = yeast::rule!(
let assign_rule: Rule = yeast::rule!(
(assignment
left: (left_assignment_list
(identifier)* @left
@@ -651,7 +648,7 @@ fn ruby_rules() -> Vec<Rule> {
)}
);
let for_rule = yeast::rule!(
let for_rule: Rule = yeast::rule!(
(for
pattern: (_) @pat
value: (in (_) @val)
@@ -733,7 +730,7 @@ fn test_desugar_for_loop() {
#[test]
fn test_shorthand_rule() {
let rule = yeast::rule!(
let rule: Rule = yeast::rule!(
(assignment
left: (_) @method
right: (_) @receiver
@@ -885,7 +882,7 @@ fn test_phase_error_includes_phase_name() {
PhaseKind::Repeating,
vec![swap_assignment_rule().repeated()],
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
.expect_err("expected runner to return an error");
@@ -928,7 +925,7 @@ fn test_one_shot_phase() {
PhaseKind::OneShot,
one_shot_xeq1_rules(),
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -954,7 +951,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() {
let mut rules = one_shot_xeq1_rules();
rules.pop();
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
@@ -978,7 +975,7 @@ fn test_one_shot_recurses_into_returned_capture() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
@@ -994,7 +991,7 @@ fn test_one_shot_recurses_into_returned_capture() {
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -1020,7 +1017,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
@@ -1041,7 +1038,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
@@ -1065,7 +1062,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
#[test]
fn test_cursor_navigation() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run("x = 1").unwrap();
let mut cursor = AstCursor::new(&ast);
@@ -1139,7 +1136,7 @@ fn test_desugar_for_with_multiple_assignment() {
/// resolves to the captured node's source text via `YeastDisplay`.
#[test]
fn test_hash_brace_renders_capture_source_text() {
let rule = rule!(
let rule: Rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
@@ -1168,7 +1165,7 @@ fn test_hash_brace_renders_capture_source_text() {
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
#[test]
fn test_hash_brace_renders_integer_expression() {
let rule = rule!(
let rule: Rule = rule!(
(identifier) @_
=>
(identifier #{1 + 2})
@@ -1187,7 +1184,7 @@ fn test_hash_brace_renders_integer_expression() {
/// source location, not the full source range of the matched rule root.
#[test]
fn test_hash_brace_uses_capture_location_for_leaf() {
let rule = rule!(
let rule: Rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
@@ -1204,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() {
let mut bar_ids: Vec<usize> = Vec::new();
for id in ast.reachable_node_ids() {
let Some(node) = ast.get_node(id) else { continue; };
let Some(node) = ast.get_node(id) else {
continue;
};
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
bar_ids.push(id);
}