Merge pull request #21848 from asgerf/asgerf/swift-yeast

Unified: Add schema checking and corpus-style tests
This commit is contained in:
Asger F
2026-05-26 22:00:21 +02:00
committed by GitHub
36 changed files with 5469 additions and 4732 deletions

View File

@@ -330,7 +330,7 @@ pub fn extract(
if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
.run_from_tree(&tree)
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);
} else {

View File

@@ -115,8 +115,19 @@ pub fn generate(
&node_parent_table_name,
)),
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)),
];
// Only emit the ReservedWord class when there are actually unnamed token
// types in the schema (i.e., @{prefix}_reserved_word exists in the dbscheme).
// When converting from a YEAST YAML schema that has no unnamed tokens, this
// type is absent and referencing it would cause a QL compilation error.
let has_reserved_words = nodes
.values()
.any(|n| n.dbscheme_name == reserved_word_name);
if has_reserved_words {
body.push(ql::TopLevel::Class(ql_gen::create_reserved_word_class(
&reserved_word_name,
)));
}
// Overlay discard predicates
body.push(ql::TopLevel::Predicate(

View File

@@ -113,8 +113,24 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
/// appear in any order; bare patterns are accumulated and emitted as a
/// single `("child", ...)` entry.
fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
let mut fields = Vec::new();
// Accumulate per-field elems in declaration order; multiple uses of the
// same field name extend the same list (so e.g. `cond: (foo) cond: (bar)`
// matches a `cond` field whose first child is `foo` and second is `bar`).
let mut field_order: Vec<String> = Vec::new();
let mut field_elems: std::collections::HashMap<String, Vec<TokenStream>> =
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
let push_field_elem = |order: &mut Vec<String>,
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
if !map.contains_key(&name) {
order.push(name.clone());
map.insert(name, vec![elem]);
} else {
map.get_mut(&name).unwrap().push(elem);
}
};
while tokens.peek().is_some() {
if peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
@@ -122,10 +138,40 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
expect_punct(tokens, ':', "expected `:` after field name")?;
let child = parse_query_node(tokens)?;
fields.push(quote! {
(#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)])
});
// Parse the field's pattern. To support repetition like
// `field: (kind)* @cap`, parse the atom first, then check for
// a quantifier, and lastly handle a trailing `@capture`.
let atom = parse_query_atom(tokens)?;
if peek_is_repetition(tokens) {
let rep = expect_repetition(tokens)?;
let elem = quote! {
yeast::query::QueryListElem::Repeated {
children: vec![yeast::query::QueryListElem::SingleNode(#atom)],
rep: #rep,
}
};
let elem = maybe_wrap_list_capture(tokens, elem)?;
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
capture: #name_str,
node: Box::new(#atom),
}
}
} else {
atom
};
let elem = quote! {
yeast::query::QueryListElem::SingleNode(#child)
};
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
}
} else {
// Bare patterns — accumulate into the implicit `child` field.
// We don't break here, so we can interleave with named fields.
@@ -137,6 +183,13 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
bare_children.extend(elems);
}
}
let mut fields: Vec<TokenStream> = Vec::new();
for name in field_order {
let elems = field_elems.remove(&name).unwrap();
fields.push(quote! {
(#name, vec![#(#elems),*])
});
}
if !bare_children.is_empty() {
fields.push(quote! {
("child", vec![#(#bare_children),*])
@@ -299,7 +352,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
Ok(quote! { #expr })
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
let group = expect_group(tokens, Delimiter::Parenthesis)?;
@@ -329,12 +382,17 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
return Ok(quote! { #ctx.literal(#kind_str, #lit) });
}
// Check for (kind #{expr}) — computed literal, expr converted via .to_string()
// Check for (kind #{expr}) — computed literal, expr converted via YeastDisplay
if peek_is_hash(tokens) {
tokens.next(); // consume #
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
return Ok(quote! { #ctx.literal(#kind_str, &(#expr).to_string()) });
return Ok(quote! {
{
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
#ctx.literal(#kind_str, &__value)
}
});
}
// Check for (kind $fresh)
@@ -374,7 +432,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
inner.next(); // consume first .
inner.next(); // consume second .
let expr: proc_macro2::TokenStream = inner.collect();
stmts.push(quote! { let #temp: Vec<usize> = #expr; });
stmts.push(quote! {
let #temp: Vec<usize> = (#expr).into_iter()
.map(::std::convert::Into::<usize>::into)
.collect();
});
field_args.push(quote! { (#field_str, #temp) });
continue;
}
@@ -382,7 +444,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
}
let value = parse_direct_node(tokens, ctx)?;
stmts.push(quote! { let #temp = #value; });
stmts.push(quote! { let #temp: usize = #value; });
field_args.push(quote! { (#field_str, vec![#temp]) });
}
@@ -427,10 +489,16 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
inner.next(); // consume first .
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
items.push(quote! { __nodes.extend(#expr); });
items.push(quote! {
__nodes.extend(
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
);
});
} else {
let expr = group.stream();
items.push(quote! { __nodes.push(#expr); });
items.push(quote! {
__nodes.push(::std::convert::Into::<usize>::into(#expr));
});
}
continue;
}
@@ -580,13 +648,24 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
let name_str = &cap.name;
match cap.multiplicity {
CaptureMultiplicity::Repeated => {
quote! { let #name: Vec<usize> = __captures.get_all(#name_str); }
quote! {
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
.into_iter()
.map(yeast::NodeRef)
.collect();
}
}
CaptureMultiplicity::Optional => {
quote! { let #name: Option<usize> = __captures.get_opt(#name_str); }
quote! {
let #name: Option<yeast::NodeRef> =
__captures.get_opt(#name_str).map(yeast::NodeRef);
}
}
CaptureMultiplicity::Single => {
quote! { let #name: usize = __captures.get_var(#name_str).unwrap(); }
quote! {
let #name: yeast::NodeRef =
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
}
}
}
})
@@ -613,19 +692,26 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
CaptureMultiplicity::Repeated => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.insert(__field_id, #name);
__fields.insert(
__field_id,
#name.into_iter()
.map(::std::convert::Into::<usize>::into)
.collect(),
);
},
CaptureMultiplicity::Optional => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
if let Some(__id) = #name {
__fields.entry(__field_id).or_insert_with(Vec::new).push(__id);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(__id));
}
},
CaptureMultiplicity::Single => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.entry(__field_id).or_insert_with(Vec::new).push(#name);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(#name));
},
}
})

View File

@@ -349,8 +349,8 @@ to enable rewriting:
```rust
let desugar = yeast::DesugaringConfig::new()
.add_phase("cleanup", cleanup_rules())
.add_phase("desugar", desugar_rules())
.add_phase("cleanup", yeast::PhaseKind::Repeating, cleanup_rules())
.add_phase("translate", yeast::PhaseKind::OneShot, translate_rules())
.with_output_node_types_yaml(include_str!("output-node-types.yml"));
let lang = simple::LanguageSpec {
@@ -365,6 +365,15 @@ let lang = simple::LanguageSpec {
A single-phase config is just `.add_phase(...)` called once. Phase names
appear in error messages so you can tell which phase failed.
There are two kinds of phases:
- **Repeating**:
Each node is re-processed until none of the rules in the phase matches.
When a node no longer matches any rules, its children are recursively processed. In practice this is used to desugar or simplify an AST, while staying mostly within the same schema.
- **One-shot**:
Each node is processed by the first matching rule, and the run fails with an error if no rule matches.
Rules are then recursively applied to every captured node.
In practice this is used when translating from one AST schema to another, where an exhaustive match is required.
The same YAML node-types file is used for both the runtime yeast `Schema` (so
rules can refer to output-only kinds and fields) and TRAP validation (it
is converted to JSON internally).

View File

@@ -61,6 +61,21 @@ impl Captures {
}
}
}
/// Rewrite every captured id in place through the fallible mapper `f`.
///
/// Ids under every capture key are visited. The first `Err` produced by `f`
/// stops the walk and is returned to the caller; ids that were already
/// visited keep their replaced values.
pub fn try_map_all_captures<E>(
    &mut self,
    mut f: impl FnMut(Id) -> Result<Id, E>,
) -> Result<(), E> {
    self.captures
        .values_mut()
        .flatten()
        .try_for_each(|slot| {
            *slot = f(*slot)?;
            Ok(())
        })
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();

View File

@@ -1,6 +1,6 @@
use std::fmt::Write;
use crate::{Ast, Node, NodeContent, CHILD_FIELD};
use crate::{schema::Schema, Ast, Node, NodeContent, CHILD_FIELD};
/// Options for controlling AST dump output.
pub struct DumpOptions {
@@ -45,16 +45,143 @@ pub fn dump_ast_with_options(
options: &DumpOptions,
) -> String {
let mut out = String::new();
dump_node(ast, root, source, options, 0, &mut out);
dump_node(ast, root, source, options, 0, None, &mut out);
out
}
/// Render the AST rooted at `root` using the default [`DumpOptions`], marking
/// schema violations inline.
///
/// A node that does not fit the type set its parent field expects gets a
/// trailing `" <-- ERROR: ..."` annotation on its own line of output.
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
    let default_options = DumpOptions::default();
    dump_ast_with_type_errors_and_options(ast, root, source, schema, &default_options)
}
/// Render the AST rooted at `root` with caller-supplied `options`, marking
/// schema violations inline.
///
/// A node that does not fit the type set its parent field expects gets a
/// trailing `" <-- ERROR: ..."` annotation on its own line of output.
pub fn dump_ast_with_type_errors_and_options(
    ast: &Ast,
    root: usize,
    source: &str,
    schema: &Schema,
    options: &DumpOptions,
) -> String {
    // The root has no parent field, so it starts without an expected type
    // set and without a (parent kind, field name) context.
    let root_check = Some((schema, None, None));
    let mut rendered = String::new();
    dump_node(ast, root, source, options, 0, root_check, &mut rendered);
    rendered
}
/// Render a list of node types as a ` | `-separated alternative list.
///
/// Named kinds are written bare; unnamed kinds are wrapped in double quotes.
/// An empty slice yields an empty string.
fn format_node_types(node_types: &[crate::schema::NodeType]) -> String {
    let mut rendered = String::new();
    for (index, node_type) in node_types.iter().enumerate() {
        if index > 0 {
            rendered.push_str(" | ");
        }
        if node_type.named {
            rendered.push_str(&node_type.kind);
        } else {
            rendered.push('"');
            rendered.push_str(&node_type.kind);
            rendered.push('"');
        }
    }
    rendered
}
/// Empty expected-type set. The dump code substitutes it when the schema has
/// no entry for a field, and `type_error_for_node` treats an empty expected
/// set as "field not declared".
const EMPTY_NODE_TYPES: &[crate::schema::NodeType] = &[];
/// Produce the schema-violation message for `node`, if there is one.
///
/// # Arguments
/// - `schema`: The AST schema to validate against.
/// - `node`: The node being checked.
/// - `expected`: The type set allowed at this position; `None` skips the
///   positional check (the node kind is still required to exist in the schema).
/// - `parent_field`: Optional `(parent_kind, field_name)` pair, used only to
///   make the message more specific.
///
/// # Returns
/// `Some(message)` when the node kind is unknown to the schema, when
/// `expected` is empty (the field is not declared), or when the node does not
/// match `expected`. `None` otherwise.
fn type_error_for_node(
    schema: &Schema,
    node: &Node,
    expected: Option<&[crate::schema::NodeType]>,
    parent_field: Option<(&str, &str)>,
) -> Option<String> {
    let kind_name = node.kind_name();

    // The kind must be registered either as a named or as an unnamed kind.
    let known_kind = schema.id_for_node_kind(kind_name).is_some()
        || schema.id_for_unnamed_node_kind(kind_name).is_some();
    if !known_kind {
        return Some(format!("node kind '{kind_name}' not in schema"));
    }

    let allowed = expected?;

    // An empty type set stands for a field the schema does not declare.
    if allowed.is_empty() {
        return Some(match parent_field {
            Some((kind, field)) => format!("the node '{kind}' has no field '{field}'"),
            None => "field not declared in schema for this parent node".to_string(),
        });
    }

    if schema.node_matches_types(kind_name, node.is_named(), allowed) {
        return None;
    }

    // Unnamed kinds are quoted, mirroring how the expected list is rendered.
    let actual = if node.is_named() {
        kind_name.to_string()
    } else {
        format!("\"{kind_name}\"")
    };
    let wanted = format_node_types(allowed);
    Some(match parent_field {
        Some((kind, field)) => {
            format!("The field {kind}.{field} should contain {wanted}, but got {actual}")
        }
        None => format!("expected {wanted}, got {actual}"),
    })
}
/// Fetch the type set the schema records for one field of a parent kind.
///
/// # Arguments
/// - `schema`: The AST schema to query.
/// - `parent_kind`: Kind name of the node that owns the field.
/// - `field_id`: Id of the field within that parent.
///
/// # Returns
/// The slice reported by `Schema::field_types` for this `(parent_kind,
/// field_id)` pair, or `None` when the schema has no entry for it.
fn expected_for_field<'a>(
    schema: &'a Schema,
    parent_kind: &str,
    field_id: u16,
) -> Option<&'a [crate::schema::NodeType]> {
    let declared = schema.field_types(parent_kind, field_id)?;
    Some(declared.as_slice())
}
fn dump_node(
ast: &Ast,
id: usize,
source: &str,
options: &DumpOptions,
indent: usize,
type_check: Option<(
&Schema,
Option<&[crate::schema::NodeType]>,
Option<(&str, &str)>,
)>,
out: &mut String,
) {
let node = match ast.get_node(id) {
@@ -90,6 +217,12 @@ fn dump_node(
}
}
if let Some((schema, expected, parent_field)) = type_check {
if let Some(err) = type_error_for_node(schema, node, expected, parent_field) {
write!(out, " <-- ERROR: {err}").unwrap();
}
}
writeln!(out).unwrap();
// Named fields first
@@ -98,31 +231,68 @@ fn dump_node(
continue; // Handle unnamed children last
}
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), field_id)
.or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), field_name));
(schema, expected, parent_field)
});
if children.len() == 1 {
write!(out, "{prefix} {field_name}:").unwrap();
// Inline single child
let child = ast.get_node(children[0]);
if child.is_some_and(is_leaf) {
write!(out, " ").unwrap();
dump_node_inline(ast, children[0], source, options, out);
dump_node_inline(ast, children[0], source, options, child_type_check, out);
} else {
writeln!(out).unwrap();
dump_node(ast, children[0], source, options, indent + 2, out);
dump_node(
ast,
children[0],
source,
options,
indent + 2,
child_type_check,
out,
);
}
} else {
writeln!(out, "{prefix} {field_name}:").unwrap();
for &child_id in children {
dump_node(ast, child_id, source, options, indent + 2, out);
dump_node(
ast,
child_id,
source,
options,
indent + 2,
child_type_check,
out,
);
}
}
}
// Unnamed children — skip unnamed tokens (keywords, punctuation)
if let Some(children) = node.fields.get(&CHILD_FIELD) {
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), CHILD_FIELD)
.or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), "children"));
(schema, expected, parent_field)
});
for &child_id in children {
if let Some(child) = ast.get_node(child_id) {
if child.is_named() {
dump_node(ast, child_id, source, options, indent + 1, out);
dump_node(
ast,
child_id,
source,
options,
indent + 1,
child_type_check,
out,
);
}
}
}
@@ -130,7 +300,18 @@ fn dump_node(
}
/// Dump a leaf node inline (no newline prefix, caller provides context).
fn dump_node_inline(ast: &Ast, id: usize, source: &str, options: &DumpOptions, out: &mut String) {
fn dump_node_inline(
ast: &Ast,
id: usize,
source: &str,
options: &DumpOptions,
type_check: Option<(
&Schema,
Option<&[crate::schema::NodeType]>,
Option<(&str, &str)>,
)>,
out: &mut String,
) {
let node = match ast.get_node(id) {
Some(n) => n,
None => return,
@@ -159,6 +340,12 @@ fn dump_node_inline(ast: &Ast, id: usize, source: &str, options: &DumpOptions, o
}
}
if let Some((schema, expected, parent_field)) = type_check {
if let Some(err) = type_error_for_node(schema, node, expected, parent_field) {
write!(out, " <-- ERROR: {err}").unwrap();
}
}
writeln!(out).unwrap();
}

View File

@@ -23,12 +23,73 @@ pub use cursor::Cursor;
use query::QueryNode;
/// Node ids are indexes into the arena
type Id = usize;
pub type Id = usize;
/// Field and Kind ids are provided by tree-sitter
type FieldId = u16;
type KindId = u16;
/// A typed reference to a node in an [`Ast`] arena. Wraps an [`Id`] but
/// deliberately does not implement [`std::fmt::Display`]: rendering a node
/// requires the [`Ast`] it lives in (to resolve [`NodeContent::Range`] back
/// to source text). Use [`YeastDisplay::yeast_to_string`] to format it.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
pub struct NodeRef(pub Id);
impl NodeRef {
/// Return the raw arena [`Id`] wrapped by this reference.
pub fn id(self) -> Id {
self.0
}
}
/// Unwrap a [`NodeRef`] into the raw arena [`Id`] it carries, so a `NodeRef`
/// can be passed wherever an `Into<Id>` value is accepted.
impl From<NodeRef> for Id {
fn from(value: NodeRef) -> Self {
value.0
}
}
/// Like [`std::fmt::Display`], but the formatting routine is given access to
/// the [`Ast`] so that node references can resolve to their source text.
///
/// All standard primitive and string types implement [`YeastDisplay`] via
/// the `impl_yeast_display_via_display!` macro below. Coherence prevents a
/// blanket `impl<T: Display>`, so additional types must be added explicitly.
pub trait YeastDisplay {
/// Render `self` as a `String`; `ast` is supplied for implementations
/// that need to look up node text.
fn yeast_to_string(&self, ast: &Ast) -> String;
}
/// A node reference renders as the text `Ast::source_text` returns for the
/// wrapped id.
impl YeastDisplay for NodeRef {
fn yeast_to_string(&self, ast: &Ast) -> String {
ast.source_text(self.0)
}
}
/// Implement [`YeastDisplay`] for each listed type by delegating to
/// `ToString::to_string`. The `Ast` argument is ignored by these impls.
/// Accepts a comma-separated type list with an optional trailing comma.
macro_rules! impl_yeast_display_via_display {
($($t:ty),* $(,)?) => {
$(
impl YeastDisplay for $t {
fn yeast_to_string(&self, _ast: &Ast) -> String {
::std::string::ToString::to_string(self)
}
}
)*
};
}
impl_yeast_display_via_display! {
i8, i16, i32, i64, i128, isize,
u8, u16, u32, u64, u128, usize,
f32, f64,
bool, char,
str, String,
}
/// A shared reference formats exactly like the value it points to. The
/// `?Sized` bound lets this cover references to unsized types such as `&str`.
impl<T: YeastDisplay + ?Sized> YeastDisplay for &T {
fn yeast_to_string(&self, ast: &Ast) -> String {
(**self).yeast_to_string(ast)
}
}
pub const CHILD_FIELD: u16 = u16::MAX;
#[derive(Debug)]
@@ -160,6 +221,9 @@ pub struct Ast {
root: Id,
nodes: Vec<Node>,
schema: schema::Schema,
/// Original source bytes the tree was parsed from. Used to resolve
/// `NodeContent::Range` to text for synthesized literal nodes.
source: Vec<u8>,
}
impl std::fmt::Debug for Ast {
@@ -182,21 +246,93 @@ impl Ast {
schema: schema::Schema,
tree: &tree_sitter::Tree,
language: &tree_sitter::Language,
) -> Self {
Self::from_tree_with_schema_and_source(schema, tree, language, Vec::new())
}
pub fn from_tree_with_schema_and_source(
schema: schema::Schema,
tree: &tree_sitter::Tree,
language: &tree_sitter::Language,
source: Vec<u8>,
) -> Self {
let mut visitor = visitor::Visitor::new(language.clone());
visitor.visit(tree);
visitor.build_with_schema(schema)
let mut ast = visitor.build_with_schema(schema);
ast.source = source;
ast
}
/// Return the text associated with node `id`.
///
/// `NodeContent::Range` is sliced out of the stored source bytes (lossily
/// decoded as UTF-8). String content is returned as-is. An empty
/// `DynamicString` falls back to the node's `source_range`, so nodes
/// synthesized by rule transforms still resolve to the original source text.
/// An unknown `id`, a range that does not fit the stored source, or a
/// missing `source_range` all yield an empty string.
pub fn source_text(&self, id: Id) -> String {
    // `slice::get` returns `None` both for `start > end` and for `end > len`.
    let slice_of = |range: &tree_sitter::Range| -> String {
        self.source
            .get(range.start_byte..range.end_byte)
            .map(|bytes| String::from_utf8_lossy(bytes).into_owned())
            .unwrap_or_default()
    };
    let node = match self.get_node(id) {
        Some(node) => node,
        None => return String::new(),
    };
    match &node.content {
        NodeContent::Range(range) => slice_of(range),
        NodeContent::String(s) => s.to_string(),
        NodeContent::DynamicString(s) if !s.is_empty() => s.clone(),
        NodeContent::DynamicString(_) => node
            .source_range
            .as_ref()
            .map_or_else(String::new, slice_of),
    }
}
/// Create a new [`AstCursor`] over this AST.
pub fn walk(&self) -> AstCursor {
AstCursor::new(self)
}
/// Return all nodes currently allocated in the AST arena.
///
/// This includes nodes that are no longer reachable from `get_root()`
/// after desugaring rewrites. Use `reachable_node_ids()` for output-level
/// validation/traversal semantics.
pub fn nodes(&self) -> &[Node] {
&self.nodes
}
/// Collect the ids of all nodes reachable from `get_root()` via child edges.
///
/// The walk uses an explicit stack and records each id at most once. Ids
/// that fall outside the arena are ignored. Orphaned arena nodes left behind
/// by rewrites are therefore not part of the result.
pub fn reachable_node_ids(&self) -> Vec<usize> {
    let mut visited = vec![false; self.nodes.len()];
    let mut order = Vec::new();
    let mut pending = vec![self.root];
    while let Some(current) = pending.pop() {
        // Skip ids outside the arena and ids that were already recorded.
        match visited.get_mut(current) {
            Some(flag) if !*flag => *flag = true,
            _ => continue,
        }
        order.push(current);
        if let Some(node) = self.get_node(current) {
            for children in node.fields.values() {
                pending.extend(children.iter().copied());
            }
        }
    }
    order
}
/// Return the id of the current root node.
pub fn get_root(&self) -> Id {
self.root
}
@@ -493,18 +629,39 @@ impl Rule {
node: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Option<Vec<Id>>, String> {
match self.try_match(ast, node)? {
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
None => Ok(None),
}
}
/// Attempt to match this rule's query against `node`, returning the
/// resulting captures on success. Does not invoke the transform.
fn try_match(&self, ast: &Ast, node: Id) -> Result<Option<Captures>, String> {
let mut captures = Captures::new();
if self.query.do_match(ast, node, &mut captures)? {
fresh.next_scope();
let source_range = ast.get_node(node).and_then(|n| match n.content {
NodeContent::Range(r) => Some(r),
_ => n.source_range,
});
Ok(Some((self.transform)(ast, captures, fresh, source_range)))
Ok(Some(captures))
} else {
Ok(None)
}
}
/// Invoke this rule's transform on already-matched `captures`.
///
/// A new fresh-name scope is opened first. The source range handed to the
/// transform is `node`'s own range when its content is a
/// `NodeContent::Range`, otherwise the `source_range` stored on the node, and
/// `None` when `node` is not in the arena.
fn run_transform(
    &self,
    ast: &mut Ast,
    captures: Captures,
    node: Id,
    fresh: &tree_builder::FreshScope,
) -> Vec<Id> {
    fresh.next_scope();
    let inherited_range = match ast.get_node(node) {
        None => None,
        Some(origin) => {
            if let NodeContent::Range(own_range) = origin.content {
                Some(own_range)
            } else {
                origin.source_range
            }
        }
    };
    (self.transform)(ast, captures, fresh, inherited_range)
}
}
const MAX_REWRITE_DEPTH: usize = 100;
@@ -539,17 +696,17 @@ impl<'a> RuleIndex<'a> {
}
}
fn apply_rules(
fn apply_repeating_rules(
rules: &[Rule],
ast: &mut Ast,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_rules_inner(&index, ast, id, fresh, 0, None)
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
}
fn apply_rules_inner(
fn apply_repeating_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
id: Id,
@@ -578,7 +735,7 @@ fn apply_rules_inner(
let next_skip = if rule.repeated { None } else { Some(rule_ptr) };
let mut results = Vec::new();
for node in result_node {
results.extend(apply_rules_inner(
results.extend(apply_repeating_rules_inner(
index,
ast,
node,
@@ -603,7 +760,7 @@ fn apply_rules_inner(
for children in fields.values_mut() {
let mut new_children: Option<Vec<Id>> = None;
for (i, &child_id) in children.iter().enumerate() {
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let unchanged = result.len() == 1 && result[0] == child_id;
match (&mut new_children, unchanged) {
(None, true) => {} // unchanged so far, no allocation needed
@@ -628,6 +785,92 @@ fn apply_rules_inner(
Ok(vec![id])
}
/// Entry point for `OneShot` rule application, starting at node `id`.
///
/// On each visited node the first matching rule fires. Recursion then
/// continues only into the nodes that rule captured, never directly into the
/// input node's children. A visited node that no rule matches produces an
/// `Err`.
fn apply_one_shot_rules(
    rules: &[Rule],
    ast: &mut Ast,
    id: Id,
    fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
    // The recursion depth counter starts at zero for the top-level call.
    let rule_index = RuleIndex::new(rules);
    apply_one_shot_rules_inner(&rule_index, ast, id, fresh, 0)
}
/// Recursive worker for `OneShot` rule application.
///
/// Tries the rules that `index.rules_for_kind` yields for the kind of node
/// `id`, in that order. For the first rule whose query matches, every
/// captured node is translated recursively (with `rewrite_depth + 1`), and
/// then the rule's transform is run on the translated captures; its output
/// is returned directly.
///
/// # Errors
/// - `rewrite_depth` exceeds `MAX_REWRITE_DEPTH`.
/// - Matching a rule fails, or a recursive call on a capture fails.
/// - A captured node translates to a number of nodes other than one.
/// - No rule matches the node.
fn apply_one_shot_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
This likely indicates a non-terminating rule cycle."
));
}
// An id that is not in the arena is treated as having the empty kind "".
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
// Don't rewrite unnamed nodes (punctuation, keywords, etc.); leave them
// as-is. Rules target named nodes only.
if let Some(node) = ast.get_node(id) {
if !node.is_named() {
return Ok(vec![id]);
}
}
for rule in index.rules_for_kind(node_kind) {
if let Some(mut captures) = rule.try_match(ast, id)? {
// Recursively translate every captured node before invoking the
// transform. The transform's output uses output-schema kinds, so
// we must translate captured input-schema nodes to their
// output-schema equivalents first.
captures.try_map_all_captures(|captured_id| {
// Avoid infinite recursion when a capture refers to the root
// node of the matched tree (e.g. an `@_` capture on the
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(captured_id);
}
let result =
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
// A capture slot holds a single id, so the translation must
// yield exactly one replacement node.
if result.len() != 1 {
return Err(format!(
"OneShot: recursion on captured node produced {} results, expected exactly 1",
result.len()
));
}
Ok(result[0])
})?;
// First match wins: later rules for this kind are not tried.
return Ok(rule.run_transform(ast, captures, id, fresh));
}
}
Err(format!(
"OneShot: no rule matched node of kind '{node_kind}'"
))
}
/// Selects how the rules of a phase are applied to the tree.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PhaseKind {
/// A node is re-processed until none of the rules in the phase matches,
/// except that a single rule is not applied twice in a row unless that rule is itself marked as repeating.
/// When a node no longer matches any rules, its children are recursively processed (top down).
Repeating,
/// A node is processed by the first matching rule, and the phase fails with an error if no rule matches.
/// Rules are then recursively applied to every captured node.
/// In practice this is used when translating from one AST schema to another, where every node must be rewritten,
/// and it would be a type error to match the rule patterns (based on the input schema) against the output nodes (which conform to the output schema).
OneShot,
}
/// One phase of a desugaring pass: a named bundle of rules that runs to
/// completion (a full traversal applying its rules) before the next phase
/// starts. Rules within a phase compete for matches as usual; rules in
@@ -637,13 +880,15 @@ pub struct Phase {
/// Name used in error messages.
pub name: String,
pub rules: Vec<Rule>,
pub kind: PhaseKind,
}
impl Phase {
pub fn new(name: impl Into<String>, rules: Vec<Rule>) -> Self {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
Self {
name: name.into(),
rules,
kind,
}
}
}
@@ -661,8 +906,8 @@ impl Phase {
///
/// ```ignore
/// let config = yeast::DesugaringConfig::new()
/// .add_phase("cleanup", cleanup_rules)
/// .add_phase("desugar", desugar_rules)
/// .add_phase("cleanup", PhaseKind::Repeating, cleanup_rules)
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
/// .with_output_node_types_yaml(yaml);
/// ```
#[derive(Default)]
@@ -682,9 +927,14 @@ impl DesugaringConfig {
Self::default()
}
/// Append a new phase with the given name and rules.
pub fn add_phase(mut self, name: impl Into<String>, rules: Vec<Rule>) -> Self {
self.phases.push(Phase::new(name, rules));
/// Append a new phase with the given name, kind, and rules.
pub fn add_phase(
mut self,
name: impl Into<String>,
kind: PhaseKind,
rules: Vec<Rule>,
) -> Self {
self.phases.push(Phase::new(name, kind, rules));
self
}
@@ -747,8 +997,17 @@ impl<'a> Runner<'a> {
})
}
pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language);
pub fn run_from_tree(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
tree,
&self.language,
source.to_vec(),
);
self.run_phases(&mut ast)?;
Ok(ast)
}
@@ -761,7 +1020,12 @@ impl<'a> Runner<'a> {
let tree = parser
.parse(input, None)
.ok_or_else(|| "Failed to parse input".to_string())?;
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language);
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
&tree,
&self.language,
input.as_bytes().to_vec(),
);
self.run_phases(&mut ast)?;
Ok(ast)
}
@@ -773,8 +1037,11 @@ impl<'a> Runner<'a> {
let fresh = tree_builder::FreshScope::new();
let mut root = ast.get_root();
for phase in self.phases {
let res = apply_rules(&phase.rules, ast, root, &fresh)
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
let res = match phase.kind {
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
}
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
if res.len() != 1 {
return Err(format!(
"Phase `{}`: expected exactly one result node, got {}",

View File

@@ -23,6 +23,7 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
use crate::CHILD_FIELD;
use serde::Deserialize;
use serde_json::json;
@@ -100,30 +101,36 @@ fn parse_field_name(raw: &str) -> FieldSpec {
/// Resolve a `TypeRef` to a `(kind, named)` pair, given the sets of known
/// named and unnamed types.
///
/// An explicit unnamed reference is always unnamed. A bare name is unnamed
/// only when it is known exclusively as an unnamed type; a name that appears
/// in both sets, or in neither, defaults to named.
fn resolve_type_ref_pair(
    type_ref: &TypeRef,
    named_types: &BTreeSet<String>,
    unnamed_types: &BTreeSet<String>,
) -> (String, bool) {
    let name = match type_ref {
        TypeRef::Explicit { unnamed } => return (unnamed.clone(), false),
        TypeRef::Name(name) => name,
    };
    let named = named_types.contains(name) || !unnamed_types.contains(name);
    (name.clone(), named)
}
/// Resolve a TypeRef to a {type, named} JSON record, given the sets of known named
/// and unnamed types.
///
/// The named/unnamed decision is made by `resolve_type_ref_pair`, so the JSON
/// output and the schema type-checking data always agree: explicit references
/// are unnamed, and a bare name is unnamed only when it is known solely as an
/// unnamed type.
fn resolve_type_ref(
    type_ref: &TypeRef,
    named_types: &BTreeSet<String>,
    unnamed_types: &BTreeSet<String>,
) -> serde_json::Value {
    let (kind, named) = resolve_type_ref_pair(type_ref, named_types, unnamed_types);
    json!({"type": kind, "named": named})
}
/// Convert YAML string to node-types JSON string.
@@ -233,14 +240,12 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
serde_json::to_string_pretty(&output).map_err(|e| format!("Failed to serialize JSON: {e}"))
}
/// Build a Schema from a YAML node-types string.
/// Registers all node kinds and field names found in the YAML.
pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, String> {
let yaml: YamlNodeTypes =
serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?;
let mut schema = crate::schema::Schema::new();
/// Apply YAML node-type definitions to a mutable Schema.
/// Registers all types, fields, and allowed types from the YAML into the schema.
fn apply_yaml_to_schema(
yaml: &YamlNodeTypes,
schema: &mut crate::schema::Schema,
) {
// Register all supertypes as node kinds
for name in yaml.supertypes.keys() {
schema.register_kind(name);
@@ -264,6 +269,62 @@ pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, Strin
schema.register_unnamed_kind(name);
}
let mut named_types = BTreeSet::new();
for name in yaml.supertypes.keys() {
named_types.insert(name.clone());
}
for name in yaml.named.keys() {
named_types.insert(name.clone());
}
let unnamed_types: BTreeSet<String> = yaml.unnamed.iter().cloned().collect();
for (supertype, members) in &yaml.supertypes {
let node_types = members
.iter()
.map(|m| {
let (kind, named) = resolve_type_ref_pair(m, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect();
schema.set_supertype_members(supertype, node_types);
}
// Register allowed field child types for type checking.
for (parent_kind, fields_opt) in &yaml.named {
let Some(fields) = fields_opt else {
continue;
};
for (raw_field_name, type_refs) in fields {
let spec = parse_field_name(raw_field_name);
let field_id = match &spec.name {
Some(name) => schema.register_field(name),
None => CHILD_FIELD,
};
let mut node_types = type_refs
.clone()
.into_vec()
.into_iter()
.map(|type_ref| {
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect::<Vec<_>>();
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
schema.set_field_types(parent_kind, field_id, node_types);
}
}
}
/// Build a `Schema` from a YAML node-types string.
///
/// Starts from an empty schema and applies the parsed YAML definitions to it
/// via `apply_yaml_to_schema`.
///
/// # Errors
///
/// Returns `"Failed to parse YAML: ..."` when `yaml_input` cannot be
/// deserialized.
pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, String> {
    let parsed = serde_yaml::from_str::<YamlNodeTypes>(yaml_input)
        .map_err(|e| format!("Failed to parse YAML: {e}"))?;

    let mut schema = crate::schema::Schema::new();
    apply_yaml_to_schema(&parsed, &mut schema);
    Ok(schema)
}
@@ -278,29 +339,7 @@ pub fn schema_from_yaml_with_language(
serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?;
let mut schema = crate::schema::Schema::from_language(language);
// Register supertypes
for name in yaml.supertypes.keys() {
schema.register_kind(name);
}
// Register named node kinds and their fields
for (name, fields_opt) in &yaml.named {
schema.register_kind(name);
if let Some(fields) = fields_opt {
for raw_field_name in fields.keys() {
let spec = parse_field_name(raw_field_name);
if let Some(field_name) = &spec.name {
schema.register_field(field_name);
}
}
}
}
// Register unnamed tokens
for name in &yaml.unnamed {
schema.register_unnamed_kind(name);
}
apply_yaml_to_schema(&yaml, &mut schema);
Ok(schema)
}

View File

@@ -1,7 +1,13 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use crate::{FieldId, KindId, CHILD_FIELD};
/// A node type as referenced by the schema: a kind name paired with a flag
/// telling named kinds apart from unnamed ones.
#[derive(Clone, Debug)]
pub struct NodeType {
/// Name of the node kind (or of a supertype).
pub kind: String,
/// `true` for a named kind, `false` for an unnamed one.
pub named: bool,
}
/// A schema defining node kinds and field names for the output AST.
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
///
@@ -25,6 +31,8 @@ pub struct Schema {
unnamed_kind_ids: BTreeMap<String, KindId>,
kind_names: BTreeMap<KindId, &'static str>,
next_kind_id: KindId,
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
supertypes: BTreeMap<String, Vec<NodeType>>,
}
impl Default for Schema {
@@ -43,6 +51,8 @@ impl Schema {
unnamed_kind_ids: BTreeMap::new(),
kind_names: BTreeMap::new(),
next_kind_id: 1, // 0 is reserved
field_types: BTreeMap::new(),
supertypes: BTreeMap::new(),
}
}
@@ -166,4 +176,68 @@ impl Schema {
pub fn node_kind_for_id(&self, id: KindId) -> Option<&'static str> {
    // `None` when no name is recorded for `id`; otherwise copy the stored
    // `&'static str` out of the map.
    let name = self.kind_names.get(&id)?;
    Some(*name)
}
/// Record the node types allowed in field `field_id` of `parent_kind` nodes.
///
/// A list stored earlier for the same `(parent_kind, field_id)` pair is
/// replaced.
pub fn set_field_types(
    &mut self,
    parent_kind: &str,
    field_id: FieldId,
    node_types: Vec<NodeType>,
) {
    let key = (parent_kind.to_owned(), field_id);
    self.field_types.insert(key, node_types);
}
/// Look up the node types recorded for field `field_id` of `parent_kind`.
///
/// Returns `None` when nothing was recorded for that pair.
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
    // The map is keyed by an owned `(String, FieldId)` tuple, so the lookup
    // key has to be built as an owned value too.
    let key = (parent_kind.to_owned(), field_id);
    self.field_types.get(&key)
}
/// Record `node_types` as the members of `supertype`, replacing any member
/// list stored for it earlier.
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
    let name = String::from(supertype);
    self.supertypes.insert(name, node_types);
}
/// Decide whether `node_type` admits a node of kind `node_kind` with
/// namedness `node_named`.
///
/// A type admits the node when it matches it exactly, or when it is a named
/// supertype one of whose members (recursively) admits it. `active` holds the
/// supertypes currently being expanded so that a supertype that contains
/// itself is not expanded again along the same path.
fn allows_node(
    &self,
    node_type: &NodeType,
    node_kind: &str,
    node_named: bool,
    active: &mut BTreeSet<String>,
) -> bool {
    let exact_match = node_type.kind == node_kind && node_type.named == node_named;
    if exact_match {
        return true;
    }
    // Unnamed types are never expanded as supertypes.
    if !node_type.named {
        return false;
    }

    match self.supertypes.get(&node_type.kind) {
        None => false,
        Some(members) => {
            // Already expanding this supertype further up the call chain.
            let first_visit = active.insert(node_type.kind.clone());
            if !first_visit {
                return false;
            }

            let mut found = false;
            for member in members {
                if self.allows_node(member, node_kind, node_named, active) {
                    found = true;
                    break;
                }
            }

            active.remove(&node_type.kind);
            found
        }
    }
}
/// Report whether a node of kind `node_kind` / namedness `node_named` is
/// admitted by at least one entry of `node_types`, expanding supertypes.
pub fn node_matches_types(
    &self,
    node_kind: &str,
    node_named: bool,
    node_types: &[NodeType],
) -> bool {
    for candidate in node_types {
        // Each candidate is checked with its own, initially empty, set of
        // in-progress supertypes.
        let mut visiting = BTreeSet::new();
        if self.allows_node(candidate, node_kind, node_named, &mut visiting) {
            return true;
        }
    }
    false
}
}

View File

@@ -52,6 +52,7 @@ impl Visitor {
root: 0,
schema,
nodes: self.nodes.into_iter().map(|n| n.inner).collect(),
source: Vec::new(),
}
}

View File

@@ -1,6 +1,6 @@
#![cfg(test)]
use yeast::dump::dump_ast;
use yeast::dump::{dump_ast, dump_ast_with_type_errors};
use yeast::*;
const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
@@ -15,7 +15,7 @@ fn parse_and_dump(input: &str) -> String {
/// Helper: parse Ruby source with a custom output schema and a single
/// phase of rules, return dump.
///
/// The rules run as one `PhaseKind::Repeating` phase named `"test"`.
fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
    let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
    run_phased_and_dump(input, phases)
}
/// Helper: parse Ruby source with a custom output schema and multiple
@@ -35,13 +35,42 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", rules)];
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
runner
.run(input)
.expect_err("expected runner to return an error")
}
/// Helper: parse Ruby source without applying any rules, then dump the tree
/// annotated with type errors against the schema built from `schema_yaml`.
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
    let language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run(input).unwrap();
    let expected_schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();

    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &expected_schema)
}
/// Helper: like `parse_and_dump_typed`, but the schema is built with the
/// language's IDs so field checks align with parser fields.
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language.clone(), &[]);
    let tree = runner.run(input).unwrap();
    let expected_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &language).unwrap();

    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &expected_schema)
}
/// Helper: parse Ruby source, apply `rules` as a single repeating phase, and
/// dump the result annotated with type errors against `schema_yaml`.
fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let expected_schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
    let single_phase = vec![Phase::new("test", PhaseKind::Repeating, rules)];

    let runner = Runner::with_schema(language, &expected_schema, &single_phase);
    let tree = runner.run(input).unwrap();
    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &expected_schema)
}
/// Assert that a dump equals the expected string, treating the expected
/// string as an indented multiline literal: leading/trailing blank lines
/// are stripped, and the common leading indentation is removed from every
@@ -125,6 +154,85 @@ fn test_parse_for_loop() {
);
}
/// The schema below declares no `integer` kind and limits `right` to
/// `identifier`, so dumping the unrewritten parse of `x = 1` must mark the
/// literal with an inline error.
#[test]
fn test_dump_highlights_type_errors_inline() {
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
let dump = parse_and_dump_typed("x = 1", schema_yaml);
// The error marker is expected on the same line as the offending node.
assert!(dump.contains("integer \"1\" <-- ERROR:"));
}
/// A rewrite that copies captured nodes through unchanged must still have
/// those nodes checked: the `integer` carried over from the input is not a
/// kind the schema declares, and the dump has to say so.
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
// Both the inline marker and the specific "unknown kind" reason are required.
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
}
/// The schema declares only a `left` field on `assignment`; parsing `x = y`
/// still yields a `right` child, which the dump must report as a field the
/// node does not have.
#[test]
fn test_dump_reports_undeclared_field_on_node() {
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
identifier:
"#;
// Uses the language-aware schema builder so field checks align with the
// parser's fields.
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
}
/// Here `integer` is a declared kind, but `assignment.right` only lists
/// `identifier`; the dump must report the mismatch between what the field
/// should contain and the `integer` it got.
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
integer:
"#;
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
// The message names both the expectation and the actual kind.
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
}
// ---- Query tests ----
#[test]
@@ -166,6 +274,32 @@ fn test_query_no_match() {
assert!(!matched);
}
/// After a rewrite replaces `integer` nodes, the replaced nodes stay in the
/// arena but must no longer be reachable from the root nor show up in dumps.
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
    let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();

    let replace_integers = yeast::rule!((integer) => (identifier "replaced"));
    let phases = vec![Phase::new(
        "test",
        PhaseKind::Repeating,
        vec![replace_integers],
    )];
    let runner = Runner::with_schema(lang, &schema, &phases);

    let input = "x = 1";
    let ast = runner.run(input).unwrap();

    // The arena keeps more nodes than are reachable from the root.
    let reachable_count = ast.reachable_node_ids().len();
    let arena_count = ast.nodes().len();
    assert!(
        arena_count > reachable_count,
        "expected rewrite to leave orphaned arena nodes"
    );

    let dump = dump_ast(&ast, ast.get_root(), input);
    let shows_replacement = dump.contains("identifier \"replaced\"");
    let shows_original = dump.contains("integer \"1\"");
    assert!(shows_replacement);
    assert!(!shows_original);
}
#[test]
fn test_query_repeated_capture() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
@@ -653,8 +787,8 @@ fn test_phased_desugaring() {
let dump = run_phased_and_dump(
"x = 1",
vec![
Phase::new("cleanup", cleanup),
Phase::new("desugar", desugar),
Phase::new("cleanup", PhaseKind::Repeating, cleanup),
Phase::new("desugar", PhaseKind::Repeating, desugar),
],
);
assert_dump_eq(
@@ -675,7 +809,11 @@ fn test_phase_error_includes_phase_name() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("buggy", vec![swap_assignment_rule().repeated()])];
let phases = vec![Phase::new(
"buggy",
PhaseKind::Repeating,
vec![swap_assignment_rule().repeated()],
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
@@ -690,6 +828,168 @@ fn test_phase_error_includes_phase_name() {
);
}
/// Helper: OneShot rules covering every node reachable (via captures) when
/// translating `"x = 1"`: the program, the assignment, and both leaf kinds.
///
/// The `integer` rule is deliberately last; a caller drops it with `pop()` to
/// exercise the "no rule matched" path.
fn one_shot_xeq1_rules() -> Vec<Rule> {
    let program_rule = yeast::rule!(
        (program (_)* @stmts)
        =>
        (program stmt: {..stmts})
    );
    let assignment_rule = yeast::rule!(
        (assignment left: (_) @left right: (_) @right)
        =>
        (first_node left: {left} right: {right})
    );
    let identifier_rule = yeast::rule!((identifier) => (identifier "ID"));
    let integer_rule = yeast::rule!((integer) => (integer "INT"));

    vec![program_rule, assignment_rule, identifier_rule, integer_rule]
}
/// A single OneShot phase with a rule for every node in `x = 1` translates
/// the whole tree: the assignment becomes a `first_node` and both leaves are
/// rewritten to their placeholder text.
#[test]
fn test_one_shot_phase() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new(
"translate",
PhaseKind::OneShot,
one_shot_xeq1_rules(),
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// "ID" / "INT" in the dump show that the leaf rules ran on the captures.
assert_dump_eq(
&dump,
r#"
program
stmt:
first_node
left: identifier "ID"
right: integer "INT"
"#,
);
}
/// A OneShot phase must fail, naming both the phase and the node kind, when
/// it reaches a node that no rule matches.
#[test]
fn test_one_shot_phase_errors_when_no_rule_matches() {
    let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();

    // Drop the `integer` rule so the recursion has no rule for `integer`.
    let rules = {
        let mut all_rules = one_shot_xeq1_rules();
        all_rules.pop();
        all_rules
    };
    let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
    let runner = Runner::with_schema(lang, &schema, &phases);

    let err = runner
        .run("x = 1")
        .expect_err("expected OneShot to error on unmatched node");

    let names_phase = err.contains("Phase `translate`");
    assert!(names_phase, "error should name the phase, got: {err}");

    let describes_unmatched_kind = err.contains("no rule matched") && err.contains("integer");
    assert!(
        describes_unmatched_kind,
        "error should describe the unmatched node kind, got: {err}"
    );
}
/// OneShot recursion must apply rules to *captured* nodes, even if the rule
/// returns a captured child verbatim. A buggy implementation that only
/// recurses into the children of the rule's output (rather than into the
/// captures) would leave the returned capture untransformed.
#[test]
fn test_one_shot_recurses_into_returned_capture() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// Returns the captured `left` verbatim, discarding `right`.
yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
{left}
),
// Leaf rules replace the text, so the dump shows whether they were applied.
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `left` is an `identifier`; OneShot must apply the identifier rule to
// it before the assignment transform returns it verbatim.
assert_dump_eq(
&dump,
r#"
program
stmt: identifier "ID"
"#,
);
}
/// OneShot recursion must NOT descend into the children of the rule's output.
/// A rule may legitimately wrap a captured node in fresh output-schema nodes
/// that have no matching rule of their own (since rule patterns target the
/// input schema). Recursing into the output would erroneously try to find
/// rules for those wrapper kinds and fail.
#[test]
fn test_one_shot_does_not_recurse_into_wrapper_output() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// Wraps `left` in nested `first_node`/`second_node` output kinds.
// Neither wrapper kind has a matching rule, so a buggy implementation
// that recurses into the wrapper's children would error.
yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(first_node
left: (second_node left: {left} right: {right})
right: {left}
)
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
// `unwrap` doubles as the check that no "no rule matched" error was raised
// for the wrapper kinds.
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `{left}` is used twice in the template, so the rewritten identifier is
// expected in both places.
assert_dump_eq(
&dump,
r#"
program
stmt:
first_node
left:
second_node
left: identifier "ID"
right: integer "INT"
right: identifier "ID"
"#,
);
}
// ---- Cursor tests ----
#[test]
@@ -760,3 +1060,54 @@ fn test_desugar_for_with_multiple_assignment() {
"#,
);
}
/// Regression test: `#{capture}` in a template must render the *source text*
/// of the captured node, not its arena `Id`. Previously, captures were bound
/// as `usize`, so `#{cap}` printed the integer id (e.g. `"3"`) via `Display`.
/// Captures are now bound as `NodeRef`, which has no `Display` impl and
/// resolves to the captured node's source text via `YeastDisplay`.
#[test]
fn test_hash_brace_renders_capture_source_text() {
// Rebuilds the call with fresh identifiers whose text comes from `#{...}`.
let rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
)
=>
(call
method: (identifier #{name})
receiver: (identifier #{recv})
arguments: (argument_list)
)
);
let dump = run_and_dump("foo.bar()", vec![rule]);
// The rebuilt identifiers must read "bar" and "foo", not numeric ids.
assert_dump_eq(
&dump,
r#"
program
call
arguments: argument_list "foo.bar()"
method: identifier "bar"
receiver: identifier "foo"
"#,
);
}
/// Regression test: non-`NodeRef` values in `#{expr}` still render via their
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
#[test]
fn test_hash_brace_renders_integer_expression() {
// `#{1 + 2}` is an ordinary integer expression, not a capture.
let rule = rule!(
(identifier) @_
=>
(identifier #{1 + 2})
);
let dump = run_and_dump("foo", vec![rule]);
// The identifier's text is the rendered value of the expression, "3".
assert_dump_eq(
&dump,
r#"
program
identifier "3"
"#,
);
}