diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index fc6031eb39d..2b5c4f53003 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result { /// Parse a single query node (possibly with a trailing `@capture`). fn parse_query_node(tokens: &mut Tokens) -> Result { let base = parse_query_atom(tokens)?; - // Check for trailing @capture + // Check for trailing @capture or @@capture if peek_is_at(tokens) { - tokens.next(); // consume @ - let capture_name = expect_ident(tokens, "expected capture name after @")?; + let capture_name = consume_capture_marker(tokens)?; let name_str = capture_name.to_string(); Ok(quote! { yeast::query::QueryNode::Capture { @@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { push_field_elem(&mut field_order, &mut field_elems, field_str, elem); } else { let child = if peek_is_at(tokens) { - tokens.next(); - let capture_name = expect_ident(tokens, "expected capture name after @")?; + let capture_name = consume_capture_marker(tokens)?; let name_str = capture_name.to_string(); quote! { yeast::query::QueryNode::Capture { @@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result { + // `@@name` marks the capture as raw (skip auto-translate). + let raw = matches!( + tokens.peek(), + Some(TokenTree::Punct(p)) if p.as_char() == '@' + ); + if raw { + tokens.next(); // consume the second `@` + } if let Some(TokenTree::Ident(name)) = tokens.next() { let mult = if parent_mult == CaptureMultiplicity::Repeated || last_mult == CaptureMultiplicity::Repeated @@ -723,6 +732,7 @@ fn extract_captures_inner( captures.push(CaptureInfo { name: name.to_string(), multiplicity: mult, + raw, }); } last_mult = CaptureMultiplicity::Single; @@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result { // Parse query let query_code = parse_query_top(query_stream.clone())?; + // Capture names marked `@@name` (raw) — passed to the auto-translate + // prefix as a skip list so those captures keep their input-schema ids. + let raw_capture_names: Vec<&str> = captures + .iter() + .filter(|c| c.raw) + .map(|c| c.name.as_str()) + .collect(); + // Generate capture bindings let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); let bindings: Vec = captures @@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result { let __query = #query_code; yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| { // Auto-translation prefix: recursively translate every - // captured node before invoking the user's transform body. + // captured node before invoking the user's transform body, + // except for `@@name` captures listed in `__skip` which the + // body consumes raw. // For OneShot rules this preserves the legacy behaviour // (input-schema captures translated to output-schema // nodes); for Repeating rules it is a no-op. - __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?; + let __skip: &[&str] = &[#(#raw_capture_names),*]; + __translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?; #(#bindings)* let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator); let __result: Vec = { #transform_body }; @@ -1013,6 +1034,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool { matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@') } +/// Consume an `@` or `@@` capture marker and the following name ident. +/// Caller has already verified `peek_is_at(tokens)`. +fn consume_capture_marker(tokens: &mut Tokens) -> Result { + tokens.next(); // consume the first `@` + if peek_is_at(tokens) { + tokens.next(); // consume the second `@` of `@@` + } + expect_ident(tokens, "expected capture name after `@` or `@@`") +} + fn peek_is_literal(tokens: &mut Tokens) -> bool { matches!(tokens.peek(), Some(TokenTree::Literal(_))) } @@ -1113,8 +1144,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result { fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result { if peek_is_at(tokens) { - tokens.next(); // consume @ - let name = expect_ident(tokens, "expected capture name after @")?; + let name = consume_capture_marker(tokens)?; let name_str = name.to_string(); Ok(quote! { yeast::query::QueryNode::Capture { @@ -1141,13 +1171,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result Result { if peek_is_at(tokens) { - tokens.next(); - let name = expect_ident(tokens, "expected capture name after @")?; + let name = consume_capture_marker(tokens)?; let name_str = name.to_string(); // Re-parse the element isn't practical, so we generate a wrapper // that creates a new Repeated with each child wrapped in a capture. diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index 1700029b43c..3c122e7ebf9 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a single capture (`Id`) and `{..name}` splices a repeated capture (`Vec`). +### Raw captures (`@@name`) + +The default `@name` capture marker is *auto-translated*: in OneShot +phases the macro recursively translates the captured node before +binding it, so `{name}` in the output template splices a node that +already conforms to the output schema. + +For rules that need the raw (input-schema) capture — typically to read +its source text or to translate it explicitly with mutable context +state between calls — use `@@name` instead. The body sees the original +input-schema `NodeRef`: + +```rust +yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + // raw_lhs is untranslated: read its original source text. + let text = ctx.ast.source_text(raw_lhs.into()); + // rhs is already translated by the auto-translate prefix. + tree!((call + method: (identifier #{text.as_str()}) + receiver: {rhs})) + } +); +``` + +Mix `@` and `@@` freely in the same rule. In a Repeating phase both +markers are equivalent (auto-translation is a no-op for repeating +rules). + ## Complete example: for-loop desugaring This rule rewrites Ruby's `for pat in val do body end` into diff --git a/shared/yeast/src/captures.rs b/shared/yeast/src/captures.rs index 404d402a501..101ab329220 100644 --- a/shared/yeast/src/captures.rs +++ b/shared/yeast/src/captures.rs @@ -80,6 +80,28 @@ impl Captures { } Ok(()) } + + /// Like [`try_map_all_captures`] but leaves captures whose name appears + /// in `skip` untouched. Used by the `rule!` macro to support `@@name` + /// (raw) captures alongside the default auto-translated `@name` + /// captures. + pub fn try_map_captures_except( + &mut self, + skip: &[&str], + mut f: impl FnMut(Id) -> Result, E>, + ) -> Result<(), E> { + for (name, ids) in self.captures.iter_mut() { + if skip.contains(name) { + continue; + } + let mut new_ids = Vec::with_capacity(ids.len()); + for &id in ids.iter() { + new_ids.extend(f(id)?); + } + *ids = new_ids; + } + Ok(()) + } pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) { if let Some(from_ids) = self.captures.get(from) { let new_values = from_ids.iter().copied().map(f).collect(); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index e0fffc551f3..2c08c12276f 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -757,13 +757,14 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { } /// Translate every captured node in `captures` in place (OneShot phase - /// only). In a Repeating phase this is a no-op — Repeating rules - /// receive raw captures. + /// only), except for captures whose name appears in `skip` — those are + /// left as raw (input-schema) ids for the rule body to consume + /// directly. In a Repeating phase this is a no-op — Repeating rules + /// receive raw captures regardless of `skip`. /// - /// Used by the `rule!` macro's generated prefix to preserve the - /// pre-existing "auto-translate captures before running the transform - /// body" behavior. Manually-written transforms typically translate - /// captures selectively via [`translate`] instead. + /// Used by the `rule!` macro's generated prefix. `skip` is populated + /// from the macro's `@@name` capture markers; for plain `@name` + /// captures (and rules with no `@@` markers) it is empty. /// /// To avoid infinite recursion, a capture whose id matches the rule's /// matched root (e.g. from a `(_) @_` pattern) is left unchanged. @@ -772,11 +773,12 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { captures: &mut Captures, ast: &mut Ast, user_ctx: &mut C, + skip: &[&str], ) -> Result<(), String> { match &self.inner { TranslatorImpl::OneShot { matched_root, .. } => { let root = *matched_root; - captures.try_map_all_captures(|cid| { + captures.try_map_captures_except(skip, |cid| { if cid == root { Ok(vec![cid]) } else { diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 99471f129ab..1444b7c2a46 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -1058,6 +1058,110 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() { ); } +/// Verify that `@@name` capture markers skip the auto-translate prefix: +/// the body sees the *raw* (input-schema) NodeRef and can read its +/// source text or call `ctx.translate(...)` explicitly. Compare with +/// the bare `@name` form, where the auto-translate prefix runs the +/// same translation up front and the body sees the post-translate id. +#[test] +fn test_raw_capture_marker() { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let rules: Vec = vec![ + yeast::rule!( + (program (_)* @stmts) + => + (program stmt: {..stmts}) + ), + // `@@raw_lhs` is untranslated: the body reads its source text + // ("x") and embeds it directly as the identifier content. `@rhs` + // is auto-translated (rhs already points to (integer "INT")). + yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + let text = ctx.ast.source_text(raw_lhs.into()); + tree!((call + method: (identifier #{text.as_str()}) + receiver: {rhs})) + } + ), + yeast::rule!((identifier) => (identifier "ID")), + yeast::rule!((integer) => (integer "INT")), + ]; + let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; + let runner: Runner = Runner::with_schema(lang, &schema, &phases); + + let input = "x = 1"; + let ast = runner.run(input).unwrap(); + let dump = dump_ast(&ast, ast.get_root(), input); + // `method:` uses the raw source text ("x"); if `@@` were broken and + // auto-translation ran on `raw_lhs`, it would still produce the + // string "x" (source_text inherits the input range), so the dump + // wouldn't change. Add a second assertion: explicitly translating + // the raw NodeRef inside the body must succeed and produce + // `(identifier "ID")`. + assert_dump_eq( + &dump, + r#" + program + stmt: + call + method: identifier "x" + receiver: integer "INT" + "#, + ); +} + +/// Companion to `test_raw_capture_marker`: confirms that calling +/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body +/// produces the correctly-translated output-schema node. With `@`, the +/// translation has already happened, so `ctx.translate(...)` inside the +/// body would attempt to re-translate an output node (which has no +/// matching rule and would error). +#[test] +fn test_raw_capture_marker_explicit_translate() { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let rules: Vec = vec![ + yeast::rule!( + (program (_)* @stmts) + => + (program stmt: {..stmts}) + ), + yeast::rule!( + (assignment left: (_) @@raw_lhs right: (_) @rhs) + => + { + let translated_lhs = ctx.translate(raw_lhs)?; + tree!((call + method: {..translated_lhs} + receiver: {rhs})) + } + ), + yeast::rule!((identifier) => (identifier "ID")), + yeast::rule!((integer) => (integer "INT")), + ]; + let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)]; + let runner: Runner = Runner::with_schema(lang, &schema, &phases); + + let input = "x = 1"; + let ast = runner.run(input).unwrap(); + let dump = dump_ast(&ast, ast.get_root(), input); + assert_dump_eq( + &dump, + r#" + program + stmt: + call + method: identifier "ID" + receiver: integer "INT" + "#, + ); +} + // ---- Cursor tests ---- #[test]