yeast: Add @@name raw-capture syntax to rule!

The `@@name` capture marker in `rule!` queries skips the
auto-translate prefix for that specific capture, letting the body see
the original capture (and thus delay its translation using
`ctx.translate` until it becomes convenient).

Regular `@name` captures continue to be auto-translated as before.
Specifically these are translated _eagerly_, before the main body of the
rewrite rule is run.

I settled on `@@` as the syntax because it did not add new symbols that
the user has to keep track of (it's still a kind of capture), but it's
still visually distinct enough that the user should be able to tell that
there's something special going on. In principle one could accidentally
write one form of capture where the other was intended, but in practice
this would result in code that did not compile (because the types would
not match).
This commit is contained in:
Taus
2026-06-25 15:36:54 +00:00
parent 2767b8dbbf
commit eb7f8cc43d
5 changed files with 209 additions and 21 deletions

View File

@@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
/// Parse a single query node (possibly with a trailing `@capture`).
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
let base = parse_query_atom(tokens)?;
// Check for trailing @capture
// Check for trailing @capture or @@capture
if peek_is_at(tokens) {
tokens.next(); // consume @
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let capture_name = consume_capture_marker(tokens)?;
let name_str = capture_name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
@@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let capture_name = consume_capture_marker(tokens)?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
@@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
struct CaptureInfo {
name: String,
multiplicity: CaptureMultiplicity,
/// `true` for `@@name` captures: the auto-translate prefix skips them,
/// so the bound `NodeRef` refers to the raw (input-schema) node.
raw: bool,
}
#[derive(Clone, Copy, PartialEq)]
@@ -708,6 +709,14 @@ fn extract_captures_inner(
extract_captures_inner(&mut inner, captures, child_mult);
}
TokenTree::Punct(p) if p.as_char() == '@' => {
// `@@name` marks the capture as raw (skip auto-translate).
let raw = matches!(
tokens.peek(),
Some(TokenTree::Punct(p)) if p.as_char() == '@'
);
if raw {
tokens.next(); // consume the second `@`
}
if let Some(TokenTree::Ident(name)) = tokens.next() {
let mult = if parent_mult == CaptureMultiplicity::Repeated
|| last_mult == CaptureMultiplicity::Repeated
@@ -723,6 +732,7 @@ fn extract_captures_inner(
captures.push(CaptureInfo {
name: name.to_string(),
multiplicity: mult,
raw,
});
}
last_mult = CaptureMultiplicity::Single;
@@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
// Parse query
let query_code = parse_query_top(query_stream.clone())?;
// Capture names marked `@@name` (raw) — passed to the auto-translate
// prefix as a skip list so those captures keep their input-schema ids.
let raw_capture_names: Vec<&str> = captures
.iter()
.filter(|c| c.raw)
.map(|c| c.name.as_str())
.collect();
// Generate capture bindings
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
let bindings: Vec<TokenStream> = captures
@@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
let __query = #query_code;
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
// Auto-translation prefix: recursively translate every
// captured node before invoking the user's transform body.
// captured node before invoking the user's transform body,
// except for `@@name` captures listed in `__skip` which the
// body consumes raw.
// For OneShot rules this preserves the legacy behaviour
// (input-schema captures translated to output-schema
// nodes); for Repeating rules it is a no-op.
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
let __skip: &[&str] = &[#(#raw_capture_names),*];
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
#(#bindings)*
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
let __result: Vec<usize> = { #transform_body };
@@ -1013,6 +1034,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
}
/// Consume an `@` or `@@` capture marker and the following name ident.
/// Caller has already verified `peek_is_at(tokens)`.
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
tokens.next(); // consume the first `@`
if peek_is_at(tokens) {
tokens.next(); // consume the second `@` of `@@`
}
expect_ident(tokens, "expected capture name after `@` or `@@`")
}
fn peek_is_literal(tokens: &mut Tokens) -> bool {
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
}
@@ -1113,8 +1144,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
tokens.next(); // consume @
let name = expect_ident(tokens, "expected capture name after @")?;
let name = consume_capture_marker(tokens)?;
let name_str = name.to_string();
Ok(quote! {
yeast::query::QueryNode::Capture {
@@ -1141,13 +1171,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
}
}
/// If `@name` follows a Repeated list element, wrap each child SingleNode
/// inside the repetition with a Capture. This matches tree-sitter semantics
/// where `(_)* @name` captures each matched node.
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
/// child SingleNode inside the repetition with a Capture. This matches
/// tree-sitter semantics where `(_)* @name` captures each matched node.
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
if peek_is_at(tokens) {
tokens.next();
let name = expect_ident(tokens, "expected capture name after @")?;
let name = consume_capture_marker(tokens)?;
let name_str = name.to_string();
// Re-parse the element isn't practical, so we generate a wrapper
// that creates a new Repeated with each child wrapped in a capture.

View File

@@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
single capture (`Id`) and `{..name}` splices a repeated capture
(`Vec<Id>`).
### Raw captures (`@@name`)
The default `@name` capture marker is *auto-translated*: in OneShot
phases the macro recursively translates the captured node before
binding it, so `{name}` in the output template splices a node that
already conforms to the output schema.
For rules that need the raw (input-schema) capture — typically to read
its source text or to translate it explicitly with mutable context
state between calls — use `@@name` instead. The body sees the original
input-schema `NodeRef`:
```rust
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
// raw_lhs is untranslated: read its original source text.
let text = ctx.ast.source_text(raw_lhs.into());
// rhs is already translated by the auto-translate prefix.
tree!((call
method: (identifier #{text.as_str()})
receiver: {rhs}))
}
);
```
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
markers are equivalent (auto-translation is a no-op for repeating
rules).
## Complete example: for-loop desugaring
This rule rewrites Ruby's `for pat in val do body end` into

View File

@@ -80,6 +80,28 @@ impl Captures {
}
Ok(())
}
/// Like [`try_map_all_captures`] but leaves captures whose name appears
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
/// (raw) captures alongside the default auto-translated `@name`
/// captures.
pub fn try_map_captures_except<E>(
&mut self,
skip: &[&str],
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
) -> Result<(), E> {
for (name, ids) in self.captures.iter_mut() {
if skip.contains(name) {
continue;
}
let mut new_ids = Vec::with_capacity(ids.len());
for &id in ids.iter() {
new_ids.extend(f(id)?);
}
*ids = new_ids;
}
Ok(())
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();

View File

@@ -757,13 +757,14 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
}
/// Translate every captured node in `captures` in place (OneShot phase
/// only). In a Repeating phase this is a no-op — Repeating rules
/// receive raw captures.
/// only), except for captures whose name appears in `skip` — those are
/// left as raw (input-schema) ids for the rule body to consume
/// directly. In a Repeating phase this is a no-op — Repeating rules
/// receive raw captures regardless of `skip`.
///
/// Used by the `rule!` macro's generated prefix to preserve the
/// pre-existing "auto-translate captures before running the transform
/// body" behavior. Manually-written transforms typically translate
/// captures selectively via [`translate`] instead.
/// Used by the `rule!` macro's generated prefix. `skip` is populated
/// from the macro's `@@name` capture markers; for plain `@name`
/// captures (and rules with no `@@` markers) it is empty.
///
/// To avoid infinite recursion, a capture whose id matches the rule's
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
@@ -772,11 +773,12 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
captures: &mut Captures,
ast: &mut Ast,
user_ctx: &mut C,
skip: &[&str],
) -> Result<(), String> {
match &self.inner {
TranslatorImpl::OneShot { matched_root, .. } => {
let root = *matched_root;
captures.try_map_all_captures(|cid| {
captures.try_map_captures_except(skip, |cid| {
if cid == root {
Ok(vec![cid])
} else {

View File

@@ -1058,6 +1058,110 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
);
}
/// Verify that `@@name` capture markers skip the auto-translate prefix:
/// the body sees the *raw* (input-schema) NodeRef and can read its
/// source text or call `ctx.translate(...)` explicitly. Compare with
/// the bare `@name` form, where the auto-translate prefix runs the
/// same translation up front and the body sees the post-translate id.
#[test]
fn test_raw_capture_marker() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// `@@raw_lhs` is untranslated: the body reads its source text
// ("x") and embeds it directly as the identifier content. `@rhs`
// is auto-translated (rhs already points to (integer "INT")).
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
let text = ctx.ast.source_text(raw_lhs.into());
tree!((call
method: (identifier #{text.as_str()})
receiver: {rhs}))
}
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `method:` uses the raw source text ("x"); if `@@` were broken and
// auto-translation ran on `raw_lhs`, it would still produce the
// string "x" (source_text inherits the input range), so the dump
// wouldn't change. Add a second assertion: explicitly translating
// the raw NodeRef inside the body must succeed and produce
// `(identifier "ID")`.
assert_dump_eq(
&dump,
r#"
program
stmt:
call
method: identifier "x"
receiver: integer "INT"
"#,
);
}
/// Companion to `test_raw_capture_marker`: confirms that calling
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
/// produces the correctly-translated output-schema node. With `@`, the
/// translation has already happened, so `ctx.translate(...)` inside the
/// body would attempt to re-translate an output node (which has no
/// matching rule and would error).
#[test]
fn test_raw_capture_marker_explicit_translate() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules: Vec<Rule> = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
yeast::rule!(
(assignment left: (_) @@raw_lhs right: (_) @rhs)
=>
{
let translated_lhs = ctx.translate(raw_lhs)?;
tree!((call
method: {..translated_lhs}
receiver: {rhs}))
}
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
assert_dump_eq(
&dump,
r#"
program
stmt:
call
method: identifier "ID"
receiver: integer "INT"
"#,
);
}
// ---- Cursor tests ----
#[test]