diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 0980d41f956..4b27b980439 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -396,8 +396,10 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result BuildCtx<'a> { .create_named_token_with_range(kind, value.to_string(), self.source_range) } + /// Create a leaf node with fixed content and an optional preferred source range. + /// If `source_range` is `None`, falls back to this context's inherited range. + pub fn literal_with_source_range( + &mut self, + kind: &'static str, + value: &str, + source_range: Option, + ) -> Id { + self.ast.create_named_token_with_range( + kind, + value.to_string(), + source_range.or(self.source_range), + ) + } + /// Create a leaf node with an auto-generated unique name. pub fn fresh(&mut self, kind: &'static str, name: &str) -> Id { let generated = self.fresh.resolve(name); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 40b90567537..9c3a4ad4114 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -58,12 +58,30 @@ pub trait YeastDisplay { fn yeast_to_string(&self, ast: &Ast) -> String; } +/// Optional source range for values used in `#{expr}` interpolations. +/// +/// By default this returns `None`, so synthesized leaves inherit the matched +/// rule's source range. `NodeRef` returns the referenced node's range, letting +/// `(kind #{capture})` carry the captured node's location. +pub trait YeastSourceRange { + fn yeast_source_range(&self, ast: &Ast) -> Option; +} + impl YeastDisplay for NodeRef { fn yeast_to_string(&self, ast: &Ast) -> String { ast.source_text(self.0) } } +impl YeastSourceRange for NodeRef { + fn yeast_source_range(&self, ast: &Ast) -> Option { + ast.get_node(self.0).and_then(|n| match &n.content { + NodeContent::Range(r) => Some(r.clone()), + _ => n.source_range, + }) + } +} + macro_rules! impl_yeast_display_via_display { ($($t:ty),* $(,)?) => { $( @@ -72,6 +90,12 @@ macro_rules! impl_yeast_display_via_display { ::std::string::ToString::to_string(self) } } + + impl YeastSourceRange for $t { + fn yeast_source_range(&self, _ast: &Ast) -> Option { + None + } + } )* }; } @@ -90,6 +114,12 @@ impl YeastDisplay for &T { } } +impl YeastSourceRange for &T { + fn yeast_source_range(&self, ast: &Ast) -> Option { + (**self).yeast_source_range(ast) + } +} + pub const CHILD_FIELD: u16 = u16::MAX; #[derive(Debug)] @@ -368,6 +398,15 @@ impl Ast { is_named: bool, source_range: Option, ) -> Id { + let source_range = match &content { + // Parsed nodes already carry an exact source range in their content. + NodeContent::Range(_) => source_range, + // Synthesized nodes derive location from children when possible, + // and fall back to the inherited rule-match range otherwise. + _ => self + .union_source_range_of_children(&fields) + .or(source_range), + }; let id = self.nodes.len(); self.nodes.push(Node { kind, @@ -383,6 +422,66 @@ impl Ast { id } + fn union_source_range_of_children( + &self, + fields: &BTreeMap>, + ) -> Option { + let mut start_byte: Option = None; + let mut end_byte: Option = None; + let mut start_point = tree_sitter::Point { row: 0, column: 0 }; + let mut end_point = tree_sitter::Point { row: 0, column: 0 }; + + for child_ids in fields.values() { + for &child_id in child_ids { + let Some(child) = self.get_node(child_id) else { + continue; + }; + + let child_start_byte = child.start_byte(); + let child_end_byte = child.end_byte(); + + // Skip children that carry no usable location. + if child_start_byte == 0 && child_end_byte == 0 { + continue; + } + + match start_byte { + None => { + start_byte = Some(child_start_byte); + start_point = child.start_position(); + } + Some(current_start) if child_start_byte < current_start => { + start_byte = Some(child_start_byte); + start_point = child.start_position(); + } + _ => {} + } + + match end_byte { + None => { + end_byte = Some(child_end_byte); + end_point = child.end_position(); + } + Some(current_end) if child_end_byte > current_end => { + end_byte = Some(child_end_byte); + end_point = child.end_position(); + } + _ => {} + } + } + } + + match (start_byte, end_byte) { + (Some(start_byte), Some(end_byte)) => Some(tree_sitter::Range { + start_byte, + end_byte, + start_point, + end_point, + }), + _ => None, + } + } + pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id { self.create_named_token_with_range(kind, content, None) } diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 3ae8e2072d9..069132d0923 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -18,6 +18,16 @@ fn run_and_dump(input: &str, rules: Vec) -> String { run_phased_and_dump(input, vec![Phase::new("test", PhaseKind::Repeating, rules)]) } +/// Helper: parse Ruby source with custom rules and return the transformed AST. +fn run_and_ast(input: &str, rules: Vec) -> Ast { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)]; + let runner = Runner::with_schema(lang, &schema, &phases); + runner.run(input).unwrap() +} + /// Helper: parse Ruby source with a custom output schema and multiple /// rule phases, return dump. fn run_phased_and_dump(input: &str, phases: Vec) -> String { @@ -1172,3 +1182,37 @@ fn test_hash_brace_renders_integer_expression() { "#, ); } + +/// Regression test: `(kind #{capture})` should inherit the captured node's +/// source location, not the full source range of the matched rule root. +#[test] +fn test_hash_brace_uses_capture_location_for_leaf() { + let rule = rule!( + (call + method: (identifier) @name + receiver: (identifier) @recv + ) + => + (call + method: (identifier #{name}) + receiver: (identifier #{recv}) + arguments: (argument_list) + ) + ); + + let ast = run_and_ast("foo.bar()", vec![rule]); + + let mut bar_ids: Vec = Vec::new(); + for id in ast.reachable_node_ids() { + let Some(node) = ast.get_node(id) else { continue; }; + if node.kind() == "identifier" && ast.source_text(id) == "bar" { + bar_ids.push(id); + } + } + + assert_eq!(bar_ids.len(), 1, "expected exactly one identifier 'bar'"); + let bar = ast.get_node(bar_ids[0]).unwrap(); + + assert_eq!(bar.start_byte(), 4); + assert_eq!(bar.end_byte(), 7); +}