Yeast: use child locations instead of rule target

Previously, when a node was synthesized it would always take the
location from the node that matched the current rule. This resulted
in overly broad locations however.

For (foo #{bar}) we now take the location of the 'bar' node.

For non-leaf nodes we merge all its child node locations.
This commit is contained in:
Asger F
2026-06-08 15:23:05 +02:00
parent 166406acbb
commit 6c74cd31e4
4 changed files with 162 additions and 2 deletions

View File

@@ -396,8 +396,10 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
let expr = group.stream();
return Ok(quote! {
{
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
#ctx.literal(#kind_str, &__value)
let __expr = (#expr);
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
}
});
}

View File

@@ -82,6 +82,21 @@ impl<'a> BuildCtx<'a> {
.create_named_token_with_range(kind, value.to_string(), self.source_range)
}
/// Create a leaf node with fixed content and an optional preferred source range.
/// If `source_range` is `None`, falls back to this context's inherited range.
pub fn literal_with_source_range(
&mut self,
kind: &'static str,
value: &str,
source_range: Option<tree_sitter::Range>,
) -> Id {
self.ast.create_named_token_with_range(
kind,
value.to_string(),
source_range.or(self.source_range),
)
}
/// Create a leaf node with an auto-generated unique name.
pub fn fresh(&mut self, kind: &'static str, name: &str) -> Id {
let generated = self.fresh.resolve(name);

View File

@@ -58,12 +58,30 @@ pub trait YeastDisplay {
fn yeast_to_string(&self, ast: &Ast) -> String;
}
/// Optional source range for values used in `#{expr}` interpolations.
///
/// By default this returns `None`, so synthesized leaves inherit the matched
/// rule's source range. `NodeRef` returns the referenced node's range, letting
/// `(kind #{capture})` carry the captured node's location.
pub trait YeastSourceRange {
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range>;
}
impl YeastDisplay for NodeRef {
fn yeast_to_string(&self, ast: &Ast) -> String {
ast.source_text(self.0)
}
}
impl YeastSourceRange for NodeRef {
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range> {
ast.get_node(self.0).and_then(|n| match &n.content {
NodeContent::Range(r) => Some(r.clone()),
_ => n.source_range,
})
}
}
macro_rules! impl_yeast_display_via_display {
($($t:ty),* $(,)?) => {
$(
@@ -72,6 +90,12 @@ macro_rules! impl_yeast_display_via_display {
::std::string::ToString::to_string(self)
}
}
impl YeastSourceRange for $t {
fn yeast_source_range(&self, _ast: &Ast) -> Option<tree_sitter::Range> {
None
}
}
)*
};
}
@@ -90,6 +114,12 @@ impl<T: YeastDisplay + ?Sized> YeastDisplay for &T {
}
}
impl<T: YeastSourceRange + ?Sized> YeastSourceRange for &T {
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range> {
(**self).yeast_source_range(ast)
}
}
pub const CHILD_FIELD: u16 = u16::MAX;
#[derive(Debug)]
@@ -368,6 +398,15 @@ impl Ast {
is_named: bool,
source_range: Option<tree_sitter::Range>,
) -> Id {
let source_range = match &content {
// Parsed nodes already carry an exact source range in their content.
NodeContent::Range(_) => source_range,
// Synthesized nodes derive location from children when possible,
// and fall back to the inherited rule-match range otherwise.
_ => self
.union_source_range_of_children(&fields)
.or(source_range),
};
let id = self.nodes.len();
self.nodes.push(Node {
kind,
@@ -383,6 +422,66 @@ impl Ast {
id
}
fn union_source_range_of_children(
&self,
fields: &BTreeMap<FieldId, Vec<Id>>,
) -> Option<tree_sitter::Range> {
let mut start_byte: Option<usize> = None;
let mut end_byte: Option<usize> = None;
let mut start_point = tree_sitter::Point { row: 0, column: 0 };
let mut end_point = tree_sitter::Point { row: 0, column: 0 };
for child_ids in fields.values() {
for &child_id in child_ids {
let Some(child) = self.get_node(child_id) else {
continue;
};
let child_start_byte = child.start_byte();
let child_end_byte = child.end_byte();
// Skip children that carry no usable location.
if child_start_byte == 0 && child_end_byte == 0 {
continue;
}
match start_byte {
None => {
start_byte = Some(child_start_byte);
start_point = child.start_position();
}
Some(current_start) if child_start_byte < current_start => {
start_byte = Some(child_start_byte);
start_point = child.start_position();
}
_ => {}
}
match end_byte {
None => {
end_byte = Some(child_end_byte);
end_point = child.end_position();
}
Some(current_end) if child_end_byte > current_end => {
end_byte = Some(child_end_byte);
end_point = child.end_position();
}
_ => {}
}
}
}
match (start_byte, end_byte) {
(Some(start_byte), Some(end_byte)) => Some(tree_sitter::Range {
start_byte,
end_byte,
start_point,
end_point,
}),
_ => None,
}
}
pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id {
self.create_named_token_with_range(kind, content, None)
}

View File

@@ -18,6 +18,16 @@ fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
run_phased_and_dump(input, vec![Phase::new("test", PhaseKind::Repeating, rules)])
}
/// Helper: parse Ruby source with custom rules and return the transformed AST.
fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
runner.run(input).unwrap()
}
/// Helper: parse Ruby source with a custom output schema and multiple
/// rule phases, return dump.
fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
@@ -1172,3 +1182,37 @@ fn test_hash_brace_renders_integer_expression() {
"#,
);
}
/// Regression test: `(kind #{capture})` should inherit the captured node's
/// source location, not the full source range of the matched rule root.
#[test]
fn test_hash_brace_uses_capture_location_for_leaf() {
let rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
)
=>
(call
method: (identifier #{name})
receiver: (identifier #{recv})
arguments: (argument_list)
)
);
let ast = run_and_ast("foo.bar()", vec![rule]);
let mut bar_ids: Vec<usize> = Vec::new();
for id in ast.reachable_node_ids() {
let Some(node) = ast.get_node(id) else { continue; };
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
bar_ids.push(id);
}
}
assert_eq!(bar_ids.len(), 1, "expected exactly one identifier 'bar'");
let bar = ast.get_node(bar_ids[0]).unwrap();
assert_eq!(bar.start_byte(), 4);
assert_eq!(bar.end_byte(), 7);
}