Merge pull request #21924 from asgerf/asgerf/yeast-changes

Yeast: some fixes
This commit is contained in:
Asger F
2026-06-03 10:32:38 +02:00
committed by GitHub
10 changed files with 205 additions and 61 deletions

View File

@@ -305,7 +305,18 @@ fn convert_nodes(
// type.
let members: Set<&str> = n_members
.iter()
.map(|n| nodes.get(n).unwrap().dbscheme_name.as_str())
.map(|n| {
nodes
.get(n)
.unwrap_or_else(|| {
panic!(
"union type '{}' references unknown member node type {:?}",
node.dbscheme_name, n
)
})
.dbscheme_name
.as_str()
})
.collect();
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: &node.dbscheme_name,

View File

@@ -411,7 +411,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Named fields — compute each value into a temp, then reference it
while peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name.to_string();
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
expect_punct(tokens, ':', "expected `:` after field name")?;
let temp = Ident::new(
&format!("__field_{field_str}_{field_counter}"),
@@ -437,7 +437,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
.map(::std::convert::Into::<usize>::into)
.collect();
});
field_args.push(quote! { (#field_str, #temp) });
// An empty splice means the field is absent — skip it
// entirely rather than emitting an empty named field.
field_args.push(quote! {
if !#temp.is_empty() { __fields.push((#field_str, #temp)); }
});
continue;
}
}
@@ -445,7 +449,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
let value = parse_direct_node(tokens, ctx)?;
stmts.push(quote! { let #temp: usize = #value; });
field_args.push(quote! { (#field_str, vec![#temp]) });
field_args.push(quote! { __fields.push((#field_str, vec![#temp])); });
}
// After all named fields, no other tokens are allowed.
@@ -461,7 +465,9 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
Ok(quote! {
{
#(#stmts)*
#ctx.node(#kind_str, vec![#(#field_args),*])
let mut __fields: Vec<(&str, Vec<usize>)> = Vec::new();
#(#field_args)*
#ctx.node(#kind_str, __fields)
}
})
}
@@ -475,6 +481,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
let group = expect_group(tokens, Delimiter::Parenthesis)?;
let mut inner = group.stream().into_iter().peekable();
// Empty `()` represents an empty sequence — emit nothing.
if inner.peek().is_none() {
continue;
}
// Regular node
let node = parse_direct_node_inner(&mut inner, ctx)?;
items.push(quote! { __nodes.push(#node); });

View File

@@ -63,16 +63,20 @@ impl Captures {
}
/// Apply a fallible function to every captured id (across all keys),
/// replacing each id with the result. Stops and returns the error on
/// the first failure.
/// replacing each id with the results. A function returning an empty
/// vector removes the capture; returning multiple ids splices them
/// into the capture's value list (suitable for `*`/`+` captures).
/// Stops and returns the error on the first failure.
pub fn try_map_all_captures<E>(
&mut self,
mut f: impl FnMut(Id) -> Result<Id, E>,
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
) -> Result<(), E> {
for ids in self.captures.values_mut() {
for id in ids {
*id = f(*id)?;
let mut new_ids = Vec::with_capacity(ids.len());
for &id in ids.iter() {
new_ids.extend(f(id)?);
}
*ids = new_ids;
}
Ok(())
}

View File

@@ -273,6 +273,16 @@ fn dump_node(
}
}
// Check for required fields that are absent
if let Some((schema, _, _)) = type_check {
for (field_id, field_name) in schema.required_fields_for_kind(node.kind_name()) {
if !node.fields.contains_key(&field_id) {
let name = field_name.unwrap_or("child");
writeln!(out, "{prefix} <-- ERROR: missing required field '{name}'").unwrap();
}
}
}
// Unnamed children — skip unnamed tokens (keywords, punctuation)
if let Some(children) = node.fields.get(&CHILD_FIELD) {
let child_type_check = type_check.map(|(schema, _, _)| {

View File

@@ -563,6 +563,15 @@ impl Node {
NodeContent::DynamicString(s) => Some(s.to_string()),
}
}
/// Returns the ids of the children stored under `field_id`.
///
/// When this node has no entry for `field_id`, the result is an empty
/// slice, so callers can iterate without checking for presence first.
pub fn field_children(&self, field_id: FieldId) -> &[Id] {
    match self.fields.get(&field_id) {
        Some(children) => children.as_slice(),
        None => &[],
    }
}
}
/// The contents of a node is either a range in the original source file,
@@ -836,17 +845,9 @@ fn apply_one_shot_rules_inner(
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(captured_id);
return Ok(vec![captured_id]);
}
let result =
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
if result.len() != 1 {
return Err(format!(
"OneShot: recursion on captured node produced {} results, expected exactly 1",
result.len()
));
}
Ok(result[0])
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
})?;
return Ok(rule.run_transform(ast, captures, id, fresh));
}

View File

@@ -314,6 +314,14 @@ fn apply_yaml_to_schema(
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
schema.set_field_types(parent_kind, field_id, node_types);
schema.set_field_cardinality(
parent_kind,
field_id,
crate::schema::FieldCardinality {
multiple: spec.multiple,
required: spec.required,
},
);
}
}
}

View File

@@ -178,11 +178,15 @@ impl QueryListElem {
let Some(child) = remaining_children.next() else {
return Ok(false);
};
if skip_unnamed {
let node = ast.get_node(child).unwrap();
if !node.is_named() {
continue;
}
let node = ast.get_node(child).unwrap();
// Skip tree-sitter `extras` (e.g. comments) during
// positional matching: they are conceptually invisible
// between siblings, mirroring tree-sitter query semantics.
if node.is_extra() {
continue;
}
if skip_unnamed && !node.is_named() {
continue;
}
let snapshot = matches.clone();
if sub_query.do_match(ast, child, matches)? {

View File

@@ -8,6 +8,15 @@ pub struct NodeType {
pub named: bool,
}
/// Multiplicity/optionality of a field declaration.
///
/// A `Schema` keeps one of these per `(parent kind, field id)` pair; the
/// two flags are independent of each other.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct FieldCardinality {
/// Whether the field may hold more than one child.
pub multiple: bool,
/// Whether at least one child must be present. Fields flagged here are
/// the ones yielded by `Schema::required_fields_for_kind`.
pub required: bool,
}
/// A schema defining node kinds and field names for the output AST.
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
///
@@ -32,6 +41,7 @@ pub struct Schema {
kind_names: BTreeMap<KindId, &'static str>,
next_kind_id: KindId,
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
field_cardinalities: BTreeMap<(String, FieldId), FieldCardinality>,
supertypes: BTreeMap<String, Vec<NodeType>>,
}
@@ -52,6 +62,7 @@ impl Schema {
kind_names: BTreeMap::new(),
next_kind_id: 1, // 0 is reserved
field_types: BTreeMap::new(),
field_cardinalities: BTreeMap::new(),
supertypes: BTreeMap::new(),
}
}
@@ -196,6 +207,42 @@ impl Schema {
.get(&(parent_kind.to_string(), field_id))
}
/// Records the cardinality declared for `field_id` on nodes of kind
/// `parent_kind`, overwriting any value stored earlier for the same pair.
pub fn set_field_cardinality(
    &mut self,
    parent_kind: &str,
    field_id: FieldId,
    cardinality: FieldCardinality,
) {
    let key = (parent_kind.to_string(), field_id);
    self.field_cardinalities.insert(key, cardinality);
}
/// Looks up the cardinality declared for `field_id` on `parent_kind`.
///
/// Yields `None` when no cardinality was recorded for that pair.
pub fn field_cardinality(
    &self,
    parent_kind: &str,
    field_id: FieldId,
) -> Option<FieldCardinality> {
    let key = (parent_kind.to_string(), field_id);
    self.field_cardinalities.get(&key).copied()
}
/// Lazily yields a `(field_id, field_name)` pair for every field of
/// `parent_kind` whose recorded cardinality has `required: true`.
///
/// The name is whatever `field_name_for_id` reports for that id, so it
/// may be `None`.
pub fn required_fields_for_kind<'a>(
    &'a self,
    parent_kind: &'a str,
) -> impl Iterator<Item = (FieldId, Option<&'static str>)> + 'a {
    self.field_cardinalities
        .iter()
        .filter_map(move |((kind, field_id), card)| {
            // Keep only entries for this kind that are flagged as required.
            (kind == parent_kind && card.required)
                .then(|| (*field_id, self.field_name_for_id(*field_id)))
        })
}
/// Stores `node_types` as the members of `supertype`, replacing any
/// member list stored for it before.
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
    let key = supertype.to_string();
    self.supertypes.insert(key, node_types);
}

View File

@@ -274,6 +274,44 @@ fn test_query_no_match() {
assert!(!matched);
}
#[test]
fn test_query_skips_extras_in_positional_match() {
    // Regression test. During the forward scan, a positional wildcard `(_)`
    // must never bind to a tree-sitter `extra` such as a comment: extras
    // are treated as invisible between siblings, as in tree-sitter queries.
    // If a wildcard did capture a comment, a later rule translating that
    // comment to nothing (a common idiom, e.g. `(comment) => ()` in Swift)
    // would empty the capture's match list and the transform would fail
    // with "Variable X has 0 matches".
    let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let ast = runner.run("[1, # comment\n2]").unwrap();

    // The `array` node is the first child of the root: program -> array.
    let array_id = {
        let mut cursor = AstCursor::new(&ast);
        cursor.goto_first_child();
        cursor.node_id()
    };
    assert_eq!(ast.get_node(array_id).unwrap().kind(), "array");

    // The comment sits between the two integers; both wildcards must skip
    // it and bind to the integers.
    let query = yeast::query!((array (_) @a (_) @b));
    let mut captures = yeast::captures::Captures::new();
    assert!(query.do_match(&ast, array_id, &mut captures).unwrap());

    for var in ["a", "b"] {
        let bound = captures.get_var(var).unwrap();
        assert_eq!(ast.get_node(bound).unwrap().kind(), "integer");
    }
}
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();

View File

@@ -26,6 +26,13 @@ fn is_header_rule(line: &str) -> bool {
trimmed.len() >= 3 && trimmed.chars().all(|c| c == '=')
}
/// Reports whether a new corpus case header begins at `lines[i]`.
///
/// A header spans three consecutive lines: a rule line accepted by
/// `is_header_rule`, a title line that is not blank, and a second rule
/// line.
///
/// Returns `false` when fewer than three lines remain from `i` onwards.
/// That includes an `i` at or past the end of `lines`, which is answered
/// with `false` instead of an out-of-bounds panic.
fn is_next_case_header(lines: &[&str], i: usize) -> bool {
    matches!(
        lines.get(i..),
        // The slice pattern only matches when at least three lines remain.
        Some([top, title, bottom, ..])
            if is_header_rule(top) && !title.trim().is_empty() && is_header_rule(bottom)
    )
}
fn parse_corpus(content: &str) -> Vec<CorpusCase> {
let lines: Vec<&str> = content.lines().collect();
let mut i = 0;
@@ -58,48 +65,51 @@ fn parse_corpus(content: &str) -> Vec<CorpusCase> {
let input_start = i;
while i < lines.len() && lines[i].trim() != "---" {
if is_next_case_header(&lines, i) {
break;
}
i += 1;
}
assert!(i < lines.len(), "Missing --- separator for case {name}");
let input = lines[input_start..i].join("\n").trim_end().to_string();
i += 1;
// Raw tree-sitter parse section. New-format files have a second
// `---` separator between the raw tree and the mapped AST. Legacy
// files (with only one separator) have no raw section — in that
// case `raw` stays empty and update mode will populate it.
let raw_start = i;
let mut next_sep = i;
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
if is_header_rule(lines[next_sep])
&& next_sep + 2 < lines.len()
&& !lines[next_sep + 1].trim().is_empty()
&& is_header_rule(lines[next_sep + 2])
{
break;
}
next_sep += 1;
}
let raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
i = next_sep + 1;
raw_text
let raw;
let expected;
if i >= lines.len() || lines[i].trim() != "---" {
// No `---` separator before next case (or EOF). Treat the
// remaining sections as empty.
raw = String::new();
expected = String::new();
} else {
String::new()
};
let expected_start = i;
while i < lines.len() {
if is_header_rule(lines[i])
&& i + 2 < lines.len()
&& !lines[i + 1].trim().is_empty()
&& is_header_rule(lines[i + 2])
{
break;
}
i += 1;
// Raw tree-sitter parse section. New-format files have a second
// `---` separator between the raw tree and the mapped AST. Legacy
// files (with only one separator) have no raw section — in that
// case `raw` stays empty and update mode will populate it.
let raw_start = i;
let mut next_sep = i;
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
if is_next_case_header(&lines, next_sep) {
break;
}
next_sep += 1;
}
raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
i = next_sep + 1;
raw_text
} else {
String::new()
};
let expected_start = i;
while i < lines.len() {
if is_next_case_header(&lines, i) {
break;
}
i += 1;
}
expected = lines[expected_start..i].join("\n").trim().to_string();
}
let expected = lines[expected_start..i].join("\n").trim().to_string();
cases.push(CorpusCase {
name,