From 6f643a36045d559c5b6c2410a9f37d87f2aa6dcb Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 7 May 2026 12:35:58 +0000 Subject: [PATCH] yeast: Use canonical ID when registering unnamed kinds in Schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Schema::from_language registered unnamed kinds via or_insert(id), where `id` came from iterating 0..node_kind_count. For names with multiple unnamed IDs (notably "end" in tree-sitter-ruby, which has IDs 0 and 13, where ID 0 is tree-sitter's reserved end-of-input symbol), this picked the first encountered ID — typically the wrong one. The visitor sets node.kind via language.id_for_node_kind(name, false), which returns the canonical ID. So a query for ("end") would compare node.kind=13 against schema=0 and silently fail to match, with no diagnostic. Use language.id_for_node_kind(name, false) to obtain the canonical ID when registering, mirroring the named-kind path that already does the same with id_for_node_kind(name, true). --- shared/yeast/src/schema.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs index 0a33fd6e0ed..12554d9c869 100644 --- a/shared/yeast/src/schema.rs +++ b/shared/yeast/src/schema.rs @@ -61,9 +61,10 @@ impl Schema { } } // Import all node kind names, preserving tree-sitter's IDs. - // Track named and unnamed variants separately. - // For named kinds, use the canonical ID from id_for_node_kind(name, true) - // since some languages have multiple IDs for the same named kind. + // Track named and unnamed variants separately. For both named and + // unnamed kinds, use the canonical ID from id_for_node_kind, since + // some languages have multiple IDs for the same name (e.g., the + // reserved end-of-input symbol at ID 0 may share a name with a real token). 
for id in 0..language.node_kind_count() as u16 { if let Some(name) = language.node_kind_for_id(id) { if !name.is_empty() { @@ -75,12 +76,13 @@ impl Schema { schema.kind_names.insert(canonical_id, name); } } else { - // For unnamed kinds, only insert if we don't already have one - // (some languages have multiple unnamed IDs for the same text) - schema - .unnamed_kind_ids - .entry(name.to_string()) - .or_insert(id); + let canonical_id = language.id_for_node_kind(name, false); + if canonical_id != 0 && !schema.unnamed_kind_ids.contains_key(name) { + schema + .unnamed_kind_ids + .insert(name.to_string(), canonical_id); + schema.kind_names.insert(canonical_id, name); + } } // Always track the name for any ID we encounter schema.kind_names.entry(id).or_insert(name);