From 588f76fd50208a07077ee1224b7555b222746fab Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:27:00 +0000 Subject: [PATCH] yeast: Support separate output node types in extractor generator Language gains an optional `desugar` field holding a yeast desugaring configuration. When that configuration provides `output_node_types_yaml`, the generator converts the YAML to node-types JSON and produces the dbscheme/QL library from it instead of from `node_types`. All existing QL and Ruby generator entries pass `desugar: None` (no behavior change). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ql/extractor/src/generator.rs | 4 ++++ ruby/extractor/src/generator.rs | 2 ++ .../src/generator/language.rs | 5 +++++ .../tree-sitter-extractor/src/generator/mod.rs | 16 +++++++++++++++- 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ql/extractor/src/generator.rs b/ql/extractor/src/generator.rs index 650e11c138b..96ce5319dd1 100644 --- a/ql/extractor/src/generator.rs +++ b/ql/extractor/src/generator.rs @@ -21,18 +21,22 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "QL".to_owned(), node_types: tree_sitter_ql::NODE_TYPES, + desugar: None, }, Language { name: "Dbscheme".to_owned(), node_types: tree_sitter_ql_dbscheme::NODE_TYPES, + desugar: None, }, Language { name: "Blame".to_owned(), node_types: tree_sitter_blame::NODE_TYPES, + desugar: None, }, Language { name: "JSON".to_owned(), node_types: tree_sitter_json::NODE_TYPES, + desugar: None, }, ]; diff --git a/ruby/extractor/src/generator.rs b/ruby/extractor/src/generator.rs index de1d0dbfd7e..0430afd103e 100644 --- a/ruby/extractor/src/generator.rs +++ b/ruby/extractor/src/generator.rs @@ -21,10 +21,12 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "Ruby".to_owned(), node_types: tree_sitter_ruby::NODE_TYPES, + desugar: None, }, Language { name: "Erb".to_owned(), node_types: tree_sitter_embedded_template::NODE_TYPES, + desugar: None, }, ]; diff --git 
a/shared/tree-sitter-extractor/src/generator/language.rs b/shared/tree-sitter-extractor/src/generator/language.rs index f0b0ed1790f..a95f750b572 100644 --- a/shared/tree-sitter-extractor/src/generator/language.rs +++ b/shared/tree-sitter-extractor/src/generator/language.rs @@ -1,4 +1,9 @@ pub struct Language { pub name: String, pub node_types: &'static str, + /// Optional yeast desugaring configuration. When set with an + /// `output_node_types_yaml`, the generator uses that YAML for the + /// dbscheme/QL library instead of `node_types`. The `rules` field is + /// unused at code-generation time; only the schema matters. + pub desugar: Option, } diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index 78e9e4a0b69..d2521c51b3e 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -6,6 +6,7 @@ use std::io::Write; use std::path::PathBuf; use crate::node_types; +use yeast; pub mod dbscheme; pub mod language; @@ -68,7 +69,20 @@ pub fn generate( let token_name = format!("{}_token", &prefix); let tokeninfo_name = format!("{}_tokeninfo", &prefix); let reserved_word_name = format!("{}_reserved_word", &prefix); - let nodes = node_types::read_node_types_str(&prefix, language.node_types)?; + let effective_node_types: String = match language + .desugar + .as_ref() + .and_then(|c| c.output_node_types_yaml) + { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { + std::io::Error::other(format!( + "Failed to convert YAML node-types to JSON for {}: {e}", + language.name + )) + })?, + None => language.node_types.to_string(), + }; + let nodes = node_types::read_node_types_str(&prefix, &effective_node_types)?; let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes); ast_node_members.insert(&token_name); writeln!(&mut dbscheme_writer, "/*- {} dbscheme -*/", language.name)?;