From 2db6ff4f63eb40cd2293c28cc64278bd34e6669c Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:27:00 +0000 Subject: [PATCH] Support separate output node types in extractor generator Language and LanguageSpec gain optional output_node_types field. When set, the generator produces dbscheme/QL from the output types and the extractor validates TRAP against them. All existing extractors pass None (no behavior change). Ruby extract() calls gain vec![] for the new rules parameter. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ql/extractor/src/generator.rs | 4 ++++ ruby/extractor/src/generator.rs | 2 ++ shared/tree-sitter-extractor/src/generator/language.rs | 4 ++++ shared/tree-sitter-extractor/src/generator/mod.rs | 3 ++- 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ql/extractor/src/generator.rs b/ql/extractor/src/generator.rs index 650e11c138b..95137f6d0ca 100644 --- a/ql/extractor/src/generator.rs +++ b/ql/extractor/src/generator.rs @@ -21,18 +21,22 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "QL".to_owned(), node_types: tree_sitter_ql::NODE_TYPES, + output_node_types: None, }, Language { name: "Dbscheme".to_owned(), node_types: tree_sitter_ql_dbscheme::NODE_TYPES, + output_node_types: None, }, Language { name: "Blame".to_owned(), node_types: tree_sitter_blame::NODE_TYPES, + output_node_types: None, }, Language { name: "JSON".to_owned(), node_types: tree_sitter_json::NODE_TYPES, + output_node_types: None, }, ]; diff --git a/ruby/extractor/src/generator.rs b/ruby/extractor/src/generator.rs index de1d0dbfd7e..00843b95d3d 100644 --- a/ruby/extractor/src/generator.rs +++ b/ruby/extractor/src/generator.rs @@ -21,10 +21,12 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "Ruby".to_owned(), node_types: tree_sitter_ruby::NODE_TYPES, + output_node_types: None, }, Language { name: "Erb".to_owned(), node_types: tree_sitter_embedded_template::NODE_TYPES, + output_node_types: None, }, ]; diff --git a/shared/tree-sitter-extractor/src/generator/language.rs b/shared/tree-sitter-extractor/src/generator/language.rs index f0b0ed1790f..ab0147c5eb6 100644 --- a/shared/tree-sitter-extractor/src/generator/language.rs +++ b/shared/tree-sitter-extractor/src/generator/language.rs @@ -1,4 +1,8 @@ pub struct Language { pub name: String, pub node_types: &'static str, + /// If set, the generator uses these node types for the dbscheme/QL library + /// instead of `node_types`. This is useful when desugaring transforms produce + /// an AST whose shape differs from the tree-sitter grammar. + pub output_node_types: Option<&'static str>, } diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index 78e9e4a0b69..db45f8d68d6 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -68,7 +68,8 @@ pub fn generate( let token_name = format!("{}_token", &prefix); let tokeninfo_name = format!("{}_tokeninfo", &prefix); let reserved_word_name = format!("{}_reserved_word", &prefix); - let nodes = node_types::read_node_types_str(&prefix, language.node_types)?; + let effective_node_types = language.output_node_types.unwrap_or(language.node_types); + let nodes = node_types::read_node_types_str(&prefix, effective_node_types)?; let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes); ast_node_members.insert(&token_name); writeln!(&mut dbscheme_writer, "/*- {} dbscheme -*/", language.name)?;