From 0776ff89d9c9e7c4fdc02aec454c613f5198bb0f Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 1 May 2026 21:50:31 +0000 Subject: [PATCH] Shared extractor: Unify extract() and extract_and_desugar() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit extract() now takes an additional rules parameter; callers that need no desugaring pass an empty Vec. When rules are empty, it uses tree-sitter's native traversal. When rules are provided, it runs yeast desugaring and falls back to the un-desugared tree on error. extract_and_desugar() is removed — all callers use extract() directly. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ruby/extractor/src/extractor.rs | 2 + .../src/extractor/mod.rs | 91 +++---------------- .../src/extractor/simple.rs | 1 + 3 files changed, 18 insertions(+), 76 deletions(-) diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 6807d09e9be..ae0ee84cce9 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -123,6 +123,7 @@ pub fn run(options: Options) -> std::io::Result<()> { &path, &source, &[], + vec![], ); let (ranges, line_breaks) = scan_erb( @@ -211,6 +212,7 @@ pub fn run(options: Options) -> std::io::Result<()> { &path, &source, &code_ranges, + vec![], ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; if needs_conversion { diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index bdd23ad7301..e93091a0a80 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -243,6 +243,8 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr } /// Extracts the source file at `path`, which is assumed to be canonicalized. +/// When `rules` is non-empty, the parsed tree is first transformed through +/// yeast before TRAP extraction. 
pub fn extract( language: &Language, language_prefix: &str, @@ -253,6 +255,7 @@ pub fn extract( path: &Path, source: &[u8], ranges: &[Range], + rules: Vec, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); let span = tracing::span!( @@ -275,13 +278,23 @@ pub fn extract( source, diagnostics_writer, trap_writer, - // TODO: should we handle path strings that are not valid UTF8 better? &path_str, file_label, language_prefix, schema, ); - traverse(&tree, &mut visitor); + + if rules.is_empty() { + traverse(&tree, &mut visitor); + } else { + let runner = yeast::Runner::new(language.clone(), rules); + let ast = runner.run_from_tree(&tree) + .unwrap_or_else(|e| { + tracing::error!("Desugaring failed: {e}"); + yeast::Ast::from_tree(language.clone(), &tree) + }); + traverse_yeast(&ast, &mut visitor); + } parser.reset(); } @@ -775,80 +788,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) { } } -/// Like [`extract`], but applies yeast desugaring rules to the parsed tree -/// before extracting TRAP. The desugared AST may have a different structure -/// than the original tree-sitter parse tree. -/// -/// Note: This function uses yeast's own AST traversal, which may produce -/// different child ordering than tree-sitter's native traversal. Only use -/// this for languages that have desugaring rules. -pub fn extract_and_desugar( - language: &Language, - language_prefix: &str, - schema: &NodeTypeMap, - diagnostics_writer: &mut diagnostics::LogWriter, - trap_writer: &mut trap::Writer, - transformer: Option<&file_paths::PathTransformer>, - path: &Path, - source: &[u8], - ranges: &[Range], - rules: Vec, -) { - if rules.is_empty() { - // No desugaring needed — use the standard extract path - // which preserves tree-sitter's source-order traversal. 
- return extract( - language, - language_prefix, - schema, - diagnostics_writer, - trap_writer, - transformer, - path, - source, - ranges, - ); - } - - let path_str = file_paths::normalize_and_transform_path(path, transformer); - let span = tracing::span!( - tracing::Level::TRACE, - "extract_and_desugar", - file = %path_str - ); - - let _enter = span.enter(); - - tracing::debug!("extracting (with desugaring): {}", path_str); - - let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - parser.set_included_ranges(ranges).unwrap(); - let tree = parser.parse(source, None).expect("Failed to parse file"); - trap_writer.comment(format!("Auto-generated TRAP file for {path_str}")); - let file_label = populate_file(trap_writer, path, transformer); - let mut visitor = Visitor::new( - source, - diagnostics_writer, - trap_writer, - &path_str, - file_label, - language_prefix, - schema, - ); - let runner = yeast::Runner::new(language.clone(), rules); - let ast = runner.run_from_tree(&tree) - .unwrap_or_else(|e| { - tracing::error!("Desugaring failed: {e}"); - // Fall back to the un-desugared AST - yeast::Ast::from_tree(language.clone(), &tree) - }); - - traverse_yeast(&ast, &mut visitor); - - parser.reset(); -} - fn traverse_yeast(tree: &yeast::Ast, visitor: &mut Visitor) { use yeast::Cursor; let mut cursor = tree.walk(); diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index 3162e5b86aa..7f6edae2c3b 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -167,6 +167,7 @@ impl Extractor { &path, &source, &[], + vec![], ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?;