From 7efb03d4cc783da023aab56bcc5542b77cf8fa31 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 14 Apr 2026 16:03:02 +0000 Subject: [PATCH] Yeast: AST desugaring framework (rebased from hackathon-desugaring) Add the yeast crate (Yet another Elaborator for Abstract Syntax Trees), a framework for tree-sitter AST transformations/desugaring. Integrate it into the shared tree-sitter extractor. Key components: - shared/yeast/: New crate with query/match/transform pipeline for tree-sitter ASTs, with Ruby desugaring rules as an example - shared/tree-sitter-extractor: Pass parsed trees through yeast before TRAP extraction, applying language-specific desugaring rules Updated from the original hackathon branch to work with tree-sitter 0.24 and current main dependencies. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 29 +- Cargo.toml | 1 + shared/tree-sitter-extractor/Cargo.toml | 1 + .../src/extractor/mod.rs | 42 +- shared/yeast/.envrc | 1 + shared/yeast/.gitignore | 1 + shared/yeast/.gitkeep | 0 shared/yeast/Cargo.lock | 357 +++++++++++ shared/yeast/Cargo.toml | 13 + shared/yeast/flake.lock | 85 +++ shared/yeast/flake.nix | 31 + shared/yeast/src/bin/main.rs | 26 + shared/yeast/src/captures.rs | 92 +++ shared/yeast/src/cursor.rs | 8 + shared/yeast/src/lib.rs | 575 ++++++++++++++++++ shared/yeast/src/print.rs | 34 ++ shared/yeast/src/query.rs | 243 ++++++++ shared/yeast/src/range.rs | 21 + shared/yeast/src/rules.rs | 133 ++++ shared/yeast/src/tree_builder.rs | 170 ++++++ shared/yeast/src/visitor.rs | 110 ++++ shared/yeast/tests/fixtures/1.parsed.json | 68 +++ shared/yeast/tests/fixtures/1.rb | 1 + shared/yeast/tests/fixtures/1.rewritten.json | 68 +++ .../fixtures/multiple_assignment.input.json | 85 +++ .../fixtures/multiple_assignment.output.json | 139 +++++ shared/yeast/tests/test.rs | 160 +++++ 27 files changed, 2475 insertions(+), 19 deletions(-) create mode 100644 shared/yeast/.envrc create mode 100644 shared/yeast/.gitignore create mode 
100644 shared/yeast/.gitkeep create mode 100644 shared/yeast/Cargo.lock create mode 100644 shared/yeast/Cargo.toml create mode 100644 shared/yeast/flake.lock create mode 100644 shared/yeast/flake.nix create mode 100644 shared/yeast/src/bin/main.rs create mode 100644 shared/yeast/src/captures.rs create mode 100644 shared/yeast/src/cursor.rs create mode 100644 shared/yeast/src/lib.rs create mode 100644 shared/yeast/src/print.rs create mode 100644 shared/yeast/src/query.rs create mode 100644 shared/yeast/src/range.rs create mode 100644 shared/yeast/src/rules.rs create mode 100644 shared/yeast/src/tree_builder.rs create mode 100644 shared/yeast/src/visitor.rs create mode 100644 shared/yeast/tests/fixtures/1.parsed.json create mode 100644 shared/yeast/tests/fixtures/1.rb create mode 100644 shared/yeast/tests/fixtures/1.rewritten.json create mode 100644 shared/yeast/tests/fixtures/multiple_assignment.input.json create mode 100644 shared/yeast/tests/fixtures/multiple_assignment.output.json create mode 100644 shared/yeast/tests/test.rs diff --git a/Cargo.lock b/Cargo.lock index b6456c84106..de1679a733c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -416,6 +416,7 @@ dependencies = [ "tree-sitter", "tree-sitter-json", "tree-sitter-ql", + "yeast", "zstd", ] @@ -2470,7 +2471,6 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "indexmap 2.11.4", "itoa", "memchr", "ryu", @@ -2853,14 +2853,13 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.25.9" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", "regex-syntax", - "serde_json", "streaming-iterator", "tree-sitter-language", ] @@ -2891,6 +2890,16 @@ version = 
"0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-ql" version = "0.23.1" @@ -3367,6 +3376,18 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yeast" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", + "tree-sitter", + "tree-sitter-python", + "tree-sitter-ruby", +] + [[package]] name = "yoke" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 58a755340b9..87bfe57b2db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ resolver = "2" members = [ "shared/tree-sitter-extractor", + "shared/yeast", "ruby/extractor", "rust/extractor", "rust/extractor/macros", diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index d02f02fd588..1ad18a6df5a 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -20,6 +20,7 @@ serde_json = "1.0" chrono = { version = "0.4.42", features = ["serde"] } num_cpus = "1.17.0" zstd = "0.13.3" +yeast = { path = "../yeast" } [dev-dependencies] tree-sitter-ql = "0.23.1" diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 0ace3831881..4b263274c71 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -14,7 +14,8 @@ use tracing_subscriber::fmt::format::DefaultFields; use tracing_subscriber::fmt::format::Format; use tracing_subscriber::layer::SubscriberExt; use 
tracing_subscriber::util::SubscriberInitExt; -use tree_sitter::{Language, Node, Parser, Range, Tree}; +use tree_sitter::{Language, Parser, Range}; +use yeast::{Cursor, Node}; pub mod simple; @@ -242,7 +243,16 @@ pub fn extract( language_prefix, schema, ); - traverse(&tree, &mut visitor); + // HACK: Pass the tree through yeast + let rules = if language_prefix == "ruby" { + yeast::rules::rules() + } else { + vec![] + }; + let runner = yeast::Runner::new(language.clone(), rules); + let ast = runner.run_from_tree(&tree); + + traverse(&ast, &mut visitor); parser.reset(); } @@ -333,7 +343,7 @@ impl<'a> Visitor<'a> { &mut self, message: &str, args: &[diagnostics::MessageArg], - node: Node, + node: &Node, status_page: bool, ) { let loc = location_for(self, self.file_label, node); @@ -357,7 +367,7 @@ impl<'a> Visitor<'a> { self.record_parse_error(loc_label, &mesg); } - fn enter_node(&mut self, node: Node) -> bool { + fn enter_node(&mut self, node: &Node) -> bool { if node.is_missing() { self.record_parse_error_for_node( "A parse error occurred (expected {} symbol). Check the syntax of the file. 
If the file is invalid, correct the error or {} the file from analysis.", @@ -383,7 +393,7 @@ impl<'a> Visitor<'a> { true } - fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { + fn leave_node(&mut self, field_name: Option<&'static str>, node: &Node) { if node.is_error() || node.is_missing() { return; } @@ -529,7 +539,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)), diagnostics::MessageArg::Code(&format!("{:?}", field.type_info)), ], - *node, + node, false, ); } @@ -541,7 +551,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(child_node.field_name.unwrap_or("child")), diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)), ], - *node, + node, false, ); } @@ -566,7 +576,7 @@ impl<'a> Visitor<'a> { node.kind(), column_name ); - self.record_parse_error_for_node(&error_message, &[], *node, false); + self.record_parse_error_for_node(&error_message, &[], node, false); } } Storage::Table { @@ -582,7 +592,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(node.kind()), diagnostics::MessageArg::Code(table_name), ], - *node, + node, false, ); break; @@ -639,15 +649,17 @@ impl<'a> Visitor<'a> { } // Emit a slice of a source file as an Arg. -fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg { - let range = n.byte_range(); - trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) +fn sliced_source_arg(source: &[u8], n: &Node) -> trap::Arg { + trap::Arg::String(n.opt_string_content().unwrap_or_else(|| { + let range = n.byte_range(); + String::from_utf8_lossy(&source[range.start..range.end]).into_owned() + })) } // Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated. // The first is the location and label definition, and the second is the // 'Located' entry. 
-fn location_for(visitor: &mut Visitor, file_label: trap::Label, n: Node) -> trap::Location { +fn location_for(visitor: &mut Visitor, file_label: trap::Label, n: &Node) -> trap::Location { // Tree-sitter row, column values are 0-based while CodeQL starts // counting at 1. In addition Tree-sitter's row and column for the // end position are exclusive while CodeQL's end positions are inclusive. @@ -713,8 +725,8 @@ fn location_for(visitor: &mut Visitor, file_label: trap::Label, n: Node) -> trap } } -fn traverse(tree: &Tree, visitor: &mut Visitor) { - let cursor = &mut tree.walk(); +fn traverse(tree: &yeast::Ast, visitor: &mut Visitor) { + let mut cursor = tree.walk(); visitor.enter_node(cursor.node()); let mut recurse = true; loop { diff --git a/shared/yeast/.envrc b/shared/yeast/.envrc new file mode 100644 index 00000000000..3550a30f2de --- /dev/null +++ b/shared/yeast/.envrc @@ -0,0 +1 @@ +use flake diff --git a/shared/yeast/.gitignore b/shared/yeast/.gitignore new file mode 100644 index 00000000000..ea8c4bf7f35 --- /dev/null +++ b/shared/yeast/.gitignore @@ -0,0 +1 @@ +/target diff --git a/shared/yeast/.gitkeep b/shared/yeast/.gitkeep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/shared/yeast/Cargo.lock b/shared/yeast/Cargo.lock new file mode 100644 index 00000000000..01fc0da60da --- /dev/null +++ b/shared/yeast/Cargo.lock @@ -0,0 +1,357 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "cc" +version = "1.2.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "clap" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" 
+dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + 
"serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-python" +version 
= "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "yeast" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", + "tree-sitter", + "tree-sitter-python", + "tree-sitter-ruby", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/shared/yeast/Cargo.toml b/shared/yeast/Cargo.toml new file mode 100644 index 00000000000..816b93d4902 --- /dev/null +++ b/shared/yeast/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "yeast" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.4.10", features = ["derive"] } 
+serde = { version = "1.0.193", features = ["derive"] } +serde_json = "1.0.108" +tree-sitter = "0.24" + +tree-sitter-ruby = "0.23" +tree-sitter-python = "0.23" diff --git a/shared/yeast/flake.lock b/shared/yeast/flake.lock new file mode 100644 index 00000000000..3a20223549e --- /dev/null +++ b/shared/yeast/flake.lock @@ -0,0 +1,85 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1697730408, + "narHash": "sha256-Ww//zzukdTrwTrCUkaJA/NsaLEfUfQpWZXBdXBYfhak=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "ff0a5a776b56e0ca32d47a4a47695452ec7f7d80", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1697767917, + "narHash": "sha256-9+FjCVE1Y7iUKohBF43yD05KoQB+FPcw/XL2rlKkjqY=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "679ea0878edc749f23516ea6d7ffa974c6304bf5", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } 
+ } + }, + "root": "root", + "version": 7 +} diff --git a/shared/yeast/flake.nix b/shared/yeast/flake.nix new file mode 100644 index 00000000000..75e56844b0f --- /dev/null +++ b/shared/yeast/flake.nix @@ -0,0 +1,31 @@ +{ + description = "YEAST elaborates abstract syntax trees"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + }; + }; + + + outputs = { self, nixpkgs, flake-utils, rust-overlay }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + rust = rust-overlay.packages.${system}.rust; + in + { + devShells.default = pkgs.mkShell { + buildInputs = with pkgs; [ + pkgs.tree-sitter + rust + rust-analyzer + libiconv + ]; + }; + }); +} diff --git a/shared/yeast/src/bin/main.rs b/shared/yeast/src/bin/main.rs new file mode 100644 index 00000000000..92c1530982d --- /dev/null +++ b/shared/yeast/src/bin/main.rs @@ -0,0 +1,26 @@ +use clap::Parser; + +#[derive(Parser)] +#[clap(name = "yeast", about = "yeast elaborates abstract syntax trees")] +struct Cli { + file: String, + #[clap(default_value = "ruby")] + language: String, +} + +fn get_language(language: &str) -> tree_sitter::Language { + match language { + "ruby" => tree_sitter_ruby::LANGUAGE.into(), + "python" => tree_sitter_python::LANGUAGE.into(), + _ => panic!("Unsupported language: {}", language), + } +} + +fn main() { + let args = Cli::parse(); + let language = get_language(&args.language); + let source = std::fs::read_to_string(&args.file).unwrap(); + let runner = yeast::Runner::new(language, vec![]); + let ast = runner.run(&source); + println!("{}", ast.print(&source, ast.get_root())); +} diff --git a/shared/yeast/src/captures.rs b/shared/yeast/src/captures.rs new file mode 100644 index 00000000000..669461578ca --- /dev/null +++ 
b/shared/yeast/src/captures.rs @@ -0,0 +1,92 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use crate::Id; + +#[derive(Debug, Clone)] +pub struct Captures { + captures: BTreeMap<&'static str, Vec>, +} + +impl Default for Captures { + fn default() -> Self { + Self::new() + } +} + +impl Captures { + pub fn new() -> Self { + Captures { + captures: BTreeMap::new(), + } + } + + pub fn get_var(&self, key: &str) -> Result { + let ids = self.captures.get(key); + if let Some(ids) = ids { + if ids.len() == 1 { + Ok(ids[0]) + } else { + Err(format!( + "Variable {} has {} matches, use * to allow repetition", + key, + ids.len() + )) + } + } else { + Err(format!("No variable named {}", key)) + } + } + + pub fn insert(&mut self, key: &'static str, id: Id) { + self.captures.entry(key).or_default().push(id); + } + + pub fn map_captures(&mut self, kind: &str, f: &mut impl FnMut(Id) -> Id) { + if let Some(ids) = self.captures.get_mut(kind) { + for id in ids { + *id = f(*id); + } + } + } + pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) { + if let Some(from_ids) = self.captures.get(from) { + let new_values = from_ids.iter().copied().map(f).collect(); + self.captures.insert(to, new_values); + } + } + + pub fn merge(&mut self, other: &Captures) { + for (key, ids) in &other.captures { + self.captures.entry(key).or_default().extend(ids); + } + } + + pub fn un_star<'a>( + &'a self, + children: &'a BTreeSet<&'static str>, + ) -> Result + 'a, String> { + let mut id_iter = children.iter(); + + if let Some(fst) = id_iter.next() { + let repeats = self + .captures + .get(fst) + .ok_or_else(|| format!("No variable named {}", fst))? 
+ .len(); + // TODO: better error on missing capture + if id_iter.any(|id| self.captures.get(id).map(Vec::len).unwrap_or(0) != repeats) { + return Err("Repeated captures must have the same number of matches".to_string()); + } + Ok((0..repeats).map(move |iter| { + let mut new_vars: Captures = Captures::new(); + for id in children { + let child_capture = self.captures.get(id).unwrap()[iter]; + new_vars.captures.insert(id, vec![child_capture]); + } + new_vars + })) + } else { + Err("Repeated captures must have at least one capture".to_string()) + } + } +} diff --git a/shared/yeast/src/cursor.rs b/shared/yeast/src/cursor.rs new file mode 100644 index 00000000000..ef5f6d94f25 --- /dev/null +++ b/shared/yeast/src/cursor.rs @@ -0,0 +1,8 @@ +pub trait Cursor<'a, T, N, F> { + fn node(&self) -> &'a N; + fn field_id(&self) -> Option; + fn field_name(&self) -> Option<&'static str>; + fn goto_first_child(&mut self) -> bool; + fn goto_next_sibling(&mut self) -> bool; + fn goto_parent(&mut self) -> bool; +} diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs new file mode 100644 index 00000000000..155c0085e57 --- /dev/null +++ b/shared/yeast/src/lib.rs @@ -0,0 +1,575 @@ +use std::{collections::BTreeMap, mem}; + +use serde::Serialize; +use serde_json::{json, Value}; + +pub mod captures; +pub mod cursor; +pub mod print; +pub mod query; +mod range; +pub mod rules; +pub mod tree_builder; +mod visitor; + +use captures::Captures; +pub use cursor::Cursor; +use query::QueryNode; + +/// Node ids are indexes into the arena +type Id = usize; + +/// Field and Kind ids are provided by tree-sitter +type FieldId = u16; +type KindId = u16; + +pub const CHILD_FIELD: u16 = u16::MAX; +const CHILD_FIELD_NAME: &str = "child"; + +#[derive(Debug)] +pub struct AstCursor<'a> { + ast: &'a Ast, + /// A stack of parents, along with iterators for their children + parents: Vec<(&'a Node, ChildrenIter<'a>)>, + node: &'a Node, +} + +impl<'a> AstCursor<'a> { + pub fn new(ast: &'a Ast) -> Self { + // 
TODO: handle non-zero root + let node = ast.get_node(ast.root).unwrap(); + Self { + ast, + parents: vec![], + node, + } + } + + fn goto_next_sibling_opt(&mut self) -> Option<()> { + self.node = self.parents.last_mut()?.1.next()?; + Some(()) + } + + fn goto_first_child_opt(&mut self) -> Option<()> { + let parent = self.node; + let mut children = ChildrenIter::new(self.ast, parent); + let first_child = children.next()?; + self.node = first_child; + self.parents.push((parent, children)); + Some(()) + } + + fn goto_parent_opt(&mut self) -> Option<()> { + self.node = self.parents.pop()?.0; + Some(()) + } +} +impl<'a> Cursor<'a, Ast, Node, FieldId> for AstCursor<'a> { + fn node(&self) -> &'a Node { + self.node + } + + fn field_id(&self) -> Option { + let (_, children) = self.parents.last()?; + children.current_field() + } + + fn field_name(&self) -> Option<&'static str> { + if self.field_id() == Some(CHILD_FIELD) { + None + } else { + self.field_id() + .and_then(|id| self.ast.field_name_for_id(id)) + } + } + + fn goto_first_child(&mut self) -> bool { + self.goto_first_child_opt().is_some() + } + + fn goto_next_sibling(&mut self) -> bool { + self.goto_next_sibling_opt().is_some() + } + + fn goto_parent(&mut self) -> bool { + self.goto_parent_opt().is_some() + } +} + +/// An iterator over all the child nodes of a node. 
+/// Iterator over the child nodes of a single [`Node`], one field at a time.
+///
+/// Fields are visited in the order of the node's `fields` map (ascending
+/// `FieldId`, since it is a `BTreeMap`); within a field, children are yielded
+/// in their stored order.
+#[derive(Debug)]
+struct ChildrenIter<'a> {
+    /// The AST that child ids are resolved against.
+    ast: &'a Ast,
+    /// Id of the field entered most recently; `None` until the first field is entered.
+    current_field: Option<FieldId>,
+    /// Fields of the node that have not been entered yet.
+    fields: std::collections::btree_map::Iter<'a, FieldId, Vec<Id>>,
+    /// Children of the current field that have not been yielded yet.
+    field_children: Option<std::slice::Iter<'a, Id>>,
+}
+
+impl<'a> ChildrenIter<'a> {
+    /// Create an iterator over the children of `node`, which must belong to `ast`.
+    fn new(ast: &'a Ast, node: &'a Node) -> Self {
+        Self {
+            ast,
+            current_field: None,
+            fields: node.fields.iter(),
+            field_children: None,
+        }
+    }
+
+    /// Resolve a child id to its node.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `id` does not refer to a node stored in the AST.
+    fn get_node(&self, id: Id) -> &'a Node {
+        self.ast
+            .get_node(id)
+            .expect("child id must refer to a node stored in the AST")
+    }
+
+    /// The field that the most recently yielded child belongs to.
+    fn current_field(&self) -> Option<FieldId> {
+        self.current_field
+    }
+}
+
+impl<'a> Iterator for ChildrenIter<'a> {
+    type Item = &'a Node;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            // Drain the field we are currently in, if any.
+            let pending = self
+                .field_children
+                .as_mut()
+                .and_then(|children| children.next());
+            if let Some(child_id) = pending {
+                return Some(self.get_node(*child_id));
+            }
+
+            // The current field is exhausted (or none was entered yet): move on
+            // to the next field, or stop once every field has been visited.
+            // Looping (rather than recursing) also skips over empty fields.
+            let (field, children) = self.fields.next()?;
+            self.current_field = Some(*field);
+            self.field_children = Some(children.iter());
+        }
+    }
+}
+
+/// Our AST
+///
+/// Nodes live in a flat vector and refer to each other by index (`Id`).
+#[derive(PartialEq, Eq, Debug)]
+pub struct Ast {
+    /// Id of the root node.
+    root: Id,
+    /// All nodes; a node's `Id` is its index in this vector.
+    nodes: Vec<Node>,
+    /// The tree-sitter language used to translate kind/field names and ids.
+    language: tree_sitter::Language,
+}
+
+impl Ast {
+    /// Construct an AST from a TS tree
+    pub fn from_tree(language: tree_sitter::Language, tree: &tree_sitter::Tree) -> Self {
+        let mut visitor = visitor::Visitor::new(language);
+        visitor.visit(tree);
+        visitor.build()
+    }
+
+    /// Create a cursor for walking this AST.
+    pub fn walk(&self) -> AstCursor<'_> {
+        AstCursor::new(self)
+    }
+
+    /// All nodes of the AST, indexed by their `Id`.
+    pub fn nodes(&self) -> &[Node] {
+        &self.nodes
+    }
+
+    /// Id of the current root node.
+    pub fn get_root(&self) -> Id {
+        self.root
+    }
+
+    /// Make `root` the root node. The id is stored as-is and not validated.
+    pub fn set_root(&mut self, root: Id) {
+        self.root = root;
+    }
+
+    /// Look up a node by id; returns `None` if `id` is out of bounds.
+    pub fn get_node(&self, id: Id) -> Option<&Node> {
+        self.nodes.get(id)
+    }
+
+    /// Render the subtree rooted at `root_id` as a JSON value (see `print_node`).
+    ///
+    /// `source` is the text that `NodeContent::Range` contents are sliced from.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `root_id` is out of bounds.
+    pub fn print(&self, source: &str, root_id: Id) -> Value {
+        let root = &self.nodes()[root_id];
+        self.print_node(root, source)
+    }
+
+    /// Append a new node with the given kind, content and fields and return its id.
+    ///
+    /// The node is created with `is_missing`, `is_error` and `is_extra` all `false`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the language has no kind name for `kind`.
+    fn create_node(
+        &mut self,
+        kind: KindId,
+        content: NodeContent,
+        fields: BTreeMap<FieldId, Vec<Id>>,
+        is_named: bool,
+    ) -> Id {
+        let kind_name = self
+            .language
+            .node_kind_for_id(kind)
+            .expect("node kind id must be known to the language");
+        let id = self.nodes.len();
+        self.nodes.push(Node {
+            id,
+            kind,
+            kind_name,
+            fields,
+            content,
+            is_missing: false,
+            is_error: false,
+            is_extra: false,
+            is_named,
+        });
+        id
+    }
+
+    /// Append a new named leaf node of kind `kind` whose text is `content`,
+    /// and return its id.
+    ///
+    /// NOTE(review): the kind id returned by the language is stored without a
+    /// check, whereas `id_for_node_kind` below treats id 0 as "not found".
+    pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id {
+        let kind_id = self.language.id_for_node_kind(kind, true);
+        let id = self.nodes.len();
+        self.nodes.push(Node {
+            id,
+            kind: kind_id,
+            kind_name: kind,
+            is_named: true,
+            is_missing: false,
+            is_error: false,
+            is_extra: false,
+            fields: BTreeMap::new(),
+            content: NodeContent::DynamicString(content),
+        });
+        id
+    }
+
+    /// Name of the field with the given id. `CHILD_FIELD` maps to
+    /// `CHILD_FIELD_NAME`; every other id is looked up in the language.
+    fn field_name_for_id(&self, id: FieldId) -> Option<&'static str> {
+        if id == CHILD_FIELD {
+            Some(CHILD_FIELD_NAME)
+        } else {
+            self.language.field_name_for_id(id)
+        }
+    }
+
+    /// Id of the field with the given name. `CHILD_FIELD_NAME` maps to
+    /// `CHILD_FIELD`; every other name is looked up in the language.
+    fn field_id_for_name(&self, name: &str) -> Option<FieldId> {
+        if name == CHILD_FIELD_NAME {
+            Some(CHILD_FIELD)
+        } else {
+            self.language.field_id_for_name(name).map(|id| id.get())
+        }
+    }
+
+    /// Print a node for debugging
+    ///
+    /// The result is a JSON object with a single key, the node's kind. For a
+    /// node without fields the value is the node's text. Otherwise it is an
+    /// object mapping each field name to the list of printed children, plus a
+    /// `"content"` entry holding the node's text. Children stored under
+    /// `CHILD_FIELD` are listed under `"rest"`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a field id, kind id or child id cannot be resolved, or if a
+    /// `NodeContent::Range` does not lie within `source`.
+    fn print_node(&self, node: &Node, source: &str) -> Value {
+        let fields: BTreeMap<&'static str, Vec<Value>> = node
+            .fields
+            .iter()
+            .map(|(field_id, nodes)| {
+                let field_name = if field_id == &CHILD_FIELD {
+                    "rest"
+                } else {
+                    self.field_name_for_id(*field_id)
+                        .expect("field id must be known to the language")
+                };
+                let nodes: Vec<Value> = nodes
+                    .iter()
+                    .map(|id| {
+                        let child = self
+                            .get_node(*id)
+                            .expect("child id must refer to a node stored in the AST");
+                        self.print_node(child, source)
+                    })
+                    .collect();
+                (field_name, nodes)
+            })
+            .collect();
+        let mut value = BTreeMap::new();
+        let kind = self
+            .language
+            .node_kind_for_id(node.kind)
+            .expect("node kind id must be known to the language");
+        let content = match &node.content {
+            NodeContent::Range(range) => {
+                // Decode the slice as UTF-8. Converting byte by byte with
+                // `as char` would turn every multi-byte character into several
+                // unrelated code points.
+                let bytes = &source.as_bytes()[range.start_byte..range.end_byte];
+                String::from_utf8_lossy(bytes).into_owned()
+            }
+            NodeContent::String(s) => (*s).to_owned(),
+            NodeContent::DynamicString(s) => s.clone(),
+        };
+        if fields.is_empty() {
+            value.insert(kind, json!(content));
+        } else {
+            let mut fields: BTreeMap<_, _> =
+                fields.into_iter().map(|(k, v)| (k, json!(v))).collect();
+            fields.insert("content", json!(content));
+            value.insert(kind, json!(fields));
+        }
+        json!(value)
+    }
+
+    /// Return an example AST, for testing and to fill implementation gaps
+    ///
+    /// NOTE(review): the kind and field ids below are hard-coded numbers and
+    /// are not looked up in `language`; presumably they match the grammar the
+    /// tests use — confirm before relying on this with another language.
+    pub fn example(language: tree_sitter::Language) -> Self {
+        // x = 1
+        Self {
+            root: 0,
+            language,
+            nodes: vec![
+                // assignment
+                Node {
+                    id: 0,
+                    kind: 276,
+                    kind_name: "assignment",
+                    fields: {
+                        let mut map = BTreeMap::new();
+                        map.insert(18, vec![1]);
+                        map.insert(28, vec![3]);
+                        map
+                    },
+                    content: NodeContent::String("x = 1"),
+                    is_missing: false,
+                    is_error: false,
+                    is_extra: false,
+                    is_named: true,
+                },
+                // identifier
+                Node {
+                    id: 1,
+                    kind: 1,
+                    kind_name: "identifier",
+                    fields: BTreeMap::new(),
+                    content: NodeContent::String("x"),
+                    is_missing: false,
+                    is_error: false,
+                    is_extra: false,
+                    is_named: true,
+                },
+                // "="
+                Node {
+                    id: 2,
+                    kind: 17,
+                    kind_name: "=",
+                    fields: BTreeMap::new(),
+                    content: NodeContent::String("="),
+                    is_missing: false,
+                    is_error: false,
+                    is_extra: false,
+                    is_named: false,
+                },
+                // integer
+                Node {
+                    id: 3,
+                    kind: 110,
+                    kind_name: "integer",
+                    fields: BTreeMap::new(),
+                    content: NodeContent::String("1"),
+                    is_missing: false,
+                    is_error: false,
+                    is_extra: false,
+                    is_named: true,
+                },
+            ],
+        }
+    }
+
+    /// Kind id of the named node kind `kind`, or `None` if the language
+    /// reports id 0 for it (treated here as "not found").
+    fn id_for_node_kind(&self, kind: &str) -> Option<KindId> {
+        let id = self.language.id_for_node_kind(kind, true);
+        (id != 0).then_some(id)
+    }
+
+    /// Kind id of the unnamed node kind `kind`, or `None` if the language
+    /// reports id 0 for it (treated here as "not found").
+    fn id_for_unnamed_node_kind(&self, kind: &str) -> Option<KindId> {
+        let id = self.language.id_for_node_kind(kind, false);
+        (id != 0).then_some(id)
+    }
+}
+
+/// A node in our AST
+#[derive(PartialEq, Eq, Debug, Clone, Serialize)]
+pub struct Node {
+    /// Index of this node in the owning `Ast`'s node vector.
+    id: Id,
+    /// Numeric kind id of the node.
+    kind: KindId,
+    /// Name of the node's kind.
+    kind_name: &'static str,
+    /// Child node ids, grouped by field id.
+    fields: BTreeMap<FieldId, Vec<Id>>,
+    /// The node's text: a source range, or a string for synthesized nodes.
+    content: NodeContent,
+    is_named: bool,
+    is_missing: bool,
+    is_extra: bool,
+    is_error: bool,
+}
+
+impl Node {
+    /// Id of this node within its `Ast`.
+    pub fn id(&self) -> Id {
+        self.id
+    }
+
+    /// Name of the node's kind.
+    pub fn kind(&self) -> &'static str {
+        self.kind_name
+    }
+
+    pub fn is_named(&self) -> bool {
+        self.is_named
+    }
+
+    pub fn is_missing(&self) -> bool {
+        self.is_missing
+    }
+
+    pub fn is_extra(&self) -> bool {
+        self.is_extra
+    }
+
+    pub fn is_error(&self) -> bool {
+        self.is_error
+    }
+
+    /// Placeholder range (all zeros) for nodes without a source location.
+    fn fake_range(&self) -> tree_sitter::Range {
+        tree_sitter::Range {
+            start_byte: 0,
+            end_byte: 0,
+            start_point: tree_sitter::Point { row: 0, column: 0 },
+            end_point: tree_sitter::Point { row: 0, column: 0 },
+        }
+    }
+
+    /// Placeholder position (row 0, column 0) for nodes without a source location.
+    fn fake_point(&self) -> tree_sitter::Point {
+        tree_sitter::Point { row: 0, column: 0 }
+    }
+
+    /// Start position of the node in the source, or row 0 / column 0 when the
+    /// node's content is a string rather than a source range.
+    pub fn start_position(&self) -> tree_sitter::Point {
+        match &self.content {
+            NodeContent::Range(range) => range.start_point,
+            NodeContent::String(_) | NodeContent::DynamicString(_) => self.fake_point(),
+        }
+    }
+
+    /// End position of the node in the source, or row 0 / column 0 when the
+    /// node's content is a string rather than a source range.
+    pub fn end_position(&self) -> tree_sitter::Point {
+        match &self.content {
+            NodeContent::Range(range) => range.end_point,
+            NodeContent::String(_) | NodeContent::DynamicString(_) => self.fake_point(),
+        }
+    }
+
+    /// Start byte offset of the node in the source, or 0 when the node's
+    /// content is a string rather than a source range.
+    pub fn start_byte(&self) -> usize {
+        match &self.content {
+            NodeContent::Range(range) => range.start_byte,
+            NodeContent::String(_) | NodeContent::DynamicString(_) => 0,
+        }
+    }
+
+    /// End byte offset of the node in the source, or 0 when the node's
+    /// content is a string rather than a source range.
+    pub fn end_byte(&self) -> usize {
+        match &self.content {
+            NodeContent::Range(range) => range.end_byte,
+            NodeContent::String(_) | NodeContent::DynamicString(_) => 0,
+        }
+    }
+
+    /// Byte range of the node in the source (`0..0` for string-content nodes).
+    pub fn byte_range(&self) -> std::ops::Range<usize> {
+        self.start_byte()..self.end_byte()
+    }
+
+    /// The node's text if it is stored as a string, or `None` if the content
+    /// is a range into the source.
+    pub fn opt_string_content(&self) -> Option<String> {
+        match &self.content {
+            NodeContent::Range(_) => None,
+            NodeContent::String(s) => Some((*s).to_owned()),
+            NodeContent::DynamicString(s) => Some(s.clone()),
+        }
+    }
+}
+
+/// The content of a node is either a range in the original source file,
+/// or a new string if the node is synthesized.
+#[derive(PartialEq, Eq, Debug, Clone, Serialize)] +pub enum NodeContent { + Range(#[serde(with = "range::Range")] tree_sitter::Range), + String(&'static str), + DynamicString(String), +} + +impl From<&'static str> for NodeContent { + fn from(value: &'static str) -> Self { + NodeContent::String(value) + } +} + +impl From for NodeContent { + fn from(value: tree_sitter::Range) -> Self { + NodeContent::Range(value) + } +} + +pub struct Rule { + query: QueryNode, + transform: Box Vec>, +} + +impl Rule { + pub fn new(query: QueryNode, transform: Box Vec>) -> Self { + Self { query, transform } + } + + fn try_rule(&self, ast: &mut Ast, node: Id) -> Option> { + let mut captures = Captures::new(); + if self.query.do_match(ast, node, &mut captures).unwrap() { + Some((self.transform)(ast, captures)) + } else { + None + } + } +} + +fn apply_rules(rules: &Vec, ast: &mut Ast, id: Id) -> Vec { + // apply the transformation rules on this node + for rule in rules { + if let Some(result_node) = rule.try_rule(ast, id) { + // We transformed it so now recurse into the result + return result_node + .iter() + .flat_map(|node| apply_rules(rules, ast, *node)) + .collect(); + } + } + + // copy the current node + let mut node = ast.nodes[id].clone(); + + // recursively descend into all the fields + for vec in node.fields.values_mut() { + let mut old = Vec::new(); + mem::swap(vec, &mut old); + *vec = old + .iter() + .flat_map(|node| apply_rules(rules, ast, *node)) + .collect(); + } + + node.id = ast.nodes.len(); + ast.nodes.push(node); + vec![ast.nodes.len() - 1] +} + +pub struct Runner { + language: tree_sitter::Language, + rules: Vec, +} + +impl Runner { + pub fn new(language: tree_sitter::Language, rules: Vec) -> Self { + Self { language, rules } + } + + pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Ast { + let mut ast = Ast::from_tree(self.language.clone(), tree); + let res = apply_rules(&self.rules, &mut ast, 0); + if res.len() != 1 { + panic!("Expected at exactly one result 
node, got {}", res.len()); + } + ast.set_root(res[0]); + ast + } + + pub fn run(&self, input: &str) -> Ast { + // Parse the input into an AST + + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&self.language).unwrap(); + let tree = parser.parse(input, None).unwrap(); + let mut ast = Ast::from_tree(self.language.clone(), &tree); + let res = apply_rules(&self.rules, &mut ast, 0); + if res.len() != 1 { + panic!("Expected at exactly one result node, got {}", res.len()); + } + ast.set_root(res[0]); + ast + } +} diff --git a/shared/yeast/src/print.rs b/shared/yeast/src/print.rs new file mode 100644 index 00000000000..6bf10bcfe20 --- /dev/null +++ b/shared/yeast/src/print.rs @@ -0,0 +1,34 @@ +use crate::{cursor::Cursor, AstCursor, Node}; + +pub struct Printer {} + +impl Printer { + pub fn visit(&mut self, mut cursor: AstCursor<'_>) { + self.enter_node(cursor.node()); + let mut recurse = true; + loop { + if recurse && cursor.goto_first_child() { + recurse = self.enter_node(cursor.node()); + } else { + self.leave_node(cursor.node()); + + if cursor.goto_next_sibling() { + recurse = self.enter_node(cursor.node()); + } else if cursor.goto_parent() { + recurse = false; + } else { + break; + } + } + } + } + + pub fn enter_node(&mut self, node: &Node) -> bool { + println!("enter_node: {:?}", node); + true + } + pub fn leave_node(&mut self, node: &Node) -> bool { + println!("leave_node: {:?}", node); + true + } +} diff --git a/shared/yeast/src/query.rs b/shared/yeast/src/query.rs new file mode 100644 index 00000000000..f8cddc5662a --- /dev/null +++ b/shared/yeast/src/query.rs @@ -0,0 +1,243 @@ + +use crate::{captures::Captures, Ast, Id}; + +#[derive(Debug, Clone)] +pub enum QueryNode { + Any(), + Node { + kind: &'static str, + children: Vec<(&'static str, Vec)>, + }, + UnnamedNode { + kind: &'static str, + }, + Capture { + capture: &'static str, + node: Box, + }, +} + +#[derive(Debug, Clone)] +pub enum QueryListElem { + Repeated { children: Vec, rep: Rep }, + 
SingleNode(QueryNode), +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum Rep { + ZeroOrMore, + OneOrMore, + ZeroOrOne, +} + +impl QueryNode { + pub fn do_match(&self, ast: &Ast, node: Id, matches: &mut Captures) -> Result { + match self { + QueryNode::Any() => Ok(true), + QueryNode::Node { kind, children } => { + let node = ast.get_node(node).unwrap(); + let target_kind = ast.id_for_node_kind(kind).ok_or_else(|| { + format!("Node kind {} not found in language", kind) + })?; + if node.kind != target_kind { + return Ok(false); + } + for (field, field_children) in children { + let field_id = ast + .field_id_for_name(field) + .ok_or_else(|| format!("Field {} not found in language", field))?; + let empty = Vec::new(); + let mut child_iter = node + .fields + .get(&field_id) + .unwrap_or(&empty) + .iter() + .cloned(); + if !match_children(field_children.iter(), ast, &mut child_iter, matches)? { + return Ok(false); + } + } + Ok(true) + } + QueryNode::UnnamedNode { kind } => { + let node = ast.get_node(node).unwrap(); + let target_kind = ast.id_for_unnamed_node_kind(kind).ok_or_else(|| { + format!("unnamed Node kind {} not found in language", kind) + })?; + Ok(node.kind == target_kind) + } + QueryNode::Capture { + capture, + node: sub_query, + } => { + matches.insert(capture, node); + sub_query.do_match(ast, node, matches) + } + } + } +} + +fn match_children<'a>( + child_matchers: impl Iterator, + ast: &Ast, + remaining_children: &mut (impl Iterator + Clone), + matches: &mut Captures, +) -> Result { + for child in child_matchers { + if !child.do_match(ast, remaining_children, matches)? 
{ + return Ok(false); + } + } + Ok(true) +} + +impl QueryListElem { + fn do_match( + &self, + ast: &Ast, + remaining_children: &mut (impl Iterator + Clone), + matches: &mut Captures, + ) -> Result { + match self { + QueryListElem::Repeated { children, rep } => { + let mut iters = 0; + + loop { + let matches_initial = matches.clone(); + let start = remaining_children.clone(); + if !match_children(children.iter(), ast, remaining_children, matches)? { + // Reset the state + *remaining_children = start; + *matches = matches_initial; + break; + } + iters += 1; + if *rep == Rep::ZeroOrOne { + break; + } + } + if *rep == Rep::OneOrMore && iters == 0 { + // We didn't match any children but we were supposed to + Ok(false) + } else { + Ok(true) + } + } + QueryListElem::SingleNode(sub_query) => { + if let Some(child) = remaining_children.next() { + sub_query.do_match(ast, child, matches) + } else { + Ok(false) + } + } + } + } +} + +#[macro_export] +macro_rules! query { + // _ + (_) => { $crate::query::QueryNode::Any()}; + // Parens + (($($child:tt)*)) => { query!($($child)*)}; + // Match a node of a given kind + ($node_id:ident $($rest:tt)*) => { $crate::query::QueryNode::Node{ kind: stringify!($node_id), children: query_fields!($($rest)*)}}; + // Match an unamed node of a given kind (using a string literal) + ($node_id:literal) => { $crate::query::QueryNode::UnnamedNode{ kind: $node_id}}; + // Capture + ($child:tt @ $capture_id:ident) => { $crate::query::QueryNode::Capture{ capture: stringify!($capture_id), node: Box::new(query!($child))}}; + // Capture only (implicit _) + (@ $capture_id:ident) => { $crate::query::QueryNode::Capture{ capture: stringify!($capture_id), node: Box::new($crate::query::QueryNode::Any())}}; +} + +// We use an accumulator to build up the list of children incrementally so this starts the tail recursion +#[macro_export] +macro_rules! query_list { + ($($rest:tt)*) => { _query_list!( @ACC [] $($rest)* )}; +} + +#[macro_export] +macro_rules! 
query_fields { + ($($rest:tt)*) => { _query_fields!( @ACC [] $($rest)* )}; +} + +#[macro_export] +macro_rules! _query_fields { + // vec! allows a trailing comma so we assume that either the accumulator is empty or`ends in a comma + + // Base case: no more tokens, so return the accumulator + (@ACC [$($acc:tt)*]) => { vec![$($acc)*]}; + // Parse field * : (nodeList) + (@ACC [$($acc:tt)*] $field_name:ident * : ($($sub_node:tt)*) $($rest:tt)*) => { _query_fields!( @ACC [ $($acc)* (stringify!($field_name), query_list!($($sub_node)*)),] $($rest)*)}; + // Parse field : node + (@ACC [$($acc:tt)*] $field_name:ident : $sub_node:tt $($rest:tt)*) => { _query_fields!( @ACC [ $($acc)* (stringify!($field_name), vec![$crate::query::QueryListElem::SingleNode(query!($sub_node))]),] $($rest)* )}; +} + +#[macro_export] +macro_rules! _query_list { + // vec! allows a trailing comma so we assume that either the accumulator is empty or`ends in a comma + + // Base case: no more tokens, so return the accumulator + (@ACC [$($acc:tt)*]) => { vec![$($acc)*]}; + // Parse (nodeList)* + (@ACC [$($acc:tt)*] ($($sub_node:tt)*) * $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::ZeroOrMore},] $($rest)*)}; + // Parse (nodeList)+ + (@ACC [$($acc:tt)*] ($($sub_node:tt)*) + $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::OneOrMore},] $($rest)*)}; + // Parse (nodeList)? + (@ACC [$($acc:tt)*] ($($sub_node:tt)*) ? 
$($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::ZeroOrOne},] $($rest)*)}; + // Parse node (treating @cap as a single node) + (@ACC [$($acc:tt)*] @ $sub_node:tt $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::SingleNode(query!(@$sub_node)),] $($rest)*)}; + // Parse node (this must be last as it only applies if the earlier cases don't match) + (@ACC [$($acc:tt)*] $sub_node:tt $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::SingleNode(query!($sub_node)),] $($rest)*)}; +} + +pub use query; +pub use query_list; + +#[cfg(test)] +mod tests { + use crate::query::*; + #[test] + fn it_works() { + let query1: QueryNode = query!(_); + println!("{:?}", query1); + let query2 = query!(foo); + println!("{:?}", query2); + let query3 = query!(foo child: (_)); + println!("{:?}", query3); + let query4 = query!(foo child*:((_)*)); + println!("{:?}", query4); + let query5: QueryNode = query!(foo child*:((_)*)); + println!("{:?}", query5); + let query6: QueryNode = query!(_ @ bar); + println!("{:?}", query6); + let query7: QueryNode = query!(foo child:(_ @ bar)); + println!("{:?}", query7); + let query7: QueryNode = query!(foo child:(@ bar)); + println!("{:?}", query7); + let query8: QueryNode = query!((assignment + left: (element_reference + object: (@ obj) + child: (_ @ index) + ) + right: (_ @ rhs) + )); + println!("{:?}", query8); + let query9: QueryNode = query!((assignment + left: (element_reference + object * : ((@ obj)*) + child: (_ @ index) + ) + right: (_ @ rhs) + )); + println!("{:?}", query9); + let query10 = query!( + program + child: (assignment + left: (@left) + right: (@right)) + + ); + println!("{:?}", query10); + } +} diff --git a/shared/yeast/src/range.rs b/shared/yeast/src/range.rs new file mode 100644 index 00000000000..ec670b438d5 --- /dev/null +++ b/shared/yeast/src/range.rs @@ -0,0 +1,21 @@ +//! 
(de)-serialize helpers for tree_sitter::Range + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +#[serde(remote = "tree_sitter::Point")] +pub struct Point { + pub row: usize, + pub column: usize, +} + +#[derive(Serialize, Deserialize)] +#[serde(remote = "tree_sitter::Range")] +pub struct Range { + pub start_byte: usize, + pub end_byte: usize, + #[serde(with = "Point")] + pub start_point: tree_sitter::Point, + #[serde(with = "Point")] + pub end_point: tree_sitter::Point, +} diff --git a/shared/yeast/src/rules.rs b/shared/yeast/src/rules.rs new file mode 100644 index 00000000000..2c6f7373aa3 --- /dev/null +++ b/shared/yeast/src/rules.rs @@ -0,0 +1,133 @@ +use std::cell::Cell; +use std::rc::Rc; + +use crate::{captures::Captures, *}; + +pub fn rules() -> Vec { + let fresh_ids = Rc::new(Cell::new(0)); + let fresh_ids2: Rc> = fresh_ids.clone(); + + let assign_query = query!( + (assignment + left: ( + left_assignment_list child*: ((((identifier) @ left) (",")?)*) + ) + right: (@right) + ) + ); + let assign_transform = move |ast: &mut Ast, mut match_: Captures| { + println!("match: {:?}", match_); + let fresh = fresh_ids.get(); + fresh_ids.set(fresh + 1); + + let new_ident = format!("tmp-{}", fresh); + match_.insert( + "tmp_lhs", + ast.create_named_token("identifier", new_ident.clone()), + ); + + let mut i = 0; + match_.map_captures_to("left", "assigns", &mut |old_id| { + let mut local_capture = Captures::new(); + local_capture.insert("lhs", old_id); + local_capture.insert( + "tmp", + ast.create_named_token("identifier", new_ident.clone()), + ); + let index: i32 = i; + i += 1; + local_capture.insert( + "index", + ast.create_named_token("integer", index.to_string()), + ); + tree_builder!( + (assignment + left: (@lhs) + right: ( + element_reference + object: (@tmp) + child: (@index) + ) + ) + ) + .build_tree(ast, &local_capture) + .unwrap() + }); + + // construct the new tree here maybe + // captures is probably a HashMap from capture name to AST 
node + trees_builder!( + (assignment + left: (@tmp_lhs) + right: (@right) + ) + ( + @assigns + )* + ) + .build_trees(ast, &match_) + .unwrap() + }; + + let assign_rule = Rule::new(assign_query, Box::new(assign_transform)); + + // TODO: There is a spurious end token + let for_query = query!( + (for + pattern: (@pat) + value: (in child*: ("in" @val)) + body: (do child*: (("do")? (@body)*)) + ) + ); + let for_transform = move |ast: &mut Ast, mut match_: Captures| { + let fresh = fresh_ids2.get(); + fresh_ids2.set(fresh + 1); + + let new_ident = format!("tmp-{}", fresh); + match_.insert( + "tmp_rhs", + ast.create_named_token("identifier", new_ident.clone()), + ); + match_.insert( + "tmp_param", + ast.create_named_token("identifier", new_ident.clone()), + ); + match_.insert( + "each", + ast.create_named_token("identifier", "each".to_string()), + ); + + trees_builder!( + (call + receiver: (@val) + method: (@each) + block: (block + parameters: ( + block_parameters + child: (@tmp_param) + ) + body: (block_body + child*: ( + (assignment + left: (@pat) + right: (@tmp_rhs) + ) + (@body)* + ) + ) + ) + ) + + ) + .build_trees(ast, &match_) + .unwrap() + }; + + let for_rule = Rule::new(for_query, Box::new(for_transform)); + + // Just get rid of all end tokens as they aren't needed + let end_query = query!(("end")); + let end_transform = |_ast: &mut Ast, _match: Captures| vec![]; + let end_rule = Rule::new(end_query, Box::new(end_transform)); + vec![assign_rule, for_rule, end_rule] +} diff --git a/shared/yeast/src/tree_builder.rs b/shared/yeast/src/tree_builder.rs new file mode 100644 index 00000000000..80fb89a12b2 --- /dev/null +++ b/shared/yeast/src/tree_builder.rs @@ -0,0 +1,170 @@ +use crate::{captures::Captures, Ast, Id}; +use std::collections::BTreeSet; + +#[derive(Debug, Clone)] +pub enum TreeBuilder { + Node { + kind: &'static str, + children: Vec<(&'static str, Vec)>, + }, + Capture { + capture: &'static str, + }, +} + +#[derive(Debug, Clone)] +pub enum TreeChildBuilder 
{ + Repeated { + child: TreeBuilder, + }, + SingleNode(TreeBuilder), +} + +impl TreeChildBuilder { + fn get_opt_contained(&self) -> BTreeSet<&'static str> { + match self { + TreeChildBuilder::Repeated { child } => child.get_opt_contained(), + TreeChildBuilder::SingleNode(node) => node.get_opt_contained(), + } + } + + fn build_tree( + &self, + target: &mut Ast, + vars: &Captures, + child_ids: &mut Vec, + ) -> Result<(), String> { + match self { + TreeChildBuilder::Repeated { child } => { + let repeated_ids = self.get_opt_contained(); + + for sub_captures in vars.un_star(&repeated_ids)? { + child_ids.push(child.build_tree(target, &sub_captures)?) + } + Ok(()) + } + TreeChildBuilder::SingleNode(node) => { + child_ids.push(node.build_tree(target, vars)?); + Ok(()) + } + } + } +} + +impl TreeBuilder { + fn get_opt_contained(&self) -> BTreeSet<&'static str> { + match self { + TreeBuilder::Node { kind: _, children } => { + let mut contained = BTreeSet::new(); + for (_, children) in children { + for child in children { + contained.extend(child.get_opt_contained()); + } + } + contained + } + TreeBuilder::Capture { capture } => { + let mut contained = BTreeSet::new(); + contained.insert(*capture); + contained + } + } + } + + pub fn build_tree(&self, target: &mut Ast, vars: &Captures) -> Result { + match self { + TreeBuilder::Capture { capture } => vars.get_var(capture), + TreeBuilder::Node { kind, children } => { + let ast_kind = target.id_for_node_kind(kind).ok_or_else(|| + format!("Node kind {} does not exist in language", kind) + )?; + + let child_vars = children.iter().map(|(field, children)| { + let mut child_ids = Vec::new(); + for child in children { + child.build_tree(target, vars, &mut child_ids)?; + } + let field_id = target + .field_id_for_name(field) + .ok_or(format!("Field {} does not exist in language", field))?; + Ok((field_id, child_ids)) + }).collect::>()?; + Ok(target.create_node(ast_kind, "".into(), child_vars, true)) + } + } + } +} + +#[macro_export] 
+macro_rules! tree_builder { + (($($child:tt)*)) => { tree_builder!($($child)*)}; + // Match a node of a given kind + ($node_id:ident $($rest:tt)*) => { $crate::tree_builder::TreeBuilder::Node{ kind: stringify!($node_id), children: tree_builder_fields!($($rest)*)}}; + // Capture only (implicit _) + (@ $capture_id:ident) => { $crate::tree_builder::TreeBuilder::Capture{ capture: stringify!($capture_id)}}; +} + +// We use an accumulator to build up the list of children incrementally so this starts the tail recursion +#[macro_export] +macro_rules! tree_builder_child { + () => { Vec::new()}; + ($($rest:tt)*) => { _tree_builder_child!( @ACC [] $($rest)* )}; +} + +#[macro_export] +macro_rules! _tree_builder_child { + // vec! allows a trailing comma so we assume that either the accumulator is empty or`ends in a comma + + // Base case: no more tokens, so return the accumulator + (@ACC [$($acc:tt)*]) => { vec![$($acc)*]}; + // Parse field* : node + (@ACC [$($acc:tt)*] $field_name:ident * : ($($sub_node:tt)*) $($rest:tt)*) => { _tree_builder_child!( @ACC [ $($acc)* $crate::tree_builder::TreeChildBuilder::Field{field_name: stringify!($field_name), node: tree_builder_child!($($sub_node)*)},] $($rest)*)}; + // Parse field : node + (@ACC [$($acc:tt)*] $field_name:ident : $sub_node:tt $($rest:tt)*) => { _tree_builder_child!( @ACC [ $($acc)* $crate::tree_builder::TreeChildBuilder::Field{field_name: stringify!($field_name), node: vec![$crate::tree_builder::TreeChildBuilder::SingleNode(tree_builder!($sub_node))]},] $($rest)*)}; + + // Parse (node)* + (@ACC [$($acc:tt)*] $sub_node:tt * $($rest:tt)*) => { _tree_builder_child!( @ACC [ $($acc)* $crate::tree_builder::TreeChildBuilder::Repeated{child: tree_builder!($sub_node)},] $($rest)*)}; + // Parse node (this must be last as it only applies if the earlier cases don't match) + (@ACC [$($acc:tt)*] $sub_node:tt $($rest:tt)*) => { _tree_builder_child!( @ACC [ $($acc)* 
$crate::tree_builder::TreeChildBuilder::SingleNode(tree_builder!($sub_node)),] $($rest)*)}; +} + + +#[macro_export] +macro_rules! _tree_builder_fields { + // vec! allows a trailing comma so we assume that either the accumulator is empty or`ends in a comma + + // Base case: no more tokens, so return the accumulator + (@ACC [$($acc:tt)*]) => { vec![$($acc)*]}; + // Parse field* : node + (@ACC [$($acc:tt)*] $field_name:ident * : ($($sub_node:tt)*) $($rest:tt)*) => { _tree_builder_fields!( @ACC [ $($acc)* (stringify!($field_name), tree_builder_child!($($sub_node)*)),] $($rest)*)}; + // Parse field : node + (@ACC [$($acc:tt)*] $field_name:ident : $sub_node:tt $($rest:tt)*) => { _tree_builder_fields!( @ACC [ $($acc)* (stringify!($field_name), vec![$crate::tree_builder::TreeChildBuilder::SingleNode(tree_builder!($sub_node))]),] $($rest)*)}; +} +#[macro_export] +macro_rules! tree_builder_fields { + ($($all:tt)*) => { _tree_builder_fields!( @ACC [] $($all)*)}; +} + +pub struct TreesBuilder { + pub children: Vec, +} + +impl TreesBuilder { + pub fn build_trees(&self, target: &mut Ast, vars: &Captures) -> Result, String> { + let mut child_ids = Vec::new(); + for child in &self.children { + child.build_tree(target, vars, &mut child_ids)?; + } + Ok(child_ids) + } +} + +#[macro_export] +macro_rules! 
trees_builder { + () => { $crate::tree_builder::TreesBuilder { children: Vec::new()}}; + ($($rest:tt)*) => {$crate::tree_builder::TreesBuilder { children: _tree_builder_child!( @ACC [] $($rest)* )}}; +} + +pub use tree_builder; +pub use tree_builder_child; +pub use trees_builder; diff --git a/shared/yeast/src/visitor.rs b/shared/yeast/src/visitor.rs new file mode 100644 index 00000000000..ed6acfca0ea --- /dev/null +++ b/shared/yeast/src/visitor.rs @@ -0,0 +1,110 @@ +use std::collections::BTreeMap; +use tree_sitter::{Language, Tree}; + +use crate::{Ast, Id, Node, NodeContent, CHILD_FIELD}; + +#[derive(Debug)] +struct VisitorNode { + inner: Node, + parent: Option, +} + +/// A type that can walk a TS tree and produce an `Ast`. +#[derive(Debug)] +pub(crate) struct Visitor { + nodes: Vec, + current: Option, + language: Language, +} + +impl Visitor { + pub fn new(language: Language) -> Self { + Self { + nodes: Vec::new(), + current: None, + language, + } + } + + pub fn visit(&mut self, tree: &Tree) { + let cursor = &mut tree.walk(); + self.enter_node(cursor.node()); + let mut recurse = true; + loop { + if recurse && cursor.goto_first_child() { + recurse = self.enter_node(cursor.node()); + } else { + self.leave_node(cursor.field_name(), cursor.node()); + + if cursor.goto_next_sibling() { + recurse = self.enter_node(cursor.node()); + } else if cursor.goto_parent() { + recurse = false; + } else { + break; + } + } + } + } + + pub fn build(self) -> Ast { + Ast { + root: self.nodes[0].inner.id, // this is likely always just 0 + language: self.language, + nodes: self.nodes.into_iter().map(|n| n.inner).collect(), + } + } + + fn add_node(&mut self, n: tree_sitter::Node<'_>, content: NodeContent, is_named: bool) -> Id { + let id = self.nodes.len(); + self.nodes.push(VisitorNode { + inner: Node { + id, + kind: self.language.id_for_node_kind(n.kind(), is_named), + kind_name: n.kind(), + content, + fields: BTreeMap::new(), + is_missing: n.is_missing(), + is_named: n.is_named(), + 
is_extra: n.is_extra(), + is_error: n.is_error(), + }, + parent: self.current, + }); + id + } + + fn enter_node(&mut self, node: tree_sitter::Node<'_>) -> bool { + let id = self.add_node(node, node.range().into(), node.is_named()); + self.current = Some(id); + true + } + + fn leave_node(&mut self, field_name: Option<&'static str>, _node: tree_sitter::Node<'_>) { + let node = self.current.map(|i| &self.nodes[i]).unwrap(); + let node_id = node.inner.id; + let node_parent = node.parent; + + if let Some(parent_id) = node.parent { + let parent = self.nodes.get_mut(parent_id).unwrap(); + if let Some(field) = field_name { + let field_id = self.language.field_id_for_name(field).unwrap().get(); + parent + .inner + .fields + .entry(field_id) + .or_default() + .push(node_id); + } else { + parent + .inner + .fields + .entry(CHILD_FIELD) + .or_default() + .push(node_id); + } + } + + self.current = node_parent; + } +} diff --git a/shared/yeast/tests/fixtures/1.parsed.json b/shared/yeast/tests/fixtures/1.parsed.json new file mode 100644 index 00000000000..32738f8a393 --- /dev/null +++ b/shared/yeast/tests/fixtures/1.parsed.json @@ -0,0 +1,68 @@ +{ + "program": { + "content": "x, y, z = foo()\n", + "rest": [ + { + "assignment": { + "content": "x, y, z = foo()", + "left": [ + { + "left_assignment_list": { + "content": "x, y, z", + "rest": [ + { + "identifier": "x" + }, + { + ",": "," + }, + { + "identifier": "y" + }, + { + ",": "," + }, + { + "identifier": "z" + } + ] + } + } + ], + "rest": [ + { + "=": "=" + } + ], + "right": [ + { + "call": { + "arguments": [ + { + "argument_list": { + "content": "()", + "rest": [ + { + "(": "(" + }, + { + ")": ")" + } + ] + } + } + ], + "content": "foo()", + "method": [ + { + "identifier": "foo" + } + ] + } + } + ] + } + } + ] + } +} \ No newline at end of file diff --git a/shared/yeast/tests/fixtures/1.rb b/shared/yeast/tests/fixtures/1.rb new file mode 100644 index 00000000000..172a933595e --- /dev/null +++ b/shared/yeast/tests/fixtures/1.rb 
@@ -0,0 +1 @@ +x, y, z = foo() diff --git a/shared/yeast/tests/fixtures/1.rewritten.json b/shared/yeast/tests/fixtures/1.rewritten.json new file mode 100644 index 00000000000..8c0572237e0 --- /dev/null +++ b/shared/yeast/tests/fixtures/1.rewritten.json @@ -0,0 +1,68 @@ +{ + "program": { + "content": "", + "rest": [ + { + "assignment": { + "content": "", + "left": [ + { + "call": { + "arguments": [ + { + "argument_list": { + "content": "()", + "rest": [ + { + "(": "(" + }, + { + ")": ")" + } + ] + } + } + ], + "content": "foo()", + "method": [ + { + "identifier": "foo" + } + ] + } + } + ], + "rest": [ + { + "=": "=" + } + ], + "right": [ + { + "left_assignment_list": { + "content": "x, y, z", + "rest": [ + { + "identifier": "x" + }, + { + ",": "," + }, + { + "identifier": "y" + }, + { + ",": "," + }, + { + "identifier": "z" + } + ] + } + } + ] + } + } + ] + } +} \ No newline at end of file diff --git a/shared/yeast/tests/fixtures/multiple_assignment.input.json b/shared/yeast/tests/fixtures/multiple_assignment.input.json new file mode 100644 index 00000000000..ea83e7a7cb1 --- /dev/null +++ b/shared/yeast/tests/fixtures/multiple_assignment.input.json @@ -0,0 +1,85 @@ +{ + "program": { + "content": "for a, b in pairs_list do\n x=y\nend", + "rest": [ + { + "for": { + "body": [ + { + "do": { + "content": "do\n x=y\nend", + "rest": [ + { + "do": "do" + }, + { + "assignment": { + "content": "x=y", + "left": [ + { + "identifier": "x" + } + ], + "rest": [ + { + "=": "=" + } + ], + "right": [ + { + "identifier": "y" + } + ] + } + }, + { + "end": "end" + } + ] + } + } + ], + "content": "for a, b in pairs_list do\n x=y\nend", + "pattern": [ + { + "left_assignment_list": { + "content": "a, b", + "rest": [ + { + "identifier": "a" + }, + { + ",": "," + }, + { + "identifier": "b" + } + ] + } + } + ], + "rest": [ + { + "for": "for" + } + ], + "value": [ + { + "in": { + "content": "in pairs_list", + "rest": [ + { + "in": "in" + }, + { + "identifier": "pairs_list" + } + ] + } + } + ] 
+ } + } + ] + } +} \ No newline at end of file diff --git a/shared/yeast/tests/fixtures/multiple_assignment.output.json b/shared/yeast/tests/fixtures/multiple_assignment.output.json new file mode 100644 index 00000000000..dd0a4301492 --- /dev/null +++ b/shared/yeast/tests/fixtures/multiple_assignment.output.json @@ -0,0 +1,139 @@ +{ + "program": { + "content": "for a, b in pairs_list do\n x=y\nend", + "rest": [ + { + "call": { + "block": [ + { + "block": { + "body": [ + { + "block_body": { + "content": "", + "rest": [ + { + "assignment": { + "content": "", + "left": [ + { + "identifier": "tmp-1" + } + ], + "right": [ + { + "identifier": "tmp-0" + } + ] + } + }, + { + "assignment": { + "content": "", + "left": [ + { + "identifier": "a" + } + ], + "right": [ + { + "element_reference": { + "content": "", + "object": [ + { + "identifier": "tmp-1" + } + ], + "rest": [ + { + "integer": "0" + } + ] + } + } + ] + } + }, + { + "assignment": { + "content": "", + "left": [ + { + "identifier": "b" + } + ], + "right": [ + { + "element_reference": { + "content": "", + "object": [ + { + "identifier": "tmp-1" + } + ], + "rest": [ + { + "integer": "1" + } + ] + } + } + ] + } + }, + { + "assignment": { + "content": "x=y", + "left": [ + { + "identifier": "x" + } + ], + "rest": [ + { + "=": "=" + } + ], + "right": [ + { + "identifier": "y" + } + ] + } + } + ] + } + } + ], + "content": "", + "parameters": [ + { + "block_parameters": { + "content": "", + "rest": [ + { + "identifier": "tmp-0" + } + ] + } + } + ] + } + } + ], + "content": "", + "method": [ + { + "identifier": "each" + } + ], + "receiver": [ + { + "identifier": "pairs_list" + } + ] + } + } + ] + } +} \ No newline at end of file diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs new file mode 100644 index 00000000000..321444a8757 --- /dev/null +++ b/shared/yeast/tests/test.rs @@ -0,0 +1,160 @@ +#![cfg(test)] +use std::cell::Cell; +use std::fs::read_to_string; +use std::path::Path; +use std::rc::Rc; + +use 
yeast::{captures::Captures, print::Printer, *, rules::rules}; + +#[test] +fn test_ruby_multiple_assignment() { + // We want to convert this + // + // for a, b in pairs_list do x=y end + // + // into this, where tmp-0 and tmp-1 are generated temporaries + // + // pairs_list.each { |tmp-0| + // tmp-1 = tmp-0; a = tmp-1[0]; b = tmp-1[1] + // x=y + // } + + // The desugaring rules themselves come from yeast::rules::rules(). + + let input = "for a, b in pairs_list do\n x=y\nend"; + + // Construct the thing that runs our desugaring process + let runner = Runner::new( + tree_sitter_ruby::LANGUAGE.into(), + rules(), + ); + + let old_root = 0; + + // Run it on our example + let ast = runner.run(input); + let new_root = ast.get_root(); + + let formatted_input = serde_json::to_string_pretty(&ast.print(input, old_root)).unwrap(); + let formatted_output = serde_json::to_string_pretty(&ast.print(input, new_root)).unwrap(); + + println!("before transformation: {}", formatted_input); + println!("after transformation: {}", formatted_output); + + assert_eq!( + formatted_input, + read_to_string("tests/fixtures/multiple_assignment.input.json").unwrap() + ); + assert_eq!( + formatted_output, + read_to_string("tests/fixtures/multiple_assignment.output.json").unwrap() + ); +} + +#[test] +fn test_parse_input() { + let input = read_to_string("tests/fixtures/1.rb").unwrap(); + let parsed_expected = read_to_string("tests/fixtures/1.parsed.json").unwrap(); + + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), vec![]); + let ast = runner.run(&input); + let parsed_actual = serde_json::to_string_pretty(&ast.print(&input, ast.get_root())).unwrap(); + + assert_eq!(parsed_actual, parsed_expected); +} + +#[test] +fn test_query_input() { + let input = read_to_string("tests/fixtures/1.rb").unwrap(); + let rewritten_expected = read_to_string("tests/fixtures/1.rewritten.json").unwrap(); + + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), vec![]); + let mut ast = runner.run(&input); + + let query = yeast::query::query!( + program child:( + (assignment + left: (@left) + right:
(@right) + child*: ((@rest)*) + ) + + ) + ); + println!("query: {:?}", query); + + let mut matches = Captures::new(); + if query.do_match(&ast, ast.get_root(), &mut matches).unwrap() { + println!("match: {:?}", matches); + } else { + println!("no match"); + } + + let builder = yeast::tree_builder::tree_builder!( + program child: + (assignment + left: (@right) + right: (@left) + child*:((@rest)*) + ) + ); + + let new_id = builder.build_tree(&mut ast, &matches).unwrap(); + + let rewritten_actual = serde_json::to_string_pretty(&ast.print(&input, new_id)).unwrap(); + + // Compare only: rewriting the fixture from the test would let a regression fail once and then become the new expected output. + assert_eq!(rewritten_actual, rewritten_expected); +} + +/// Overwrites the fixture at `file` with `content`. Not called during normal runs; invoke it +/// temporarily from a test when a fixture has to be regenerated, e.g. +/// `write_expected("tests/fixtures/1.parsed.json", &parsed_actual);` +#[allow(dead_code)] // intentionally unused outside of fixture regeneration +fn write_expected<P: AsRef<Path>>(file: P, content: &str) { + use std::io::Write; + std::fs::File::create(file) + .unwrap() + .write_all(content.as_bytes()) + .unwrap(); +} + +#[test] +fn test_cursor() { + let input = read_to_string("tests/fixtures/1.rb").unwrap(); + + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), vec![]); + let ast = runner.run(&input); + let mut cursor = AstCursor::new(&ast); + + assert_eq!(cursor.node().id(), ast.get_root()); + assert_eq!(cursor.field_id(), None); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().id(), 26); + + assert!(!cursor.goto_next_sibling()); + assert_eq!(cursor.node().id(), 26); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().id(), 19); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().id(), 14); + + assert!(!cursor.goto_first_child()); + assert_eq!(cursor.node().id(), 14); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().id(), 15); + assert_eq!(cursor.field_id(), Some(CHILD_FIELD)); + + assert!(cursor.goto_parent()); + assert_eq!(cursor.node().id(), 19); + + assert_eq!(cursor.field_id(), Some(18)); + + let cursor = AstCursor::new(&ast); + let mut
printer = Printer {}; + printer.visit(cursor); +}