Compare commits

...

73 Commits

Author SHA1 Message Date
Taus
1ce091de96 Yeast: Appease the clippy 2023-12-04 21:59:12 +00:00
Taus
deaef2e9da Merge pull request #14986 from alexet/hackathinfixes
YEAST: Make some more fixes to rules.
2023-12-01 22:50:48 +01:00
Alex Eyers-Taylor
2cb100393e YEAST: Make some more fixes to rules. 2023-12-01 17:47:49 +00:00
Alexander Eyers-Taylor
753d0eec89 Merge pull request #14985 from alexet/hackathinfixes
Hackathinfixes
2023-12-01 15:22:14 +00:00
Alex Eyers-Taylor
cb3954e16d Fixes 2023-12-01 15:16:32 +00:00
Alex Eyers-Taylor
6b515dc051 Merge remote-tracking branch 'origin/hackathon-desugaring' into disable-macro-tests 2023-12-01 14:55:38 +00:00
Alex Eyers-Taylor
9a004d8b51 Use newer rust 2023-12-01 14:55:16 +00:00
Alex Eyers-Taylor
f42cfb0f84 Add content extraction 2023-12-01 14:55:03 +00:00
Harry Maclean
e7439e636e fake locations 2023-12-01 14:40:31 +00:00
Harry Maclean
1f062baf06 Use rules in extractor 2023-12-01 14:36:01 +00:00
Harry Maclean
ffa73c39a6 export rules 2023-12-01 14:32:34 +00:00
Harry Maclean
db3f8b1b21 fix the fix 2023-12-01 14:23:25 +00:00
Harry Maclean
6d27375f52 fix 2023-12-01 14:19:50 +00:00
Harry Maclean
b2070af9d6 clippy fixes 2023-12-01 13:59:43 +00:00
Harry Maclean
be6f0e9b96 Minor cleanup, get tests passing 2023-12-01 13:58:24 +00:00
Alexander Eyers-Taylor
4575583ebf Merge pull request #14980 from alexet/disable-macro-tests
YEAST: Disable trace macro expandtion feature
2023-12-01 13:54:13 +00:00
Alex Eyers-Taylor
0521851011 YEAST: Disable trace macro expandtion feature 2023-12-01 13:50:50 +00:00
Taus
410a60c2c2 Merge pull request #14975 from alexet/hookup-new-queries
YEAST: Allow multiple output nodes and merge in no-children
2023-12-01 14:04:48 +01:00
Alex Eyers-Taylor
174a4f9712 Merge remote-tracking branch 'origin/hackathon-desugaring' into hookup-new-queries 2023-12-01 12:32:17 +00:00
Harry Maclean
5e424ff962 Merge pull request #14960 from github/hmac-tree-output
yeast: tree output
2023-12-01 10:35:08 +00:00
Harry Maclean
01bc801217 yeast: fix off-by-one node ID 2023-12-01 10:22:21 +00:00
Taus
38cb350103 WIP yeast: support non-zero root
In its current state, this breaks the cursor test (because desugaring takes
place, and hence the root moves), and it isn't entirely
clear to me what the best way of fixing it would be.
2023-11-30 22:47:13 +00:00
Taus
099751b761 Merge pull request #14970 from github/tausbn/yeast-binary
yeast: Add a bare-bones binary
2023-11-30 22:57:30 +01:00
Alex Eyers-Taylor
0be9d0de1f Merge remote-tracking branch 'origin/hackathon-desugaring' into hookup-new-queries 2023-11-30 20:18:12 +00:00
Alex Eyers-Taylor
ebe27a231f YEAST: Remove cChildren from builders/queries. 2023-11-30 20:16:30 +00:00
Alex Eyers-Taylor
011d2b9caa Merge commit '263aecf55348d83fed494299f91a7e45eee3fb64' into hookup-new-queries 2023-11-30 20:16:05 +00:00
Alex Eyers-Taylor
19933ca2f8 WIP: Allow string tokens 2023-11-30 17:24:40 +00:00
Alex Eyers-Taylor
b7b73d4de7 Allow integer nodes 2023-11-30 17:23:27 +00:00
Alex Eyers-Taylor
e0f368a513 WIP Add desugar eaxmple. 2023-11-30 17:19:06 +00:00
Alex Eyers-Taylor
6e1f80f597 YEAST: helper for creating tokens. 2023-11-30 17:18:37 +00:00
Alex Eyers-Taylor
ec10d05964 WIP: Allow multiple outputs. 2023-11-30 17:18:15 +00:00
Harry Maclean
2c0f8e5d0a Hide the fake child field 2023-11-30 16:40:55 +00:00
Harry Maclean
b0a3671040 fixups 2023-11-30 16:05:03 +00:00
Harry Maclean
f532961ca7 use yeast in shared extractor 2023-11-30 16:05:03 +00:00
Harry Maclean
eed4031e05 add more TS info to Node 2023-11-30 16:05:00 +00:00
Harry Maclean
2dcda65a65 yeast: refactor cursor 2023-11-30 16:04:41 +00:00
Taus
f5f0d45226 Yeast: Use a special field for children with no field 2023-11-30 16:03:46 +00:00
Harry Maclean
e9d1d91823 yeast: tree output 2023-11-30 16:03:46 +00:00
Philip Ginsbach
bf40878668 Merge pull request #14971 from alexet/hookup-new-queries
YEAST: Hookup query code
2023-11-30 15:44:34 +00:00
Alex Eyers-Taylor
785bfaf6f2 Merge remote-tracking branch 'origin/hackathon-desugaring' into hookup-new-queries 2023-11-30 15:05:54 +00:00
Alex Eyers-Taylor
1bc854850c YEAST: Hookup query code 2023-11-30 15:02:13 +00:00
Philip Ginsbach
89c16ed22a Merge pull request #14968 from alexet/fix-missing-method
YEAST: Add missing method
2023-11-30 14:50:10 +00:00
Alex Eyers-Taylor
823fefcb7a YEAST: Fix some warnings. 2023-11-30 14:48:55 +00:00
Alex Eyers-Taylor
3c0a5260b1 YEAST: Fix more issues. 2023-11-30 14:44:21 +00:00
Taus
685ec84bff yeast: Add a bare-bones binary
Run using `cargo run <filename> [language]`, where if language is not specified
we use `ruby`. Outputs the desugared AST as JSON.
2023-11-30 14:35:22 +00:00
Alex Eyers-Taylor
a639e3eef8 YEAST: Fix paramerter order. 2023-11-30 14:31:07 +00:00
Alex Eyers-Taylor
78598fc9e8 Add missing mod lines. 2023-11-30 14:29:06 +00:00
Alex Eyers-Taylor
b9234e3603 YEAST: Add missing method 2023-11-30 14:24:12 +00:00
Philip Ginsbach
4469acfd8d Merge pull request #14966 from alexet/alexet/hackathon-desugaring
Yeast: Implement matcher and tree builders
2023-11-30 14:19:36 +00:00
Philip Ginsbach
91e0c3df79 Merge pull request #14967 from github/ginsbach/ImplementApplyRules
implement applyRules
2023-11-30 14:13:15 +00:00
Philip Ginsbach
79a64412cc update the Node id later 2023-11-30 13:56:51 +00:00
Philip Ginsbach
77d4d95714 implement applyRules 2023-11-30 13:40:10 +00:00
Alex Eyers-Taylor
43116d556a Yeast: Add tree_builder and implenmtn queries 2023-11-30 13:37:33 +00:00
Alex Eyers-Taylor
1e16ea0d75 WIP: Parser for queries. 2023-11-30 13:37:33 +00:00
Philip Ginsbach
39de0d41dd Merge pull request #14964 from github/ginsbach/SomePlaceholders
implement `tryRule` and insert placeholders for `isMatch`, `applyRules`
2023-11-30 13:08:03 +00:00
Philip Ginsbach
f1688f5c7d implement tryRule and insert placeholders for isMatch, applyRules 2023-11-30 12:54:42 +00:00
Philip Ginsbach
7d260192c3 Merge pull request #14963 from github/ginsbach/InterfaceChanges
update some interfaces ahead of merging actual implementations
2023-11-30 12:47:21 +00:00
Philip Ginsbach
2e1bccf198 Runner must return root new node id 2023-11-30 12:37:05 +00:00
Philip Ginsbach
445df836f9 update interface of transformation functor 2023-11-30 12:26:41 +00:00
Philip Ginsbach
9878c52ad9 store query and transform in Rule 2023-11-30 12:24:52 +00:00
Philip Ginsbach
a3ec05e45d store rules in Runner 2023-11-30 12:21:00 +00:00
Philip Ginsbach
7aff31d669 explicit root node argument for printing ast 2023-11-30 12:20:15 +00:00
Taus
263aecf553 Yeast: Use a special field for children with no field 2023-11-29 17:39:25 +00:00
Harry Maclean
b446982fae Merge pull request #14949 from github/hmac-desugar
yeast: update debug format to be more readable
2023-11-29 11:30:16 +00:00
Harry Maclean
050a18f240 Update debug format to be more readable 2023-11-29 11:21:15 +00:00
Harry Maclean
658fa944ed Merge pull request #14947 from hmac/hmac-desugar-parse-input
yeast: parse input into the AST
2023-11-29 10:29:35 +00:00
Harry Maclean
03473c2147 Parse input into the AST 2023-11-29 10:22:33 +00:00
Harry Maclean
34b21af46f Add basic AST type
We store the AST as a vector of nodes, with ids as indexes into the
vector.
2023-11-29 09:50:13 +00:00
Harry Maclean
cef3ce1cde add a basic failing test 2023-11-28 15:32:16 +00:00
Harry Maclean
17234d3939 Add rust project skeleton 2023-11-28 15:01:14 +00:00
Harry Maclean
9b6d00b737 Add basic nix config 2023-11-28 15:00:32 +00:00
Harry Maclean
267f9acc4c yeast initial commit 2023-11-28 14:40:26 +00:00
Harry Maclean
e5cc540475 wip: tree output 2023-11-18 17:17:38 +00:00
27 changed files with 2528 additions and 21 deletions

View File

@@ -2,6 +2,6 @@
# extractor. It is set to the lowest version of Rust we want to support.
[toolchain]
channel = "1.68"
channel = "1.70"
profile = "minimal"
components = [ "rustfmt" ]

View File

@@ -3,9 +3,6 @@ set -eux
CARGO=cargo
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
platform="linux64"
if which cross; then
CARGO=cross
fi
elif [[ "$OSTYPE" == "darwin"* ]]; then
platform="osx64"
else

View File

@@ -17,6 +17,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4.19", features = ["serde"] }
num_cpus = "1.14.0"
yeast = { path = "../yeast" }
[dev-dependencies]
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql" }

View File

@@ -6,7 +6,8 @@ use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::path::Path;
use tree_sitter::{Language, Node, Parser, Range, Tree};
use tree_sitter::{Language, Parser, Range};
use yeast::{Cursor, Node};
pub mod simple;
@@ -151,7 +152,12 @@ pub fn extract(
language_prefix,
schema,
);
traverse(&tree, &mut visitor);
// HACK: Pass the tree through yeast
let runner = yeast::Runner::new(language, yeast::rules::rules());
let ast = runner.run_from_tree(&tree);
traverse(&ast, &mut visitor);
parser.reset();
}
@@ -242,7 +248,7 @@ impl<'a> Visitor<'a> {
&mut self,
message: &str,
args: &[diagnostics::MessageArg],
node: Node,
node: &Node,
status_page: bool,
) {
let (start_line, start_column, end_line, end_column) = location_for(self, node);
@@ -267,7 +273,7 @@ impl<'a> Visitor<'a> {
self.record_parse_error(loc, &mesg);
}
fn enter_node(&mut self, node: Node) -> bool {
fn enter_node(&mut self, node: &Node) -> bool {
if node.is_missing() {
self.record_parse_error_for_node(
"A parse error occurred (expected {} symbol). Check the syntax of the file. If the file is invalid, correct the error or {} the file from analysis.",
@@ -293,7 +299,7 @@ impl<'a> Visitor<'a> {
true
}
fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) {
fn leave_node(&mut self, field_name: Option<&'static str>, node: &Node) {
if node.is_error() || node.is_missing() {
return;
}
@@ -433,19 +439,21 @@ impl<'a> Visitor<'a> {
diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)),
diagnostics::MessageArg::Code(&format!("{:?}", field.type_info)),
],
*node,
node,
false,
);
}
} else if child_node.field_name.is_some() || child_node.type_name.named {
self.record_parse_error_for_node(
"Value for unknown field: {}::{} and type {}",
"Value for unknown field: {}::{} and type {}. Expecting: {}",
&[
diagnostics::MessageArg::Code(node.kind()),
diagnostics::MessageArg::Code(child_node.field_name.unwrap_or("child")),
diagnostics::MessageArg::Code(child_node.field_name.unwrap_or("extractor_child")),
diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)),
diagnostics::MessageArg::Code(&format!("{:?}", fields)),
],
*node,
node,
false,
);
}
@@ -470,7 +478,7 @@ impl<'a> Visitor<'a> {
node.kind(),
column_name
);
self.record_parse_error_for_node(&error_message, &[], *node, false);
self.record_parse_error_for_node(&error_message, &[], node, false);
}
}
Storage::Table {
@@ -486,7 +494,7 @@ impl<'a> Visitor<'a> {
diagnostics::MessageArg::Code(node.kind()),
diagnostics::MessageArg::Code(table_name),
],
*node,
node,
false,
);
break;
@@ -547,15 +555,17 @@ impl<'a> Visitor<'a> {
}
// Emit a slice of a source file as an Arg.
fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
let range = n.byte_range();
trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
fn sliced_source_arg(source: &[u8], n: &Node) -> trap::Arg {
trap::Arg::String(n.opt_string_content().unwrap_or_else(|| {
let range = n.byte_range();
String::from_utf8_lossy(&source[range.start..range.end]).into_owned()
}))
}
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
// The first is the location and label definition, and the second is the
// 'Located' entry.
fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize) {
fn location_for(visitor: &mut Visitor, n: &Node) -> (usize, usize, usize, usize) {
// Tree-sitter row, column values are 0-based while CodeQL starts
// counting at 1. In addition Tree-sitter's row and column for the
// end position are exclusive while CodeQL's end positions are inclusive.
@@ -615,8 +625,8 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
(start_line, start_col, end_line, end_col)
}
fn traverse(tree: &Tree, visitor: &mut Visitor) {
let cursor = &mut tree.walk();
fn traverse(tree: &yeast::Ast, visitor: &mut Visitor) {
let mut cursor = tree.walk();
visitor.enter_node(cursor.node());
let mut recurse = true;
loop {

1
shared/yeast/.envrc Normal file
View File

@@ -0,0 +1 @@
use flake

1
shared/yeast/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

0
shared/yeast/.gitkeep Normal file
View File

360
shared/yeast/Cargo.lock generated Normal file
View File

@@ -0,0 +1,360 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]]
name = "anstyle-parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "clap"
version = "4.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "libc"
version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "memchr"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "serde"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tree-sitter"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-python"
version = "0.20.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c93b1b1fbd0d399db3445f51fd3058e43d0b4dcff62ddbdb46e66550978aa5"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-ruby"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ac30cbb1560363ae76e1ccde543d6d99087421e228cc47afcec004b86bb711a"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "yeast"
version = "0.1.0"
dependencies = [
"clap",
"serde",
"serde_json",
"tree-sitter",
"tree-sitter-python",
"tree-sitter-ruby",
]

13
shared/yeast/Cargo.toml Normal file
View File

@@ -0,0 +1,13 @@
[package]
name = "yeast"
version = "0.1.0"
edition = "2021"
[dependencies]
clap = { version = "4.4.10", features = ["derive"] }
serde = { version = "1.0.193", features = ["derive"] }
serde_json = "1.0.108"
tree-sitter = "0.20.10"
tree-sitter-ruby = "0.20.0"
tree-sitter-python = "0.20.4"

85
shared/yeast/flake.lock generated Normal file
View File

@@ -0,0 +1,85 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1694529238,
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1697730408,
"narHash": "sha256-Ww//zzukdTrwTrCUkaJA/NsaLEfUfQpWZXBdXBYfhak=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "ff0a5a776b56e0ca32d47a4a47695452ec7f7d80",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"rust-overlay": "rust-overlay"
}
},
"rust-overlay": {
"inputs": {
"flake-utils": [
"flake-utils"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1697767917,
"narHash": "sha256-9+FjCVE1Y7iUKohBF43yD05KoQB+FPcw/XL2rlKkjqY=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "679ea0878edc749f23516ea6d7ffa974c6304bf5",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

31
shared/yeast/flake.nix Normal file
View File

@@ -0,0 +1,31 @@
# Nix flake exposing a development shell for working on yeast.
{
description = "YEAST elaborates abstract syntax trees";
inputs = {
nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
flake-utils.url = "github:numtide/flake-utils";
rust-overlay = {
url = "github:oxalica/rust-overlay";
# Make rust-overlay use the same nixpkgs and flake-utils inputs as this
# flake instead of its own pinned copies.
inputs.nixpkgs.follows = "nixpkgs";
inputs.flake-utils.follows = "flake-utils";
};
};
outputs = { self, nixpkgs, flake-utils, rust-overlay }:
# Produce the outputs below once per default system.
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
# Rust toolchain taken from rust-overlay rather than from nixpkgs.
rust = rust-overlay.packages.${system}.rust;
in
{
devShells.default = pkgs.mkShell {
# `rust` below is the let-bound toolchain above; the remaining entries
# are looked up in `pkgs`.
buildInputs = with pkgs; [
pkgs.tree-sitter
rust
rust-analyzer
libiconv
];
};
});
}

View File

@@ -0,0 +1,26 @@
use clap::Parser;
// Command-line arguments of the `yeast` binary.
//
// Plain `//` comments are used here on purpose: clap's derive macro turns
// `///` doc comments into help text, which would change the program's output.
#[derive(Parser)]
#[clap(name = "yeast", about = "yeast elaborates abstract syntax trees")]
struct Cli {
// Path of the source file to read (see `main`).
file: String,
// Name of the language to parse the file as; "ruby" when omitted.
#[clap(default_value = "ruby")]
language: String,
}
/// Maps a language name given on the command line to its tree-sitter grammar.
///
/// # Panics
///
/// Panics if `language` is neither `"ruby"` nor `"python"`.
fn get_language(language: &str) -> tree_sitter::Language {
    if language == "ruby" {
        tree_sitter_ruby::language()
    } else if language == "python" {
        tree_sitter_python::language()
    } else {
        panic!("Unsupported language: {}", language)
    }
}
/// Entry point: parses the file named on the command line and prints the
/// resulting AST as JSON on standard output.
///
/// The runner is created with an empty rule list, so no rewrite rules are
/// passed to it here.
fn main() {
    let args = Cli::parse();
    let language = get_language(&args.language);
    // An unreadable input file is a user error, not a bug: report it and exit
    // with a failure status instead of panicking with a backtrace.
    let source = std::fs::read_to_string(&args.file).unwrap_or_else(|err| {
        eprintln!("yeast: cannot read {}: {}", args.file, err);
        std::process::exit(1);
    });
    let runner = yeast::Runner::new(language, vec![]);
    let ast = runner.run(&source);
    println!("{}", ast.print(&source, ast.get_root()));
}

View File

@@ -0,0 +1,92 @@
use std::collections::{BTreeMap, BTreeSet};
use crate::Id;
/// The node ids captured under each capture name.
///
/// A name maps to a list of ids because the same name can be inserted more
/// than once (see `insert`).
#[derive(Debug, Clone)]
pub struct Captures {
/// Captured node ids, keyed by capture name, in insertion order.
captures: BTreeMap<&'static str, Vec<Id>>,
}
impl Default for Captures {
fn default() -> Self {
Self::new()
}
}
impl Captures {
    /// Creates an empty set of captures.
    pub fn new() -> Self {
        Captures {
            captures: BTreeMap::new(),
        }
    }

    /// Returns the single node id captured under `key`.
    ///
    /// # Errors
    ///
    /// Fails if nothing is captured under `key`, or if the number of ids
    /// captured under it is not exactly one.
    pub fn get_var(&self, key: &str) -> Result<Id, String> {
        match self.captures.get(key).map(Vec::as_slice) {
            None => Err(format!("No variable named {}", key)),
            Some([id]) => Ok(*id),
            Some(ids) => Err(format!(
                "Variable {} has {} matches, use * to allow repetition",
                key,
                ids.len()
            )),
        }
    }

    /// Appends `id` to the ids captured under `key`, creating the entry if needed.
    pub fn insert(&mut self, key: &'static str, id: Id) {
        self.captures.entry(key).or_default().push(id);
    }

    /// Replaces every id captured under `kind` with `f(id)`, in place.
    ///
    /// Does nothing if there is no capture named `kind`.
    pub fn map_captures(&mut self, kind: &str, f: &mut impl FnMut(Id) -> Id) {
        if let Some(ids) = self.captures.get_mut(kind) {
            ids.iter_mut().for_each(|id| *id = f(*id));
        }
    }

    /// Stores `f(id)` for every id captured under `from` as the capture `to`,
    /// replacing whatever `to` held before.
    ///
    /// Does nothing if there is no capture named `from`.
    pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
        if let Some(from_ids) = self.captures.get(from) {
            let new_values = from_ids.iter().copied().map(f).collect();
            self.captures.insert(to, new_values);
        }
    }

    /// Appends all captures of `other` to the captures of `self`, name by name.
    pub fn merge(&mut self, other: &Captures) {
        for (key, ids) in &other.captures {
            self.captures
                .entry(*key)
                .or_default()
                .extend(ids.iter().copied());
        }
    }

    /// Splits the repeated captures named in `children` into one `Captures`
    /// per repetition.
    ///
    /// The i-th item of the returned iterator maps every name in `children`
    /// to the single id found at position i of that name's capture list.
    ///
    /// # Errors
    ///
    /// Fails if `children` is empty, if any name in `children` has no capture
    /// (the error names the missing capture), or if the named captures do not
    /// all hold the same number of ids.
    pub fn un_star<'a>(
        &'a self,
        children: &'a BTreeSet<&'static str>,
    ) -> Result<impl Iterator<Item = Captures> + 'a, String> {
        // Resolve every requested capture up front, so that a missing name is
        // reported as such rather than as a length mismatch, and so that the
        // iterator below never has to look anything up (or unwrap) again.
        let columns = children
            .iter()
            .map(|&name| {
                self.captures
                    .get(name)
                    .map(|ids| (name, ids.as_slice()))
                    .ok_or_else(|| format!("No variable named {}", name))
            })
            .collect::<Result<Vec<(&'static str, &'a [Id])>, String>>()?;

        let repeats = match columns.first() {
            Some((_, ids)) => ids.len(),
            None => return Err("Repeated captures must have at least one capture".to_string()),
        };
        if columns.iter().any(|(_, ids)| ids.len() != repeats) {
            return Err("Repeated captures must have the same number of matches".to_string());
        }

        Ok((0..repeats).map(move |index| {
            let mut row = Captures::new();
            for (name, ids) in &columns {
                row.captures.insert(*name, vec![ids[index]]);
            }
            row
        }))
    }
}

View File

@@ -0,0 +1,8 @@
/// A cursor for walking a tree one node at a time.
///
/// `N` is the node type and `F` the field id type. `T` is not used by any
/// method of the trait. `AstCursor` implements it with `T = Ast`, so `T`
/// presumably names the tree type.
pub trait Cursor<'a, T, N, F> {
/// Returns the node the cursor is currently positioned on.
fn node(&self) -> &'a N;
/// Returns the id of the field through which the current node was reached,
/// if any.
fn field_id(&self) -> Option<F>;
/// Returns the name of the field through which the current node was
/// reached, if any.
fn field_name(&self) -> Option<&'static str>;
/// Moves to the first child of the current node; returns `false` (as
/// implemented by `AstCursor`) when there is no child to move to.
fn goto_first_child(&mut self) -> bool;
/// Moves to the next sibling of the current node; returns `false` (as
/// implemented by `AstCursor`) when there is no further sibling.
fn goto_next_sibling(&mut self) -> bool;
/// Moves to the parent of the current node; returns `false` (as
/// implemented by `AstCursor`) when there is no parent to move to.
fn goto_parent(&mut self) -> bool;
}

575
shared/yeast/src/lib.rs Normal file
View File

@@ -0,0 +1,575 @@
use std::{collections::BTreeMap, mem};
use serde::Serialize;
use serde_json::{json, Value};
pub mod captures;
pub mod cursor;
pub mod print;
pub mod query;
mod range;
pub mod rules;
pub mod tree_builder;
mod visitor;
use captures::Captures;
pub use cursor::Cursor;
use query::QueryNode;
/// Node ids are indexes into the arena
type Id = usize;
/// Field and Kind ids are provided by tree-sitter
type FieldId = u16;
/// Numeric id of a node kind; see the note on `FieldId` above.
type KindId = u16;
/// Pseudo field id that does not come from the grammar: `field_name_for_id`
/// and `field_id_for_name` map it to and from `CHILD_FIELD_NAME` instead of
/// consulting the language.
pub const CHILD_FIELD: u16 = u16::MAX;
/// Name associated with the `CHILD_FIELD` pseudo field.
const CHILD_FIELD_NAME: &str = "child";
/// A cursor over the nodes of an `Ast`; implements the `Cursor` trait.
#[derive(Debug)]
pub struct AstCursor<'a> {
/// The AST being traversed.
ast: &'a Ast,
/// A stack of parents, along with iterators for their children
parents: Vec<(&'a Node, ChildrenIter<'a>)>,
/// The node the cursor is currently positioned on.
node: &'a Node,
}
impl<'a> AstCursor<'a> {
pub fn new(ast: &'a Ast) -> Self {
// TODO: handle non-zero root
let node = ast.get_node(ast.root).unwrap();
Self {
ast,
parents: vec![],
node,
}
}
fn goto_next_sibling_opt(&mut self) -> Option<()> {
self.node = self.parents.last_mut()?.1.next()?;
Some(())
}
fn goto_first_child_opt(&mut self) -> Option<()> {
let parent = self.node;
let mut children = ChildrenIter::new(self.ast, parent);
let first_child = children.next()?;
self.node = first_child;
self.parents.push((parent, children));
Some(())
}
fn goto_parent_opt(&mut self) -> Option<()> {
self.node = self.parents.pop()?.0;
Some(())
}
}
impl<'a> Cursor<'a, Ast, Node, FieldId> for AstCursor<'a> {
    /// Returns the node the cursor is positioned on.
    fn node(&self) -> &'a Node {
        self.node
    }

    /// Returns the id of the field the current node belongs to, or `None`
    /// when the cursor has no parent.
    fn field_id(&self) -> Option<FieldId> {
        self.parents
            .last()
            .and_then(|(_, children)| children.current_field())
    }

    /// Returns the name of the field the current node belongs to.
    ///
    /// Yields `None` both when there is no field id and when the field is the
    /// `CHILD_FIELD` pseudo field.
    fn field_name(&self) -> Option<&'static str> {
        match self.field_id()? {
            CHILD_FIELD => None,
            id => self.ast.field_name_for_id(id),
        }
    }

    /// Moves to the first child; `false` means the cursor did not move.
    fn goto_first_child(&mut self) -> bool {
        self.goto_first_child_opt().is_some()
    }

    /// Moves to the next sibling; `false` means the cursor did not move.
    fn goto_next_sibling(&mut self) -> bool {
        self.goto_next_sibling_opt().is_some()
    }

    /// Moves to the parent; `false` means the cursor did not move.
    fn goto_parent(&mut self) -> bool {
        self.goto_parent_opt().is_some()
    }
}
/// An iterator over all the child nodes of a node.
///
/// Children are visited field by field, in the iteration order of the node's
/// `fields` map, and within a field in the order the ids are stored.
#[derive(Debug)]
struct ChildrenIter<'a> {
/// The AST used to resolve child ids to nodes.
ast: &'a Ast,
/// The field most recently started by `next`; `None` before the first call.
current_field: Option<FieldId>,
/// The fields of the node that have not been started yet.
fields: std::collections::btree_map::Iter<'a, FieldId, Vec<Id>>,
/// The children of `current_field` that have not been yielded yet.
field_children: Option<std::slice::Iter<'a, Id>>,
}
impl<'a> ChildrenIter<'a> {
fn new(ast: &'a Ast, node: &'a Node) -> Self {
Self {
ast,
current_field: None,
fields: node.fields.iter(),
field_children: None,
}
}
fn get_node(&self, id: Id) -> &'a Node {
self.ast.get_node(id).unwrap()
}
fn current_field(&self) -> Option<FieldId> {
self.current_field
}
}
impl<'a> Iterator for ChildrenIter<'a> {
    type Item = &'a Node;

    /// Yields the next child, moving on to the following field whenever the
    /// current one is exhausted; fields without children are skipped.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // Keep draining the field currently being visited, if there is one.
            if let Some(ids) = self.field_children.as_mut() {
                if let Some(child_id) = ids.next() {
                    return Some(self.get_node(*child_id));
                }
            }
            // Nothing left there: start the next field, or stop when none remain.
            let (field, children) = self.fields.next()?;
            self.current_field = Some(*field);
            self.field_children = Some(children.iter());
        }
    }
}
/// Our AST
///
/// Nodes live in a single vector and refer to each other by index (`Id`).
#[derive(PartialEq, Eq, Debug)]
pub struct Ast {
/// Id of the root node.
root: Id,
/// All nodes of the tree; a node's id is its index in this vector.
nodes: Vec<Node>,
/// The tree-sitter language used to translate kind and field ids to names.
language: tree_sitter::Language,
}
impl Ast {
/// Builds an `Ast` from a tree-sitter parse tree.
///
/// The tree is walked by the internal `visitor::Visitor`, which produces the
/// resulting AST.
pub fn from_tree(language: tree_sitter::Language, tree: &tree_sitter::Tree) -> Self {
    let mut builder = visitor::Visitor::new(language);
    builder.visit(tree);
    builder.build()
}
/// Returns a cursor positioned on the root node of this AST.
pub fn walk(&self) -> AstCursor {
AstCursor::new(self)
}
/// Returns all nodes of the AST; a node's id is its index in this slice.
pub fn nodes(&self) -> &[Node] {
&self.nodes
}
/// Returns the id of the root node.
pub fn get_root(&self) -> Id {
self.root
}
/// Makes `root` the root node id. The id is stored as given, without
/// checking that it refers to an existing node.
pub fn set_root(&mut self, root: Id) {
self.root = root;
}
/// Returns the node with the given id, or `None` if `id` is out of range.
pub fn get_node(&self, id: Id) -> Option<&Node> {
self.nodes.get(id)
}
/// Renders the subtree rooted at `root_id` as a JSON value, for debugging.
///
/// `source` is the text that range-based node contents are sliced from.
///
/// # Panics
///
/// Panics if `root_id` is not the id of a node in this AST.
pub fn print(&self, source: &str, root_id: Id) -> Value {
    self.print_node(&self.nodes[root_id], source)
}
/// Appends a new node to the arena and returns its id.
///
/// The node's kind name is looked up from `kind` in the language, and its
/// `is_missing`, `is_error` and `is_extra` flags are all `false`.
///
/// # Panics
///
/// Panics if the language has no name for `kind`.
fn create_node(
    &mut self,
    kind: KindId,
    content: NodeContent,
    fields: BTreeMap<FieldId, Vec<Id>>,
    is_named: bool,
) -> Id {
    let kind_name = self.language.node_kind_for_id(kind).unwrap();
    // The new node goes at the end of the arena, so its id is the current length.
    let id = self.nodes.len();
    let node = Node {
        id,
        kind,
        kind_name,
        fields,
        content,
        is_missing: false,
        is_error: false,
        is_extra: false,
        is_named,
    };
    self.nodes.push(node);
    id
}
/// Appends a named node without fields, whose text is the owned string
/// `content`, and returns its id.
///
/// The kind id is looked up from `kind` in the language, while `kind` itself
/// is stored as the node's kind name.
pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id {
    let kind_id = self.language.id_for_node_kind(kind, true);
    // The new node goes at the end of the arena, so its id is the current length.
    let id = self.nodes.len();
    let token = Node {
        id,
        kind: kind_id,
        kind_name: kind,
        is_named: true,
        is_missing: false,
        is_error: false,
        is_extra: false,
        fields: BTreeMap::new(),
        content: NodeContent::DynamicString(content),
    };
    self.nodes.push(token);
    id
}
/// Maps a field id to its name, treating the synthetic `CHILD_FIELD` specially
/// and deferring to the language for every other id.
fn field_name_for_id(&self, id: FieldId) -> Option<&'static str> {
    if id != CHILD_FIELD {
        return self.language.field_name_for_id(id);
    }
    Some(CHILD_FIELD_NAME)
}
/// Maps a field name to its id, treating `CHILD_FIELD_NAME` specially and
/// deferring to the language for every other name.
fn field_id_for_name(&self, name: &str) -> Option<FieldId> {
    if name != CHILD_FIELD_NAME {
        return self.language.field_id_for_name(name);
    }
    Some(CHILD_FIELD)
}
/// Print a node for debugging
///
/// Produces a single-entry JSON object keyed by the node's kind. A node without
/// fields maps directly to its text; a node with fields maps to an object holding
/// each field's children (the unnamed-children field is shown as `"rest"`) plus a
/// `"content"` entry with the node's text.
///
/// Panics if a child id or kind id cannot be resolved, or if a node's byte range
/// lies outside `source`.
fn print_node(&self, node: &Node, source: &str) -> Value {
    let fields: BTreeMap<&'static str, Vec<Value>> = node
        .fields
        .iter()
        .map(|(field_id, nodes)| {
            let field_name = if field_id == &CHILD_FIELD {
                "rest"
            } else {
                self.field_name_for_id(*field_id).unwrap()
            };
            let nodes: Vec<Value> = nodes
                .iter()
                .map(|id| self.print_node(self.get_node(*id).unwrap(), source))
                .collect();
            (field_name, nodes)
        })
        .collect();

    let kind = self.language.node_kind_for_id(node.kind).unwrap();
    let content = match &node.content {
        // Decode the byte range as UTF-8. Converting each byte to a `char` on its
        // own would turn every multi-byte character into several wrong ones.
        NodeContent::Range(range) => {
            String::from_utf8_lossy(&source.as_bytes()[range.start_byte..range.end_byte])
                .into_owned()
        }
        NodeContent::String(s) => s.to_string(),
        NodeContent::DynamicString(s) => s.clone(),
    };

    let mut value = BTreeMap::new();
    if fields.is_empty() {
        value.insert(kind, json!(content));
    } else {
        let mut fields: BTreeMap<_, _> =
            fields.into_iter().map(|(k, v)| (k, json!(v))).collect();
        fields.insert("content", json!(content));
        value.insert(kind, json!(fields));
    }
    json!(value)
}
/// Return an example AST, for testing and to fill implementation gaps
///
/// The tree is built by hand and represents `x = 1`. The numeric kind and field
/// ids below are hard-coded.
/// NOTE(review): they presumably come from the Ruby grammar; confirm they are
/// valid for the `language` passed in.
pub fn example(language: tree_sitter::Language) -> Self {
// x = 1
Self {
root: 0,
language,
nodes: vec![
// assignment
Node {
id: 0,
kind: 276,
kind_name: "assignment",
fields: {
let mut map = BTreeMap::new();
// Field 18 holds node 1 (the identifier), field 28 holds node 3 (the integer).
map.insert(18, vec![1]);
map.insert(28, vec![3]);
map
},
content: NodeContent::String("x = 1"),
is_missing: false,
is_error: false,
is_extra: false,
is_named: true,
},
// identifier
Node {
id: 1,
kind: 1,
kind_name: "identifier",
fields: BTreeMap::new(),
content: NodeContent::String("x"),
is_missing: false,
is_error: false,
is_extra: false,
is_named: true,
},
// "="
// Present in the node list, but not referenced from any field of the assignment.
Node {
id: 2,
kind: 17,
kind_name: "=",
fields: BTreeMap::new(),
content: NodeContent::String("="),
is_missing: false,
is_error: false,
is_extra: false,
is_named: false,
},
// integer
Node {
id: 3,
kind: 110,
kind_name: "integer",
fields: BTreeMap::new(),
content: NodeContent::String("1"),
is_missing: false,
is_error: false,
is_extra: false,
is_named: true,
},
],
}
}
/// Returns the id of the named node kind `kind`, or `None` when the language
/// reports id 0 for it.
fn id_for_node_kind(&self, kind: &str) -> Option<KindId> {
    match self.language.id_for_node_kind(kind, true) {
        0 => None,
        id => Some(id),
    }
}
/// Returns the id of the unnamed node kind `kind`, or `None` when the language
/// reports id 0 for it.
fn id_for_unnamed_node_kind(&self, kind: &str) -> Option<KindId> {
    match self.language.id_for_node_kind(kind, false) {
        0 => None,
        id => Some(id),
    }
}
}
/// A node in our AST
///
/// Children are grouped by field id; each field maps to the ids of its child
/// nodes in order.
#[derive(PartialEq, Eq, Debug, Clone, Serialize)]
pub struct Node {
// Index of this node in the owning AST's node vector.
id: Id,
// Numeric kind id, together with the matching kind name.
kind: KindId,
kind_name: &'static str,
fields: BTreeMap<FieldId, Vec<Id>>,
// Either a range in the original source or synthesized text.
content: NodeContent,
is_named: bool,
is_missing: bool,
is_extra: bool,
is_error: bool,
}
impl Node {
    /// Id of this node within its AST.
    pub fn id(&self) -> Id {
        self.id
    }

    /// Name of this node's kind.
    pub fn kind(&self) -> &'static str {
        self.kind_name
    }

    /// Whether this is a named node.
    pub fn is_named(&self) -> bool {
        self.is_named
    }

    /// Whether this node is flagged as missing.
    pub fn is_missing(&self) -> bool {
        self.is_missing
    }

    /// Whether this node is flagged as extra.
    pub fn is_extra(&self) -> bool {
        self.is_extra
    }

    /// Whether this node is flagged as an error.
    pub fn is_error(&self) -> bool {
        self.is_error
    }

    /// Placeholder range (all zeros) for nodes that have no source location.
    fn fake_range(&self) -> tree_sitter::Range {
        tree_sitter::Range {
            start_byte: 0,
            end_byte: 0,
            start_point: self.fake_point(),
            end_point: self.fake_point(),
        }
    }

    /// Placeholder position (row 0, column 0) for nodes that have no source location.
    fn fake_point(&self) -> tree_sitter::Point {
        tree_sitter::Point { row: 0, column: 0 }
    }

    /// Start position of the node, or the placeholder position for synthesized nodes.
    pub fn start_position(&self) -> tree_sitter::Point {
        match &self.content {
            NodeContent::Range(range) => range.start_point,
            NodeContent::String(_) | NodeContent::DynamicString(_) => self.fake_point(),
        }
    }

    /// End position of the node, or the placeholder position for synthesized nodes.
    pub fn end_position(&self) -> tree_sitter::Point {
        match &self.content {
            NodeContent::Range(range) => range.end_point,
            NodeContent::String(_) | NodeContent::DynamicString(_) => self.fake_point(),
        }
    }

    /// Start byte offset of the node, or 0 for synthesized nodes.
    pub fn start_byte(&self) -> usize {
        match &self.content {
            NodeContent::Range(range) => range.start_byte,
            NodeContent::String(_) | NodeContent::DynamicString(_) => 0,
        }
    }

    /// End byte offset of the node, or 0 for synthesized nodes.
    pub fn end_byte(&self) -> usize {
        match &self.content {
            NodeContent::Range(range) => range.end_byte,
            NodeContent::String(_) | NodeContent::DynamicString(_) => 0,
        }
    }

    /// Byte range of the node in the source; empty (`0..0`) for synthesized nodes.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        std::ops::Range {
            start: self.start_byte(),
            end: self.end_byte(),
        }
    }

    /// Text of a synthesized node, or `None` when the content is a source range.
    pub fn opt_string_content(&self) -> Option<String> {
        match &self.content {
            NodeContent::Range(_) => None,
            NodeContent::String(text) => Some((*text).to_owned()),
            NodeContent::DynamicString(text) => Some(text.clone()),
        }
    }
}
/// The contents of a node is either a range in the original source file,
/// or a new string if the node is synthesized.
///
/// Synthesized text is either a static string or an owned, dynamically built one.
#[derive(PartialEq, Eq, Debug, Clone, Serialize)]
pub enum NodeContent {
// Serialized through the helper type in the `range` module.
Range(#[serde(with = "range::Range")] tree_sitter::Range),
String(&'static str),
DynamicString(String),
}
impl From<&'static str> for NodeContent {
fn from(value: &'static str) -> Self {
NodeContent::String(value)
}
}
/// A tree-sitter range becomes content that refers to the original source.
impl From<tree_sitter::Range> for NodeContent {
    fn from(value: tree_sitter::Range) -> Self {
        Self::Range(value)
    }
}
/// A rewrite rule: a query selecting nodes, plus a transform that produces the
/// replacement nodes from the query's captures.
pub struct Rule {
    query: QueryNode,
    transform: Box<dyn Fn(&mut Ast, Captures) -> Vec<Id>>,
}

impl Rule {
    /// Creates a rule from a query and the transform to run on each match.
    pub fn new(query: QueryNode, transform: Box<dyn Fn(&mut Ast, Captures) -> Vec<Id>>) -> Self {
        Self { query, transform }
    }

    /// Tries the rule on `node`.
    ///
    /// Returns the ids produced by the transform when the query matches and
    /// `None` otherwise. Panics if evaluating the query reports an error.
    fn try_rule(&self, ast: &mut Ast, node: Id) -> Option<Vec<Id>> {
        let mut captures = Captures::new();
        let matched = self.query.do_match(ast, node, &mut captures).unwrap();
        if !matched {
            return None;
        }
        Some((self.transform)(ast, captures))
    }
}
/// Applies `rules` to the node `id` and, recursively, to everything below it.
///
/// The first rule whose query matches replaces the node; the rules are then
/// applied again to each replacement. If no rule matches, the node is copied
/// with its children rewritten the same way. Existing nodes are never modified:
/// results are always appended to `ast`.
///
/// Returns the ids of the nodes that take the place of `id`. A rule may return
/// zero or several nodes. Panics if `id` is not a valid node id.
fn apply_rules(rules: &[Rule], ast: &mut Ast, id: Id) -> Vec<Id> {
    // apply the transformation rules on this node
    for rule in rules {
        if let Some(replacements) = rule.try_rule(ast, id) {
            // We transformed it so now recurse into the result
            return replacements
                .into_iter()
                .flat_map(|new_id| apply_rules(rules, ast, new_id))
                .collect();
        }
    }

    // copy the current node
    let mut node = ast.nodes[id].clone();
    // recursively descend into all the fields
    for children in node.fields.values_mut() {
        // Take the old child list out so it can be consumed while the
        // rewritten list is collected in its place.
        *children = mem::take(children)
            .into_iter()
            .flat_map(|child| apply_rules(rules, ast, child))
            .collect();
    }

    let new_id = ast.nodes.len();
    node.id = new_id;
    ast.nodes.push(node);
    vec![new_id]
}
/// Runs a set of rewrite rules over trees of a single language.
pub struct Runner {
    language: tree_sitter::Language,
    rules: Vec<Rule>,
}

impl Runner {
    /// Creates a runner that applies `rules` to trees parsed with `language`.
    pub fn new(language: tree_sitter::Language, rules: Vec<Rule>) -> Self {
        Self { language, rules }
    }

    /// Converts `tree` into an [`Ast`], applies the rules starting at its root and
    /// makes the result the new root.
    ///
    /// The original nodes stay in the returned AST; only the root changes.
    ///
    /// # Panics
    ///
    /// Panics if rewriting the root does not yield exactly one node.
    pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Ast {
        let mut ast = Ast::from_tree(self.language, tree);
        let root = ast.get_root();
        let res = apply_rules(&self.rules, &mut ast, root);
        if res.len() != 1 {
            panic!("Expected at exactly one result node, got {}", res.len());
        }
        ast.set_root(res[0]);
        ast
    }

    /// Parses `input` with the runner's language and rewrites the resulting tree.
    ///
    /// # Panics
    ///
    /// Panics if the parser rejects the language, if parsing yields no tree, or
    /// under the conditions listed for [`Runner::run_from_tree`].
    pub fn run(&self, input: &str) -> Ast {
        // Parse the input into an AST
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(self.language)
            .expect("the parser should accept the runner's language");
        let tree = parser
            .parse(input, None)
            .expect("parsing should produce a tree");
        self.run_from_tree(&tree)
    }
}

34
shared/yeast/src/print.rs Normal file
View File

@@ -0,0 +1,34 @@
use crate::{cursor::Cursor, AstCursor, Node};
/// Debugging helper that walks an AST with a cursor and prints each node when
/// it is entered and when it is left.
pub struct Printer {}
impl Printer {
/// Walks the tree reachable from `cursor`, calling `enter_node` and `leave_node`
/// for the nodes it visits.
pub fn visit(&mut self, mut cursor: AstCursor<'_>) {
self.enter_node(cursor.node());
// `recurse` says whether to descend into the current node's children. It is
// cleared after moving back up to a parent, so that the parent's children
// are not visited a second time.
let mut recurse = true;
loop {
if recurse && cursor.goto_first_child() {
recurse = self.enter_node(cursor.node());
} else {
self.leave_node(cursor.node());
if cursor.goto_next_sibling() {
recurse = self.enter_node(cursor.node());
} else if cursor.goto_parent() {
recurse = false;
} else {
// No sibling and no parent: the traversal is complete.
break;
}
}
}
}
/// Prints the node being entered. Always returns `true`; `visit` uses the
/// result to decide whether to descend into the node's children.
pub fn enter_node(&mut self, node: &Node) -> bool {
println!("enter_node: {:?}", node);
true
}
/// Prints the node being left. Always returns `true`; `visit` ignores the result.
pub fn leave_node(&mut self, node: &Node) -> bool {
println!("leave_node: {:?}", node);
true
}
}

243
shared/yeast/src/query.rs Normal file
View File

@@ -0,0 +1,243 @@
use crate::{captures::Captures, Ast, Id};
/// A pattern that is matched against a single AST node.
#[derive(Debug, Clone)]
pub enum QueryNode {
// Matches any node.
Any(),
// Matches a named node of the given kind; each listed field is matched
// against the corresponding list of child patterns.
Node {
kind: &'static str,
children: Vec<(&'static str, Vec<QueryListElem>)>,
},
// Matches an unnamed node of the given kind.
UnnamedNode {
kind: &'static str,
},
// Records the node under the name `capture`, then matches it against `node`.
Capture {
capture: &'static str,
node: Box<QueryNode>,
},
}
/// One element of a pattern that is matched against a list of child nodes.
#[derive(Debug, Clone)]
pub enum QueryListElem {
// A sub-list of patterns matched repeatedly, as controlled by `rep`.
Repeated { children: Vec<QueryListElem>, rep: Rep },
// A pattern that consumes exactly one child.
SingleNode(QueryNode),
}
/// How many times a repeated pattern may match.
/// The query macros produce these from `*`, `+` and `?` respectively.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum Rep {
ZeroOrMore,
OneOrMore,
ZeroOrOne,
}
impl QueryNode {
    /// Matches this pattern against the node `node` of `ast`, recording captures
    /// in `matches`.
    ///
    /// Returns `Ok(true)` on a match and `Ok(false)` otherwise. Returns `Err` when
    /// the pattern names a node kind or field the language does not have. Panics
    /// if `node` is not a valid id. A capture is recorded before its sub-pattern is
    /// tried, so `matches` may hold entries from a match that ultimately failed.
    pub fn do_match(&self, ast: &Ast, node: Id, matches: &mut Captures) -> Result<bool, String> {
        match self {
            QueryNode::Any() => Ok(true),
            QueryNode::Capture {
                capture,
                node: sub_query,
            } => {
                matches.insert(capture, node);
                sub_query.do_match(ast, node, matches)
            }
            QueryNode::UnnamedNode { kind } => {
                let candidate = ast.get_node(node).unwrap();
                match ast.id_for_unnamed_node_kind(kind) {
                    Some(target_kind) => Ok(candidate.kind == target_kind),
                    None => Err(format!("unnamed Node kind {} not found in language", kind)),
                }
            }
            QueryNode::Node { kind, children } => {
                let candidate = ast.get_node(node).unwrap();
                let target_kind = match ast.id_for_node_kind(kind) {
                    Some(id) => id,
                    None => return Err(format!("Node kind {} not found in language", kind)),
                };
                if candidate.kind != target_kind {
                    return Ok(false);
                }
                for (field, field_children) in children {
                    let field_id = match ast.field_id_for_name(field) {
                        Some(id) => id,
                        None => return Err(format!("Field {} not found in language", field)),
                    };
                    // A field the node does not have is treated as an empty child list.
                    let present: &[Id] = candidate
                        .fields
                        .get(&field_id)
                        .map(Vec::as_slice)
                        .unwrap_or(&[]);
                    let mut child_iter = present.iter().cloned();
                    let field_matched =
                        match_children(field_children.iter(), ast, &mut child_iter, matches)?;
                    if !field_matched {
                        return Ok(false);
                    }
                }
                Ok(true)
            }
        }
    }
}
/// Matches the list patterns in `child_matchers`, in order, against the children
/// still available in `remaining_children`.
///
/// Stops at the first pattern that fails and returns `Ok(false)`; returns
/// `Ok(true)` once every pattern has matched. Children left over after the last
/// pattern are not treated as a failure.
fn match_children<'a>(
    child_matchers: impl Iterator<Item = &'a QueryListElem>,
    ast: &Ast,
    remaining_children: &mut (impl Iterator<Item = Id> + Clone),
    matches: &mut Captures,
) -> Result<bool, String> {
    for matcher in child_matchers {
        let matched = matcher.do_match(ast, remaining_children, matches)?;
        if !matched {
            return Ok(false);
        }
    }
    Ok(true)
}
impl QueryListElem {
    /// Matches this list element against the front of `remaining_children`,
    /// consuming the children it matches and recording captures in `matches`.
    ///
    /// A `SingleNode` consumes exactly one child and fails if none is left. A
    /// `Repeated` element matches its sub-list as often as `rep` allows; an attempt
    /// that fails is rolled back, so it neither consumes children nor leaves
    /// captures behind.
    ///
    /// Returns `Err` when a nested pattern names a node kind or field that the
    /// language does not have.
    fn do_match(
        &self,
        ast: &Ast,
        remaining_children: &mut (impl Iterator<Item = Id> + Clone),
        matches: &mut Captures,
    ) -> Result<bool, String> {
        match self {
            QueryListElem::Repeated { children, rep } => {
                let mut iters = 0;
                // Number of children still available. Used to detect a repetition
                // that succeeds without consuming anything.
                let mut remaining = remaining_children.clone().count();
                loop {
                    let matches_initial = matches.clone();
                    let start = remaining_children.clone();
                    if !match_children(children.iter(), ast, remaining_children, matches)? {
                        // Reset the state
                        *remaining_children = start;
                        *matches = matches_initial;
                        break;
                    }
                    iters += 1;
                    if *rep == Rep::ZeroOrOne {
                        break;
                    }
                    // A successful round that consumed no child (an empty sub-list,
                    // or a nested `*`/`?`) would succeed in the same way forever,
                    // so stop after it.
                    let remaining_now = remaining_children.clone().count();
                    if remaining_now == remaining {
                        break;
                    }
                    remaining = remaining_now;
                }
                // `+` is the only repetition that can fail: it needs at least one round.
                Ok(!(*rep == Rep::OneOrMore && iters == 0))
            }
            QueryListElem::SingleNode(sub_query) => {
                if let Some(child) = remaining_children.next() {
                    sub_query.do_match(ast, child, matches)
                } else {
                    Ok(false)
                }
            }
        }
    }
}
/// Builds a `QueryNode` from a small pattern syntax.
///
/// Supported forms: `_` (any node), a parenthesised pattern, `kind field: pattern ...`
/// (a named node with field patterns), a string literal (an unnamed node),
/// `pattern @ name` (capture) and `@ name` (capture of any node).
#[macro_export]
macro_rules! query {
// _
(_) => { $crate::query::QueryNode::Any()};
// Parens
(($($child:tt)*)) => { query!($($child)*)};
// Match a node of a given kind
($node_id:ident $($rest:tt)*) => { $crate::query::QueryNode::Node{ kind: stringify!($node_id), children: query_fields!($($rest)*)}};
// Match an unnamed node of a given kind (using a string literal)
($node_id:literal) => { $crate::query::QueryNode::UnnamedNode{ kind: $node_id}};
// Capture
($child:tt @ $capture_id:ident) => { $crate::query::QueryNode::Capture{ capture: stringify!($capture_id), node: Box::new(query!($child))}};
// Capture only (implicit _)
(@ $capture_id:ident) => { $crate::query::QueryNode::Capture{ capture: stringify!($capture_id), node: Box::new($crate::query::QueryNode::Any())}};
}
// We use an accumulator to build up the list of children incrementally so this starts the tail recursion
/// Builds a `Vec<QueryListElem>` from a sequence of child patterns by handing
/// the tokens to `_query_list!` with an empty accumulator.
#[macro_export]
macro_rules! query_list {
($($rest:tt)*) => { _query_list!( @ACC [] $($rest)* )};
}
/// Builds the `(field name, child patterns)` list of a node pattern by handing
/// the tokens to `_query_fields!` with an empty accumulator.
#[macro_export]
macro_rules! query_fields {
($($rest:tt)*) => { _query_fields!( @ACC [] $($rest)* )};
}
/// Implementation detail of `query_fields!`: consumes one `field: pattern` or
/// `field*: (patterns)` entry per step and appends it to the accumulator.
#[macro_export]
macro_rules! _query_fields {
// vec! allows a trailing comma so we assume that either the accumulator is empty or ends in a comma
// Base case: no more tokens, so return the accumulator
(@ACC [$($acc:tt)*]) => { vec![$($acc)*]};
// Parse field * : (nodeList)
(@ACC [$($acc:tt)*] $field_name:ident * : ($($sub_node:tt)*) $($rest:tt)*) => { _query_fields!( @ACC [ $($acc)* (stringify!($field_name), query_list!($($sub_node)*)),] $($rest)*)};
// Parse field : node
(@ACC [$($acc:tt)*] $field_name:ident : $sub_node:tt $($rest:tt)*) => { _query_fields!( @ACC [ $($acc)* (stringify!($field_name), vec![$crate::query::QueryListElem::SingleNode(query!($sub_node))]),] $($rest)* )};
}
/// Implementation detail of `query_list!`: consumes one list element per step
/// (a repeated group, a bare capture or a single pattern) and appends it to
/// the accumulator.
#[macro_export]
macro_rules! _query_list {
// vec! allows a trailing comma so we assume that either the accumulator is empty or ends in a comma
// Base case: no more tokens, so return the accumulator
(@ACC [$($acc:tt)*]) => { vec![$($acc)*]};
// Parse (nodeList)*
(@ACC [$($acc:tt)*] ($($sub_node:tt)*) * $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::ZeroOrMore},] $($rest)*)};
// Parse (nodeList)+
(@ACC [$($acc:tt)*] ($($sub_node:tt)*) + $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::OneOrMore},] $($rest)*)};
// Parse (nodeList)?
(@ACC [$($acc:tt)*] ($($sub_node:tt)*) ? $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::Repeated{children: query_list!($($sub_node)*), rep: $crate::query::Rep::ZeroOrOne},] $($rest)*)};
// Parse node (treating @cap as a single node)
(@ACC [$($acc:tt)*] @ $sub_node:tt $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::SingleNode(query!(@$sub_node)),] $($rest)*)};
// Parse node (this must be last as it only applies if the earlier cases don't match)
(@ACC [$($acc:tt)*] $sub_node:tt $($rest:tt)*) => { _query_list!( @ACC [ $($acc)* $crate::query::QueryListElem::SingleNode(query!($sub_node)),] $($rest)*)};
}
pub use query;
pub use query_list;
#[cfg(test)]
mod tests {
use crate::query::*;
// Smoke test for the `query!` macro: it only checks that each pattern form
// expands and compiles. The resulting queries are printed, not asserted on.
#[test]
fn it_works() {
let query1: QueryNode = query!(_);
println!("{:?}", query1);
let query2 = query!(foo);
println!("{:?}", query2);
let query3 = query!(foo child: (_));
println!("{:?}", query3);
let query4 = query!(foo child*:((_)*));
println!("{:?}", query4);
let query5: QueryNode = query!(foo child*:((_)*));
println!("{:?}", query5);
let query6: QueryNode = query!(_ @ bar);
println!("{:?}", query6);
let query7: QueryNode = query!(foo child:(_ @ bar));
println!("{:?}", query7);
// Note: this shadows the previous `query7` binding.
let query7: QueryNode = query!(foo child:(@ bar));
println!("{:?}", query7);
let query8: QueryNode = query!((assignment
left: (element_reference
object: (@ obj)
child: (_ @ index)
)
right: (_ @ rhs)
));
println!("{:?}", query8);
let query9: QueryNode = query!((assignment
left: (element_reference
object * : ((@ obj)*)
child: (_ @ index)
)
right: (_ @ rhs)
));
println!("{:?}", query9);
let query10 = query!(
program
child: (assignment
left: (@left)
right: (@right))
);
println!("{:?}", query10);
}
}

21
shared/yeast/src/range.rs Normal file
View File

@@ -0,0 +1,21 @@
//! (de)-serialize helpers for tree_sitter::Range
use serde::{Deserialize, Serialize};
/// Serde "remote" definition mirroring `tree_sitter::Point`, so that the foreign
/// type can be (de)serialized via `#[serde(with = "Point")]`.
#[derive(Serialize, Deserialize)]
#[serde(remote = "tree_sitter::Point")]
pub struct Point {
pub row: usize,
pub column: usize,
}
/// Serde "remote" definition mirroring `tree_sitter::Range`; its two points are
/// (de)serialized through the `Point` helper in this module.
#[derive(Serialize, Deserialize)]
#[serde(remote = "tree_sitter::Range")]
pub struct Range {
pub start_byte: usize,
pub end_byte: usize,
#[serde(with = "Point")]
pub start_point: tree_sitter::Point,
#[serde(with = "Point")]
pub end_point: tree_sitter::Point,
}

133
shared/yeast/src/rules.rs Normal file
View File

@@ -0,0 +1,133 @@
use std::cell::Cell;
use std::rc::Rc;
use crate::{captures::Captures, *};
/// Builds the desugaring rules, in the order in which they are tried:
///
/// 1. an assignment whose left-hand side is a `left_assignment_list` becomes an
///    assignment to a fresh temporary followed by one indexed assignment per
///    identifier in the list;
/// 2. a `for` loop becomes a call to `each` on the iterated value, with a block
///    that assigns its parameter to the loop pattern and then runs the body;
/// 3. every `"end"` token is dropped.
///
/// The transforms panic if building the replacement tree fails.
pub fn rules() -> Vec<Rule> {
    // Counter for naming temporaries. Both transforms share it (through the two
    // `Rc` handles) so that the generated names do not collide.
    let fresh_ids = Rc::new(Cell::new(0));
    let fresh_ids2: Rc<Cell<i32>> = fresh_ids.clone();

    let assign_query = query!(
        (assignment
            left: (
                left_assignment_list child*: ((((identifier) @ left) (",")?)*)
            )
            right: (@right)
        )
    );
    let assign_transform = move |ast: &mut Ast, mut match_: Captures| {
        let fresh = fresh_ids.get();
        fresh_ids.set(fresh + 1);
        let new_ident = format!("tmp-{}", fresh);
        match_.insert(
            "tmp_lhs",
            ast.create_named_token("identifier", new_ident.clone()),
        );
        // Position of the current identifier within the assignment list.
        let mut i = 0;
        // Turn each captured left-hand identifier into `<lhs> = <tmp>[<index>]`,
        // stored under the `assigns` capture.
        match_.map_captures_to("left", "assigns", &mut |old_id| {
            let mut local_capture = Captures::new();
            local_capture.insert("lhs", old_id);
            local_capture.insert(
                "tmp",
                ast.create_named_token("identifier", new_ident.clone()),
            );
            let index: i32 = i;
            i += 1;
            local_capture.insert(
                "index",
                ast.create_named_token("integer", index.to_string()),
            );
            tree_builder!(
                (assignment
                    left: (@lhs)
                    right: (
                        element_reference
                        object: (@tmp)
                        child: (@index)
                    )
                )
            )
            .build_tree(ast, &local_capture)
            .unwrap()
        });
        // Emit `<tmp> = <right>` followed by the per-identifier assignments.
        trees_builder!(
            (assignment
                left: (@tmp_lhs)
                right: (@right)
            )
            (
                @assigns
            )*
        )
        .build_trees(ast, &match_)
        .unwrap()
    };
    let assign_rule = Rule::new(assign_query, Box::new(assign_transform));

    // TODO: There is a spurious end token
    let for_query = query!(
        (for
            pattern: (@pat)
            value: (in child*: ("in" @val))
            body: (do child*: (("do")? (@body)*))
        )
    );
    let for_transform = move |ast: &mut Ast, mut match_: Captures| {
        let fresh = fresh_ids2.get();
        fresh_ids2.set(fresh + 1);
        let new_ident = format!("tmp-{}", fresh);
        // The block parameter and its use on the right-hand side share one name.
        match_.insert(
            "tmp_rhs",
            ast.create_named_token("identifier", new_ident.clone()),
        );
        match_.insert(
            "tmp_param",
            ast.create_named_token("identifier", new_ident.clone()),
        );
        match_.insert(
            "each",
            ast.create_named_token("identifier", "each".to_string()),
        );
        trees_builder!(
            (call
                receiver: (@val)
                method: (@each)
                block: (block
                    parameters: (
                        block_parameters
                        child: (@tmp_param)
                    )
                    body: (block_body
                        child*: (
                            (assignment
                                left: (@pat)
                                right: (@tmp_rhs)
                            )
                            (@body)*
                        )
                    )
                )
            )
        )
        .build_trees(ast, &match_)
        .unwrap()
    };
    let for_rule = Rule::new(for_query, Box::new(for_transform));

    // Just get rid of all end tokens as they aren't needed
    let end_query = query!(("end"));
    let end_transform = |_ast: &mut Ast, _match: Captures| vec![];
    let end_rule = Rule::new(end_query, Box::new(end_transform));

    vec![assign_rule, for_rule, end_rule]
}

View File

@@ -0,0 +1,170 @@
use crate::{captures::Captures, Ast, Id};
use std::collections::BTreeSet;
/// A template for building a single AST node from captures.
#[derive(Debug, Clone)]
pub enum TreeBuilder {
// Creates a new node of the given kind with the given fields.
Node {
kind: &'static str,
children: Vec<(&'static str, Vec<TreeChildBuilder>)>,
},
// Reuses the node bound to the named capture.
Capture {
capture: &'static str,
},
}
/// A template for one element of a child list.
#[derive(Debug, Clone)]
pub enum TreeChildBuilder {
// Builds `child` once for each set of captures obtained by un-starring the
// captures that `child` refers to.
Repeated {
child: TreeBuilder,
},
// Builds exactly one node.
SingleNode(TreeBuilder),
}
impl TreeChildBuilder {
    /// Names of all captures referenced anywhere inside this child template.
    fn get_opt_contained(&self) -> BTreeSet<&'static str> {
        let inner = match self {
            TreeChildBuilder::Repeated { child } => child,
            TreeChildBuilder::SingleNode(node) => node,
        };
        inner.get_opt_contained()
    }

    /// Builds the node(s) described by this template in `target` and appends their
    /// ids to `child_ids`.
    ///
    /// A repeated template is built once per set of captures produced by
    /// `vars.un_star` for the captures it refers to. Any error from un-starring or
    /// from building a node is returned.
    fn build_tree(
        &self,
        target: &mut Ast,
        vars: &Captures,
        child_ids: &mut Vec<Id>,
    ) -> Result<(), String> {
        match self {
            TreeChildBuilder::SingleNode(node) => {
                let built = node.build_tree(target, vars)?;
                child_ids.push(built);
            }
            TreeChildBuilder::Repeated { child } => {
                let repeated_ids = self.get_opt_contained();
                for sub_captures in vars.un_star(&repeated_ids)? {
                    let built = child.build_tree(target, &sub_captures)?;
                    child_ids.push(built);
                }
            }
        }
        Ok(())
    }
}
impl TreeBuilder {
    /// Names of all captures referenced anywhere inside this template.
    fn get_opt_contained(&self) -> BTreeSet<&'static str> {
        match self {
            TreeBuilder::Node { kind: _, children } => {
                let mut contained = BTreeSet::new();
                for (_, children) in children {
                    for child in children {
                        contained.extend(child.get_opt_contained());
                    }
                }
                contained
            }
            TreeBuilder::Capture { capture } => {
                let mut contained = BTreeSet::new();
                contained.insert(*capture);
                contained
            }
        }
    }

    /// Builds the node described by this template in `target` and returns its id.
    ///
    /// A capture template resolves to the node bound to that capture in `vars`. A
    /// node template first builds the children of each field, then creates a new
    /// named node with empty content.
    ///
    /// # Errors
    ///
    /// Returns an error if the node kind or a field name does not exist in the
    /// language, or if resolving a capture or building a child fails.
    pub fn build_tree(&self, target: &mut Ast, vars: &Captures) -> Result<Id, String> {
        match self {
            TreeBuilder::Capture { capture } => vars.get_var(capture),
            TreeBuilder::Node { kind, children } => {
                let ast_kind = target
                    .id_for_node_kind(kind)
                    .ok_or_else(|| format!("Node kind {} does not exist in language", kind))?;
                let child_vars = children
                    .iter()
                    .map(|(field, children)| {
                        let mut child_ids = Vec::new();
                        for child in children {
                            child.build_tree(target, vars, &mut child_ids)?;
                        }
                        // Only format the message when the lookup actually fails.
                        let field_id = target.field_id_for_name(field).ok_or_else(|| {
                            format!("Field {} does not exist in language", field)
                        })?;
                        Ok((field_id, child_ids))
                    })
                    .collect::<Result<_, String>>()?;
                Ok(target.create_node(ast_kind, "".into(), child_vars, true))
            }
        }
    }
}
/// Builds a `TreeBuilder` from a small template syntax: a parenthesised template,
/// `kind field: template ...` (a new node), or `@ name` (the node bound to a capture).
#[macro_export]
macro_rules! tree_builder {
// Parens
(($($child:tt)*)) => { tree_builder!($($child)*)};
// Match a node of a given kind
($node_id:ident $($rest:tt)*) => { $crate::tree_builder::TreeBuilder::Node{ kind: stringify!($node_id), children: tree_builder_fields!($($rest)*)}};
// Reference to a captured node
(@ $capture_id:ident) => { $crate::tree_builder::TreeBuilder::Capture{ capture: stringify!($capture_id)}};
}
// We use an accumulator to build up the list of children incrementally so this starts the tail recursion
/// Builds a `Vec<TreeChildBuilder>` from a sequence of child templates. An empty
/// input yields an empty vector.
#[macro_export]
macro_rules! tree_builder_child {
() => { Vec::new()};
($($rest:tt)*) => { _tree_builder_child!( @ACC [] $($rest)* )};
}
/// Implementation detail of `tree_builder_child!` and `trees_builder!`: consumes
/// one child template per step and appends it to the accumulator.
///
/// Field syntax (`name: template`) is not accepted here; fields are handled by
/// `tree_builder_fields!`. The earlier field arms expanded to a
/// `TreeChildBuilder::Field` variant that does not exist, so they could only ever
/// produce a compile error and have been removed.
#[macro_export]
macro_rules! _tree_builder_child {
    // vec! allows a trailing comma so we assume that either the accumulator is empty or ends in a comma
    // Base case: no more tokens, so return the accumulator
    (@ACC [$($acc:tt)*]) => { vec![$($acc)*] };
    // Parse (node)*
    (@ACC [$($acc:tt)*] $sub_node:tt * $($rest:tt)*) => {
        _tree_builder_child!( @ACC [ $($acc)* $crate::tree_builder::TreeChildBuilder::Repeated{child: tree_builder!($sub_node)},] $($rest)*)
    };
    // Parse node (this must be last as it only applies if the earlier cases don't match)
    (@ACC [$($acc:tt)*] $sub_node:tt $($rest:tt)*) => {
        _tree_builder_child!( @ACC [ $($acc)* $crate::tree_builder::TreeChildBuilder::SingleNode(tree_builder!($sub_node)),] $($rest)*)
    };
}
/// Implementation detail of `tree_builder_fields!`: consumes one `field: template`
/// or `field*: (templates)` entry per step and appends it to the accumulator.
#[macro_export]
macro_rules! _tree_builder_fields {
// vec! allows a trailing comma so we assume that either the accumulator is empty or ends in a comma
// Base case: no more tokens, so return the accumulator
(@ACC [$($acc:tt)*]) => { vec![$($acc)*]};
// Parse field* : node
(@ACC [$($acc:tt)*] $field_name:ident * : ($($sub_node:tt)*) $($rest:tt)*) => { _tree_builder_fields!( @ACC [ $($acc)* (stringify!($field_name), tree_builder_child!($($sub_node)*)),] $($rest)*)};
// Parse field : node
(@ACC [$($acc:tt)*] $field_name:ident : $sub_node:tt $($rest:tt)*) => { _tree_builder_fields!( @ACC [ $($acc)* (stringify!($field_name), vec![$crate::tree_builder::TreeChildBuilder::SingleNode(tree_builder!($sub_node))]),] $($rest)*)};
}
/// Builds the `(field name, child templates)` list of a node template by handing
/// the tokens to `_tree_builder_fields!` with an empty accumulator.
#[macro_export]
macro_rules! tree_builder_fields {
($($all:tt)*) => { _tree_builder_fields!( @ACC [] $($all)*)};
}
/// A template that builds a sequence of sibling trees.
pub struct TreesBuilder {
    pub children: Vec<TreeChildBuilder>,
}

impl TreesBuilder {
    /// Builds every child template, in order, in `target` and returns the ids of
    /// the resulting nodes. Stops at the first template that fails and returns
    /// its error.
    pub fn build_trees(&self, target: &mut Ast, vars: &Captures) -> Result<Vec<Id>, String> {
        let mut built_ids = Vec::new();
        for builder in self.children.iter() {
            builder.build_tree(target, vars, &mut built_ids)?;
        }
        Ok(built_ids)
    }
}
/// Builds a `TreesBuilder` from a sequence of child templates. An empty input
/// yields a builder with no children.
#[macro_export]
macro_rules! trees_builder {
() => { $crate::tree_builder::TreesBuilder { children: Vec::new()}};
($($rest:tt)*) => {$crate::tree_builder::TreesBuilder { children: _tree_builder_child!( @ACC [] $($rest)* )}};
}
pub use tree_builder;
pub use tree_builder_child;
pub use trees_builder;

110
shared/yeast/src/visitor.rs Normal file
View File

@@ -0,0 +1,110 @@
use std::collections::BTreeMap;
use tree_sitter::{Language, Tree};
use crate::{Ast, Id, Node, NodeContent, CHILD_FIELD};
/// A node under construction, together with the id of the node that was
/// current when it was added (its parent), if any.
#[derive(Debug)]
struct VisitorNode {
inner: Node,
parent: Option<Id>,
}
/// A type that can walk a TS tree and produce an `Ast`.
#[derive(Debug)]
pub(crate) struct Visitor {
// All nodes seen so far, indexed by their id.
nodes: Vec<VisitorNode>,
// The node currently being visited, if any.
current: Option<Id>,
language: Language,
}
impl Visitor {
    /// Creates a visitor with no nodes recorded yet.
    pub fn new(language: Language) -> Self {
        Self {
            nodes: Vec::new(),
            current: None,
            language,
        }
    }

    /// Walks `tree` depth-first, recording each node when it is entered and
    /// attaching it to its parent when it is left.
    pub fn visit(&mut self, tree: &Tree) {
        let mut cursor = tree.walk();
        self.enter_node(cursor.node());
        // Cleared after moving back up to a parent so that its children are not
        // visited a second time.
        let mut recurse = true;
        loop {
            if recurse && cursor.goto_first_child() {
                recurse = self.enter_node(cursor.node());
                continue;
            }
            self.leave_node(cursor.field_name(), cursor.node());
            if cursor.goto_next_sibling() {
                recurse = self.enter_node(cursor.node());
            } else if cursor.goto_parent() {
                recurse = false;
            } else {
                break;
            }
        }
    }

    /// Consumes the visitor and returns the AST, rooted at the first recorded node.
    ///
    /// Panics if no node was recorded.
    pub fn build(self) -> Ast {
        let root = self.nodes[0].inner.id; // this is likely always just 0
        let language = self.language;
        let nodes = self.nodes.into_iter().map(|n| n.inner).collect();
        Ast {
            root,
            language,
            nodes,
        }
    }

    /// Records a new node for the tree-sitter node `n`, with the current node as
    /// its parent, and returns its id.
    fn add_node(&mut self, n: tree_sitter::Node<'_>, content: NodeContent, is_named: bool) -> Id {
        let id = self.nodes.len();
        let kind_name = n.kind();
        let inner = Node {
            id,
            kind: self.language.id_for_node_kind(kind_name, is_named),
            kind_name,
            content,
            fields: BTreeMap::new(),
            is_missing: n.is_missing(),
            is_named: n.is_named(),
            is_extra: n.is_extra(),
            is_error: n.is_error(),
        };
        let parent = self.current;
        self.nodes.push(VisitorNode { inner, parent });
        id
    }

    /// Records `node` and makes it the current node. Always returns `true`, which
    /// `visit` uses as the "descend into children" flag.
    fn enter_node(&mut self, node: tree_sitter::Node<'_>) -> bool {
        let content = node.range().into();
        let id = self.add_node(node, content, node.is_named());
        self.current = Some(id);
        true
    }

    /// Attaches the current node to its parent under `field_name`, or under
    /// `CHILD_FIELD` when it has no field name, then makes the parent current.
    ///
    /// Panics if there is no current node or the field name is unknown to the language.
    fn leave_node(&mut self, field_name: Option<&'static str>, _node: tree_sitter::Node<'_>) {
        let current = &self.nodes[self.current.unwrap()];
        let child_id = current.inner.id;
        let parent = current.parent;
        if let Some(parent_id) = parent {
            let parent_node = self.nodes.get_mut(parent_id).unwrap();
            let field_id = match field_name {
                Some(field) => self.language.field_id_for_name(field).unwrap(),
                None => CHILD_FIELD,
            };
            parent_node
                .inner
                .fields
                .entry(field_id)
                .or_default()
                .push(child_id);
        }
        self.current = parent;
    }
}

View File

@@ -0,0 +1,68 @@
{
"program": {
"content": "x, y, z = foo()\n",
"rest": [
{
"assignment": {
"content": "x, y, z = foo()",
"left": [
{
"left_assignment_list": {
"content": "x, y, z",
"rest": [
{
"identifier": "x"
},
{
",": ","
},
{
"identifier": "y"
},
{
",": ","
},
{
"identifier": "z"
}
]
}
}
],
"rest": [
{
"=": "="
}
],
"right": [
{
"call": {
"arguments": [
{
"argument_list": {
"content": "()",
"rest": [
{
"(": "("
},
{
")": ")"
}
]
}
}
],
"content": "foo()",
"method": [
{
"identifier": "foo"
}
]
}
}
]
}
}
]
}
}

1
shared/yeast/tests/fixtures/1.rb vendored Normal file
View File

@@ -0,0 +1 @@
x, y, z = foo()

View File

@@ -0,0 +1,68 @@
{
"program": {
"content": "",
"rest": [
{
"assignment": {
"content": "",
"left": [
{
"call": {
"arguments": [
{
"argument_list": {
"content": "()",
"rest": [
{
"(": "("
},
{
")": ")"
}
]
}
}
],
"content": "foo()",
"method": [
{
"identifier": "foo"
}
]
}
}
],
"rest": [
{
"=": "="
}
],
"right": [
{
"left_assignment_list": {
"content": "x, y, z",
"rest": [
{
"identifier": "x"
},
{
",": ","
},
{
"identifier": "y"
},
{
",": ","
},
{
"identifier": "z"
}
]
}
}
]
}
}
]
}
}

View File

@@ -0,0 +1,119 @@
{
"program": {
"content": "for a, b in pairs_list do\n call(a, b)\n a+=b\nend",
"rest": [
{
"for": {
"body": [
{
"do": {
"content": "do\n call(a, b)\n a+=b\nend",
"rest": [
{
"do": "do"
},
{
"call": {
"arguments": [
{
"argument_list": {
"content": "(a, b)",
"rest": [
{
"(": "("
},
{
"identifier": "a"
},
{
",": ","
},
{
"identifier": "b"
},
{
")": ")"
}
]
}
}
],
"content": "call(a, b)",
"method": [
{
"identifier": "call"
}
]
}
},
{
"operator_assignment": {
"content": "a+=b",
"left": [
{
"identifier": "a"
}
],
"operator": [
{
"+=": "+="
}
],
"right": [
{
"identifier": "b"
}
]
}
},
{
"end": "end"
}
]
}
}
],
"content": "for a, b in pairs_list do\n call(a, b)\n a+=b\nend",
"pattern": [
{
"left_assignment_list": {
"content": "a, b",
"rest": [
{
"identifier": "a"
},
{
",": ","
},
{
"identifier": "b"
}
]
}
}
],
"rest": [
{
"for": "for"
}
],
"value": [
{
"in": {
"content": "in pairs_list",
"rest": [
{
"in": "in"
},
{
"identifier": "pairs_list"
}
]
}
}
]
}
}
]
}
}

View File

@@ -0,0 +1,180 @@
{
"program": {
"content": "for a, b in pairs_list do\n call(a, b)\n a+=b\nend",
"rest": [
{
"call": {
"block": [
{
"block": {
"content": "",
"parameters": [
{
"block_parameters": {
"content": "",
"rest": [
{
"identifier": "tmp-0"
}
]
}
}
],
"rest": [
{
"assignment": {
"content": "",
"left": [
{
"identifier": "tmp-1"
}
],
"right": [
{
"identifier": "tmp-0"
}
]
}
},
{
"assignment": {
"content": "",
"left": [
{
"identifier": {
"content": "",
"rest": [
{
"identifier": "a"
}
]
}
}
],
"right": [
{
"element_reference": {
"content": "",
"left": [
{
"identifier": "tmp-1"
}
],
"right": [
{
"integer": "0"
}
]
}
}
]
}
},
{
"assignment": {
"content": "",
"left": [
{
"identifier": {
"content": "",
"rest": [
{
"identifier": "b"
}
]
}
}
],
"right": [
{
"element_reference": {
"content": "",
"left": [
{
"identifier": "tmp-1"
}
],
"right": [
{
"integer": "1"
}
]
}
}
]
}
},
{
"call": {
"arguments": [
{
"argument_list": {
"content": "(a, b)",
"rest": [
{
"(": "("
},
{
"identifier": "a"
},
{
",": ","
},
{
"identifier": "b"
},
{
")": ")"
}
]
}
}
],
"content": "call(a, b)",
"method": [
{
"identifier": "call"
}
]
}
},
{
"operator_assignment": {
"content": "a+=b",
"left": [
{
"identifier": "a"
}
],
"operator": [
{
"+=": "+="
}
],
"right": [
{
"identifier": "b"
}
]
}
}
]
}
}
],
"content": "",
"method": [
{
"identifier": "each"
}
],
"receiver": [
{
"identifier": "pairs_list"
}
]
}
}
]
}
}

160
shared/yeast/tests/test.rs Normal file
View File

@@ -0,0 +1,160 @@
#![cfg(test)]
use std::cell::Cell;
use std::fs::read_to_string;
use std::path::Path;
use std::rc::Rc;
use yeast::{captures::Captures, print::Printer, *, rules::rules};
#[test]
fn test_ruby_multiple_assignment() {
    // The input is a `for` loop whose pattern is an assignment list (`a, b`).
    // It is run through the desugaring rules from `rules()`, and both the tree
    // before rewriting (rooted at node 0) and the rewritten tree are compared
    // against the JSON fixtures.
    let input = "for a, b in pairs_list do\n x=y\nend";

    // Construct the thing that runs our desugaring process
    let runner = Runner::new(tree_sitter_ruby::language(), rules());
    let old_root = 0;

    // Run it on our example
    let ast = runner.run(input);
    let new_root = ast.get_root();

    let formatted_input = serde_json::to_string_pretty(&ast.print(input, old_root)).unwrap();
    let formatted_output = serde_json::to_string_pretty(&ast.print(input, new_root)).unwrap();
    println!("before transformation: {}", formatted_input);
    println!("after transformation: {}", formatted_output);

    assert_eq!(
        formatted_input,
        read_to_string("tests/fixtures/multiple_assignment.input.json").unwrap()
    );
    assert_eq!(
        formatted_output,
        read_to_string("tests/fixtures/multiple_assignment.output.json").unwrap()
    );
}
// Parses the Ruby fixture with no rewrite rules and checks that the printed
// tree equals the stored JSON.
#[test]
fn test_parse_input() {
let input = read_to_string("tests/fixtures/1.rb").unwrap();
let parsed_expected = read_to_string("tests/fixtures/1.parsed.json").unwrap();
let runner = Runner::new(tree_sitter_ruby::language(), vec![]);
let ast = runner.run(&input);
let parsed_actual = serde_json::to_string_pretty(&ast.print(&input, ast.get_root())).unwrap();
assert_eq!(parsed_actual, parsed_expected);
}
/// Matches a query against the parsed fixture, rebuilds the assignment with its
/// two sides swapped, and compares the printed result with the stored JSON.
#[test]
fn test_query_input() {
    let input = read_to_string("tests/fixtures/1.rb").unwrap();
    let rewritten_expected = read_to_string("tests/fixtures/1.rewritten.json").unwrap();
    let runner = Runner::new(tree_sitter_ruby::language(), vec![]);
    let mut ast = runner.run(&input);

    let query = yeast::query::query!(
        program child:(
            (assignment
                left: (@left)
                right: (@right)
                child*: ((@rest)*)
            )
        )
    );
    print!("query: {:?}", query);

    let mut matches = Captures::new();
    if query.do_match(&ast, ast.get_root(), &mut matches).unwrap() {
        println!("match: {:?}", matches);
    } else {
        println!("no match");
    }

    let builder = yeast::tree_builder::tree_builder!(
        program child:
        (assignment
            left: (@right)
            right: (@left)
            child*:((@rest)*)
        )
    );
    let new_id = builder.build_tree(&mut ast, &matches).unwrap();
    let rewritten_actual = serde_json::to_string_pretty(&ast.print(&input, new_id)).unwrap();

    // The expected fixture must not be rewritten here: doing so on every run
    // would make all later runs pass no matter what the code produces. Call
    // `write_expected` by hand when a fixture really needs regenerating.
    assert_eq!(rewritten_actual, rewritten_expected);
}

/// Useful for updating fixtures
/// ```
/// write_expected("tests/fixtures/1.parsed.json", &parsed_actual);
/// ```
#[allow(dead_code)] // only called ad hoc while regenerating fixtures
fn write_expected<P: AsRef<Path>>(file: P, content: &str) {
    use std::io::Write;
    std::fs::File::create(file)
        .unwrap()
        .write_all(content.as_bytes())
        .unwrap();
}
// Exercises `AstCursor` navigation on the parsed fixture, then runs the debug
// `Printer` over the whole tree.
// NOTE(review): the literal node ids (26, 19, 14, 15) and field id 18 appear to
// depend on the order in which nodes are created for this fixture and on the
// grammar's field numbering; confirm before changing the fixture or the runner.
#[test]
fn test_cursor() {
let input = read_to_string("tests/fixtures/1.rb").unwrap();
let runner = Runner::new(tree_sitter_ruby::language(), vec![]);
let ast = runner.run(&input);
let mut cursor = AstCursor::new(&ast);
// The cursor starts at the root, which is not inside any field.
assert_eq!(cursor.node().id(), ast.get_root());
assert_eq!(cursor.field_id(), None);
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().id(), 26);
// A failed move leaves the cursor where it was.
assert!(!cursor.goto_next_sibling());
assert_eq!(cursor.node().id(), 26);
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().id(), 19);
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().id(), 14);
assert!(!cursor.goto_first_child());
assert_eq!(cursor.node().id(), 14);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().id(), 15);
assert_eq!(cursor.field_id(), Some(CHILD_FIELD));
// Going back up restores the field the parent itself sits in.
assert!(cursor.goto_parent());
assert_eq!(cursor.node().id(), 19);
assert_eq!(cursor.field_id(), Some(18));
let cursor = AstCursor::new(&ast);
let mut printer = Printer {};
printer.visit(cursor);
}