Files
codeql/shared/yeast/tests/test.rs

1152 lines
36 KiB
Rust

#![cfg(test)]
use yeast::dump::{dump_ast, dump_ast_with_type_errors};
use yeast::*;
const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
/// Helper: parse Ruby source with no rules, return dump.
fn parse_and_dump(input: &str) -> String {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
dump_ast(&ast, ast.get_root(), input)
}
/// Helper: wraps `rules` in a single repeating phase named `"test"` and
/// delegates to `run_phased_and_dump`, returning the dump it produces.
fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
    let only_phase = Phase::new("test", PhaseKind::Repeating, rules);
    run_phased_and_dump(input, vec![only_phase])
}
/// Helper: runs `phases` over Ruby `input` using the schema loaded from
/// `OUTPUT_SCHEMA_YAML`, then returns the dump of the resulting tree.
///
/// Panics if the schema cannot be built or the runner reports an error.
fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let tree = runner.run(input).unwrap();
    dump_ast(&tree, tree.get_root(), input)
}
/// Helper: same setup as `run_and_dump` (one repeating `"test"` phase and
/// the `OUTPUT_SCHEMA_YAML` schema), but hands back the error produced by
/// the runner. Panics if the run unexpectedly succeeds.
fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let outcome = runner.run(input);
    outcome.expect_err("expected runner to return an error")
}
/// Helper: parse Ruby source with no rules and dump with schema type errors.
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
let ast = runner.run(input).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
/// Helper: parses `input` as Ruby with an empty rule list, then dumps the
/// tree with type errors. Unlike `parse_and_dump_typed`, the schema is built
/// together with the Ruby language so field checks align with parser fields.
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let parser = Runner::new(language.clone(), &[]);
    let tree = parser.run(input).unwrap();
    let checked_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &language).unwrap();
    dump_ast_with_type_errors(&tree, tree.get_root(), input, &checked_schema)
}
/// Helper: applies `rules` (as one repeating `"test"` phase) to Ruby `input`
/// under the schema in `schema_yaml`, then dumps the tree with type errors
/// reported against that same schema.
fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> String {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let checked_schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
    let only_phase = Phase::new("test", PhaseKind::Repeating, rules);
    let phases = vec![only_phase];
    let runner = Runner::with_schema(language, &checked_schema, &phases);
    let tree = runner.run(input).unwrap();
    dump_ast_with_type_errors(&tree, tree.get_root(), input, &checked_schema)
}
/// Assert that a dump equals the expected string, treating the expected
/// string as an indented multiline literal.
///
/// The common leading indentation (measured in bytes over the non-blank
/// lines of `expected`) is removed from every line that is long enough to
/// carry it. Both sides are then trimmed, which discards leading and
/// trailing blank lines. This lets test assertions place the first line at
/// the same indentation as the rest of the body.
///
/// # Panics
///
/// Panics (attributed to the caller via `#[track_caller]`) when the trimmed
/// `actual` differs from the trimmed, dedented `expected`.
#[track_caller]
fn assert_dump_eq(actual: &str, expected: &str) {
    // Width of the shallowest indentation among lines that carry content.
    let min_indent = expected
        .lines()
        .filter(|l| !l.trim().is_empty())
        .map(|l| l.len() - l.trim_start().len())
        .min()
        .unwrap_or(0);
    let dedented: String = expected
        .lines()
        // Checked slicing: a whitespace-only line may be shorter than
        // `min_indent`, or may hold multi-byte whitespace so that
        // `min_indent` is not a char boundary. Such lines are kept as-is
        // instead of panicking on an invalid byte index.
        .map(|l| l.get(min_indent..).unwrap_or(l))
        .collect::<Vec<_>>()
        .join("\n");
    assert_eq!(actual.trim(), dedented.trim());
}
// ---- Parsing tests ----
#[test]
fn test_parse_assignment() {
    // Dump of a plain `x = 1` parse with no rewrite rules applied.
    let rendered = parse_and_dump("x = 1");
    assert_dump_eq(
        &rendered,
        r#"
program
assignment
left: identifier "x"
right: integer "1"
"#,
    );
}
#[test]
fn test_parse_multiple_assignment() {
    // Destructuring assignment from a call, dumped without any rewriting.
    let rendered = parse_and_dump("x, y = foo()");
    assert_dump_eq(
        &rendered,
        r#"
program
assignment
left:
left_assignment_list
identifier "x"
identifier "y"
right:
call
arguments:
argument_list
method: identifier "foo"
"#,
    );
}
#[test]
fn test_parse_for_loop() {
    // A `for ... in ... do ... end` loop, dumped without any rewriting.
    let source = "for x in list do\n y\nend";
    let rendered = parse_and_dump(source);
    assert_dump_eq(
        &rendered,
        r#"
program
for
body:
do
identifier "y"
pattern: identifier "x"
value:
in
identifier "list"
"#,
    );
}
#[test]
fn test_dump_highlights_type_errors_inline() {
    // The schema only lists `identifier` for `assignment.right`, so the
    // integer on the right-hand side must be flagged inline in the dump.
    let restrictive_schema = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
    let annotated = parse_and_dump_typed("x = 1", restrictive_schema);
    assert!(annotated.contains("integer \"1\" <-- ERROR:"));
}
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
    let restrictive_schema = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
    // The rewrite rebuilds the assignment from its own captures, so the
    // right-hand `integer` node survives the transformation. Because the
    // schema above does not list `integer`, the dump must flag it inline.
    let rebuild_assignment = yeast::rule!(
        (assignment left: (_) @left right: (_) @right)
        =>
        (assignment
            left: {left}
            right: {right}
        )
    );
    let annotated = run_and_dump_typed("x = 1", vec![rebuild_assignment], restrictive_schema);
    assert!(annotated.contains("integer \"1\" <-- ERROR:"));
    assert!(annotated.contains("node kind 'integer' not in schema"));
}
#[test]
fn test_dump_reports_undeclared_field_on_node() {
    // `assignment` is declared with a `left` field only, so the parsed
    // `right` field has to be reported as undeclared.
    let left_only_schema = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
identifier:
"#;
    let annotated = parse_and_dump_typed_with_language("x = y", left_only_schema);
    assert!(annotated.contains("right: identifier \"y\" <-- ERROR:"));
    assert!(annotated.contains("the node 'assignment' has no field 'right'"));
}
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
    // `integer` is a known kind here, but `assignment.right` only lists
    // `identifier`, so the error must describe the kind mismatch.
    let mismatched_schema = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
integer:
"#;
    let annotated = parse_and_dump_typed_with_language("x = 1", mismatched_schema);
    assert!(annotated.contains("right: integer \"1\" <-- ERROR:"));
    assert!(annotated.contains("should contain"));
    assert!(annotated.contains("but got integer"));
}
// ---- Query tests ----
#[test]
fn test_query_match() {
    // A query mirroring the parsed shape matches at the root and binds
    // both capture names.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let pattern = yeast::query!(
        (program
            child: (assignment
                left: (_) @left
                right: (_) @right
            )
        )
    );
    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern
        .do_match(&tree, tree.get_root(), &mut bindings)
        .unwrap();
    assert!(did_match);
    assert!(bindings.get_var("left").is_ok());
    assert!(bindings.get_var("right").is_ok());
}
#[test]
fn test_query_no_match() {
    // The source holds an assignment, so a query asking for a `call`
    // child of `program` must report no match.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let pattern = yeast::query!(
        (program
            child: (call
                method: (_) @m
            )
        )
    );
    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern
        .do_match(&tree, tree.get_root(), &mut bindings)
        .unwrap();
    assert!(!did_match);
}
#[test]
fn test_query_skips_extras_in_positional_match() {
    // Regression test. During forward-scan, a positional wildcard `(_)`
    // must never bind to a tree-sitter `extras` node such as a comment:
    // extras are conceptually invisible between siblings, as in
    // tree-sitter's own query semantics. If they were bound, a later rule
    // translating the captured comment to nothing (a common idiom, e.g.
    // `(comment) => ()` in Swift) would empty the capture's match list and
    // the transform would fail with "Variable X has 0 matches".
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("[1, # comment\n2]").unwrap();

    // Descend from `program` to its first child, which must be the array.
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let array_id = cursor.node_id();
    assert_eq!(tree.get_node(array_id).unwrap().kind(), "array");

    // Both wildcards have to land on integers; the comment lying between
    // the two elements is skipped.
    let pattern = yeast::query!((array (_) @a (_) @b));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, array_id, &mut bindings).unwrap();
    assert!(did_match);

    let first = tree.get_node(bindings.get_var("a").unwrap()).unwrap();
    assert_eq!(first.kind(), "integer");
    let second = tree.get_node(bindings.get_var("b").unwrap()).unwrap();
    assert_eq!(second.kind(), "integer");
}
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    // Replace every integer with a fresh identifier node.
    let replace_integers = yeast::rule!((integer) => (identifier "replaced"));
    let phases = vec![Phase::new(
        "test",
        PhaseKind::Repeating,
        vec![replace_integers],
    )];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let input = "x = 1";
    let tree = runner.run(input).unwrap();

    // The arena must hold more nodes than are reachable from the root.
    let reachable_ids = tree.reachable_node_ids();
    assert!(
        tree.nodes().len() > reachable_ids.len(),
        "expected rewrite to leave orphaned arena nodes"
    );

    // The dump only shows the replacement, never the replaced integer.
    let rendered = dump_ast(&tree, tree.get_root(), input);
    assert!(rendered.contains("identifier \"replaced\""));
    assert!(!rendered.contains("integer \"1\""));
}
#[test]
fn test_query_repeated_capture() {
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x, y, z = 1").unwrap();
    let pattern = yeast::query!(
        (assignment
            left: (left_assignment_list
                (identifier)* @names
            )
        )
    );
    // The query is rooted at the assignment, i.e. the first named child
    // of `program`, so step the cursor down one level to reach it.
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let assignment_id = cursor.node_id();

    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match);
    // One binding per identifier on the left-hand side.
    let bound_names = bindings.get_all("names");
    assert_eq!(bound_names.len(), 3);
}
#[test]
fn test_capture_unnamed_node_parenthesized() {
    // The parenthesized form `("=") @op` must capture the unnamed `=`
    // token that sits between the `left` and `right` fields.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let pattern = yeast::query!(
        (assignment
            left: (_) @lhs
            ("=") @op
            right: (_) @rhs
        )
    );
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let assignment_id = cursor.node_id();

    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match);

    let op_node = tree.get_node(bindings.get_var("op").unwrap()).unwrap();
    assert_eq!(op_node.kind(), "=");
    assert!(!op_node.is_named());
}
#[test]
fn test_capture_unnamed_node_bare_literal() {
    // Writing `"=" @op` without the surrounding parentheses must behave
    // exactly like `("=") @op`.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let pattern = yeast::query!(
        (assignment
            left: (_) @lhs
            "=" @op
            right: (_) @rhs
        )
    );
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let assignment_id = cursor.node_id();

    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match);

    let op_node = tree.get_node(bindings.get_var("op").unwrap()).unwrap();
    assert_eq!(op_node.kind(), "=");
    assert!(!op_node.is_named());
}
#[test]
fn test_bare_underscore_matches_unnamed() {
    // `(_)` is restricted to named nodes, whereas a bare `_` accepts any
    // node, unnamed tokens included. The unnamed `=` token in the implicit
    // `child` field of an `assignment` is used to show the difference.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let assignment_id = cursor.node_id();

    // Named-only wildcard: the assignment's sole unfielded child is the
    // unnamed `=`, which `(_)` skips, so a lone `(_)` cannot match.
    let named_only = yeast::query!((assignment (_) @any));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = named_only
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(
        !did_match,
        "(_) should skip the unnamed `=` and fail to match"
    );

    // Any-node wildcard: bare `_` takes the next child regardless of
    // namedness, so it binds to the `=` token.
    let any_node_query = yeast::query!((assignment _ @any));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = any_node_query
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match, "_ should match the unnamed `=`");
    let bound_id = bindings.get_var("any").unwrap();
    let bound = tree.get_node(bound_id).unwrap();
    assert_eq!(bound.kind(), "=");
    assert!(!bound.is_named());
}
#[test]
fn test_bare_forms_in_field_position() {
    // Bare `_` and bare string literals are sugar for `(_)` / `("…")` and
    // share their code paths, so they must also be accepted as the value
    // of a field, not only among the bare children.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    let assignment_id = cursor.node_id();

    // Bare `_` as a field value. It would admit unnamed nodes as well, but
    // the first child of `left` is the named `identifier "x"`, so that is
    // what gets captured.
    let wildcard_field = yeast::query!((assignment left: _ @lhs));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = wildcard_field
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match);
    let lhs = tree.get_node(bindings.get_var("lhs").unwrap()).unwrap();
    assert_eq!(lhs.kind(), "identifier");

    // Bare literal as a field value; equivalent to `("=") @op`.
    let literal_field = yeast::query!((assignment child: "=" @op));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = literal_field
        .do_match(&tree, assignment_id, &mut bindings)
        .unwrap();
    assert!(did_match);
    let op = tree.get_node(bindings.get_var("op").unwrap()).unwrap();
    assert_eq!(op.kind(), "=");
    assert!(!op.is_named());
}
#[test]
fn test_forward_scan_finds_unnamed_token_late() {
    // The named `do` wrapper holds three children in its implicit `child`
    // field, in source order: the unnamed `do` keyword, the body
    // identifier, and the unnamed `end` keyword. With forward-scan
    // semantics a query for `("end")` may skip the first two and match the
    // third; previously the matcher took the first child unconditionally
    // and failed.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("for x in list do\n y\nend").unwrap();

    // Walk program > for, then scan the `for` node's children until the
    // named `do` body wrapper is found.
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    cursor.goto_first_child();
    while !(cursor.node().kind() == "do" && cursor.node().is_named()) {
        assert!(cursor.goto_next_sibling(), "expected to find named `do`");
    }
    let do_id = cursor.node_id();

    let pattern = yeast::query!((do ("end") @kw));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, do_id, &mut bindings).unwrap();
    assert!(did_match, "forward-scan should find the `end` keyword");

    let keyword = tree.get_node(bindings.get_var("kw").unwrap()).unwrap();
    assert_eq!(keyword.kind(), "end");
    assert!(!keyword.is_named());
}
#[test]
fn test_forward_scan_preserves_order() {
    // Bare patterns consume child positions strictly left to right. Asking
    // for ("end") followed by ("do") must fail: `do` precedes `end` in the
    // source, and once the scan has consumed `end` nothing is left.
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("for x in list do\n y\nend").unwrap();

    // Locate the named `do` wrapper among the children of the `for` node.
    let mut cursor = AstCursor::new(&tree);
    cursor.goto_first_child();
    cursor.goto_first_child();
    while !(cursor.node().kind() == "do" && cursor.node().is_named()) {
        assert!(cursor.goto_next_sibling(), "expected to find named `do`");
    }
    let do_id = cursor.node_id();

    let reversed = yeast::query!((do ("end") @first ("do") @second));
    let mut bindings = yeast::captures::Captures::new();
    let did_match = reversed.do_match(&tree, do_id, &mut bindings).unwrap();
    assert!(!did_match, "scan must not go backwards");
}
// ---- Tree builder tests ----
#[test]
fn test_tree_builder() {
    let input = "x = 1";
    let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let mut ast = runner.run(input).unwrap();

    // Capture both sides of the assignment.
    let pattern = yeast::query!(
        (program
            child: (assignment
                left: (_) @left
                right: (_) @right
            )
        )
    );
    let mut bindings = yeast::captures::Captures::new();
    pattern
        .do_match(&ast, ast.get_root(), &mut bindings)
        .unwrap();

    // Build a new program whose assignment has the two sides exchanged.
    let fresh = yeast::tree_builder::FreshScope::new();
    let mut ctx = yeast::build::BuildCtx::new(&mut ast, &bindings, &fresh);
    let new_id = yeast::tree!(ctx,
        (program
            child: (assignment
                left: {ctx.capture("right")}
                right: {ctx.capture("left")}
            )
        )
    );

    let rendered = dump_ast(ctx.ast, new_id, input);
    assert_dump_eq(
        &rendered,
        r#"
program
assignment
left: integer "1"
right: identifier "x"
"#,
    );
}
// ---- Rule tests ----
// The rules below rely on field names from node-types.yml. That file
// extends the tree-sitter-ruby grammar with named fields for nodes which
// only have unnamed children in tree-sitter (e.g. block_body.stmt,
// block_parameters.parameter).
//
// Returns two rules, in order: one rewriting a multiple assignment into a
// temporary plus one indexed assignment per target, and one rewriting a
// `for` loop into an `each` call with a block.
fn ruby_rules() -> Vec<Rule> {
    // `a, b = rhs`  =>  `$tmp = rhs; a = $tmp[0]; b = $tmp[1]`
    let multi_assign = yeast::rule!(
        (assignment
            left: (left_assignment_list
                (identifier)* @left
            )
            right: (_) @right
        )
        =>
        (assignment
            left: (identifier $tmp)
            right: {right}
        )
        {..left.iter().enumerate().map(|(i, &lhs)|
            yeast::tree!(
                (assignment
                    left: {lhs}
                    right: (element_reference
                        object: (identifier $tmp)
                        index: (integer #{i})
                    )
                )
            )
        )}
    );

    // `for pat in val do body end`  =>  `val.each { |$tmp| pat = $tmp; body }`
    let for_loop = yeast::rule!(
        (for
            pattern: (_) @pat
            value: (in (_) @val)
            body: (do (_)* @body)
        )
        =>
        (call
            receiver: {val}
            method: (identifier "each")
            block: (block
                parameters: (block_parameters
                    parameter: (identifier $tmp)
                )
                body: (block_body
                    stmt: (assignment
                        left: {pat}
                        right: (identifier $tmp)
                    )
                    stmt: {..body}
                )
            )
        )
    );

    vec![multi_assign, for_loop]
}
#[test]
fn test_desugar_multiple_assignment() {
    // The multiple assignment becomes one temporary assignment followed by
    // one indexed assignment per left-hand identifier.
    let rendered = run_and_dump("x, y = e", ruby_rules());
    assert_dump_eq(
        &rendered,
        r#"
program
assignment
left: identifier "$tmp-0"
right: identifier "e"
assignment
left: identifier "x"
right:
element_reference
object: identifier "$tmp-0"
index: integer "0"
assignment
left: identifier "y"
right:
element_reference
object: identifier "$tmp-0"
index: integer "1"
"#,
    );
}
#[test]
fn test_desugar_for_loop() {
    // The loop becomes an `each` call whose block first assigns the block
    // parameter to the loop pattern, then runs the original body.
    let source = "for x in list do\n y\nend";
    let rendered = run_and_dump(source, ruby_rules());
    assert_dump_eq(
        &rendered,
        r#"
program
call
block:
block
body:
block_body
stmt:
assignment
left: identifier "x"
right: identifier "$tmp-0"
identifier "y"
parameters:
block_parameters
parameter: identifier "$tmp-0"
method: identifier "each"
receiver: identifier "list"
"#,
    );
}
#[test]
fn test_shorthand_rule() {
    // Shorthand form `=> call`: only the output kind is named, and the
    // expected dump shows the captures `method` and `receiver` as fields
    // of the resulting `call`.
    let assignment_to_call = yeast::rule!(
        (assignment
            left: (_) @method
            right: (_) @receiver
        )
        => call
    );
    let rendered = run_and_dump("x = 1", vec![assignment_to_call]);
    assert_dump_eq(
        &rendered,
        r#"
program
call
method: identifier "x"
receiver: integer "1"
"#,
    );
}
#[test]
fn test_chained_rules_output_only_kind() {
    // Chain two rules through a kind that exists only in the output schema
    // and not in the input tree-sitter grammar:
    //   assignment -> first_node   (input -> output-only)
    //   first_node -> second_node  (output-only -> output-only)
    // To match the second rule, the matcher has to resolve `first_node`
    // against the schema, which learns of it solely from the YAML
    // node-types file.
    let assignment_to_first = yeast::rule!(
        (assignment
            left: (_) @left
            right: (_) @right
        )
        => first_node
    );
    let first_to_second = yeast::rule!(
        (first_node
            left: (_) @left
            right: (_) @right
        )
        => second_node
    );
    let chain = vec![assignment_to_first, first_to_second];
    let rendered = run_and_dump("x = 1", chain);
    assert_dump_eq(
        &rendered,
        r#"
program
second_node
left: identifier "x"
right: integer "1"
"#,
    );
}
// Builds a rule exchanging `assignment.left` with `assignment.right`. The
// output is itself an `assignment` that the rule's query matches again, so
// the rule would loop were it not for the once-per-node default.
fn swap_assignment_rule() -> Rule {
    let swap_sides = yeast::rule!(
        (assignment
            left: (_) @left
            right: (_) @right
        )
        =>
        (assignment
            left: {right}
            right: {left}
        )
    );
    swap_sides
}
#[test]
fn test_repeated_rule_hits_depth_limit() {
    // `.repeated()` lets the rule fire on its own output. The swap then
    // cycles forever and must trip the rewrite-depth safety net.
    let looping_rule = swap_assignment_rule().repeated();
    let err = run_and_get_error("x = 1", vec![looping_rule]);
    assert!(
        err.contains("exceeded maximum rewrite depth"),
        "expected depth-limit error, got: {err}"
    );
}
#[test]
fn test_default_rule_fires_at_most_once_per_node() {
    // By default (no `.repeated()`), a rule fires at most once on a given
    // node, so the sides are swapped exactly once and the run terminates
    // cleanly.
    let rendered = run_and_dump("x = 1", vec![swap_assignment_rule()]);
    assert_dump_eq(
        &rendered,
        r#"
program
assignment
left: integer "1"
right: identifier "x"
"#,
    );
}
// ---- Phase tests ----
#[test]
fn test_phased_desugaring() {
    // The two phases could equally be one phase with chained rules.
    // Keeping them apart makes the intent (cleanup, then desugar) explicit
    // and yields per-phase error messages.
    let assignment_to_first = yeast::rule!(
        (assignment
            left: (_) @left
            right: (_) @right
        )
        => first_node
    );
    let first_to_second = yeast::rule!(
        (first_node
            left: (_) @left
            right: (_) @right
        )
        => second_node
    );
    let cleanup_phase = Phase::new("cleanup", PhaseKind::Repeating, vec![assignment_to_first]);
    let desugar_phase = Phase::new("desugar", PhaseKind::Repeating, vec![first_to_second]);

    let rendered = run_phased_and_dump("x = 1", vec![cleanup_phase, desugar_phase]);
    assert_dump_eq(
        &rendered,
        r#"
program
second_node
left: identifier "x"
right: integer "1"
"#,
    );
}
#[test]
fn test_phase_error_includes_phase_name() {
    // A repeated, self-matching rule loops until the depth limit trips;
    // the resulting error must say which phase was responsible.
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    let looping_rule = swap_assignment_rule().repeated();
    let phases = vec![Phase::new(
        "buggy",
        PhaseKind::Repeating,
        vec![looping_rule],
    )];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let outcome = runner.run("x = 1");
    let err = outcome.expect_err("expected runner to return an error");
    assert!(
        err.contains("Phase `buggy`"),
        "error should mention the failing phase, got: {err}"
    );
    assert!(
        err.contains("exceeded maximum rewrite depth"),
        "error should mention the depth limit, got: {err}"
    );
}
/// Helper: the OneShot rule set used for translating `"x = 1"`, with one
/// rule for every node kind reachable (via captures) in that input. The
/// rules are returned in the order program, assignment, identifier, integer.
fn one_shot_xeq1_rules() -> Vec<Rule> {
    let program_rule = yeast::rule!(
        (program (_)* @stmts)
        =>
        (program stmt: {..stmts})
    );
    let assignment_rule = yeast::rule!(
        (assignment left: (_) @left right: (_) @right)
        =>
        (first_node left: {left} right: {right})
    );
    let identifier_rule = yeast::rule!((identifier) => (identifier "ID"));
    let integer_rule = yeast::rule!((integer) => (integer "INT"));
    vec![program_rule, assignment_rule, identifier_rule, integer_rule]
}
#[test]
fn test_one_shot_phase() {
    // Run the complete `"x = 1"` rule set as a single OneShot phase and
    // check that every node has been translated.
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    let translate = Phase::new("translate", PhaseKind::OneShot, one_shot_xeq1_rules());
    let phases = vec![translate];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let input = "x = 1";
    let tree = runner.run(input).unwrap();
    let rendered = dump_ast(&tree, tree.get_root(), input);
    assert_dump_eq(
        &rendered,
        r#"
program
stmt:
first_node
left: identifier "ID"
right: integer "INT"
"#,
    );
}
#[test]
fn test_one_shot_phase_errors_when_no_rule_matches() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();
    // The `integer` rule is the last entry; removing it leaves the
    // recursion without any rule for `integer` nodes.
    let mut incomplete_rules = one_shot_xeq1_rules();
    incomplete_rules.pop();
    let phases = vec![Phase::new(
        "translate",
        PhaseKind::OneShot,
        incomplete_rules,
    )];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let outcome = runner.run("x = 1");
    let err = outcome.expect_err("expected OneShot to error on unmatched node");
    assert!(
        err.contains("Phase `translate`"),
        "error should name the phase, got: {err}"
    );
    assert!(
        err.contains("no rule matched") && err.contains("integer"),
        "error should describe the unmatched node kind, got: {err}"
    );
}
/// OneShot recursion has to run the rules on *captured* nodes, including
/// the case where a rule hands a captured child back verbatim. An
/// implementation that recursed only into the children of a rule's output
/// (and not into its captures) would leave that returned capture
/// untransformed.
#[test]
fn test_one_shot_recurses_into_returned_capture() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();

    let program_rule = yeast::rule!(
        (program (_)* @stmts)
        =>
        (program stmt: {..stmts})
    );
    // Hands back the captured `left` unchanged and drops `right`.
    let keep_left_rule = yeast::rule!(
        (assignment left: (_) @left right: (_) @right)
        =>
        {left}
    );
    let identifier_rule = yeast::rule!((identifier) => (identifier "ID"));
    let integer_rule = yeast::rule!((integer) => (integer "INT"));
    let rules = vec![program_rule, keep_left_rule, identifier_rule, integer_rule];

    let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let input = "x = 1";
    let tree = runner.run(input).unwrap();
    let rendered = dump_ast(&tree, tree.get_root(), input);
    // `left` is an `identifier`, so the identifier rule must already have
    // been applied to it when the assignment transform returns it as-is.
    assert_dump_eq(
        &rendered,
        r#"
program
stmt: identifier "ID"
"#,
    );
}
/// OneShot recursion must NOT walk into the children of a rule's output.
/// Rule patterns target the input schema, so a rule may legitimately wrap
/// a captured node in fresh output-schema nodes for which no rule exists.
/// Recursing into the output would wrongly look for rules matching those
/// wrapper kinds and fail.
#[test]
fn test_one_shot_does_not_recurse_into_wrapper_output() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language)
            .unwrap();

    let program_rule = yeast::rule!(
        (program (_)* @stmts)
        =>
        (program stmt: {..stmts})
    );
    // Nests `left` inside `first_node`/`second_node` output kinds. No rule
    // matches either wrapper kind, so an implementation recursing into the
    // wrapper's children would report an error.
    let wrapping_rule = yeast::rule!(
        (assignment left: (_) @left right: (_) @right)
        =>
        (first_node
            left: (second_node left: {left} right: {right})
            right: {left}
        )
    );
    let identifier_rule = yeast::rule!((identifier) => (identifier "ID"));
    let integer_rule = yeast::rule!((integer) => (integer "INT"));
    let rules = vec![program_rule, wrapping_rule, identifier_rule, integer_rule];

    let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
    let runner = Runner::with_schema(language, &output_schema, &phases);
    let input = "x = 1";
    let tree = runner.run(input).unwrap();
    let rendered = dump_ast(&tree, tree.get_root(), input);
    assert_dump_eq(
        &rendered,
        r#"
program
stmt:
first_node
left:
second_node
left: identifier "ID"
right: integer "INT"
right: identifier "ID"
"#,
    );
}
// ---- Cursor tests ----
#[test]
fn test_cursor_navigation() {
    let parser = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = parser.run("x = 1").unwrap();
    let mut walker = AstCursor::new(&tree);

    // A fresh cursor sits on the root node.
    assert_eq!(walker.node().kind(), "program");

    // Descend to the only statement, the assignment.
    assert!(walker.goto_first_child());
    assert_eq!(walker.node().kind(), "assignment");

    // The assignment has no following sibling.
    assert!(!walker.goto_next_sibling());

    // Descend once more; the first child of the assignment is named.
    assert!(walker.goto_first_child());
    assert!(walker.node().is_named());

    // Climb back to the assignment, then to the root.
    assert!(walker.goto_parent());
    assert_eq!(walker.node().kind(), "assignment");
    assert!(walker.goto_parent());
    assert_eq!(walker.node().kind(), "program");

    // The root has no parent to move to.
    assert!(!walker.goto_parent());
}
#[test]
fn test_desugar_for_with_multiple_assignment() {
    // Both rules from `ruby_rules` apply here: the `for` loop becomes an
    // `each` call, and the destructuring pattern it introduces is then
    // expanded into a temporary plus indexed assignments.
    let source = "for a, b in list do\n x\nend";
    let rendered = run_and_dump(source, ruby_rules());
    assert_dump_eq(
        &rendered,
        r#"
program
call
block:
block
body:
block_body
stmt:
assignment
left: identifier "$tmp-1"
right: identifier "$tmp-0"
assignment
left: identifier "a"
right:
element_reference
object: identifier "$tmp-1"
index: integer "0"
assignment
left: identifier "b"
right:
element_reference
object: identifier "$tmp-1"
index: integer "1"
identifier "x"
parameters:
block_parameters
parameter: identifier "$tmp-0"
method: identifier "each"
receiver: identifier "list"
"#,
    );
}
/// Regression test: in a template, `#{capture}` has to render the *source
/// text* of the captured node rather than its arena `Id`. Captures used to
/// be bound as `usize`, so `#{cap}` went through `Display` and printed the
/// integer id (e.g. `"3"`). They are now bound as `NodeRef`, which has no
/// `Display` impl and resolves to the captured node's source text via
/// `YeastDisplay`.
#[test]
fn test_hash_brace_renders_capture_source_text() {
    let rebuild_call = rule!(
        (call
            method: (identifier) @name
            receiver: (identifier) @recv
        )
        =>
        (call
            method: (identifier #{name})
            receiver: (identifier #{recv})
            arguments: (argument_list)
        )
    );
    let rendered = run_and_dump("foo.bar()", vec![rebuild_call]);
    assert_dump_eq(
        &rendered,
        r#"
program
call
arguments: argument_list "foo.bar()"
method: identifier "bar"
receiver: identifier "foo"
"#,
    );
}
/// Regression test: a `#{expr}` whose value is not a `NodeRef` is still
/// rendered through its `Display` impl (provided by `YeastDisplay`'s
/// blanket impls for primitives).
#[test]
fn test_hash_brace_renders_integer_expression() {
    let constant_identifier = rule!(
        (identifier) @_
        =>
        (identifier #{1 + 2})
    );
    let rendered = run_and_dump("foo", vec![constant_identifier]);
    assert_dump_eq(
        &rendered,
        r#"
program
identifier "3"
"#,
    );
}