mirror of
https://github.com/github/codeql.git
synced 2026-06-09 23:14:13 +02:00
1152 lines
36 KiB
Rust
1152 lines
36 KiB
Rust
#![cfg(test)]
|
|
|
|
use yeast::dump::{dump_ast, dump_ast_with_type_errors};
|
|
use yeast::*;
|
|
|
|
const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
|
|
|
|
/// Helper: parse Ruby source with no rules, return dump.
|
|
fn parse_and_dump(input: &str) -> String {
|
|
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
|
let ast = runner.run(input).unwrap();
|
|
dump_ast(&ast, ast.get_root(), input)
|
|
}
|
|
|
|
/// Helper: parse Ruby source with a custom output schema and a single
|
|
/// phase of rules, return dump.
|
|
fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
|
|
run_phased_and_dump(input, vec![Phase::new("test", PhaseKind::Repeating, rules)])
|
|
}
|
|
|
|
/// Helper: parse Ruby source with a custom output schema and multiple
|
|
/// rule phases, return dump.
|
|
fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema =
|
|
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
let ast = runner.run(input).unwrap();
|
|
dump_ast(&ast, ast.get_root(), input)
|
|
}
|
|
|
|
/// Helper: like `run_and_dump`, but returns the runner error (if any)
|
|
/// instead of unwrapping.
|
|
fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema =
|
|
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
runner
|
|
.run(input)
|
|
.expect_err("expected runner to return an error")
|
|
}
|
|
|
|
/// Helper: parse Ruby source with no rules and dump with schema type errors.
|
|
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
|
|
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
|
let ast = runner.run(input).unwrap();
|
|
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
|
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
|
}
|
|
|
|
/// Helper: parse Ruby source with no rules and dump with schema type errors,
|
|
/// building schema with language IDs so field checks align with parser fields.
|
|
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let runner = Runner::new(lang.clone(), &[]);
|
|
let ast = runner.run(input).unwrap();
|
|
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
|
|
.unwrap();
|
|
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
|
}
|
|
|
|
/// Helper: parse Ruby source with custom rules and dump with schema type errors.
|
|
fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> String {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
|
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
let ast = runner.run(input).unwrap();
|
|
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
|
}
|
|
|
|
/// Assert that a dump equals the expected string, treating the expected
/// string as an indented multiline literal.
///
/// The smallest leading-whitespace width (in bytes) among the non-blank
/// lines of `expected` is removed from every line, and surrounding
/// whitespace is trimmed from both strings before they are compared. This
/// lets test assertions place the first line at the same indentation as the
/// rest of the body.
///
/// Lines that are shorter than the common indent, or where cutting at that
/// byte offset would split a multi-byte character, are left untouched
/// rather than causing a panic.
///
/// # Panics
///
/// Panics, reporting the caller's location, when the normalized strings
/// differ.
#[track_caller]
fn assert_dump_eq(actual: &str, expected: &str) {
    // Blank lines are ignored here so they cannot drag the common indent to 0.
    let min_indent = expected
        .lines()
        .filter(|line| !line.trim().is_empty())
        .map(|line| line.len() - line.trim_start().len())
        .min()
        .unwrap_or(0);
    let dedented = expected
        .lines()
        // `str::get` returns `None` for an out-of-range or non-char-boundary
        // cut, which is exactly when the line should be kept as-is.
        .map(|line| line.get(min_indent..).unwrap_or(line))
        .collect::<Vec<_>>()
        .join("\n");
    assert_eq!(actual.trim(), dedented.trim());
}
|
|
|
|
// ---- Parsing tests ----
|
|
|
|
/// Parsing `x = 1` with no rules must dump a `program` containing one
/// `assignment` whose `left` is the identifier `x` and whose `right` is the
/// integer `1`.
// NOTE(review): the expected dump is compared after common indentation is
// removed, so the relative nesting inside the raw string matters; confirm
// it against the upstream file.
#[test]
|
|
fn test_parse_assignment() {
|
|
let dump = parse_and_dump("x = 1");
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
assignment
|
|
left: identifier "x"
|
|
right: integer "1"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Parsing `x, y = foo()` with no rules: the expected dump lists a
/// `left_assignment_list` with the identifiers `x` and `y` under `left`, and
/// a `call` with `arguments` and `method` fields under `right`.
// NOTE(review): relative nesting inside the raw string is significant for
// the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_parse_multiple_assignment() {
|
|
let dump = parse_and_dump("x, y = foo()");
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
assignment
|
|
left:
|
|
left_assignment_list
|
|
identifier "x"
|
|
identifier "y"
|
|
right:
|
|
call
|
|
arguments:
|
|
argument_list
|
|
method: identifier "foo"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Parsing a `for … in … do … end` loop with no rules: the expected dump
/// shows the `for` node with its `body`, `pattern` and `value` fields.
// NOTE(review): relative nesting inside the raw string is significant for
// the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_parse_for_loop() {
|
|
let dump = parse_and_dump("for x in list do\n y\nend");
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
for
|
|
body:
|
|
do
|
|
identifier "y"
|
|
pattern: identifier "x"
|
|
value:
|
|
in
|
|
identifier "list"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// The schema below only permits `identifier` on either side of an
/// `assignment`, so dumping `x = 1` with type checking must mark the
/// `integer "1"` node inline with an `<-- ERROR:` annotation.
// NOTE(review): the schema is YAML, where indentation carries structure;
// confirm the nesting of the raw string against the upstream file.
#[test]
|
|
fn test_dump_highlights_type_errors_inline() {
|
|
let schema_yaml = r#"
|
|
named:
|
|
program:
|
|
$children*: assignment
|
|
assignment:
|
|
left: identifier
|
|
right: identifier
|
|
identifier:
|
|
"#;
|
|
|
|
let dump = parse_and_dump_typed("x = 1", schema_yaml);
|
|
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
|
}
|
|
|
|
/// A rewrite that rebuilds the `assignment` from its own captures keeps the
/// right-hand `integer` node. The schema does not declare `integer`, so the
/// typed dump must flag it inline and name the unknown kind.
// NOTE(review): the schema is YAML, where indentation carries structure;
// confirm the nesting of the raw string against the upstream file.
#[test]
|
|
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
|
|
let schema_yaml = r#"
|
|
named:
|
|
program:
|
|
$children*: assignment
|
|
assignment:
|
|
left: identifier
|
|
right: identifier
|
|
identifier:
|
|
"#;
|
|
|
|
// This rewrite runs and preserves the RHS node kind via capture.
|
|
// With schema above, preserving `integer` should be reported inline.
|
|
let rules = vec![yeast::rule!(
|
|
(assignment left: (_) @left right: (_) @right)
|
|
=>
|
|
(assignment
|
|
left: {left}
|
|
right: {right}
|
|
)
|
|
)];
|
|
|
|
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
|
|
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
|
assert!(dump.contains("node kind 'integer' not in schema"));
|
|
}
|
|
|
|
/// The schema declares only a `left` field on `assignment`. Dumping `x = y`
/// against it (with the schema built alongside the language) must flag the
/// `right` child and report that `assignment` has no such field.
// NOTE(review): the schema is YAML, where indentation carries structure;
// confirm the nesting of the raw string against the upstream file.
#[test]
|
|
fn test_dump_reports_undeclared_field_on_node() {
|
|
let schema_yaml = r#"
|
|
named:
|
|
program:
|
|
$children*: assignment
|
|
assignment:
|
|
left: identifier
|
|
identifier:
|
|
"#;
|
|
|
|
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
|
|
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
|
|
assert!(dump.contains("the node 'assignment' has no field 'right'"));
|
|
}
|
|
|
|
/// Here `integer` is a declared kind, but `assignment.right` only admits
/// `identifier`. Dumping `x = 1` must flag the `right` child and the
/// message must mention both what the field should contain and that it got
/// an `integer`.
// NOTE(review): the schema is YAML, where indentation carries structure;
// confirm the nesting of the raw string against the upstream file.
#[test]
|
|
fn test_dump_reports_disallowed_kind_in_field_type() {
|
|
let schema_yaml = r#"
|
|
named:
|
|
program:
|
|
$children*: assignment
|
|
assignment:
|
|
left: identifier
|
|
right: identifier
|
|
identifier:
|
|
integer:
|
|
"#;
|
|
|
|
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
|
|
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
|
|
assert!(dump.contains("should contain"));
|
|
assert!(dump.contains("but got integer"));
|
|
}
|
|
|
|
// ---- Query tests ----
|
|
|
|
/// A query that names both fields of the top-level `assignment` matches the
/// parse of `x = 1` and binds the `left` and `right` captures.
#[test]
fn test_query_match() {
    let pattern = yeast::query!(
        (program
            child: (assignment
                left: (_) @left
                right: (_) @right
            )
        )
    );

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();
    let root = tree.get_root();

    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, root, &mut bound).unwrap();
    assert!(did_match);

    // Both named captures must be retrievable after a successful match.
    let left = bound.get_var("left");
    assert!(left.is_ok());
    let right = bound.get_var("right");
    assert!(right.is_ok());
}
|
|
|
|
/// A query looking for a `call` directly under `program` must not match the
/// parse of `x = 1`, whose only statement is an assignment.
#[test]
fn test_query_no_match() {
    let pattern = yeast::query!(
        (program
            child: (call
                method: (_) @m
            )
        )
    );

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();
    let root = tree.get_root();

    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, root, &mut bound).unwrap();
    assert!(!did_match);
}
|
|
|
|
/// Regression test: while forward-scanning, a positional wildcard `(_)`
/// must never bind to a tree-sitter `extras` node such as a comment.
///
/// Extras are conceptually invisible between siblings, which matches
/// tree-sitter query semantics. If a wildcard did capture a comment, a later
/// rule translating that comment to nothing (a common idiom, e.g.
/// `(comment) => ()` in Swift) would leave the capture's match-list empty
/// and the transform would fail with "Variable X has 0 matches".
#[test]
fn test_query_skips_extras_in_positional_match() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("[1, # comment\n2]").unwrap();

    // The `array` node is the first child of `program`.
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let array_id = walker.node_id();
    let array_node = tree.get_node(array_id).unwrap();
    assert_eq!(array_node.kind(), "array");

    // The two wildcards are expected to land on the two integers, stepping
    // over the comment that sits between them.
    let pattern = yeast::query!((array (_) @a (_) @b));
    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, array_id, &mut bound).unwrap();
    assert!(did_match);

    let first = tree.get_node(bound.get_var("a").unwrap()).unwrap();
    assert_eq!(first.kind(), "integer");
    let second = tree.get_node(bound.get_var("b").unwrap()).unwrap();
    assert_eq!(second.kind(), "integer");
}
|
|
|
|
/// After a rule replaces the `integer` with a new `identifier`, the node
/// arena is expected to hold more nodes than are reachable from the root,
/// and the dump must show only the replacement.
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
    use yeast::node_types_yaml::schema_from_yaml_with_language;

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema = schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language).unwrap();
    let replace_integer = yeast::rule!((integer) => (identifier "replaced"));
    let phases = vec![Phase::new(
        "test",
        PhaseKind::Repeating,
        vec![replace_integer],
    )];
    let runner = Runner::with_schema(language, &output_schema, &phases);

    let source = "x = 1";
    let tree = runner.run(source).unwrap();
    let reachable = tree.reachable_node_ids();

    let arena_len = tree.nodes().len();
    let reachable_len = reachable.len();
    assert!(
        arena_len > reachable_len,
        "expected rewrite to leave orphaned arena nodes"
    );

    let rendered = dump_ast(&tree, tree.get_root(), source);
    assert!(rendered.contains("identifier \"replaced\""));
    assert!(!rendered.contains("integer \"1\""));
}
|
|
|
|
/// A starred capture, `(identifier)* @names`, collects every identifier on
/// the left of `x, y, z = 1`: three bindings in total.
#[test]
fn test_query_repeated_capture() {
    let pattern = yeast::query!(
        (assignment
            left: (left_assignment_list
                (identifier)* @names
            )
        )
    );

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x, y, z = 1").unwrap();

    // The query targets the assignment, i.e. the first child of `program`.
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let assignment_id = walker.node_id();

    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, assignment_id, &mut bound).unwrap();
    assert!(did_match);

    let names = bound.get_all("names");
    assert_eq!(names.len(), 3);
}
|
|
|
|
/// The parenthesized literal form `("=") @op` captures the unnamed `=`
/// token that sits between the `left` and `right` fields.
#[test]
fn test_capture_unnamed_node_parenthesized() {
    let pattern = yeast::query!(
        (assignment
            left: (_) @lhs
            ("=") @op
            right: (_) @rhs
        )
    );

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();

    // Step from `program` down to its assignment child.
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let assignment_id = walker.node_id();

    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, assignment_id, &mut bound).unwrap();
    assert!(did_match);

    let operator = tree.get_node(bound.get_var("op").unwrap()).unwrap();
    assert_eq!(operator.kind(), "=");
    assert!(!operator.is_named());
}
|
|
|
|
/// The bare literal form `"=" @op`, written without surrounding parens,
/// behaves the same as `("=") @op`: it captures the unnamed `=` token.
#[test]
fn test_capture_unnamed_node_bare_literal() {
    let pattern = yeast::query!(
        (assignment
            left: (_) @lhs
            "=" @op
            right: (_) @rhs
        )
    );

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();

    // Step from `program` down to its assignment child.
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let assignment_id = walker.node_id();

    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, assignment_id, &mut bound).unwrap();
    assert!(did_match);

    let operator = tree.get_node(bound.get_var("op").unwrap()).unwrap();
    assert_eq!(operator.kind(), "=");
    assert!(!operator.is_named());
}
|
|
|
|
/// Contrasts the two wildcard spellings: `(_)` only accepts named nodes,
/// whereas a bare `_` accepts any node, unnamed tokens included. The probe
/// is the unnamed `=` token held in the implicit `child` field of an
/// `assignment`.
#[test]
fn test_bare_underscore_matches_unnamed() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();

    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let assignment_id = walker.node_id();

    // The assignment's only unfielded child is the unnamed `=`. Because
    // `(_)` skips unnamed children, a lone `(_)` bare pattern has nothing
    // to bind to and the match fails.
    let named_only = yeast::query!((assignment (_) @any));
    let mut named_bound = yeast::captures::Captures::new();
    let named_result = named_only
        .do_match(&tree, assignment_id, &mut named_bound)
        .unwrap();
    assert!(
        !named_result,
        "(_) should skip the unnamed `=` and fail to match"
    );

    // A bare `_` takes the next child whatever it is, so it binds the `=`.
    let anything = yeast::query!((assignment _ @any));
    let mut any_bound = yeast::captures::Captures::new();
    let any_result = anything
        .do_match(&tree, assignment_id, &mut any_bound)
        .unwrap();
    assert!(any_result, "_ should match the unnamed `=`");

    let captured_id = any_bound.get_var("any").unwrap();
    let captured = tree.get_node(captured_id).unwrap();
    assert_eq!(captured.kind(), "=");
    assert!(!captured.is_named());
}
|
|
|
|
/// Bare `_` and bare string literals must be usable as a field's value, not
/// only among the bare children. They are syntactic sugar for `(_)` /
/// `("…")` and go through the same code paths.
#[test]
fn test_bare_forms_in_field_position() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();

    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    let assignment_id = walker.node_id();

    // Bare `_` as the value of `left`. It would admit an unnamed node too,
    // but the first child of `left` happens to be the named
    // `identifier "x"`, so that is what gets captured.
    let wildcard_field = yeast::query!((assignment left: _ @lhs));
    let mut wildcard_bound = yeast::captures::Captures::new();
    let wildcard_result = wildcard_field
        .do_match(&tree, assignment_id, &mut wildcard_bound)
        .unwrap();
    assert!(wildcard_result);
    let lhs_node = tree.get_node(wildcard_bound.get_var("lhs").unwrap()).unwrap();
    assert_eq!(lhs_node.kind(), "identifier");

    // Bare literal as the value of `child`; equivalent to `("=") @op`.
    let literal_field = yeast::query!((assignment child: "=" @op));
    let mut literal_bound = yeast::captures::Captures::new();
    let literal_result = literal_field
        .do_match(&tree, assignment_id, &mut literal_bound)
        .unwrap();
    assert!(literal_result);
    let operator_id = literal_bound.get_var("op").unwrap();
    let operator = tree.get_node(operator_id).unwrap();
    assert_eq!(operator.kind(), "=");
    assert!(!operator.is_named());
}
|
|
|
|
/// Forward-scan lets a bare pattern skip earlier children to find a later
/// match.
///
/// The named `do` wrapper keeps three children in its implicit `child`
/// field, in source order: the unnamed `do` keyword, the body identifier,
/// and the unnamed `end` keyword. A query for `("end")` has to step past
/// the first two to reach the third. Before forward-scan existed, the
/// matcher took the first child unconditionally and failed.
#[test]
fn test_forward_scan_finds_unnamed_token_late() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("for x in list do\n y\nend").unwrap();

    // Walk program > for, then along the children of `for` until the
    // cursor rests on the named `do` node (the body wrapper).
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    walker.goto_first_child();
    loop {
        let at_named_do = walker.node().kind() == "do" && walker.node().is_named();
        if at_named_do {
            break;
        }
        assert!(walker.goto_next_sibling(), "expected to find named `do`");
    }
    let do_id = walker.node_id();

    let pattern = yeast::query!((do ("end") @kw));
    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, do_id, &mut bound).unwrap();
    assert!(did_match, "forward-scan should find the `end` keyword");

    let keyword_id = bound.get_var("kw").unwrap();
    let keyword = tree.get_node(keyword_id).unwrap();
    assert_eq!(keyword.kind(), "end");
    assert!(!keyword.is_named());
}
|
|
|
|
/// Bare patterns are matched left to right and each one consumes positions
/// in order. Asking for `("end")` followed by `("do")` must therefore fail:
/// `do` comes before `end` in the source, and once the scan has consumed
/// `end` there is nothing left to iterate over.
#[test]
fn test_forward_scan_preserves_order() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("for x in list do\n y\nend").unwrap();

    // Descend twice, then walk siblings until the named `do` node is found.
    let mut walker = AstCursor::new(&tree);
    walker.goto_first_child();
    walker.goto_first_child();
    loop {
        let at_named_do = walker.node().kind() == "do" && walker.node().is_named();
        if at_named_do {
            break;
        }
        assert!(walker.goto_next_sibling(), "expected to find named `do`");
    }
    let do_id = walker.node_id();

    let pattern = yeast::query!((do ("end") @first ("do") @second));
    let mut bound = yeast::captures::Captures::new();
    let did_match = pattern.do_match(&tree, do_id, &mut bound).unwrap();
    assert!(!did_match, "scan must not go backwards");
}
|
|
|
|
// ---- Tree builder tests ----
|
|
|
|
/// Exercises `yeast::tree!` directly: match `x = 1`, capture both sides of
/// the assignment, then build a new `program` in which the captures are
/// swapped, and check the dump of the newly built node.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_tree_builder() {
|
|
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
|
let mut ast = runner.run("x = 1").unwrap();
|
|
// Same text that was parsed above; passed to `dump_ast` below.
let input = "x = 1";
|
|
|
|
let query = yeast::query!(
|
|
(program
|
|
child: (assignment
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
)
|
|
);
|
|
|
|
let mut captures = yeast::captures::Captures::new();
|
|
// The boolean match result is discarded; only the captures are used.
query.do_match(&ast, ast.get_root(), &mut captures).unwrap();
|
|
|
|
// Swap left and right
|
|
let fresh = yeast::tree_builder::FreshScope::new();
|
|
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh);
|
|
let new_id = yeast::tree!(ctx,
|
|
(program
|
|
child: (assignment
|
|
left: {ctx.capture("right")}
|
|
right: {ctx.capture("left")}
|
|
)
|
|
)
|
|
);
|
|
|
|
// Dump from the freshly built node rather than from the AST root.
let dump = dump_ast(ctx.ast, new_id, input);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
assignment
|
|
left: integer "1"
|
|
right: identifier "x"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
// ---- Rule tests ----
|
|
|
|
// These rules use field names from node-types.yml, which extends the
|
|
// tree-sitter-ruby grammar with named fields for nodes that only have
|
|
// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter).
|
|
fn ruby_rules() -> Vec<Rule> {
|
|
let assign_rule = yeast::rule!(
|
|
(assignment
|
|
left: (left_assignment_list
|
|
(identifier)* @left
|
|
)
|
|
right: (_) @right
|
|
)
|
|
=>
|
|
(assignment
|
|
left: (identifier $tmp)
|
|
right: {right}
|
|
)
|
|
{..left.iter().enumerate().map(|(i, &lhs)|
|
|
yeast::tree!(
|
|
(assignment
|
|
left: {lhs}
|
|
right: (element_reference
|
|
object: (identifier $tmp)
|
|
index: (integer #{i})
|
|
)
|
|
)
|
|
)
|
|
)}
|
|
);
|
|
|
|
let for_rule = yeast::rule!(
|
|
(for
|
|
pattern: (_) @pat
|
|
value: (in (_) @val)
|
|
body: (do (_)* @body)
|
|
)
|
|
=>
|
|
(call
|
|
receiver: {val}
|
|
method: (identifier "each")
|
|
block: (block
|
|
parameters: (block_parameters
|
|
parameter: (identifier $tmp)
|
|
)
|
|
body: (block_body
|
|
stmt: (assignment
|
|
left: {pat}
|
|
right: (identifier $tmp)
|
|
)
|
|
stmt: {..body}
|
|
)
|
|
)
|
|
)
|
|
);
|
|
|
|
vec![assign_rule, for_rule]
|
|
}
|
|
|
|
/// Runs `ruby_rules()` over `x, y = e`. The expected dump has three
/// assignments: `$tmp-0` from `e`, then `x` and `y` from
/// `element_reference` nodes indexing `$tmp-0` at `0` and `1`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_desugar_multiple_assignment() {
|
|
let dump = run_and_dump("x, y = e", ruby_rules());
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
assignment
|
|
left: identifier "$tmp-0"
|
|
right: identifier "e"
|
|
assignment
|
|
left: identifier "x"
|
|
right:
|
|
element_reference
|
|
object: identifier "$tmp-0"
|
|
index: integer "0"
|
|
assignment
|
|
left: identifier "y"
|
|
right:
|
|
element_reference
|
|
object: identifier "$tmp-0"
|
|
index: integer "1"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Runs `ruby_rules()` over a `for` loop. The expected dump is a `call` to
/// `each` on `list` whose block takes `$tmp-0`, assigns it to `x`, and then
/// contains the original body statement `y`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_desugar_for_loop() {
|
|
let dump = run_and_dump("for x in list do\n y\nend", ruby_rules());
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
call
|
|
block:
|
|
block
|
|
body:
|
|
block_body
|
|
stmt:
|
|
assignment
|
|
left: identifier "x"
|
|
right: identifier "$tmp-0"
|
|
identifier "y"
|
|
parameters:
|
|
block_parameters
|
|
parameter: identifier "$tmp-0"
|
|
method: identifier "each"
|
|
receiver: identifier "list"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Exercises the shorthand rule form `=> call`, which gives only the output
/// kind. Per the expected dump, the captures `@method` and `@receiver`
/// become the like-named fields of the resulting `call`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_shorthand_rule() {
|
|
let rule = yeast::rule!(
|
|
(assignment
|
|
left: (_) @method
|
|
right: (_) @receiver
|
|
)
|
|
=> call
|
|
);
|
|
|
|
let dump = run_and_dump("x = 1", vec![rule]);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
call
|
|
method: identifier "x"
|
|
receiver: integer "1"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Two shorthand rules in a single phase: `assignment` becomes `first_node`,
/// which the second rule then turns into `second_node`. The final dump must
/// show `second_node` with the original `left`/`right` children.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_chained_rules_output_only_kind() {
|
|
// Exercise rule chaining where an intermediate kind exists only in the
|
|
// output schema (not in the input tree-sitter grammar):
|
|
// assignment → first_node (input → output-only)
|
|
// first_node → second_node (output-only → output-only)
|
|
// The matcher must look up `first_node` against the schema, which only
|
|
// knows about it via the YAML node-types file.
|
|
let assignment_to_first = yeast::rule!(
|
|
(assignment
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
=> first_node
|
|
);
|
|
let first_to_second = yeast::rule!(
|
|
(first_node
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
=> second_node
|
|
);
|
|
|
|
let dump = run_and_dump("x = 1", vec![assignment_to_first, first_to_second]);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
second_node
|
|
left: identifier "x"
|
|
right: integer "1"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
// A rule that swaps `assignment.left` and `assignment.right`. Each
|
|
// application produces another `assignment` whose query the rule
|
|
// matches again, so without the once-per-node default it would loop.
|
|
fn swap_assignment_rule() -> Rule {
|
|
yeast::rule!(
|
|
(assignment
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
=>
|
|
(assignment
|
|
left: {right}
|
|
right: {left}
|
|
)
|
|
)
|
|
}
|
|
|
|
/// Marking the swap rule `.repeated()` lets it fire on its own output. That
/// cycles forever, so the run must end with the rewrite-depth safety-net
/// error.
#[test]
fn test_repeated_rule_hits_depth_limit() {
    let looping_rule = swap_assignment_rule().repeated();
    let err = run_and_get_error("x = 1", vec![looping_rule]);
    let mentions_depth_limit = err.contains("exceeded maximum rewrite depth");
    assert!(
        mentions_depth_limit,
        "expected depth-limit error, got: {err}"
    );
}
|
|
|
|
/// Counterpart to the `.repeated()` depth-limit test: with the default
/// setting the swap rule is applied once, so the dump shows `1` on the left
/// and `x` on the right.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_default_rule_fires_at_most_once_per_node() {
|
|
// Without `.repeated()` (the default), a rule fires at most once on a
|
|
// given node. The swap therefore happens exactly once and the desugaring
|
|
// terminates cleanly.
|
|
let dump = run_and_dump("x = 1", vec![swap_assignment_rule()]);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
assignment
|
|
left: integer "1"
|
|
right: identifier "x"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
// ---- Phase tests ----
|
|
|
|
/// Runs two repeating phases in sequence: `cleanup` turns `assignment` into
/// `first_node`, then `desugar` turns `first_node` into `second_node`. The
/// final dump must show `second_node` with the original children.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_phased_desugaring() {
|
|
// Two phases that could equally have been a single one with chained
|
|
// rules. Splitting them makes the intent (cleanup, then desugar)
|
|
// explicit and provides per-phase error messages.
|
|
let cleanup = vec![yeast::rule!(
|
|
(assignment
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
=> first_node
|
|
)];
|
|
let desugar = vec![yeast::rule!(
|
|
(first_node
|
|
left: (_) @left
|
|
right: (_) @right
|
|
)
|
|
=> second_node
|
|
)];
|
|
|
|
let dump = run_phased_and_dump(
|
|
"x = 1",
|
|
vec![
|
|
Phase::new("cleanup", PhaseKind::Repeating, cleanup),
|
|
Phase::new("desugar", PhaseKind::Repeating, desugar),
|
|
],
|
|
);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
second_node
|
|
left: identifier "x"
|
|
right: integer "1"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// A looping `.repeated()` rule placed in a phase called `buggy` must
/// produce an error that both names that phase and mentions the rewrite
/// depth limit.
#[test]
fn test_phase_error_includes_phase_name() {
    use yeast::node_types_yaml::schema_from_yaml_with_language;

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema = schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language).unwrap();
    let looping_rule = swap_assignment_rule().repeated();
    let phases = vec![Phase::new(
        "buggy",
        PhaseKind::Repeating,
        vec![looping_rule],
    )];
    let runner = Runner::with_schema(language, &output_schema, &phases);

    let outcome = runner.run("x = 1");
    let err = outcome.expect_err("expected runner to return an error");

    let names_phase = err.contains("Phase `buggy`");
    assert!(
        names_phase,
        "error should mention the failing phase, got: {err}"
    );
    let mentions_depth_limit = err.contains("exceeded maximum rewrite depth");
    assert!(
        mentions_depth_limit,
        "error should mention the depth limit, got: {err}"
    );
}
|
|
|
|
/// Helper: an exhaustive set of OneShot rules covering every node reachable
|
|
/// (via captures) when translating `"x = 1"`.
|
|
fn one_shot_xeq1_rules() -> Vec<Rule> {
|
|
vec![
|
|
yeast::rule!(
|
|
(program (_)* @stmts)
|
|
=>
|
|
(program stmt: {..stmts})
|
|
),
|
|
yeast::rule!(
|
|
(assignment left: (_) @left right: (_) @right)
|
|
=>
|
|
(first_node left: {left} right: {right})
|
|
),
|
|
yeast::rule!((identifier) => (identifier "ID")),
|
|
yeast::rule!((integer) => (integer "INT")),
|
|
]
|
|
}
|
|
|
|
/// Runs the rules from `one_shot_xeq1_rules()` as a single `OneShot` phase
/// over `x = 1`. Per the expected dump, every node is translated:
/// the assignment becomes `first_node` under `stmt`, the identifier becomes
/// `"ID"` and the integer becomes `"INT"`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_one_shot_phase() {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema =
|
|
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
let phases = vec![Phase::new(
|
|
"translate",
|
|
PhaseKind::OneShot,
|
|
one_shot_xeq1_rules(),
|
|
)];
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
|
|
let input = "x = 1";
|
|
let ast = runner.run(input).unwrap();
|
|
let dump = dump_ast(&ast, ast.get_root(), input);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
stmt:
|
|
first_node
|
|
left: identifier "ID"
|
|
right: integer "INT"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// With the `integer` rule removed from the OneShot rule set, translating
/// `x = 1` must fail. The error has to name the `translate` phase and say
/// that no rule matched the `integer` node.
#[test]
fn test_one_shot_phase_errors_when_no_rule_matches() {
    use yeast::node_types_yaml::schema_from_yaml_with_language;

    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema = schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &language).unwrap();

    // Remove the trailing `integer` rule so the recursion finds no rule
    // for `integer` nodes.
    let mut incomplete_rules = one_shot_xeq1_rules();
    incomplete_rules.pop();
    let translate = Phase::new("translate", PhaseKind::OneShot, incomplete_rules);
    let phases = vec![translate];
    let runner = Runner::with_schema(language, &output_schema, &phases);

    let outcome = runner.run("x = 1");
    let err = outcome.expect_err("expected OneShot to error on unmatched node");

    let names_phase = err.contains("Phase `translate`");
    assert!(names_phase, "error should name the phase, got: {err}");
    let describes_unmatched_kind = err.contains("no rule matched") && err.contains("integer");
    assert!(
        describes_unmatched_kind,
        "error should describe the unmatched node kind, got: {err}"
    );
}
|
|
|
|
/// OneShot recursion must apply rules to *captured* nodes, even if the rule
|
|
/// returns a captured child verbatim. A buggy implementation that only
|
|
/// recurses into the children of the rule's output (rather than into the
|
|
/// captures) would leave the returned capture untransformed.
///
/// Here the assignment rule's whole output is `{left}`, so the expected
/// dump is a `program` whose `stmt` is the already-translated identifier.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
|
|
#[test]
|
|
fn test_one_shot_recurses_into_returned_capture() {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema =
|
|
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
let rules = vec![
|
|
yeast::rule!(
|
|
(program (_)* @stmts)
|
|
=>
|
|
(program stmt: {..stmts})
|
|
),
|
|
// Returns the captured `left` verbatim, discarding `right`.
|
|
yeast::rule!(
|
|
(assignment left: (_) @left right: (_) @right)
|
|
=>
|
|
{left}
|
|
),
|
|
yeast::rule!((identifier) => (identifier "ID")),
|
|
yeast::rule!((integer) => (integer "INT")),
|
|
];
|
|
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
|
|
let input = "x = 1";
|
|
let ast = runner.run(input).unwrap();
|
|
let dump = dump_ast(&ast, ast.get_root(), input);
|
|
// `left` is an `identifier`; OneShot must apply the identifier rule to
|
|
// it before the assignment transform returns it verbatim.
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
stmt: identifier "ID"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// OneShot recursion must NOT descend into the children of the rule's output.
|
|
/// A rule may legitimately wrap a captured node in fresh output-schema nodes
|
|
/// that have no matching rule of their own (since rule patterns target the
|
|
/// input schema). Recursing into the output would erroneously try to find
|
|
/// rules for those wrapper kinds and fail.
///
/// The run is expected to succeed, with `{left}` appearing twice in the
/// output (once inside `second_node`, once as `first_node.right`), both
/// times as the translated identifier `"ID"`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
|
|
#[test]
|
|
fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
|
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
|
let schema =
|
|
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
|
let rules = vec![
|
|
yeast::rule!(
|
|
(program (_)* @stmts)
|
|
=>
|
|
(program stmt: {..stmts})
|
|
),
|
|
// Wraps `left` in nested `first_node`/`second_node` output kinds.
|
|
// Neither wrapper kind has a matching rule, so a buggy implementation
|
|
// that recurses into the wrapper's children would error.
|
|
yeast::rule!(
|
|
(assignment left: (_) @left right: (_) @right)
|
|
=>
|
|
(first_node
|
|
left: (second_node left: {left} right: {right})
|
|
right: {left}
|
|
)
|
|
),
|
|
yeast::rule!((identifier) => (identifier "ID")),
|
|
yeast::rule!((integer) => (integer "INT")),
|
|
];
|
|
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
|
let runner = Runner::with_schema(lang, &schema, &phases);
|
|
|
|
let input = "x = 1";
|
|
let ast = runner.run(input).unwrap();
|
|
let dump = dump_ast(&ast, ast.get_root(), input);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
stmt:
|
|
first_node
|
|
left:
|
|
second_node
|
|
left: identifier "ID"
|
|
right: integer "INT"
|
|
right: identifier "ID"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
// ---- Cursor tests ----
|
|
|
|
/// Walks an `AstCursor` over the parse of `x = 1`: down from `program` to
/// `assignment` and into its first child, then back up to the root,
/// checking the boolean result of every move.
#[test]
fn test_cursor_navigation() {
    let language: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(language, &[]);
    let tree = runner.run("x = 1").unwrap();
    let mut walker = AstCursor::new(&tree);

    // A new cursor rests on the root node.
    assert_eq!(walker.node().kind(), "program");

    // Descending once lands on the assignment.
    let moved_down = walker.goto_first_child();
    assert!(moved_down);
    assert_eq!(walker.node().kind(), "assignment");

    // The assignment is the only statement, so it has no next sibling.
    let has_sibling = walker.goto_next_sibling();
    assert!(!has_sibling);

    // The first child of the assignment is a named node.
    let moved_down_again = walker.goto_first_child();
    assert!(moved_down_again);
    assert!(walker.node().is_named());

    // Climbing once returns to the assignment ...
    let moved_up = walker.goto_parent();
    assert!(moved_up);
    assert_eq!(walker.node().kind(), "assignment");

    // ... and climbing again returns to the root.
    let moved_up_again = walker.goto_parent();
    assert!(moved_up_again);
    assert_eq!(walker.node().kind(), "program");

    // The root has no parent to move to.
    let moved_past_root = walker.goto_parent();
    assert!(!moved_past_root);
}
|
|
|
|
/// Combines both rules from `ruby_rules()`: a `for` loop whose pattern is
/// `a, b`. Per the expected dump, the loop becomes an `each` call with
/// block parameter `$tmp-0`, and the destructuring inside the block goes
/// through a second temporary, `$tmp-1`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
#[test]
|
|
fn test_desugar_for_with_multiple_assignment() {
|
|
let dump = run_and_dump("for a, b in list do\n x\nend", ruby_rules());
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
call
|
|
block:
|
|
block
|
|
body:
|
|
block_body
|
|
stmt:
|
|
assignment
|
|
left: identifier "$tmp-1"
|
|
right: identifier "$tmp-0"
|
|
assignment
|
|
left: identifier "a"
|
|
right:
|
|
element_reference
|
|
object: identifier "$tmp-1"
|
|
index: integer "0"
|
|
assignment
|
|
left: identifier "b"
|
|
right:
|
|
element_reference
|
|
object: identifier "$tmp-1"
|
|
index: integer "1"
|
|
identifier "x"
|
|
parameters:
|
|
block_parameters
|
|
parameter: identifier "$tmp-0"
|
|
method: identifier "each"
|
|
receiver: identifier "list"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Regression test: `#{capture}` in a template must render the *source text*
|
|
/// of the captured node, not its arena `Id`. Previously, captures were bound
|
|
/// as `usize`, so `#{cap}` printed the integer id (e.g. `"3"`) via `Display`.
|
|
/// Captures are now bound as `NodeRef`, which has no `Display` impl and
|
|
/// resolves to the captured node's source text via `YeastDisplay`.
///
/// The rule rebuilds the `call` for `foo.bar()` from `#{name}` and
/// `#{recv}`; the expected dump shows `"bar"` and `"foo"` as the text of
/// the rebuilt identifiers.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
|
|
#[test]
|
|
fn test_hash_brace_renders_capture_source_text() {
|
|
let rule = rule!(
|
|
(call
|
|
method: (identifier) @name
|
|
receiver: (identifier) @recv
|
|
)
|
|
=>
|
|
(call
|
|
method: (identifier #{name})
|
|
receiver: (identifier #{recv})
|
|
arguments: (argument_list)
|
|
)
|
|
);
|
|
let dump = run_and_dump("foo.bar()", vec![rule]);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
call
|
|
arguments: argument_list "foo.bar()"
|
|
method: identifier "bar"
|
|
receiver: identifier "foo"
|
|
"#,
|
|
);
|
|
}
|
|
|
|
/// Regression test: non-`NodeRef` values in `#{expr}` still render via their
|
|
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
///
/// The rule replaces any identifier with one whose text is `#{1 + 2}`, so
/// the expected dump for input `foo` shows `identifier "3"`.
// NOTE(review): relative nesting inside the expected raw string is
// significant for the comparison; confirm it against the upstream file.
|
|
#[test]
|
|
fn test_hash_brace_renders_integer_expression() {
|
|
let rule = rule!(
|
|
(identifier) @_
|
|
=>
|
|
(identifier #{1 + 2})
|
|
);
|
|
let dump = run_and_dump("foo", vec![rule]);
|
|
assert_dump_eq(
|
|
&dump,
|
|
r#"
|
|
program
|
|
identifier "3"
|
|
"#,
|
|
);
|
|
}
|