diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index d49ff96f11d..45e0006f909 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -61,6 +61,22 @@ rule matches, the node is kept and its children are processed recursively. A rule can replace one node with zero nodes (deletion), one node (rewriting), or multiple nodes (expansion). +By default a rule fires **at most once on a given node**: after firing, the +engine will not re-try that same rule on the result root. Other rules may +still fire on the result, and the rule may still fire on different nodes +(including the result's children). To opt into iterative behaviour — when a +rule's output is intentionally re-matched by the same rule — call +`.repeated()` on the constructed `Rule`: + +```rust +let r = yeast::rule!((foo ...) => (foo ...)).repeated(); +``` + +Without `.repeated()`, a rule whose output happens to match its own query +simply fires once and stops. With `.repeated()`, the rule is allowed to +re-match indefinitely; the runner still enforces a global rewrite-depth +limit (currently 100) as a safety net against accidental cycles. + ## Query language Queries use a syntax inspired by diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 46629e19840..0c66a0c8521 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -471,11 +471,29 @@ pub type Transform = Box< pub struct Rule { query: QueryNode, transform: Transform, + /// If true, after this rule fires on a node the engine will try to + /// re-apply this same rule on the result root. Defaults to false: + /// each rule fires at most once on a given node, which prevents + /// accidental loops where a rule's output matches its own query. + repeated: bool, } impl Rule { pub fn new(query: QueryNode, transform: Transform) -> Self { - Self { query, transform } + Self { + query, + transform, + repeated: false, + } + } + + /// Mark this rule as allowed to fire multiple times on the same node. + /// Use when the rule is intentionally iterative (its output may match + /// its own query). Without this, a rule fires at most once per node; + /// other rules can still fire on the result. + pub fn repeated(mut self) -> Self { + self.repeated = true; + self } fn try_rule( @@ -537,7 +555,7 @@ fn apply_rules( fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_rules_inner(&index, ast, id, fresh, 0) + apply_rules_inner(&index, ast, id, fresh, 0, None) } fn apply_rules_inner( @@ -546,6 +564,7 @@ fn apply_rules_inner( id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, + skip_rule: Option<*const Rule>, ) -> Result, String> { if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( @@ -556,7 +575,16 @@ fn apply_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { + let rule_ptr = *rule as *const Rule; + if Some(rule_ptr) == skip_rule { + continue; + } if let Some(result_node) = rule.try_rule(ast, id, fresh)? { + // For non-repeated rules, suppress further application of *this* + // rule on the result root, so a rule whose output matches its own + // query doesn't loop. Other rules and child traversal are + // unaffected. + let next_skip = if rule.repeated { None } else { Some(rule_ptr) }; let mut results = Vec::new(); for node in result_node { results.extend(apply_rules_inner( @@ -565,6 +593,7 @@ fn apply_rules_inner( node, fresh, rewrite_depth + 1, + next_skip, )?); } return Ok(results); @@ -579,13 +608,14 @@ fn apply_rules_inner( .collect(); // recursively descend into all the fields - // Child traversal does not increment rewrite depth + // Child traversal does not increment rewrite depth and starts fresh + // (no rule is skipped on child subtrees). let mut changed = false; let mut new_fields = BTreeMap::new(); for (field_id, children) in field_entries { let mut new_children = Vec::new(); for child_id in children { - let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth)?; + let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?; if result.len() != 1 || result[0] != child_id { changed = true; } diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index e4485857bff..aebda65eae4 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -22,6 +22,18 @@ fn run_and_dump(input: &str, rules: Vec) -> String { dump_ast(&ast, ast.get_root(), input) } +/// Helper: like `run_and_dump`, but returns the runner error (if any) +/// instead of unwrapping. +fn run_and_get_error(input: &str, rules: Vec) -> String { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let runner = Runner::with_schema(lang, &schema, &rules); + runner + .run(input) + .expect_err("expected runner to return an error") +} + /// Assert that a dump equals the expected string, treating the expected /// string as an indented multiline literal: leading/trailing blank lines /// are stripped, and the common leading indentation is removed from every @@ -382,6 +394,51 @@ fn test_chained_rules_output_only_kind() { ); } +// A rule that swaps `assignment.left` and `assignment.right`. Each +// application produces another `assignment` whose query the rule +// matches again, so without the once-per-node default it would loop. +fn swap_assignment_rule() -> Rule { + yeast::rule!( + (assignment + left: (_) @left + right: (_) @right + ) + => + (assignment + left: {right} + right: {left} + ) + ) +} + +#[test] +fn test_repeated_rule_hits_depth_limit() { + // With `.repeated()` the rule is allowed to fire on its own output, + // which cycles forever and trips the rewrite-depth safety net. + let err = run_and_get_error("x = 1", vec![swap_assignment_rule().repeated()]); + assert!( + err.contains("exceeded maximum rewrite depth"), + "expected depth-limit error, got: {err}" + ); +} + +#[test] +fn test_default_rule_fires_at_most_once_per_node() { + // Without `.repeated()` (the default), a rule fires at most once on a + // given node. The swap therefore happens exactly once and the desugaring + // terminates cleanly. + let dump = run_and_dump("x = 1", vec![swap_assignment_rule()]); + assert_dump_eq( + &dump, + r#" + program + assignment + left: integer "1" + right: identifier "x" + "#, + ); +} + // ---- Cursor tests ---- #[test]