From 9a9483697445b4ab52203a7d280500131466ea50 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 6 May 2026 12:33:18 +0000 Subject: [PATCH] yeast: Add per-rule .repeated() flag to opt into iterative matching Previously, after a rule fired the engine would always re-try that same rule on the result root. A rule whose output matched its own query (intentionally or by accident) would loop until the global MAX_REWRITE_DEPTH safety net kicked in. Make the default behavior fire-once-per-node: after a rule fires on node N, the engine no longer tries that same rule on the result root. Other rules and child traversal are unaffected. Rules that intentionally rewrite iteratively can opt into the old behavior via the new Rule::repeated() builder method. Add two regression tests using a self-swapping assignment rule: - with .repeated(), the swap loops and trips the depth limit - without it (default), the swap fires once and terminates --- shared/yeast/doc/yeast.md | 16 +++++++++++ shared/yeast/src/lib.rs | 38 ++++++++++++++++++++++--- shared/yeast/tests/test.rs | 57 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index d49ff96f11d..45e0006f909 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -61,6 +61,22 @@ rule matches, the node is kept and its children are processed recursively. A rule can replace one node with zero nodes (deletion), one node (rewriting), or multiple nodes (expansion). +By default a rule fires **at most once on a given node**: after firing, the +engine will not re-try that same rule on the result root. Other rules may +still fire on the result, and the rule may still fire on different nodes +(including the result's children). To opt into iterative behaviour — when a +rule's output is intentionally re-matched by the same rule — call +`.repeated()` on the constructed `Rule`: + +```rust +let r = yeast::rule!((foo ...) => (foo ...)).repeated(); +``` + +Without `.repeated()`, a rule whose output happens to match its own query +simply fires once and stops. With `.repeated()`, the rule is allowed to +re-match indefinitely; the runner still enforces a global rewrite-depth +limit (currently 100) as a safety net against accidental cycles. + ## Query language Queries use a syntax inspired by diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 46629e19840..0c66a0c8521 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -471,11 +471,29 @@ pub type Transform = Box< pub struct Rule { query: QueryNode, transform: Transform, + /// If true, after this rule fires on a node the engine will try to + /// re-apply this same rule on the result root. Defaults to false: + /// each rule fires at most once on a given node, which prevents + /// accidental loops where a rule's output matches its own query. + repeated: bool, } impl Rule { pub fn new(query: QueryNode, transform: Transform) -> Self { - Self { query, transform } + Self { + query, + transform, + repeated: false, + } + } + + /// Mark this rule as allowed to fire multiple times on the same node. + /// Use when the rule is intentionally iterative (its output may match + /// its own query). Without this, a rule fires at most once per node; + /// other rules can still fire on the result. + pub fn repeated(mut self) -> Self { + self.repeated = true; + self } fn try_rule( @@ -537,7 +555,7 @@ fn apply_rules( fresh: &tree_builder::FreshScope, ) -> Result, String> { let index = RuleIndex::new(rules); - apply_rules_inner(&index, ast, id, fresh, 0) + apply_rules_inner(&index, ast, id, fresh, 0, None) } fn apply_rules_inner( @@ -546,6 +564,7 @@ fn apply_rules_inner( id: Id, fresh: &tree_builder::FreshScope, rewrite_depth: usize, + skip_rule: Option<*const Rule>, ) -> Result, String> { if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( @@ -556,7 +575,16 @@ fn apply_rules_inner( let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); for rule in index.rules_for_kind(node_kind) { + let rule_ptr = *rule as *const Rule; + if Some(rule_ptr) == skip_rule { + continue; + } if let Some(result_node) = rule.try_rule(ast, id, fresh)? { + // For non-repeated rules, suppress further application of *this* + // rule on the result root, so a rule whose output matches its own + // query doesn't loop. Other rules and child traversal are + // unaffected. + let next_skip = if rule.repeated { None } else { Some(rule_ptr) }; let mut results = Vec::new(); for node in result_node { results.extend(apply_rules_inner( @@ -565,6 +593,7 @@ fn apply_rules_inner( node, fresh, rewrite_depth + 1, + next_skip, )?); } return Ok(results); @@ -579,13 +608,14 @@ fn apply_rules_inner( .collect(); // recursively descend into all the fields - // Child traversal does not increment rewrite depth + // Child traversal does not increment rewrite depth and starts fresh + // (no rule is skipped on child subtrees). let mut changed = false; let mut new_fields = BTreeMap::new(); for (field_id, children) in field_entries { let mut new_children = Vec::new(); for child_id in children { - let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth)?; + let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?; if result.len() != 1 || result[0] != child_id { changed = true; } diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index e4485857bff..aebda65eae4 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -22,6 +22,18 @@ fn run_and_dump(input: &str, rules: Vec) -> String { dump_ast(&ast, ast.get_root(), input) } +/// Helper: like `run_and_dump`, but returns the runner error (if any) +/// instead of unwrapping. +fn run_and_get_error(input: &str, rules: Vec) -> String { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let runner = Runner::with_schema(lang, &schema, &rules); + runner + .run(input) + .expect_err("expected runner to return an error") +} + /// Assert that a dump equals the expected string, treating the expected /// string as an indented multiline literal: leading/trailing blank lines /// are stripped, and the common leading indentation is removed from every @@ -382,6 +394,51 @@ fn test_chained_rules_output_only_kind() { ); } +// A rule that swaps `assignment.left` and `assignment.right`. Each +// application produces another `assignment` whose query the rule +// matches again, so without the once-per-node default it would loop. +fn swap_assignment_rule() -> Rule { + yeast::rule!( + (assignment + left: (_) @left + right: (_) @right + ) + => + (assignment + left: {right} + right: {left} + ) + ) +} + +#[test] +fn test_repeated_rule_hits_depth_limit() { + // With `.repeated()` the rule is allowed to fire on its own output, + // which cycles forever and trips the rewrite-depth safety net. + let err = run_and_get_error("x = 1", vec![swap_assignment_rule().repeated()]); + assert!( + err.contains("exceeded maximum rewrite depth"), + "expected depth-limit error, got: {err}" + ); +} + +#[test] +fn test_default_rule_fires_at_most_once_per_node() { + // Without `.repeated()` (the default), a rule fires at most once on a + // given node. The swap therefore happens exactly once and the desugaring + // terminates cleanly. + let dump = run_and_dump("x = 1", vec![swap_assignment_rule()]); + assert_dump_eq( + &dump, + r#" + program + assignment + left: integer "1" + right: identifier "x" + "#, + ); +} + // ---- Cursor tests ---- #[test]