mirror of
https://github.com/github/codeql.git
synced 2026-05-14 11:19:27 +02:00
yeast: Add per-rule .repeated() flag to opt into iterative matching
Previously, after a rule fired the engine would always re-try that same rule on the result root. A rule whose output matched its own query (intentionally or by accident) would loop until the global MAX_REWRITE_DEPTH safety net kicked in. Make the default behavior fire-once-per-node: after a rule fires on node N, the engine no longer tries that same rule on the result root. Other rules and child traversal are unaffected. Rules that intentionally rewrite iteratively can opt into the old behavior via the new Rule::repeated() builder method. Add two regression tests using a self-swapping assignment rule: - with .repeated(), the swap loops and trips the depth limit - without it (default), the swap fires once and terminates
This commit is contained in:
@@ -61,6 +61,22 @@ rule matches, the node is kept and its children are processed recursively.
|
||||
A rule can replace one node with zero nodes (deletion), one node (rewriting),
|
||||
or multiple nodes (expansion).
|
||||
|
||||
By default a rule fires **at most once on a given node**: after firing, the
|
||||
engine will not re-try that same rule on the result root. Other rules may
|
||||
still fire on the result, and the rule may still fire on different nodes
|
||||
(including the result's children). To opt into iterative behaviour — when a
|
||||
rule's output is intentionally re-matched by the same rule — call
|
||||
`.repeated()` on the constructed `Rule`:
|
||||
|
||||
```rust
|
||||
let r = yeast::rule!((foo ...) => (foo ...)).repeated();
|
||||
```
|
||||
|
||||
Without `.repeated()`, a rule whose output happens to match its own query
|
||||
simply fires once and stops. With `.repeated()`, the rule is allowed to
|
||||
re-match indefinitely; the runner still enforces a global rewrite-depth
|
||||
limit (currently 100) as a safety net against accidental cycles.
|
||||
|
||||
## Query language
|
||||
|
||||
Queries use a syntax inspired by
|
||||
|
||||
@@ -471,11 +471,29 @@ pub type Transform = Box<
|
||||
pub struct Rule {
|
||||
query: QueryNode,
|
||||
transform: Transform,
|
||||
/// If true, after this rule fires on a node the engine will try to
|
||||
/// re-apply this same rule on the result root. Defaults to false:
|
||||
/// each rule fires at most once on a given node, which prevents
|
||||
/// accidental loops where a rule's output matches its own query.
|
||||
repeated: bool,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn new(query: QueryNode, transform: Transform) -> Self {
|
||||
Self { query, transform }
|
||||
Self {
|
||||
query,
|
||||
transform,
|
||||
repeated: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark this rule as allowed to fire multiple times on the same node.
|
||||
/// Use when the rule is intentionally iterative (its output may match
|
||||
/// its own query). Without this, a rule fires at most once per node;
|
||||
/// other rules can still fire on the result.
|
||||
pub fn repeated(mut self) -> Self {
|
||||
self.repeated = true;
|
||||
self
|
||||
}
|
||||
|
||||
fn try_rule(
|
||||
@@ -537,7 +555,7 @@ fn apply_rules(
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
let index = RuleIndex::new(rules);
|
||||
apply_rules_inner(&index, ast, id, fresh, 0)
|
||||
apply_rules_inner(&index, ast, id, fresh, 0, None)
|
||||
}
|
||||
|
||||
fn apply_rules_inner(
|
||||
@@ -546,6 +564,7 @@ fn apply_rules_inner(
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
skip_rule: Option<*const Rule>,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
if rewrite_depth > MAX_REWRITE_DEPTH {
|
||||
return Err(format!(
|
||||
@@ -556,7 +575,16 @@ fn apply_rules_inner(
|
||||
|
||||
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
|
||||
for rule in index.rules_for_kind(node_kind) {
|
||||
let rule_ptr = *rule as *const Rule;
|
||||
if Some(rule_ptr) == skip_rule {
|
||||
continue;
|
||||
}
|
||||
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
|
||||
// For non-repeated rules, suppress further application of *this*
|
||||
// rule on the result root, so a rule whose output matches its own
|
||||
// query doesn't loop. Other rules and child traversal are
|
||||
// unaffected.
|
||||
let next_skip = if rule.repeated { None } else { Some(rule_ptr) };
|
||||
let mut results = Vec::new();
|
||||
for node in result_node {
|
||||
results.extend(apply_rules_inner(
|
||||
@@ -565,6 +593,7 @@ fn apply_rules_inner(
|
||||
node,
|
||||
fresh,
|
||||
rewrite_depth + 1,
|
||||
next_skip,
|
||||
)?);
|
||||
}
|
||||
return Ok(results);
|
||||
@@ -579,13 +608,14 @@ fn apply_rules_inner(
|
||||
.collect();
|
||||
|
||||
// recursively descend into all the fields
|
||||
// Child traversal does not increment rewrite depth
|
||||
// Child traversal does not increment rewrite depth and starts fresh
|
||||
// (no rule is skipped on child subtrees).
|
||||
let mut changed = false;
|
||||
let mut new_fields = BTreeMap::new();
|
||||
for (field_id, children) in field_entries {
|
||||
let mut new_children = Vec::new();
|
||||
for child_id in children {
|
||||
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth)?;
|
||||
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
|
||||
if result.len() != 1 || result[0] != child_id {
|
||||
changed = true;
|
||||
}
|
||||
|
||||
@@ -22,6 +22,18 @@ fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
|
||||
dump_ast(&ast, ast.get_root(), input)
|
||||
}
|
||||
|
||||
/// Helper: like `run_and_dump`, but returns the runner error (if any)
|
||||
/// instead of unwrapping.
|
||||
fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let runner = Runner::with_schema(lang, &schema, &rules);
|
||||
runner
|
||||
.run(input)
|
||||
.expect_err("expected runner to return an error")
|
||||
}
|
||||
|
||||
/// Assert that a dump equals the expected string, treating the expected
|
||||
/// string as an indented multiline literal: leading/trailing blank lines
|
||||
/// are stripped, and the common leading indentation is removed from every
|
||||
@@ -382,6 +394,51 @@ fn test_chained_rules_output_only_kind() {
|
||||
);
|
||||
}
|
||||
|
||||
// A rule that swaps `assignment.left` and `assignment.right`. Each
|
||||
// application produces another `assignment` whose query the rule
|
||||
// matches again, so without the once-per-node default it would loop.
|
||||
fn swap_assignment_rule() -> Rule {
|
||||
yeast::rule!(
|
||||
(assignment
|
||||
left: (_) @left
|
||||
right: (_) @right
|
||||
)
|
||||
=>
|
||||
(assignment
|
||||
left: {right}
|
||||
right: {left}
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_repeated_rule_hits_depth_limit() {
|
||||
// With `.repeated()` the rule is allowed to fire on its own output,
|
||||
// which cycles forever and trips the rewrite-depth safety net.
|
||||
let err = run_and_get_error("x = 1", vec![swap_assignment_rule().repeated()]);
|
||||
assert!(
|
||||
err.contains("exceeded maximum rewrite depth"),
|
||||
"expected depth-limit error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_rule_fires_at_most_once_per_node() {
|
||||
// Without `.repeated()` (the default), a rule fires at most once on a
|
||||
// given node. The swap therefore happens exactly once and the desugaring
|
||||
// terminates cleanly.
|
||||
let dump = run_and_dump("x = 1", vec![swap_assignment_rule()]);
|
||||
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
assignment
|
||||
left: integer "1"
|
||||
right: identifier "x"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
// ---- Cursor tests ----
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user