diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md index 4deb89d3058..5a6267fba0f 100644 --- a/shared/yeast/doc/yeast.md +++ b/shared/yeast/doc/yeast.md @@ -113,14 +113,24 @@ _ @anything // capture any node, named or unnamed The two wildcard forms `(_)` and bare `_` differ: - `(_)` matches only **named** nodes. When used as a positional pattern, - unnamed children (keywords, operators, punctuation) are skipped over to - find the next named child. + unnamed children (keywords, operators, punctuation) are skipped over. - Bare `_` matches **any** node, named or unnamed, taking whatever is next in the child list. -Similarly, named-kind patterns like `(call ...)` skip unnamed children; -unnamed-kind patterns like `("end")` or `"end"` consume the next child -unconditionally: +Bare child patterns use **forward-scan** matching: each pattern advances +through the iterator until it finds a child that matches, skipping +non-matching children along the way. So `(foo ("baz"))` against a `foo` +whose children are `[bar, baz]` succeeds — the matcher scans past `bar` +and matches `baz`. The iterator advances as it goes, so subsequent +patterns can never match children that appear earlier in source order +than already-matched ones. + +For named-only patterns (`(_)`, `(some_kind ...)`), the scan additionally +skips past unnamed tokens without trying to match them, since they can +never match anyway. + +Anchors (`.`) for forcing immediate adjacency, like in tree-sitter +queries, are not supported. ```rust (for diff --git a/shared/yeast/src/query.rs b/shared/yeast/src/query.rs index 710aaa7477d..01e5e22ad73 100644 --- a/shared/yeast/src/query.rs +++ b/shared/yeast/src/query.rs @@ -167,25 +167,28 @@ impl QueryListElem { } } QueryListElem::SingleNode(sub_query) => { - if sub_query.matches_named_only() { - // Skip unnamed children, matching tree-sitter semantics - // where (_) only matches named nodes. 
- loop { - match remaining_children.next() { - Some(child) => { - let node = ast.get_node(child).unwrap(); - if node.is_named() { - return sub_query.do_match(ast, child, matches); - } - // Skip unnamed child, continue to next - } - None => return Ok(false), + // Forward-scan semantics: advance through the iterator until + // we find a child that matches `sub_query`. Skip ahead past + // unnamed children when the sub-query is named-only (so they + // can never match anyway). On a match attempt that fails, + // restore the captures so partial captures from a complex + // sub-query don't leak. + let skip_unnamed = sub_query.matches_named_only(); + loop { + let Some(child) = remaining_children.next() else { + return Ok(false); + }; + if skip_unnamed { + let node = ast.get_node(child).unwrap(); + if !node.is_named() { + continue; } } - } else if let Some(child) = remaining_children.next() { - sub_query.do_match(ast, child, matches) - } else { - Ok(false) + let snapshot = matches.clone(); + if sub_query.do_match(ast, child, matches)? { + return Ok(true); + } + *matches = snapshot; } } } diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 594e4cb35bc..f7b363294bc 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -299,6 +299,58 @@ fn test_bare_forms_in_field_position() { assert!(!op.is_named()); } +#[test] +fn test_forward_scan_finds_unnamed_token_late() { + // The `do` named-wrapper node has three children in its implicit + // `child` field, in source order: `do` (unnamed kw), the body + // identifier, and `end` (unnamed kw). Forward-scan semantics let a + // query for `("end")` skip past the first two and match the third. + // Without forward-scan, the matcher took the first child unconditionally + // and failed. + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("for x in list do\n y\nend").unwrap(); + + // Navigate: program > for > do (the body wrapper). 
+ let mut cursor = AstCursor::new(&ast); + cursor.goto_first_child(); // for + cursor.goto_first_child(); // do (the body) + while cursor.node().kind() != "do" || !cursor.node().is_named() { + assert!(cursor.goto_next_sibling(), "expected to find named `do`"); + } + let do_id = cursor.node().id(); + + let query = yeast::query!((do ("end") @kw)); + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, do_id, &mut captures).unwrap(); + assert!(matched, "forward-scan should find the `end` keyword"); + let kw = ast.get_node(captures.get_var("kw").unwrap()).unwrap(); + assert_eq!(kw.kind(), "end"); + assert!(!kw.is_named()); +} + +#[test] +fn test_forward_scan_preserves_order() { + // Bare patterns are scanned left-to-right and consume positions in + // order. A query for ("end") then ("do") should fail because `do` + // appears before `end` in the source order; once forward-scan has + // consumed `end`, the iterator is exhausted. + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("for x in list do\n y\nend").unwrap(); + + let mut cursor = AstCursor::new(&ast); + cursor.goto_first_child(); + cursor.goto_first_child(); + while cursor.node().kind() != "do" || !cursor.node().is_named() { + assert!(cursor.goto_next_sibling(), "expected to find named `do`"); + } + let do_id = cursor.node().id(); + + let query = yeast::query!((do ("end") @first ("do") @second)); + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, do_id, &mut captures).unwrap(); + assert!(!matched, "scan must not go backwards"); +} + // ---- Tree builder tests ---- #[test]