mirror of
https://github.com/github/codeql.git
synced 2026-05-14 11:19:27 +02:00
yeast: Forward-scan bare child patterns instead of strict positional matching
Previously, a bare child pattern in a query took whatever the next
child of the iterator was and either matched or failed: it would not
scan ahead to find a match. So `(foo ("baz"))` against a `foo` whose
implicit `child` field was `["bar", "baz"]` would fail (the pattern
took "bar" first).
Switch to forward-scan semantics: a SingleNode matcher advances through
the iterator until it finds a child that matches its sub-query. Named-only
patterns continue to skip unnamed children without attempting a match on them.
Order is preserved across multiple bare patterns at the same level —
each pattern advances the shared iterator past whatever it consumed —
so a query cannot match children out of source order.
Captures from a failed match attempt are rolled back via a snapshot, so
partial captures from a complex sub-query do not leak across attempts.
Add two regression tests against the `do` body wrapper in a Ruby
for-loop, whose implicit `child` field contains [do, identifier, end]:
- a query for ("end") matches by skipping past `do` and the identifier
- a query for ("end") then ("do") fails, demonstrating order preservation
This commit is contained in:
@@ -113,14 +113,24 @@ _ @anything // capture any node, named or unnamed
|
||||
The two wildcard forms `(_)` and bare `_` differ:
|
||||
|
||||
- `(_)` matches only **named** nodes. When used as a positional pattern,
|
||||
unnamed children (keywords, operators, punctuation) are skipped over to
|
||||
find the next named child.
|
||||
unnamed children (keywords, operators, punctuation) are skipped over.
|
||||
- Bare `_` matches **any** node, named or unnamed, taking whatever is next
|
||||
in the child list.
|
||||
|
||||
Similarly, named-kind patterns like `(call ...)` skip unnamed children;
|
||||
unnamed-kind patterns like `("end")` or `"end"` consume the next child
|
||||
unconditionally:
|
||||
Bare child patterns are matched **forward-scan**: each pattern advances
|
||||
through the iterator until it finds a child that matches, skipping
|
||||
non-matching children along the way. So `(foo ("baz"))` against a `foo`
|
||||
whose children are `[bar, baz]` succeeds — the matcher scans past `bar`
|
||||
and matches `baz`. The iterator advances as it goes, so subsequent
|
||||
patterns can never match children that appear earlier in source order
|
||||
than already-matched ones.
|
||||
|
||||
For named-only patterns (`(_)`, `(some_kind ...)`), the scan additionally
|
||||
skips past unnamed tokens without trying to match them, since they can
|
||||
never match anyway.
|
||||
|
||||
Anchors (`.`) for forcing immediate adjacency, like in tree-sitter
|
||||
queries, are not supported.
|
||||
|
||||
```rust
|
||||
(for
|
||||
|
||||
@@ -167,25 +167,28 @@ impl QueryListElem {
|
||||
}
|
||||
}
|
||||
QueryListElem::SingleNode(sub_query) => {
|
||||
if sub_query.matches_named_only() {
|
||||
// Skip unnamed children, matching tree-sitter semantics
|
||||
// where (_) only matches named nodes.
|
||||
loop {
|
||||
match remaining_children.next() {
|
||||
Some(child) => {
|
||||
let node = ast.get_node(child).unwrap();
|
||||
if node.is_named() {
|
||||
return sub_query.do_match(ast, child, matches);
|
||||
}
|
||||
// Skip unnamed child, continue to next
|
||||
}
|
||||
None => return Ok(false),
|
||||
// Forward-scan semantics: advance through the iterator until
|
||||
// we find a child that matches `sub_query`. Skip ahead past
|
||||
// unnamed children when the sub-query is named-only (so they
|
||||
// can never match anyway). On a match attempt that fails,
|
||||
// restore the captures so partial captures from a complex
|
||||
// sub-query don't leak.
|
||||
let skip_unnamed = sub_query.matches_named_only();
|
||||
loop {
|
||||
let Some(child) = remaining_children.next() else {
|
||||
return Ok(false);
|
||||
};
|
||||
if skip_unnamed {
|
||||
let node = ast.get_node(child).unwrap();
|
||||
if !node.is_named() {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else if let Some(child) = remaining_children.next() {
|
||||
sub_query.do_match(ast, child, matches)
|
||||
} else {
|
||||
Ok(false)
|
||||
let snapshot = matches.clone();
|
||||
if sub_query.do_match(ast, child, matches)? {
|
||||
return Ok(true);
|
||||
}
|
||||
*matches = snapshot;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -299,6 +299,58 @@ fn test_bare_forms_in_field_position() {
|
||||
assert!(!op.is_named());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_forward_scan_finds_unnamed_token_late() {
|
||||
// The `do` named-wrapper node has three children in its implicit
|
||||
// `child` field, in source order: `do` (unnamed kw), the body
|
||||
// identifier, and `end` (unnamed kw). Forward-scan semantics let a
|
||||
// query for `("end")` skip past the first two and match the third.
|
||||
// Without forward-scan, the matcher took the first child unconditionally
|
||||
// and failed.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
// Navigate: program > for > do (the body wrapper).
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
cursor.goto_first_child(); // program -> `for`
|
||||
cursor.goto_first_child(); // first child of `for`; the loop below walks siblings to the named `do`
|
||||
while cursor.node().kind() != "do" || !cursor.node().is_named() {
|
||||
assert!(cursor.goto_next_sibling(), "expected to find named `do`");
|
||||
}
|
||||
let do_id = cursor.node().id();
|
||||
|
||||
let query = yeast::query!((do ("end") @kw)); // single bare `("end")` pattern, captured as `kw`
|
||||
let mut captures = yeast::captures::Captures::new();
|
||||
let matched = query.do_match(&ast, do_id, &mut captures).unwrap();
|
||||
assert!(matched, "forward-scan should find the `end` keyword");
|
||||
let kw = ast.get_node(captures.get_var("kw").unwrap()).unwrap();
|
||||
assert_eq!(kw.kind(), "end");
|
||||
assert!(!kw.is_named());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_forward_scan_preserves_order() {
|
||||
// Bare patterns are scanned left-to-right and consume positions in
|
||||
// order. A query for ("end") then ("do") should fail because `do`
|
||||
// appears before `end` in the source order; once forward-scan has
|
||||
// consumed `end`, the iterator is exhausted.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
cursor.goto_first_child(); // descend from the cursor's start node (presumably program -> `for`)
|
||||
cursor.goto_first_child(); // first child of that node; the loop below walks siblings to the named `do`
|
||||
while cursor.node().kind() != "do" || !cursor.node().is_named() {
|
||||
assert!(cursor.goto_next_sibling(), "expected to find named `do`");
|
||||
}
|
||||
let do_id = cursor.node().id();
|
||||
|
||||
let query = yeast::query!((do ("end") @first ("do") @second)); // patterns deliberately listed in reverse source order
|
||||
let mut captures = yeast::captures::Captures::new();
|
||||
let matched = query.do_match(&ast, do_id, &mut captures).unwrap();
|
||||
assert!(!matched, "scan must not go backwards"); // the match as a whole must be rejected
|
||||
}
|
||||
|
||||
// ---- Tree builder tests ----
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user