mirror of
https://github.com/github/codeql.git
synced 2026-06-26 15:17:06 +02:00
yeast: Add @@name raw-capture syntax to rule!
The `@@name` capture marker in `rule!` queries skips the auto-translate prefix for that specific capture, letting the body see the original capture (and thus delay its translation using `ctx.translate` until it becomes convenient). Regular `@name` captures continue to be auto-translated as before. Specifically these are translated _eagerly_, before the main body of the rewrite rule is run. I settled on `@@` as the syntax because it did not add new symbols that the user has to keep track of (it's still a kind of capture), but it's still visually distinct enough that the user should be able to tell that there's something special going on. In principle one could accidentally write one form of capture where the other was intended, but in practice this would result in code that did not compile (because the types would not match).
This commit is contained in:
@@ -22,10 +22,9 @@ pub fn parse_query_top(input: TokenStream) -> Result<TokenStream> {
|
||||
/// Parse a single query node (possibly with a trailing `@capture`).
|
||||
fn parse_query_node(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
let base = parse_query_atom(tokens)?;
|
||||
// Check for trailing @capture
|
||||
// Check for trailing @capture or @@capture
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume @
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
let name_str = capture_name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -159,8 +158,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
|
||||
} else {
|
||||
let child = if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = consume_capture_marker(tokens)?;
|
||||
let name_str = capture_name.to_string();
|
||||
quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -650,6 +648,9 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
struct CaptureInfo {
|
||||
/// Capture name as written after the `@` / `@@` marker (marker excluded).
name: String,
|
||||
/// Multiplicity recorded for this capture (e.g. `Single` or `Repeated`),
/// derived from the surrounding query when the capture is extracted.
multiplicity: CaptureMultiplicity,
|
||||
/// `true` for `@@name` captures: the auto-translate prefix skips them,
|
||||
/// so the bound `NodeRef` refers to the raw (input-schema) node.
|
||||
raw: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
@@ -708,6 +709,14 @@ fn extract_captures_inner(
|
||||
extract_captures_inner(&mut inner, captures, child_mult);
|
||||
}
|
||||
TokenTree::Punct(p) if p.as_char() == '@' => {
|
||||
// `@@name` marks the capture as raw (skip auto-translate).
|
||||
let raw = matches!(
|
||||
tokens.peek(),
|
||||
Some(TokenTree::Punct(p)) if p.as_char() == '@'
|
||||
);
|
||||
if raw {
|
||||
tokens.next(); // consume the second `@`
|
||||
}
|
||||
if let Some(TokenTree::Ident(name)) = tokens.next() {
|
||||
let mult = if parent_mult == CaptureMultiplicity::Repeated
|
||||
|| last_mult == CaptureMultiplicity::Repeated
|
||||
@@ -723,6 +732,7 @@ fn extract_captures_inner(
|
||||
captures.push(CaptureInfo {
|
||||
name: name.to_string(),
|
||||
multiplicity: mult,
|
||||
raw,
|
||||
});
|
||||
}
|
||||
last_mult = CaptureMultiplicity::Single;
|
||||
@@ -776,6 +786,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
// Parse query
|
||||
let query_code = parse_query_top(query_stream.clone())?;
|
||||
|
||||
// Capture names marked `@@name` (raw) — passed to the auto-translate
|
||||
// prefix as a skip list so those captures keep their input-schema ids.
|
||||
let raw_capture_names: Vec<&str> = captures
|
||||
.iter()
|
||||
.filter(|c| c.raw)
|
||||
.map(|c| c.name.as_str())
|
||||
.collect();
|
||||
|
||||
// Generate capture bindings
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
@@ -891,11 +909,14 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// Auto-translation prefix: recursively translate every
|
||||
// captured node before invoking the user's transform body.
|
||||
// captured node before invoking the user's transform body,
|
||||
// except for `@@name` captures listed in `__skip` which the
|
||||
// body consumes raw.
|
||||
// For OneShot rules this preserves the legacy behaviour
|
||||
// (input-schema captures translated to output-schema
|
||||
// nodes); for Repeating rules it is a no-op.
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||
let __skip: &[&str] = &[#(#raw_capture_names),*];
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx, __skip)?;
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
let __result: Vec<usize> = { #transform_body };
|
||||
@@ -1013,6 +1034,16 @@ fn peek_is_at(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@')
|
||||
}
|
||||
|
||||
/// Consume an `@` or `@@` capture marker and the following name ident.
|
||||
/// Caller has already verified `peek_is_at(tokens)`.
|
||||
fn consume_capture_marker(tokens: &mut Tokens) -> Result<Ident> {
|
||||
tokens.next(); // consume the first `@`
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume the second `@` of `@@`
|
||||
}
|
||||
expect_ident(tokens, "expected capture name after `@` or `@@`")
|
||||
}
|
||||
|
||||
fn peek_is_literal(tokens: &mut Tokens) -> bool {
|
||||
matches!(tokens.peek(), Some(TokenTree::Literal(_)))
|
||||
}
|
||||
@@ -1113,8 +1144,7 @@ fn expect_repetition(tokens: &mut Tokens) -> Result<TokenStream> {
|
||||
|
||||
fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next(); // consume @
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
let name_str = name.to_string();
|
||||
Ok(quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -1141,13 +1171,12 @@ fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result<Tok
|
||||
}
|
||||
}
|
||||
|
||||
/// If `@name` follows a Repeated list element, wrap each child SingleNode
|
||||
/// inside the repetition with a Capture. This matches tree-sitter semantics
|
||||
/// where `(_)* @name` captures each matched node.
|
||||
/// If `@name` (or `@@name`) follows a Repeated list element, wrap each
|
||||
/// child SingleNode inside the repetition with a Capture. This matches
|
||||
/// tree-sitter semantics where `(_)* @name` captures each matched node.
|
||||
fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result<TokenStream> {
|
||||
if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name = consume_capture_marker(tokens)?;
|
||||
let name_str = name.to_string();
|
||||
// Re-parse the element isn't practical, so we generate a wrapper
|
||||
// that creates a new Repeated with each child wrapped in a capture.
|
||||
|
||||
@@ -292,6 +292,37 @@ Inside `rule!`, captures are Rust variables, so `{name}` inserts a
|
||||
single capture (`Id`) and `{..name}` splices a repeated capture
|
||||
(`Vec<Id>`).
|
||||
|
||||
### Raw captures (`@@name`)
|
||||
|
||||
The default `@name` capture marker is *auto-translated*: in OneShot
|
||||
phases the macro recursively translates the captured node before
|
||||
binding it, so `{name}` in the output template splices a node that
|
||||
already conforms to the output schema.
|
||||
|
||||
For rules that need the raw (input-schema) capture — typically to read
|
||||
its source text or to translate it explicitly with mutable context
|
||||
state between calls — use `@@name` instead. The body sees the original
|
||||
input-schema `NodeRef`:
|
||||
|
||||
```rust
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
// raw_lhs is untranslated: read its original source text.
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
// rhs is already translated by the auto-translate prefix.
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
);
|
||||
```
|
||||
|
||||
Mix `@` and `@@` freely in the same rule. In a Repeating phase both
|
||||
markers are equivalent (auto-translation is a no-op for repeating
|
||||
rules).
|
||||
|
||||
## Complete example: for-loop desugaring
|
||||
|
||||
This rule rewrites Ruby's `for pat in val do body end` into
|
||||
|
||||
@@ -80,6 +80,28 @@ impl Captures {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like [`try_map_all_captures`] but leaves captures whose name appears
|
||||
/// in `skip` untouched. Used by the `rule!` macro to support `@@name`
|
||||
/// (raw) captures alongside the default auto-translated `@name`
|
||||
/// captures.
|
||||
pub fn try_map_captures_except<E>(
|
||||
&mut self,
|
||||
skip: &[&str],
|
||||
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
|
||||
) -> Result<(), E> {
|
||||
for (name, ids) in self.captures.iter_mut() {
|
||||
if skip.contains(name) {
|
||||
continue;
|
||||
}
|
||||
let mut new_ids = Vec::with_capacity(ids.len());
|
||||
for &id in ids.iter() {
|
||||
new_ids.extend(f(id)?);
|
||||
}
|
||||
*ids = new_ids;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
|
||||
if let Some(from_ids) = self.captures.get(from) {
|
||||
let new_values = from_ids.iter().copied().map(f).collect();
|
||||
|
||||
@@ -757,13 +757,14 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
}
|
||||
|
||||
/// Translate every captured node in `captures` in place (OneShot phase
|
||||
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures.
|
||||
/// only), except for captures whose name appears in `skip` — those are
|
||||
/// left as raw (input-schema) ids for the rule body to consume
|
||||
/// directly. In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures regardless of `skip`.
|
||||
///
|
||||
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||
/// pre-existing "auto-translate captures before running the transform
|
||||
/// body" behavior. Manually-written transforms typically translate
|
||||
/// captures selectively via [`translate`] instead.
|
||||
/// Used by the `rule!` macro's generated prefix. `skip` is populated
|
||||
/// from the macro's `@@name` capture markers; for plain `@name`
|
||||
/// captures (and rules with no `@@` markers) it is empty.
|
||||
///
|
||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||
@@ -772,11 +773,12 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
captures: &mut Captures,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
skip: &[&str],
|
||||
) -> Result<(), String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||
let root = *matched_root;
|
||||
captures.try_map_all_captures(|cid| {
|
||||
captures.try_map_captures_except(skip, |cid| {
|
||||
if cid == root {
|
||||
Ok(vec![cid])
|
||||
} else {
|
||||
|
||||
@@ -1058,6 +1058,110 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Verify that `@@name` capture markers skip the auto-translate prefix:
|
||||
/// the body sees the *raw* (input-schema) NodeRef and can read its
|
||||
/// source text or call `ctx.translate(...)` explicitly. Compare with
|
||||
/// the bare `@name` form, where the auto-translate prefix runs the
|
||||
/// same translation up front and the body sees the post-translate id.
|
||||
#[test]
|
||||
fn test_raw_capture_marker() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
// `@@raw_lhs` is untranslated: the body reads its source text
|
||||
// ("x") and embeds it directly as the identifier content. `@rhs`
|
||||
// is auto-translated (rhs already points to (integer "INT")).
|
||||
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
let text = ctx.ast.source_text(raw_lhs.into());
|
||||
tree!((call
|
||||
method: (identifier #{text.as_str()})
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
// `method:` uses the raw source text ("x"); if `@@` were broken and
|
||||
// auto-translation ran on `raw_lhs`, it would still produce the
|
||||
// string "x" (source_text inherits the input range), so this dump
|
||||
// alone cannot detect a broken `@@`. The companion test
|
||||
// `test_raw_capture_marker_explicit_translate` covers that case by
|
||||
// translating the raw NodeRef in the body and expecting `identifier "ID"`.
|
||||
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "x"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
/// Companion to `test_raw_capture_marker`: confirms that calling
|
||||
/// `ctx.translate(raw)` on a `@@`-captured NodeRef from the rule body
|
||||
/// produces the correctly-translated output-schema node. With `@`, the
|
||||
/// translation has already happened, so `ctx.translate(...)` inside the
|
||||
/// body would attempt to re-translate an output node (which has no
|
||||
/// matching rule and would error).
|
||||
#[test]
|
||||
fn test_raw_capture_marker_explicit_translate() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
(program stmt: {..stmts})
|
||||
),
|
||||
// `@@raw_lhs` is left untranslated so the body can translate it on
// demand; `@rhs` is a regular capture handled by the auto-translate
// prefix before the body runs.
yeast::rule!(
|
||||
(assignment left: (_) @@raw_lhs right: (_) @rhs)
|
||||
=>
|
||||
{
|
||||
// Explicitly translate the raw capture; the result is spliced
// into `method:` with `{..}` below.
let translated_lhs = ctx.translate(raw_lhs)?;
|
||||
tree!((call
|
||||
method: {..translated_lhs}
|
||||
receiver: {rhs}))
|
||||
}
|
||||
),
|
||||
yeast::rule!((identifier) => (identifier "ID")),
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
let dump = dump_ast(&ast, ast.get_root(), input);
|
||||
// `method:` must show `identifier "ID"`: the `(identifier)` rule ran on
// the raw lhs via the explicit `ctx.translate` call in the body.
assert_dump_eq(
|
||||
&dump,
|
||||
r#"
|
||||
program
|
||||
stmt:
|
||||
call
|
||||
method: identifier "ID"
|
||||
receiver: integer "INT"
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
// ---- Cursor tests ----
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user