mirror of
https://github.com/github/codeql.git
synced 2026-06-25 14:47:04 +02:00
yeast: Add macro for fine-grained rules
Adds `manual_rule!`, which provides a lower-level interface for defining rewrites. (I'm not entirely sold on the name, so any suggestions would be welcome.) Notably, the captures bound in the body of such rules have _not_ been translated yet -- they still come from the _input_ tree. It is the user's duty to call `ctx.translate` on these (which has the effect of recursively invoking the translation) before substituting them into the output. For _truly_ low-level access, the user can still construct a `Rule` directly, but this is now somewhat cumbersome, as the closure it contains takes quite a few parameters. Still, the possibility remains.
This commit is contained in:
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||
///
|
||||
/// Use `manual_rule!` when the transform needs control over capture
|
||||
/// translation timing — for example, when an outer rule needs to set
|
||||
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||
/// translation reaches inner rules that read that state.
|
||||
///
|
||||
/// ```text
|
||||
/// manual_rule!(
|
||||
/// (query_pattern field: (_) @name)
|
||||
/// {
|
||||
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||
/// let translated = ctx.translate(name)?;
|
||||
/// Ok(translated)
|
||||
/// }
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// Differences from [`rule!`]:
|
||||
/// - Captures are **not** auto-translated before the body runs; they
|
||||
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||
/// tree template, no `Ok(...)` wrap.
|
||||
#[proc_macro]
|
||||
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||
let input2: TokenStream2 = input.into();
|
||||
match parse::parse_manual_rule_top(input2) {
|
||||
Ok(output) => output.into(),
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -904,6 +904,106 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse `manual_rule!( query { body } )`.
|
||||
///
|
||||
/// Like [`parse_rule_top`] but:
|
||||
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||
/// - Generates code that does NOT auto-translate captures before
|
||||
/// running the body. Capture variables refer to raw (input-schema)
|
||||
/// nodes; the body is responsible for explicit translation via
|
||||
/// `ctx.translate(...)`.
|
||||
/// - The body is included verbatim and must evaluate to
|
||||
/// `Result<Vec<usize>, String>`.
|
||||
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let mut tokens = input.into_iter().peekable();
|
||||
|
||||
// Collect query tokens up to the body block `{ ... }`.
|
||||
let mut query_tokens = Vec::new();
|
||||
loop {
|
||||
match tokens.peek() {
|
||||
None => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||
))
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||
_ => {
|
||||
query_tokens.push(tokens.next().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||
|
||||
// Extract captures from the query (same as in `rule!`).
|
||||
let captures = extract_captures(&query_stream);
|
||||
|
||||
// Parse the query into the QueryNode-building expression.
|
||||
let query_code = parse_query_top(query_stream)?;
|
||||
|
||||
// Generate capture bindings (same as in `rule!`).
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name = Ident::new(&cap.name, Span::call_site());
|
||||
let name_str = &cap.name;
|
||||
match cap.multiplicity {
|
||||
CaptureMultiplicity::Repeated => quote! {
|
||||
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||
.into_iter()
|
||||
.map(yeast::NodeRef)
|
||||
.collect();
|
||||
},
|
||||
CaptureMultiplicity::Optional => quote! {
|
||||
let #name: Option<yeast::NodeRef> =
|
||||
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||
},
|
||||
CaptureMultiplicity::Single => quote! {
|
||||
let #name: yeast::NodeRef =
|
||||
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||
},
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Consume the body block.
|
||||
let body_group = match tokens.next() {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||
other => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
format!(
|
||||
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||
),
|
||||
))
|
||||
}
|
||||
};
|
||||
let body_stream = body_group.stream();
|
||||
|
||||
// No tokens should follow the body.
|
||||
if let Some(tok) = tokens.next() {
|
||||
return Err(syn::Error::new_spanned(
|
||||
tok,
|
||||
"unexpected token after manual_rule! body",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// No auto-translate prefix for manual rules — the body
|
||||
// is responsible for translating captures explicitly.
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
#body_stream
|
||||
}))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Token utilities
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -176,15 +176,36 @@ impl<C: Clone> BuildCtx<'_, C> {
|
||||
/// (translation is not meaningful when input and output share a
|
||||
/// schema).
|
||||
///
|
||||
/// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]),
|
||||
/// so manual rules can pass capture bindings directly without unwrapping.
|
||||
///
|
||||
/// Errors if this `BuildCtx` was constructed by hand (without a
|
||||
/// translator handle) — for example, in unit tests that don't go
|
||||
/// through the rule driver.
|
||||
pub fn translate(&mut self, id: Id) -> Result<Vec<Id>, String> {
|
||||
pub fn translate<I: Into<Id>>(&mut self, id: I) -> Result<Vec<Id>, String> {
    // Convert whatever the caller passed (an `Id`, a `NodeRef`, ...) into
    // a plain `Id` before delegating.
    let target: Id = id.into();
    if let Some(handle) = &self.translator {
        handle.translate(self.ast, self.user_ctx, target)
    } else {
        // No translator handle is attached to this context.
        Err(String::from(
            "translate() called on a BuildCtx without a translator handle",
        ))
    }
}
|
||||
|
||||
/// Translate an optional capture, returning the first translated id or
|
||||
/// `None`. Convenience for `?`-quantifier captures (`Option<NodeRef>`).
|
||||
///
|
||||
/// If the underlying translation produces multiple ids for a single
|
||||
/// input, only the first is returned. For most use cases (e.g.
|
||||
/// translating a single type annotation) this is what you want; if
|
||||
/// you need all ids, use [`translate`] directly.
|
||||
pub fn translate_opt<I: Into<Id>>(
|
||||
&mut self,
|
||||
id: Option<I>,
|
||||
) -> Result<Option<Id>, String> {
|
||||
match id {
|
||||
Some(id) => Ok(self.translate(id)?.into_iter().next()),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> std::ops::Deref for BuildCtx<'_, C> {
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod schema;
|
||||
pub mod tree_builder;
|
||||
mod visitor;
|
||||
|
||||
pub use yeast_macros::{query, rule, tree, trees};
|
||||
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||
|
||||
use captures::Captures;
|
||||
pub use cursor::Cursor;
|
||||
|
||||
Reference in New Issue
Block a user