mirror of
https://github.com/github/codeql.git
synced 2026-06-30 09:05:28 +02:00
Merge pull request #22054 from github/tausbn/yeast-context-reification
This commit is contained in:
@@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
|
||||
}
|
||||
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
|
||||
/// through the supplied yeast `Runner` before TRAP extraction. Building the
|
||||
/// `Runner` (which parses YAML and constructs the schema) is the caller's
|
||||
/// responsibility, allowing it to be done once and shared across files.
|
||||
/// When `desugarer` is `Some`, the parsed tree is first transformed
|
||||
/// through the supplied yeast desugarer before TRAP extraction. Building
|
||||
/// the desugarer (which parses YAML and constructs the schema) is the
|
||||
/// caller's responsibility, allowing it to be done once and shared across
|
||||
/// files.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn extract(
|
||||
language: &Language,
|
||||
@@ -295,7 +296,7 @@ pub fn extract(
|
||||
path: &Path,
|
||||
source: &[u8],
|
||||
ranges: &[Range],
|
||||
yeast_runner: Option<&yeast::Runner<'_>>,
|
||||
desugarer: Option<&dyn yeast::Desugarer>,
|
||||
) {
|
||||
let path_str = file_paths::normalize_and_transform_path(path, transformer);
|
||||
let source_root = std::env::current_dir()
|
||||
@@ -328,8 +329,8 @@ pub fn extract(
|
||||
schema,
|
||||
);
|
||||
|
||||
if let Some(yeast_runner) = yeast_runner {
|
||||
let ast = yeast_runner
|
||||
if let Some(desugarer) = desugarer {
|
||||
let ast = desugarer
|
||||
.run_from_tree(&tree, source)
|
||||
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
|
||||
traverse_yeast(&ast, &mut visitor);
|
||||
|
||||
@@ -13,11 +13,14 @@ pub struct LanguageSpec {
|
||||
pub prefix: &'static str,
|
||||
pub ts_language: tree_sitter::Language,
|
||||
pub node_types: &'static str,
|
||||
/// Optional yeast desugaring configuration. When set, the parsed
|
||||
/// tree is rewritten through yeast before TRAP extraction. The
|
||||
/// config's `output_node_types_yaml` (if set) provides the schema
|
||||
/// used both at runtime (for the rewriter) and for TRAP validation.
|
||||
pub desugar: Option<yeast::DesugaringConfig>,
|
||||
/// Optional desugarer. When set, the parsed tree is rewritten through
|
||||
/// the desugarer before TRAP extraction. The desugarer's
|
||||
/// `output_node_types_yaml()` (if set) provides the schema used both
|
||||
/// at runtime (for the rewriter) and for TRAP validation.
|
||||
///
|
||||
/// `Box<dyn yeast::Desugarer>` so the shared extractor is agnostic to
|
||||
/// the user-defined context type the desugarer uses internally.
|
||||
pub desugar: Option<Box<dyn yeast::Desugarer>>,
|
||||
pub file_globs: Vec<String>,
|
||||
}
|
||||
|
||||
@@ -91,35 +94,22 @@ impl Extractor {
|
||||
.collect();
|
||||
|
||||
let mut schemas = vec![];
|
||||
let mut yeast_runners = Vec::new();
|
||||
for lang in &self.languages {
|
||||
let effective_node_types: String =
|
||||
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
|
||||
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
|
||||
std::io::Error::other(format!(
|
||||
"Failed to convert YAML node-types to JSON for {}: {e}",
|
||||
lang.prefix
|
||||
))
|
||||
})?,
|
||||
None => lang.node_types.to_string(),
|
||||
};
|
||||
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
|
||||
schemas.push(schema);
|
||||
|
||||
// Build the yeast runner once per language so the YAML schema
|
||||
// isn't re-parsed for every file.
|
||||
let yeast_runner = lang
|
||||
let effective_node_types: String = match lang
|
||||
.desugar
|
||||
.as_ref()
|
||||
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
|
||||
.transpose()
|
||||
.map_err(|e| {
|
||||
.and_then(|d| d.output_node_types_yaml())
|
||||
{
|
||||
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
|
||||
std::io::Error::other(format!(
|
||||
"Failed to build desugaring runner for {}: {e}",
|
||||
"Failed to convert YAML node-types to JSON for {}: {e}",
|
||||
lang.prefix
|
||||
))
|
||||
})?;
|
||||
yeast_runners.push(yeast_runner);
|
||||
})?,
|
||||
None => lang.node_types.to_string(),
|
||||
};
|
||||
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
|
||||
schemas.push(schema);
|
||||
}
|
||||
|
||||
// Construct a single globset containing all language globs,
|
||||
@@ -194,7 +184,7 @@ impl Extractor {
|
||||
&path,
|
||||
&source,
|
||||
&[],
|
||||
yeast_runners[i].as_ref(),
|
||||
lang.desugar.as_deref(),
|
||||
);
|
||||
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
|
||||
std::fs::copy(&path, &src_archive_file)?;
|
||||
|
||||
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||
///
|
||||
/// Use `manual_rule!` when the transform needs control over capture
|
||||
/// translation timing — for example, when an outer rule needs to set
|
||||
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||
/// translation reaches inner rules that read that state.
|
||||
///
|
||||
/// ```text
|
||||
/// manual_rule!(
|
||||
/// (query_pattern field: (_) @name)
|
||||
/// {
|
||||
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||
/// let translated = ctx.translate(name)?;
|
||||
/// Ok(translated)
|
||||
/// }
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// Differences from [`rule!`]:
|
||||
/// - Captures are **not** auto-translated before the body runs; they
|
||||
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||
/// tree template, no `Ok(...)` wrap.
|
||||
#[proc_macro]
|
||||
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||
let input2: TokenStream2 = input.into();
|
||||
match parse::parse_manual_rule_top(input2) {
|
||||
Ok(output) => output.into(),
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
std::collections::HashMap::new();
|
||||
let mut bare_children: Vec<TokenStream> = Vec::new();
|
||||
let push_field_elem = |order: &mut Vec<String>,
|
||||
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
|
||||
name: String,
|
||||
elem: TokenStream| {
|
||||
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
|
||||
name: String,
|
||||
elem: TokenStream| {
|
||||
if !map.contains_key(&name) {
|
||||
order.push(name.clone());
|
||||
map.insert(name, vec![elem]);
|
||||
@@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
} else {
|
||||
let child = if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let capture_name =
|
||||
expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = capture_name.to_string();
|
||||
quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -296,10 +295,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
// tree! / trees! parsing — direct code generation against BuildCtx
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const IMPLICIT_CTX: &str = "__yeast_ctx";
|
||||
const IMPLICIT_CTX: &str = "ctx";
|
||||
|
||||
/// Determine the context identifier: either explicit `ctx,` or the implicit
|
||||
/// `__yeast_ctx` from an enclosing `rule!`.
|
||||
/// `ctx` from an enclosing `rule!`.
|
||||
fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident {
|
||||
// Check if first token is an ident followed by a comma
|
||||
let mut lookahead = tokens.clone();
|
||||
@@ -359,7 +358,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
let expr = group.stream();
|
||||
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
|
||||
Ok(quote! { ::std::convert::Into::<usize>::into({ #expr }) })
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
|
||||
let group = expect_group(tokens, Delimiter::Parenthesis)?;
|
||||
@@ -396,7 +395,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
let expr = group.stream();
|
||||
return Ok(quote! {
|
||||
{
|
||||
let __expr = (#expr);
|
||||
let __expr = { #expr };
|
||||
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
|
||||
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
|
||||
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
|
||||
@@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
// Named fields — compute each value into a temp, then reference it
|
||||
while peek_is_field(tokens) {
|
||||
let field_name = expect_ident(tokens, "expected field name")?;
|
||||
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
|
||||
let field_str = field_name
|
||||
.to_string()
|
||||
.strip_prefix("r#")
|
||||
.unwrap_or(&field_name.to_string())
|
||||
.to_string();
|
||||
expect_punct(tokens, ':', "expected `:` after field name")?;
|
||||
let temp = Ident::new(
|
||||
&format!("__field_{field_str}_{field_counter}"),
|
||||
@@ -438,7 +441,8 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
// Determine if a chain (.map(..)) follows the `{}` group.
|
||||
let mut after = tokens.clone();
|
||||
after.next(); // skip the brace group
|
||||
let has_chain = matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let has_chain =
|
||||
matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
|
||||
if is_splice || has_chain {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
@@ -448,11 +452,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
inner.next(); // consume second .
|
||||
let expr: TokenStream = inner.collect();
|
||||
quote! {
|
||||
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
|
||||
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
|
||||
}
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
quote! { (#expr).into_iter() }
|
||||
quote! { { #expr }.into_iter() }
|
||||
};
|
||||
let chained = parse_chain_suffix(tokens, ctx, base)?;
|
||||
stmts.push(quote! {
|
||||
@@ -506,11 +510,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
/// Each call expects the receiver to be an iterator. The `base` argument
|
||||
/// should therefore already be an iterator (use `.into_iter()` on it before
|
||||
/// calling this function).
|
||||
fn parse_chain_suffix(
|
||||
tokens: &mut Tokens,
|
||||
ctx: &Ident,
|
||||
base: TokenStream,
|
||||
) -> Result<TokenStream> {
|
||||
fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result<TokenStream> {
|
||||
let mut current = base;
|
||||
while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') {
|
||||
tokens.next(); // consume .
|
||||
@@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
// {expr} or {..expr} (with optional .chain) — single node or splice
|
||||
if peek_is_group(tokens, Delimiter::Brace) {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
let has_chain = matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let has_chain =
|
||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let mut inner = group.stream().into_iter().peekable();
|
||||
let is_splice = peek_is_dotdot(&inner);
|
||||
if is_splice || has_chain {
|
||||
@@ -617,11 +618,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
inner.next(); // consume second .
|
||||
let expr: TokenStream = inner.collect();
|
||||
quote! {
|
||||
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
|
||||
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
|
||||
}
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
quote! { (#expr).into_iter() }
|
||||
quote! { { #expr }.into_iter() }
|
||||
};
|
||||
let chained = parse_chain_suffix(tokens, ctx, base)?;
|
||||
items.push(quote! {
|
||||
@@ -630,7 +631,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
items.push(quote! {
|
||||
__nodes.push(::std::convert::Into::<usize>::into(#expr));
|
||||
__nodes.push(::std::convert::Into::<usize>::into({ #expr }));
|
||||
});
|
||||
}
|
||||
continue;
|
||||
@@ -888,10 +889,117 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>| {
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// Auto-translation prefix: recursively translate every
|
||||
// captured node before invoking the user's transform body.
|
||||
// For OneShot rules this preserves the legacy behaviour
|
||||
// (input-schema captures translated to output-schema
|
||||
// nodes); for Repeating rules it is a no-op.
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range);
|
||||
#transform_body
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
let __result: Vec<usize> = { #transform_body };
|
||||
Ok(__result)
|
||||
}))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse `manual_rule!( query { body } )`.
|
||||
///
|
||||
/// Like [`parse_rule_top`] but:
|
||||
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||
/// - Generates code that does NOT auto-translate captures before
|
||||
/// running the body. Capture variables refer to raw (input-schema)
|
||||
/// nodes; the body is responsible for explicit translation via
|
||||
/// `ctx.translate(...)`.
|
||||
/// - The body is included verbatim and must evaluate to
|
||||
/// `Result<Vec<usize>, String>`.
|
||||
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let mut tokens = input.into_iter().peekable();
|
||||
|
||||
// Collect query tokens up to the body block `{ ... }`.
|
||||
let mut query_tokens = Vec::new();
|
||||
loop {
|
||||
match tokens.peek() {
|
||||
None => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||
))
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||
_ => {
|
||||
query_tokens.push(tokens.next().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||
|
||||
// Extract captures from the query (same as in `rule!`).
|
||||
let captures = extract_captures(&query_stream);
|
||||
|
||||
// Parse the query into the QueryNode-building expression.
|
||||
let query_code = parse_query_top(query_stream)?;
|
||||
|
||||
// Generate capture bindings (same as in `rule!`).
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name = Ident::new(&cap.name, Span::call_site());
|
||||
let name_str = &cap.name;
|
||||
match cap.multiplicity {
|
||||
CaptureMultiplicity::Repeated => quote! {
|
||||
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||
.into_iter()
|
||||
.map(yeast::NodeRef)
|
||||
.collect();
|
||||
},
|
||||
CaptureMultiplicity::Optional => quote! {
|
||||
let #name: Option<yeast::NodeRef> =
|
||||
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||
},
|
||||
CaptureMultiplicity::Single => quote! {
|
||||
let #name: yeast::NodeRef =
|
||||
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||
},
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Consume the body block.
|
||||
let body_group = match tokens.next() {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||
other => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
format!(
|
||||
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||
),
|
||||
))
|
||||
}
|
||||
};
|
||||
let body_stream = body_group.stream();
|
||||
|
||||
// No tokens should follow the body.
|
||||
if let Some(tok) = tokens.next() {
|
||||
return Err(syn::Error::new_spanned(
|
||||
tok,
|
||||
"unexpected token after manual_rule! body",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// No auto-translate prefix for manual rules — the body
|
||||
// is responsible for translating captures explicitly.
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
#body_stream
|
||||
}))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -265,7 +265,21 @@ occurrences of the same `$name` within one `BuildCtx` share the same value:
|
||||
)
|
||||
```
|
||||
|
||||
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`):
|
||||
The contents of `{…}` are treated as a Rust block, so multi-statement
|
||||
expressions (with `let` bindings) work too:
|
||||
|
||||
```rust
|
||||
(assignment
|
||||
left: {tmp}
|
||||
right: {
|
||||
let lit = ctx.literal("integer", "0");
|
||||
tree!((binary_expr op: (operator "+") left: {tmp} right: {lit}))
|
||||
})
|
||||
```
|
||||
|
||||
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`); the contents
|
||||
are likewise a Rust block, so the splice can be the result of arbitrary
|
||||
computation:
|
||||
|
||||
```rust
|
||||
yeast::trees!(ctx,
|
||||
|
||||
@@ -20,7 +20,7 @@ fn main() {
|
||||
let args = Cli::parse();
|
||||
let language = get_language(&args.language);
|
||||
let source = std::fs::read_to_string(&args.file).unwrap();
|
||||
let runner = yeast::Runner::new(language, &[]);
|
||||
let runner: yeast::Runner = yeast::Runner::new(language, &[]);
|
||||
let ast = runner.run(&source).unwrap();
|
||||
println!("{}", ast.print(&source, ast.get_root()));
|
||||
}
|
||||
|
||||
@@ -2,28 +2,60 @@ use std::collections::BTreeMap;
|
||||
|
||||
use crate::captures::Captures;
|
||||
use crate::tree_builder::FreshScope;
|
||||
use crate::{Ast, FieldId, Id, NodeContent};
|
||||
use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle};
|
||||
|
||||
/// Context for building new AST nodes during a transformation.
|
||||
///
|
||||
/// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the
|
||||
/// AST, a reference to the captures from a query match, and a `FreshScope` for
|
||||
/// generating unique identifiers.
|
||||
pub struct BuildCtx<'a> {
|
||||
/// AST, a reference to the captures from a query match, a `FreshScope` for
|
||||
/// generating unique identifiers, and a mutable reference to a user-defined
|
||||
/// context of type `C`.
|
||||
///
|
||||
/// The user context `C` is shared across rules via the framework's driver:
|
||||
/// outer rules can write to it before recursive translation, and inner rules
|
||||
/// can read (or further mutate) it during their transforms. The framework
|
||||
/// snapshots and restores the user context around each rule application, so
|
||||
/// mutations made by a rule are visible to its descendants (via recursive
|
||||
/// translation) but not to its parent's siblings.
|
||||
///
|
||||
/// `BuildCtx` implements [`Deref`] and [`DerefMut`] targeting `C`, so user
|
||||
/// context fields are accessible as `ctx.my_field` directly (provided they
|
||||
/// don't collide with `BuildCtx`'s own fields like `ast`, `captures`, etc.).
|
||||
///
|
||||
/// The default `C = ()` means rules that don't need any user context don't
|
||||
/// pay any cost.
|
||||
///
|
||||
/// When constructed by the framework (via the rule! macro), `BuildCtx` also
|
||||
/// carries a [`TranslatorHandle`] that the [`translate`] method delegates
|
||||
/// to. When constructed by hand (e.g. in tests), the translator is `None`
|
||||
/// and [`translate`] returns an error.
|
||||
pub struct BuildCtx<'a, C: 'a = ()> {
|
||||
pub ast: &'a mut Ast,
|
||||
pub captures: &'a Captures,
|
||||
pub fresh: &'a FreshScope,
|
||||
/// Source range of the matched node, inherited by synthetic nodes.
|
||||
pub source_range: Option<tree_sitter::Range>,
|
||||
/// User-supplied context, accessible directly via `ctx.field` (via Deref).
|
||||
pub user_ctx: &'a mut C,
|
||||
/// Optional translator handle, populated when the context is built by
|
||||
/// the framework's rule driver. None when the context is built by hand.
|
||||
pub(crate) translator: Option<TranslatorHandle<'a, C>>,
|
||||
}
|
||||
|
||||
impl<'a> BuildCtx<'a> {
|
||||
pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self {
|
||||
impl<'a, C> BuildCtx<'a, C> {
|
||||
pub fn new(
|
||||
ast: &'a mut Ast,
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
user_ctx: &'a mut C,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range: None,
|
||||
user_ctx,
|
||||
translator: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,12 +64,35 @@ impl<'a> BuildCtx<'a> {
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
user_ctx: &'a mut C,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range,
|
||||
user_ctx,
|
||||
translator: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a `BuildCtx` carrying a translator handle. Used by the
|
||||
/// `rule!` macro to enable [`translate`] inside rule transforms.
|
||||
pub fn with_translator(
|
||||
ast: &'a mut Ast,
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
user_ctx: &'a mut C,
|
||||
translator: TranslatorHandle<'a, C>,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range,
|
||||
user_ctx,
|
||||
translator: Some(translator),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,3 +168,52 @@ impl<'a> BuildCtx<'a> {
|
||||
self.ast.prepend_field_child(node_id, field_id, value_id);
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Clone> BuildCtx<'_, C> {
|
||||
/// Recursively translate a node via the framework's rule machinery.
|
||||
/// In a OneShot phase, applies OneShot rules to the given node and
|
||||
/// returns the resulting node ids. In a Repeating phase, errors
|
||||
/// (translation is not meaningful when input and output share a
|
||||
/// schema).
|
||||
///
|
||||
/// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]),
|
||||
/// so manual rules can pass capture bindings directly without unwrapping.
|
||||
///
|
||||
/// Errors if this `BuildCtx` was constructed by hand (without a
|
||||
/// translator handle) — for example, in unit tests that don't go
|
||||
/// through the rule driver.
|
||||
pub fn translate<I: Into<Id>>(&mut self, id: I) -> Result<Vec<Id>, String> {
|
||||
let id = id.into();
|
||||
match &self.translator {
|
||||
Some(t) => t.translate(self.ast, self.user_ctx, id),
|
||||
None => Err("translate() called on a BuildCtx without a translator handle".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate an optional capture, returning the first translated id or
|
||||
/// `None`. Convenience for `?`-quantifier captures (`Option<NodeRef>`).
|
||||
///
|
||||
/// If the underlying translation produces multiple ids for a single
|
||||
/// input, only the first is returned. For most use cases (e.g.
|
||||
/// translating a single type annotation) this is what you want; if
|
||||
/// you need all ids, use [`translate`] directly.
|
||||
pub fn translate_opt<I: Into<Id>>(&mut self, id: Option<I>) -> Result<Option<Id>, String> {
|
||||
match id {
|
||||
Some(id) => Ok(self.translate(id)?.into_iter().next()),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> std::ops::Deref for BuildCtx<'_, C> {
|
||||
type Target = C;
|
||||
fn deref(&self) -> &C {
|
||||
&*self.user_ctx
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> std::ops::DerefMut for BuildCtx<'_, C> {
|
||||
fn deref_mut(&mut self) -> &mut C {
|
||||
&mut *self.user_ctx
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,12 +53,7 @@ pub fn dump_ast_with_options(
|
||||
///
|
||||
/// Any node that does not match the expected type set for its parent field is
|
||||
/// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line.
|
||||
pub fn dump_ast_with_type_errors(
|
||||
ast: &Ast,
|
||||
root: usize,
|
||||
source: &str,
|
||||
schema: &Schema,
|
||||
) -> String {
|
||||
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
|
||||
dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default())
|
||||
}
|
||||
|
||||
@@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options(
|
||||
options: &DumpOptions,
|
||||
) -> String {
|
||||
let mut out = String::new();
|
||||
dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out);
|
||||
dump_node(
|
||||
ast,
|
||||
root,
|
||||
source,
|
||||
options,
|
||||
0,
|
||||
Some((schema, None, None)),
|
||||
&mut out,
|
||||
);
|
||||
out
|
||||
}
|
||||
|
||||
@@ -232,8 +235,8 @@ fn dump_node(
|
||||
}
|
||||
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
|
||||
let child_type_check = type_check.map(|(schema, _, _)| {
|
||||
let expected = expected_for_field(schema, node.kind_name(), field_id)
|
||||
.or(Some(EMPTY_NODE_TYPES));
|
||||
let expected =
|
||||
expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES));
|
||||
let parent_field = Some((node.kind_name(), field_name));
|
||||
(schema, expected, parent_field)
|
||||
});
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod schema;
|
||||
pub mod tree_builder;
|
||||
mod visitor;
|
||||
|
||||
pub use yeast_macros::{query, rule, tree, trees};
|
||||
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||
|
||||
use captures::Captures;
|
||||
pub use cursor::Cursor;
|
||||
@@ -297,7 +297,9 @@ impl Ast {
|
||||
/// Returns the source text for `id`, resolving `NodeContent::Range`
|
||||
/// against the stored source bytes when available.
|
||||
pub fn source_text(&self, id: Id) -> String {
|
||||
let Some(node) = self.get_node(id) else { return String::new(); };
|
||||
let Some(node) = self.get_node(id) else {
|
||||
return String::new();
|
||||
};
|
||||
let read_range = |range: &tree_sitter::Range| {
|
||||
let start = range.start_byte;
|
||||
let end = range.end_byte;
|
||||
@@ -488,7 +490,10 @@ impl Ast {
|
||||
|
||||
/// Prepend a child id to the given field of the given node.
|
||||
pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) {
|
||||
let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id");
|
||||
let node = self
|
||||
.nodes
|
||||
.get_mut(node_id)
|
||||
.expect("prepend_field_child: invalid node id");
|
||||
node.fields.entry(field_id).or_default().insert(0, value_id);
|
||||
}
|
||||
|
||||
@@ -700,18 +705,118 @@ impl From<tree_sitter::Range> for NodeContent {
|
||||
}
|
||||
}
|
||||
|
||||
/// The transform function for a rule: takes the AST, captured variables, a
|
||||
/// fresh-name scope, and the source range of the matched node, and returns
|
||||
/// the IDs of the replacement nodes.
|
||||
pub type Transform = Box<
|
||||
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> Vec<Id>
|
||||
/// A handle that lets a rule transform recursively translate AST nodes via
|
||||
/// the framework's rule machinery. Constructed by the driver and passed as
|
||||
/// the last argument of every [`Transform`] invocation.
|
||||
///
|
||||
/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in
|
||||
/// its generated prefix to translate captures before running the user's
|
||||
/// transform body. Manually-written transforms (using [`Rule::new`]
|
||||
/// directly) can call [`TranslatorHandle::translate`] selectively on
|
||||
/// specific node ids to control when translation happens.
|
||||
pub struct TranslatorHandle<'a, C> {
|
||||
inner: TranslatorImpl<'a, C>,
|
||||
}
|
||||
|
||||
/// Internal phase-specific translation state. Kept private — callers
|
||||
/// interact with [`TranslatorHandle`] only.
|
||||
enum TranslatorImpl<'a, C> {
|
||||
/// OneShot phase translator: recursively applies OneShot rules.
|
||||
OneShot {
|
||||
index: &'a RuleIndex<'a, C>,
|
||||
fresh: &'a tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
/// The id of the node the current rule is matching. Used by
|
||||
/// [`auto_translate_captures`] to avoid infinite recursion when a
|
||||
/// rule captures its own match root (e.g. via `(_) @_`).
|
||||
matched_root: Id,
|
||||
},
|
||||
/// Repeating phase translator: translation is not meaningful here
|
||||
/// (input and output schemas are the same). [`translate`] errors;
|
||||
/// [`auto_translate_captures`] is a no-op so the macro's auto-prefix
|
||||
/// works unchanged for Repeating rules.
|
||||
Repeating,
|
||||
}
|
||||
|
||||
impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
/// Recursively apply OneShot rules to `id` and return the resulting
|
||||
/// node ids. Errors in a Repeating phase (where translation is not
|
||||
/// meaningful).
|
||||
pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result<Vec<Id>, String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot {
|
||||
index,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
..
|
||||
} => apply_one_shot_rules_inner(index, ast, user_ctx, id, fresh, rewrite_depth + 1),
|
||||
TranslatorImpl::Repeating => {
|
||||
Err("translate() is not available in a Repeating phase".into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate every captured node in `captures` in place (OneShot phase
|
||||
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures.
|
||||
///
|
||||
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||
/// pre-existing "auto-translate captures before running the transform
|
||||
/// body" behavior. Manually-written transforms typically translate
|
||||
/// captures selectively via [`translate`] instead.
|
||||
///
|
||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||
pub fn auto_translate_captures(
|
||||
&self,
|
||||
captures: &mut Captures,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
) -> Result<(), String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||
let root = *matched_root;
|
||||
captures.try_map_all_captures(|cid| {
|
||||
if cid == root {
|
||||
Ok(vec![cid])
|
||||
} else {
|
||||
self.translate(ast, user_ctx, cid)
|
||||
}
|
||||
})
|
||||
}
|
||||
TranslatorImpl::Repeating => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The transform function for a rule.
|
||||
///
|
||||
/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name
|
||||
/// scope, the source range of the matched node, a mutable reference to the
|
||||
/// user context of type `C`, and a [`TranslatorHandle`] for recursively
|
||||
/// translating nodes. Returns the IDs of the replacement nodes, or an
|
||||
/// error message if the transform could not be completed.
|
||||
///
|
||||
/// Transforms produced by [`Rule::new`] receive **raw** captures and must
|
||||
/// translate them themselves (via the handle). Transforms produced by the
|
||||
/// `rule!` macro have an auto-translation prefix injected for backward
|
||||
/// compatibility.
|
||||
pub type Transform<C = ()> = Box<
|
||||
dyn Fn(
|
||||
&mut Ast,
|
||||
Captures,
|
||||
&tree_builder::FreshScope,
|
||||
Option<tree_sitter::Range>,
|
||||
&mut C,
|
||||
TranslatorHandle<'_, C>,
|
||||
) -> Result<Vec<Id>, String>
|
||||
+ Send
|
||||
+ Sync,
|
||||
>;
|
||||
|
||||
pub struct Rule {
|
||||
pub struct Rule<C = ()> {
|
||||
query: QueryNode,
|
||||
transform: Transform,
|
||||
transform: Transform<C>,
|
||||
/// If true, after this rule fires on a node the engine will try to
|
||||
/// re-apply this same rule on the result root. Defaults to false:
|
||||
/// each rule fires at most once on a given node, which prevents
|
||||
@@ -719,8 +824,8 @@ pub struct Rule {
|
||||
repeated: bool,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn new(query: QueryNode, transform: Transform) -> Self {
|
||||
impl<C> Rule<C> {
|
||||
pub fn new(query: QueryNode, transform: Transform<C>) -> Self {
|
||||
Self {
|
||||
query,
|
||||
transform,
|
||||
@@ -742,9 +847,13 @@ impl Rule {
|
||||
ast: &mut Ast,
|
||||
node: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
user_ctx: &mut C,
|
||||
translator: TranslatorHandle<'_, C>,
|
||||
) -> Result<Option<Vec<Id>>, String> {
|
||||
match self.try_match(ast, node)? {
|
||||
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
|
||||
Some(captures) => Ok(Some(
|
||||
self.run_transform(ast, captures, node, fresh, user_ctx, translator)?,
|
||||
)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@@ -768,29 +877,31 @@ impl Rule {
|
||||
captures: Captures,
|
||||
node: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Vec<Id> {
|
||||
user_ctx: &mut C,
|
||||
translator: TranslatorHandle<'_, C>,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
fresh.next_scope();
|
||||
let source_range = ast.get_node(node).and_then(|n| match n.content {
|
||||
NodeContent::Range(r) => Some(r),
|
||||
_ => n.source_range,
|
||||
});
|
||||
(self.transform)(ast, captures, fresh, source_range)
|
||||
(self.transform)(ast, captures, fresh, source_range, user_ctx, translator)
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_REWRITE_DEPTH: usize = 100;
|
||||
|
||||
/// Index of rules by their root query kind for fast lookup.
|
||||
struct RuleIndex<'a> {
|
||||
struct RuleIndex<'a, C> {
|
||||
/// Rules indexed by root node kind name.
|
||||
by_kind: BTreeMap<&'static str, Vec<&'a Rule>>,
|
||||
by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>>,
|
||||
/// Rules with wildcard queries (Any) that apply to all nodes.
|
||||
wildcard: Vec<&'a Rule>,
|
||||
wildcard: Vec<&'a Rule<C>>,
|
||||
}
|
||||
|
||||
impl<'a> RuleIndex<'a> {
|
||||
fn new(rules: &'a [Rule]) -> Self {
|
||||
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new();
|
||||
impl<'a, C> RuleIndex<'a, C> {
|
||||
fn new(rules: &'a [Rule<C>]) -> Self {
|
||||
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>> = BTreeMap::new();
|
||||
let mut wildcard = Vec::new();
|
||||
for rule in rules {
|
||||
match rule.query.root_kind() {
|
||||
@@ -801,7 +912,7 @@ impl<'a> RuleIndex<'a> {
|
||||
Self { by_kind, wildcard }
|
||||
}
|
||||
|
||||
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule> {
|
||||
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule<C>> {
|
||||
self.by_kind
|
||||
.get(kind)
|
||||
.into_iter()
|
||||
@@ -810,23 +921,25 @@ impl<'a> RuleIndex<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_repeating_rules(
|
||||
rules: &[Rule],
|
||||
fn apply_repeating_rules<C: Clone>(
|
||||
rules: &[Rule<C>],
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
let index = RuleIndex::new(rules);
|
||||
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
|
||||
apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None)
|
||||
}
|
||||
|
||||
fn apply_repeating_rules_inner(
|
||||
index: &RuleIndex,
|
||||
fn apply_repeating_rules_inner<C: Clone>(
|
||||
index: &RuleIndex<C>,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
skip_rule: Option<*const Rule>,
|
||||
skip_rule: Option<*const Rule<C>>,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
if rewrite_depth > MAX_REWRITE_DEPTH {
|
||||
return Err(format!(
|
||||
@@ -837,11 +950,23 @@ fn apply_repeating_rules_inner(
|
||||
|
||||
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
|
||||
for rule in index.rules_for_kind(node_kind) {
|
||||
let rule_ptr = *rule as *const Rule;
|
||||
let rule_ptr = *rule as *const Rule<C>;
|
||||
if Some(rule_ptr) == skip_rule {
|
||||
continue;
|
||||
}
|
||||
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
|
||||
// Snapshot the user context before invoking the rule so that any
|
||||
// mutations the rule makes are visible during recursive translation
|
||||
// of its result, but not leaked to the parent's siblings.
|
||||
let snapshot = user_ctx.clone();
|
||||
// Repeating rules don't need a real translator: their captures
|
||||
// aren't auto-translated (Repeating preserves the input schema),
|
||||
// and `ctx.translate(id)` errors if invoked from a Repeating
|
||||
// transform.
|
||||
let translator = TranslatorHandle {
|
||||
inner: TranslatorImpl::Repeating,
|
||||
};
|
||||
let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?;
|
||||
if let Some(result_node) = try_result {
|
||||
// For non-repeated rules, suppress further application of *this*
|
||||
// rule on the result root, so a rule whose output matches its own
|
||||
// query doesn't loop. Other rules and child traversal are
|
||||
@@ -852,14 +977,19 @@ fn apply_repeating_rules_inner(
|
||||
results.extend(apply_repeating_rules_inner(
|
||||
index,
|
||||
ast,
|
||||
user_ctx,
|
||||
node,
|
||||
fresh,
|
||||
rewrite_depth + 1,
|
||||
next_skip,
|
||||
)?);
|
||||
}
|
||||
*user_ctx = snapshot;
|
||||
return Ok(results);
|
||||
}
|
||||
// Rule didn't match; restore any speculative changes (none expected
|
||||
// since try_rule only mutates on match, but be defensive).
|
||||
*user_ctx = snapshot;
|
||||
}
|
||||
|
||||
// Take the parent's fields by ownership: the recursion will rewrite
|
||||
@@ -874,7 +1004,15 @@ fn apply_repeating_rules_inner(
|
||||
for children in fields.values_mut() {
|
||||
let mut new_children: Option<Vec<Id>> = None;
|
||||
for (i, &child_id) in children.iter().enumerate() {
|
||||
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
|
||||
let result = apply_repeating_rules_inner(
|
||||
index,
|
||||
ast,
|
||||
user_ctx,
|
||||
child_id,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
None,
|
||||
)?;
|
||||
let unchanged = result.len() == 1 && result[0] == child_id;
|
||||
match (&mut new_children, unchanged) {
|
||||
(None, true) => {} // unchanged so far, no allocation needed
|
||||
@@ -903,24 +1041,25 @@ fn apply_repeating_rules_inner(
|
||||
/// each visited node, recursion proceeds only through captured nodes (not
|
||||
/// through the input node's children directly), and an error is returned if
|
||||
/// no rule matches a visited node.
|
||||
fn apply_one_shot_rules(
|
||||
rules: &[Rule],
|
||||
fn apply_one_shot_rules<C: Clone>(
|
||||
rules: &[Rule<C>],
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
let index = RuleIndex::new(rules);
|
||||
apply_one_shot_rules_inner(&index, ast, id, fresh, 0)
|
||||
apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0)
|
||||
}
|
||||
|
||||
fn apply_one_shot_rules_inner(
|
||||
index: &RuleIndex,
|
||||
fn apply_one_shot_rules_inner<C: Clone>(
|
||||
index: &RuleIndex<C>,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
|
||||
if rewrite_depth > MAX_REWRITE_DEPTH {
|
||||
return Err(format!(
|
||||
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
|
||||
@@ -931,22 +1070,27 @@ fn apply_one_shot_rules_inner(
|
||||
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
|
||||
|
||||
for rule in index.rules_for_kind(node_kind) {
|
||||
if let Some(mut captures) = rule.try_match(ast, id)? {
|
||||
// Recursively translate every captured node before invoking the
|
||||
// transform. The transform's output uses output-schema kinds, so
|
||||
// we must translate captured input-schema nodes to their
|
||||
// output-schema equivalents first.
|
||||
captures.try_map_all_captures(|captured_id| {
|
||||
// Avoid infinite recursion when a capture refers to the root
|
||||
// node of the matched tree (e.g. an `@_` capture on the
|
||||
// pattern root): re-analyzing it would match the same rule
|
||||
// again indefinitely.
|
||||
if captured_id == id {
|
||||
return Ok(vec![captured_id]);
|
||||
}
|
||||
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
|
||||
})?;
|
||||
return Ok(rule.run_transform(ast, captures, id, fresh));
|
||||
if let Some(captures) = rule.try_match(ast, id)? {
|
||||
// Snapshot the user context before invoking the rule so that any
|
||||
// mutations the rule (or its transitively-translated captures)
|
||||
// make are visible during this rule's transform, but not leaked
|
||||
// to the parent's siblings.
|
||||
let snapshot = user_ctx.clone();
|
||||
// Build the translator handle the transform will use to
|
||||
// recursively translate captures (or, for macro-generated
|
||||
// rules, the auto-translate prefix uses it to translate every
|
||||
// capture up front, preserving the legacy behavior).
|
||||
let translator = TranslatorHandle {
|
||||
inner: TranslatorImpl::OneShot {
|
||||
index,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
matched_root: id,
|
||||
},
|
||||
};
|
||||
let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?;
|
||||
*user_ctx = snapshot;
|
||||
return Ok(result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -974,15 +1118,15 @@ pub enum PhaseKind {
|
||||
/// starts. Rules within a phase compete for matches as usual; rules in
|
||||
/// different phases never compete because each traversal only considers the
|
||||
/// current phase's rules.
|
||||
pub struct Phase {
|
||||
pub struct Phase<C = ()> {
|
||||
/// Name used in error messages.
|
||||
pub name: String,
|
||||
pub rules: Vec<Rule>,
|
||||
pub rules: Vec<Rule<C>>,
|
||||
pub kind: PhaseKind,
|
||||
}
|
||||
|
||||
impl Phase {
|
||||
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
|
||||
impl<C> Phase<C> {
|
||||
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule<C>>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
rules,
|
||||
@@ -1008,17 +1152,30 @@ impl Phase {
|
||||
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
|
||||
/// .with_output_node_types_yaml(yaml);
|
||||
/// ```
|
||||
#[derive(Default)]
|
||||
pub struct DesugaringConfig {
|
||||
///
|
||||
/// The optional type parameter `C` is the user context type threaded through
|
||||
/// rule transforms. Defaults to `()` (no user context).
|
||||
pub struct DesugaringConfig<C = ()> {
|
||||
/// Phases of rule application, applied in order.
|
||||
pub phases: Vec<Phase>,
|
||||
pub phases: Vec<Phase<C>>,
|
||||
/// Output node-types in YAML format. If `None`, the input grammar's
|
||||
/// node types are used (i.e. the desugared AST has the same node types
|
||||
/// as the tree-sitter grammar).
|
||||
pub output_node_types_yaml: Option<&'static str>,
|
||||
}
|
||||
|
||||
impl DesugaringConfig {
|
||||
// Manual `Default` impl so users with a custom `C` that doesn't implement
|
||||
// `Default` can still construct an empty config.
|
||||
impl<C> Default for DesugaringConfig<C> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
phases: Vec::new(),
|
||||
output_node_types_yaml: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> DesugaringConfig<C> {
|
||||
/// Create an empty configuration. Add phases via [`add_phase`] and an
|
||||
/// optional output schema via [`with_output_node_types_yaml`].
|
||||
pub fn new() -> Self {
|
||||
@@ -1030,7 +1187,7 @@ impl DesugaringConfig {
|
||||
mut self,
|
||||
name: impl Into<String>,
|
||||
kind: PhaseKind,
|
||||
rules: Vec<Rule>,
|
||||
rules: Vec<Rule<C>>,
|
||||
) -> Self {
|
||||
self.phases.push(Phase::new(name, kind, rules));
|
||||
self
|
||||
@@ -1052,15 +1209,15 @@ impl DesugaringConfig {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Runner<'a> {
|
||||
pub struct Runner<'a, C = ()> {
|
||||
language: tree_sitter::Language,
|
||||
schema: schema::Schema,
|
||||
phases: &'a [Phase],
|
||||
phases: &'a [Phase<C>],
|
||||
}
|
||||
|
||||
impl<'a> Runner<'a> {
|
||||
impl<'a, C> Runner<'a, C> {
|
||||
/// Create a runner using the input grammar's schema for output.
|
||||
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
|
||||
pub fn new(language: tree_sitter::Language, phases: &'a [Phase<C>]) -> Self {
|
||||
let schema = schema::Schema::from_language(&language);
|
||||
Self {
|
||||
language,
|
||||
@@ -1073,7 +1230,7 @@ impl<'a> Runner<'a> {
|
||||
pub fn with_schema(
|
||||
language: tree_sitter::Language,
|
||||
schema: &schema::Schema,
|
||||
phases: &'a [Phase],
|
||||
phases: &'a [Phase<C>],
|
||||
) -> Self {
|
||||
Self {
|
||||
language,
|
||||
@@ -1085,7 +1242,7 @@ impl<'a> Runner<'a> {
|
||||
/// Create a runner from a [`DesugaringConfig`].
|
||||
pub fn from_config(
|
||||
language: tree_sitter::Language,
|
||||
config: &'a DesugaringConfig,
|
||||
config: &'a DesugaringConfig<C>,
|
||||
) -> Result<Self, String> {
|
||||
let schema = config.build_schema(&language)?;
|
||||
Ok(Self {
|
||||
@@ -1094,11 +1251,17 @@ impl<'a> Runner<'a> {
|
||||
phases: &config.phases,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run_from_tree(
|
||||
impl<'a, C: Clone> Runner<'a, C> {
|
||||
/// Parse `tree` against `source` and run all phases, threading
|
||||
/// `user_ctx` through every rule transform. The caller owns the
|
||||
/// initial context state.
|
||||
pub fn run_from_tree_with_ctx(
|
||||
&self,
|
||||
tree: &tree_sitter::Tree,
|
||||
source: &[u8],
|
||||
user_ctx: &mut C,
|
||||
) -> Result<Ast, String> {
|
||||
let mut ast = Ast::from_tree_with_schema_and_source(
|
||||
self.schema.clone(),
|
||||
@@ -1106,11 +1269,13 @@ impl<'a> Runner<'a> {
|
||||
&self.language,
|
||||
source.to_vec(),
|
||||
);
|
||||
self.run_phases(&mut ast)?;
|
||||
self.run_phases(&mut ast, user_ctx)?;
|
||||
Ok(ast)
|
||||
}
|
||||
|
||||
pub fn run(&self, input: &str) -> Result<Ast, String> {
|
||||
/// Parse `input` and run all phases, threading `user_ctx` through
|
||||
/// every rule transform. The caller owns the initial context state.
|
||||
pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result<Ast, String> {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&self.language)
|
||||
@@ -1124,20 +1289,24 @@ impl<'a> Runner<'a> {
|
||||
&self.language,
|
||||
input.as_bytes().to_vec(),
|
||||
);
|
||||
self.run_phases(&mut ast)?;
|
||||
self.run_phases(&mut ast, user_ctx)?;
|
||||
Ok(ast)
|
||||
}
|
||||
|
||||
/// Apply each phase in turn to the AST, threading the root through.
|
||||
/// A single `FreshScope` is shared across phases so that fresh
|
||||
/// identifiers generated in different phases don't collide.
|
||||
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
|
||||
fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> {
|
||||
let fresh = tree_builder::FreshScope::new();
|
||||
let mut root = ast.get_root();
|
||||
for phase in self.phases {
|
||||
let res = match phase.kind {
|
||||
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
|
||||
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
|
||||
PhaseKind::Repeating => {
|
||||
apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh)
|
||||
}
|
||||
PhaseKind::OneShot => {
|
||||
apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh)
|
||||
}
|
||||
}
|
||||
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
|
||||
if res.len() != 1 {
|
||||
@@ -1153,3 +1322,78 @@ impl<'a> Runner<'a> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C: Clone + Default> Runner<'a, C> {
|
||||
/// Parse `tree` against `source` and run all phases, using the
|
||||
/// default context (`C::default()`) as the initial context state.
|
||||
pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
|
||||
let mut user_ctx = C::default();
|
||||
self.run_from_tree_with_ctx(tree, source, &mut user_ctx)
|
||||
}
|
||||
|
||||
/// Parse `input` and run all phases, using the default context
|
||||
/// (`C::default()`) as the initial context state.
|
||||
pub fn run(&self, input: &str) -> Result<Ast, String> {
|
||||
let mut user_ctx = C::default();
|
||||
self.run_with_ctx(input, &mut user_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Desugarer: type-erased view of a DesugaringConfig + Runner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Type-erased interface to a desugaring pipeline for a single language.
|
||||
///
|
||||
/// Consumers (e.g. a generic tree-sitter extractor) hold
|
||||
/// `Box<dyn Desugarer>` so they can dispatch through the trait without
|
||||
/// knowing the user context type `C` that's internal to yeast.
|
||||
///
|
||||
/// Construct one via [`ConcreteDesugarer::new`] from a
|
||||
/// [`DesugaringConfig<C>`] and a [`tree_sitter::Language`].
|
||||
pub trait Desugarer: Send + Sync {
|
||||
/// The output AST schema (in YAML format), or `None` if the input
|
||||
/// grammar's schema should be used.
|
||||
fn output_node_types_yaml(&self) -> Option<&'static str>;
|
||||
|
||||
/// Parse `tree` against `source` and run the desugaring pipeline.
|
||||
/// Each call constructs a fresh default user context internally.
|
||||
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String>;
|
||||
}
|
||||
|
||||
/// A concrete [`Desugarer`] backed by a [`DesugaringConfig<C>`] for a
|
||||
/// specific user context type `C`. Stores the language and a pre-built
|
||||
/// schema so that per-call cost is bounded to constructing a transient
|
||||
/// [`Runner`] and cloning the schema (no YAML re-parsing).
|
||||
pub struct ConcreteDesugarer<C: Default + Clone + Send + Sync + 'static> {
|
||||
language: tree_sitter::Language,
|
||||
schema: schema::Schema,
|
||||
config: DesugaringConfig<C>,
|
||||
}
|
||||
|
||||
impl<C: Default + Clone + Send + Sync + 'static> ConcreteDesugarer<C> {
|
||||
/// Build a desugarer for `language` from `config`. Parses the output
|
||||
/// schema YAML once (if set) and stores it for reuse across files.
|
||||
pub fn new(
|
||||
language: tree_sitter::Language,
|
||||
config: DesugaringConfig<C>,
|
||||
) -> Result<Self, String> {
|
||||
let schema = config.build_schema(&language)?;
|
||||
Ok(Self {
|
||||
language,
|
||||
schema,
|
||||
config,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Default + Clone + Send + Sync + 'static> Desugarer for ConcreteDesugarer<C> {
|
||||
fn output_node_types_yaml(&self) -> Option<&'static str> {
|
||||
self.config.output_node_types_yaml
|
||||
}
|
||||
|
||||
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
|
||||
let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases);
|
||||
runner.run_from_tree(tree, source)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
|
||||
|
||||
/// Apply YAML node-type definitions to a mutable Schema.
|
||||
/// Registers all types, fields, and allowed types from the YAML into the schema.
|
||||
fn apply_yaml_to_schema(
|
||||
yaml: &YamlNodeTypes,
|
||||
schema: &mut crate::schema::Schema,
|
||||
) {
|
||||
fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) {
|
||||
// Register all supertypes as node kinds
|
||||
for name in yaml.supertypes.keys() {
|
||||
schema.register_kind(name);
|
||||
@@ -307,7 +304,8 @@ fn apply_yaml_to_schema(
|
||||
.into_vec()
|
||||
.into_iter()
|
||||
.map(|type_ref| {
|
||||
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
|
||||
let (kind, named) =
|
||||
resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
|
||||
crate::schema::NodeType { kind, named }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -198,13 +198,8 @@ impl Schema {
|
||||
.insert((parent_kind.to_string(), field_id), node_types);
|
||||
}
|
||||
|
||||
pub fn field_types(
|
||||
&self,
|
||||
parent_kind: &str,
|
||||
field_id: FieldId,
|
||||
) -> Option<&Vec<NodeType>> {
|
||||
self.field_types
|
||||
.get(&(parent_kind.to_string(), field_id))
|
||||
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
|
||||
self.field_types.get(&(parent_kind.to_string(), field_id))
|
||||
}
|
||||
|
||||
pub fn set_field_cardinality(
|
||||
|
||||
@@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
|
||||
|
||||
/// Helper: parse Ruby source with no rules, return dump.
|
||||
fn parse_and_dump(input: &str) -> String {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast(&ast, ast.get_root(), input)
|
||||
}
|
||||
@@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
runner.run(input).unwrap()
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast(&ast, ast.get_root(), input)
|
||||
}
|
||||
@@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
runner
|
||||
.run(input)
|
||||
.expect_err("expected runner to return an error")
|
||||
@@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
||||
|
||||
/// Helper: parse Ruby source with no rules and dump with schema type errors.
|
||||
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
@@ -64,10 +64,10 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
|
||||
/// building schema with language IDs so field checks align with parser fields.
|
||||
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let runner = Runner::new(lang.clone(), &[]);
|
||||
let runner: Runner = Runner::new(lang.clone(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
|
||||
.unwrap();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> Strin
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
}
|
||||
@@ -166,7 +166,7 @@ fn test_parse_for_loop() {
|
||||
|
||||
#[test]
|
||||
fn test_dump_highlights_type_errors_inline() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -176,13 +176,13 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed("x = 1", schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
let dump = parse_and_dump_typed("x = 1", schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -192,25 +192,25 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
// This rewrite runs and preserves the RHS node kind via capture.
|
||||
// With schema above, preserving `integer` should be reported inline.
|
||||
let rules = vec![yeast::rule!(
|
||||
(assignment left: (_) @left right: (_) @right)
|
||||
=>
|
||||
(assignment
|
||||
left: {left}
|
||||
right: {right}
|
||||
)
|
||||
)];
|
||||
// This rewrite runs and preserves the RHS node kind via capture.
|
||||
// With schema above, preserving `integer` should be reported inline.
|
||||
let rules: Vec<Rule> = vec![yeast::rule!(
|
||||
(assignment left: (_) @left right: (_) @right)
|
||||
=>
|
||||
(assignment
|
||||
left: {left}
|
||||
right: {right}
|
||||
)
|
||||
)];
|
||||
|
||||
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("node kind 'integer' not in schema"));
|
||||
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("node kind 'integer' not in schema"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_undeclared_field_on_node() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -219,14 +219,14 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
|
||||
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
|
||||
assert!(dump.contains("the node 'assignment' has no field 'right'"));
|
||||
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
|
||||
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
|
||||
assert!(dump.contains("the node 'assignment' has no field 'right'"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_disallowed_kind_in_field_type() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -237,17 +237,17 @@ named:
|
||||
integer:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
|
||||
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("should contain"));
|
||||
assert!(dump.contains("but got integer"));
|
||||
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
|
||||
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("should contain"));
|
||||
assert!(dump.contains("but got integer"));
|
||||
}
|
||||
|
||||
// ---- Query tests ----
|
||||
|
||||
#[test]
|
||||
fn test_query_match() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -268,7 +268,7 @@ fn test_query_match() {
|
||||
|
||||
#[test]
|
||||
fn test_query_no_match() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
// captured comment to nothing (a common idiom, e.g.
|
||||
// `(comment) => ()` in Swift) leaves the capture's match-list empty
|
||||
// and causes the transform to fail with "Variable X has 0 matches".
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("[1, # comment\n2]").unwrap();
|
||||
|
||||
// Navigate to the `array` node: program -> array.
|
||||
@@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
|
||||
assert!(matched);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("a").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(),
|
||||
"integer"
|
||||
);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("b").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(),
|
||||
"integer"
|
||||
);
|
||||
}
|
||||
@@ -325,14 +321,14 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
#[test]
|
||||
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
|
||||
.unwrap();
|
||||
let phases = vec![Phase::new(
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases: Vec<Phase> = vec![Phase::new(
|
||||
"test",
|
||||
PhaseKind::Repeating,
|
||||
vec![yeast::rule!((integer) => (identifier "replaced"))],
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -350,7 +346,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
|
||||
|
||||
#[test]
|
||||
fn test_query_repeated_capture() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x, y, z = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -375,7 +371,7 @@ fn test_query_repeated_capture() {
|
||||
#[test]
|
||||
fn test_capture_unnamed_node_parenthesized() {
|
||||
// `("=") @op` captures the unnamed `=` token between left and right.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -403,7 +399,7 @@ fn test_capture_unnamed_node_parenthesized() {
|
||||
fn test_capture_bare_underscore_repeated() {
|
||||
// `_` matches named and unnamed nodes in bare-child position. On this
|
||||
// assignment shape, bare children correspond to unnamed tokens (the `=`).
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!((assignment _* @all));
|
||||
@@ -425,7 +421,7 @@ fn test_capture_bare_underscore_repeated() {
|
||||
#[test]
|
||||
fn test_capture_unnamed_node_bare_literal() {
|
||||
// `"=" @op` (without surrounding parens) is the same as `("=") @op`.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -454,7 +450,7 @@ fn test_bare_underscore_matches_unnamed() {
|
||||
// Bare `_` matches any node, including unnamed tokens, while `(_)`
|
||||
// matches only named nodes. Demonstrate by matching the unnamed `=`
|
||||
// token in the implicit `child` field of an `assignment`.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -493,7 +489,7 @@ fn test_bare_forms_in_field_position() {
|
||||
// field's value, not just in the bare-children position. This is
|
||||
// syntactic sugar for `(_)` / `("…")` and goes through the same
|
||||
// code paths.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -532,7 +528,7 @@ fn test_forward_scan_finds_unnamed_token_late() {
|
||||
// query for `("end")` skip past the first two and match the third.
|
||||
// Without forward-scan, the matcher took the first child unconditionally
|
||||
// and failed.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
// Navigate: program > for > do (the body wrapper).
|
||||
@@ -559,7 +555,7 @@ fn test_forward_scan_preserves_order() {
|
||||
// order. A query for ("end") then ("do") should fail because `do`
|
||||
// appears before `end` in the source order; once forward-scan has
|
||||
// consumed `end`, the iterator is exhausted.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -580,7 +576,7 @@ fn test_forward_scan_preserves_order() {
|
||||
|
||||
#[test]
|
||||
fn test_tree_builder() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let mut ast = runner.run("x = 1").unwrap();
|
||||
let input = "x = 1";
|
||||
|
||||
@@ -598,7 +594,8 @@ fn test_tree_builder() {
|
||||
|
||||
// Swap left and right
|
||||
let fresh = yeast::tree_builder::FreshScope::new();
|
||||
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh);
|
||||
let mut user_ctx = ();
|
||||
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx);
|
||||
let new_id = yeast::tree!(ctx,
|
||||
(program
|
||||
child: (assignment
|
||||
@@ -626,7 +623,7 @@ fn test_tree_builder() {
|
||||
// tree-sitter-ruby grammar with named fields for nodes that only have
|
||||
// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter).
|
||||
fn ruby_rules() -> Vec<Rule> {
|
||||
let assign_rule = yeast::rule!(
|
||||
let assign_rule: Rule = yeast::rule!(
|
||||
(assignment
|
||||
left: (left_assignment_list
|
||||
(identifier)* @left
|
||||
@@ -651,7 +648,7 @@ fn ruby_rules() -> Vec<Rule> {
|
||||
)}
|
||||
);
|
||||
|
||||
let for_rule = yeast::rule!(
|
||||
let for_rule: Rule = yeast::rule!(
|
||||
(for
|
||||
pattern: (_) @pat
|
||||
value: (in (_) @val)
|
||||
@@ -733,7 +730,7 @@ fn test_desugar_for_loop() {
|
||||
|
||||
#[test]
|
||||
fn test_shorthand_rule() {
|
||||
let rule = yeast::rule!(
|
||||
let rule: Rule = yeast::rule!(
|
||||
(assignment
|
||||
left: (_) @method
|
||||
right: (_) @receiver
|
||||
@@ -885,7 +882,7 @@ fn test_phase_error_includes_phase_name() {
|
||||
PhaseKind::Repeating,
|
||||
vec![swap_assignment_rule().repeated()],
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let err = runner
|
||||
.run("x = 1")
|
||||
.expect_err("expected runner to return an error");
|
||||
@@ -928,7 +925,7 @@ fn test_one_shot_phase() {
|
||||
PhaseKind::OneShot,
|
||||
one_shot_xeq1_rules(),
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -954,7 +951,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() {
|
||||
let mut rules = one_shot_xeq1_rules();
|
||||
rules.pop();
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let err = runner
|
||||
.run("x = 1")
|
||||
@@ -978,7 +975,7 @@ fn test_one_shot_recurses_into_returned_capture() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules = vec![
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
@@ -994,7 +991,7 @@ fn test_one_shot_recurses_into_returned_capture() {
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -1020,7 +1017,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules = vec![
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
@@ -1041,7 +1038,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -1065,7 +1062,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
|
||||
#[test]
|
||||
fn test_cursor_navigation() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
|
||||
@@ -1139,7 +1136,7 @@ fn test_desugar_for_with_multiple_assignment() {
|
||||
/// resolves to the captured node's source text via `YeastDisplay`.
|
||||
#[test]
|
||||
fn test_hash_brace_renders_capture_source_text() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(call
|
||||
method: (identifier) @name
|
||||
receiver: (identifier) @recv
|
||||
@@ -1168,7 +1165,7 @@ fn test_hash_brace_renders_capture_source_text() {
|
||||
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
|
||||
#[test]
|
||||
fn test_hash_brace_renders_integer_expression() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(identifier) @_
|
||||
=>
|
||||
(identifier #{1 + 2})
|
||||
@@ -1187,7 +1184,7 @@ fn test_hash_brace_renders_integer_expression() {
|
||||
/// source location, not the full source range of the matched rule root.
|
||||
#[test]
|
||||
fn test_hash_brace_uses_capture_location_for_leaf() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(call
|
||||
method: (identifier) @name
|
||||
receiver: (identifier) @recv
|
||||
@@ -1204,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() {
|
||||
|
||||
let mut bar_ids: Vec<usize> = Vec::new();
|
||||
for id in ast.reachable_node_ids() {
|
||||
let Some(node) = ast.get_node(id) else { continue; };
|
||||
let Some(node) = ast.get_node(id) else {
|
||||
continue;
|
||||
};
|
||||
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
|
||||
bar_ids.push(id);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user