mirror of
https://github.com/github/codeql.git
synced 2026-06-26 15:17:06 +02:00
Merge pull request #22054 from github/tausbn/yeast-context-reification
This commit is contained in:
@@ -280,10 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr
|
||||
}
|
||||
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
/// When `yeast_runner` is `Some`, the parsed tree is first transformed
|
||||
/// through the supplied yeast `Runner` before TRAP extraction. Building the
|
||||
/// `Runner` (which parses YAML and constructs the schema) is the caller's
|
||||
/// responsibility, allowing it to be done once and shared across files.
|
||||
/// When `desugarer` is `Some`, the parsed tree is first transformed
|
||||
/// through the supplied yeast desugarer before TRAP extraction. Building
|
||||
/// the desugarer (which parses YAML and constructs the schema) is the
|
||||
/// caller's responsibility, allowing it to be done once and shared across
|
||||
/// files.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn extract(
|
||||
language: &Language,
|
||||
@@ -295,7 +296,7 @@ pub fn extract(
|
||||
path: &Path,
|
||||
source: &[u8],
|
||||
ranges: &[Range],
|
||||
yeast_runner: Option<&yeast::Runner<'_>>,
|
||||
desugarer: Option<&dyn yeast::Desugarer>,
|
||||
) {
|
||||
let path_str = file_paths::normalize_and_transform_path(path, transformer);
|
||||
let source_root = std::env::current_dir()
|
||||
@@ -328,8 +329,8 @@ pub fn extract(
|
||||
schema,
|
||||
);
|
||||
|
||||
if let Some(yeast_runner) = yeast_runner {
|
||||
let ast = yeast_runner
|
||||
if let Some(desugarer) = desugarer {
|
||||
let ast = desugarer
|
||||
.run_from_tree(&tree, source)
|
||||
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
|
||||
traverse_yeast(&ast, &mut visitor);
|
||||
|
||||
@@ -13,11 +13,14 @@ pub struct LanguageSpec {
|
||||
pub prefix: &'static str,
|
||||
pub ts_language: tree_sitter::Language,
|
||||
pub node_types: &'static str,
|
||||
/// Optional yeast desugaring configuration. When set, the parsed
|
||||
/// tree is rewritten through yeast before TRAP extraction. The
|
||||
/// config's `output_node_types_yaml` (if set) provides the schema
|
||||
/// used both at runtime (for the rewriter) and for TRAP validation.
|
||||
pub desugar: Option<yeast::DesugaringConfig>,
|
||||
/// Optional desugarer. When set, the parsed tree is rewritten through
|
||||
/// the desugarer before TRAP extraction. The desugarer's
|
||||
/// `output_node_types_yaml()` (if set) provides the schema used both
|
||||
/// at runtime (for the rewriter) and for TRAP validation.
|
||||
///
|
||||
/// `Box<dyn yeast::Desugarer>` so the shared extractor is agnostic to
|
||||
/// the user-defined context type the desugarer uses internally.
|
||||
pub desugar: Option<Box<dyn yeast::Desugarer>>,
|
||||
pub file_globs: Vec<String>,
|
||||
}
|
||||
|
||||
@@ -91,35 +94,22 @@ impl Extractor {
|
||||
.collect();
|
||||
|
||||
let mut schemas = vec![];
|
||||
let mut yeast_runners = Vec::new();
|
||||
for lang in &self.languages {
|
||||
let effective_node_types: String =
|
||||
match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) {
|
||||
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
|
||||
std::io::Error::other(format!(
|
||||
"Failed to convert YAML node-types to JSON for {}: {e}",
|
||||
lang.prefix
|
||||
))
|
||||
})?,
|
||||
None => lang.node_types.to_string(),
|
||||
};
|
||||
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
|
||||
schemas.push(schema);
|
||||
|
||||
// Build the yeast runner once per language so the YAML schema
|
||||
// isn't re-parsed for every file.
|
||||
let yeast_runner = lang
|
||||
let effective_node_types: String = match lang
|
||||
.desugar
|
||||
.as_ref()
|
||||
.map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config))
|
||||
.transpose()
|
||||
.map_err(|e| {
|
||||
.and_then(|d| d.output_node_types_yaml())
|
||||
{
|
||||
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
|
||||
std::io::Error::other(format!(
|
||||
"Failed to build desugaring runner for {}: {e}",
|
||||
"Failed to convert YAML node-types to JSON for {}: {e}",
|
||||
lang.prefix
|
||||
))
|
||||
})?;
|
||||
yeast_runners.push(yeast_runner);
|
||||
})?,
|
||||
None => lang.node_types.to_string(),
|
||||
};
|
||||
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
|
||||
schemas.push(schema);
|
||||
}
|
||||
|
||||
// Construct a single globset containing all language globs,
|
||||
@@ -194,7 +184,7 @@ impl Extractor {
|
||||
&path,
|
||||
&source,
|
||||
&[],
|
||||
yeast_runners[i].as_ref(),
|
||||
lang.desugar.as_deref(),
|
||||
);
|
||||
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
|
||||
std::fs::copy(&path, &src_archive_file)?;
|
||||
|
||||
@@ -121,3 +121,37 @@ pub fn rule(input: TokenStream) -> TokenStream {
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Define a desugaring rule whose transform is a hand-written Rust block.
|
||||
///
|
||||
/// Use `manual_rule!` when the transform needs control over capture
|
||||
/// translation timing — for example, when an outer rule needs to set
|
||||
/// state in `ctx` (the `BuildCtx`'s user context) before recursive
|
||||
/// translation reaches inner rules that read that state.
|
||||
///
|
||||
/// ```text
|
||||
/// manual_rule!(
|
||||
/// (query_pattern field: (_) @name)
|
||||
/// {
|
||||
/// // `ctx` is a `&mut BuildCtx<'_, C>`; capture variables
|
||||
/// // (`name: NodeRef`, etc.) are bound from the query.
|
||||
/// let translated = ctx.translate(name)?;
|
||||
/// Ok(translated)
|
||||
/// }
|
||||
/// )
|
||||
/// ```
|
||||
///
|
||||
/// Differences from [`rule!`]:
|
||||
/// - Captures are **not** auto-translated before the body runs; they
|
||||
/// refer to raw input-schema nodes. Use [`BuildCtx::translate`] (or
|
||||
/// [`BuildCtx::translate_opt`]) to translate them when you choose.
|
||||
/// - The body is plain Rust returning `Result<Vec<Id>, String>` — no
|
||||
/// tree template, no `Ok(...)` wrap.
|
||||
#[proc_macro]
|
||||
pub fn manual_rule(input: TokenStream) -> TokenStream {
|
||||
let input2: TokenStream2 = input.into();
|
||||
match parse::parse_manual_rule_top(input2) {
|
||||
Ok(output) => output.into(),
|
||||
Err(err) => err.to_compile_error().into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
std::collections::HashMap::new();
|
||||
let mut bare_children: Vec<TokenStream> = Vec::new();
|
||||
let push_field_elem = |order: &mut Vec<String>,
|
||||
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
|
||||
name: String,
|
||||
elem: TokenStream| {
|
||||
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
|
||||
name: String,
|
||||
elem: TokenStream| {
|
||||
if !map.contains_key(&name) {
|
||||
order.push(name.clone());
|
||||
map.insert(name, vec![elem]);
|
||||
@@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
} else {
|
||||
let child = if peek_is_at(tokens) {
|
||||
tokens.next();
|
||||
let capture_name =
|
||||
expect_ident(tokens, "expected capture name after @")?;
|
||||
let capture_name = expect_ident(tokens, "expected capture name after @")?;
|
||||
let name_str = capture_name.to_string();
|
||||
quote! {
|
||||
yeast::query::QueryNode::Capture {
|
||||
@@ -296,10 +295,10 @@ fn parse_query_list(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
|
||||
// tree! / trees! parsing — direct code generation against BuildCtx
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const IMPLICIT_CTX: &str = "__yeast_ctx";
|
||||
const IMPLICIT_CTX: &str = "ctx";
|
||||
|
||||
/// Determine the context identifier: either explicit `ctx,` or the implicit
|
||||
/// `__yeast_ctx` from an enclosing `rule!`.
|
||||
/// `ctx` from an enclosing `rule!`.
|
||||
fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident {
|
||||
// Check if first token is an ident followed by a comma
|
||||
let mut lookahead = tokens.clone();
|
||||
@@ -359,7 +358,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
let expr = group.stream();
|
||||
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
|
||||
Ok(quote! { ::std::convert::Into::<usize>::into({ #expr }) })
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
|
||||
let group = expect_group(tokens, Delimiter::Parenthesis)?;
|
||||
@@ -396,7 +395,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
let expr = group.stream();
|
||||
return Ok(quote! {
|
||||
{
|
||||
let __expr = (#expr);
|
||||
let __expr = { #expr };
|
||||
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
|
||||
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
|
||||
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
|
||||
@@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
// Named fields — compute each value into a temp, then reference it
|
||||
while peek_is_field(tokens) {
|
||||
let field_name = expect_ident(tokens, "expected field name")?;
|
||||
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
|
||||
let field_str = field_name
|
||||
.to_string()
|
||||
.strip_prefix("r#")
|
||||
.unwrap_or(&field_name.to_string())
|
||||
.to_string();
|
||||
expect_punct(tokens, ':', "expected `:` after field name")?;
|
||||
let temp = Ident::new(
|
||||
&format!("__field_{field_str}_{field_counter}"),
|
||||
@@ -438,7 +441,8 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
// Determine if a chain (.map(..)) follows the `{}` group.
|
||||
let mut after = tokens.clone();
|
||||
after.next(); // skip the brace group
|
||||
let has_chain = matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let has_chain =
|
||||
matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
|
||||
if is_splice || has_chain {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
@@ -448,11 +452,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
inner.next(); // consume second .
|
||||
let expr: TokenStream = inner.collect();
|
||||
quote! {
|
||||
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
|
||||
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
|
||||
}
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
quote! { (#expr).into_iter() }
|
||||
quote! { { #expr }.into_iter() }
|
||||
};
|
||||
let chained = parse_chain_suffix(tokens, ctx, base)?;
|
||||
stmts.push(quote! {
|
||||
@@ -506,11 +510,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
/// Each call expects the receiver to be an iterator. The `base` argument
|
||||
/// should therefore already be an iterator (use `.into_iter()` on it before
|
||||
/// calling this function).
|
||||
fn parse_chain_suffix(
|
||||
tokens: &mut Tokens,
|
||||
ctx: &Ident,
|
||||
base: TokenStream,
|
||||
) -> Result<TokenStream> {
|
||||
fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result<TokenStream> {
|
||||
let mut current = base;
|
||||
while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') {
|
||||
tokens.next(); // consume .
|
||||
@@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
// {expr} or {..expr} (with optional .chain) — single node or splice
|
||||
if peek_is_group(tokens, Delimiter::Brace) {
|
||||
let group = expect_group(tokens, Delimiter::Brace)?;
|
||||
let has_chain = matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let has_chain =
|
||||
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
|
||||
let mut inner = group.stream().into_iter().peekable();
|
||||
let is_splice = peek_is_dotdot(&inner);
|
||||
if is_splice || has_chain {
|
||||
@@ -617,11 +618,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
inner.next(); // consume second .
|
||||
let expr: TokenStream = inner.collect();
|
||||
quote! {
|
||||
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
|
||||
{ #expr }.into_iter().map(::std::convert::Into::<usize>::into)
|
||||
}
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
quote! { (#expr).into_iter() }
|
||||
quote! { { #expr }.into_iter() }
|
||||
};
|
||||
let chained = parse_chain_suffix(tokens, ctx, base)?;
|
||||
items.push(quote! {
|
||||
@@ -630,7 +631,7 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
} else {
|
||||
let expr = group.stream();
|
||||
items.push(quote! {
|
||||
__nodes.push(::std::convert::Into::<usize>::into(#expr));
|
||||
__nodes.push(::std::convert::Into::<usize>::into({ #expr }));
|
||||
});
|
||||
}
|
||||
continue;
|
||||
@@ -888,10 +889,117 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>| {
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, mut __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// Auto-translation prefix: recursively translate every
|
||||
// captured node before invoking the user's transform body.
|
||||
// For OneShot rules this preserves the legacy behaviour
|
||||
// (input-schema captures translated to output-schema
|
||||
// nodes); for Repeating rules it is a no-op.
|
||||
__translator.auto_translate_captures(&mut __captures, __ast, __user_ctx)?;
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range);
|
||||
#transform_body
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
let __result: Vec<usize> = { #transform_body };
|
||||
Ok(__result)
|
||||
}))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse `manual_rule!( query { body } )`.
|
||||
///
|
||||
/// Like [`parse_rule_top`] but:
|
||||
/// - Expects a Rust block `{ ... }` after the query (no `=>` arrow).
|
||||
/// - Generates code that does NOT auto-translate captures before
|
||||
/// running the body. Capture variables refer to raw (input-schema)
|
||||
/// nodes; the body is responsible for explicit translation via
|
||||
/// `ctx.translate(...)`.
|
||||
/// - The body is included verbatim and must evaluate to
|
||||
/// `Result<Vec<usize>, String>`.
|
||||
pub fn parse_manual_rule_top(input: TokenStream) -> Result<TokenStream> {
|
||||
let mut tokens = input.into_iter().peekable();
|
||||
|
||||
// Collect query tokens up to the body block `{ ... }`.
|
||||
let mut query_tokens = Vec::new();
|
||||
loop {
|
||||
match tokens.peek() {
|
||||
None => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
"expected a Rust block `{ ... }` after the query in manual_rule!",
|
||||
))
|
||||
}
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => break,
|
||||
_ => {
|
||||
query_tokens.push(tokens.next().unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query_stream: TokenStream = query_tokens.into_iter().collect();
|
||||
|
||||
// Extract captures from the query (same as in `rule!`).
|
||||
let captures = extract_captures(&query_stream);
|
||||
|
||||
// Parse the query into the QueryNode-building expression.
|
||||
let query_code = parse_query_top(query_stream)?;
|
||||
|
||||
// Generate capture bindings (same as in `rule!`).
|
||||
let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site());
|
||||
let bindings: Vec<TokenStream> = captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name = Ident::new(&cap.name, Span::call_site());
|
||||
let name_str = &cap.name;
|
||||
match cap.multiplicity {
|
||||
CaptureMultiplicity::Repeated => quote! {
|
||||
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
|
||||
.into_iter()
|
||||
.map(yeast::NodeRef)
|
||||
.collect();
|
||||
},
|
||||
CaptureMultiplicity::Optional => quote! {
|
||||
let #name: Option<yeast::NodeRef> =
|
||||
__captures.get_opt(#name_str).map(yeast::NodeRef);
|
||||
},
|
||||
CaptureMultiplicity::Single => quote! {
|
||||
let #name: yeast::NodeRef =
|
||||
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
|
||||
},
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Consume the body block.
|
||||
let body_group = match tokens.next() {
|
||||
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => g,
|
||||
other => {
|
||||
return Err(syn::Error::new(
|
||||
Span::call_site(),
|
||||
format!(
|
||||
"expected a Rust block `{{ ... }}` after the query in manual_rule!, found: {other:?}"
|
||||
),
|
||||
))
|
||||
}
|
||||
};
|
||||
let body_stream = body_group.stream();
|
||||
|
||||
// No tokens should follow the body.
|
||||
if let Some(tok) = tokens.next() {
|
||||
return Err(syn::Error::new_spanned(
|
||||
tok,
|
||||
"unexpected token after manual_rule! body",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(quote! {
|
||||
{
|
||||
let __query = #query_code;
|
||||
yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option<tree_sitter::Range>, __user_ctx: &mut _, __translator: yeast::TranslatorHandle<'_, _>| {
|
||||
// No auto-translate prefix for manual rules — the body
|
||||
// is responsible for translating captures explicitly.
|
||||
#(#bindings)*
|
||||
let mut #ctx_ident = yeast::build::BuildCtx::with_translator(__ast, &__captures, __fresh, __source_range, __user_ctx, __translator);
|
||||
#body_stream
|
||||
}))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -265,7 +265,21 @@ occurrences of the same `$name` within one `BuildCtx` share the same value:
|
||||
)
|
||||
```
|
||||
|
||||
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`):
|
||||
The contents of `{…}` are treated as a Rust block, so multi-statement
|
||||
expressions (with `let` bindings) work too:
|
||||
|
||||
```rust
|
||||
(assignment
|
||||
left: {tmp}
|
||||
right: {
|
||||
let lit = ctx.literal("integer", "0");
|
||||
tree!((binary_expr op: (operator "+") left: {tmp} right: {lit}))
|
||||
})
|
||||
```
|
||||
|
||||
`{..expr}` splices a `Vec<Id>` (or any iterable of `Id`); the contents
|
||||
are likewise a Rust block, so the splice can be the result of arbitrary
|
||||
computation:
|
||||
|
||||
```rust
|
||||
yeast::trees!(ctx,
|
||||
|
||||
@@ -20,7 +20,7 @@ fn main() {
|
||||
let args = Cli::parse();
|
||||
let language = get_language(&args.language);
|
||||
let source = std::fs::read_to_string(&args.file).unwrap();
|
||||
let runner = yeast::Runner::new(language, &[]);
|
||||
let runner: yeast::Runner = yeast::Runner::new(language, &[]);
|
||||
let ast = runner.run(&source).unwrap();
|
||||
println!("{}", ast.print(&source, ast.get_root()));
|
||||
}
|
||||
|
||||
@@ -2,28 +2,60 @@ use std::collections::BTreeMap;
|
||||
|
||||
use crate::captures::Captures;
|
||||
use crate::tree_builder::FreshScope;
|
||||
use crate::{Ast, FieldId, Id, NodeContent};
|
||||
use crate::{Ast, FieldId, Id, NodeContent, TranslatorHandle};
|
||||
|
||||
/// Context for building new AST nodes during a transformation.
|
||||
///
|
||||
/// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the
|
||||
/// AST, a reference to the captures from a query match, and a `FreshScope` for
|
||||
/// generating unique identifiers.
|
||||
pub struct BuildCtx<'a> {
|
||||
/// AST, a reference to the captures from a query match, a `FreshScope` for
|
||||
/// generating unique identifiers, and a mutable reference to a user-defined
|
||||
/// context of type `C`.
|
||||
///
|
||||
/// The user context `C` is shared across rules via the framework's driver:
|
||||
/// outer rules can write to it before recursive translation, and inner rules
|
||||
/// can read (or further mutate) it during their transforms. The framework
|
||||
/// snapshots and restores the user context around each rule application, so
|
||||
/// mutations made by a rule are visible to its descendants (via recursive
|
||||
/// translation) but not to its parent's siblings.
|
||||
///
|
||||
/// `BuildCtx` implements [`Deref`] and [`DerefMut`] targeting `C`, so user
|
||||
/// context fields are accessible as `ctx.my_field` directly (provided they
|
||||
/// don't collide with `BuildCtx`'s own fields like `ast`, `captures`, etc.).
|
||||
///
|
||||
/// The default `C = ()` means rules that don't need any user context don't
|
||||
/// pay any cost.
|
||||
///
|
||||
/// When constructed by the framework (via the rule! macro), `BuildCtx` also
|
||||
/// carries a [`TranslatorHandle`] that the [`translate`] method delegates
|
||||
/// to. When constructed by hand (e.g. in tests), the translator is `None`
|
||||
/// and [`translate`] returns an error.
|
||||
pub struct BuildCtx<'a, C: 'a = ()> {
|
||||
pub ast: &'a mut Ast,
|
||||
pub captures: &'a Captures,
|
||||
pub fresh: &'a FreshScope,
|
||||
/// Source range of the matched node, inherited by synthetic nodes.
|
||||
pub source_range: Option<tree_sitter::Range>,
|
||||
/// User-supplied context, accessible directly via `ctx.field` (via Deref).
|
||||
pub user_ctx: &'a mut C,
|
||||
/// Optional translator handle, populated when the context is built by
|
||||
/// the framework's rule driver. None when the context is built by hand.
|
||||
pub(crate) translator: Option<TranslatorHandle<'a, C>>,
|
||||
}
|
||||
|
||||
impl<'a> BuildCtx<'a> {
|
||||
pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self {
|
||||
impl<'a, C> BuildCtx<'a, C> {
|
||||
pub fn new(
|
||||
ast: &'a mut Ast,
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
user_ctx: &'a mut C,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range: None,
|
||||
user_ctx,
|
||||
translator: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,12 +64,35 @@ impl<'a> BuildCtx<'a> {
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
user_ctx: &'a mut C,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range,
|
||||
user_ctx,
|
||||
translator: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a `BuildCtx` carrying a translator handle. Used by the
|
||||
/// `rule!` macro to enable [`translate`] inside rule transforms.
|
||||
pub fn with_translator(
|
||||
ast: &'a mut Ast,
|
||||
captures: &'a Captures,
|
||||
fresh: &'a FreshScope,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
user_ctx: &'a mut C,
|
||||
translator: TranslatorHandle<'a, C>,
|
||||
) -> Self {
|
||||
Self {
|
||||
ast,
|
||||
captures,
|
||||
fresh,
|
||||
source_range,
|
||||
user_ctx,
|
||||
translator: Some(translator),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,3 +168,52 @@ impl<'a> BuildCtx<'a> {
|
||||
self.ast.prepend_field_child(node_id, field_id, value_id);
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Clone> BuildCtx<'_, C> {
|
||||
/// Recursively translate a node via the framework's rule machinery.
|
||||
/// In a OneShot phase, applies OneShot rules to the given node and
|
||||
/// returns the resulting node ids. In a Repeating phase, errors
|
||||
/// (translation is not meaningful when input and output share a
|
||||
/// schema).
|
||||
///
|
||||
/// Accepts any value convertible to [`Id`] (including [`crate::NodeRef`]),
|
||||
/// so manual rules can pass capture bindings directly without unwrapping.
|
||||
///
|
||||
/// Errors if this `BuildCtx` was constructed by hand (without a
|
||||
/// translator handle) — for example, in unit tests that don't go
|
||||
/// through the rule driver.
|
||||
pub fn translate<I: Into<Id>>(&mut self, id: I) -> Result<Vec<Id>, String> {
|
||||
let id = id.into();
|
||||
match &self.translator {
|
||||
Some(t) => t.translate(self.ast, self.user_ctx, id),
|
||||
None => Err("translate() called on a BuildCtx without a translator handle".into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate an optional capture, returning the first translated id or
|
||||
/// `None`. Convenience for `?`-quantifier captures (`Option<NodeRef>`).
|
||||
///
|
||||
/// If the underlying translation produces multiple ids for a single
|
||||
/// input, only the first is returned. For most use cases (e.g.
|
||||
/// translating a single type annotation) this is what you want; if
|
||||
/// you need all ids, use [`translate`] directly.
|
||||
pub fn translate_opt<I: Into<Id>>(&mut self, id: Option<I>) -> Result<Option<Id>, String> {
|
||||
match id {
|
||||
Some(id) => Ok(self.translate(id)?.into_iter().next()),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> std::ops::Deref for BuildCtx<'_, C> {
|
||||
type Target = C;
|
||||
fn deref(&self) -> &C {
|
||||
&*self.user_ctx
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> std::ops::DerefMut for BuildCtx<'_, C> {
|
||||
fn deref_mut(&mut self) -> &mut C {
|
||||
&mut *self.user_ctx
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,12 +53,7 @@ pub fn dump_ast_with_options(
|
||||
///
|
||||
/// Any node that does not match the expected type set for its parent field is
|
||||
/// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line.
|
||||
pub fn dump_ast_with_type_errors(
|
||||
ast: &Ast,
|
||||
root: usize,
|
||||
source: &str,
|
||||
schema: &Schema,
|
||||
) -> String {
|
||||
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
|
||||
dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default())
|
||||
}
|
||||
|
||||
@@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options(
|
||||
options: &DumpOptions,
|
||||
) -> String {
|
||||
let mut out = String::new();
|
||||
dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out);
|
||||
dump_node(
|
||||
ast,
|
||||
root,
|
||||
source,
|
||||
options,
|
||||
0,
|
||||
Some((schema, None, None)),
|
||||
&mut out,
|
||||
);
|
||||
out
|
||||
}
|
||||
|
||||
@@ -232,8 +235,8 @@ fn dump_node(
|
||||
}
|
||||
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
|
||||
let child_type_check = type_check.map(|(schema, _, _)| {
|
||||
let expected = expected_for_field(schema, node.kind_name(), field_id)
|
||||
.or(Some(EMPTY_NODE_TYPES));
|
||||
let expected =
|
||||
expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES));
|
||||
let parent_field = Some((node.kind_name(), field_name));
|
||||
(schema, expected, parent_field)
|
||||
});
|
||||
|
||||
@@ -16,7 +16,7 @@ pub mod schema;
|
||||
pub mod tree_builder;
|
||||
mod visitor;
|
||||
|
||||
pub use yeast_macros::{query, rule, tree, trees};
|
||||
pub use yeast_macros::{manual_rule, query, rule, tree, trees};
|
||||
|
||||
use captures::Captures;
|
||||
pub use cursor::Cursor;
|
||||
@@ -297,7 +297,9 @@ impl Ast {
|
||||
/// Returns the source text for `id`, resolving `NodeContent::Range`
|
||||
/// against the stored source bytes when available.
|
||||
pub fn source_text(&self, id: Id) -> String {
|
||||
let Some(node) = self.get_node(id) else { return String::new(); };
|
||||
let Some(node) = self.get_node(id) else {
|
||||
return String::new();
|
||||
};
|
||||
let read_range = |range: &tree_sitter::Range| {
|
||||
let start = range.start_byte;
|
||||
let end = range.end_byte;
|
||||
@@ -488,7 +490,10 @@ impl Ast {
|
||||
|
||||
/// Prepend a child id to the given field of the given node.
|
||||
pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) {
|
||||
let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id");
|
||||
let node = self
|
||||
.nodes
|
||||
.get_mut(node_id)
|
||||
.expect("prepend_field_child: invalid node id");
|
||||
node.fields.entry(field_id).or_default().insert(0, value_id);
|
||||
}
|
||||
|
||||
@@ -700,18 +705,118 @@ impl From<tree_sitter::Range> for NodeContent {
|
||||
}
|
||||
}
|
||||
|
||||
/// The transform function for a rule: takes the AST, captured variables, a
|
||||
/// fresh-name scope, and the source range of the matched node, and returns
|
||||
/// the IDs of the replacement nodes.
|
||||
pub type Transform = Box<
|
||||
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> Vec<Id>
|
||||
/// A handle that lets a rule transform recursively translate AST nodes via
|
||||
/// the framework's rule machinery. Constructed by the driver and passed as
|
||||
/// the last argument of every [`Transform`] invocation.
|
||||
///
|
||||
/// The `rule!` macro uses [`TranslatorHandle::auto_translate_captures`] in
|
||||
/// its generated prefix to translate captures before running the user's
|
||||
/// transform body. Manually-written transforms (using [`Rule::new`]
|
||||
/// directly) can call [`TranslatorHandle::translate`] selectively on
|
||||
/// specific node ids to control when translation happens.
|
||||
pub struct TranslatorHandle<'a, C> {
|
||||
inner: TranslatorImpl<'a, C>,
|
||||
}
|
||||
|
||||
/// Internal phase-specific translation state. Kept private — callers
|
||||
/// interact with [`TranslatorHandle`] only.
|
||||
enum TranslatorImpl<'a, C> {
|
||||
/// OneShot phase translator: recursively applies OneShot rules.
|
||||
OneShot {
|
||||
index: &'a RuleIndex<'a, C>,
|
||||
fresh: &'a tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
/// The id of the node the current rule is matching. Used by
|
||||
/// [`auto_translate_captures`] to avoid infinite recursion when a
|
||||
/// rule captures its own match root (e.g. via `(_) @_`).
|
||||
matched_root: Id,
|
||||
},
|
||||
/// Repeating phase translator: translation is not meaningful here
|
||||
/// (input and output schemas are the same). [`translate`] errors;
|
||||
/// [`auto_translate_captures`] is a no-op so the macro's auto-prefix
|
||||
/// works unchanged for Repeating rules.
|
||||
Repeating,
|
||||
}
|
||||
|
||||
impl<'a, C: Clone> TranslatorHandle<'a, C> {
|
||||
/// Recursively apply OneShot rules to `id` and return the resulting
|
||||
/// node ids. Errors in a Repeating phase (where translation is not
|
||||
/// meaningful).
|
||||
pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result<Vec<Id>, String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot {
|
||||
index,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
..
|
||||
} => apply_one_shot_rules_inner(index, ast, user_ctx, id, fresh, rewrite_depth + 1),
|
||||
TranslatorImpl::Repeating => {
|
||||
Err("translate() is not available in a Repeating phase".into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate every captured node in `captures` in place (OneShot phase
|
||||
/// only). In a Repeating phase this is a no-op — Repeating rules
|
||||
/// receive raw captures.
|
||||
///
|
||||
/// Used by the `rule!` macro's generated prefix to preserve the
|
||||
/// pre-existing "auto-translate captures before running the transform
|
||||
/// body" behavior. Manually-written transforms typically translate
|
||||
/// captures selectively via [`translate`] instead.
|
||||
///
|
||||
/// To avoid infinite recursion, a capture whose id matches the rule's
|
||||
/// matched root (e.g. from a `(_) @_` pattern) is left unchanged.
|
||||
pub fn auto_translate_captures(
|
||||
&self,
|
||||
captures: &mut Captures,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
) -> Result<(), String> {
|
||||
match &self.inner {
|
||||
TranslatorImpl::OneShot { matched_root, .. } => {
|
||||
let root = *matched_root;
|
||||
captures.try_map_all_captures(|cid| {
|
||||
if cid == root {
|
||||
Ok(vec![cid])
|
||||
} else {
|
||||
self.translate(ast, user_ctx, cid)
|
||||
}
|
||||
})
|
||||
}
|
||||
TranslatorImpl::Repeating => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The transform function for a rule.
|
||||
///
|
||||
/// Takes the AST, the (raw, untranslated) captured variables, a fresh-name
|
||||
/// scope, the source range of the matched node, a mutable reference to the
|
||||
/// user context of type `C`, and a [`TranslatorHandle`] for recursively
|
||||
/// translating nodes. Returns the IDs of the replacement nodes, or an
|
||||
/// error message if the transform could not be completed.
|
||||
///
|
||||
/// Transforms produced by [`Rule::new`] receive **raw** captures and must
|
||||
/// translate them themselves (via the handle). Transforms produced by the
|
||||
/// `rule!` macro have an auto-translation prefix injected for backward
|
||||
/// compatibility.
|
||||
///
/// The context type `C` defaults to `()`, i.e. no user context.
pub type Transform<C = ()> = Box<
|
||||
dyn Fn(
|
||||
// The AST being rewritten.
&mut Ast,
|
||||
// Captured variables of the match, untranslated.
Captures,
|
||||
// Scope for generating fresh names.
&tree_builder::FreshScope,
|
||||
// Source range of the matched node, if one is available.
Option<tree_sitter::Range>,
|
||||
// User context threaded through the rule transforms.
&mut C,
|
||||
// Handle for recursively translating nodes.
TranslatorHandle<'_, C>,
|
||||
) -> Result<Vec<Id>, String>
|
||||
+ Send
|
||||
+ Sync,
|
||||
>;
|
||||
|
||||
pub struct Rule {
|
||||
pub struct Rule<C = ()> {
|
||||
query: QueryNode,
|
||||
transform: Transform,
|
||||
transform: Transform<C>,
|
||||
/// If true, after this rule fires on a node the engine will try to
|
||||
/// re-apply this same rule on the result root. Defaults to false:
|
||||
/// each rule fires at most once on a given node, which prevents
|
||||
@@ -719,8 +824,8 @@ pub struct Rule {
|
||||
repeated: bool,
|
||||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn new(query: QueryNode, transform: Transform) -> Self {
|
||||
impl<C> Rule<C> {
|
||||
pub fn new(query: QueryNode, transform: Transform<C>) -> Self {
|
||||
Self {
|
||||
query,
|
||||
transform,
|
||||
@@ -742,9 +847,13 @@ impl Rule {
|
||||
ast: &mut Ast,
|
||||
node: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
user_ctx: &mut C,
|
||||
translator: TranslatorHandle<'_, C>,
|
||||
) -> Result<Option<Vec<Id>>, String> {
|
||||
match self.try_match(ast, node)? {
|
||||
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
|
||||
Some(captures) => Ok(Some(
|
||||
self.run_transform(ast, captures, node, fresh, user_ctx, translator)?,
|
||||
)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@@ -768,29 +877,31 @@ impl Rule {
|
||||
captures: Captures,
|
||||
node: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Vec<Id> {
|
||||
user_ctx: &mut C,
|
||||
translator: TranslatorHandle<'_, C>,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
fresh.next_scope();
|
||||
let source_range = ast.get_node(node).and_then(|n| match n.content {
|
||||
NodeContent::Range(r) => Some(r),
|
||||
_ => n.source_range,
|
||||
});
|
||||
(self.transform)(ast, captures, fresh, source_range)
|
||||
(self.transform)(ast, captures, fresh, source_range, user_ctx, translator)
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_REWRITE_DEPTH: usize = 100;
|
||||
|
||||
/// Index of rules by their root query kind for fast lookup.
|
||||
struct RuleIndex<'a> {
|
||||
struct RuleIndex<'a, C> {
|
||||
/// Rules indexed by root node kind name.
|
||||
by_kind: BTreeMap<&'static str, Vec<&'a Rule>>,
|
||||
by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>>,
|
||||
/// Rules with wildcard queries (Any) that apply to all nodes.
|
||||
wildcard: Vec<&'a Rule>,
|
||||
wildcard: Vec<&'a Rule<C>>,
|
||||
}
|
||||
|
||||
impl<'a> RuleIndex<'a> {
|
||||
fn new(rules: &'a [Rule]) -> Self {
|
||||
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new();
|
||||
impl<'a, C> RuleIndex<'a, C> {
|
||||
fn new(rules: &'a [Rule<C>]) -> Self {
|
||||
let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule<C>>> = BTreeMap::new();
|
||||
let mut wildcard = Vec::new();
|
||||
for rule in rules {
|
||||
match rule.query.root_kind() {
|
||||
@@ -801,7 +912,7 @@ impl<'a> RuleIndex<'a> {
|
||||
Self { by_kind, wildcard }
|
||||
}
|
||||
|
||||
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule> {
|
||||
fn rules_for_kind(&self, kind: &str) -> impl Iterator<Item = &&'a Rule<C>> {
|
||||
self.by_kind
|
||||
.get(kind)
|
||||
.into_iter()
|
||||
@@ -810,23 +921,25 @@ impl<'a> RuleIndex<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_repeating_rules(
|
||||
rules: &[Rule],
|
||||
fn apply_repeating_rules<C: Clone>(
|
||||
rules: &[Rule<C>],
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
let index = RuleIndex::new(rules);
|
||||
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
|
||||
apply_repeating_rules_inner(&index, ast, user_ctx, id, fresh, 0, None)
|
||||
}
|
||||
|
||||
fn apply_repeating_rules_inner(
|
||||
index: &RuleIndex,
|
||||
fn apply_repeating_rules_inner<C: Clone>(
|
||||
index: &RuleIndex<C>,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
skip_rule: Option<*const Rule>,
|
||||
skip_rule: Option<*const Rule<C>>,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
if rewrite_depth > MAX_REWRITE_DEPTH {
|
||||
return Err(format!(
|
||||
@@ -837,11 +950,23 @@ fn apply_repeating_rules_inner(
|
||||
|
||||
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
|
||||
for rule in index.rules_for_kind(node_kind) {
|
||||
let rule_ptr = *rule as *const Rule;
|
||||
let rule_ptr = *rule as *const Rule<C>;
|
||||
if Some(rule_ptr) == skip_rule {
|
||||
continue;
|
||||
}
|
||||
if let Some(result_node) = rule.try_rule(ast, id, fresh)? {
|
||||
// Snapshot the user context before invoking the rule so that any
|
||||
// mutations the rule makes are visible during recursive translation
|
||||
// of its result, but not leaked to the parent's siblings.
|
||||
let snapshot = user_ctx.clone();
|
||||
// Repeating rules don't need a real translator: their captures
|
||||
// aren't auto-translated (Repeating preserves the input schema),
|
||||
// and `ctx.translate(id)` errors if invoked from a Repeating
|
||||
// transform.
|
||||
let translator = TranslatorHandle {
|
||||
inner: TranslatorImpl::Repeating,
|
||||
};
|
||||
let try_result = rule.try_rule(ast, id, fresh, user_ctx, translator)?;
|
||||
if let Some(result_node) = try_result {
|
||||
// For non-repeated rules, suppress further application of *this*
|
||||
// rule on the result root, so a rule whose output matches its own
|
||||
// query doesn't loop. Other rules and child traversal are
|
||||
@@ -852,14 +977,19 @@ fn apply_repeating_rules_inner(
|
||||
results.extend(apply_repeating_rules_inner(
|
||||
index,
|
||||
ast,
|
||||
user_ctx,
|
||||
node,
|
||||
fresh,
|
||||
rewrite_depth + 1,
|
||||
next_skip,
|
||||
)?);
|
||||
}
|
||||
*user_ctx = snapshot;
|
||||
return Ok(results);
|
||||
}
|
||||
// Rule didn't match; restore any speculative changes (none expected
|
||||
// since try_rule only mutates on match, but be defensive).
|
||||
*user_ctx = snapshot;
|
||||
}
|
||||
|
||||
// Take the parent's fields by ownership: the recursion will rewrite
|
||||
@@ -874,7 +1004,15 @@ fn apply_repeating_rules_inner(
|
||||
for children in fields.values_mut() {
|
||||
let mut new_children: Option<Vec<Id>> = None;
|
||||
for (i, &child_id) in children.iter().enumerate() {
|
||||
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
|
||||
let result = apply_repeating_rules_inner(
|
||||
index,
|
||||
ast,
|
||||
user_ctx,
|
||||
child_id,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
None,
|
||||
)?;
|
||||
let unchanged = result.len() == 1 && result[0] == child_id;
|
||||
match (&mut new_children, unchanged) {
|
||||
(None, true) => {} // unchanged so far, no allocation needed
|
||||
@@ -903,24 +1041,25 @@ fn apply_repeating_rules_inner(
|
||||
/// each visited node, recursion proceeds only through captured nodes (not
|
||||
/// through the input node's children directly), and an error is returned if
|
||||
/// no rule matches a visited node.
|
||||
fn apply_one_shot_rules(
|
||||
rules: &[Rule],
|
||||
fn apply_one_shot_rules<C: Clone>(
|
||||
rules: &[Rule<C>],
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
let index = RuleIndex::new(rules);
|
||||
apply_one_shot_rules_inner(&index, ast, id, fresh, 0)
|
||||
apply_one_shot_rules_inner(&index, ast, user_ctx, id, fresh, 0)
|
||||
}
|
||||
|
||||
fn apply_one_shot_rules_inner(
|
||||
index: &RuleIndex,
|
||||
fn apply_one_shot_rules_inner<C: Clone>(
|
||||
index: &RuleIndex<C>,
|
||||
ast: &mut Ast,
|
||||
user_ctx: &mut C,
|
||||
id: Id,
|
||||
fresh: &tree_builder::FreshScope,
|
||||
rewrite_depth: usize,
|
||||
) -> Result<Vec<Id>, String> {
|
||||
|
||||
if rewrite_depth > MAX_REWRITE_DEPTH {
|
||||
return Err(format!(
|
||||
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
|
||||
@@ -931,22 +1070,27 @@ fn apply_one_shot_rules_inner(
|
||||
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
|
||||
|
||||
for rule in index.rules_for_kind(node_kind) {
|
||||
if let Some(mut captures) = rule.try_match(ast, id)? {
|
||||
// Recursively translate every captured node before invoking the
|
||||
// transform. The transform's output uses output-schema kinds, so
|
||||
// we must translate captured input-schema nodes to their
|
||||
// output-schema equivalents first.
|
||||
captures.try_map_all_captures(|captured_id| {
|
||||
// Avoid infinite recursion when a capture refers to the root
|
||||
// node of the matched tree (e.g. an `@_` capture on the
|
||||
// pattern root): re-analyzing it would match the same rule
|
||||
// again indefinitely.
|
||||
if captured_id == id {
|
||||
return Ok(vec![captured_id]);
|
||||
}
|
||||
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
|
||||
})?;
|
||||
return Ok(rule.run_transform(ast, captures, id, fresh));
|
||||
if let Some(captures) = rule.try_match(ast, id)? {
|
||||
// Snapshot the user context before invoking the rule so that any
|
||||
// mutations the rule (or its transitively-translated captures)
|
||||
// make are visible during this rule's transform, but not leaked
|
||||
// to the parent's siblings.
|
||||
let snapshot = user_ctx.clone();
|
||||
// Build the translator handle the transform will use to
|
||||
// recursively translate captures (or, for macro-generated
|
||||
// rules, the auto-translate prefix uses it to translate every
|
||||
// capture up front, preserving the legacy behavior).
|
||||
let translator = TranslatorHandle {
|
||||
inner: TranslatorImpl::OneShot {
|
||||
index,
|
||||
fresh,
|
||||
rewrite_depth,
|
||||
matched_root: id,
|
||||
},
|
||||
};
|
||||
let result = rule.run_transform(ast, captures, id, fresh, user_ctx, translator)?;
|
||||
*user_ctx = snapshot;
|
||||
return Ok(result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -974,15 +1118,15 @@ pub enum PhaseKind {
|
||||
/// starts. Rules within a phase compete for matches as usual; rules in
|
||||
/// different phases never compete because each traversal only considers the
|
||||
/// current phase's rules.
|
||||
pub struct Phase {
|
||||
pub struct Phase<C = ()> {
|
||||
/// Name used in error messages.
|
||||
pub name: String,
|
||||
pub rules: Vec<Rule>,
|
||||
pub rules: Vec<Rule<C>>,
|
||||
pub kind: PhaseKind,
|
||||
}
|
||||
|
||||
impl Phase {
|
||||
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
|
||||
impl<C> Phase<C> {
|
||||
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule<C>>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
rules,
|
||||
@@ -1008,17 +1152,30 @@ impl Phase {
|
||||
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
|
||||
/// .with_output_node_types_yaml(yaml);
|
||||
/// ```
|
||||
#[derive(Default)]
|
||||
pub struct DesugaringConfig {
|
||||
///
|
||||
/// The optional type parameter `C` is the user context type threaded through
|
||||
/// rule transforms. Defaults to `()` (no user context).
|
||||
pub struct DesugaringConfig<C = ()> {
|
||||
/// Phases of rule application, applied in order.
|
||||
pub phases: Vec<Phase>,
|
||||
pub phases: Vec<Phase<C>>,
|
||||
/// Output node-types in YAML format. If `None`, the input grammar's
|
||||
/// node types are used (i.e. the desugared AST has the same node types
|
||||
/// as the tree-sitter grammar).
|
||||
pub output_node_types_yaml: Option<&'static str>,
|
||||
}
|
||||
|
||||
impl DesugaringConfig {
|
||||
// Manual `Default` impl so users with a custom `C` that doesn't implement
|
||||
// `Default` can still construct an empty config.
|
||||
impl<C> Default for DesugaringConfig<C> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
phases: Vec::new(),
|
||||
output_node_types_yaml: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> DesugaringConfig<C> {
|
||||
/// Create an empty configuration. Add phases via [`add_phase`] and an
|
||||
/// optional output schema via [`with_output_node_types_yaml`].
|
||||
pub fn new() -> Self {
|
||||
@@ -1030,7 +1187,7 @@ impl DesugaringConfig {
|
||||
mut self,
|
||||
name: impl Into<String>,
|
||||
kind: PhaseKind,
|
||||
rules: Vec<Rule>,
|
||||
rules: Vec<Rule<C>>,
|
||||
) -> Self {
|
||||
self.phases.push(Phase::new(name, kind, rules));
|
||||
self
|
||||
@@ -1052,15 +1209,15 @@ impl DesugaringConfig {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Runner<'a> {
|
||||
pub struct Runner<'a, C = ()> {
|
||||
language: tree_sitter::Language,
|
||||
schema: schema::Schema,
|
||||
phases: &'a [Phase],
|
||||
phases: &'a [Phase<C>],
|
||||
}
|
||||
|
||||
impl<'a> Runner<'a> {
|
||||
impl<'a, C> Runner<'a, C> {
|
||||
/// Create a runner using the input grammar's schema for output.
|
||||
pub fn new(language: tree_sitter::Language, phases: &'a [Phase]) -> Self {
|
||||
pub fn new(language: tree_sitter::Language, phases: &'a [Phase<C>]) -> Self {
|
||||
let schema = schema::Schema::from_language(&language);
|
||||
Self {
|
||||
language,
|
||||
@@ -1073,7 +1230,7 @@ impl<'a> Runner<'a> {
|
||||
pub fn with_schema(
|
||||
language: tree_sitter::Language,
|
||||
schema: &schema::Schema,
|
||||
phases: &'a [Phase],
|
||||
phases: &'a [Phase<C>],
|
||||
) -> Self {
|
||||
Self {
|
||||
language,
|
||||
@@ -1085,7 +1242,7 @@ impl<'a> Runner<'a> {
|
||||
/// Create a runner from a [`DesugaringConfig`].
|
||||
pub fn from_config(
|
||||
language: tree_sitter::Language,
|
||||
config: &'a DesugaringConfig,
|
||||
config: &'a DesugaringConfig<C>,
|
||||
) -> Result<Self, String> {
|
||||
let schema = config.build_schema(&language)?;
|
||||
Ok(Self {
|
||||
@@ -1094,11 +1251,17 @@ impl<'a> Runner<'a> {
|
||||
phases: &config.phases,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run_from_tree(
|
||||
impl<'a, C: Clone> Runner<'a, C> {
|
||||
/// Parse `tree` against `source` and run all phases, threading
|
||||
/// `user_ctx` through every rule transform. The caller owns the
|
||||
/// initial context state.
|
||||
pub fn run_from_tree_with_ctx(
|
||||
&self,
|
||||
tree: &tree_sitter::Tree,
|
||||
source: &[u8],
|
||||
user_ctx: &mut C,
|
||||
) -> Result<Ast, String> {
|
||||
let mut ast = Ast::from_tree_with_schema_and_source(
|
||||
self.schema.clone(),
|
||||
@@ -1106,11 +1269,13 @@ impl<'a> Runner<'a> {
|
||||
&self.language,
|
||||
source.to_vec(),
|
||||
);
|
||||
self.run_phases(&mut ast)?;
|
||||
self.run_phases(&mut ast, user_ctx)?;
|
||||
Ok(ast)
|
||||
}
|
||||
|
||||
pub fn run(&self, input: &str) -> Result<Ast, String> {
|
||||
/// Parse `input` and run all phases, threading `user_ctx` through
|
||||
/// every rule transform. The caller owns the initial context state.
|
||||
pub fn run_with_ctx(&self, input: &str, user_ctx: &mut C) -> Result<Ast, String> {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&self.language)
|
||||
@@ -1124,20 +1289,24 @@ impl<'a> Runner<'a> {
|
||||
&self.language,
|
||||
input.as_bytes().to_vec(),
|
||||
);
|
||||
self.run_phases(&mut ast)?;
|
||||
self.run_phases(&mut ast, user_ctx)?;
|
||||
Ok(ast)
|
||||
}
|
||||
|
||||
/// Apply each phase in turn to the AST, threading the root through.
|
||||
/// A single `FreshScope` is shared across phases so that fresh
|
||||
/// identifiers generated in different phases don't collide.
|
||||
fn run_phases(&self, ast: &mut Ast) -> Result<(), String> {
|
||||
fn run_phases(&self, ast: &mut Ast, user_ctx: &mut C) -> Result<(), String> {
|
||||
let fresh = tree_builder::FreshScope::new();
|
||||
let mut root = ast.get_root();
|
||||
for phase in self.phases {
|
||||
let res = match phase.kind {
|
||||
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
|
||||
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
|
||||
PhaseKind::Repeating => {
|
||||
apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh)
|
||||
}
|
||||
PhaseKind::OneShot => {
|
||||
apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh)
|
||||
}
|
||||
}
|
||||
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
|
||||
if res.len() != 1 {
|
||||
@@ -1153,3 +1322,78 @@ impl<'a> Runner<'a> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C: Clone + Default> Runner<'a, C> {
|
||||
/// Parse `tree` against `source` and run all phases, using the
|
||||
/// default context (`C::default()`) as the initial context state.
|
||||
pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
|
||||
let mut user_ctx = C::default();
|
||||
self.run_from_tree_with_ctx(tree, source, &mut user_ctx)
|
||||
}
|
||||
|
||||
/// Parse `input` and run all phases, using the default context
|
||||
/// (`C::default()`) as the initial context state.
|
||||
pub fn run(&self, input: &str) -> Result<Ast, String> {
|
||||
let mut user_ctx = C::default();
|
||||
self.run_with_ctx(input, &mut user_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Desugarer: type-erased view of a DesugaringConfig + Runner
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Type-erased interface to a desugaring pipeline for a single language.
|
||||
///
|
||||
/// Consumers (e.g. a generic tree-sitter extractor) hold
|
||||
/// `Box<dyn Desugarer>` so they can dispatch through the trait without
|
||||
/// knowing the user context type `C` that's internal to yeast.
|
||||
///
|
||||
/// Construct one via [`ConcreteDesugarer::new`] from a
|
||||
/// [`DesugaringConfig<C>`] and a [`tree_sitter::Language`].
|
||||
///
/// Implementors must be `Send + Sync` (supertrait bound).
pub trait Desugarer: Send + Sync {
|
||||
/// The output AST schema (in YAML format), or `None` if the input
|
||||
/// grammar's schema should be used.
|
||||
fn output_node_types_yaml(&self) -> Option<&'static str>;
|
||||
|
||||
/// Take the already-parsed `tree` together with its `source` bytes and
/// run the desugaring pipeline, returning the resulting AST.
|
||||
/// Each call constructs a fresh default user context internally.
|
||||
///
/// # Errors
///
/// Returns `Err` with a message when desugaring fails.
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String>;
|
||||
}
|
||||
|
||||
/// A concrete [`Desugarer`] backed by a [`DesugaringConfig<C>`] for a
|
||||
/// specific user context type `C`. Stores the language and a pre-built
|
||||
/// schema so that per-call cost is bounded to constructing a transient
|
||||
/// [`Runner`] and cloning the schema (no YAML re-parsing).
|
||||
pub struct ConcreteDesugarer<C: Default + Clone + Send + Sync + 'static> {
|
||||
language: tree_sitter::Language,
|
||||
schema: schema::Schema,
|
||||
config: DesugaringConfig<C>,
|
||||
}
|
||||
|
||||
impl<C: Default + Clone + Send + Sync + 'static> ConcreteDesugarer<C> {
|
||||
/// Build a desugarer for `language` from `config`. Parses the output
|
||||
/// schema YAML once (if set) and stores it for reuse across files.
|
||||
pub fn new(
|
||||
language: tree_sitter::Language,
|
||||
config: DesugaringConfig<C>,
|
||||
) -> Result<Self, String> {
|
||||
let schema = config.build_schema(&language)?;
|
||||
Ok(Self {
|
||||
language,
|
||||
schema,
|
||||
config,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Default + Clone + Send + Sync + 'static> Desugarer for ConcreteDesugarer<C> {
|
||||
fn output_node_types_yaml(&self) -> Option<&'static str> {
|
||||
self.config.output_node_types_yaml
|
||||
}
|
||||
|
||||
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
|
||||
let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases);
|
||||
runner.run_from_tree(tree, source)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
|
||||
|
||||
/// Apply YAML node-type definitions to a mutable Schema.
|
||||
/// Registers all types, fields, and allowed types from the YAML into the schema.
|
||||
fn apply_yaml_to_schema(
|
||||
yaml: &YamlNodeTypes,
|
||||
schema: &mut crate::schema::Schema,
|
||||
) {
|
||||
fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) {
|
||||
// Register all supertypes as node kinds
|
||||
for name in yaml.supertypes.keys() {
|
||||
schema.register_kind(name);
|
||||
@@ -307,7 +304,8 @@ fn apply_yaml_to_schema(
|
||||
.into_vec()
|
||||
.into_iter()
|
||||
.map(|type_ref| {
|
||||
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
|
||||
let (kind, named) =
|
||||
resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
|
||||
crate::schema::NodeType { kind, named }
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -198,13 +198,8 @@ impl Schema {
|
||||
.insert((parent_kind.to_string(), field_id), node_types);
|
||||
}
|
||||
|
||||
pub fn field_types(
|
||||
&self,
|
||||
parent_kind: &str,
|
||||
field_id: FieldId,
|
||||
) -> Option<&Vec<NodeType>> {
|
||||
self.field_types
|
||||
.get(&(parent_kind.to_string(), field_id))
|
||||
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
|
||||
self.field_types.get(&(parent_kind.to_string(), field_id))
|
||||
}
|
||||
|
||||
pub fn set_field_cardinality(
|
||||
|
||||
@@ -7,7 +7,7 @@ const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
|
||||
|
||||
/// Helper: parse Ruby source with no rules, return dump.
|
||||
fn parse_and_dump(input: &str) -> String {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast(&ast, ast.get_root(), input)
|
||||
}
|
||||
@@ -24,7 +24,7 @@ fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
runner.run(input).unwrap()
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast(&ast, ast.get_root(), input)
|
||||
}
|
||||
@@ -46,7 +46,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
runner
|
||||
.run(input)
|
||||
.expect_err("expected runner to return an error")
|
||||
@@ -54,7 +54,7 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
|
||||
|
||||
/// Helper: parse Ruby source with no rules and dump with schema type errors.
|
||||
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
@@ -64,10 +64,10 @@ fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
|
||||
/// building schema with language IDs so field checks align with parser fields.
|
||||
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let runner = Runner::new(lang.clone(), &[]);
|
||||
let runner: Runner = Runner::new(lang.clone(), &[]);
|
||||
let ast = runner.run(input).unwrap();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
|
||||
.unwrap();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> Strin
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let ast = runner.run(input).unwrap();
|
||||
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
|
||||
}
|
||||
@@ -166,7 +166,7 @@ fn test_parse_for_loop() {
|
||||
|
||||
#[test]
|
||||
fn test_dump_highlights_type_errors_inline() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -176,13 +176,13 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed("x = 1", schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
let dump = parse_and_dump_typed("x = 1", schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -192,25 +192,25 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
// This rewrite runs and preserves the RHS node kind via capture.
|
||||
// With schema above, preserving `integer` should be reported inline.
|
||||
let rules = vec![yeast::rule!(
|
||||
(assignment left: (_) @left right: (_) @right)
|
||||
=>
|
||||
(assignment
|
||||
left: {left}
|
||||
right: {right}
|
||||
)
|
||||
)];
|
||||
// This rewrite runs and preserves the RHS node kind via capture.
|
||||
// With schema above, preserving `integer` should be reported inline.
|
||||
let rules: Vec<Rule> = vec![yeast::rule!(
|
||||
(assignment left: (_) @left right: (_) @right)
|
||||
=>
|
||||
(assignment
|
||||
left: {left}
|
||||
right: {right}
|
||||
)
|
||||
)];
|
||||
|
||||
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("node kind 'integer' not in schema"));
|
||||
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
|
||||
assert!(dump.contains("integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("node kind 'integer' not in schema"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_undeclared_field_on_node() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -219,14 +219,14 @@ named:
|
||||
identifier:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
|
||||
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
|
||||
assert!(dump.contains("the node 'assignment' has no field 'right'"));
|
||||
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
|
||||
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
|
||||
assert!(dump.contains("the node 'assignment' has no field 'right'"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dump_reports_disallowed_kind_in_field_type() {
|
||||
let schema_yaml = r#"
|
||||
let schema_yaml = r#"
|
||||
named:
|
||||
program:
|
||||
$children*: assignment
|
||||
@@ -237,17 +237,17 @@ named:
|
||||
integer:
|
||||
"#;
|
||||
|
||||
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
|
||||
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("should contain"));
|
||||
assert!(dump.contains("but got integer"));
|
||||
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
|
||||
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
|
||||
assert!(dump.contains("should contain"));
|
||||
assert!(dump.contains("but got integer"));
|
||||
}
|
||||
|
||||
// ---- Query tests ----
|
||||
|
||||
#[test]
|
||||
fn test_query_match() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -268,7 +268,7 @@ fn test_query_match() {
|
||||
|
||||
#[test]
|
||||
fn test_query_no_match() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -293,7 +293,7 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
// captured comment to nothing (a common idiom, e.g.
|
||||
// `(comment) => ()` in Swift) leaves the capture's match-list empty
|
||||
// and causes the transform to fail with "Variable X has 0 matches".
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("[1, # comment\n2]").unwrap();
|
||||
|
||||
// Navigate to the `array` node: program -> array.
|
||||
@@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
|
||||
assert!(matched);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("a").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(),
|
||||
"integer"
|
||||
);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("b").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(),
|
||||
"integer"
|
||||
);
|
||||
}
|
||||
@@ -325,14 +321,14 @@ fn test_query_skips_extras_in_positional_match() {
|
||||
#[test]
|
||||
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
|
||||
.unwrap();
|
||||
let phases = vec![Phase::new(
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases: Vec<Phase> = vec![Phase::new(
|
||||
"test",
|
||||
PhaseKind::Repeating,
|
||||
vec![yeast::rule!((integer) => (identifier "replaced"))],
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -350,7 +346,7 @@ fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
|
||||
|
||||
#[test]
|
||||
fn test_query_repeated_capture() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x, y, z = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -375,7 +371,7 @@ fn test_query_repeated_capture() {
|
||||
#[test]
|
||||
fn test_capture_unnamed_node_parenthesized() {
|
||||
// `("=") @op` captures the unnamed `=` token between left and right.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -403,7 +399,7 @@ fn test_capture_unnamed_node_parenthesized() {
|
||||
fn test_capture_bare_underscore_repeated() {
|
||||
// `_` matches named and unnamed nodes in bare-child position. On this
|
||||
// assignment shape, bare children correspond to unnamed tokens (the `=`).
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!((assignment _* @all));
|
||||
@@ -425,7 +421,7 @@ fn test_capture_bare_underscore_repeated() {
|
||||
#[test]
|
||||
fn test_capture_unnamed_node_bare_literal() {
|
||||
// `"=" @op` (without surrounding parens) is the same as `("=") @op`.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let query = yeast::query!(
|
||||
@@ -454,7 +450,7 @@ fn test_bare_underscore_matches_unnamed() {
|
||||
// Bare `_` matches any node, including unnamed tokens, while `(_)`
|
||||
// matches only named nodes. Demonstrate by matching the unnamed `=`
|
||||
// token in the implicit `child` field of an `assignment`.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -493,7 +489,7 @@ fn test_bare_forms_in_field_position() {
|
||||
// field's value, not just in the bare-children position. This is
|
||||
// syntactic sugar for `(_)` / `("…")` and goes through the same
|
||||
// code paths.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -532,7 +528,7 @@ fn test_forward_scan_finds_unnamed_token_late() {
|
||||
// query for `("end")` skip past the first two and match the third.
|
||||
// Without forward-scan, the matcher took the first child unconditionally
|
||||
// and failed.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
// Navigate: program > for > do (the body wrapper).
|
||||
@@ -559,7 +555,7 @@ fn test_forward_scan_preserves_order() {
|
||||
// order. A query for ("end") then ("do") should fail because `do`
|
||||
// appears before `end` in the source order; once forward-scan has
|
||||
// consumed `end`, the iterator is exhausted.
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("for x in list do\n y\nend").unwrap();
|
||||
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
@@ -580,7 +576,7 @@ fn test_forward_scan_preserves_order() {
|
||||
|
||||
#[test]
|
||||
fn test_tree_builder() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let mut ast = runner.run("x = 1").unwrap();
|
||||
let input = "x = 1";
|
||||
|
||||
@@ -598,7 +594,8 @@ fn test_tree_builder() {
|
||||
|
||||
// Swap left and right
|
||||
let fresh = yeast::tree_builder::FreshScope::new();
|
||||
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh);
|
||||
let mut user_ctx = ();
|
||||
let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh, &mut user_ctx);
|
||||
let new_id = yeast::tree!(ctx,
|
||||
(program
|
||||
child: (assignment
|
||||
@@ -626,7 +623,7 @@ fn test_tree_builder() {
|
||||
// tree-sitter-ruby grammar with named fields for nodes that only have
|
||||
// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter).
|
||||
fn ruby_rules() -> Vec<Rule> {
|
||||
let assign_rule = yeast::rule!(
|
||||
let assign_rule: Rule = yeast::rule!(
|
||||
(assignment
|
||||
left: (left_assignment_list
|
||||
(identifier)* @left
|
||||
@@ -651,7 +648,7 @@ fn ruby_rules() -> Vec<Rule> {
|
||||
)}
|
||||
);
|
||||
|
||||
let for_rule = yeast::rule!(
|
||||
let for_rule: Rule = yeast::rule!(
|
||||
(for
|
||||
pattern: (_) @pat
|
||||
value: (in (_) @val)
|
||||
@@ -733,7 +730,7 @@ fn test_desugar_for_loop() {
|
||||
|
||||
#[test]
|
||||
fn test_shorthand_rule() {
|
||||
let rule = yeast::rule!(
|
||||
let rule: Rule = yeast::rule!(
|
||||
(assignment
|
||||
left: (_) @method
|
||||
right: (_) @receiver
|
||||
@@ -885,7 +882,7 @@ fn test_phase_error_includes_phase_name() {
|
||||
PhaseKind::Repeating,
|
||||
vec![swap_assignment_rule().repeated()],
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let err = runner
|
||||
.run("x = 1")
|
||||
.expect_err("expected runner to return an error");
|
||||
@@ -928,7 +925,7 @@ fn test_one_shot_phase() {
|
||||
PhaseKind::OneShot,
|
||||
one_shot_xeq1_rules(),
|
||||
)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -954,7 +951,7 @@ fn test_one_shot_phase_errors_when_no_rule_matches() {
|
||||
let mut rules = one_shot_xeq1_rules();
|
||||
rules.pop();
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let err = runner
|
||||
.run("x = 1")
|
||||
@@ -978,7 +975,7 @@ fn test_one_shot_recurses_into_returned_capture() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules = vec![
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
@@ -994,7 +991,7 @@ fn test_one_shot_recurses_into_returned_capture() {
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -1020,7 +1017,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let rules = vec![
|
||||
let rules: Vec<Rule> = vec![
|
||||
yeast::rule!(
|
||||
(program (_)* @stmts)
|
||||
=>
|
||||
@@ -1041,7 +1038,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
yeast::rule!((integer) => (integer "INT")),
|
||||
];
|
||||
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
let runner: Runner = Runner::with_schema(lang, &schema, &phases);
|
||||
|
||||
let input = "x = 1";
|
||||
let ast = runner.run(input).unwrap();
|
||||
@@ -1065,7 +1062,7 @@ fn test_one_shot_does_not_recurse_into_wrapper_output() {
|
||||
|
||||
#[test]
|
||||
fn test_cursor_navigation() {
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let runner: Runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("x = 1").unwrap();
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
|
||||
@@ -1139,7 +1136,7 @@ fn test_desugar_for_with_multiple_assignment() {
|
||||
/// resolves to the captured node's source text via `YeastDisplay`.
|
||||
#[test]
|
||||
fn test_hash_brace_renders_capture_source_text() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(call
|
||||
method: (identifier) @name
|
||||
receiver: (identifier) @recv
|
||||
@@ -1168,7 +1165,7 @@ fn test_hash_brace_renders_capture_source_text() {
|
||||
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
|
||||
#[test]
|
||||
fn test_hash_brace_renders_integer_expression() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(identifier) @_
|
||||
=>
|
||||
(identifier #{1 + 2})
|
||||
@@ -1187,7 +1184,7 @@ fn test_hash_brace_renders_integer_expression() {
|
||||
/// source location, not the full source range of the matched rule root.
|
||||
#[test]
|
||||
fn test_hash_brace_uses_capture_location_for_leaf() {
|
||||
let rule = rule!(
|
||||
let rule: Rule = rule!(
|
||||
(call
|
||||
method: (identifier) @name
|
||||
receiver: (identifier) @recv
|
||||
@@ -1204,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() {
|
||||
|
||||
let mut bar_ids: Vec<usize> = Vec::new();
|
||||
for id in ast.reachable_node_ids() {
|
||||
let Some(node) = ast.get_node(id) else { continue; };
|
||||
let Some(node) = ast.get_node(id) else {
|
||||
continue;
|
||||
};
|
||||
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
|
||||
bar_ids.push(id);
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use clap::Args;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::languages;
|
||||
use codeql_extractor::extractor::simple;
|
||||
use codeql_extractor::trap;
|
||||
use crate::languages;
|
||||
|
||||
#[derive(Args)]
|
||||
pub struct Options {
|
||||
@@ -35,7 +35,9 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
prefix: "unified".to_string(),
|
||||
languages,
|
||||
trap_dir: options.output_dir,
|
||||
trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"),
|
||||
trap_compression: trap::Compression::from_env(
|
||||
"CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION",
|
||||
),
|
||||
source_archive_dir: options.source_archive_dir,
|
||||
file_lists: vec![options.file_list],
|
||||
};
|
||||
|
||||
@@ -22,14 +22,19 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
// The QL-visible schema is the unified output AST, not the per-language
|
||||
// input grammars. Pass it via `desugar.output_node_types_yaml` so the
|
||||
// generator converts the YAML to JSON node-types.
|
||||
let desugar = yeast::DesugaringConfig::new()
|
||||
.with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
|
||||
let desugar =
|
||||
yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
|
||||
|
||||
let languages = vec![Language {
|
||||
name: "Unified".to_owned(),
|
||||
node_types: "", // unused: generator picks up output_node_types_yaml above
|
||||
node_types: "", // unused: generator picks up output_node_types_yaml above
|
||||
desugar: Some(desugar),
|
||||
}];
|
||||
|
||||
generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh")
|
||||
generate(
|
||||
languages,
|
||||
options.dbscheme,
|
||||
options.library,
|
||||
"run unified/scripts/create-extractor-pack.sh",
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,98 @@
|
||||
use codeql_extractor::extractor::simple;
|
||||
use yeast::{rule, DesugaringConfig, PhaseKind};
|
||||
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree};
|
||||
|
||||
fn translation_rules() -> Vec<yeast::Rule> {
|
||||
/// User context propagated from outer rules down to the inner rules that
|
||||
/// emit the corresponding output declarations, so that each emitted node
|
||||
/// is born with the outer information (name, type, modifiers, etc.)
|
||||
/// already set — no schema-invalid intermediate state requiring
|
||||
/// post-hoc mutation.
|
||||
#[derive(Clone, Default)]
|
||||
struct SwiftContext {
|
||||
/// Identifier node for the property name. Set by the outer
|
||||
/// `property_binding` (computed accessors / willSet-didSet) and
|
||||
/// `protocol_property_declaration` rules before translating accessor
|
||||
/// children; read by the accessor inner rules
|
||||
/// (`computed_getter`/`computed_setter`/`computed_modify`/
|
||||
/// `willset_clause`/`didset_clause`/`getter_specifier`/
|
||||
/// `setter_specifier`).
|
||||
property_name: Option<yeast::Id>,
|
||||
/// Translated type node for the property type. Set by the outer
|
||||
/// `property_binding` rule (computed accessors variant) and
|
||||
/// `protocol_property_declaration` when present; read by the
|
||||
/// accessor inner rules.
|
||||
property_type: Option<yeast::Id>,
|
||||
/// Default-value expression for the next translated `parameter`. Set
|
||||
/// by the outer `function_parameter` rule; read by the `parameter`
|
||||
/// rules.
|
||||
default_value: Option<yeast::Id>,
|
||||
/// Translated outer modifiers (e.g. visibility, attributes) to
|
||||
/// attach to each child of a flattening outer rule. Set by
|
||||
/// `property_declaration`, `enum_entry`, and
|
||||
/// `protocol_property_declaration`.
|
||||
outer_modifiers: Vec<yeast::Id>,
|
||||
/// The `let`/`var` binding modifier for a `property_declaration`.
|
||||
/// Set by `property_declaration`; read by the inner declaration
|
||||
/// rules (`property_binding` variants, accessor rules) so they
|
||||
/// emit it as part of the output node's `modifier:` field.
|
||||
binding_modifier: Option<yeast::Id>,
|
||||
/// True when the current child of a flattening outer rule is not
|
||||
/// the first one — its inner rule should emit a
|
||||
/// `chained_declaration` modifier so the original grouping can be
|
||||
/// recovered downstream.
|
||||
is_chained: bool,
|
||||
}
|
||||
|
||||
/// Build a freshly-created `chained_declaration` modifier node if
|
||||
/// `ctx.is_chained`, else `None`. Used by inner declaration rules to
|
||||
/// emit the chained tag for non-first children of a flattening outer
|
||||
/// rule. Returns `Option<Id>` so it splices via `{..…}` to 0 or 1 ids.
|
||||
fn chained_modifier(ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>) -> Option<yeast::Id> {
|
||||
if ctx.is_chained {
|
||||
Some(ctx.literal("modifier", "chained_declaration"))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Combine a list of boolean sub-conditions into a single expression by
|
||||
/// left-folding with the infix `&&` operator. Used by control-flow
|
||||
/// rules (`if`, `guard`, `while`, `repeat-while`) whose tree-sitter
|
||||
/// nodes carry one or more comma-separated conditions that the target
|
||||
/// AST represents as a single `condition:` field. Panics on an empty
|
||||
/// input because every caller's grammar guarantees at least one
|
||||
/// condition.
|
||||
fn and_chain(
|
||||
ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>,
|
||||
conds: Vec<yeast::NodeRef>,
|
||||
) -> yeast::Id {
|
||||
conds.into_iter()
|
||||
.map(yeast::Id::from)
|
||||
.reduce(|acc, elem| {
|
||||
tree!((binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
|
||||
})
|
||||
.expect("control-flow statement must have at least one condition")
|
||||
}
|
||||
|
||||
/// Translate a multi-part identifier (for example `Foo.Bar.Baz`) into a
|
||||
/// `member_access_expr` chain rooted at a `name_expr` over the first
|
||||
/// part. Panics on an empty input because the grammar's `_+` quantifier
|
||||
/// guarantees at least one part.
|
||||
fn member_chain(
|
||||
ctx: &mut yeast::build::BuildCtx<'_, SwiftContext>,
|
||||
parts: Vec<yeast::NodeRef>,
|
||||
) -> yeast::Id {
|
||||
let mut iter = parts.into_iter();
|
||||
let first = iter
|
||||
.next()
|
||||
.expect("identifier with `part:` must have at least one part");
|
||||
let init = tree!((name_expr identifier: (identifier #{first})));
|
||||
iter.fold(
|
||||
init,
|
||||
|acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))),
|
||||
)
|
||||
}
|
||||
|
||||
fn translation_rules() -> Vec<Rule<SwiftContext>> {
|
||||
vec![
|
||||
// ---- Top-level ----
|
||||
// Capture all top-level statements, including unnamed tokens like `nil`.
|
||||
@@ -88,32 +179,49 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
// nodes for individual declarators. The outer property_declaration rule splices these out
|
||||
// and attaches binding/modifiers from the parent.
|
||||
|
||||
// Computed property with explicit accessors (get/set/modify) →
|
||||
// a sequence of accessor_declaration nodes, each with the property name
|
||||
// attached. Subsequent accessors will be tagged chained_declaration by
|
||||
// the outer property_declaration rule.
|
||||
rule!(
|
||||
// Computed property with explicit accessors (get/set/modify) → a
|
||||
// sequence of `accessor_declaration` nodes. The outer rule
|
||||
// publishes the property's name and type into `ctx` so that each
|
||||
// inner accessor rule
|
||||
// (`computed_getter`/`computed_setter`/`computed_modify`) builds
|
||||
// its `accessor_declaration` with `name` and `type` set from the
|
||||
// start — no schema-invalid intermediate state.
|
||||
//
|
||||
// Toggles `ctx.is_chained` per accessor iteration: the first
|
||||
// accessor inherits the outer rule's chained state (i.e. whether
|
||||
// this whole property_binding is itself a non-first declarator
|
||||
// of a containing property_declaration); subsequent accessors
|
||||
// always emit `chained_declaration`.
|
||||
manual_rule!(
|
||||
(property_binding
|
||||
name: @pattern
|
||||
type: _? @ty
|
||||
computed_value: (computed_property accessor: _+ @accessors))
|
||||
=>
|
||||
{..{
|
||||
let name_text = __yeast_ctx.ast.source_text(pattern.into());
|
||||
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
|
||||
let acc_ids: Vec<usize> = accessors.iter().map(|&a| a.into()).collect();
|
||||
for &acc_id in &acc_ids {
|
||||
let ident = __yeast_ctx.literal("identifier", &name_text);
|
||||
__yeast_ctx.prepend_field(acc_id, "name", ident);
|
||||
for &ty_id in ty_ids.iter().rev() {
|
||||
__yeast_ctx.prepend_field(acc_id, "type", ty_id);
|
||||
{
|
||||
// Translate `ty` first so the context holds an
|
||||
// output-schema node id.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
// Build the property-name identifier from the
|
||||
// (untranslated) pattern leaf.
|
||||
let name_id = tree!((identifier #{pattern}));
|
||||
|
||||
ctx.property_name = Some(name_id);
|
||||
ctx.property_type = translated_ty;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
if i > 0 {
|
||||
ctx.is_chained = true;
|
||||
}
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
acc_ids
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// Computed property: shorthand getter (no explicit get/set, just statements) →
|
||||
// a single accessor_declaration with kind "get".
|
||||
// Computed property: shorthand getter (no explicit get/set, just
|
||||
// statements) → a single accessor_declaration with kind "get".
|
||||
// Reads outer modifiers / chained tag from `ctx` (set by the
|
||||
// outer `property_declaration` rule).
|
||||
rule!(
|
||||
(property_binding
|
||||
name: (pattern bound_identifier: @name)
|
||||
@@ -121,49 +229,62 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
computed_value: (computed_property statement: _* @body))
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: (identifier #{name})
|
||||
type: {..ty}
|
||||
accessor_kind: (accessor_kind "get")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
// Stored property with willSet/didSet observers (initializer optional) →
|
||||
// variable_declaration followed by one accessor_declaration per observer,
|
||||
// each carrying the property name. Subsequent items are tagged
|
||||
// chained_declaration by the outer property_declaration rule.
|
||||
rule!(
|
||||
// Stored property with willSet/didSet observers (initializer
|
||||
// optional) → a `variable_declaration` followed by one
|
||||
// `accessor_declaration` per observer, each born with the
|
||||
// property name set. Manual rule: we publish the property name
|
||||
// into `ctx` before translating the observer children so the
|
||||
// inner `willset_clause` / `didset_clause` rules construct
|
||||
// valid `accessor_declaration` nodes from the start.
|
||||
//
|
||||
// The `variable_declaration` itself inherits the outer rule's
|
||||
// chained state; observers always get `chained_declaration`
|
||||
// because they're subsequent outputs of this flattening rule.
|
||||
manual_rule!(
|
||||
(property_binding
|
||||
name: (pattern bound_identifier: @name)
|
||||
type: _? @ty
|
||||
value: _? @val
|
||||
observers: (willset_didset_block willset: _? @ws didset: _? @ds))
|
||||
=>
|
||||
{..{
|
||||
let name_text = __yeast_ctx.ast.source_text(name.into());
|
||||
let val_ids: Vec<usize> = val.iter().map(|&v| v.into()).collect();
|
||||
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
|
||||
let mut obs_ids: Vec<usize> = Vec::new();
|
||||
obs_ids.extend(ws.iter().map(|&o| { let id: usize = o.into(); id }));
|
||||
obs_ids.extend(ds.iter().map(|&o| { let id: usize = o.into(); id }));
|
||||
let ident_for_var = __yeast_ctx.literal("identifier", &name_text);
|
||||
let pat = __yeast_ctx.node("name_pattern", vec![("identifier", vec![ident_for_var])]);
|
||||
let mut var_fields: Vec<(&str, Vec<usize>)> = vec![("pattern", vec![pat])];
|
||||
if !ty_ids.is_empty() {
|
||||
var_fields.push(("type", ty_ids));
|
||||
{
|
||||
// Translate ty and val so the variable_declaration
|
||||
// below contains output-schema nodes.
|
||||
let translated_ty = ctx.translate_opt(ty)?;
|
||||
let translated_val = ctx.translate_opt(val)?;
|
||||
|
||||
let var_decl = tree!(
|
||||
(variable_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
type: {..translated_ty}
|
||||
value: {..translated_val})
|
||||
);
|
||||
|
||||
// Publish the property name for the observer rules.
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
// Observers are subsequent outputs of this flattening
|
||||
// rule, so they always get `chained_declaration`.
|
||||
ctx.is_chained = true;
|
||||
|
||||
let mut result = vec![var_decl];
|
||||
for obs in ws.into_iter().chain(ds) {
|
||||
result.extend(ctx.translate(obs)?);
|
||||
}
|
||||
if !val_ids.is_empty() {
|
||||
var_fields.push(("value", val_ids));
|
||||
}
|
||||
let var_id = __yeast_ctx.node("variable_declaration", var_fields);
|
||||
let mut result = vec![var_id];
|
||||
for obs_id in obs_ids {
|
||||
let ident = __yeast_ctx.literal("identifier", &name_text);
|
||||
__yeast_ctx.prepend_field(obs_id, "name", ident);
|
||||
result.push(obs_id);
|
||||
}
|
||||
result
|
||||
}}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// property_binding with any pattern name (identifier or destructuring)
|
||||
// property_binding with any pattern name (identifier or
|
||||
// destructuring). Reads outer modifiers / chained tag from `ctx`.
|
||||
rule!(
|
||||
(property_binding
|
||||
name: @pattern
|
||||
@@ -171,37 +292,44 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
value: _? @val)
|
||||
=>
|
||||
(variable_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
pattern: {pattern}
|
||||
type: {..ty}
|
||||
value: {..val})
|
||||
),
|
||||
// property_declaration: splice declarators (each may translate to multiple nodes —
|
||||
// variable_declaration and/or accessor_declaration), and attach the binding modifier
|
||||
// (let/var) and any outer modifiers to each. All children after the first additionally
|
||||
// get a synthetic chained_declaration modifier so the grouping can be recovered.
|
||||
rule!(
|
||||
// property_declaration: flatten declarators (each may translate
|
||||
// to multiple nodes — variable_declaration and/or
|
||||
// accessor_declaration) and attach the binding modifier
|
||||
// (let/var), outer modifiers, and `chained_declaration` for
|
||||
// non-first declarations. Manual rule: publishes
|
||||
// binding/outer modifiers into `ctx` and translates each
|
||||
// declarator with `ctx.is_chained` toggled per iteration. The
|
||||
// inner declaration rules (`property_binding` variants,
|
||||
// accessor inner rules) read these fields and emit complete
|
||||
// `modifier:` lists from the start.
|
||||
manual_rule!(
|
||||
(property_declaration
|
||||
binding: (value_binding_pattern mutability: @binding_kind)
|
||||
declarator: _* @decls
|
||||
(modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
let binding_text = __yeast_ctx.ast.source_text(binding_kind.into());
|
||||
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
|
||||
let decl_ids: Vec<usize> = decls.iter().map(|&d| d.into()).collect();
|
||||
for (i, &decl_id) in decl_ids.iter().enumerate() {
|
||||
if i > 0 {
|
||||
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
|
||||
__yeast_ctx.prepend_field(decl_id, "modifier", chained);
|
||||
}
|
||||
for &mod_id in mod_ids.iter().rev() {
|
||||
__yeast_ctx.prepend_field(decl_id, "modifier", mod_id);
|
||||
}
|
||||
let binding_mod = __yeast_ctx.literal("modifier", &binding_text);
|
||||
__yeast_ctx.prepend_field(decl_id, "modifier", binding_mod);
|
||||
{
|
||||
let binding_text = ctx.ast.source_text(binding_kind.0);
|
||||
ctx.binding_modifier = Some(ctx.literal("modifier", &binding_text));
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
decl_ids
|
||||
}}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, decl) in decls.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(decl)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// ---- Enums ----
|
||||
// enum_type_parameter → parameter (with optional name as pattern).
|
||||
@@ -217,14 +345,18 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
=>
|
||||
(parameter type: {ty})
|
||||
),
|
||||
// enum_case_entry with associated values → class_like_declaration containing
|
||||
// a constructor whose parameters are the data parameters.
|
||||
// enum_case_entry with associated values → class_like_declaration
|
||||
// containing a constructor whose parameters are the data
|
||||
// parameters. Reads outer modifiers / chained tag from `ctx`
|
||||
// (set by the outer `enum_entry` rule).
|
||||
rule!(
|
||||
(enum_case_entry
|
||||
name: @name
|
||||
data_contents: (enum_type_parameters parameter: _* @params))
|
||||
=>
|
||||
(class_like_declaration
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
modifier: (modifier "enum_case")
|
||||
name: (identifier #{name})
|
||||
member: (constructor_declaration parameter: {..params} body: (block)))
|
||||
@@ -234,6 +366,8 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(enum_case_entry name: @name raw_value: @val)
|
||||
=>
|
||||
(variable_declaration
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
modifier: (modifier "enum_case")
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
value: {val})
|
||||
@@ -243,28 +377,31 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(enum_case_entry name: @name)
|
||||
=>
|
||||
(variable_declaration
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
modifier: (modifier "enum_case")
|
||||
pattern: (name_pattern identifier: (identifier #{name})))
|
||||
),
|
||||
// enum_entry: flatten case entries; attach outer modifiers to each, and
|
||||
// chained_declaration on every entry after the first.
|
||||
rule!(
|
||||
// enum_entry: flatten case entries; publish outer modifiers
|
||||
// into `ctx` and translate each case with `ctx.is_chained`
|
||||
// toggled per iteration so the inner `enum_case_entry` rules
|
||||
// emit complete `modifier:` lists from the start.
|
||||
manual_rule!(
|
||||
(enum_entry case: _+ @cases (modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
|
||||
let case_ids: Vec<usize> = cases.iter().map(|&c| c.into()).collect();
|
||||
for (i, &case_id) in case_ids.iter().enumerate() {
|
||||
if i > 0 {
|
||||
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
|
||||
__yeast_ctx.prepend_field(case_id, "modifier", chained);
|
||||
}
|
||||
for &mod_id in mod_ids.iter().rev() {
|
||||
__yeast_ctx.prepend_field(case_id, "modifier", mod_id);
|
||||
}
|
||||
{
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
case_ids
|
||||
}}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, case) in cases.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(case)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// Plain assignment: `x = expr`
|
||||
rule!(
|
||||
@@ -336,17 +473,15 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
body: (block stmt: {..body_stmts}))
|
||||
),
|
||||
// Parameters are wrapped in function_parameter, which also carries
|
||||
// optional default values.
|
||||
rule!(
|
||||
// optional default values. Publishes the default value into `ctx`
|
||||
// before translating the inner `parameter` so the `parameter`
|
||||
// rules can include it as a `default:` field directly.
|
||||
manual_rule!(
|
||||
(function_parameter parameter: @p default_value: _? @def)
|
||||
=>
|
||||
{..{
|
||||
let p_id: usize = p.into();
|
||||
for &d in def.iter().rev() {
|
||||
__yeast_ctx.prepend_field(p_id, "default", d.into());
|
||||
}
|
||||
vec![p_id]
|
||||
}}
|
||||
{
|
||||
ctx.default_value = ctx.translate_opt(def)?;
|
||||
ctx.translate(p)
|
||||
}
|
||||
),
|
||||
// Parameter with external name and type
|
||||
rule!(
|
||||
@@ -354,7 +489,8 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
=>
|
||||
(parameter
|
||||
external_name: (identifier #{ext})
|
||||
pattern: (name_pattern identifier: (identifier #{name})))
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
default: {..ctx.default_value})
|
||||
),
|
||||
rule!(
|
||||
(parameter external_name: @ext name: @name type: @ty)
|
||||
@@ -362,21 +498,24 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(parameter
|
||||
external_name: (identifier #{ext})
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
type: {ty})
|
||||
type: {ty}
|
||||
default: {..ctx.default_value})
|
||||
),
|
||||
// Parameter with just name and type (no external name)
|
||||
rule!(
|
||||
(parameter name: @name)
|
||||
=>
|
||||
(parameter
|
||||
pattern: (name_pattern identifier: (identifier #{name})))
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
default: {..ctx.default_value})
|
||||
),
|
||||
rule!(
|
||||
(parameter name: @name type: @ty)
|
||||
=>
|
||||
(parameter
|
||||
pattern: (name_pattern identifier: (identifier #{name}))
|
||||
type: {ty})
|
||||
type: {ty}
|
||||
default: {..ctx.default_value})
|
||||
),
|
||||
// Reference to a function, f(x:y:z:). This is parsed as a call with a single argument with multiple reference_specifier labels.
|
||||
// We don't want downstream QL to try to handle this as a call_expr with a weird argument, so explicitly mark it as unsupported for now.
|
||||
@@ -484,11 +623,12 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
argument: (argument value: {closure}))
|
||||
),
|
||||
// ---- Control flow ----
|
||||
// If statement
|
||||
rule!(
|
||||
(if_statement condition: _* @cond body: @then_body else_branch: _? @else_stmts)
|
||||
=>
|
||||
(if_expr
|
||||
condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
|
||||
condition: {and_chain(&mut ctx, cond)}
|
||||
then: {then_body}
|
||||
else: {..else_stmts})
|
||||
),
|
||||
@@ -497,7 +637,7 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(guard_statement condition: _* @cond body: (block statement: _* @else_stmts))
|
||||
=>
|
||||
(guard_if_stmt
|
||||
condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem}))
|
||||
condition: {and_chain(&mut ctx, cond)}
|
||||
else: (block stmt: {..else_stmts}))
|
||||
),
|
||||
// Ternary expression → if_expr
|
||||
@@ -575,20 +715,24 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
rule!(
|
||||
(while_statement condition: _* @cond body: (block statement: _* @body))
|
||||
=>
|
||||
(while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body}))
|
||||
(while_stmt
|
||||
condition: {and_chain(&mut ctx, cond)}
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
// Repeat-while loop
|
||||
rule!(
|
||||
(repeat_while_statement condition: _* @cond body: (block statement: _* @body))
|
||||
=>
|
||||
(do_while_stmt condition: {..cond}.reduce_left(first -> {first}, acc, elem -> (binary_expr operator: (infix_operator "&&") left: {acc} right: {elem})) body: (block stmt: {..body}))
|
||||
(do_while_stmt
|
||||
condition: {and_chain(&mut ctx, cond)}
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
// Labeled statement (e.g. `outer: for ...`). Strip the trailing ':' from the label token.
|
||||
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {..{
|
||||
let text = __yeast_ctx.ast.source_text(lbl.into());
|
||||
let name = __yeast_ctx.literal("identifier", &text[..text.len() - 1]);
|
||||
vec![__yeast_ctx.node("labeled_stmt", vec![("label", vec![name]), ("stmt", vec![stmt.into()])])]
|
||||
}}),
|
||||
rule!((labeled_statement label: (statement_label) @lbl statement: @stmt) => {
|
||||
let text = ctx.ast.source_text(lbl.into());
|
||||
let name = &text[..text.len() - 1];
|
||||
tree!((labeled_stmt label: (identifier #{name}) stmt: {stmt}))
|
||||
}),
|
||||
// ---- Collections ----
|
||||
// Array literal
|
||||
rule!((array_literal element: _* @elems) => (array_literal element: {..elems})),
|
||||
@@ -598,16 +742,9 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
rule!(
|
||||
(dictionary_literal key: _* @keys value: _* @vals)
|
||||
=>
|
||||
(map_literal element: {..{
|
||||
keys.iter().zip(vals.iter()).map(|(&k, &v)| {
|
||||
let k_id: usize = k.into();
|
||||
let v_id: usize = v.into();
|
||||
__yeast_ctx.node("key_value_pair", vec![
|
||||
("key", vec![k_id]),
|
||||
("value", vec![v_id]),
|
||||
])
|
||||
}).collect::<Vec<_>>()
|
||||
}})
|
||||
(map_literal element: {..keys.into_iter().zip(vals).map(|(k, v)|
|
||||
tree!((key_value_pair key: {k} value: {v}))
|
||||
)})
|
||||
),
|
||||
rule!((dictionary_literal element: _* @elems) => (map_literal element: {..elems})),
|
||||
rule!((dictionary_literal_item key: @k value: @v) => (key_value_pair key: {k} value: {v})),
|
||||
@@ -669,9 +806,7 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
rule!(
|
||||
(identifier part: _+ @parts)
|
||||
=>
|
||||
{parts}.reduce_left(
|
||||
first -> (name_expr identifier: (identifier #{first})),
|
||||
acc, elem -> (member_access_expr base: {acc} member: (identifier #{elem})))
|
||||
{member_chain(&mut ctx, parts)}
|
||||
),
|
||||
// Scoped import declaration (for example `import struct Foo.Bar`):
|
||||
// flatten the identifier parts into a member_access_expr and bind the
|
||||
@@ -874,48 +1009,76 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
name: (identifier #{name})
|
||||
bound: {..bound})
|
||||
),
|
||||
// Protocol property declaration: translate each accessor requirement to an
|
||||
// accessor_declaration without a body, carrying the property name and type.
|
||||
// Subsequent accessors get chained_declaration (same flattening as computed properties).
|
||||
rule!(
|
||||
// Protocol property declaration: translate each accessor
|
||||
// requirement to an `accessor_declaration` carrying the property
|
||||
// name, type, and outer modifiers. Manual rule: we publish the
|
||||
// property's name/type/modifiers into `ctx` and translate each
|
||||
// accessor with `ctx.is_chained` toggled per iteration so the
|
||||
// inner `getter_specifier`/`setter_specifier` rules emit
|
||||
// complete nodes from the start (including the
|
||||
// `chained_declaration` tag for non-first accessors).
|
||||
manual_rule!(
|
||||
(protocol_property_declaration
|
||||
name: @pattern
|
||||
name: (pattern bound_identifier: @name)
|
||||
requirements: (protocol_property_requirements accessor: _+ @accessors)
|
||||
type: _? @ty
|
||||
(modifiers)* @mods)
|
||||
=>
|
||||
{..{
|
||||
let name_text = __yeast_ctx.ast.source_text(pattern.into());
|
||||
let mod_ids: Vec<usize> = mods.iter().map(|&m| m.into()).collect();
|
||||
let ty_ids: Vec<usize> = ty.iter().map(|&t| t.into()).collect();
|
||||
let acc_ids: Vec<usize> = accessors.iter().map(|&a| a.into()).collect();
|
||||
for (i, &acc_id) in acc_ids.iter().enumerate() {
|
||||
if i > 0 {
|
||||
let chained = __yeast_ctx.literal("modifier", "chained_declaration");
|
||||
__yeast_ctx.prepend_field(acc_id, "modifier", chained);
|
||||
}
|
||||
for &mod_id in mod_ids.iter().rev() {
|
||||
__yeast_ctx.prepend_field(acc_id, "modifier", mod_id);
|
||||
}
|
||||
for &ty_id in ty_ids.iter().rev() {
|
||||
__yeast_ctx.prepend_field(acc_id, "type", ty_id);
|
||||
}
|
||||
let ident = __yeast_ctx.literal("identifier", &name_text);
|
||||
__yeast_ctx.prepend_field(acc_id, "name", ident);
|
||||
{
|
||||
ctx.property_name = Some(tree!((identifier #{name})));
|
||||
ctx.property_type = ctx.translate_opt(ty)?;
|
||||
let mut modifiers = Vec::new();
|
||||
for m in mods {
|
||||
modifiers.extend(ctx.translate(m)?);
|
||||
}
|
||||
acc_ids
|
||||
}}
|
||||
ctx.outer_modifiers = modifiers;
|
||||
|
||||
let mut result = Vec::new();
|
||||
for (i, acc) in accessors.into_iter().enumerate() {
|
||||
ctx.is_chained = i > 0;
|
||||
result.extend(ctx.translate(acc)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
),
|
||||
// getter_specifier / setter_specifier → bodyless accessor_declaration
|
||||
rule!((getter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "get"))),
|
||||
rule!((setter_specifier) => (accessor_declaration accessor_kind: (accessor_kind "set"))),
|
||||
// getter_specifier / setter_specifier → bodyless
|
||||
// accessor_declaration. Reads property name/type/modifiers from
|
||||
// `ctx` set by the outer `protocol_property_declaration` rule.
|
||||
rule!(
|
||||
(getter_specifier)
|
||||
=>
|
||||
(accessor_declaration
|
||||
name: {ctx.property_name.ok_or("getter_specifier outside protocol_property_declaration context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "get")
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)})
|
||||
),
|
||||
rule!(
|
||||
(setter_specifier)
|
||||
=>
|
||||
(accessor_declaration
|
||||
name: {ctx.property_name.ok_or("setter_specifier outside protocol_property_declaration context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "set")
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)})
|
||||
),
|
||||
// protocol_property_requirements wrapper — should be consumed by above; fallback
|
||||
rule!((protocol_property_requirements accessor: _* @accs) => {..accs}),
|
||||
// Computed getter → accessor_declaration (body optional).
|
||||
// Reads property name/type from the outer property_binding rule
|
||||
// and binding/outer modifiers + chained tag from the outer
|
||||
// property_declaration rule.
|
||||
rule!(
|
||||
(computed_getter body: (block statement: _* @body)?)
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("computed_getter outside property_binding context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "get")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
@@ -924,6 +1087,11 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(computed_setter parameter: @param body: (block statement: _* @body))
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "set")
|
||||
parameter: (parameter pattern: (name_pattern identifier: (identifier #{param})))
|
||||
body: (block stmt: {..body}))
|
||||
@@ -933,6 +1101,11 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(computed_setter body: (block statement: _* @body)?)
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("computed_setter outside property_binding context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "set")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
@@ -941,16 +1114,30 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(computed_modify body: (block statement: _* @body))
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("computed_modify outside property_binding context")?}
|
||||
type: {..ctx.property_type}
|
||||
accessor_kind: (accessor_kind "modify")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
// willset/didset block — spread to children
|
||||
// willset/didset block — spread to children (only reachable as a
|
||||
// fallback; the outer property_binding manual rule normally
|
||||
// captures the willset/didset clauses directly).
|
||||
rule!((willset_didset_block _* @clauses) => {..clauses}),
|
||||
// willset clause → accessor_declaration (body optional).
|
||||
// willset clause → accessor_declaration (body optional). Reads
|
||||
// `ctx.property_name` set by the outer property_binding rule and
|
||||
// binding/outer modifiers + chained tag from the outer
|
||||
// property_declaration rule.
|
||||
rule!(
|
||||
(willset_clause body: (block statement: _* @body)?)
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("willset_clause outside property_binding context")?}
|
||||
accessor_kind: (accessor_kind "willSet")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
@@ -959,6 +1146,10 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
(didset_clause body: (block statement: _* @body)?)
|
||||
=>
|
||||
(accessor_declaration
|
||||
modifier: {..ctx.binding_modifier}
|
||||
modifier: {..ctx.outer_modifiers.clone()}
|
||||
modifier: {..chained_modifier(&mut ctx)}
|
||||
name: {ctx.property_name.ok_or("didset_clause outside property_binding context")?}
|
||||
accessor_kind: (accessor_kind "didSet")
|
||||
body: (block stmt: {..body}))
|
||||
),
|
||||
@@ -979,14 +1170,17 @@ fn translation_rules() -> Vec<yeast::Rule> {
|
||||
}
|
||||
|
||||
pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec {
|
||||
let desugar = DesugaringConfig::new()
|
||||
let ts_language: tree_sitter::Language = tree_sitter_swift::LANGUAGE.into();
|
||||
let config = DesugaringConfig::<SwiftContext>::new()
|
||||
.add_phase("translate", PhaseKind::OneShot, translation_rules())
|
||||
.with_output_node_types_yaml(desugared_ast_schema);
|
||||
let desugarer = ConcreteDesugarer::new(ts_language.clone(), config)
|
||||
.expect("failed to build Swift desugarer");
|
||||
simple::LanguageSpec {
|
||||
prefix: "swift",
|
||||
ts_language: tree_sitter_swift::LANGUAGE.into(),
|
||||
ts_language,
|
||||
node_types: tree_sitter_swift::NODE_TYPES,
|
||||
file_globs: vec!["*.swift".into(), "*.swiftinterface".into()],
|
||||
desugar: Some(desugar),
|
||||
desugar: Some(Box::new(desugarer)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -924,3 +924,159 @@ top_level
|
||||
accessor_kind: accessor_kind "set"
|
||||
modifier: modifier "class"
|
||||
name: identifier "Box"
|
||||
|
||||
===
|
||||
Protocol with read-only and read-write property requirements
|
||||
===
|
||||
|
||||
protocol P {
|
||||
var foo: Int { get }
|
||||
var bar: String { get set }
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
source_file
|
||||
statement:
|
||||
protocol_declaration
|
||||
body:
|
||||
protocol_body
|
||||
member:
|
||||
protocol_property_declaration
|
||||
name:
|
||||
pattern
|
||||
binding:
|
||||
value_binding_pattern
|
||||
mutability: var
|
||||
bound_identifier: simple_identifier "foo"
|
||||
requirements:
|
||||
protocol_property_requirements
|
||||
accessor:
|
||||
getter_specifier
|
||||
type:
|
||||
type_annotation
|
||||
type:
|
||||
type
|
||||
name:
|
||||
user_type
|
||||
part:
|
||||
simple_user_type
|
||||
name: type_identifier "Int"
|
||||
protocol_property_declaration
|
||||
name:
|
||||
pattern
|
||||
binding:
|
||||
value_binding_pattern
|
||||
mutability: var
|
||||
bound_identifier: simple_identifier "bar"
|
||||
requirements:
|
||||
protocol_property_requirements
|
||||
accessor:
|
||||
getter_specifier
|
||||
setter_specifier
|
||||
type:
|
||||
type_annotation
|
||||
type:
|
||||
type
|
||||
name:
|
||||
user_type
|
||||
part:
|
||||
simple_user_type
|
||||
name: type_identifier "String"
|
||||
name: type_identifier "P"
|
||||
|
||||
---
|
||||
|
||||
top_level
|
||||
body:
|
||||
block
|
||||
stmt:
|
||||
class_like_declaration
|
||||
member:
|
||||
accessor_declaration
|
||||
name: identifier "foo"
|
||||
type:
|
||||
named_type_expr
|
||||
name: identifier "Int"
|
||||
accessor_kind: accessor_kind "get"
|
||||
accessor_declaration
|
||||
name: identifier "bar"
|
||||
type:
|
||||
named_type_expr
|
||||
name: identifier "String"
|
||||
accessor_kind: accessor_kind "get"
|
||||
accessor_declaration
|
||||
modifier: modifier "chained_declaration"
|
||||
name: identifier "bar"
|
||||
type:
|
||||
named_type_expr
|
||||
name: identifier "String"
|
||||
accessor_kind: accessor_kind "set"
|
||||
modifier: modifier "protocol"
|
||||
name: identifier "P"
|
||||
|
||||
===
|
||||
Enum with comma-separated cases (chained_declaration)
|
||||
===
|
||||
|
||||
enum Suit {
|
||||
case clubs, diamonds, hearts, spades
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
source_file
|
||||
statement:
|
||||
class_declaration
|
||||
body:
|
||||
enum_class_body
|
||||
member:
|
||||
enum_entry
|
||||
case:
|
||||
enum_case_entry
|
||||
name: simple_identifier "clubs"
|
||||
enum_case_entry
|
||||
name: simple_identifier "diamonds"
|
||||
enum_case_entry
|
||||
name: simple_identifier "hearts"
|
||||
enum_case_entry
|
||||
name: simple_identifier "spades"
|
||||
declaration_kind: enum
|
||||
name: type_identifier "Suit"
|
||||
|
||||
---
|
||||
|
||||
top_level
|
||||
body:
|
||||
block
|
||||
stmt:
|
||||
class_like_declaration
|
||||
member:
|
||||
variable_declaration
|
||||
modifier: modifier "enum_case"
|
||||
pattern:
|
||||
name_pattern
|
||||
identifier: identifier "clubs"
|
||||
variable_declaration
|
||||
modifier:
|
||||
modifier "chained_declaration"
|
||||
modifier "enum_case"
|
||||
pattern:
|
||||
name_pattern
|
||||
identifier: identifier "diamonds"
|
||||
variable_declaration
|
||||
modifier:
|
||||
modifier "chained_declaration"
|
||||
modifier "enum_case"
|
||||
pattern:
|
||||
name_pattern
|
||||
identifier: identifier "hearts"
|
||||
variable_declaration
|
||||
modifier:
|
||||
modifier "chained_declaration"
|
||||
modifier "enum_case"
|
||||
pattern:
|
||||
name_pattern
|
||||
identifier: identifier "spades"
|
||||
modifier: modifier "enum"
|
||||
name: identifier "Suit"
|
||||
|
||||
@@ -319,3 +319,130 @@ top_level
|
||||
name_expr
|
||||
identifier: identifier "x"
|
||||
value: int_literal "1"
|
||||
|
||||
===
|
||||
Property with willSet and didSet observers
|
||||
===
|
||||
|
||||
class C {
|
||||
var x: Int = 0 {
|
||||
willSet { print(newValue) }
|
||||
didSet { print(oldValue) }
|
||||
}
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
source_file
|
||||
statement:
|
||||
class_declaration
|
||||
body:
|
||||
class_body
|
||||
member:
|
||||
property_declaration
|
||||
binding:
|
||||
value_binding_pattern
|
||||
mutability: var
|
||||
declarator:
|
||||
property_binding
|
||||
name:
|
||||
pattern
|
||||
bound_identifier: simple_identifier "x"
|
||||
observers:
|
||||
willset_didset_block
|
||||
didset:
|
||||
didset_clause
|
||||
body:
|
||||
block
|
||||
statement:
|
||||
call_expression
|
||||
function: simple_identifier "print"
|
||||
suffix:
|
||||
call_suffix
|
||||
arguments:
|
||||
value_arguments
|
||||
argument:
|
||||
value_argument
|
||||
value: simple_identifier "oldValue"
|
||||
willset:
|
||||
willset_clause
|
||||
body:
|
||||
block
|
||||
statement:
|
||||
call_expression
|
||||
function: simple_identifier "print"
|
||||
suffix:
|
||||
call_suffix
|
||||
arguments:
|
||||
value_arguments
|
||||
argument:
|
||||
value_argument
|
||||
value: simple_identifier "newValue"
|
||||
type:
|
||||
type_annotation
|
||||
type:
|
||||
type
|
||||
name:
|
||||
user_type
|
||||
part:
|
||||
simple_user_type
|
||||
name: type_identifier "Int"
|
||||
value: integer_literal "0"
|
||||
declaration_kind: class
|
||||
name: type_identifier "C"
|
||||
|
||||
---
|
||||
|
||||
top_level
|
||||
body:
|
||||
block
|
||||
stmt:
|
||||
class_like_declaration
|
||||
member:
|
||||
variable_declaration
|
||||
modifier: modifier "var"
|
||||
pattern:
|
||||
name_pattern
|
||||
identifier: identifier "x"
|
||||
type:
|
||||
named_type_expr
|
||||
name: identifier "Int"
|
||||
value: int_literal "0"
|
||||
accessor_declaration
|
||||
body:
|
||||
block
|
||||
stmt:
|
||||
call_expr
|
||||
argument:
|
||||
argument
|
||||
value:
|
||||
name_expr
|
||||
identifier: identifier "newValue"
|
||||
callee:
|
||||
name_expr
|
||||
identifier: identifier "print"
|
||||
modifier:
|
||||
modifier "var"
|
||||
modifier "chained_declaration"
|
||||
name: identifier "x"
|
||||
accessor_kind: accessor_kind "willSet"
|
||||
accessor_declaration
|
||||
body:
|
||||
block
|
||||
stmt:
|
||||
call_expr
|
||||
argument:
|
||||
argument
|
||||
value:
|
||||
name_expr
|
||||
identifier: identifier "oldValue"
|
||||
callee:
|
||||
name_expr
|
||||
identifier: identifier "print"
|
||||
modifier:
|
||||
modifier "var"
|
||||
modifier "chained_declaration"
|
||||
name: identifier "x"
|
||||
accessor_kind: accessor_kind "didSet"
|
||||
modifier: modifier "class"
|
||||
name: identifier "C"
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use codeql_extractor::extractor::simple;
|
||||
use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner};
|
||||
use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors};
|
||||
|
||||
#[path = "../src/languages/mod.rs"]
|
||||
mod languages;
|
||||
@@ -146,29 +146,36 @@ fn render_corpus(cases: &[CorpusCase]) -> String {
|
||||
out
|
||||
}
|
||||
|
||||
fn run_desugaring(
|
||||
lang: &simple::LanguageSpec,
|
||||
input: &str,
|
||||
) -> Result<yeast::Ast, String> {
|
||||
let runner = match lang.desugar.as_ref() {
|
||||
Some(config) => Runner::from_config(lang.ts_language.clone(), config)
|
||||
.map_err(|e| format!("Failed to create yeast runner: {e}"))?,
|
||||
None => Runner::new(lang.ts_language.clone(), &[]),
|
||||
};
|
||||
|
||||
runner
|
||||
.run(input)
|
||||
.map_err(|e| format!("Failed to parse input: {e}"))
|
||||
fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result<yeast::Ast, String> {
|
||||
match lang.desugar.as_deref() {
|
||||
Some(desugarer) => {
|
||||
// Parse the input ourselves so we don't depend on the desugarer
|
||||
// knowing about the language.
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&lang.ts_language)
|
||||
.map_err(|e| format!("Failed to set language: {e}"))?;
|
||||
let tree = parser
|
||||
.parse(input, None)
|
||||
.ok_or_else(|| "Failed to parse input".to_string())?;
|
||||
desugarer
|
||||
.run_from_tree(&tree, input.as_bytes())
|
||||
.map_err(|e| format!("Desugaring failed: {e}"))
|
||||
}
|
||||
None => {
|
||||
let runner: Runner = Runner::new(lang.ts_language.clone(), &[]);
|
||||
runner
|
||||
.run(input)
|
||||
.map_err(|e| format!("Failed to parse input: {e}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Produce the raw tree-sitter parse tree dump for `input`, with no
|
||||
/// desugaring rules applied. Uses a `Runner` with an empty phase list and
|
||||
/// the input grammar's own schema.
|
||||
fn dump_raw_parse(
|
||||
lang: &simple::LanguageSpec,
|
||||
input: &str,
|
||||
) -> Result<String, String> {
|
||||
let runner = Runner::new(lang.ts_language.clone(), &[]);
|
||||
fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result<String, String> {
|
||||
let runner: Runner = Runner::new(lang.ts_language.clone(), &[]);
|
||||
let ast = runner
|
||||
.run(input)
|
||||
.map_err(|e| format!("Failed to parse input: {e}"))?;
|
||||
@@ -272,11 +279,7 @@ fn test_corpus() {
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
failures.is_empty(),
|
||||
"{}",
|
||||
failures.join("\n\n") + "\n\n"
|
||||
);
|
||||
assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n");
|
||||
|
||||
if update_mode {
|
||||
let updated = render_corpus(&cases);
|
||||
@@ -285,7 +288,9 @@ fn test_corpus() {
|
||||
write_result.is_ok(),
|
||||
"Failed to update corpus file {}: {}",
|
||||
corpus_path.display(),
|
||||
write_result.err().map_or_else(String::new, |e| e.to_string())
|
||||
write_result
|
||||
.err()
|
||||
.map_or_else(String::new, |e| e.to_string())
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user