yeast: Use SmallVec<[Id; 1]> for rule output

Every rule firing returned `Vec<Id>` even though the overwhelming
majority of rules produce a single replacement node. Switch the
Transform return type, try_rule, apply_rules, and apply_rules_inner
to `RuleOutput = SmallVec<[Id; 1]>`. Re-export `smallvec` and
`SmallVec` from the yeast crate so generated rule! macro code can
refer to them by short paths.

For the rule! macro, generate `yeast::smallvec![__id]` for the
shorthand form and `let mut __nodes: yeast::RuleOutput =
yeast::SmallVec::new();` for the full template form, so rule outputs
stay inline for the common single-Id case.

Per-firing memory: the `Vec` heap allocation made on each rule firing
is eliminated when the rule produces 0 or 1 Ids. Rules that produce
multiple Ids still allocate (SmallVec spills to the heap once the
inline buffer is exceeded), as they did before.
This commit is contained in:
Taus
2026-05-07 15:56:26 +00:00
parent 15936a5f8d
commit 9516861c71
7 changed files with 32 additions and 9 deletions

1
Cargo.lock generated
View File

@@ -3415,6 +3415,7 @@ dependencies = [
"serde",
"serde_json",
"serde_yaml",
"smallvec",
"tree-sitter",
"tree-sitter-python",
"tree-sitter-ruby",

View File

@@ -142,6 +142,7 @@ use_repo(
"vendor_ts__serde_json-1.0.145",
"vendor_ts__serde_with-3.14.1",
"vendor_ts__serde_yaml-0.9.34-deprecated",
"vendor_ts__smallvec-1.15.1",
"vendor_ts__syn-2.0.106",
"vendor_ts__toml-0.9.7",
"vendor_ts__tracing-0.1.41",

View File

@@ -541,6 +541,18 @@ alias(
tags = ["manual"],
)
alias(
name = "smallvec-1.15.1",
actual = "@vendor_ts__smallvec-1.15.1//:smallvec",
tags = ["manual"],
)
alias(
name = "smallvec",
actual = "@vendor_ts__smallvec-1.15.1//:smallvec",
tags = ["manual"],
)
alias(
name = "syn-2.0.106",
actual = "@vendor_ts__syn-2.0.106//:syn",

View File

@@ -391,6 +391,7 @@ _NORMAL_DEPENDENCIES = {
"serde": Label("@vendor_ts__serde-1.0.228//:serde"),
"serde_json": Label("@vendor_ts__serde_json-1.0.145//:serde_json"),
"serde_yaml": Label("@vendor_ts__serde_yaml-0.9.34-deprecated//:serde_yaml"),
"smallvec": Label("@vendor_ts__smallvec-1.15.1//:smallvec"),
"tree-sitter": Label("@vendor_ts__tree-sitter-0.26.8//:tree_sitter"),
"tree-sitter-python": Label("@vendor_ts__tree-sitter-python-0.23.6//:tree_sitter_python"),
"tree-sitter-ruby": Label("@vendor_ts__tree-sitter-ruby-0.23.1//:tree_sitter_ruby"),
@@ -4278,6 +4279,7 @@ def crate_repositories():
struct(repo = "vendor_ts__serde_json-1.0.145", is_dev_dep = False),
struct(repo = "vendor_ts__serde_with-3.14.1", is_dev_dep = False),
struct(repo = "vendor_ts__serde_yaml-0.9.34-deprecated", is_dev_dep = False),
struct(repo = "vendor_ts__smallvec-1.15.1", is_dev_dep = False),
struct(repo = "vendor_ts__syn-2.0.106", is_dev_dep = False),
struct(repo = "vendor_ts__toml-0.9.7", is_dev_dep = False),
struct(repo = "vendor_ts__tracing-0.1.41", is_dev_dep = False),

View File

@@ -643,7 +643,7 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
true,
__source_range,
);
vec![__id]
yeast::smallvec![__id]
}
} else {
// Full template form
@@ -657,7 +657,7 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
}
quote! {
let mut __nodes: Vec<usize> = Vec::new();
let mut __nodes: yeast::RuleOutput = yeast::SmallVec::new();
#(#transform_items)*
__nodes
}

View File

@@ -8,6 +8,7 @@ clap = { version = "4.4.10", features = ["derive"] }
serde = { version = "1.0.193", features = ["derive"] }
serde_json = "1.0.108"
serde_yaml = "0.9"
smallvec = "1.15"
tree-sitter = ">= 0.23.0"
yeast-macros = { path = "../yeast-macros" }

View File

@@ -4,6 +4,7 @@ extern crate self as yeast;
use serde::Serialize;
use serde_json::{json, Value};
pub use smallvec::{smallvec, SmallVec};
pub mod build;
pub mod captures;
@@ -29,6 +30,10 @@ type Id = usize;
type FieldId = u16;
type KindId = u16;
/// The output of one rule firing: a small list of replacement node Ids,
/// inline for the common "single replacement" case.
pub type RuleOutput = SmallVec<[Id; 1]>;
pub const CHILD_FIELD: u16 = u16::MAX;
#[derive(Debug)]
@@ -452,9 +457,10 @@ impl From<tree_sitter::Range> for NodeContent {
/// The transform function for a rule: takes the AST, captured variables, a
/// fresh-name scope, and the source range of the matched node, and returns
/// the IDs of the replacement nodes.
/// the IDs of the replacement nodes (typically a single Id; uses
/// `SmallVec` to keep the common case inline).
pub type Transform = Box<
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> Vec<Id>
dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option<tree_sitter::Range>) -> RuleOutput
+ Send
+ Sync,
>;
@@ -492,7 +498,7 @@ impl Rule {
ast: &mut Ast,
node: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Option<Vec<Id>>, String> {
) -> Result<Option<RuleOutput>, String> {
let mut captures = Captures::new();
if self.query.do_match(ast, node, &mut captures)? {
fresh.next_scope();
@@ -544,7 +550,7 @@ fn apply_rules(
ast: &mut Ast,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
) -> Result<RuleOutput, String> {
let index = RuleIndex::new(rules);
apply_rules_inner(&index, ast, id, fresh, 0, None)
}
@@ -556,7 +562,7 @@ fn apply_rules_inner(
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
skip_rule: Option<*const Rule>,
) -> Result<Vec<Id>, String> {
) -> Result<RuleOutput, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
@@ -576,7 +582,7 @@ fn apply_rules_inner(
// query doesn't loop. Other rules and child traversal are
// unaffected.
let next_skip = if rule.repeated { None } else { Some(rule_ptr) };
let mut results = Vec::new();
let mut results = RuleOutput::new();
for node in result_node {
results.extend(apply_rules_inner(
index,
@@ -625,7 +631,7 @@ fn apply_rules_inner(
}
}
ast.nodes[id].fields = fields;
Ok(vec![id])
Ok(smallvec![id])
}
/// One phase of a desugaring pass: a named bundle of rules that runs to