Apply rustfmt

Format the touched Rust crates (shared/tree-sitter-extractor,
shared/yeast, shared/yeast-macros, unified/extractor) so the
tree-sitter-extractor CI fmt check passes. No functional changes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Taus
2026-06-25 12:26:52 +00:00
parent 8a3bb915e4
commit f75cf95db5
12 changed files with 137 additions and 139 deletions

View File

@@ -95,16 +95,19 @@ impl Extractor {
let mut schemas = vec![];
for lang in &self.languages {
let effective_node_types: String =
match lang.desugar.as_ref().and_then(|d| d.output_node_types_yaml()) {
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let effective_node_types: String = match lang
.desugar
.as_ref()
.and_then(|d| d.output_node_types_yaml())
{
Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| {
std::io::Error::other(format!(
"Failed to convert YAML node-types to JSON for {}: {e}",
lang.prefix
))
})?,
None => lang.node_types.to_string(),
};
let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?;
schemas.push(schema);
}

View File

@@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
let push_field_elem = |order: &mut Vec<String>,
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
if !map.contains_key(&name) {
order.push(name.clone());
map.insert(name, vec![elem]);
@@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let capture_name = expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
@@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Named fields — compute each value into a temp, then reference it
while peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
let field_str = field_name
.to_string()
.strip_prefix("r#")
.unwrap_or(&field_name.to_string())
.to_string();
expect_punct(tokens, ':', "expected `:` after field name")?;
let temp = Ident::new(
&format!("__field_{field_str}_{field_counter}"),
@@ -438,7 +441,8 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
// Determine if a chain (.map(..)) follows the `{}` group.
let mut after = tokens.clone();
after.next(); // skip the brace group
let has_chain = matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain =
matches!(after.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
if is_splice || has_chain {
let group = expect_group(tokens, Delimiter::Brace)?;
@@ -506,11 +510,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
/// Each call expects the receiver to be an iterator. The `base` argument
/// should therefore already be an iterator (use `.into_iter()` on it before
/// calling this function).
fn parse_chain_suffix(
tokens: &mut Tokens,
ctx: &Ident,
base: TokenStream,
) -> Result<TokenStream> {
fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result<TokenStream> {
let mut current = base;
while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') {
tokens.next(); // consume .
@@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
// {expr} or {..expr} (with optional .chain) — single node or splice
if peek_is_group(tokens, Delimiter::Brace) {
let group = expect_group(tokens, Delimiter::Brace)?;
let has_chain = matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let has_chain =
matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.');
let mut inner = group.stream().into_iter().peekable();
let is_splice = peek_is_dotdot(&inner);
if is_splice || has_chain {

View File

@@ -197,10 +197,7 @@ impl<C: Clone> BuildCtx<'_, C> {
/// input, only the first is returned. For most use cases (e.g.
/// translating a single type annotation) this is what you want; if
/// you need all ids, use [`translate`] directly.
pub fn translate_opt<I: Into<Id>>(
&mut self,
id: Option<I>,
) -> Result<Option<Id>, String> {
pub fn translate_opt<I: Into<Id>>(&mut self, id: Option<I>) -> Result<Option<Id>, String> {
match id {
Some(id) => Ok(self.translate(id)?.into_iter().next()),
None => Ok(None),

View File

@@ -53,12 +53,7 @@ pub fn dump_ast_with_options(
///
/// Any node that does not match the expected type set for its parent field is
/// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line.
pub fn dump_ast_with_type_errors(
ast: &Ast,
root: usize,
source: &str,
schema: &Schema,
) -> String {
pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String {
dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default())
}
@@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options(
options: &DumpOptions,
) -> String {
let mut out = String::new();
dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out);
dump_node(
ast,
root,
source,
options,
0,
Some((schema, None, None)),
&mut out,
);
out
}
@@ -232,8 +235,8 @@ fn dump_node(
}
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), field_id)
.or(Some(EMPTY_NODE_TYPES));
let expected =
expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), field_name));
(schema, expected, parent_field)
});

View File

@@ -297,7 +297,9 @@ impl Ast {
/// Returns the source text for `id`, resolving `NodeContent::Range`
/// against the stored source bytes when available.
pub fn source_text(&self, id: Id) -> String {
let Some(node) = self.get_node(id) else { return String::new(); };
let Some(node) = self.get_node(id) else {
return String::new();
};
let read_range = |range: &tree_sitter::Range| {
let start = range.start_byte;
let end = range.end_byte;
@@ -488,7 +490,10 @@ impl Ast {
/// Prepend a child id to the given field of the given node.
pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) {
let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id");
let node = self
.nodes
.get_mut(node_id)
.expect("prepend_field_child: invalid node id");
node.fields.entry(field_id).or_default().insert(0, value_id);
}
@@ -737,12 +742,7 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> {
/// Recursively apply OneShot rules to `id` and return the resulting
/// node ids. Errors in a Repeating phase (where translation is not
/// meaningful).
pub fn translate(
&self,
ast: &mut Ast,
user_ctx: &mut C,
id: Id,
) -> Result<Vec<Id>, String> {
pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result<Vec<Id>, String> {
match &self.inner {
TranslatorImpl::OneShot {
index,
@@ -851,9 +851,9 @@ impl<C> Rule<C> {
translator: TranslatorHandle<'_, C>,
) -> Result<Option<Vec<Id>>, String> {
match self.try_match(ast, node)? {
Some(captures) => Ok(Some(self.run_transform(
ast, captures, node, fresh, user_ctx, translator,
)?)),
Some(captures) => Ok(Some(
self.run_transform(ast, captures, node, fresh, user_ctx, translator)?,
)),
None => Ok(None),
}
}
@@ -1004,7 +1004,15 @@ fn apply_repeating_rules_inner<C: Clone>(
for children in fields.values_mut() {
let mut new_children: Option<Vec<Id>> = None;
for (i, &child_id) in children.iter().enumerate() {
let result = apply_repeating_rules_inner(index, ast, user_ctx, child_id, fresh, rewrite_depth, None)?;
let result = apply_repeating_rules_inner(
index,
ast,
user_ctx,
child_id,
fresh,
rewrite_depth,
None,
)?;
let unchanged = result.len() == 1 && result[0] == child_id;
match (&mut new_children, unchanged) {
(None, true) => {} // unchanged so far, no allocation needed
@@ -1052,7 +1060,6 @@ fn apply_one_shot_rules_inner<C: Clone>(
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
@@ -1294,8 +1301,12 @@ impl<'a, C: Clone> Runner<'a, C> {
let mut root = ast.get_root();
for phase in self.phases {
let res = match phase.kind {
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh),
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh),
PhaseKind::Repeating => {
apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
PhaseKind::OneShot => {
apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh)
}
}
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
if res.len() != 1 {
@@ -1315,11 +1326,7 @@ impl<'a, C: Clone> Runner<'a, C> {
impl<'a, C: Clone + Default> Runner<'a, C> {
/// Parse `tree` against `source` and run all phases, using the
/// default context (`C::default()`) as the initial context state.
pub fn run_from_tree(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
) -> Result<Ast, String> {
pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
let mut user_ctx = C::default();
self.run_from_tree_with_ctx(tree, source, &mut user_ctx)
}
@@ -1351,8 +1358,7 @@ pub trait Desugarer: Send + Sync {
/// Parse `tree` against `source` and run the desugaring pipeline.
/// Each call constructs a fresh default user context internally.
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8])
-> Result<Ast, String>;
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String>;
}
/// A concrete [`Desugarer`] backed by a [`DesugaringConfig<C>`] for a
@@ -1386,13 +1392,8 @@ impl<C: Default + Clone + Send + Sync + 'static> Desugarer for ConcreteDesugarer
self.config.output_node_types_yaml
}
fn run_from_tree(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
) -> Result<Ast, String> {
let runner =
Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases);
fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result<Ast, String> {
let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases);
runner.run_from_tree(tree, source)
}
}

View File

@@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
/// Apply YAML node-type definitions to a mutable Schema.
/// Registers all types, fields, and allowed types from the YAML into the schema.
fn apply_yaml_to_schema(
yaml: &YamlNodeTypes,
schema: &mut crate::schema::Schema,
) {
fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) {
// Register all supertypes as node kinds
for name in yaml.supertypes.keys() {
schema.register_kind(name);
@@ -307,7 +304,8 @@ fn apply_yaml_to_schema(
.into_vec()
.into_iter()
.map(|type_ref| {
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
let (kind, named) =
resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect::<Vec<_>>();

View File

@@ -198,13 +198,8 @@ impl Schema {
.insert((parent_kind.to_string(), field_id), node_types);
}
pub fn field_types(
&self,
parent_kind: &str,
field_id: FieldId,
) -> Option<&Vec<NodeType>> {
self.field_types
.get(&(parent_kind.to_string(), field_id))
pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec<NodeType>> {
self.field_types.get(&(parent_kind.to_string(), field_id))
}
pub fn set_field_cardinality(

View File

@@ -66,8 +66,8 @@ fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let runner: Runner = Runner::new(lang.clone(), &[]);
let ast = runner.run(input).unwrap();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang)
.unwrap();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap();
dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema)
}
@@ -166,7 +166,7 @@ fn test_parse_for_loop() {
#[test]
fn test_dump_highlights_type_errors_inline() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -176,13 +176,13 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
}
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -192,25 +192,25 @@ named:
identifier:
"#;
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules: Vec<Rule> = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
}
#[test]
fn test_dump_reports_undeclared_field_on_node() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -219,14 +219,14 @@ named:
identifier:
"#;
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
}
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
let schema_yaml = r#"
let schema_yaml = r#"
named:
program:
$children*: assignment
@@ -237,10 +237,10 @@ named:
integer:
"#;
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
}
// ---- Query tests ----
@@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() {
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
assert!(matched);
assert_eq!(
ast.get_node(captures.get_var("a").unwrap())
.unwrap()
.kind(),
ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(),
"integer"
);
assert_eq!(
ast.get_node(captures.get_var("b").unwrap())
.unwrap()
.kind(),
ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(),
"integer"
);
}
@@ -325,8 +321,8 @@ fn test_query_skips_extras_in_positional_match() {
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
.unwrap();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases: Vec<Phase> = vec![Phase::new(
"test",
PhaseKind::Repeating,
@@ -1205,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() {
let mut bar_ids: Vec<usize> = Vec::new();
for id in ast.reachable_node_ids() {
let Some(node) = ast.get_node(id) else { continue; };
let Some(node) = ast.get_node(id) else {
continue;
};
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
bar_ids.push(id);
}

View File

@@ -1,9 +1,9 @@
use clap::Args;
use std::path::PathBuf;
use crate::languages;
use codeql_extractor::extractor::simple;
use codeql_extractor::trap;
use crate::languages;
#[derive(Args)]
pub struct Options {
@@ -35,7 +35,9 @@ pub fn run(options: Options) -> std::io::Result<()> {
prefix: "unified".to_string(),
languages,
trap_dir: options.output_dir,
trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"),
trap_compression: trap::Compression::from_env(
"CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION",
),
source_archive_dir: options.source_archive_dir,
file_lists: vec![options.file_list],
};

View File

@@ -22,14 +22,19 @@ pub fn run(options: Options) -> std::io::Result<()> {
// The QL-visible schema is the unified output AST, not the per-language
// input grammars. Pass it via `desugar.output_node_types_yaml` so the
// generator converts the YAML to JSON node-types.
let desugar = yeast::DesugaringConfig::new()
.with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
let desugar =
yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
let languages = vec![Language {
name: "Unified".to_owned(),
node_types: "", // unused: generator picks up output_node_types_yaml above
node_types: "", // unused: generator picks up output_node_types_yaml above
desugar: Some(desugar),
}];
generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh")
generate(
languages,
options.dbscheme,
options.library,
"run unified/scripts/create-extractor-pack.sh",
)
}

View File

@@ -1,5 +1,5 @@
use codeql_extractor::extractor::simple;
use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule};
use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree};
/// User context propagated from outer rules down to the inner rules that
/// emit the corresponding output declarations, so that each emitted node
@@ -82,11 +82,14 @@ fn member_chain(
parts: Vec<yeast::NodeRef>,
) -> yeast::Id {
let mut iter = parts.into_iter();
let first = iter.next().expect("identifier with `part:` must have at least one part");
let first = iter
.next()
.expect("identifier with `part:` must have at least one part");
let init = tree!((name_expr identifier: (identifier #{first})));
iter.fold(init, |acc, elem| {
tree!((member_access_expr base: {acc} member: (identifier #{elem})))
})
iter.fold(
init,
|acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))),
)
}
fn translation_rules() -> Vec<Rule<SwiftContext>> {

View File

@@ -2,7 +2,7 @@ use std::fs;
use std::path::Path;
use codeql_extractor::extractor::simple;
use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner};
use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors};
#[path = "../src/languages/mod.rs"]
mod languages;
@@ -146,10 +146,7 @@ fn render_corpus(cases: &[CorpusCase]) -> String {
out
}
fn run_desugaring(
lang: &simple::LanguageSpec,
input: &str,
) -> Result<yeast::Ast, String> {
fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result<yeast::Ast, String> {
match lang.desugar.as_deref() {
Some(desugarer) => {
// Parse the input ourselves so we don't depend on the desugarer
@@ -177,10 +174,7 @@ fn run_desugaring(
/// Produce the raw tree-sitter parse tree dump for `input`, with no
/// desugaring rules applied. Uses a `Runner` with an empty phase list and
/// the input grammar's own schema.
fn dump_raw_parse(
lang: &simple::LanguageSpec,
input: &str,
) -> Result<String, String> {
fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result<String, String> {
let runner: Runner = Runner::new(lang.ts_language.clone(), &[]);
let ast = runner
.run(input)
@@ -285,11 +279,7 @@ fn test_corpus() {
}
}
assert!(
failures.is_empty(),
"{}",
failures.join("\n\n") + "\n\n"
);
assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n");
if update_mode {
let updated = render_corpus(&cases);
@@ -298,7 +288,9 @@ fn test_corpus() {
write_result.is_ok(),
"Failed to update corpus file {}: {}",
corpus_path.display(),
write_result.err().map_or_else(String::new, |e| e.to_string())
write_result
.err()
.map_or_else(String::new, |e| e.to_string())
);
}
}