diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index 55bf28a3ac2..9ba6f21778c 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -95,16 +95,19 @@ impl Extractor { let mut schemas = vec![]; for lang in &self.languages { - let effective_node_types: String = - match lang.desugar.as_ref().and_then(|d| d.output_node_types_yaml()) { - Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { - std::io::Error::other(format!( - "Failed to convert YAML node-types to JSON for {}: {e}", - lang.prefix - )) - })?, - None => lang.node_types.to_string(), - }; + let effective_node_types: String = match lang + .desugar + .as_ref() + .and_then(|d| d.output_node_types_yaml()) + { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { + std::io::Error::other(format!( + "Failed to convert YAML node-types to JSON for {}: {e}", + lang.prefix + )) + })?, + None => lang.node_types.to_string(), + }; let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; schemas.push(schema); } diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs index 3c1d4d44bec..fc6031eb39d 100644 --- a/shared/yeast-macros/src/parse.rs +++ b/shared/yeast-macros/src/parse.rs @@ -121,9 +121,9 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { std::collections::HashMap::new(); let mut bare_children: Vec = Vec::new(); let push_field_elem = |order: &mut Vec, - map: &mut std::collections::HashMap>, - name: String, - elem: TokenStream| { + map: &mut std::collections::HashMap>, + name: String, + elem: TokenStream| { if !map.contains_key(&name) { order.push(name.clone()); map.insert(name, vec![elem]); @@ -160,8 +160,7 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result> { } else { let child = if peek_is_at(tokens) { tokens.next(); - let capture_name = - expect_ident(tokens, "expected capture name after @")?; + let capture_name = expect_ident(tokens, "expected capture name after @")?; let name_str = capture_name.to_string(); quote! { yeast::query::QueryNode::Capture { @@ -420,7 +419,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result Result Result Result { +fn parse_chain_suffix(tokens: &mut Tokens, ctx: &Ident, base: TokenStream) -> Result { let mut current = base; while matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '.') { tokens.next(); // consume . @@ -608,7 +608,8 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result BuildCtx<'_, C> { /// input, only the first is returned. For most use cases (e.g. /// translating a single type annotation) this is what you want; if /// you need all ids, use [`translate`] directly. - pub fn translate_opt>( - &mut self, - id: Option, - ) -> Result, String> { + pub fn translate_opt>(&mut self, id: Option) -> Result, String> { match id { Some(id) => Ok(self.translate(id)?.into_iter().next()), None => Ok(None), diff --git a/shared/yeast/src/dump.rs b/shared/yeast/src/dump.rs index d046c192053..be496d40bd5 100644 --- a/shared/yeast/src/dump.rs +++ b/shared/yeast/src/dump.rs @@ -53,12 +53,7 @@ pub fn dump_ast_with_options( /// /// Any node that does not match the expected type set for its parent field is /// rendered with a trailing `" <-- ERROR: ..."` annotation on the same line. -pub fn dump_ast_with_type_errors( - ast: &Ast, - root: usize, - source: &str, - schema: &Schema, -) -> String { +pub fn dump_ast_with_type_errors(ast: &Ast, root: usize, source: &str, schema: &Schema) -> String { dump_ast_with_type_errors_and_options(ast, root, source, schema, &DumpOptions::default()) } @@ -74,7 +69,15 @@ pub fn dump_ast_with_type_errors_and_options( options: &DumpOptions, ) -> String { let mut out = String::new(); - dump_node(ast, root, source, options, 0, Some((schema, None, None)), &mut out); + dump_node( + ast, + root, + source, + options, + 0, + Some((schema, None, None)), + &mut out, + ); out } @@ -232,8 +235,8 @@ fn dump_node( } let field_name = ast.field_name_for_id(field_id).unwrap_or("?"); let child_type_check = type_check.map(|(schema, _, _)| { - let expected = expected_for_field(schema, node.kind_name(), field_id) - .or(Some(EMPTY_NODE_TYPES)); + let expected = + expected_for_field(schema, node.kind_name(), field_id).or(Some(EMPTY_NODE_TYPES)); let parent_field = Some((node.kind_name(), field_name)); (schema, expected, parent_field) }); diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs index 84337a1482c..e0fffc551f3 100644 --- a/shared/yeast/src/lib.rs +++ b/shared/yeast/src/lib.rs @@ -297,7 +297,9 @@ impl Ast { /// Returns the source text for `id`, resolving `NodeContent::Range` /// against the stored source bytes when available. pub fn source_text(&self, id: Id) -> String { - let Some(node) = self.get_node(id) else { return String::new(); }; + let Some(node) = self.get_node(id) else { + return String::new(); + }; let read_range = |range: &tree_sitter::Range| { let start = range.start_byte; let end = range.end_byte; @@ -488,7 +490,10 @@ impl Ast { /// Prepend a child id to the given field of the given node. pub fn prepend_field_child(&mut self, node_id: Id, field_id: FieldId, value_id: Id) { - let node = self.nodes.get_mut(node_id).expect("prepend_field_child: invalid node id"); + let node = self + .nodes + .get_mut(node_id) + .expect("prepend_field_child: invalid node id"); node.fields.entry(field_id).or_default().insert(0, value_id); } @@ -737,12 +742,7 @@ impl<'a, C: Clone> TranslatorHandle<'a, C> { /// Recursively apply OneShot rules to `id` and return the resulting /// node ids. Errors in a Repeating phase (where translation is not /// meaningful). - pub fn translate( - &self, - ast: &mut Ast, - user_ctx: &mut C, - id: Id, - ) -> Result, String> { + pub fn translate(&self, ast: &mut Ast, user_ctx: &mut C, id: Id) -> Result, String> { match &self.inner { TranslatorImpl::OneShot { index, @@ -851,9 +851,9 @@ impl Rule { translator: TranslatorHandle<'_, C>, ) -> Result>, String> { match self.try_match(ast, node)? { - Some(captures) => Ok(Some(self.run_transform( - ast, captures, node, fresh, user_ctx, translator, - )?)), + Some(captures) => Ok(Some( + self.run_transform(ast, captures, node, fresh, user_ctx, translator)?, + )), None => Ok(None), } } @@ -1004,7 +1004,15 @@ fn apply_repeating_rules_inner( for children in fields.values_mut() { let mut new_children: Option> = None; for (i, &child_id) in children.iter().enumerate() { - let result = apply_repeating_rules_inner(index, ast, user_ctx, child_id, fresh, rewrite_depth, None)?; + let result = apply_repeating_rules_inner( + index, + ast, + user_ctx, + child_id, + fresh, + rewrite_depth, + None, + )?; let unchanged = result.len() == 1 && result[0] == child_id; match (&mut new_children, unchanged) { (None, true) => {} // unchanged so far, no allocation needed @@ -1052,7 +1060,6 @@ fn apply_one_shot_rules_inner( fresh: &tree_builder::FreshScope, rewrite_depth: usize, ) -> Result, String> { - if rewrite_depth > MAX_REWRITE_DEPTH { return Err(format!( "Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \ @@ -1294,8 +1301,12 @@ impl<'a, C: Clone> Runner<'a, C> { let mut root = ast.get_root(); for phase in self.phases { let res = match phase.kind { - PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh), - PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh), + PhaseKind::Repeating => { + apply_repeating_rules(&phase.rules, ast, user_ctx, root, &fresh) + } + PhaseKind::OneShot => { + apply_one_shot_rules(&phase.rules, ast, user_ctx, root, &fresh) + } } .map_err(|e| format!("Phase `{}`: {e}", phase.name))?; if res.len() != 1 { @@ -1315,11 +1326,7 @@ impl<'a, C: Clone> Runner<'a, C> { impl<'a, C: Clone + Default> Runner<'a, C> { /// Parse `tree` against `source` and run all phases, using the /// default context (`C::default()`) as the initial context state. - pub fn run_from_tree( - &self, - tree: &tree_sitter::Tree, - source: &[u8], - ) -> Result { + pub fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { let mut user_ctx = C::default(); self.run_from_tree_with_ctx(tree, source, &mut user_ctx) } @@ -1351,8 +1358,7 @@ pub trait Desugarer: Send + Sync { /// Parse `tree` against `source` and run the desugaring pipeline. /// Each call constructs a fresh default user context internally. - fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) - -> Result; + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result; } /// A concrete [`Desugarer`] backed by a [`DesugaringConfig`] for a @@ -1386,13 +1392,8 @@ impl Desugarer for ConcreteDesugarer self.config.output_node_types_yaml } - fn run_from_tree( - &self, - tree: &tree_sitter::Tree, - source: &[u8], - ) -> Result { - let runner = - Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); + fn run_from_tree(&self, tree: &tree_sitter::Tree, source: &[u8]) -> Result { + let runner = Runner::with_schema(self.language.clone(), &self.schema, &self.config.phases); runner.run_from_tree(tree, source) } } diff --git a/shared/yeast/src/node_types_yaml.rs b/shared/yeast/src/node_types_yaml.rs index 797f14cba72..f4d9f2a1c42 100644 --- a/shared/yeast/src/node_types_yaml.rs +++ b/shared/yeast/src/node_types_yaml.rs @@ -242,10 +242,7 @@ pub fn convert(yaml_input: &str) -> Result { /// Apply YAML node-type definitions to a mutable Schema. /// Registers all types, fields, and allowed types from the YAML into the schema. -fn apply_yaml_to_schema( - yaml: &YamlNodeTypes, - schema: &mut crate::schema::Schema, -) { +fn apply_yaml_to_schema(yaml: &YamlNodeTypes, schema: &mut crate::schema::Schema) { // Register all supertypes as node kinds for name in yaml.supertypes.keys() { schema.register_kind(name); @@ -307,7 +304,8 @@ fn apply_yaml_to_schema( .into_vec() .into_iter() .map(|type_ref| { - let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); + let (kind, named) = + resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types); crate::schema::NodeType { kind, named } }) .collect::>(); diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs index bbd425f15a2..da13bb8b6b7 100644 --- a/shared/yeast/src/schema.rs +++ b/shared/yeast/src/schema.rs @@ -198,13 +198,8 @@ impl Schema { .insert((parent_kind.to_string(), field_id), node_types); } - pub fn field_types( - &self, - parent_kind: &str, - field_id: FieldId, - ) -> Option<&Vec> { - self.field_types - .get(&(parent_kind.to_string(), field_id)) + pub fn field_types(&self, parent_kind: &str, field_id: FieldId) -> Option<&Vec> { + self.field_types.get(&(parent_kind.to_string(), field_id)) } pub fn set_field_cardinality( diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 308c72b725f..99471f129ab 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -66,8 +66,8 @@ fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); let runner: Runner = Runner::new(lang.clone(), &[]); let ast = runner.run(input).unwrap(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang) - .unwrap(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &lang).unwrap(); dump_ast_with_type_errors(&ast, ast.get_root(), input, &schema) } @@ -166,7 +166,7 @@ fn test_parse_for_loop() { #[test] fn test_dump_highlights_type_errors_inline() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -176,13 +176,13 @@ named: identifier: "#; - let dump = parse_and_dump_typed("x = 1", schema_yaml); - assert!(dump.contains("integer \"1\" <-- ERROR:")); + let dump = parse_and_dump_typed("x = 1", schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); } #[test] fn test_dump_reports_preserved_unknown_kind_after_transformation() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -192,25 +192,25 @@ named: identifier: "#; - // This rewrite runs and preserves the RHS node kind via capture. - // With schema above, preserving `integer` should be reported inline. + // This rewrite runs and preserves the RHS node kind via capture. + // With schema above, preserving `integer` should be reported inline. let rules: Vec = vec![yeast::rule!( - (assignment left: (_) @left right: (_) @right) - => - (assignment - left: {left} - right: {right} - ) - )]; + (assignment left: (_) @left right: (_) @right) + => + (assignment + left: {left} + right: {right} + ) + )]; - let dump = run_and_dump_typed("x = 1", rules, schema_yaml); - assert!(dump.contains("integer \"1\" <-- ERROR:")); - assert!(dump.contains("node kind 'integer' not in schema")); + let dump = run_and_dump_typed("x = 1", rules, schema_yaml); + assert!(dump.contains("integer \"1\" <-- ERROR:")); + assert!(dump.contains("node kind 'integer' not in schema")); } #[test] fn test_dump_reports_undeclared_field_on_node() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -219,14 +219,14 @@ named: identifier: "#; - let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); - assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); - assert!(dump.contains("the node 'assignment' has no field 'right'")); + let dump = parse_and_dump_typed_with_language("x = y", schema_yaml); + assert!(dump.contains("right: identifier \"y\" <-- ERROR:")); + assert!(dump.contains("the node 'assignment' has no field 'right'")); } #[test] fn test_dump_reports_disallowed_kind_in_field_type() { - let schema_yaml = r#" + let schema_yaml = r#" named: program: $children*: assignment @@ -237,10 +237,10 @@ named: integer: "#; - let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); - assert!(dump.contains("right: integer \"1\" <-- ERROR:")); - assert!(dump.contains("should contain")); - assert!(dump.contains("but got integer")); + let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml); + assert!(dump.contains("right: integer \"1\" <-- ERROR:")); + assert!(dump.contains("should contain")); + assert!(dump.contains("but got integer")); } // ---- Query tests ---- @@ -309,15 +309,11 @@ fn test_query_skips_extras_in_positional_match() { let matched = query.do_match(&ast, array_id, &mut captures).unwrap(); assert!(matched); assert_eq!( - ast.get_node(captures.get_var("a").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("a").unwrap()).unwrap().kind(), "integer" ); assert_eq!( - ast.get_node(captures.get_var("b").unwrap()) - .unwrap() - .kind(), + ast.get_node(captures.get_var("b").unwrap()).unwrap().kind(), "integer" ); } @@ -325,8 +321,8 @@ fn test_query_skips_extras_in_positional_match() { #[test] fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() { let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); - let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang) - .unwrap(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); let phases: Vec = vec![Phase::new( "test", PhaseKind::Repeating, @@ -1205,7 +1201,9 @@ fn test_hash_brace_uses_capture_location_for_leaf() { let mut bar_ids: Vec = Vec::new(); for id in ast.reachable_node_ids() { - let Some(node) = ast.get_node(id) else { continue; }; + let Some(node) = ast.get_node(id) else { + continue; + }; if node.kind() == "identifier" && ast.source_text(id) == "bar" { bar_ids.push(id); } diff --git a/unified/extractor/src/extractor.rs b/unified/extractor/src/extractor.rs index 7601fa8addb..301c6cf533f 100644 --- a/unified/extractor/src/extractor.rs +++ b/unified/extractor/src/extractor.rs @@ -1,9 +1,9 @@ use clap::Args; use std::path::PathBuf; +use crate::languages; use codeql_extractor::extractor::simple; use codeql_extractor::trap; -use crate::languages; #[derive(Args)] pub struct Options { @@ -35,7 +35,9 @@ pub fn run(options: Options) -> std::io::Result<()> { prefix: "unified".to_string(), languages, trap_dir: options.output_dir, - trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"), + trap_compression: trap::Compression::from_env( + "CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION", + ), source_archive_dir: options.source_archive_dir, file_lists: vec![options.file_list], }; diff --git a/unified/extractor/src/generator.rs b/unified/extractor/src/generator.rs index cbf971a8ff2..974de5dbca9 100644 --- a/unified/extractor/src/generator.rs +++ b/unified/extractor/src/generator.rs @@ -22,14 +22,19 @@ pub fn run(options: Options) -> std::io::Result<()> { // The QL-visible schema is the unified output AST, not the per-language // input grammars. Pass it via `desugar.output_node_types_yaml` so the // generator converts the YAML to JSON node-types. - let desugar = yeast::DesugaringConfig::new() - .with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); + let desugar = + yeast::DesugaringConfig::new().with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA); let languages = vec![Language { name: "Unified".to_owned(), - node_types: "", // unused: generator picks up output_node_types_yaml above + node_types: "", // unused: generator picks up output_node_types_yaml above desugar: Some(desugar), }]; - generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh") + generate( + languages, + options.dbscheme, + options.library, + "run unified/scripts/create-extractor-pack.sh", + ) } diff --git a/unified/extractor/src/languages/swift/swift.rs b/unified/extractor/src/languages/swift/swift.rs index 7820d81e29f..c84e3cf3867 100644 --- a/unified/extractor/src/languages/swift/swift.rs +++ b/unified/extractor/src/languages/swift/swift.rs @@ -1,5 +1,5 @@ use codeql_extractor::extractor::simple; -use yeast::{manual_rule, rule, tree, ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule}; +use yeast::{ConcreteDesugarer, DesugaringConfig, PhaseKind, Rule, manual_rule, rule, tree}; /// User context propagated from outer rules down to the inner rules that /// emit the corresponding output declarations, so that each emitted node @@ -82,11 +82,14 @@ fn member_chain( parts: Vec, ) -> yeast::Id { let mut iter = parts.into_iter(); - let first = iter.next().expect("identifier with `part:` must have at least one part"); + let first = iter + .next() + .expect("identifier with `part:` must have at least one part"); let init = tree!((name_expr identifier: (identifier #{first}))); - iter.fold(init, |acc, elem| { - tree!((member_access_expr base: {acc} member: (identifier #{elem}))) - }) + iter.fold( + init, + |acc, elem| tree!((member_access_expr base: {acc} member: (identifier #{elem}))), + ) } fn translation_rules() -> Vec> { diff --git a/unified/extractor/tests/corpus_tests.rs b/unified/extractor/tests/corpus_tests.rs index e2a0fe17a4c..6c859c2f6cf 100644 --- a/unified/extractor/tests/corpus_tests.rs +++ b/unified/extractor/tests/corpus_tests.rs @@ -2,7 +2,7 @@ use std::fs; use std::path::Path; use codeql_extractor::extractor::simple; -use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner}; +use yeast::{Runner, dump::dump_ast, dump::dump_ast_with_type_errors}; #[path = "../src/languages/mod.rs"] mod languages; @@ -146,10 +146,7 @@ fn render_corpus(cases: &[CorpusCase]) -> String { out } -fn run_desugaring( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { +fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result { match lang.desugar.as_deref() { Some(desugarer) => { // Parse the input ourselves so we don't depend on the desugarer @@ -177,10 +174,7 @@ fn run_desugaring( /// Produce the raw tree-sitter parse tree dump for `input`, with no /// desugaring rules applied. Uses a `Runner` with an empty phase list and /// the input grammar's own schema. -fn dump_raw_parse( - lang: &simple::LanguageSpec, - input: &str, -) -> Result { +fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result { let runner: Runner = Runner::new(lang.ts_language.clone(), &[]); let ast = runner .run(input) @@ -285,11 +279,7 @@ fn test_corpus() { } } - assert!( - failures.is_empty(), - "{}", - failures.join("\n\n") + "\n\n" - ); + assert!(failures.is_empty(), "{}", failures.join("\n\n") + "\n\n"); if update_mode { let updated = render_corpus(&cases); @@ -298,7 +288,9 @@ fn test_corpus() { write_result.is_ok(), "Failed to update corpus file {}: {}", corpus_path.display(), - write_result.err().map_or_else(String::new, |e| e.to_string()) + write_result + .err() + .map_or_else(String::new, |e| e.to_string()) ); } }