diff --git a/Cargo.lock b/Cargo.lock index c7310c07515..a0d31f77ad8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -382,6 +382,7 @@ dependencies = [ "ra_ap_hir_def", "ra_ap_ide_db", "ra_ap_load-cargo", + "ra_ap_parser", "ra_ap_paths", "ra_ap_project_model", "ra_ap_syntax", diff --git a/MODULE.bazel b/MODULE.bazel index b539dee7874..d1ff8b0b8d2 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -61,6 +61,7 @@ r.from_cargo( "//ruby/extractor:Cargo.toml", "//rust/extractor:Cargo.toml", "//rust/extractor/macros:Cargo.toml", + "//rust/generate-schema:Cargo.toml", "//shared/tree-sitter-extractor:Cargo.toml", ], ) diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index 550c3b51441..71ad6d8ac45 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -18,6 +18,7 @@ ra_ap_paths = "0.0.232" ra_ap_project_model = "0.0.232" ra_ap_syntax = "0.0.232" ra_ap_vfs = "0.0.232" +ra_ap_parser = "0.0.232" serde = "1.0.209" serde_with = "3.9.0" stderrlog = "0.6.0" diff --git a/rust/extractor/src/main.rs b/rust/extractor/src/main.rs index 77c15d994a8..16042145662 100644 --- a/rust/extractor/src/main.rs +++ b/rust/extractor/src/main.rs @@ -1,37 +1,11 @@ -use crate::trap::TrapId; use anyhow::Context; -use itertools::Itertools; -use log::info; -use ra_ap_hir::db::DefDatabase; -use ra_ap_hir::Crate; -use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice}; -use ra_ap_project_model::CargoConfig; -use ra_ap_project_model::RustLibSource; -use ra_ap_vfs::AbsPathBuf; -use std::path::PathBuf; - +use ra_ap_ide_db::line_index::LineIndex; mod archive; mod config; pub mod generated; mod translate; pub mod trap; -fn find_project_manifests( - files: &[PathBuf], -) -> anyhow::Result> { - let current = std::env::current_dir()?; - let abs_files: Vec<_> = files - .iter() - .map(|path| AbsPathBuf::assert_utf8(current.join(path))) - .collect(); - let ret = ra_ap_project_model::ProjectManifest::discover_all(&abs_files); - info!( - "found manifests: {}", - 
ret.iter().map(|m| format!("{m}")).join(", ") - ); - Ok(ret) -} - fn main() -> anyhow::Result<()> { let cfg = config::Config::extract().context("failed to load configuration")?; stderrlog::new() @@ -43,52 +17,20 @@ fn main() -> anyhow::Result<()> { let archiver = archive::Archiver { root: cfg.source_archive_dir, }; - - let config = CargoConfig { - sysroot: Some(RustLibSource::Discover), - target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir) - .map(|x| x.join("target")) - .ok(), - ..Default::default() - }; - let progress = |t| (log::info!("progress: {}", t)); - let load_config = LoadCargoConfig { - load_out_dirs_from_check: true, - with_proc_macro_server: ProcMacroServerChoice::Sysroot, - prefill_caches: false, - }; - let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?; - for project in projects { - let (db, vfs, _macro_server) = load_workspace_at( - project.manifest_path().as_ref(), - &config, - &load_config, - &progress, - )?; - - let crates = ::crate_graph(&db); - for crate_id in crates.iter() { - let krate = Crate::from(crate_id); - if !cfg.extract_dependencies && !krate.origin(&db).is_local() { - continue; - } - let name = krate.display_name(&db); - let crate_name = name - .as_ref() - .map(|n| n.canonical_name().as_str()) - .unwrap_or(""); - let trap = traps.create( - "crates", - &PathBuf::from(format!( - "/{}_{}", - crate_name, - crate_id.into_raw().into_u32() - )), - ); - translate::CrateTranslator::new(&db, trap, &krate, &vfs, &archiver) - .emit_crate() - .context("writing trap file")?; - } + for file in cfg.inputs { + let file = std::path::absolute(&file).unwrap_or(file); + let file = std::fs::canonicalize(&file).unwrap_or(file); + archiver.archive(&file); + let input = std::fs::read(&file)?; + let input = String::from_utf8(input)?; + let line_index = LineIndex::new(&input); + let display_path = file.to_string_lossy(); + let mut trap = traps.create("source", &file); + let label = trap.emit_file(&file); + 
translate::SourceFileTranslator::new(trap, label, line_index) + .extract(&display_path, &input) + .context("writing trap file")?; } + Ok(()) } diff --git a/rust/generate-schema/BUILD.bazel b/rust/generate-schema/BUILD.bazel new file mode 100644 index 00000000000..2a1be1d2c1a --- /dev/null +++ b/rust/generate-schema/BUILD.bazel @@ -0,0 +1,14 @@ +load("//misc/bazel:rust.bzl", "codeql_rust_binary") + +codeql_rust_binary( + name = "generate-schema", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + proc_macro_deps = all_crate_deps( + proc_macro = True, + ), + visibility = ["//rust:__subpackages__"], + deps = all_crate_deps( + normal = True, + ), +) diff --git a/rust/generate-schema/src/codegen.rs b/rust/generate-schema/src/codegen.rs index 94ee8ef7a75..337a42bef19 100644 --- a/rust/generate-schema/src/codegen.rs +++ b/rust/generate-schema/src/codegen.rs @@ -1,5 +1,4 @@ pub mod grammar; - pub fn reformat(x: String) -> String { x } diff --git a/rust/generate-schema/src/codegen/grammar.rs b/rust/generate-schema/src/codegen/grammar.rs index 39e06f9642d..85a84bf05f7 100644 --- a/rust/generate-schema/src/codegen/grammar.rs +++ b/rust/generate-schema/src/codegen/grammar.rs @@ -21,7 +21,7 @@ use crate::{ project_root, }; -mod ast_src; +pub mod ast_src; use self::ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc}; pub(crate) fn generate(check: bool) { @@ -621,10 +621,16 @@ fn pluralize(s: &str) -> String { } impl Field { - fn is_many(&self) -> bool { - matches!(self, Field::Node { cardinality: Cardinality::Many, .. }) + pub fn is_many(&self) -> bool { + matches!( + self, + Field::Node { + cardinality: Cardinality::Many, + .. 
+ } + ) } - fn token_kind(&self) -> Option { + pub fn token_kind(&self) -> Option { match self { Field::Token(token) => { let token: proc_macro2::TokenStream = token.parse().unwrap(); @@ -633,7 +639,7 @@ impl Field { _ => None, } } - fn method_name(&self) -> String { + pub fn method_name(&self) -> String { match self { Field::Token(name) => { let name = match name.as_str() { @@ -679,7 +685,7 @@ impl Field { } } } - fn ty(&self) -> proc_macro2::Ident { + pub fn ty(&self) -> proc_macro2::Ident { match self { Field::Token(_) => format_ident!("SyntaxToken"), Field::Node { ty, .. } => format_ident!("{}", ty), @@ -696,7 +702,7 @@ fn clean_token_name(name: &str) -> String { } } -fn lower(grammar: &Grammar) -> AstSrc { +pub(crate) fn lower(grammar: &Grammar) -> AstSrc { let mut res = AstSrc { tokens: "Whitespace Comment String ByteString CString IntNumber FloatNumber Char Byte Ident" diff --git a/rust/generate-schema/src/main.rs b/rust/generate-schema/src/main.rs index 4b3656339ca..69361404446 100644 --- a/rust/generate-schema/src/main.rs +++ b/rust/generate-schema/src/main.rs @@ -1,12 +1,503 @@ -use std::path::PathBuf; +use std::{fs, path::PathBuf}; -mod codegen; +pub mod codegen; mod flags; +use codegen::grammar::ast_src::{AstNodeSrc, AstSrc}; +use std::collections::{BTreeMap, BTreeSet}; use std::env; +use ungrammar::Grammar; fn project_root() -> PathBuf { let dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()); PathBuf::from(dir).parent().unwrap().to_owned() } -fn main() {} + +fn class_name(type_name: &String) -> String { + match type_name.as_str() { + "BinExpr" => "BinaryExpr".to_owned(), + "ElseBranch" => "Expr".to_owned(), + "Fn" => "Function".to_owned(), + "Literal" => "LiteralExpr".to_owned(), + "Type" => "TypeRef".to_owned(), + _ => type_name.to_owned(), + } +} + +fn property_name(type_name: &String, field_name: &String) -> String { + match (type_name.as_str(), field_name.as_str()) { + ("Path", "segment") => 
"part".to_owned(), + (_, "then_branch") => "then".to_owned(), + (_, "else_branch") => "else_".to_owned(), + _ => field_name.to_owned(), + } +} + +fn to_lower_snake_case(s: &str) -> String { + let mut buf = String::with_capacity(s.len()); + let mut prev = false; + for c in s.chars() { + if c.is_ascii_uppercase() && prev { + buf.push('_') + } + prev = true; + + buf.push(c.to_ascii_lowercase()); + } + buf +} + +fn print_schema(grammar: &AstSrc, super_types: BTreeMap>) { + for node in &grammar.enums { + let super_classses = if let Some(cls) = super_types.get(&node.name) { + let super_classes: Vec = cls.iter().map(|x| class_name(x)).collect(); + super_classes.join(",") + } else { + "AstNode".to_owned() + }; + println!("class {}({}):", class_name(&node.name), super_classses); + println!(" pass"); + println!(""); + } + for node in &grammar.nodes { + let super_classses = if let Some(cls) = super_types.get(&node.name) { + let super_classes: Vec = cls.iter().map(|x| class_name(x)).collect(); + super_classes.join(",") + } else { + "AstNode".to_owned() + }; + println!("class {}({}):", class_name(&node.name), super_classses); + let mut empty = true; + for field in get_fields(node) { + if field.tp == "SyntaxToken" { + continue; + } + + empty = false; + if field.tp == "string" { + println!( + " {}: optional[string]", + property_name(&node.name, &field.name), + ); + } else { + let list = field.is_many; + let (o, c) = if list { + ("list[", "]") + } else { + ("optional[", "]") + }; + println!( + " {}: {}\"{}\"{} | child", + property_name(&node.name, &field.name), + o, + class_name(&field.tp), + c + ); + }; + } + if empty { + println!(" pass"); + } + println!(""); + } +} + +struct FieldInfo { + name: String, + tp: String, + is_many: bool, +} +fn get_fields(node: &AstNodeSrc) -> Vec { + let mut result = Vec::new(); + + match node.name.as_str() { + "Name" | "NameRef" | "Lifetime" => { + result.push(FieldInfo { + name: "text".to_string(), + tp: "string".to_string(), + is_many: false, + 
}); + } + "Abi" => { + result.push(FieldInfo { + name: "abi_string".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "Literal" => { + result.push(FieldInfo { + name: "text_value".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "PrefixExpr" => { + result.push(FieldInfo { + name: "operator_name".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "BinExpr" => { + result.push(FieldInfo { + name: "lhs".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "rhs".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "operator_name".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "IfExpr" => { + result.push(FieldInfo { + name: "then_branch".to_string(), + tp: "BlockExpr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "else_branch".to_string(), + tp: "ElseBranch".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "condition".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "RangeExpr" => { + result.push(FieldInfo { + name: "start".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "end".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "operator_name".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "RangePat" => { + result.push(FieldInfo { + name: "start".to_string(), + tp: "Pat".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "end".to_string(), + tp: "Pat".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "operator_name".to_string(), + tp: "string".to_string(), + is_many: false, + }); + } + "IndexExpr" => { + result.push(FieldInfo { + name: "index".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "base".to_string(), + tp: 
"Expr".to_string(), + is_many: false, + }); + } + "Impl" => { + result.push(FieldInfo { + name: "trait_".to_string(), + tp: "Type".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "self_ty".to_string(), + tp: "Type".to_string(), + is_many: false, + }); + } + "ForExpr" => { + result.push(FieldInfo { + name: "iterable".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "WhileExpr" => { + result.push(FieldInfo { + name: "condition".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "MatchGuard" => { + result.push(FieldInfo { + name: "condition".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "MacroDef" => { + result.push(FieldInfo { + name: "args".to_string(), + tp: "TokenTree".to_string(), + is_many: false, + }); + result.push(FieldInfo { + name: "body".to_string(), + tp: "TokenTree".to_string(), + is_many: false, + }); + } + "FormatArgsExpr" => { + result.push(FieldInfo { + name: "args".to_string(), + tp: "FormatArgsArg".to_string(), + is_many: true, + }); + } + "ArgList" => { + result.push(FieldInfo { + name: "args".to_string(), + tp: "Expr".to_string(), + is_many: true, + }); + } + "Fn" => { + result.push(FieldInfo { + name: "body".to_string(), + tp: "BlockExpr".to_string(), + is_many: false, + }); + } + "Const" => { + result.push(FieldInfo { + name: "body".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "Static" => { + result.push(FieldInfo { + name: "body".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + "ClosureExpr" => { + result.push(FieldInfo { + name: "body".to_string(), + tp: "Expr".to_string(), + is_many: false, + }); + } + _ => {} + } + + for field in &node.fields { + // The ArrayExpr type also has an 'exprs' field + if node.name == "ArrayExpr" && field.method_name() == "expr" { + continue; + } + result.push(FieldInfo { + name: field.method_name(), + tp: field.ty().to_string(), + is_many: field.is_many(), + }); + } + for trait_ in 
&node.traits { + match trait_.as_str() { + "HasAttrs" => result.push(FieldInfo { + name: "attrs".to_owned(), + tp: "Attr".to_owned(), + is_many: true, + }), + "HasName" => result.push(FieldInfo { + name: "name".to_owned(), + tp: "Name".to_owned(), + is_many: false, + }), + "HasVisibility" => result.push(FieldInfo { + name: "visibility".to_owned(), + tp: "Visibility".to_owned(), + is_many: false, + }), + "HasGenericParams" => { + result.push(FieldInfo { + name: "generic_param_list".to_owned(), + tp: "GenericParamList".to_owned(), + is_many: false, + }); + result.push(FieldInfo { + name: "where_clause".to_owned(), + tp: "WhereClause".to_owned(), + is_many: false, + }) + } + "HasGenericArgs" => result.push(FieldInfo { + name: "generic_arg_list".to_owned(), + tp: "GenericArgList".to_owned(), + is_many: false, + }), + "HasTypeBounds" => result.push(FieldInfo { + name: "type_bound_list".to_owned(), + tp: "TypeBoundList".to_owned(), + is_many: false, + }), + "HasModuleItem" => result.push(FieldInfo { + name: "items".to_owned(), + tp: "Item".to_owned(), + is_many: true, + }), + "HasLoopBody" => { + result.push(FieldInfo { + name: "label".to_owned(), + tp: "Label".to_owned(), + is_many: false, + }); + result.push(FieldInfo { + name: "loop_body".to_owned(), + tp: "BlockExpr".to_owned(), + is_many: false, + }) + } + "HasArgList" => result.push(FieldInfo { + name: "arg_list".to_owned(), + tp: "ArgList".to_owned(), + is_many: false, + }), + "HasDocComments" => {} + + _ => panic!("Unknown trait {}", trait_), + }; + } + result.sort_by(|x, y| x.name.cmp(&y.name)); + result +} + +fn print_extractor(grammar: &AstSrc) { + for node in &grammar.enums { + let type_name = &node.name; + let class_name = class_name(&node.name); + + println!( + " fn emit_{}(&mut self, node: ast::{}) -> Label {{", + to_lower_snake_case(type_name), + type_name, + class_name + ); + println!(" match node {{"); + for variant in &node.variants { + println!( + " ast::{}::{}(inner) => self.emit_{}(inner).into(),", 
+ type_name, + variant, + to_lower_snake_case(variant) + ); + } + println!(" }}"); + println!(" }}\n"); + } + + for node in &grammar.nodes { + let type_name = &node.name; + let class_name = class_name(&node.name); + + println!( + " fn emit_{}(&mut self, node: ast::{}) -> Label {{", + to_lower_snake_case(type_name), + type_name, + class_name + ); + for field in get_fields(&node) { + if &field.tp == "SyntaxToken" { + continue; + } + + let type_name = &field.tp; + let struct_field_name = &field.name; + let class_field_name = property_name(&node.name, &field.name); + if field.tp == "string" { + println!(" let {} = node.try_get_text();", class_field_name,); + } else if field.is_many { + println!( + " let {} = node.{}().map(|x| self.emit_{}(x)).collect();", + class_field_name, + struct_field_name, + to_lower_snake_case(type_name) + ); + } else { + println!( + " let {} = node.{}().map(|x| self.emit_{}(x));", + class_field_name, + struct_field_name, + to_lower_snake_case(type_name) + ); + } + } + println!( + " let label = self.trap.emit(generated::{} {{", + class_name + ); + println!(" id: TrapId::Star,"); + for field in get_fields(&node) { + if field.tp == "SyntaxToken" { + continue; + } + + let class_field_name: String = property_name(&node.name, &field.name); + println!(" {},", class_field_name); + } + println!(" }});"); + println!(" self.emit_location(label, node);"); + println!(" label"); + + println!(" }}\n"); + } +} + +fn main() { + let grammar: Grammar = fs::read_to_string(project_root().join("generate-schema/rust.ungram")) + .unwrap() + .parse() + .unwrap(); + let mut grammar = codegen::grammar::lower(&grammar); + grammar + .nodes + .retain(|x| x.name != "MacroStmts" && x.name != "MacroItems"); + + grammar.enums.retain(|x| x.name != "Adt"); + + let mut super_types: BTreeMap> = BTreeMap::new(); + for node in &grammar.enums { + for variant in &node.variants { + let set = super_types + .entry(variant.to_owned()) + .or_insert_with(|| BTreeSet::new()); + 
set.insert(node.name.to_owned()); } } // sort things while ensuring super classes are defined before they are used grammar.enums.sort_by(|x, y| { let super_class_x = super_types.get(&x.name).into_iter().flatten().max(); let super_class_y = super_types.get(&y.name).into_iter().flatten().max(); super_class_x.cmp(&super_class_y).then(x.name.cmp(&y.name)) }); //print_schema(&grammar, super_types); print_extractor(&grammar); }