codeql/rust/ast-generator/src/main.rs
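//! Generates the Python AST schema (`schema/ast.py`) and the Rust extractor
//! translation code (`extractor/src/translate/generated.rs`) from the `ungrammar`
//! grammar file given as the first command-line argument.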
use std::io::Write;
use std::{fs, path::PathBuf};
pub mod codegen;
mod flags;
use codegen::grammar::ast_src::{AstNodeSrc, AstSrc, Field};
use std::collections::{BTreeMap, BTreeSet};
use std::env;
use ungrammar::Grammar;
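/// Returns the parent of the crate's `CARGO_MANIFEST_DIR`, used as the root for
/// the generated output paths.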
fn project_root() -> PathBuf {
let dir = env::var("CARGO_MANIFEST_DIR").unwrap().to_owned();
PathBuf::from(dir).parent().unwrap().to_owned()
}
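/// Maps a grammar node name to its class name in the generated schema, renaming a
/// few nodes (e.g. `Fn` -> `Function`) and giving every `*Type` node a `*TypeRepr` name.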
fn class_name(type_name: &str) -> String {
match type_name {
"BinExpr" => "BinaryExpr".to_owned(),
"ElseBranch" => "Expr".to_owned(),
"Fn" => "Function".to_owned(),
"Literal" => "LiteralExpr".to_owned(),
"ArrayExpr" => "ArrayExprInternal".to_owned(),
"AsmOptions" => "AsmOptionsList".to_owned(),
_ if type_name.ends_with("Type") => format!("{}Repr", type_name),
_ => type_name.to_owned(),
}
}
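/// Maps a grammar field name to its property name in the generated schema, renaming
/// fields that would otherwise clash or read poorly (e.g. `ty` -> `type_repr`,
/// `else_branch` -> `else_`).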
fn property_name(type_name: &str, field_name: &str) -> String {
let name = match (type_name, field_name) {
("CallExpr", "expr") => "function",
("LetExpr", "expr") => "scrutinee",
("MatchExpr", "expr") => "scrutinee",
("Path", "segment") => "part",
(_, "then_branch") => "then",
(_, "else_branch") => "else_",
("ArrayType", "ty") => "element_type_repr",
("SelfParam", "is_amp") => "is_ref",
(_, "ty") => "type_repr",
_ => field_name,
};
name.to_owned()
}
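/// Converts an UpperCamelCase name to lower_snake_case by inserting an underscore
/// before every uppercase letter except the first character.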
fn to_lower_snake_case(s: &str) -> String {
let mut buf = String::with_capacity(s.len());
let mut prev = false;
for c in s.chars() {
if c.is_ascii_uppercase() && prev {
buf.push('_')
}
prev = true;
buf.push(c.to_ascii_lowercase());
}
buf
}
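/// Renders the Python schema: one class per enum and one per node, deriving from the
/// enums the type belongs to (or `AstNode` by default), with a typed property per field.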
fn write_schema(
grammar: &AstSrc,
super_types: BTreeMap<String, BTreeSet<String>>,
) -> std::io::Result<String> {
let mut buf: Vec<u8> = Vec::new();
writeln!(
buf,
"# Generated by `ast-generator`, do not edit by hand.\n"
)?;
writeln!(buf, "from .prelude import *")?;
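    // Emit one class per enum; its bases are the enums it belongs to, or `AstNode`.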
for node in &grammar.enums {
        let super_classes = if let Some(cls) = super_types.get(&node.name) {
            let names: Vec<String> = cls.iter().map(|s| class_name(s)).collect();
            names.join(",")
        } else {
            "AstNode".to_owned()
        };
        writeln!(
            buf,
            "\nclass {}({}):",
            class_name(&node.name),
            super_classes
        )?;
writeln!(buf, " pass")?;
}
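    // Emit one class per node, with one typed property per field (token fields are skipped).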
for node in &grammar.nodes {
        let super_classes = if let Some(cls) = super_types.get(&node.name) {
            let names: Vec<String> = cls.iter().map(|s| class_name(s)).collect();
            names.join(",")
        } else {
            "AstNode".to_owned()
        };
        writeln!(
            buf,
            "\nclass {}({}):",
            class_name(&node.name),
            super_classes
        )?;
let mut empty = true;
for field in get_fields(node) {
if field.tp == "SyntaxToken" {
continue;
}
empty = false;
if field.tp == "predicate" {
writeln!(
buf,
" {}: predicate",
property_name(&node.name, &field.name),
)?;
} else if field.tp == "string" {
writeln!(
buf,
" {}: optional[string]",
property_name(&node.name, &field.name),
)?;
} else {
let list = field.is_many;
let (o, c) = if list {
("list[", "]")
} else {
("optional[", "]")
};
writeln!(
buf,
" {}: {}\"{}\"{} | child",
property_name(&node.name, &field.name),
o,
class_name(&field.tp),
c
)?;
};
}
if empty {
writeln!(buf, " pass")?;
}
}
Ok(String::from_utf8_lossy(&buf).to_string())
}
struct FieldInfo {
name: String,
tp: String,
is_many: bool,
}
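/// Collects the fields to generate for a node: keyword tokens become `is_*` predicates,
/// hand-maintained extras are added per node name, then the remaining grammar fields and
/// the fields implied by the `Has*` traits; the result is sorted by field name.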
fn get_fields(node: &AstNodeSrc) -> Vec<FieldInfo> {
let mut result = Vec::new();
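    // Keyword tokens that are surfaced as boolean `is_*` predicates.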
let predicates = [
"async", "auto", "const", "default", "gen", "move", "mut", "raw", "ref", "static", "try",
"unsafe",
];
for field in &node.fields {
if let Field::Token(name) = field {
if predicates.contains(&name.as_str()) {
result.push(FieldInfo {
name: format!("is_{name}"),
tp: "predicate".to_string(),
is_many: false,
});
}
}
}
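    // Extra fields, maintained by hand, that are not picked up from the lowered grammar.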
match node.name.as_str() {
"Name" | "NameRef" | "Lifetime" => {
result.push(FieldInfo {
name: "text".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"Abi" => {
result.push(FieldInfo {
name: "abi_string".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"Literal" => {
result.push(FieldInfo {
name: "text_value".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"PrefixExpr" => {
result.push(FieldInfo {
name: "operator_name".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"BinExpr" => {
result.push(FieldInfo {
name: "lhs".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "rhs".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "operator_name".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"IfExpr" => {
result.push(FieldInfo {
name: "then_branch".to_string(),
tp: "BlockExpr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "else_branch".to_string(),
tp: "ElseBranch".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "condition".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"RangeExpr" => {
result.push(FieldInfo {
name: "start".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "end".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "operator_name".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"RangePat" => {
result.push(FieldInfo {
name: "start".to_string(),
tp: "Pat".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "end".to_string(),
tp: "Pat".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "operator_name".to_string(),
tp: "string".to_string(),
is_many: false,
});
}
"IndexExpr" => {
result.push(FieldInfo {
name: "index".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "base".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"Impl" => {
result.push(FieldInfo {
name: "trait_".to_string(),
tp: "Type".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "self_ty".to_string(),
tp: "Type".to_string(),
is_many: false,
});
}
"ForExpr" => {
result.push(FieldInfo {
name: "iterable".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"WhileExpr" => {
result.push(FieldInfo {
name: "condition".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"MatchGuard" => {
result.push(FieldInfo {
name: "condition".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"MacroDef" => {
result.push(FieldInfo {
name: "args".to_string(),
tp: "TokenTree".to_string(),
is_many: false,
});
result.push(FieldInfo {
name: "body".to_string(),
tp: "TokenTree".to_string(),
is_many: false,
});
}
"FormatArgsExpr" => {
result.push(FieldInfo {
name: "args".to_string(),
tp: "FormatArgsArg".to_string(),
is_many: true,
});
}
"ArgList" => {
result.push(FieldInfo {
name: "args".to_string(),
tp: "Expr".to_string(),
is_many: true,
});
}
"Fn" => {
result.push(FieldInfo {
name: "body".to_string(),
tp: "BlockExpr".to_string(),
is_many: false,
});
}
"Const" => {
result.push(FieldInfo {
name: "body".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"Static" => {
result.push(FieldInfo {
name: "body".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"ClosureExpr" => {
result.push(FieldInfo {
name: "body".to_string(),
tp: "Expr".to_string(),
is_many: false,
});
}
"ArrayExpr" => {
result.push(FieldInfo {
name: "is_semicolon".to_string(),
tp: "predicate".to_string(),
is_many: false,
});
}
"SelfParam" => {
result.push(FieldInfo {
name: "is_amp".to_string(),
tp: "predicate".to_string(),
is_many: false,
});
}
_ => {}
}
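    // All remaining fields come directly from the lowered grammar.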
for field in &node.fields {
        // ArrayExpr also has an `exprs` field, so skip its singular `expr` field here.
if node.name == "ArrayExpr" && field.method_name() == "expr" {
continue;
}
result.push(FieldInfo {
name: field.method_name(),
tp: field.ty().to_string(),
is_many: field.is_many(),
});
}
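    // Fields contributed by the `Has*` traits implemented by this node.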
for trait_ in &node.traits {
match trait_.as_str() {
"HasAttrs" => result.push(FieldInfo {
name: "attrs".to_owned(),
tp: "Attr".to_owned(),
is_many: true,
}),
"HasName" => result.push(FieldInfo {
name: "name".to_owned(),
tp: "Name".to_owned(),
is_many: false,
}),
"HasVisibility" => result.push(FieldInfo {
name: "visibility".to_owned(),
tp: "Visibility".to_owned(),
is_many: false,
}),
"HasGenericParams" => {
result.push(FieldInfo {
name: "generic_param_list".to_owned(),
tp: "GenericParamList".to_owned(),
is_many: false,
});
result.push(FieldInfo {
name: "where_clause".to_owned(),
tp: "WhereClause".to_owned(),
is_many: false,
})
}
"HasGenericArgs" => result.push(FieldInfo {
name: "generic_arg_list".to_owned(),
tp: "GenericArgList".to_owned(),
is_many: false,
}),
"HasTypeBounds" => result.push(FieldInfo {
name: "type_bound_list".to_owned(),
tp: "TypeBoundList".to_owned(),
is_many: false,
}),
"HasModuleItem" => result.push(FieldInfo {
name: "items".to_owned(),
tp: "Item".to_owned(),
is_many: true,
}),
"HasLoopBody" => {
result.push(FieldInfo {
name: "label".to_owned(),
tp: "Label".to_owned(),
is_many: false,
});
result.push(FieldInfo {
name: "loop_body".to_owned(),
tp: "BlockExpr".to_owned(),
is_many: false,
})
}
"HasArgList" => result.push(FieldInfo {
name: "arg_list".to_owned(),
tp: "ArgList".to_owned(),
is_many: false,
}),
"HasDocComments" => {}
_ => panic!("Unknown trait {}", trait_),
};
}
result.sort_by(|x, y| x.name.cmp(&y.name));
result
}
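/// Renders the Rust translation code: an `emit_*` method per enum that dispatches on the
/// concrete variant, and one per node that extracts each field, emits the TRAP entry and
/// its location, and then emits the node's tokens.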
fn write_extractor(grammar: &AstSrc) -> std::io::Result<String> {
let mut buf: Vec<u8> = Vec::new();
writeln!(
buf,
"//! Generated by `ast-generator`, do not edit by hand.\n
#![cfg_attr(any(), rustfmt::skip)]
use super::base::Translator;
use super::mappings::TextValue;
use crate::emit_detached;
use crate::generated;
use crate::trap::{{Label, TrapId}};
use ra_ap_syntax::ast::{{
HasArgList, HasAttrs, HasGenericArgs, HasGenericParams, HasLoopBody, HasModuleItem, HasName,
HasTypeBounds, HasVisibility, RangeItem,
}};
use ra_ap_syntax::{{ast, AstNode}};
impl Translator<'_> {{
fn emit_else_branch(&mut self, node: ast::ElseBranch) -> Label<generated::Expr> {{
match node {{
ast::ElseBranch::IfExpr(inner) => self.emit_if_expr(inner).into(),
ast::ElseBranch::Block(inner) => self.emit_block_expr(inner).into(),
}}
}}\n"
)?;
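    // Enums: dispatch to the emitter of the concrete variant.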
for node in &grammar.enums {
let type_name = &node.name;
let class_name = class_name(&node.name);
writeln!(
buf,
" pub(crate) fn emit_{}(&mut self, node: ast::{}) -> Label<generated::{}> {{",
to_lower_snake_case(type_name),
type_name,
class_name
)?;
writeln!(buf, " match node {{")?;
for variant in &node.variants {
writeln!(
buf,
" ast::{}::{}(inner) => self.emit_{}(inner).into(),",
type_name,
variant,
to_lower_snake_case(variant)
)?;
}
writeln!(buf, " }}")?;
writeln!(buf, " }}\n")?;
}
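    // Nodes: extract every field, then emit the entry for the node itself.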
for node in &grammar.nodes {
let type_name = &node.name;
let class_name = class_name(&node.name);
writeln!(
buf,
" pub(crate) fn emit_{}(&mut self, node: ast::{}) -> Label<generated::{}> {{",
to_lower_snake_case(type_name),
type_name,
class_name
)?;
for field in get_fields(node) {
if &field.tp == "SyntaxToken" {
continue;
}
let type_name = &field.tp;
let struct_field_name = &field.name;
let class_field_name = property_name(&node.name, &field.name);
if field.tp == "predicate" {
writeln!(
buf,
" let {} = node.{}_token().is_some();",
class_field_name,
&struct_field_name[3..],
)?;
} else if field.tp == "string" {
writeln!(
buf,
" let {} = node.try_get_text();",
class_field_name,
)?;
} else if field.is_many {
writeln!(
buf,
" let {} = node.{}().map(|x| self.emit_{}(x)).collect();",
class_field_name,
struct_field_name,
to_lower_snake_case(type_name)
)?;
} else {
writeln!(
buf,
" let {} = node.{}().map(|x| self.emit_{}(x));",
class_field_name,
struct_field_name,
to_lower_snake_case(type_name)
)?;
}
}
writeln!(
buf,
" let label = self.trap.emit(generated::{} {{",
class_name
)?;
writeln!(buf, " id: TrapId::Star,")?;
for field in get_fields(node) {
if field.tp == "SyntaxToken" {
continue;
}
let class_field_name: String = property_name(&node.name, &field.name);
writeln!(buf, " {},", class_field_name)?;
}
writeln!(buf, " }});")?;
writeln!(buf, " self.emit_location(label, &node);")?;
writeln!(
buf,
" emit_detached!({}, self, node, label);",
class_name
)?;
writeln!(
buf,
" self.emit_tokens(&node, label.into(), node.syntax().children_with_tokens());"
)?;
writeln!(buf, " label")?;
writeln!(buf, " }}\n")?;
}
writeln!(buf, "}}")?;
Ok(String::from_utf8_lossy(&buf).into_owned())
}
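/// Parses the grammar file given on the command line (resolved relative to the parent
/// directory), lowers it, computes the super-class relation from the enums, and writes
/// both generated files.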
fn main() -> std::io::Result<()> {
let grammar = PathBuf::from("..").join(env::args().nth(1).expect("grammar file path required"));
let grammar: Grammar = fs::read_to_string(&grammar)
        .unwrap_or_else(|_| panic!("Failed to read grammar file: {}", grammar.display()))
.parse()
.expect("Failed to parse grammar");
let mut grammar = codegen::grammar::lower(&grammar);
grammar.enums.retain(|x| x.name != "Adt");
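    // Map each enum variant to the set of enums (super classes) it belongs to.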
let mut super_types: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
for node in &grammar.enums {
for variant in &node.variants {
let set = super_types.entry(variant.to_owned()).or_default();
set.insert(node.name.to_owned());
}
}
    // Sort the enums so that super classes are defined before they are used.
grammar.enums.sort_by(|x, y| {
let super_class_x = super_types.get(&x.name).into_iter().flatten().max();
let super_class_y = super_types.get(&y.name).into_iter().flatten().max();
super_class_x.cmp(&super_class_y).then(x.name.cmp(&y.name))
});
let schema = write_schema(&grammar, super_types)?;
let schema_path = project_root().join("schema/ast.py");
codegen::ensure_file_contents(
crate::flags::CodegenType::Grammar,
&schema_path,
&schema,
false,
);
let extractor = write_extractor(&grammar)?;
let extractor_path = project_root().join("extractor/src/translate/generated.rs");
codegen::ensure_file_contents(
crate::flags::CodegenType::Grammar,
&extractor_path,
&extractor,
false,
);
Ok(())
}