use serde::Deserialize; use std::collections::BTreeMap; use std::path::Path; use std::collections::BTreeSet as Set; use std::fs; /// A lookup table from TypeName to Entry. pub type NodeTypeMap = BTreeMap; #[derive(Debug)] pub struct Entry { pub dbscheme_name: String, pub ql_class_name: String, pub kind: EntryKind, } #[derive(Debug)] pub enum EntryKind { Union { members: Set }, Table { name: String, fields: Vec }, Token { kind_id: usize }, } #[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] pub struct TypeName { pub kind: String, pub named: bool, } #[derive(Debug)] pub enum FieldTypeInfo { /// The field has a single type. Single(TypeName), /// The field can take one of several types, so we also provide the name of /// the database union type that wraps them, and the corresponding QL class /// name. Multiple { types: Set, dbscheme_union: String, ql_class: String, }, /// The field can be one of several tokens, so the db type will be an `int` /// with a `case @foo.kind` for each possiblity. ReservedWordInt(BTreeMap), } #[derive(Debug)] pub struct Field { pub parent: TypeName, pub type_info: FieldTypeInfo, /// The name of the field or None for the anonymous 'children' /// entry from node_types.json pub name: Option, /// The name of the predicate to get this field. pub getter_name: String, pub storage: Storage, } fn name_for_field_or_child(name: &Option) -> String { match name { Some(name) => name.clone(), None => "child".to_owned(), } } #[derive(Debug)] pub enum Storage { /// the field is stored as a column in the parent table Column { name: String }, /// the field is stored in a link table, and may or may not have an /// associated index column Table { name: String, has_index: bool }, } pub fn read_node_types(node_types_path: &Path) -> std::io::Result { let file = fs::File::open(node_types_path)?; let node_types = serde_json::from_reader(file)?; Ok(convert_nodes(&node_types)) } pub fn read_node_types_str(node_types_json: &str) -> std::io::Result { let node_types = serde_json::from_str(node_types_json)?; Ok(convert_nodes(&node_types)) } fn convert_type(node_type: &NodeType) -> TypeName { TypeName { kind: node_type.kind.to_string(), named: node_type.named, } } fn convert_types(node_types: &Vec) -> Set { let iter = node_types.iter().map(convert_type).collect(); std::collections::BTreeSet::from(iter) } pub fn convert_nodes(nodes: &Vec) -> NodeTypeMap { let mut entries = NodeTypeMap::new(); let mut token_kinds = Set::new(); // First, find all the token kinds for node in nodes { if node.subtypes.is_none() { if node.fields.as_ref().map_or(0, |x| x.len()) == 0 && node.children.is_none() { let type_name = TypeName { kind: node.kind.clone(), named: node.named, }; token_kinds.insert(type_name); } } } for node in nodes { let flattened_name = &node_type_name(&node.kind, node.named); let dbscheme_name = escape_name(&flattened_name); let ql_class_name = dbscheme_name_to_class_name(&dbscheme_name); if let Some(subtypes) = &node.subtypes { // It's a tree-sitter supertype node, for which we create a union // type. entries.insert( TypeName { kind: node.kind.clone(), named: node.named, }, Entry { dbscheme_name, ql_class_name, kind: EntryKind::Union { members: convert_types(&subtypes), }, }, ); } else if node.fields.as_ref().map_or(0, |x| x.len()) == 0 && node.children.is_none() { // Token kind, handled above. } else { // It's a product type, defined by a table. let type_name = TypeName { kind: node.kind.clone(), named: node.named, }; let table_name = escape_name(&(format!("{}_def", &flattened_name))); let mut fields = Vec::new(); // If the type also has fields or children, then we create either // auxiliary tables or columns in the defining table for them. if let Some(node_fields) = &node.fields { for (field_name, field_info) in node_fields { add_field( &type_name, Some(field_name.to_string()), field_info, &mut fields, &token_kinds, ); } } if let Some(children) = &node.children { // Treat children as if they were a field called 'child'. add_field(&type_name, None, children, &mut fields, &token_kinds); } entries.insert( type_name, Entry { dbscheme_name, ql_class_name, kind: EntryKind::Table { name: table_name, fields, }, }, ); } } let mut counter = 0; for type_name in token_kinds { let entry = if type_name.named { counter += 1; let unprefixed_name = node_type_name(&type_name.kind, true); Entry { dbscheme_name: escape_name(&format!("token_{}", &unprefixed_name)), ql_class_name: dbscheme_name_to_class_name(&escape_name(&unprefixed_name)), kind: EntryKind::Token { kind_id: counter }, } } else { Entry { dbscheme_name: "reserved_word".to_owned(), ql_class_name: "ReservedWord".to_owned(), kind: EntryKind::Token { kind_id: 0 }, } }; entries.insert(type_name, entry); } entries } fn add_field( parent_type_name: &TypeName, field_name: Option, field_info: &FieldInfo, fields: &mut Vec, token_kinds: &Set, ) { let parent_flattened_name = node_type_name(&parent_type_name.kind, parent_type_name.named); let storage = if !field_info.multiple && field_info.required { // This field must appear exactly once, so we add it as // a column to the main table for the node type. Storage::Column { name: escape_name(&name_for_field_or_child(&field_name)), } } else { // Put the field in an auxiliary table. let has_index = field_info.multiple; let field_table_name = escape_name(&format!( "{}_{}", parent_flattened_name, &name_for_field_or_child(&field_name) )); Storage::Table { has_index, name: field_table_name, } }; let converted_types = convert_types(&field_info.types); let type_info = if field_info .types .iter() .all(|t| !t.named && token_kinds.contains(&convert_type(t))) { // All possible types for this field are reserved words. The db // representation will be an `int` with a `case @foo.field = ...` to // enumerate the possible values. let mut counter = 0; let mut field_token_ints: BTreeMap = BTreeMap::new(); for t in converted_types { let dbscheme_variant_name = escape_name(&format!("{}_{}", parent_flattened_name, t.kind)); field_token_ints.insert(t.kind.to_owned(), (counter, dbscheme_variant_name)); counter += 1; } FieldTypeInfo::ReservedWordInt(field_token_ints) } else if field_info.types.len() == 1 { FieldTypeInfo::Single(converted_types.into_iter().next().unwrap()) } else { // The dbscheme type for this field will be a union. In QL, it'll just be AstNode. FieldTypeInfo::Multiple { types: converted_types, dbscheme_union: format!( "{}_{}_type", &parent_flattened_name, &name_for_field_or_child(&field_name) ), ql_class: "AstNode".to_owned(), } }; let getter_name = format!( "get{}", dbscheme_name_to_class_name(&escape_name(&name_for_field_or_child(&field_name))) ); fields.push(Field { parent: TypeName { kind: parent_type_name.kind.to_string(), named: parent_type_name.named, }, type_info, name: field_name, getter_name, storage, }); } #[derive(Deserialize)] pub struct NodeInfo { #[serde(rename = "type")] pub kind: String, pub named: bool, #[serde(skip_serializing_if = "Option::is_none")] pub fields: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub children: Option, #[serde(skip_serializing_if = "Option::is_none")] pub subtypes: Option>, } #[derive(Deserialize)] pub struct NodeType { #[serde(rename = "type")] pub kind: String, pub named: bool, } #[derive(Deserialize)] pub struct FieldInfo { pub multiple: bool, pub required: bool, pub types: Vec, } /// Given a tree-sitter node type's (kind, named) pair, returns a single string /// representing the (unescaped) name we'll use to refer to corresponding QL /// type. fn node_type_name(kind: &str, named: bool) -> String { if named { kind.to_string() } else { format!("{}_unnamed", kind) } } const RESERVED_KEYWORDS: [&'static str; 14] = [ "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", "type", "unique", "varchar", ]; /// Returns a string that's a copy of `name` but suitably escaped to be a valid /// QL identifier. fn escape_name(name: &str) -> String { let mut result = String::new(); // If there's a leading underscore, replace it with 'underscore_'. if let Some(c) = name.chars().next() { if c == '_' { result.push_str("underscore"); } } for c in name.chars() { match c { '{' => result.push_str("lbrace"), '}' => result.push_str("rbrace"), '<' => result.push_str("langle"), '>' => result.push_str("rangle"), '[' => result.push_str("lbracket"), ']' => result.push_str("rbracket"), '(' => result.push_str("lparen"), ')' => result.push_str("rparen"), '|' => result.push_str("pipe"), '=' => result.push_str("equal"), '~' => result.push_str("tilde"), '?' => result.push_str("question"), '`' => result.push_str("backtick"), '^' => result.push_str("caret"), '!' => result.push_str("bang"), '#' => result.push_str("hash"), '%' => result.push_str("percent"), '&' => result.push_str("ampersand"), '.' => result.push_str("dot"), ',' => result.push_str("comma"), '/' => result.push_str("slash"), ':' => result.push_str("colon"), ';' => result.push_str("semicolon"), '"' => result.push_str("dquote"), '*' => result.push_str("star"), '+' => result.push_str("plus"), '-' => result.push_str("minus"), '@' => result.push_str("at"), _ if c.is_uppercase() => { result.push('_'); result.push_str(&c.to_lowercase().to_string()) } _ => result.push(c), } } for &keyword in &RESERVED_KEYWORDS { if result == keyword { result.push_str("__"); break; } } result } /// Given a valid dbscheme name (i.e. in snake case), produces the equivalent QL /// name (i.e. in CamelCase). For example, "foo_bar_baz" becomes "FooBarBaz". fn dbscheme_name_to_class_name(dbscheme_name: &str) -> String { fn to_title_case(word: &str) -> String { let mut first = true; let mut result = String::new(); for c in word.chars() { if first { first = false; result.push(c.to_ascii_uppercase()); } else { result.push(c); } } result } dbscheme_name .split('_') .map(|word| to_title_case(word)) .collect::>() .join("") }