diff --git a/Cargo.lock b/Cargo.lock index 6e11bf03a9c..8ed7cea0808 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,6 +101,8 @@ name = "generator" version = "0.1.0" dependencies = [ "node-types", + "tracing", + "tracing-subscriber", ] [[package]] @@ -265,8 +267,7 @@ version = "0.1.0" dependencies = [ "cc", "clap", - "serde", - "serde_json", + "node-types", "tracing", "tracing-subscriber", "tree-sitter", diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index 587f99a4999..527321aa229 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -7,9 +7,8 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +node-types = { path = "../node-types" } tree-sitter = "0.17.0" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" clap = "2.33" tracing = "0.1" tracing-subscriber = { version = "0.2", features = ["env-filter"] } diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs index c57179c22b7..4593c16ff8e 100644 --- a/extractor/src/extractor.rs +++ b/extractor/src/extractor.rs @@ -1,5 +1,4 @@ -use super::nodes_types::{Entry, Field, Storage, TypeName}; - +use node_types::{escape_name, node_type_name, Entry, Field, Storage, TypeName}; use std::collections::BTreeMap as Map; use std::collections::BTreeSet as Set; use std::fmt; @@ -23,7 +22,7 @@ impl Extractor { let span = span!( Level::TRACE, "extract", - file = &path.display().to_string()[..] + file = %path.display() ); let _enter = span.enter(); @@ -99,11 +98,7 @@ struct Visitor<'a> { impl Visitor<'_> { fn enter_node(&mut self, node: Node) -> bool { if node.is_error() { - error!( - "{}:{}: parse error", - &self.path, - node.start_position().row, - ); + error!("{}:{}: parse error", &self.path, node.start_position().row); return false; } if node.is_missing() { @@ -237,10 +232,10 @@ impl Visitor<'_> { ) } } - Storage::Table { parent, index } => { + Storage::Table { index } => { for child_id in child_ids { self.trap_output.push(TrapEntry::ChildOf( - node_type_name(&parent.kind, parent.named), + node_type_name(&field.parent.kind, field.parent.named), parent_id, match &field.name { Some(name) => name.to_owned(), @@ -427,74 +422,3 @@ impl fmt::Display for Arg { } } } - -const RESERVED_KEYWORDS: [&'static str; 14] = [ - "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", - "type", "unique", "varchar", -]; - -/// Returns a string that's a copy of `name` but suitably escaped to be a valid -/// QL identifier. -pub fn escape_name(name: &str) -> String { - let mut result = String::new(); - - // If there's a leading underscore, replace it with 'underscore_'. - if let Some(c) = name.chars().next() { - if c == '_' { - result.push_str("underscore"); - } - } - for c in name.chars() { - match c { - '{' => result.push_str("lbrace"), - '}' => result.push_str("rbrace"), - '<' => result.push_str("langle"), - '>' => result.push_str("rangle"), - '[' => result.push_str("lbracket"), - ']' => result.push_str("rbracket"), - '(' => result.push_str("lparen"), - ')' => result.push_str("rparen"), - '|' => result.push_str("pipe"), - '=' => result.push_str("equal"), - '~' => result.push_str("tilde"), - '?' => result.push_str("question"), - '`' => result.push_str("backtick"), - '^' => result.push_str("caret"), - '!' => result.push_str("bang"), - '#' => result.push_str("hash"), - '%' => result.push_str("percent"), - '&' => result.push_str("ampersand"), - '.' => result.push_str("dot"), - ',' => result.push_str("comma"), - '/' => result.push_str("slash"), - ':' => result.push_str("colon"), - ';' => result.push_str("semicolon"), - '"' => result.push_str("dquote"), - '*' => result.push_str("star"), - '+' => result.push_str("plus"), - '-' => result.push_str("minus"), - '@' => result.push_str("at"), - _ => result.push_str(&c.to_lowercase().to_string()), - } - } - - for &keyword in &RESERVED_KEYWORDS { - if result == keyword { - result.push_str("__"); - break; - } - } - - result -} - -/// Given a tree-sitter node type's (kind, named) pair, returns a single string -/// representing the (unescaped) name we'll use to refer to corresponding QL -/// type. -fn node_type_name(kind: &str, named: bool) -> String { - if named { - kind.to_string() - } else { - format!("{}_unnamed", kind) - } -} diff --git a/extractor/src/main.rs b/extractor/src/main.rs index b228e29562a..69f5acfe972 100644 --- a/extractor/src/main.rs +++ b/extractor/src/main.rs @@ -1,5 +1,4 @@ mod extractor; -mod nodes_types; use clap; use std::fs; @@ -44,7 +43,7 @@ fn main() -> std::io::Result<()> { let node_types_path = PathBuf::from("tree-sitter-ruby/src/node-types.json"); let language = unsafe { tree_sitter_ruby() }; - let schema = nodes_types::read_node_types(&node_types_path)?; + let schema = node_types::read_node_types(&node_types_path)?; let mut extractor = extractor::create(language, schema); for line in std::io::BufReader::new(file_list).lines() { let path = PathBuf::from(line?); diff --git a/extractor/src/nodes_types.rs b/extractor/src/nodes_types.rs deleted file mode 100644 index 67363d408e0..00000000000 --- a/extractor/src/nodes_types.rs +++ /dev/null @@ -1,170 +0,0 @@ -use serde::Deserialize; - -use std::collections::BTreeMap as Map; -use std::collections::BTreeSet as Set; -use std::fs; -use std::path::Path; - -#[derive(Debug)] -pub enum Entry { - Union { - type_name: TypeName, - members: Set, - }, - Table { - type_name: TypeName, - fields: Vec, - }, -} - -#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] -pub struct TypeName { - pub kind: String, - pub named: bool, -} - -#[derive(Debug)] -pub struct Field { - pub types: Set, - /// The name of the field or None for the anonymous 'children' - /// entry from node_types.json - pub name: Option, - pub storage: Storage, -} - -#[derive(Debug)] -pub enum Storage { - /// the field is stored as a column in the parent table - Column, - // the field is store in a link table - Table { - parent: TypeName, - index: usize, - }, -} - -pub fn read_node_types(node_types_path: &Path) -> std::io::Result> { - let file = fs::File::open(node_types_path)?; - let node_types = serde_json::from_reader(file)?; - Ok(convert_nodes(node_types)) -} - -fn convert_type(node_type: &NodeType) -> TypeName { - TypeName { - kind: node_type.kind.to_string(), - named: node_type.named, - } -} - -fn convert_types(node_types: &Vec) -> Set { - let iter = node_types.iter().map(convert_type).collect(); - std::collections::BTreeSet::from(iter) -} -pub fn convert_nodes(nodes: Vec) -> Vec { - let mut entries: Vec = Vec::new(); - - for node in nodes { - if let Some(subtypes) = &node.subtypes { - // It's a tree-sitter supertype node, for which we create a union - // type. - entries.push(Entry::Union { - type_name: TypeName { - kind: node.kind, - named: node.named, - }, - members: convert_types(&subtypes), - }); - } else { - // It's a product type, defined by a table. - let type_name = TypeName { - kind: node.kind, - named: node.named, - }; - let mut fields = Vec::new(); - - // If the type also has fields or children, then we create either - // auxiliary tables or columns in the defining table for them. - if let Some(node_fields) = &node.fields { - for (field_name, field_info) in node_fields { - add_field( - &type_name, - Some(field_name.to_string()), - field_info, - &mut fields, - ); - } - } - if let Some(children) = &node.children { - // Treat children as if they were a field called 'child'. - add_field(&type_name, None, children, &mut fields); - } - entries.push(Entry::Table { type_name, fields }); - } - } - entries -} - -fn add_field( - parent_type_name: &TypeName, - field_name: Option, - field_info: &FieldInfo, - fields: &mut Vec, -) { - let storage; - if !field_info.multiple && field_info.required { - // This field must appear exactly once, so we add it as - // a column to the main table for the node type. - storage = Storage::Column; - } else { - // This field can appear zero or multiple times, so put - // it in an auxiliary table. - storage = Storage::Table { - parent: TypeName { - kind: parent_type_name.kind.to_string(), - named: parent_type_name.named, - }, - index: fields.len(), - }; - } - fields.push(Field { - types: convert_types(&field_info.types), - name: field_name, - storage, - }); -} -#[derive(Deserialize)] -pub struct NodeInfo { - #[serde(rename = "type")] - pub kind: String, - pub named: bool, - #[serde(skip_serializing_if = "Option::is_none")] - pub fields: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub children: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub subtypes: Option>, -} - -#[derive(Deserialize)] -pub struct NodeType { - #[serde(rename = "type")] - pub kind: String, - pub named: bool, -} - -#[derive(Deserialize)] -pub struct FieldInfo { - pub multiple: bool, - pub required: bool, - pub types: Vec, -} - -impl Default for FieldInfo { - fn default() -> Self { - FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - } - } -} diff --git a/generator/Cargo.toml b/generator/Cargo.toml index b597ca7a826..a7587f73497 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -8,3 +8,5 @@ edition = "2018" [dependencies] node-types = { path = "../node-types" } +tracing = "0.1" +tracing-subscriber = { version = "0.2", features = ["env-filter"] } diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index 27078ce9717..fc5d5e1eda9 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -49,66 +49,6 @@ pub enum QlColumnType { Custom(String), } -const RESERVED_KEYWORDS: [&'static str; 14] = [ - "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", - "type", "unique", "varchar", -]; - -/// Returns a string that's a copy of `name` but suitably escaped to be a valid -/// QL identifier. -pub fn escape_name(name: &str) -> String { - let mut result = String::new(); - - // If there's a leading underscore, replace it with 'underscore_'. - if let Some(c) = name.chars().next() { - if c == '_' { - result.push_str("underscore"); - } - } - for c in name.chars() { - match c { - '{' => result.push_str("lbrace"), - '}' => result.push_str("rbrace"), - '<' => result.push_str("langle"), - '>' => result.push_str("rangle"), - '[' => result.push_str("lbracket"), - ']' => result.push_str("rbracket"), - '(' => result.push_str("lparen"), - ')' => result.push_str("rparen"), - '|' => result.push_str("pipe"), - '=' => result.push_str("equal"), - '~' => result.push_str("tilde"), - '?' => result.push_str("question"), - '`' => result.push_str("backtick"), - '^' => result.push_str("caret"), - '!' => result.push_str("bang"), - '#' => result.push_str("hash"), - '%' => result.push_str("percent"), - '&' => result.push_str("ampersand"), - '.' => result.push_str("dot"), - ',' => result.push_str("comma"), - '/' => result.push_str("slash"), - ':' => result.push_str("colon"), - ';' => result.push_str("semicolon"), - '"' => result.push_str("dquote"), - '*' => result.push_str("star"), - '+' => result.push_str("plus"), - '-' => result.push_str("minus"), - '@' => result.push_str("at"), - _ => result.push_str(&c.to_lowercase().to_string()), - } - } - - for &keyword in &RESERVED_KEYWORDS { - if result == keyword { - result.push_str("__"); - break; - } - } - - result -} - impl fmt::Display for Table { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(keyset) = &self.keysets { diff --git a/generator/src/main.rs b/generator/src/main.rs index 88f144e0620..f6e698675ec 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -2,21 +2,12 @@ mod dbscheme; mod language; use language::Language; -use node_types::{FieldInfo, NodeInfo}; +use node_types; +use std::collections::BTreeSet as Set; use std::fs::File; use std::io::LineWriter; use std::path::PathBuf; - -/// Given a tree-sitter node type's (kind, named) pair, returns a single string -/// representing the (unescaped) name we'll use to refer to corresponding QL -/// type. -fn node_type_name(kind: &str, named: bool) -> String { - if named { - kind.to_string() - } else { - format!("{}_unnamed", kind) - } -} +use tracing::{error, info}; /// Given the name of the parent node, and its field information, returns the /// name of the field's type. This may be an ad-hoc union of all the possible @@ -24,21 +15,21 @@ fn node_type_name(kind: &str, named: bool) -> String { fn make_field_type( parent_name: &str, field_name: &str, - field_info: &FieldInfo, + types: &Set, entries: &mut Vec, ) -> String { - if field_info.types.len() == 1 { + if types.len() == 1 { // This field can only have a single type. - let t = &field_info.types[0]; - dbscheme::escape_name(&node_type_name(&t.kind, t.named)) + let t = types.iter().next().unwrap(); + node_types::escape_name(&node_types::node_type_name(&t.kind, t.named)) } else { // This field can have one of several types. Create an ad-hoc QL union // type to represent them. let field_union_name = format!("{}_{}_type", parent_name, field_name); - let field_union_name = dbscheme::escape_name(&field_union_name); + let field_union_name = node_types::escape_name(&field_union_name); let mut members: Vec = Vec::new(); - for field_type in &field_info.types { - members.push(dbscheme::escape_name(&node_type_name( + for field_type in types { + members.push(node_types::escape_name(&node_types::node_type_name( &field_type.kind, field_type.named, ))); @@ -55,140 +46,146 @@ fn make_field_type( /// column on `main_table`, or as an auxiliary table. fn add_field( main_table: &mut dbscheme::Table, - parent_name: &str, - field_name: &str, - field_info: &FieldInfo, + field: &node_types::Field, entries: &mut Vec, ) { - if field_info.multiple || !field_info.required { - // This field can appear zero or multiple times, so put - // it in an auxiliary table. - let field_type = make_field_type(parent_name, field_name, field_info, entries); - let field_table = dbscheme::Table { - name: format!("{}_{}", parent_name, field_name), - columns: vec![ - // First column is a reference to the parent. - dbscheme::Column { - unique: false, - db_type: dbscheme::DbColumnType::Int, - name: dbscheme::escape_name(parent_name), - ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)), - ql_type_is_ref: true, - }, - // Then an index column. - dbscheme::Column { - unique: false, - db_type: dbscheme::DbColumnType::Int, - name: "index".to_string(), - ql_type: dbscheme::QlColumnType::Int, - ql_type_is_ref: true, - }, - // And then the field - dbscheme::Column { - unique: true, - db_type: dbscheme::DbColumnType::Int, - name: field_type.clone(), - ql_type: dbscheme::QlColumnType::Custom(field_type), - ql_type_is_ref: true, - }, - ], - // In addition to the field being unique, the combination of - // parent+index is unique, so add a keyset for them. - keysets: Some(vec![ - dbscheme::escape_name(parent_name), - "index".to_string(), - ]), - }; - entries.push(dbscheme::Entry::Table(field_table)); - } else { - // This field must appear exactly once, so we add it as - // a column to the main table for the node type. - let field_type = make_field_type(parent_name, field_name, field_info, entries); - main_table.columns.push(dbscheme::Column { - unique: false, - db_type: dbscheme::DbColumnType::Int, - name: String::from(field_name), - ql_type: dbscheme::QlColumnType::Custom(field_type), - ql_type_is_ref: true, - }); + let field_name = match &field.name { + None => "child".to_owned(), + Some(x) => x.to_owned(), + }; + let parent_name = node_types::node_type_name(&field.parent.kind, field.parent.named); + match field.storage { + node_types::Storage::Table { .. } => { + // This field can appear zero or multiple times, so put + // it in an auxiliary table. + let field_type = make_field_type(&parent_name, &field_name, &field.types, entries); + let field_table = dbscheme::Table { + name: format!("{}_{}", parent_name, field_name), + columns: vec![ + // First column is a reference to the parent. + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: node_types::escape_name(&parent_name), + ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name( + &parent_name, + )), + ql_type_is_ref: true, + }, + // Then an index column. + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "index".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + // And then the field + dbscheme::Column { + unique: true, + db_type: dbscheme::DbColumnType::Int, + name: field_type.clone(), + ql_type: dbscheme::QlColumnType::Custom(field_type), + ql_type_is_ref: true, + }, + ], + // In addition to the field being unique, the combination of + // parent+index is unique, so add a keyset for them. + keysets: Some(vec![ + node_types::escape_name(&parent_name), + "index".to_string(), + ]), + }; + entries.push(dbscheme::Entry::Table(field_table)); + } + node_types::Storage::Column => { + // This field must appear exactly once, so we add it as + // a column to the main table for the node type. + let field_type = make_field_type(&parent_name, &field_name, &field.types, entries); + main_table.columns.push(dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: field_name, + ql_type: dbscheme::QlColumnType::Custom(field_type), + ql_type_is_ref: true, + }); + } } } /// Converts the given tree-sitter node types into CodeQL dbscheme entries. -fn convert_nodes(nodes: &[NodeInfo]) -> Vec { +fn convert_nodes(nodes: &Vec) -> Vec { let mut entries: Vec = Vec::new(); let mut top_members: Vec = Vec::new(); for node in nodes { - if let Some(subtypes) = &node.subtypes { - // It's a tree-sitter supertype node, for which we create a union - // type. - let mut members: Vec = Vec::new(); - for subtype in subtypes { - members.push(dbscheme::escape_name(&node_type_name( - &subtype.kind, - subtype.named, - ))) - } - entries.push(dbscheme::Entry::Union(dbscheme::Union { - name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)), - members, - })); - } else { - // It's a product type, defined by a table. - let name = node_type_name(&node.kind, node.named); - let mut main_table = dbscheme::Table { - name: dbscheme::escape_name(&(format!("{}_def", name))), - columns: vec![dbscheme::Column { - db_type: dbscheme::DbColumnType::Int, - name: "id".to_string(), - unique: true, - ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)), - ql_type_is_ref: false, - }], - keysets: None, - }; - top_members.push(dbscheme::escape_name(&name)); - - let mut is_leaf = true; - - // If the type also has fields or children, then we create either - // auxiliary tables or columns in the defining table for them. - if let Some(fields) = &node.fields { - for (field_name, field_info) in fields { - is_leaf = false; - add_field(&mut main_table, &name, field_name, field_info, &mut entries); + match &node { + node_types::Entry::Union { + type_name, + members: n_members, + } => { + // It's a tree-sitter supertype node, for which we create a union + // type. + let mut members: Vec = Vec::new(); + for n_member in n_members { + members.push(node_types::escape_name(&node_types::node_type_name( + &n_member.kind, + n_member.named, + ))) } + entries.push(dbscheme::Entry::Union(dbscheme::Union { + name: node_types::escape_name(&node_types::node_type_name( + &type_name.kind, + type_name.named, + )), + members, + })); } - if let Some(children) = &node.children { - is_leaf = false; + node_types::Entry::Table { type_name, fields } => { + // It's a product type, defined by a table. + let name = node_types::node_type_name(&type_name.kind, type_name.named); + let mut main_table = dbscheme::Table { + name: node_types::escape_name(&(format!("{}_def", name))), + columns: vec![dbscheme::Column { + db_type: dbscheme::DbColumnType::Int, + name: "id".to_string(), + unique: true, + ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name(&name)), + ql_type_is_ref: false, + }], + keysets: None, + }; + top_members.push(node_types::escape_name(&name)); - // Treat children as if they were a field called 'child'. - add_field(&mut main_table, &name, "child", children, &mut entries); - } + // If the type also has fields or children, then we create either + // auxiliary tables or columns in the defining table for them. + for field in fields { + add_field(&mut main_table, &field, &mut entries); + } - if is_leaf { - // There were no fields and no children, so it's a leaf node in - // the TS grammar. Add a column for the node text. + if fields.is_empty() { + // There were no fields and no children, so it's a leaf node in + // the TS grammar. Add a column for the node text. + main_table.columns.push(dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::String, + name: "text".to_string(), + ql_type: dbscheme::QlColumnType::String, + ql_type_is_ref: true, + }); + } + + // Finally, the type's defining table also includes the location. main_table.columns.push(dbscheme::Column { unique: false, - db_type: dbscheme::DbColumnType::String, - name: "text".to_string(), - ql_type: dbscheme::QlColumnType::String, + db_type: dbscheme::DbColumnType::Int, + name: "loc".to_string(), + ql_type: dbscheme::QlColumnType::Custom("location".to_string()), ql_type_is_ref: true, }); + + entries.push(dbscheme::Entry::Table(main_table)); } - - // Finally, the type's defining table also includes the location. - main_table.columns.push(dbscheme::Column { - unique: false, - db_type: dbscheme::DbColumnType::Int, - name: "loc".to_string(), - ql_type: dbscheme::QlColumnType::Custom("location".to_string()), - ql_type_is_ref: true, - }); - - entries.push(dbscheme::Entry::Table(main_table)); } } @@ -202,7 +199,7 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec { } fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> { - println!( + info!( "Writing to '{}'", match language.dbscheme_path.to_str() { None => "", @@ -280,6 +277,13 @@ fn create_source_location_prefix_entry() -> dbscheme::Entry { } fn main() { + tracing_subscriber::fmt() + .with_target(false) + .without_time() + .with_level(true) + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .init(); + // TODO: figure out proper dbscheme output path and/or take it from the // command line. let ruby = Language { @@ -287,16 +291,9 @@ fn main() { node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"), dbscheme_path: PathBuf::from("ruby.dbscheme"), }; - match node_types::read(&ruby.node_types_path) { + match node_types::read_node_types(&ruby.node_types_path) { Err(e) => { - println!( - "Failed to read '{}': {}", - match ruby.node_types_path.to_str() { - None => "", - Some(p) => p, - }, - e - ); + error!("Failed to read '{}': {}", ruby.node_types_path.display(), e); std::process::exit(1); } Ok(nodes) => { @@ -305,7 +302,7 @@ fn main() { dbscheme_entries.push(create_source_location_prefix_entry()); match write_dbscheme(&ruby, &dbscheme_entries) { Err(e) => { - println!("Failed to write dbscheme: {}", e); + error!("Failed to write dbscheme: {}", e); std::process::exit(2); } Ok(()) => {} diff --git a/node-types/src/lib.rs b/node-types/src/lib.rs index f8c06f29c67..b1fa7541362 100644 --- a/node-types/src/lib.rs +++ b/node-types/src/lib.rs @@ -1,8 +1,137 @@ use serde::Deserialize; use std::collections::BTreeMap; -use std::fmt; use std::path::Path; +use std::collections::BTreeSet as Set; +use std::fs; + +#[derive(Debug)] +pub enum Entry { + Union { + type_name: TypeName, + members: Set, + }, + Table { + type_name: TypeName, + fields: Vec, + }, +} + +#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] +pub struct TypeName { + pub kind: String, + pub named: bool, +} + +#[derive(Debug)] +pub struct Field { + pub parent: TypeName, + pub types: Set, + /// The name of the field or None for the anonymous 'children' + /// entry from node_types.json + pub name: Option, + pub storage: Storage, +} + +#[derive(Debug)] +pub enum Storage { + /// the field is stored as a column in the parent table + Column, + // the field is store in a link table + Table { + index: usize, + }, +} + +pub fn read_node_types(node_types_path: &Path) -> std::io::Result> { + let file = fs::File::open(node_types_path)?; + let node_types = serde_json::from_reader(file)?; + Ok(convert_nodes(node_types)) +} + +fn convert_type(node_type: &NodeType) -> TypeName { + TypeName { + kind: node_type.kind.to_string(), + named: node_type.named, + } +} + +fn convert_types(node_types: &Vec) -> Set { + let iter = node_types.iter().map(convert_type).collect(); + std::collections::BTreeSet::from(iter) +} +pub fn convert_nodes(nodes: Vec) -> Vec { + let mut entries: Vec = Vec::new(); + + for node in nodes { + if let Some(subtypes) = &node.subtypes { + // It's a tree-sitter supertype node, for which we create a union + // type. + entries.push(Entry::Union { + type_name: TypeName { + kind: node.kind, + named: node.named, + }, + members: convert_types(&subtypes), + }); + } else { + // It's a product type, defined by a table. + let type_name = TypeName { + kind: node.kind, + named: node.named, + }; + let mut fields = Vec::new(); + + // If the type also has fields or children, then we create either + // auxiliary tables or columns in the defining table for them. + if let Some(node_fields) = &node.fields { + for (field_name, field_info) in node_fields { + add_field( + &type_name, + Some(field_name.to_string()), + field_info, + &mut fields, + ); + } + } + if let Some(children) = &node.children { + // Treat children as if they were a field called 'child'. + add_field(&type_name, None, children, &mut fields); + } + entries.push(Entry::Table { type_name, fields }); + } + } + entries +} + +fn add_field( + parent_type_name: &TypeName, + field_name: Option, + field_info: &FieldInfo, + fields: &mut Vec, +) { + let storage; + if !field_info.multiple && field_info.required { + // This field must appear exactly once, so we add it as + // a column to the main table for the node type. + storage = Storage::Column; + } else { + // This field can appear zero or multiple times, so put + // it in an auxiliary table. + storage = Storage::Table { + index: fields.len(), + }; + } + fields.push(Field { + parent: TypeName { + kind: parent_type_name.kind.to_string(), + named: parent_type_name.named, + }, + types: convert_types(&field_info.types), + name: field_name, + storage, + }); +} #[derive(Deserialize)] pub struct NodeInfo { #[serde(rename = "type")] @@ -30,45 +159,73 @@ pub struct FieldInfo { pub types: Vec, } -impl Default for FieldInfo { - fn default() -> Self { - FieldInfo { - multiple: false, - required: true, - types: Vec::new(), +/// Given a tree-sitter node type's (kind, named) pair, returns a single string +/// representing the (unescaped) name we'll use to refer to corresponding QL +/// type. +pub fn node_type_name(kind: &str, named: bool) -> String { + if named { + kind.to_string() + } else { + format!("{}_unnamed", kind) + } +} + +const RESERVED_KEYWORDS: [&'static str; 14] = [ + "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", + "type", "unique", "varchar", +]; + +/// Returns a string that's a copy of `name` but suitably escaped to be a valid +/// QL identifier. +pub fn escape_name(name: &str) -> String { + let mut result = String::new(); + + // If there's a leading underscore, replace it with 'underscore_'. + if let Some(c) = name.chars().next() { + if c == '_' { + result.push_str("underscore"); } } -} - -pub enum Error { - IOError(std::io::Error), - JsonError(serde_json::error::Error), -} - -impl From for Error { - fn from(error: std::io::Error) -> Self { - Error::IOError(error) - } -} - -impl From for Error { - fn from(error: serde_json::Error) -> Self { - Error::JsonError(error) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::IOError(e) => write!(f, "{}", e), - Error::JsonError(e) => write!(f, "{}", e), + for c in name.chars() { + match c { + '{' => result.push_str("lbrace"), + '}' => result.push_str("rbrace"), + '<' => result.push_str("langle"), + '>' => result.push_str("rangle"), + '[' => result.push_str("lbracket"), + ']' => result.push_str("rbracket"), + '(' => result.push_str("lparen"), + ')' => result.push_str("rparen"), + '|' => result.push_str("pipe"), + '=' => result.push_str("equal"), + '~' => result.push_str("tilde"), + '?' => result.push_str("question"), + '`' => result.push_str("backtick"), + '^' => result.push_str("caret"), + '!' => result.push_str("bang"), + '#' => result.push_str("hash"), + '%' => result.push_str("percent"), + '&' => result.push_str("ampersand"), + '.' => result.push_str("dot"), + ',' => result.push_str("comma"), + '/' => result.push_str("slash"), + ':' => result.push_str("colon"), + ';' => result.push_str("semicolon"), + '"' => result.push_str("dquote"), + '*' => result.push_str("star"), + '+' => result.push_str("plus"), + '-' => result.push_str("minus"), + '@' => result.push_str("at"), + _ => result.push_str(&c.to_lowercase().to_string()), } } -} -/// Deserializes the node types from the JSON at the given `path`. -pub fn read(path: &Path) -> Result, Error> { - let json_data = std::fs::read_to_string(path)?; - let node_types: Vec = serde_json::from_str(&json_data)?; - Ok(node_types) + for &keyword in &RESERVED_KEYWORDS { + if result == keyword { + result.push_str("__"); + break; + } + } + + result }