Merge pull request #7 from github/aibaars/refactor

Refactor dbscheme generator to use intermediate representation
This commit is contained in:
Arthur Baars
2020-10-27 14:12:05 +01:00
committed by GitHub
9 changed files with 348 additions and 499 deletions

5
Cargo.lock generated
View File

@@ -101,6 +101,8 @@ name = "generator"
version = "0.1.0"
dependencies = [
"node-types",
"tracing",
"tracing-subscriber",
]
[[package]]
@@ -265,8 +267,7 @@ version = "0.1.0"
dependencies = [
"cc",
"clap",
"serde",
"serde_json",
"node-types",
"tracing",
"tracing-subscriber",
"tree-sitter",

View File

@@ -7,9 +7,8 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
node-types = { path = "../node-types" }
tree-sitter = "0.17.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
clap = "2.33"
tracing = "0.1"
tracing-subscriber = { version = "0.2", features = ["env-filter"] }

View File

@@ -1,5 +1,4 @@
use super::nodes_types::{Entry, Field, Storage, TypeName};
use node_types::{escape_name, node_type_name, Entry, Field, Storage, TypeName};
use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::fmt;
@@ -23,7 +22,7 @@ impl Extractor {
let span = span!(
Level::TRACE,
"extract",
file = &path.display().to_string()[..]
file = %path.display()
);
let _enter = span.enter();
@@ -99,11 +98,7 @@ struct Visitor<'a> {
impl Visitor<'_> {
fn enter_node(&mut self, node: Node) -> bool {
if node.is_error() {
error!(
"{}:{}: parse error",
&self.path,
node.start_position().row,
);
error!("{}:{}: parse error", &self.path, node.start_position().row);
return false;
}
if node.is_missing() {
@@ -237,10 +232,10 @@ impl Visitor<'_> {
)
}
}
Storage::Table { parent, index } => {
Storage::Table { index } => {
for child_id in child_ids {
self.trap_output.push(TrapEntry::ChildOf(
node_type_name(&parent.kind, parent.named),
node_type_name(&field.parent.kind, field.parent.named),
parent_id,
match &field.name {
Some(name) => name.to_owned(),
@@ -427,74 +422,3 @@ impl fmt::Display for Arg {
}
}
}
/// Names that get a `__` suffix when an escaped name is exactly equal to one
/// of them.
const RESERVED_KEYWORDS: [&str; 14] = [
    "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
    "type", "unique", "varchar",
];

/// Returns an escaped copy of `name`, intended to be usable as a QL
/// identifier.
///
/// * A name starting with `_` gets the prefix `underscore` (the underscore
///   itself is kept, so `_x` becomes `underscore_x`).
/// * Each character from a fixed punctuation set is replaced by an English
///   word (for example `{` becomes `lbrace`).
/// * Every other character is lower-cased.
/// * If the finished string equals a reserved keyword, `__` is appended.
pub fn escape_name(name: &str) -> String {
    /// Spelled-out replacement for `c`, or `None` when `c` is not one of the
    /// punctuation characters that get replaced.
    fn punctuation_word(c: char) -> Option<&'static str> {
        let word = match c {
            '{' => "lbrace",
            '}' => "rbrace",
            '<' => "langle",
            '>' => "rangle",
            '[' => "lbracket",
            ']' => "rbracket",
            '(' => "lparen",
            ')' => "rparen",
            '|' => "pipe",
            '=' => "equal",
            '~' => "tilde",
            '?' => "question",
            '`' => "backtick",
            '^' => "caret",
            '!' => "bang",
            '#' => "hash",
            '%' => "percent",
            '&' => "ampersand",
            '.' => "dot",
            ',' => "comma",
            '/' => "slash",
            ':' => "colon",
            ';' => "semicolon",
            '"' => "dquote",
            '*' => "star",
            '+' => "plus",
            '-' => "minus",
            '@' => "at",
            _ => return None,
        };
        Some(word)
    }

    let mut escaped = String::new();
    if name.starts_with('_') {
        escaped.push_str("underscore");
    }
    for c in name.chars() {
        match punctuation_word(c) {
            Some(word) => escaped.push_str(word),
            None => escaped.extend(c.to_lowercase()),
        }
    }
    // The keyword check runs on the escaped (already lower-cased) text.
    if RESERVED_KEYWORDS.contains(&escaped.as_str()) {
        escaped.push_str("__");
    }
    escaped
}
/// Builds the (unescaped) name used to refer to the QL type for a
/// tree-sitter node type, given its `(kind, named)` pair: `kind` itself for
/// named nodes, `kind` followed by `_unnamed` otherwise.
fn node_type_name(kind: &str, named: bool) -> String {
    let suffix = if named { "" } else { "_unnamed" };
    [kind, suffix].concat()
}

View File

@@ -1,5 +1,4 @@
mod extractor;
mod nodes_types;
use clap;
use std::fs;
@@ -44,7 +43,7 @@ fn main() -> std::io::Result<()> {
let node_types_path = PathBuf::from("tree-sitter-ruby/src/node-types.json");
let language = unsafe { tree_sitter_ruby() };
let schema = nodes_types::read_node_types(&node_types_path)?;
let schema = node_types::read_node_types(&node_types_path)?;
let mut extractor = extractor::create(language, schema);
for line in std::io::BufReader::new(file_list).lines() {
let path = PathBuf::from(line?);

View File

@@ -1,170 +0,0 @@
use serde::Deserialize;
use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::fs;
use std::path::Path;
/// One schema entry produced by `convert_nodes` from a tree-sitter node
/// description.
#[derive(Debug)]
pub enum Entry {
/// Created for nodes that list `subtypes` (tree-sitter supertype nodes):
/// `type_name` is a union of `members`.
Union {
type_name: TypeName,
members: Set<TypeName>,
},
/// Created for every other node: a product type with the given `fields`
/// (named fields first, then the anonymous children entry, if any).
Table {
type_name: TypeName,
fields: Vec<Field>,
},
}
/// Identifies a node type by its `kind` string together with its `named`
/// flag. The ordering traits are derived so values can be stored in the
/// `BTreeSet`-backed `Set` alias used by this module.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
pub struct TypeName {
/// The node kind, copied from the `type` key of the JSON description.
pub kind: String,
/// The `named` flag, copied from the JSON description.
pub named: bool,
}
/// A field (or the anonymous children entry) of a table-like node type.
#[derive(Debug)]
pub struct Field {
/// The set of node types that may appear in this field.
pub types: Set<TypeName>,
/// The name of the field or None for the anonymous 'children'
/// entry from node_types.json
pub name: Option<String>,
/// How the field is stored: as a column of the parent table or in a
/// separate table (see `Storage`).
pub storage: Storage,
}
/// Describes where the values of a `Field` are stored.
#[derive(Debug)]
pub enum Storage {
/// The field is stored as a column in the parent table. Chosen by
/// `add_field` when the field is required and not multiple.
Column,
/// The field is stored in a link table. Chosen by `add_field` for fields
/// that are optional or may occur multiple times.
Table {
/// The node type that owns the field.
parent: TypeName,
/// The number of fields already collected for the parent at the time
/// this field was added.
index: usize,
},
}
/// Reads the tree-sitter node-types JSON file at `node_types_path` and
/// converts its contents into schema entries via `convert_nodes`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if its contents cannot
/// be deserialized; a JSON error is converted into `std::io::Error` by `?`.
pub fn read_node_types(node_types_path: &Path) -> std::io::Result<Vec<Entry>> {
    let file = fs::File::open(node_types_path)?;
    // `serde_json::from_reader` pulls data through `Read` in small pieces, so
    // hand it a buffered reader instead of the raw file handle.
    let reader = std::io::BufReader::new(file);
    let node_types = serde_json::from_reader(reader)?;
    Ok(convert_nodes(node_types))
}
/// Builds the `TypeName` corresponding to a deserialized `NodeType`
/// reference by copying its `kind` and `named` values.
fn convert_type(node_type: &NodeType) -> TypeName {
    let kind = node_type.kind.clone();
    let named = node_type.named;
    TypeName { kind, named }
}
/// Converts a list of deserialized `NodeType` references into an ordered,
/// duplicate-free set of `TypeName`s.
///
/// Takes a slice so that both `&Vec<NodeType>` and `&[NodeType]` arguments
/// are accepted.
fn convert_types(node_types: &[NodeType]) -> Set<TypeName> {
    // Collect straight into the set; the return type fixes the target
    // collection, so no intermediate value or extra conversion is needed.
    node_types.iter().map(convert_type).collect()
}
/// Converts deserialized tree-sitter node descriptions into schema entries,
/// one entry per node and in the same order.
///
/// A node that lists `subtypes` becomes an `Entry::Union` of those subtypes.
/// Every other node becomes an `Entry::Table` whose fields are the node's
/// named fields followed by its anonymous children entry (if present).
pub fn convert_nodes(nodes: Vec<NodeInfo>) -> Vec<Entry> {
    nodes
        .into_iter()
        .map(|node| {
            let type_name = TypeName {
                kind: node.kind,
                named: node.named,
            };
            match &node.subtypes {
                // A tree-sitter supertype node: represented as a union type.
                Some(subtypes) => Entry::Union {
                    type_name,
                    members: convert_types(subtypes),
                },
                // A product type, defined by a table. Its fields and
                // children each become either a column or an auxiliary table.
                None => {
                    let mut fields = Vec::new();
                    for (field_name, field_info) in node.fields.iter().flatten() {
                        add_field(
                            &type_name,
                            Some(field_name.clone()),
                            field_info,
                            &mut fields,
                        );
                    }
                    // Children are handled like a field without a name.
                    if let Some(children) = &node.children {
                        add_field(&type_name, None, children, &mut fields);
                    }
                    Entry::Table { type_name, fields }
                }
            }
        })
        .collect()
}
/// Appends a `Field` describing `field_info` to `fields`.
///
/// A field that is required and not multiple is stored as a column of the
/// parent's table. Any other field is stored in an auxiliary table that
/// records the parent type and the position the field takes in `fields`.
fn add_field(
    parent_type_name: &TypeName,
    field_name: Option<String>,
    field_info: &FieldInfo,
    fields: &mut Vec<Field>,
) {
    let exactly_once = field_info.required && !field_info.multiple;
    let storage = if exactly_once {
        Storage::Column
    } else {
        Storage::Table {
            parent: TypeName {
                kind: parent_type_name.kind.clone(),
                named: parent_type_name.named,
            },
            // Taken before the push below, so it is this field's own index.
            index: fields.len(),
        }
    };
    let types = convert_types(&field_info.types);
    fields.push(Field {
        types,
        name: field_name,
        storage,
    });
}
/// Deserialized form of one top-level node description from the node-types
/// JSON file.
// NOTE(review): the `skip_serializing_if` attributes below only influence
// serialization, but this type derives `Deserialize` only, so they look
// like leftovers. Confirm before removing.
#[derive(Deserialize)]
pub struct NodeInfo {
/// The node kind; read from the JSON key `type`.
#[serde(rename = "type")]
pub kind: String,
/// The `named` flag of the node.
pub named: bool,
/// Named fields of the node, keyed by field name, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Map<String, FieldInfo>>,
/// The anonymous children entry of the node, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub children: Option<FieldInfo>,
/// Subtypes of the node; when present, `convert_nodes` treats the node as
/// a union of these types.
#[serde(skip_serializing_if = "Option::is_none")]
pub subtypes: Option<Vec<NodeType>>,
}
/// Deserialized reference to a node type, as it appears in `subtypes` lists
/// and in the `types` list of a field.
#[derive(Deserialize)]
pub struct NodeType {
/// The node kind; read from the JSON key `type`.
#[serde(rename = "type")]
pub kind: String,
/// The `named` flag of the referenced node type.
pub named: bool,
}
/// Deserialized description of a field (or of the children entry) of a node.
/// `add_field` stores a field as a column only when it is `required` and not
/// `multiple`; otherwise it goes into an auxiliary table.
#[derive(Deserialize)]
pub struct FieldInfo {
/// Whether the field may occur more than once.
pub multiple: bool,
/// Whether the field must be present.
pub required: bool,
/// The node types that may appear in this field.
pub types: Vec<NodeType>,
}
impl Default for FieldInfo {
fn default() -> Self {
FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
}
}
}

View File

@@ -8,3 +8,5 @@ edition = "2018"
[dependencies]
node-types = { path = "../node-types" }
tracing = "0.1"
tracing-subscriber = { version = "0.2", features = ["env-filter"] }

View File

@@ -49,66 +49,6 @@ pub enum QlColumnType {
Custom(String),
}
/// Names that get a `__` suffix when an escaped name is exactly equal to one
/// of them.
const RESERVED_KEYWORDS: [&str; 14] = [
    "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
    "type", "unique", "varchar",
];

/// Returns an escaped copy of `name`, intended to be usable as a QL
/// identifier.
///
/// A leading `_` causes `underscore` to be prepended (the `_` itself is
/// kept). Characters from a fixed punctuation set are spelled out as words,
/// all other characters are lower-cased, and `__` is appended when the
/// result is exactly a reserved keyword.
pub fn escape_name(name: &str) -> String {
    let mut out = String::new();
    if let Some('_') = name.chars().next() {
        out.push_str("underscore");
    }
    for ch in name.chars() {
        let spelled = match ch {
            '{' => "lbrace",
            '}' => "rbrace",
            '<' => "langle",
            '>' => "rangle",
            '[' => "lbracket",
            ']' => "rbracket",
            '(' => "lparen",
            ')' => "rparen",
            '|' => "pipe",
            '=' => "equal",
            '~' => "tilde",
            '?' => "question",
            '`' => "backtick",
            '^' => "caret",
            '!' => "bang",
            '#' => "hash",
            '%' => "percent",
            '&' => "ampersand",
            '.' => "dot",
            ',' => "comma",
            '/' => "slash",
            ':' => "colon",
            ';' => "semicolon",
            '"' => "dquote",
            '*' => "star",
            '+' => "plus",
            '-' => "minus",
            '@' => "at",
            other => {
                // Not a replaced punctuation character: keep it, lower-cased.
                out.extend(other.to_lowercase());
                continue;
            }
        };
        out.push_str(spelled);
    }
    // The comparison is made against the fully escaped text.
    if RESERVED_KEYWORDS.iter().any(|&keyword| out == keyword) {
        out.push_str("__");
    }
    out
}
impl fmt::Display for Table {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(keyset) = &self.keysets {

View File

@@ -2,21 +2,12 @@ mod dbscheme;
mod language;
use language::Language;
use node_types::{FieldInfo, NodeInfo};
use node_types;
use std::collections::BTreeSet as Set;
use std::fs::File;
use std::io::LineWriter;
use std::path::PathBuf;
/// Returns the (unescaped) name used for the QL type that corresponds to a
/// tree-sitter node type's `(kind, named)` pair. Named nodes use `kind`
/// unchanged; unnamed nodes get the suffix `_unnamed`.
fn node_type_name(kind: &str, named: bool) -> String {
    let mut name = String::from(kind);
    if !named {
        name.push_str("_unnamed");
    }
    name
}
use tracing::{error, info};
/// Given the name of the parent node, and its field information, returns the
/// name of the field's type. This may be an ad-hoc union of all the possible
@@ -24,21 +15,21 @@ fn node_type_name(kind: &str, named: bool) -> String {
fn make_field_type(
parent_name: &str,
field_name: &str,
field_info: &FieldInfo,
types: &Set<node_types::TypeName>,
entries: &mut Vec<dbscheme::Entry>,
) -> String {
if field_info.types.len() == 1 {
if types.len() == 1 {
// This field can only have a single type.
let t = &field_info.types[0];
dbscheme::escape_name(&node_type_name(&t.kind, t.named))
let t = types.iter().next().unwrap();
node_types::escape_name(&node_types::node_type_name(&t.kind, t.named))
} else {
// This field can have one of several types. Create an ad-hoc QL union
// type to represent them.
let field_union_name = format!("{}_{}_type", parent_name, field_name);
let field_union_name = dbscheme::escape_name(&field_union_name);
let field_union_name = node_types::escape_name(&field_union_name);
let mut members: Vec<String> = Vec::new();
for field_type in &field_info.types {
members.push(dbscheme::escape_name(&node_type_name(
for field_type in types {
members.push(node_types::escape_name(&node_types::node_type_name(
&field_type.kind,
field_type.named,
)));
@@ -55,140 +46,146 @@ fn make_field_type(
/// column on `main_table`, or as an auxiliary table.
fn add_field(
main_table: &mut dbscheme::Table,
parent_name: &str,
field_name: &str,
field_info: &FieldInfo,
field: &node_types::Field,
entries: &mut Vec<dbscheme::Entry>,
) {
if field_info.multiple || !field_info.required {
// This field can appear zero or multiple times, so put
// it in an auxiliary table.
let field_type = make_field_type(parent_name, field_name, field_info, entries);
let field_table = dbscheme::Table {
name: format!("{}_{}", parent_name, field_name),
columns: vec![
// First column is a reference to the parent.
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: dbscheme::escape_name(parent_name),
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)),
ql_type_is_ref: true,
},
// Then an index column.
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "index".to_string(),
ql_type: dbscheme::QlColumnType::Int,
ql_type_is_ref: true,
},
// And then the field
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: field_type.clone(),
ql_type: dbscheme::QlColumnType::Custom(field_type),
ql_type_is_ref: true,
},
],
// In addition to the field being unique, the combination of
// parent+index is unique, so add a keyset for them.
keysets: Some(vec![
dbscheme::escape_name(parent_name),
"index".to_string(),
]),
};
entries.push(dbscheme::Entry::Table(field_table));
} else {
// This field must appear exactly once, so we add it as
// a column to the main table for the node type.
let field_type = make_field_type(parent_name, field_name, field_info, entries);
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: String::from(field_name),
ql_type: dbscheme::QlColumnType::Custom(field_type),
ql_type_is_ref: true,
});
let field_name = match &field.name {
None => "child".to_owned(),
Some(x) => x.to_owned(),
};
let parent_name = node_types::node_type_name(&field.parent.kind, field.parent.named);
match field.storage {
node_types::Storage::Table { .. } => {
// This field can appear zero or multiple times, so put
// it in an auxiliary table.
let field_type = make_field_type(&parent_name, &field_name, &field.types, entries);
let field_table = dbscheme::Table {
name: format!("{}_{}", parent_name, field_name),
columns: vec![
// First column is a reference to the parent.
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: node_types::escape_name(&parent_name),
ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name(
&parent_name,
)),
ql_type_is_ref: true,
},
// Then an index column.
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "index".to_string(),
ql_type: dbscheme::QlColumnType::Int,
ql_type_is_ref: true,
},
// And then the field
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: field_type.clone(),
ql_type: dbscheme::QlColumnType::Custom(field_type),
ql_type_is_ref: true,
},
],
// In addition to the field being unique, the combination of
// parent+index is unique, so add a keyset for them.
keysets: Some(vec![
node_types::escape_name(&parent_name),
"index".to_string(),
]),
};
entries.push(dbscheme::Entry::Table(field_table));
}
node_types::Storage::Column => {
// This field must appear exactly once, so we add it as
// a column to the main table for the node type.
let field_type = make_field_type(&parent_name, &field_name, &field.types, entries);
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: field_name,
ql_type: dbscheme::QlColumnType::Custom(field_type),
ql_type_is_ref: true,
});
}
}
}
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
let mut entries: Vec<dbscheme::Entry> = Vec::new();
let mut top_members: Vec<String> = Vec::new();
for node in nodes {
if let Some(subtypes) = &node.subtypes {
// It's a tree-sitter supertype node, for which we create a union
// type.
let mut members: Vec<String> = Vec::new();
for subtype in subtypes {
members.push(dbscheme::escape_name(&node_type_name(
&subtype.kind,
subtype.named,
)))
}
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)),
members,
}));
} else {
// It's a product type, defined by a table.
let name = node_type_name(&node.kind, node.named);
let mut main_table = dbscheme::Table {
name: dbscheme::escape_name(&(format!("{}_def", name))),
columns: vec![dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "id".to_string(),
unique: true,
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)),
ql_type_is_ref: false,
}],
keysets: None,
};
top_members.push(dbscheme::escape_name(&name));
let mut is_leaf = true;
// If the type also has fields or children, then we create either
// auxiliary tables or columns in the defining table for them.
if let Some(fields) = &node.fields {
for (field_name, field_info) in fields {
is_leaf = false;
add_field(&mut main_table, &name, field_name, field_info, &mut entries);
match &node {
node_types::Entry::Union {
type_name,
members: n_members,
} => {
// It's a tree-sitter supertype node, for which we create a union
// type.
let mut members: Vec<String> = Vec::new();
for n_member in n_members {
members.push(node_types::escape_name(&node_types::node_type_name(
&n_member.kind,
n_member.named,
)))
}
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: node_types::escape_name(&node_types::node_type_name(
&type_name.kind,
type_name.named,
)),
members,
}));
}
if let Some(children) = &node.children {
is_leaf = false;
node_types::Entry::Table { type_name, fields } => {
// It's a product type, defined by a table.
let name = node_types::node_type_name(&type_name.kind, type_name.named);
let mut main_table = dbscheme::Table {
name: node_types::escape_name(&(format!("{}_def", name))),
columns: vec![dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "id".to_string(),
unique: true,
ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name(&name)),
ql_type_is_ref: false,
}],
keysets: None,
};
top_members.push(node_types::escape_name(&name));
// Treat children as if they were a field called 'child'.
add_field(&mut main_table, &name, "child", children, &mut entries);
}
// If the type also has fields or children, then we create either
// auxiliary tables or columns in the defining table for them.
for field in fields {
add_field(&mut main_table, &field, &mut entries);
}
if is_leaf {
// There were no fields and no children, so it's a leaf node in
// the TS grammar. Add a column for the node text.
if fields.is_empty() {
// There were no fields and no children, so it's a leaf node in
// the TS grammar. Add a column for the node text.
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "text".to_string(),
ql_type: dbscheme::QlColumnType::String,
ql_type_is_ref: true,
});
}
// Finally, the type's defining table also includes the location.
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "text".to_string(),
ql_type: dbscheme::QlColumnType::String,
db_type: dbscheme::DbColumnType::Int,
name: "loc".to_string(),
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
ql_type_is_ref: true,
});
entries.push(dbscheme::Entry::Table(main_table));
}
// Finally, the type's defining table also includes the location.
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "loc".to_string(),
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
ql_type_is_ref: true,
});
entries.push(dbscheme::Entry::Table(main_table));
}
}
@@ -202,7 +199,7 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
}
fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> {
println!(
info!(
"Writing to '{}'",
match language.dbscheme_path.to_str() {
None => "<undisplayable>",
@@ -280,6 +277,13 @@ fn create_source_location_prefix_entry() -> dbscheme::Entry {
}
fn main() {
tracing_subscriber::fmt()
.with_target(false)
.without_time()
.with_level(true)
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
// TODO: figure out proper dbscheme output path and/or take it from the
// command line.
let ruby = Language {
@@ -287,16 +291,9 @@ fn main() {
node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"),
dbscheme_path: PathBuf::from("ruby.dbscheme"),
};
match node_types::read(&ruby.node_types_path) {
match node_types::read_node_types(&ruby.node_types_path) {
Err(e) => {
println!(
"Failed to read '{}': {}",
match ruby.node_types_path.to_str() {
None => "<undisplayable>",
Some(p) => p,
},
e
);
error!("Failed to read '{}': {}", ruby.node_types_path.display(), e);
std::process::exit(1);
}
Ok(nodes) => {
@@ -305,7 +302,7 @@ fn main() {
dbscheme_entries.push(create_source_location_prefix_entry());
match write_dbscheme(&ruby, &dbscheme_entries) {
Err(e) => {
println!("Failed to write dbscheme: {}", e);
error!("Failed to write dbscheme: {}", e);
std::process::exit(2);
}
Ok(()) => {}

View File

@@ -1,8 +1,137 @@
use serde::Deserialize;
use std::collections::BTreeMap;
use std::fmt;
use std::path::Path;
use std::collections::BTreeSet as Set;
use std::fs;
/// One schema entry produced by `convert_nodes` from a tree-sitter node
/// description.
#[derive(Debug)]
pub enum Entry {
/// Created for nodes that list `subtypes` (tree-sitter supertype nodes):
/// `type_name` is a union of `members`.
Union {
type_name: TypeName,
members: Set<TypeName>,
},
/// Created for every other node: a product type with the given `fields`
/// (named fields first, then the anonymous children entry, if any).
Table {
type_name: TypeName,
fields: Vec<Field>,
},
}
/// Identifies a node type by its `kind` string together with its `named`
/// flag. The ordering traits are derived so values can be stored in the
/// `BTreeSet`-backed `Set` alias used by this module.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
pub struct TypeName {
/// The node kind, copied from the deserialized node description.
pub kind: String,
/// The `named` flag, copied from the deserialized node description.
pub named: bool,
}
/// A field (or the anonymous children entry) of a table-like node type.
#[derive(Debug)]
pub struct Field {
/// The node type that owns this field.
pub parent: TypeName,
/// The set of node types that may appear in this field.
pub types: Set<TypeName>,
/// The name of the field or None for the anonymous 'children'
/// entry from node_types.json
pub name: Option<String>,
/// How the field is stored: as a column of the parent table or in a
/// separate table (see `Storage`).
pub storage: Storage,
}
/// Describes where the values of a `Field` are stored.
#[derive(Debug)]
pub enum Storage {
/// The field is stored as a column in the parent table. Chosen by
/// `add_field` when the field is required and not multiple.
Column,
/// The field is stored in a link table. Chosen by `add_field` for fields
/// that are optional or may occur multiple times.
Table {
/// The number of fields already collected for the parent at the time
/// this field was added.
index: usize,
},
}
/// Reads the tree-sitter node-types JSON file at `node_types_path` and
/// converts its contents into schema entries via `convert_nodes`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if its contents cannot
/// be deserialized; a JSON error is converted into `std::io::Error` by `?`.
pub fn read_node_types(node_types_path: &Path) -> std::io::Result<Vec<Entry>> {
    let file = fs::File::open(node_types_path)?;
    // `serde_json::from_reader` pulls data through `Read` in small pieces, so
    // hand it a buffered reader instead of the raw file handle.
    let reader = std::io::BufReader::new(file);
    let node_types = serde_json::from_reader(reader)?;
    Ok(convert_nodes(node_types))
}
/// Builds the `TypeName` corresponding to a deserialized `NodeType`
/// reference by copying its `kind` and `named` values.
fn convert_type(node_type: &NodeType) -> TypeName {
    let named = node_type.named;
    let kind = String::from(node_type.kind.as_str());
    TypeName { kind, named }
}
/// Converts a list of deserialized `NodeType` references into an ordered,
/// duplicate-free set of `TypeName`s.
///
/// Takes a slice so that both `&Vec<NodeType>` and `&[NodeType]` arguments
/// are accepted.
fn convert_types(node_types: &[NodeType]) -> Set<TypeName> {
    // Collect straight into the set; the return type fixes the target
    // collection, so no intermediate value or extra conversion is needed.
    node_types.iter().map(convert_type).collect()
}
/// Converts deserialized tree-sitter node descriptions into schema entries,
/// one entry per node and in the same order.
///
/// A node that lists `subtypes` becomes an `Entry::Union` of those subtypes.
/// Every other node becomes an `Entry::Table` whose fields are the node's
/// named fields followed by its anonymous children entry (if present).
pub fn convert_nodes(nodes: Vec<NodeInfo>) -> Vec<Entry> {
    let mut entries: Vec<Entry> = Vec::new();
    for node in nodes {
        let type_name = TypeName {
            kind: node.kind,
            named: node.named,
        };
        let entry = match &node.subtypes {
            // A tree-sitter supertype node: represented as a union type.
            Some(subtypes) => Entry::Union {
                type_name,
                members: convert_types(subtypes),
            },
            // A product type, defined by a table. Its fields and children
            // each become either a column or an auxiliary table.
            None => {
                let mut fields = Vec::new();
                for (field_name, field_info) in node.fields.iter().flatten() {
                    add_field(
                        &type_name,
                        Some(field_name.clone()),
                        field_info,
                        &mut fields,
                    );
                }
                // Children are handled like a field without a name.
                if let Some(children) = &node.children {
                    add_field(&type_name, None, children, &mut fields);
                }
                Entry::Table { type_name, fields }
            }
        };
        entries.push(entry);
    }
    entries
}
/// Appends a `Field` describing `field_info` to `fields`, recording a copy
/// of `parent_type_name` as the field's parent.
///
/// A field that is required and not multiple is stored as a column of the
/// parent's table. Any other field is stored in an auxiliary table that
/// records the position the field takes in `fields`.
fn add_field(
    parent_type_name: &TypeName,
    field_name: Option<String>,
    field_info: &FieldInfo,
    fields: &mut Vec<Field>,
) {
    let exactly_once = field_info.required && !field_info.multiple;
    let storage = if exactly_once {
        Storage::Column
    } else {
        // Taken before the push below, so it is this field's own index.
        Storage::Table {
            index: fields.len(),
        }
    };
    let parent = TypeName {
        kind: parent_type_name.kind.clone(),
        named: parent_type_name.named,
    };
    let types = convert_types(&field_info.types);
    fields.push(Field {
        parent,
        types,
        name: field_name,
        storage,
    });
}
#[derive(Deserialize)]
pub struct NodeInfo {
#[serde(rename = "type")]
@@ -30,45 +159,73 @@ pub struct FieldInfo {
pub types: Vec<NodeType>,
}
impl Default for FieldInfo {
fn default() -> Self {
FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
/// Builds the (unescaped) name used to refer to the QL type for a
/// tree-sitter node type, given its `(kind, named)` pair: `kind` itself for
/// named nodes, `kind` followed by `_unnamed` otherwise.
pub fn node_type_name(kind: &str, named: bool) -> String {
    let suffix = if named { "" } else { "_unnamed" };
    let mut name = String::with_capacity(kind.len() + suffix.len());
    name.push_str(kind);
    name.push_str(suffix);
    name
}
const RESERVED_KEYWORDS: [&'static str; 14] = [
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
"type", "unique", "varchar",
];
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
/// QL identifier.
pub fn escape_name(name: &str) -> String {
let mut result = String::new();
// If there's a leading underscore, replace it with 'underscore_'.
if let Some(c) = name.chars().next() {
if c == '_' {
result.push_str("underscore");
}
}
}
pub enum Error {
IOError(std::io::Error),
JsonError(serde_json::error::Error),
}
impl From<std::io::Error> for Error {
fn from(error: std::io::Error) -> Self {
Error::IOError(error)
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Self {
Error::JsonError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::IOError(e) => write!(f, "{}", e),
Error::JsonError(e) => write!(f, "{}", e),
for c in name.chars() {
match c {
'{' => result.push_str("lbrace"),
'}' => result.push_str("rbrace"),
'<' => result.push_str("langle"),
'>' => result.push_str("rangle"),
'[' => result.push_str("lbracket"),
']' => result.push_str("rbracket"),
'(' => result.push_str("lparen"),
')' => result.push_str("rparen"),
'|' => result.push_str("pipe"),
'=' => result.push_str("equal"),
'~' => result.push_str("tilde"),
'?' => result.push_str("question"),
'`' => result.push_str("backtick"),
'^' => result.push_str("caret"),
'!' => result.push_str("bang"),
'#' => result.push_str("hash"),
'%' => result.push_str("percent"),
'&' => result.push_str("ampersand"),
'.' => result.push_str("dot"),
',' => result.push_str("comma"),
'/' => result.push_str("slash"),
':' => result.push_str("colon"),
';' => result.push_str("semicolon"),
'"' => result.push_str("dquote"),
'*' => result.push_str("star"),
'+' => result.push_str("plus"),
'-' => result.push_str("minus"),
'@' => result.push_str("at"),
_ => result.push_str(&c.to_lowercase().to_string()),
}
}
}
/// Deserializes the node types from the JSON at the given `path`.
pub fn read(path: &Path) -> Result<Vec<NodeInfo>, Error> {
let json_data = std::fs::read_to_string(path)?;
let node_types: Vec<NodeInfo> = serde_json::from_str(&json_data)?;
Ok(node_types)
for &keyword in &RESERVED_KEYWORDS {
if result == keyword {
result.push_str("__");
break;
}
}
result
}