mirror of
https://github.com/github/codeql.git
synced 2026-02-19 16:33:40 +01:00
Merge pull request #7 from github/aibaars/refactor
Refactor dbscheme generator to use intermediate representation
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -101,6 +101,8 @@ name = "generator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"node-types",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -265,8 +267,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"clap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"node-types",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter",
|
||||
|
||||
@@ -7,9 +7,8 @@ edition = "2018"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
node-types = { path = "../node-types" }
|
||||
tree-sitter = "0.17.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
clap = "2.33"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use super::nodes_types::{Entry, Field, Storage, TypeName};
|
||||
|
||||
use node_types::{escape_name, node_type_name, Entry, Field, Storage, TypeName};
|
||||
use std::collections::BTreeMap as Map;
|
||||
use std::collections::BTreeSet as Set;
|
||||
use std::fmt;
|
||||
@@ -23,7 +22,7 @@ impl Extractor {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
file = &path.display().to_string()[..]
|
||||
file = %path.display()
|
||||
);
|
||||
|
||||
let _enter = span.enter();
|
||||
@@ -99,11 +98,7 @@ struct Visitor<'a> {
|
||||
impl Visitor<'_> {
|
||||
fn enter_node(&mut self, node: Node) -> bool {
|
||||
if node.is_error() {
|
||||
error!(
|
||||
"{}:{}: parse error",
|
||||
&self.path,
|
||||
node.start_position().row,
|
||||
);
|
||||
error!("{}:{}: parse error", &self.path, node.start_position().row);
|
||||
return false;
|
||||
}
|
||||
if node.is_missing() {
|
||||
@@ -237,10 +232,10 @@ impl Visitor<'_> {
|
||||
)
|
||||
}
|
||||
}
|
||||
Storage::Table { parent, index } => {
|
||||
Storage::Table { index } => {
|
||||
for child_id in child_ids {
|
||||
self.trap_output.push(TrapEntry::ChildOf(
|
||||
node_type_name(&parent.kind, parent.named),
|
||||
node_type_name(&field.parent.kind, field.parent.named),
|
||||
parent_id,
|
||||
match &field.name {
|
||||
Some(name) => name.to_owned(),
|
||||
@@ -427,74 +422,3 @@ impl fmt::Display for Arg {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const RESERVED_KEYWORDS: [&'static str; 14] = [
|
||||
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
|
||||
"type", "unique", "varchar",
|
||||
];
|
||||
|
||||
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
|
||||
/// QL identifier.
|
||||
pub fn escape_name(name: &str) -> String {
|
||||
let mut result = String::new();
|
||||
|
||||
// If there's a leading underscore, replace it with 'underscore_'.
|
||||
if let Some(c) = name.chars().next() {
|
||||
if c == '_' {
|
||||
result.push_str("underscore");
|
||||
}
|
||||
}
|
||||
for c in name.chars() {
|
||||
match c {
|
||||
'{' => result.push_str("lbrace"),
|
||||
'}' => result.push_str("rbrace"),
|
||||
'<' => result.push_str("langle"),
|
||||
'>' => result.push_str("rangle"),
|
||||
'[' => result.push_str("lbracket"),
|
||||
']' => result.push_str("rbracket"),
|
||||
'(' => result.push_str("lparen"),
|
||||
')' => result.push_str("rparen"),
|
||||
'|' => result.push_str("pipe"),
|
||||
'=' => result.push_str("equal"),
|
||||
'~' => result.push_str("tilde"),
|
||||
'?' => result.push_str("question"),
|
||||
'`' => result.push_str("backtick"),
|
||||
'^' => result.push_str("caret"),
|
||||
'!' => result.push_str("bang"),
|
||||
'#' => result.push_str("hash"),
|
||||
'%' => result.push_str("percent"),
|
||||
'&' => result.push_str("ampersand"),
|
||||
'.' => result.push_str("dot"),
|
||||
',' => result.push_str("comma"),
|
||||
'/' => result.push_str("slash"),
|
||||
':' => result.push_str("colon"),
|
||||
';' => result.push_str("semicolon"),
|
||||
'"' => result.push_str("dquote"),
|
||||
'*' => result.push_str("star"),
|
||||
'+' => result.push_str("plus"),
|
||||
'-' => result.push_str("minus"),
|
||||
'@' => result.push_str("at"),
|
||||
_ => result.push_str(&c.to_lowercase().to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
for &keyword in &RESERVED_KEYWORDS {
|
||||
if result == keyword {
|
||||
result.push_str("__");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Given a tree-sitter node type's (kind, named) pair, returns a single string
|
||||
/// representing the (unescaped) name we'll use to refer to corresponding QL
|
||||
/// type.
|
||||
fn node_type_name(kind: &str, named: bool) -> String {
|
||||
if named {
|
||||
kind.to_string()
|
||||
} else {
|
||||
format!("{}_unnamed", kind)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
mod extractor;
|
||||
mod nodes_types;
|
||||
|
||||
use clap;
|
||||
use std::fs;
|
||||
@@ -44,7 +43,7 @@ fn main() -> std::io::Result<()> {
|
||||
|
||||
let node_types_path = PathBuf::from("tree-sitter-ruby/src/node-types.json");
|
||||
let language = unsafe { tree_sitter_ruby() };
|
||||
let schema = nodes_types::read_node_types(&node_types_path)?;
|
||||
let schema = node_types::read_node_types(&node_types_path)?;
|
||||
let mut extractor = extractor::create(language, schema);
|
||||
for line in std::io::BufReader::new(file_list).lines() {
|
||||
let path = PathBuf::from(line?);
|
||||
|
||||
@@ -1,170 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
use std::collections::BTreeMap as Map;
|
||||
use std::collections::BTreeSet as Set;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Entry {
|
||||
Union {
|
||||
type_name: TypeName,
|
||||
members: Set<TypeName>,
|
||||
},
|
||||
Table {
|
||||
type_name: TypeName,
|
||||
fields: Vec<Field>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||
pub struct TypeName {
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Field {
|
||||
pub types: Set<TypeName>,
|
||||
/// The name of the field or None for the anonymous 'children'
|
||||
/// entry from node_types.json
|
||||
pub name: Option<String>,
|
||||
pub storage: Storage,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Storage {
|
||||
/// the field is stored as a column in the parent table
|
||||
Column,
|
||||
// the field is store in a link table
|
||||
Table {
|
||||
parent: TypeName,
|
||||
index: usize,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn read_node_types(node_types_path: &Path) -> std::io::Result<Vec<Entry>> {
|
||||
let file = fs::File::open(node_types_path)?;
|
||||
let node_types = serde_json::from_reader(file)?;
|
||||
Ok(convert_nodes(node_types))
|
||||
}
|
||||
|
||||
fn convert_type(node_type: &NodeType) -> TypeName {
|
||||
TypeName {
|
||||
kind: node_type.kind.to_string(),
|
||||
named: node_type.named,
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_types(node_types: &Vec<NodeType>) -> Set<TypeName> {
|
||||
let iter = node_types.iter().map(convert_type).collect();
|
||||
std::collections::BTreeSet::from(iter)
|
||||
}
|
||||
pub fn convert_nodes(nodes: Vec<NodeInfo>) -> Vec<Entry> {
|
||||
let mut entries: Vec<Entry> = Vec::new();
|
||||
|
||||
for node in nodes {
|
||||
if let Some(subtypes) = &node.subtypes {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
entries.push(Entry::Union {
|
||||
type_name: TypeName {
|
||||
kind: node.kind,
|
||||
named: node.named,
|
||||
},
|
||||
members: convert_types(&subtypes),
|
||||
});
|
||||
} else {
|
||||
// It's a product type, defined by a table.
|
||||
let type_name = TypeName {
|
||||
kind: node.kind,
|
||||
named: node.named,
|
||||
};
|
||||
let mut fields = Vec::new();
|
||||
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
if let Some(node_fields) = &node.fields {
|
||||
for (field_name, field_info) in node_fields {
|
||||
add_field(
|
||||
&type_name,
|
||||
Some(field_name.to_string()),
|
||||
field_info,
|
||||
&mut fields,
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(children) = &node.children {
|
||||
// Treat children as if they were a field called 'child'.
|
||||
add_field(&type_name, None, children, &mut fields);
|
||||
}
|
||||
entries.push(Entry::Table { type_name, fields });
|
||||
}
|
||||
}
|
||||
entries
|
||||
}
|
||||
|
||||
fn add_field(
|
||||
parent_type_name: &TypeName,
|
||||
field_name: Option<String>,
|
||||
field_info: &FieldInfo,
|
||||
fields: &mut Vec<Field>,
|
||||
) {
|
||||
let storage;
|
||||
if !field_info.multiple && field_info.required {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
storage = Storage::Column;
|
||||
} else {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
storage = Storage::Table {
|
||||
parent: TypeName {
|
||||
kind: parent_type_name.kind.to_string(),
|
||||
named: parent_type_name.named,
|
||||
},
|
||||
index: fields.len(),
|
||||
};
|
||||
}
|
||||
fields.push(Field {
|
||||
types: convert_types(&field_info.types),
|
||||
name: field_name,
|
||||
storage,
|
||||
});
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fields: Option<Map<String, FieldInfo>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub children: Option<FieldInfo>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub subtypes: Option<Vec<NodeType>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeType {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FieldInfo {
|
||||
pub multiple: bool,
|
||||
pub required: bool,
|
||||
pub types: Vec<NodeType>,
|
||||
}
|
||||
|
||||
impl Default for FieldInfo {
|
||||
fn default() -> Self {
|
||||
FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8,3 +8,5 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
node-types = { path = "../node-types" }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
|
||||
|
||||
@@ -49,66 +49,6 @@ pub enum QlColumnType {
|
||||
Custom(String),
|
||||
}
|
||||
|
||||
const RESERVED_KEYWORDS: [&'static str; 14] = [
|
||||
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
|
||||
"type", "unique", "varchar",
|
||||
];
|
||||
|
||||
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
|
||||
/// QL identifier.
|
||||
pub fn escape_name(name: &str) -> String {
|
||||
let mut result = String::new();
|
||||
|
||||
// If there's a leading underscore, replace it with 'underscore_'.
|
||||
if let Some(c) = name.chars().next() {
|
||||
if c == '_' {
|
||||
result.push_str("underscore");
|
||||
}
|
||||
}
|
||||
for c in name.chars() {
|
||||
match c {
|
||||
'{' => result.push_str("lbrace"),
|
||||
'}' => result.push_str("rbrace"),
|
||||
'<' => result.push_str("langle"),
|
||||
'>' => result.push_str("rangle"),
|
||||
'[' => result.push_str("lbracket"),
|
||||
']' => result.push_str("rbracket"),
|
||||
'(' => result.push_str("lparen"),
|
||||
')' => result.push_str("rparen"),
|
||||
'|' => result.push_str("pipe"),
|
||||
'=' => result.push_str("equal"),
|
||||
'~' => result.push_str("tilde"),
|
||||
'?' => result.push_str("question"),
|
||||
'`' => result.push_str("backtick"),
|
||||
'^' => result.push_str("caret"),
|
||||
'!' => result.push_str("bang"),
|
||||
'#' => result.push_str("hash"),
|
||||
'%' => result.push_str("percent"),
|
||||
'&' => result.push_str("ampersand"),
|
||||
'.' => result.push_str("dot"),
|
||||
',' => result.push_str("comma"),
|
||||
'/' => result.push_str("slash"),
|
||||
':' => result.push_str("colon"),
|
||||
';' => result.push_str("semicolon"),
|
||||
'"' => result.push_str("dquote"),
|
||||
'*' => result.push_str("star"),
|
||||
'+' => result.push_str("plus"),
|
||||
'-' => result.push_str("minus"),
|
||||
'@' => result.push_str("at"),
|
||||
_ => result.push_str(&c.to_lowercase().to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
for &keyword in &RESERVED_KEYWORDS {
|
||||
if result == keyword {
|
||||
result.push_str("__");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
impl fmt::Display for Table {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if let Some(keyset) = &self.keysets {
|
||||
|
||||
@@ -2,21 +2,12 @@ mod dbscheme;
|
||||
mod language;
|
||||
|
||||
use language::Language;
|
||||
use node_types::{FieldInfo, NodeInfo};
|
||||
use node_types;
|
||||
use std::collections::BTreeSet as Set;
|
||||
use std::fs::File;
|
||||
use std::io::LineWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Given a tree-sitter node type's (kind, named) pair, returns a single string
|
||||
/// representing the (unescaped) name we'll use to refer to corresponding QL
|
||||
/// type.
|
||||
fn node_type_name(kind: &str, named: bool) -> String {
|
||||
if named {
|
||||
kind.to_string()
|
||||
} else {
|
||||
format!("{}_unnamed", kind)
|
||||
}
|
||||
}
|
||||
use tracing::{error, info};
|
||||
|
||||
/// Given the name of the parent node, and its field information, returns the
|
||||
/// name of the field's type. This may be an ad-hoc union of all the possible
|
||||
@@ -24,21 +15,21 @@ fn node_type_name(kind: &str, named: bool) -> String {
|
||||
fn make_field_type(
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
types: &Set<node_types::TypeName>,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) -> String {
|
||||
if field_info.types.len() == 1 {
|
||||
if types.len() == 1 {
|
||||
// This field can only have a single type.
|
||||
let t = &field_info.types[0];
|
||||
dbscheme::escape_name(&node_type_name(&t.kind, t.named))
|
||||
let t = types.iter().next().unwrap();
|
||||
node_types::escape_name(&node_types::node_type_name(&t.kind, t.named))
|
||||
} else {
|
||||
// This field can have one of several types. Create an ad-hoc QL union
|
||||
// type to represent them.
|
||||
let field_union_name = format!("{}_{}_type", parent_name, field_name);
|
||||
let field_union_name = dbscheme::escape_name(&field_union_name);
|
||||
let field_union_name = node_types::escape_name(&field_union_name);
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for field_type in &field_info.types {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
for field_type in types {
|
||||
members.push(node_types::escape_name(&node_types::node_type_name(
|
||||
&field_type.kind,
|
||||
field_type.named,
|
||||
)));
|
||||
@@ -55,140 +46,146 @@ fn make_field_type(
|
||||
/// column on `main_table`, or as an auxiliary table.
|
||||
fn add_field(
|
||||
main_table: &mut dbscheme::Table,
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
field: &node_types::Field,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) {
|
||||
if field_info.multiple || !field_info.required {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
let field_table = dbscheme::Table {
|
||||
name: format!("{}_{}", parent_name, field_name),
|
||||
columns: vec![
|
||||
// First column is a reference to the parent.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: dbscheme::escape_name(parent_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// Then an index column.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "index".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Int,
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// And then the field
|
||||
dbscheme::Column {
|
||||
unique: true,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: field_type.clone(),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
],
|
||||
// In addition to the field being unique, the combination of
|
||||
// parent+index is unique, so add a keyset for them.
|
||||
keysets: Some(vec![
|
||||
dbscheme::escape_name(parent_name),
|
||||
"index".to_string(),
|
||||
]),
|
||||
};
|
||||
entries.push(dbscheme::Entry::Table(field_table));
|
||||
} else {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: String::from(field_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
let field_name = match &field.name {
|
||||
None => "child".to_owned(),
|
||||
Some(x) => x.to_owned(),
|
||||
};
|
||||
let parent_name = node_types::node_type_name(&field.parent.kind, field.parent.named);
|
||||
match field.storage {
|
||||
node_types::Storage::Table { .. } => {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
let field_type = make_field_type(&parent_name, &field_name, &field.types, entries);
|
||||
let field_table = dbscheme::Table {
|
||||
name: format!("{}_{}", parent_name, field_name),
|
||||
columns: vec![
|
||||
// First column is a reference to the parent.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: node_types::escape_name(&parent_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name(
|
||||
&parent_name,
|
||||
)),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// Then an index column.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "index".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Int,
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// And then the field
|
||||
dbscheme::Column {
|
||||
unique: true,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: field_type.clone(),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
],
|
||||
// In addition to the field being unique, the combination of
|
||||
// parent+index is unique, so add a keyset for them.
|
||||
keysets: Some(vec![
|
||||
node_types::escape_name(&parent_name),
|
||||
"index".to_string(),
|
||||
]),
|
||||
};
|
||||
entries.push(dbscheme::Entry::Table(field_table));
|
||||
}
|
||||
node_types::Storage::Column => {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
let field_type = make_field_type(&parent_name, &field_name, &field.types, entries);
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: field_name,
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
|
||||
fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
|
||||
fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
|
||||
let mut entries: Vec<dbscheme::Entry> = Vec::new();
|
||||
let mut top_members: Vec<String> = Vec::new();
|
||||
|
||||
for node in nodes {
|
||||
if let Some(subtypes) = &node.subtypes {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for subtype in subtypes {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
&subtype.kind,
|
||||
subtype.named,
|
||||
)))
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)),
|
||||
members,
|
||||
}));
|
||||
} else {
|
||||
// It's a product type, defined by a table.
|
||||
let name = node_type_name(&node.kind, node.named);
|
||||
let mut main_table = dbscheme::Table {
|
||||
name: dbscheme::escape_name(&(format!("{}_def", name))),
|
||||
columns: vec![dbscheme::Column {
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "id".to_string(),
|
||||
unique: true,
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)),
|
||||
ql_type_is_ref: false,
|
||||
}],
|
||||
keysets: None,
|
||||
};
|
||||
top_members.push(dbscheme::escape_name(&name));
|
||||
|
||||
let mut is_leaf = true;
|
||||
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
if let Some(fields) = &node.fields {
|
||||
for (field_name, field_info) in fields {
|
||||
is_leaf = false;
|
||||
add_field(&mut main_table, &name, field_name, field_info, &mut entries);
|
||||
match &node {
|
||||
node_types::Entry::Union {
|
||||
type_name,
|
||||
members: n_members,
|
||||
} => {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for n_member in n_members {
|
||||
members.push(node_types::escape_name(&node_types::node_type_name(
|
||||
&n_member.kind,
|
||||
n_member.named,
|
||||
)))
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: node_types::escape_name(&node_types::node_type_name(
|
||||
&type_name.kind,
|
||||
type_name.named,
|
||||
)),
|
||||
members,
|
||||
}));
|
||||
}
|
||||
if let Some(children) = &node.children {
|
||||
is_leaf = false;
|
||||
node_types::Entry::Table { type_name, fields } => {
|
||||
// It's a product type, defined by a table.
|
||||
let name = node_types::node_type_name(&type_name.kind, type_name.named);
|
||||
let mut main_table = dbscheme::Table {
|
||||
name: node_types::escape_name(&(format!("{}_def", name))),
|
||||
columns: vec![dbscheme::Column {
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "id".to_string(),
|
||||
unique: true,
|
||||
ql_type: dbscheme::QlColumnType::Custom(node_types::escape_name(&name)),
|
||||
ql_type_is_ref: false,
|
||||
}],
|
||||
keysets: None,
|
||||
};
|
||||
top_members.push(node_types::escape_name(&name));
|
||||
|
||||
// Treat children as if they were a field called 'child'.
|
||||
add_field(&mut main_table, &name, "child", children, &mut entries);
|
||||
}
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
for field in fields {
|
||||
add_field(&mut main_table, &field, &mut entries);
|
||||
}
|
||||
|
||||
if is_leaf {
|
||||
// There were no fields and no children, so it's a leaf node in
|
||||
// the TS grammar. Add a column for the node text.
|
||||
if fields.is_empty() {
|
||||
// There were no fields and no children, so it's a leaf node in
|
||||
// the TS grammar. Add a column for the node text.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::String,
|
||||
name: "text".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::String,
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
|
||||
// Finally, the type's defining table also includes the location.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::String,
|
||||
name: "text".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::String,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "loc".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
|
||||
entries.push(dbscheme::Entry::Table(main_table));
|
||||
}
|
||||
|
||||
// Finally, the type's defining table also includes the location.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "loc".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
|
||||
entries.push(dbscheme::Entry::Table(main_table));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,7 +199,7 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
|
||||
}
|
||||
|
||||
fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> {
|
||||
println!(
|
||||
info!(
|
||||
"Writing to '{}'",
|
||||
match language.dbscheme_path.to_str() {
|
||||
None => "<undisplayable>",
|
||||
@@ -280,6 +277,13 @@ fn create_source_location_prefix_entry() -> dbscheme::Entry {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
tracing_subscriber::fmt()
|
||||
.with_target(false)
|
||||
.without_time()
|
||||
.with_level(true)
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
// TODO: figure out proper dbscheme output path and/or take it from the
|
||||
// command line.
|
||||
let ruby = Language {
|
||||
@@ -287,16 +291,9 @@ fn main() {
|
||||
node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"),
|
||||
dbscheme_path: PathBuf::from("ruby.dbscheme"),
|
||||
};
|
||||
match node_types::read(&ruby.node_types_path) {
|
||||
match node_types::read_node_types(&ruby.node_types_path) {
|
||||
Err(e) => {
|
||||
println!(
|
||||
"Failed to read '{}': {}",
|
||||
match ruby.node_types_path.to_str() {
|
||||
None => "<undisplayable>",
|
||||
Some(p) => p,
|
||||
},
|
||||
e
|
||||
);
|
||||
error!("Failed to read '{}': {}", ruby.node_types_path.display(), e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
Ok(nodes) => {
|
||||
@@ -305,7 +302,7 @@ fn main() {
|
||||
dbscheme_entries.push(create_source_location_prefix_entry());
|
||||
match write_dbscheme(&ruby, &dbscheme_entries) {
|
||||
Err(e) => {
|
||||
println!("Failed to write dbscheme: {}", e);
|
||||
error!("Failed to write dbscheme: {}", e);
|
||||
std::process::exit(2);
|
||||
}
|
||||
Ok(()) => {}
|
||||
|
||||
@@ -1,8 +1,137 @@
|
||||
use serde::Deserialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use std::collections::BTreeSet as Set;
|
||||
use std::fs;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Entry {
|
||||
Union {
|
||||
type_name: TypeName,
|
||||
members: Set<TypeName>,
|
||||
},
|
||||
Table {
|
||||
type_name: TypeName,
|
||||
fields: Vec<Field>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
|
||||
pub struct TypeName {
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Field {
|
||||
pub parent: TypeName,
|
||||
pub types: Set<TypeName>,
|
||||
/// The name of the field or None for the anonymous 'children'
|
||||
/// entry from node_types.json
|
||||
pub name: Option<String>,
|
||||
pub storage: Storage,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Storage {
|
||||
/// the field is stored as a column in the parent table
|
||||
Column,
|
||||
// the field is store in a link table
|
||||
Table {
|
||||
index: usize,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn read_node_types(node_types_path: &Path) -> std::io::Result<Vec<Entry>> {
|
||||
let file = fs::File::open(node_types_path)?;
|
||||
let node_types = serde_json::from_reader(file)?;
|
||||
Ok(convert_nodes(node_types))
|
||||
}
|
||||
|
||||
fn convert_type(node_type: &NodeType) -> TypeName {
|
||||
TypeName {
|
||||
kind: node_type.kind.to_string(),
|
||||
named: node_type.named,
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_types(node_types: &Vec<NodeType>) -> Set<TypeName> {
|
||||
let iter = node_types.iter().map(convert_type).collect();
|
||||
std::collections::BTreeSet::from(iter)
|
||||
}
|
||||
pub fn convert_nodes(nodes: Vec<NodeInfo>) -> Vec<Entry> {
|
||||
let mut entries: Vec<Entry> = Vec::new();
|
||||
|
||||
for node in nodes {
|
||||
if let Some(subtypes) = &node.subtypes {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
entries.push(Entry::Union {
|
||||
type_name: TypeName {
|
||||
kind: node.kind,
|
||||
named: node.named,
|
||||
},
|
||||
members: convert_types(&subtypes),
|
||||
});
|
||||
} else {
|
||||
// It's a product type, defined by a table.
|
||||
let type_name = TypeName {
|
||||
kind: node.kind,
|
||||
named: node.named,
|
||||
};
|
||||
let mut fields = Vec::new();
|
||||
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
if let Some(node_fields) = &node.fields {
|
||||
for (field_name, field_info) in node_fields {
|
||||
add_field(
|
||||
&type_name,
|
||||
Some(field_name.to_string()),
|
||||
field_info,
|
||||
&mut fields,
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(children) = &node.children {
|
||||
// Treat children as if they were a field called 'child'.
|
||||
add_field(&type_name, None, children, &mut fields);
|
||||
}
|
||||
entries.push(Entry::Table { type_name, fields });
|
||||
}
|
||||
}
|
||||
entries
|
||||
}
|
||||
|
||||
fn add_field(
|
||||
parent_type_name: &TypeName,
|
||||
field_name: Option<String>,
|
||||
field_info: &FieldInfo,
|
||||
fields: &mut Vec<Field>,
|
||||
) {
|
||||
let storage;
|
||||
if !field_info.multiple && field_info.required {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
storage = Storage::Column;
|
||||
} else {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
storage = Storage::Table {
|
||||
index: fields.len(),
|
||||
};
|
||||
}
|
||||
fields.push(Field {
|
||||
parent: TypeName {
|
||||
kind: parent_type_name.kind.to_string(),
|
||||
named: parent_type_name.named,
|
||||
},
|
||||
types: convert_types(&field_info.types),
|
||||
name: field_name,
|
||||
storage,
|
||||
});
|
||||
}
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
#[serde(rename = "type")]
|
||||
@@ -30,45 +159,73 @@ pub struct FieldInfo {
|
||||
pub types: Vec<NodeType>,
|
||||
}
|
||||
|
||||
impl Default for FieldInfo {
|
||||
fn default() -> Self {
|
||||
FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
/// Given a tree-sitter node type's (kind, named) pair, returns a single string
|
||||
/// representing the (unescaped) name we'll use to refer to corresponding QL
|
||||
/// type.
|
||||
pub fn node_type_name(kind: &str, named: bool) -> String {
|
||||
if named {
|
||||
kind.to_string()
|
||||
} else {
|
||||
format!("{}_unnamed", kind)
|
||||
}
|
||||
}
|
||||
|
||||
const RESERVED_KEYWORDS: [&'static str; 14] = [
|
||||
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
|
||||
"type", "unique", "varchar",
|
||||
];
|
||||
|
||||
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
|
||||
/// QL identifier.
|
||||
pub fn escape_name(name: &str) -> String {
|
||||
let mut result = String::new();
|
||||
|
||||
// If there's a leading underscore, replace it with 'underscore_'.
|
||||
if let Some(c) = name.chars().next() {
|
||||
if c == '_' {
|
||||
result.push_str("underscore");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Error {
|
||||
IOError(std::io::Error),
|
||||
JsonError(serde_json::error::Error),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(error: std::io::Error) -> Self {
|
||||
Error::IOError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(error: serde_json::Error) -> Self {
|
||||
Error::JsonError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Error::IOError(e) => write!(f, "{}", e),
|
||||
Error::JsonError(e) => write!(f, "{}", e),
|
||||
for c in name.chars() {
|
||||
match c {
|
||||
'{' => result.push_str("lbrace"),
|
||||
'}' => result.push_str("rbrace"),
|
||||
'<' => result.push_str("langle"),
|
||||
'>' => result.push_str("rangle"),
|
||||
'[' => result.push_str("lbracket"),
|
||||
']' => result.push_str("rbracket"),
|
||||
'(' => result.push_str("lparen"),
|
||||
')' => result.push_str("rparen"),
|
||||
'|' => result.push_str("pipe"),
|
||||
'=' => result.push_str("equal"),
|
||||
'~' => result.push_str("tilde"),
|
||||
'?' => result.push_str("question"),
|
||||
'`' => result.push_str("backtick"),
|
||||
'^' => result.push_str("caret"),
|
||||
'!' => result.push_str("bang"),
|
||||
'#' => result.push_str("hash"),
|
||||
'%' => result.push_str("percent"),
|
||||
'&' => result.push_str("ampersand"),
|
||||
'.' => result.push_str("dot"),
|
||||
',' => result.push_str("comma"),
|
||||
'/' => result.push_str("slash"),
|
||||
':' => result.push_str("colon"),
|
||||
';' => result.push_str("semicolon"),
|
||||
'"' => result.push_str("dquote"),
|
||||
'*' => result.push_str("star"),
|
||||
'+' => result.push_str("plus"),
|
||||
'-' => result.push_str("minus"),
|
||||
'@' => result.push_str("at"),
|
||||
_ => result.push_str(&c.to_lowercase().to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserializes the node types from the JSON at the given `path`.
|
||||
pub fn read(path: &Path) -> Result<Vec<NodeInfo>, Error> {
|
||||
let json_data = std::fs::read_to_string(path)?;
|
||||
let node_types: Vec<NodeInfo> = serde_json::from_str(&json_data)?;
|
||||
Ok(node_types)
|
||||
for &keyword in &RESERVED_KEYWORDS {
|
||||
if result == keyword {
|
||||
result.push_str("__");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user