Merge pull request #1 from github/dbscheme

Basic dbscheme generation from `node-types.json`
This commit is contained in:
Nick Rolfe
2020-10-22 12:29:44 +01:00
committed by GitHub
5 changed files with 2455 additions and 2 deletions

195
generator/src/dbscheme.rs Normal file
View File

@@ -0,0 +1,195 @@
use std::fmt;
/// Represents a distinct entry in the database schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Entry {
    /// An entry defining a database table.
    Table(Table),
    /// An entry defining a type that is a union of other types.
    Union(Union),
}

/// A table in the database schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Table {
    /// The name of the table.
    pub name: String,
    /// The table's columns, in the order they are written out.
    pub columns: Vec<Column>,
    /// The column names listed in the table's `#keyset[...]` annotation, if
    /// it has one.
    pub keysets: Option<Vec<String>>,
}

/// A union in the database schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Union {
    /// The name of the union type (written with a leading `@`).
    pub name: String,
    /// The names of the member types (each written with a leading `@`).
    pub members: Vec<String>,
}

/// A column in a table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Column {
    /// The database type of the column.
    pub db_type: DbColumnType,
    /// The name of the column.
    pub name: String,
    /// Whether the column is written with the `unique` modifier.
    pub unique: bool,
    /// The QL type of the column.
    pub ql_type: QlColumnType,
    /// Whether the QL type is written with a trailing `ref`.
    pub ql_type_is_ref: bool,
}

/// The database column type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbColumnType {
    /// Written as `int`.
    Int,
    /// Written as `string`.
    String,
}

/// The QL type of a column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QlColumnType {
    /// Primitive `int` type.
    Int,
    /// Primitive `string` type.
    String,
    /// A custom type, defined elsewhere by a table or union.
    Custom(String),
}
/// Names that `escape_name` must not return verbatim; it appends `__` to any
/// escaped name that matches one of these exactly.
const RESERVED_KEYWORDS: [&str; 14] = [
    "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
    "type", "unique", "varchar",
];

/// Returns a string that's a copy of `name` but suitably escaped to be a valid
/// QL identifier.
///
/// Punctuation characters are spelled out as words, all other characters are
/// lowercased, a leading underscore is prefixed with `underscore`, and a
/// result that equals a reserved keyword gets a `__` suffix.
pub fn escape_name(name: &str) -> String {
    let mut escaped = String::new();

    // The underscore itself is still copied by the loop below, so a leading
    // `_` ends up as `underscore_`.
    if name.starts_with('_') {
        escaped.push_str("underscore");
    }

    for c in name.chars() {
        let word = match c {
            '{' => "lbrace",
            '}' => "rbrace",
            '<' => "langle",
            '>' => "rangle",
            '[' => "lbracket",
            ']' => "rbracket",
            '(' => "lparen",
            ')' => "rparen",
            '|' => "pipe",
            '=' => "equal",
            '~' => "tilde",
            '?' => "question",
            '`' => "backtick",
            '^' => "caret",
            '!' => "bang",
            '#' => "hash",
            '%' => "percent",
            '&' => "ampersand",
            '.' => "dot",
            ',' => "comma",
            '/' => "slash",
            ':' => "colon",
            ';' => "semicolon",
            '"' => "dquote",
            '*' => "star",
            '+' => "plus",
            '-' => "minus",
            '@' => "at",
            other => {
                // Anything without a spelled-out name is copied in lowercase.
                escaped.extend(other.to_lowercase());
                continue;
            }
        };
        escaped.push_str(word);
    }

    // The keyword check runs on the fully escaped (and lowercased) name.
    if RESERVED_KEYWORDS.contains(&escaped.as_str()) {
        escaped.push_str("__");
    }
    escaped
}
impl fmt::Display for Table {
    /// Writes the table as a dbscheme declaration: an optional `#keyset[...]`
    /// line, then `name(`, one line per column, and a closing `);`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(keys) = &self.keysets {
            writeln!(f, "#keyset[{}]", keys.join(", "))?;
        }
        writeln!(f, "{}(", self.name)?;

        let mut remaining = self.columns.iter().peekable();
        while let Some(column) = remaining.next() {
            let unique = if column.unique { "unique " } else { "" };
            let db_type = match column.db_type {
                DbColumnType::Int => "int",
                DbColumnType::String => "string",
            };
            write!(f, " {}{} {}: ", unique, db_type, column.name)?;

            match &column.ql_type {
                QlColumnType::Int => f.write_str("int")?,
                QlColumnType::String => f.write_str("string")?,
                QlColumnType::Custom(name) => write!(f, "@{}", name)?,
            }

            let reference = if column.ql_type_is_ref { " ref" } else { "" };
            // Every column except the last is followed by a comma.
            let separator = if remaining.peek().is_some() { "," } else { "" };
            writeln!(f, "{}{}", reference, separator)?;
        }

        f.write_str(");")
    }
}
impl fmt::Display for Union {
    /// Writes the union as `@name = @member | @member | ...`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "@{} = ", self.name)?;
        for (position, member) in self.members.iter().enumerate() {
            // Members after the first are preceded by the `|` separator.
            let separator = if position == 0 { "" } else { " | " };
            write!(f, "{}@{}", separator, member)?;
        }
        Ok(())
    }
}
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
///
/// The output starts with a two-line comment header naming `language_name`,
/// followed by each entry and a blank line after it.
pub fn write(
    language_name: &str,
    file: &mut dyn std::io::Write,
    entries: &[Entry],
) -> std::io::Result<()> {
    writeln!(file, "// CodeQL database schema for {}", language_name)?;
    writeln!(
        file,
        "// Automatically generated from the tree-sitter grammar; do not edit\n"
    )?;

    for entry in entries {
        // Both entry kinds are rendered through their `Display` impls.
        let rendered: &dyn std::fmt::Display = match entry {
            Entry::Table(table_entry) => table_entry,
            Entry::Union(union_entry) => union_entry,
        };
        write!(file, "{}\n\n", rendered)?;
    }
    Ok(())
}

View File

@@ -0,0 +1,7 @@
use std::path::PathBuf;
/// Describes a language for which a dbscheme is generated.
#[derive(Debug, Clone)]
pub struct Language {
    /// The display name of the language.
    pub name: String,
    /// Path to the `node-types.json` file to read.
    pub node_types_path: PathBuf,
    /// Path of the dbscheme file to write.
    pub dbscheme_path: PathBuf,
}

View File

@@ -1,3 +1,316 @@
fn main() {
println!("generator");
mod dbscheme;
mod language;
mod node_types;
use language::Language;
use node_types::{FieldInfo, NodeInfo};
use std::fs::File;
use std::io::LineWriter;
use std::path::PathBuf;
/// Builds the (unescaped) name used for the QL type that corresponds to a
/// tree-sitter node type, identified by its (kind, named) pair.
///
/// Named node types keep their kind as-is; unnamed ones get an `_unnamed`
/// suffix.
fn node_type_name(kind: &str, named: bool) -> String {
    let suffix = if named { "" } else { "_unnamed" };
    format!("{}{}", kind, suffix)
}
/// Returns the name of the type of the field `field_name` on the node called
/// `parent_name`.
///
/// When the field has exactly one possible type, that type's escaped name is
/// returned and `entries` is left untouched. Otherwise an ad-hoc union named
/// after the parent and the field is appended to `entries`, and the union's
/// name is returned.
fn make_field_type(
    parent_name: &str,
    field_name: &str,
    field_info: &FieldInfo,
    entries: &mut Vec<dbscheme::Entry>,
) -> String {
    match field_info.types.as_slice() {
        // Exactly one possible type: refer to it directly.
        [only] => dbscheme::escape_name(&node_type_name(&only.kind, only.named)),
        // Any other number of types: describe them with an ad-hoc QL union.
        candidates => {
            let union_name =
                dbscheme::escape_name(&format!("{}_{}_type", parent_name, field_name));
            let members = candidates
                .iter()
                .map(|t| dbscheme::escape_name(&node_type_name(&t.kind, t.named)))
                .collect();
            entries.push(dbscheme::Entry::Union(dbscheme::Union {
                name: union_name.clone(),
                members,
            }));
            union_name
        }
    }
}
/// Adds the appropriate dbscheme information for the given field, either as a
/// column on `main_table`, or as an auxiliary table.
///
/// A field that is optional or may repeat gets its own auxiliary table, which
/// is appended to `entries`. A field that is required and appears once becomes
/// a column on `main_table`. In both cases any ad-hoc union needed for the
/// field's type is appended to `entries` first.
///
/// Every generated table and column name is passed through
/// `dbscheme::escape_name`, so a field whose name is a reserved keyword (for
/// example `key` or `type`) does not leak into the output unescaped.
fn add_field(
    main_table: &mut dbscheme::Table,
    parent_name: &str,
    field_name: &str,
    field_info: &FieldInfo,
    entries: &mut Vec<dbscheme::Entry>,
) {
    // Both storage strategies need the field's type, so resolve it once.
    let field_type = make_field_type(parent_name, field_name, field_info, entries);

    if field_info.multiple || !field_info.required {
        // This field can appear zero or multiple times, so put
        // it in an auxiliary table.
        let parent_column = dbscheme::escape_name(parent_name);
        let field_table = dbscheme::Table {
            name: dbscheme::escape_name(&format!("{}_{}", parent_name, field_name)),
            columns: vec![
                // First column is a reference to the parent.
                dbscheme::Column {
                    unique: false,
                    db_type: dbscheme::DbColumnType::Int,
                    name: parent_column.clone(),
                    ql_type: dbscheme::QlColumnType::Custom(parent_column.clone()),
                    ql_type_is_ref: true,
                },
                // Then an index column.
                dbscheme::Column {
                    unique: false,
                    db_type: dbscheme::DbColumnType::Int,
                    name: "index".to_string(),
                    ql_type: dbscheme::QlColumnType::Int,
                    ql_type_is_ref: true,
                },
                // And then the field.
                dbscheme::Column {
                    unique: true,
                    db_type: dbscheme::DbColumnType::Int,
                    name: field_type.clone(),
                    ql_type: dbscheme::QlColumnType::Custom(field_type),
                    ql_type_is_ref: true,
                },
            ],
            // In addition to the field being unique, the combination of
            // parent+index is unique, so add a keyset for them.
            keysets: Some(vec![parent_column, "index".to_string()]),
        };
        entries.push(dbscheme::Entry::Table(field_table));
    } else {
        // This field must appear exactly once, so we add it as
        // a column to the main table for the node type.
        main_table.columns.push(dbscheme::Column {
            unique: false,
            db_type: dbscheme::DbColumnType::Int,
            name: dbscheme::escape_name(field_name),
            ql_type: dbscheme::QlColumnType::Custom(field_type),
            ql_type_is_ref: true,
        });
    }
}
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
///
/// Supertype nodes become unions of their subtypes. Every other node gets a
/// `<name>_def` table, preceded by whatever auxiliary tables and unions its
/// fields and children need. A final `top` union lists every table-defined
/// type.
fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
    let mut entries = Vec::new();
    let mut top_members = Vec::new();

    for node in nodes {
        let name = node_type_name(&node.kind, node.named);

        match &node.subtypes {
            Some(subtypes) => {
                // It's a tree-sitter supertype node, for which we create a
                // union type.
                let members = subtypes
                    .iter()
                    .map(|subtype| {
                        dbscheme::escape_name(&node_type_name(&subtype.kind, subtype.named))
                    })
                    .collect();
                entries.push(dbscheme::Entry::Union(dbscheme::Union {
                    name: dbscheme::escape_name(&name),
                    members,
                }));
            }
            None => {
                // It's a product type, defined by a table.
                let escaped_name = dbscheme::escape_name(&name);
                let mut main_table = dbscheme::Table {
                    name: dbscheme::escape_name(&format!("{}_def", name)),
                    columns: vec![dbscheme::Column {
                        db_type: dbscheme::DbColumnType::Int,
                        name: "id".to_string(),
                        unique: true,
                        ql_type: dbscheme::QlColumnType::Custom(escaped_name.clone()),
                        ql_type_is_ref: false,
                    }],
                    keysets: None,
                };
                top_members.push(escaped_name);

                // A node with no fields and no children is a leaf in the TS
                // grammar.
                let has_fields = node
                    .fields
                    .as_ref()
                    .map_or(false, |fields| !fields.is_empty());
                let is_leaf = !has_fields && node.children.is_none();

                // Fields and children become either auxiliary tables or
                // columns in the defining table.
                for (field_name, field_info) in node.fields.iter().flatten() {
                    add_field(&mut main_table, &name, field_name, field_info, &mut entries);
                }
                if let Some(children) = &node.children {
                    // Treat children as if they were a field called 'child'.
                    add_field(&mut main_table, &name, "child", children, &mut entries);
                }

                if is_leaf {
                    // Leaf nodes get a column for the node text.
                    main_table.columns.push(dbscheme::Column {
                        unique: false,
                        db_type: dbscheme::DbColumnType::String,
                        name: "text".to_string(),
                        ql_type: dbscheme::QlColumnType::String,
                        ql_type_is_ref: true,
                    });
                }

                // Finally, the type's defining table also includes the location.
                main_table.columns.push(dbscheme::Column {
                    unique: false,
                    db_type: dbscheme::DbColumnType::Int,
                    name: "loc".to_string(),
                    ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
                    ql_type_is_ref: true,
                });
                entries.push(dbscheme::Entry::Table(main_table));
            }
        }
    }

    // Create a union of all database types.
    entries.push(dbscheme::Entry::Union(dbscheme::Union {
        name: "top".to_string(),
        members: top_members,
    }));
    entries
}
/// Writes `entries` as a dbscheme to `language.dbscheme_path`, creating or
/// truncating the file, and announces the destination on stdout.
///
/// # Errors
///
/// Returns any I/O error from creating the file, writing the entries, or
/// flushing the buffered output.
fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> {
    println!(
        "Writing to '{}'",
        language
            .dbscheme_path
            .to_str()
            .unwrap_or("<undisplayable>")
    );
    let file = File::create(&language.dbscheme_path)?;
    let mut file = LineWriter::new(file);
    dbscheme::write(&language.name, &mut file, entries)?;
    // Flush explicitly: the flush performed when the writer is dropped
    // discards any error, which would hide a truncated output file.
    std::io::Write::flush(&mut file)
}
/// Builds the `location` table entry: a unique `id` column, a `file_path`
/// string column, and four integer columns for the start/end line and column.
fn create_location_entry() -> dbscheme::Entry {
    // All four position columns share the same shape and differ only by name.
    let position_column = |name: &str| dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::Int,
        name: name.to_string(),
        ql_type: dbscheme::QlColumnType::Int,
        ql_type_is_ref: true,
    };

    let id_column = dbscheme::Column {
        unique: true,
        db_type: dbscheme::DbColumnType::Int,
        name: "id".to_string(),
        ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
        ql_type_is_ref: false,
    };
    let file_path_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: "file_path".to_string(),
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
    };

    dbscheme::Entry::Table(dbscheme::Table {
        name: "location".to_string(),
        keysets: None,
        columns: vec![
            id_column,
            file_path_column,
            position_column("start_line"),
            position_column("start_column"),
            position_column("end_line"),
            position_column("end_column"),
        ],
    })
}
/// Builds the `sourceLocationPrefix` table entry, which has a single string
/// column named `prefix`.
fn create_source_location_prefix_entry() -> dbscheme::Entry {
    let prefix_column = dbscheme::Column {
        name: "prefix".to_string(),
        db_type: dbscheme::DbColumnType::String,
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
        unique: false,
    };
    let table = dbscheme::Table {
        name: "sourceLocationPrefix".to_string(),
        columns: vec![prefix_column],
        keysets: None,
    };
    dbscheme::Entry::Table(table)
}
/// Reads the Ruby tree-sitter node types and writes the matching dbscheme.
///
/// Exits with status 1 if the node types cannot be read and with status 2 if
/// the dbscheme cannot be written. Failure messages go to stderr.
fn main() {
    // TODO: figure out proper dbscheme output path and/or take it from the
    // command line.
    let ruby = Language {
        name: "Ruby".to_string(),
        node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"),
        dbscheme_path: PathBuf::from("ruby.dbscheme"),
    };

    let nodes = match node_types::read(&ruby.node_types_path) {
        Ok(nodes) => nodes,
        Err(e) => {
            eprintln!(
                "Failed to read '{}': {}",
                ruby.node_types_path.to_str().unwrap_or("<undisplayable>"),
                e
            );
            std::process::exit(1);
        }
    };

    let mut dbscheme_entries = convert_nodes(&nodes);
    dbscheme_entries.push(create_location_entry());
    dbscheme_entries.push(create_source_location_prefix_entry());

    if let Err(e) = write_dbscheme(&ruby, &dbscheme_entries) {
        eprintln!("Failed to write dbscheme: {}", e);
        std::process::exit(2);
    }
}

View File

@@ -0,0 +1,74 @@
use serde::Deserialize;
use std::collections::BTreeMap;
use std::fmt;
use std::path::Path;
/// One element of the node-types JSON array.
#[derive(Debug, Clone, Deserialize)]
pub struct NodeInfo {
    /// The node's kind; stored under the `type` key in the JSON.
    #[serde(rename = "type")]
    pub kind: String,
    /// The node's `named` flag.
    pub named: bool,
    /// The node's fields, keyed by field name, if it has any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fields: Option<BTreeMap<String, FieldInfo>>,
    /// Information about the node's children, if it has any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub children: Option<FieldInfo>,
    /// The node's subtypes, if any are listed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subtypes: Option<Vec<NodeType>>,
}

/// A reference to a node type, identified by its (kind, named) pair.
#[derive(Debug, Clone, Deserialize)]
pub struct NodeType {
    /// The referenced node's kind; stored under the `type` key in the JSON.
    #[serde(rename = "type")]
    pub kind: String,
    /// The referenced node's `named` flag.
    pub named: bool,
}

/// Describes a field (or the children) of a node.
#[derive(Debug, Clone, Deserialize)]
pub struct FieldInfo {
    /// The field's `multiple` flag.
    pub multiple: bool,
    /// The field's `required` flag.
    pub required: bool,
    /// The node types the field can hold.
    pub types: Vec<NodeType>,
}
impl Default for FieldInfo {
fn default() -> Self {
FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
}
}
}
pub enum Error {
IOError(std::io::Error),
JsonError(serde_json::error::Error),
}
impl From<std::io::Error> for Error {
fn from(error: std::io::Error) -> Self {
Error::IOError(error)
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Self {
Error::JsonError(error)
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::IOError(e) => write!(f, "{}", e),
Error::JsonError(e) => write!(f, "{}", e),
}
}
}
/// Deserializes the node types from the JSON at the given `path`.
pub fn read(path: &Path) -> Result<Vec<NodeInfo>, Error> {
let json_data = std::fs::read_to_string(path)?;
let node_types: Vec<NodeInfo> = serde_json::from_str(&json_data)?;
Ok(node_types)
}

1864
ruby.dbscheme Normal file

File diff suppressed because it is too large Load Diff