mirror of
https://github.com/github/codeql.git
synced 2026-02-19 16:33:40 +01:00
Merge pull request #1 from github/dbscheme
Basic dbscheme generation from `node-types.json`
This commit is contained in:
195
generator/src/dbscheme.rs
Normal file
195
generator/src/dbscheme.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
use std::fmt;
|
||||
|
||||
/// Represents a distinct entry in the database schema.
|
||||
pub enum Entry {
|
||||
/// An entry defining a database table.
|
||||
Table(Table),
|
||||
|
||||
/// An entry defining type that is a union of other types.
|
||||
Union(Union),
|
||||
}
|
||||
|
||||
/// A table in the database schema.
|
||||
pub struct Table {
|
||||
pub name: String,
|
||||
pub columns: Vec<Column>,
|
||||
pub keysets: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
/// A union type in the database schema.
///
/// Rendered by its `Display` implementation as `@name = @member | @member ...`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Union {
    /// Name of the union type, without the leading `@`.
    pub name: String,
    /// Names of the member types, each without the leading `@`.
    pub members: Vec<String>,
}
|
||||
|
||||
/// A column in a table.
|
||||
pub struct Column {
|
||||
pub db_type: DbColumnType,
|
||||
pub name: String,
|
||||
pub unique: bool,
|
||||
pub ql_type: QlColumnType,
|
||||
pub ql_type_is_ref: bool,
|
||||
}
|
||||
|
||||
/// The database (storage) type of a column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbColumnType {
    /// Rendered as `int`.
    Int,
    /// Rendered as `string`.
    String,
}
|
||||
|
||||
/// The QL type of a column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QlColumnType {
    /// Primitive `int` type.
    Int,

    /// Primitive `string` type.
    String,

    /// A custom type, defined elsewhere by a table or union. The payload is
    /// the type name without the leading `@`.
    Custom(String),
}
|
||||
|
||||
/// Words that may not be used verbatim as generated names; `escape_name`
/// appends `__` to any result that equals one of them.
const RESERVED_KEYWORDS: [&str; 14] = [
    "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
    "type", "unique", "varchar",
];

/// Returns a string that's a copy of `name` but suitably escaped to be a valid
/// QL identifier.
///
/// The transformation is:
/// * a leading `_` gets the prefix `underscore` (the `_` itself is kept, so
///   `_x` becomes `underscore_x`);
/// * each listed punctuation character is replaced by a word (`{` becomes
///   `lbrace`, `<` becomes `langle`, ...);
/// * every other character is lower-cased;
/// * if the result equals a reserved keyword, `__` is appended.
pub fn escape_name(name: &str) -> String {
    let mut result = String::with_capacity(name.len());

    // Only the prefix is added here; the loop below still emits the
    // underscore itself.
    if name.starts_with('_') {
        result.push_str("underscore");
    }

    for c in name.chars() {
        let replacement = match c {
            '{' => "lbrace",
            '}' => "rbrace",
            '<' => "langle",
            '>' => "rangle",
            '[' => "lbracket",
            ']' => "rbracket",
            '(' => "lparen",
            ')' => "rparen",
            '|' => "pipe",
            '=' => "equal",
            '~' => "tilde",
            '?' => "question",
            '`' => "backtick",
            '^' => "caret",
            '!' => "bang",
            '#' => "hash",
            '%' => "percent",
            '&' => "ampersand",
            '.' => "dot",
            ',' => "comma",
            '/' => "slash",
            ':' => "colon",
            ';' => "semicolon",
            '"' => "dquote",
            '*' => "star",
            '+' => "plus",
            '-' => "minus",
            '@' => "at",
            _ => {
                // Lower-casing can yield more than one char, so extend
                // straight from the iterator instead of allocating a String.
                result.extend(c.to_lowercase());
                continue;
            }
        };
        result.push_str(replacement);
    }

    if RESERVED_KEYWORDS.contains(&result.as_str()) {
        result.push_str("__");
    }

    result
}
|
||||
|
||||
impl fmt::Display for Table {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if let Some(keyset) = &self.keysets {
|
||||
write!(f, "#keyset[")?;
|
||||
for (key_index, key) in keyset.iter().enumerate() {
|
||||
if key_index > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "{}", key)?;
|
||||
}
|
||||
write!(f, "]\n")?;
|
||||
}
|
||||
|
||||
write!(f, "{}(\n", self.name)?;
|
||||
for (column_index, column) in self.columns.iter().enumerate() {
|
||||
write!(f, " ")?;
|
||||
if column.unique {
|
||||
write!(f, "unique ")?;
|
||||
}
|
||||
write!(
|
||||
f,
|
||||
"{} ",
|
||||
match column.db_type {
|
||||
DbColumnType::Int => "int",
|
||||
DbColumnType::String => "string",
|
||||
}
|
||||
)?;
|
||||
write!(f, "{}: ", column.name)?;
|
||||
match &column.ql_type {
|
||||
QlColumnType::Int => write!(f, "int")?,
|
||||
QlColumnType::String => write!(f, "string")?,
|
||||
QlColumnType::Custom(name) => write!(f, "@{}", name)?,
|
||||
}
|
||||
if column.ql_type_is_ref {
|
||||
write!(f, " ref")?;
|
||||
}
|
||||
if column_index + 1 != self.columns.len() {
|
||||
write!(f, ",")?;
|
||||
}
|
||||
write!(f, "\n")?;
|
||||
}
|
||||
write!(f, ");")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Union {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "@{} = ", self.name)?;
|
||||
let mut first = true;
|
||||
for member in &self.members {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
write!(f, " | ")?;
|
||||
}
|
||||
write!(f, "@{}", member)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
|
||||
pub fn write(
|
||||
language_name: &str,
|
||||
file: &mut dyn std::io::Write,
|
||||
entries: &[Entry],
|
||||
) -> std::io::Result<()> {
|
||||
write!(file, "// CodeQL database schema for {}\n", language_name)?;
|
||||
write!(
|
||||
file,
|
||||
"// Automatically generated from the tree-sitter grammar; do not edit\n\n"
|
||||
)?;
|
||||
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Entry::Table(table) => write!(file, "{}\n\n", table)?,
|
||||
Entry::Union(union) => write!(file, "{}\n\n", union)?,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
7
generator/src/language.rs
Normal file
7
generator/src/language.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Describes a language for which a dbscheme is generated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Language {
    /// Human-readable language name, used in the dbscheme header comment.
    pub name: String,
    /// Path of the tree-sitter `node-types.json` file to read.
    pub node_types_path: PathBuf,
    /// Path the generated dbscheme is written to.
    pub dbscheme_path: PathBuf,
}
|
||||
@@ -1,3 +1,316 @@
|
||||
fn main() {
|
||||
println!("generator");
|
||||
mod dbscheme;
|
||||
mod language;
|
||||
mod node_types;
|
||||
|
||||
use language::Language;
|
||||
use node_types::{FieldInfo, NodeInfo};
|
||||
use std::fs::File;
|
||||
use std::io::LineWriter;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Builds the (unescaped) name used for the QL type that corresponds to a
/// tree-sitter node type identified by its `(kind, named)` pair.
///
/// Named node types use `kind` unchanged; unnamed ones get an `_unnamed`
/// suffix.
fn node_type_name(kind: &str, named: bool) -> String {
    let mut name = String::from(kind);
    if !named {
        name.push_str("_unnamed");
    }
    name
}
|
||||
|
||||
/// Given the name of the parent node, and its field information, returns the
|
||||
/// name of the field's type. This may be an ad-hoc union of all the possible
|
||||
/// types the field can take, in which case the union is added to `entries`.
|
||||
fn make_field_type(
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) -> String {
|
||||
if field_info.types.len() == 1 {
|
||||
// This field can only have a single type.
|
||||
let t = &field_info.types[0];
|
||||
dbscheme::escape_name(&node_type_name(&t.kind, t.named))
|
||||
} else {
|
||||
// This field can have one of several types. Create an ad-hoc QL union
|
||||
// type to represent them.
|
||||
let field_union_name = format!("{}_{}_type", parent_name, field_name);
|
||||
let field_union_name = dbscheme::escape_name(&field_union_name);
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for field_type in &field_info.types {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
&field_type.kind,
|
||||
field_type.named,
|
||||
)));
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: field_union_name.clone(),
|
||||
members,
|
||||
}));
|
||||
field_union_name
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the appropriate dbscheme information for the given field, either as a
|
||||
/// column on `main_table`, or as an auxiliary table.
|
||||
fn add_field(
|
||||
main_table: &mut dbscheme::Table,
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) {
|
||||
if field_info.multiple || !field_info.required {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
let field_table = dbscheme::Table {
|
||||
name: format!("{}_{}", parent_name, field_name),
|
||||
columns: vec![
|
||||
// First column is a reference to the parent.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: dbscheme::escape_name(parent_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// Then an index column.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "index".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Int,
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// And then the field
|
||||
dbscheme::Column {
|
||||
unique: true,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: field_type.clone(),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
],
|
||||
// In addition to the field being unique, the combination of
|
||||
// parent+index is unique, so add a keyset for them.
|
||||
keysets: Some(vec![
|
||||
dbscheme::escape_name(parent_name),
|
||||
"index".to_string(),
|
||||
]),
|
||||
};
|
||||
entries.push(dbscheme::Entry::Table(field_table));
|
||||
} else {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: String::from(field_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
|
||||
fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
|
||||
let mut entries: Vec<dbscheme::Entry> = Vec::new();
|
||||
let mut top_members: Vec<String> = Vec::new();
|
||||
|
||||
for node in nodes {
|
||||
if let Some(subtypes) = &node.subtypes {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for subtype in subtypes {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
&subtype.kind,
|
||||
subtype.named,
|
||||
)))
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)),
|
||||
members,
|
||||
}));
|
||||
} else {
|
||||
// It's a product type, defined by a table.
|
||||
let name = node_type_name(&node.kind, node.named);
|
||||
let mut main_table = dbscheme::Table {
|
||||
name: dbscheme::escape_name(&(format!("{}_def", name))),
|
||||
columns: vec![dbscheme::Column {
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "id".to_string(),
|
||||
unique: true,
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)),
|
||||
ql_type_is_ref: false,
|
||||
}],
|
||||
keysets: None,
|
||||
};
|
||||
top_members.push(dbscheme::escape_name(&name));
|
||||
|
||||
let mut is_leaf = true;
|
||||
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
if let Some(fields) = &node.fields {
|
||||
for (field_name, field_info) in fields {
|
||||
is_leaf = false;
|
||||
add_field(&mut main_table, &name, field_name, field_info, &mut entries);
|
||||
}
|
||||
}
|
||||
if let Some(children) = &node.children {
|
||||
is_leaf = false;
|
||||
|
||||
// Treat children as if they were a field called 'child'.
|
||||
add_field(&mut main_table, &name, "child", children, &mut entries);
|
||||
}
|
||||
|
||||
if is_leaf {
|
||||
// There were no fields and no children, so it's a leaf node in
|
||||
// the TS grammar. Add a column for the node text.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::String,
|
||||
name: "text".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::String,
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
|
||||
// Finally, the type's defining table also includes the location.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "loc".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
|
||||
entries.push(dbscheme::Entry::Table(main_table));
|
||||
}
|
||||
}
|
||||
|
||||
// Create a union of all database types.
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: "top".to_string(),
|
||||
members: top_members,
|
||||
}));
|
||||
|
||||
entries
|
||||
}
|
||||
|
||||
fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> {
|
||||
println!(
|
||||
"Writing to '{}'",
|
||||
match language.dbscheme_path.to_str() {
|
||||
None => "<undisplayable>",
|
||||
Some(p) => p,
|
||||
}
|
||||
);
|
||||
let file = File::create(&language.dbscheme_path)?;
|
||||
let mut file = LineWriter::new(file);
|
||||
dbscheme::write(&language.name, &mut file, &entries)
|
||||
}
|
||||
|
||||
/// Builds the `location` table: a unique `id` of type `@location`, a
/// `file_path` string, and the integer columns `start_line`, `start_column`,
/// `end_line` and `end_column`.
fn create_location_entry() -> dbscheme::Entry {
    // All four position columns share the same shape and differ only in name.
    let int_column = |name: &str| dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::Int,
        name: name.to_string(),
        ql_type: dbscheme::QlColumnType::Int,
        ql_type_is_ref: true,
    };

    let id_column = dbscheme::Column {
        unique: true,
        db_type: dbscheme::DbColumnType::Int,
        name: "id".to_string(),
        ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
        ql_type_is_ref: false,
    };
    let file_path_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: "file_path".to_string(),
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
    };

    let columns = vec![
        id_column,
        file_path_column,
        int_column("start_line"),
        int_column("start_column"),
        int_column("end_line"),
        int_column("end_column"),
    ];

    dbscheme::Entry::Table(dbscheme::Table {
        name: "location".to_string(),
        keysets: None,
        columns,
    })
}
|
||||
|
||||
/// Builds the `sourceLocationPrefix` table, which has a single string column
/// called `prefix`.
fn create_source_location_prefix_entry() -> dbscheme::Entry {
    let prefix_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: "prefix".to_string(),
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
    };

    let table = dbscheme::Table {
        name: "sourceLocationPrefix".to_string(),
        keysets: None,
        columns: vec![prefix_column],
    };

    dbscheme::Entry::Table(table)
}
|
||||
|
||||
fn main() {
|
||||
// TODO: figure out proper dbscheme output path and/or take it from the
|
||||
// command line.
|
||||
let ruby = Language {
|
||||
name: "Ruby".to_string(),
|
||||
node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"),
|
||||
dbscheme_path: PathBuf::from("ruby.dbscheme"),
|
||||
};
|
||||
match node_types::read(&ruby.node_types_path) {
|
||||
Err(e) => {
|
||||
println!(
|
||||
"Failed to read '{}': {}",
|
||||
match ruby.node_types_path.to_str() {
|
||||
None => "<undisplayable>",
|
||||
Some(p) => p,
|
||||
},
|
||||
e
|
||||
);
|
||||
std::process::exit(1);
|
||||
}
|
||||
Ok(nodes) => {
|
||||
let mut dbscheme_entries = convert_nodes(&nodes);
|
||||
dbscheme_entries.push(create_location_entry());
|
||||
dbscheme_entries.push(create_source_location_prefix_entry());
|
||||
match write_dbscheme(&ruby, &dbscheme_entries) {
|
||||
Err(e) => {
|
||||
println!("Failed to write dbscheme: {}", e);
|
||||
std::process::exit(2);
|
||||
}
|
||||
Ok(()) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
74
generator/src/node_types.rs
Normal file
74
generator/src/node_types.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
use serde::Deserialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fields: Option<BTreeMap<String, FieldInfo>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub children: Option<FieldInfo>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub subtypes: Option<Vec<NodeType>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeType {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FieldInfo {
|
||||
pub multiple: bool,
|
||||
pub required: bool,
|
||||
pub types: Vec<NodeType>,
|
||||
}
|
||||
|
||||
impl Default for FieldInfo {
|
||||
fn default() -> Self {
|
||||
FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Error {
|
||||
IOError(std::io::Error),
|
||||
JsonError(serde_json::error::Error),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for Error {
|
||||
fn from(error: std::io::Error) -> Self {
|
||||
Error::IOError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(error: serde_json::Error) -> Self {
|
||||
Error::JsonError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Error::IOError(e) => write!(f, "{}", e),
|
||||
Error::JsonError(e) => write!(f, "{}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Deserializes the node types from the JSON at the given `path`.
|
||||
pub fn read(path: &Path) -> Result<Vec<NodeInfo>, Error> {
|
||||
let json_data = std::fs::read_to_string(path)?;
|
||||
let node_types: Vec<NodeInfo> = serde_json::from_str(&json_data)?;
|
||||
Ok(node_types)
|
||||
}
|
||||
1864
ruby.dbscheme
Normal file
1864
ruby.dbscheme
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user