mirror of
https://github.com/github/codeql.git
synced 2026-04-25 00:35:20 +02:00
Basic dbscheme generation from node-types.json
This commit is contained in:
173
generator/src/dbscheme.rs
Normal file
173
generator/src/dbscheme.rs
Normal file
@@ -0,0 +1,173 @@
|
||||
/// Represents a distinct entry in the database schema.
|
||||
pub enum Entry {
|
||||
/// An entry defining a database table.
|
||||
Table(Table),
|
||||
|
||||
/// An entry defining type that is a union of other types.
|
||||
Union { name: String, members: Vec<String> },
|
||||
}
|
||||
|
||||
/// A table in the database schema.
|
||||
pub struct Table {
|
||||
pub name: String,
|
||||
pub columns: Vec<Column>,
|
||||
pub keysets: Vec<Vec<String>>,
|
||||
}
|
||||
|
||||
/// A column in a table.
|
||||
pub struct Column {
|
||||
pub db_type: DbColumnType,
|
||||
pub name: String,
|
||||
pub unique: bool,
|
||||
pub ql_type: QlColumnType,
|
||||
pub ql_type_is_ref: bool,
|
||||
}
|
||||
|
||||
/// The database column type.
///
/// This is a plain tag with no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbColumnType {
    /// Emitted as `int`.
    Int,
    /// Emitted as `string`.
    String,
}
|
||||
|
||||
/// The QL type of a column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QlColumnType {
    /// Primitive `int` type.
    Int,

    /// Primitive `string` type.
    String,

    /// A custom type, defined elsewhere by a table or union. The payload is
    /// the type's name without the leading `@`.
    Custom(String),
}
|
||||
|
||||
/// Names that `escape_name` never returns unchanged; a result equal to one of
/// these gets a `__` suffix appended.
const RESERVED_KEYWORDS: [&str; 14] = [
    "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
    "type", "unique", "varchar",
];

/// Returns a string that's a copy of `name` but suitably escaped to be a valid
/// QL identifier.
///
/// The escaping rules are:
/// * a leading `_` is prefixed with `underscore` (giving `underscore_...`);
/// * each listed punctuation character is replaced by its English name;
/// * every other character is lower-cased;
/// * if the result equals a reserved keyword, `__` is appended.
pub fn escape_name(name: &str) -> String {
    let mut result = String::with_capacity(name.len());

    // Only the prefix is added here; the underscore itself is copied by the
    // loop below, so `_foo` becomes `underscore_foo`.
    if name.starts_with('_') {
        result.push_str("underscore");
    }

    for c in name.chars() {
        let replacement = match c {
            '{' => "lbrace",
            '}' => "rbrace",
            '<' => "langle",
            '>' => "rangle",
            '[' => "lbracket",
            ']' => "rbracket",
            '(' => "lparen",
            ')' => "rparen",
            '|' => "pipe",
            '=' => "equal",
            '~' => "tilde",
            '?' => "question",
            '`' => "backtick",
            '^' => "caret",
            '!' => "bang",
            '#' => "hash",
            '%' => "percent",
            '&' => "ampersand",
            '.' => "dot",
            ',' => "comma",
            '/' => "slash",
            ':' => "colon",
            ';' => "semicolon",
            '"' => "dquote",
            '*' => "star",
            '+' => "plus",
            '-' => "minus",
            '@' => "at",
            _ => {
                // Lower-casing may yield more than one char, so extend
                // directly from the iterator instead of allocating a String.
                result.extend(c.to_lowercase());
                continue;
            }
        };
        result.push_str(replacement);
    }

    if RESERVED_KEYWORDS.contains(&result.as_str()) {
        result.push_str("__");
    }

    result
}
|
||||
|
||||
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
|
||||
pub fn write(file: &mut dyn std::io::Write, entries: &[Entry]) -> Result<(), std::io::Error> {
|
||||
write!(file, "// CodeQL database schema for Ruby\n")?;
|
||||
write!(
|
||||
file,
|
||||
"// Automatically generated from the tree-sitter grammar; do not edit\n\n"
|
||||
)?;
|
||||
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Entry::Table(table) => {
|
||||
for keyset in &table.keysets {
|
||||
write!(file, "#keyset[")?;
|
||||
for (key_index, key) in keyset.iter().enumerate() {
|
||||
if key_index > 0 {
|
||||
write!(file, ", ")?;
|
||||
}
|
||||
write!(file, "{}", key)?;
|
||||
}
|
||||
write!(file, "]\n")?;
|
||||
}
|
||||
|
||||
write!(file, "{}(\n", table.name)?;
|
||||
for (column_index, column) in table.columns.iter().enumerate() {
|
||||
write!(file, " ")?;
|
||||
if column.unique {
|
||||
write!(file, "unique ")?;
|
||||
}
|
||||
write!(
|
||||
file,
|
||||
"{} ",
|
||||
match column.db_type {
|
||||
DbColumnType::Int => "int",
|
||||
DbColumnType::String => "string",
|
||||
}
|
||||
)?;
|
||||
write!(file, "{}: ", column.name)?;
|
||||
match &column.ql_type {
|
||||
QlColumnType::Int => write!(file, "int")?,
|
||||
QlColumnType::String => write!(file, "string")?,
|
||||
QlColumnType::Custom(name) => write!(file, "@{}", name)?,
|
||||
}
|
||||
if column.ql_type_is_ref {
|
||||
write!(file, " ref")?;
|
||||
}
|
||||
if column_index + 1 != table.columns.len() {
|
||||
write!(file, ",")?;
|
||||
}
|
||||
write!(file, "\n")?;
|
||||
}
|
||||
write!(file, ");\n\n")?;
|
||||
}
|
||||
Entry::Union { name, members } => {
|
||||
write!(file, "@{} = ", name)?;
|
||||
let mut first = true;
|
||||
for member in members {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
write!(file, " | ")?;
|
||||
}
|
||||
write!(file, "@{}", member)?;
|
||||
}
|
||||
write!(file, "\n\n")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,3 +1,316 @@
|
||||
fn main() {
|
||||
println!("generator");
|
||||
use std::fs::File;
|
||||
use std::io::LineWriter;
|
||||
use std::path::Path;
|
||||
|
||||
mod dbscheme;
|
||||
mod node_types;
|
||||
use node_types::{FieldInfo, NodeInfo};
|
||||
|
||||
fn read_node_types() -> Option<Vec<NodeInfo>> {
|
||||
let json_data = match std::fs::read_to_string(Path::new("tree-sitter-ruby/src/node-types.json"))
|
||||
{
|
||||
Ok(s) => s,
|
||||
Err(_) => return None,
|
||||
};
|
||||
let nodes: Vec<NodeInfo> = match serde_json::from_str(&json_data) {
|
||||
Ok(n) => n,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
Some(nodes)
|
||||
}
|
||||
|
||||
/// Builds the (unescaped) name used for the QL type that corresponds to a
/// tree-sitter node type, identified by its `(kind, named)` pair.
///
/// Named nodes use `kind` as-is; unnamed nodes get an `_unnamed` suffix.
fn node_type_name(kind: &str, named: bool) -> String {
    let mut type_name = kind.to_string();
    if !named {
        type_name.push_str("_unnamed");
    }
    type_name
}
|
||||
|
||||
/// Given the name of the parent node, and its field information, returns the
|
||||
/// name of the field's type. This may be an ad-hoc union of all the possible
|
||||
/// types the field can take, in which case the union is added to `entries`.
|
||||
fn make_field_type(
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) -> String {
|
||||
if field_info.types.len() == 1 {
|
||||
// This field can only have a single type.
|
||||
let t = &field_info.types[0];
|
||||
dbscheme::escape_name(&node_type_name(&t.kind, t.named))
|
||||
} else {
|
||||
// This field can have one of several types. Create an ad-hoc QL union
|
||||
// type to represent them.
|
||||
let field_union_name = format!("{}_{}_type", parent_name, field_name);
|
||||
let field_union_name = dbscheme::escape_name(&field_union_name);
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for field_type in &field_info.types {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
&field_type.kind,
|
||||
field_type.named,
|
||||
)));
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union {
|
||||
name: field_union_name.clone(),
|
||||
members,
|
||||
});
|
||||
field_union_name
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the appropriate dbscheme information for the given field, either as a
|
||||
/// column on `main_table`, or as an auxiliary table.
|
||||
fn add_field(
|
||||
main_table: &mut dbscheme::Table,
|
||||
parent_name: &str,
|
||||
field_name: &str,
|
||||
field_info: &FieldInfo,
|
||||
entries: &mut Vec<dbscheme::Entry>,
|
||||
) {
|
||||
if field_info.multiple || !field_info.required {
|
||||
// This field can appear zero or multiple times, so put
|
||||
// it in an auxiliary table.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
let field_table = dbscheme::Table {
|
||||
name: format!("{}_{}", parent_name, field_name),
|
||||
columns: vec![
|
||||
// First column is a reference to the parent.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: dbscheme::escape_name(parent_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// Then an index column.
|
||||
dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "index".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Int,
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
// And then the field
|
||||
dbscheme::Column {
|
||||
unique: true,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: field_type.clone(),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
},
|
||||
],
|
||||
// In addition to the field being unique, the combination of
|
||||
// parent+index is unique, so add a keyset for them.
|
||||
keysets: vec![vec![
|
||||
dbscheme::escape_name(parent_name),
|
||||
"index".to_string(),
|
||||
]],
|
||||
};
|
||||
entries.push(dbscheme::Entry::Table(field_table));
|
||||
} else {
|
||||
// This field must appear exactly once, so we add it as
|
||||
// a column to the main table for the node type.
|
||||
let field_type = make_field_type(parent_name, field_name, field_info, entries);
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: String::from(field_name),
|
||||
ql_type: dbscheme::QlColumnType::Custom(field_type),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
|
||||
fn convert_nodes(nodes: &[NodeInfo]) -> Vec<dbscheme::Entry> {
|
||||
let mut entries: Vec<dbscheme::Entry> = Vec::new();
|
||||
let mut top_members: Vec<String> = Vec::new();
|
||||
|
||||
for node in nodes {
|
||||
if let Some(subtypes) = &node.subtypes {
|
||||
// It's a tree-sitter supertype node, for which we create a union
|
||||
// type.
|
||||
let mut members: Vec<String> = Vec::new();
|
||||
for subtype in subtypes {
|
||||
members.push(dbscheme::escape_name(&node_type_name(
|
||||
&subtype.kind,
|
||||
subtype.named,
|
||||
)))
|
||||
}
|
||||
entries.push(dbscheme::Entry::Union {
|
||||
name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)),
|
||||
members,
|
||||
});
|
||||
} else {
|
||||
// It's a product type, defined by a table.
|
||||
let name = node_type_name(&node.kind, node.named);
|
||||
let mut main_table = dbscheme::Table {
|
||||
name: dbscheme::escape_name(&(format!("{}_def", name))),
|
||||
columns: vec![dbscheme::Column {
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "id".to_string(),
|
||||
unique: true,
|
||||
ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)),
|
||||
ql_type_is_ref: false,
|
||||
}],
|
||||
keysets: vec![],
|
||||
};
|
||||
top_members.push(dbscheme::escape_name(&name));
|
||||
|
||||
let mut is_leaf = true;
|
||||
|
||||
// If the type also has fields or children, then we create either
|
||||
// auxiliary tables or columns in the defining table for them.
|
||||
if let Some(fields) = &node.fields {
|
||||
for (field_name, field_info) in fields {
|
||||
is_leaf = false;
|
||||
add_field(&mut main_table, &name, field_name, field_info, &mut entries);
|
||||
}
|
||||
}
|
||||
if let Some(children) = &node.children {
|
||||
is_leaf = false;
|
||||
|
||||
// Treat children as if they were a field called 'child'.
|
||||
add_field(&mut main_table, &name, "child", children, &mut entries);
|
||||
}
|
||||
|
||||
if is_leaf {
|
||||
// There were no fields and no children, so it's a leaf node in
|
||||
// the TS grammar. Add a column for the node text.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::String,
|
||||
name: "text".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::String,
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
}
|
||||
|
||||
// Finally, the type's defining table also includes the location.
|
||||
main_table.columns.push(dbscheme::Column {
|
||||
unique: false,
|
||||
db_type: dbscheme::DbColumnType::Int,
|
||||
name: "loc".to_string(),
|
||||
ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
|
||||
ql_type_is_ref: true,
|
||||
});
|
||||
|
||||
entries.push(dbscheme::Entry::Table(main_table));
|
||||
}
|
||||
}
|
||||
|
||||
// Create a union of all database types.
|
||||
entries.push(dbscheme::Entry::Union {
|
||||
name: "top".to_string(),
|
||||
members: top_members,
|
||||
});
|
||||
|
||||
entries
|
||||
}
|
||||
|
||||
fn write_dbscheme(entries: &[dbscheme::Entry]) -> std::io::Result<()> {
|
||||
// TODO: figure out proper output path and/or take it from the command line.
|
||||
let path = Path::new("ruby.dbscheme");
|
||||
println!(
|
||||
"Writing to '{}'",
|
||||
match path.to_str() {
|
||||
None => "<undisplayable>",
|
||||
Some(p) => p,
|
||||
}
|
||||
);
|
||||
let file = File::create(path)?;
|
||||
let mut file = LineWriter::new(file);
|
||||
dbscheme::write(&mut file, &entries)
|
||||
}
|
||||
|
||||
/// Builds the `location` table: a unique `id`, a `file_path` string, and the
/// four integer columns `start_line`, `start_column`, `end_line` and
/// `end_column`.
fn create_location_entry() -> dbscheme::Entry {
    let id_column = dbscheme::Column {
        unique: true,
        db_type: dbscheme::DbColumnType::Int,
        name: "id".to_string(),
        ql_type: dbscheme::QlColumnType::Custom("location".to_string()),
        ql_type_is_ref: false,
    };
    let file_path_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: "file_path".to_string(),
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
    };

    let mut columns = vec![id_column, file_path_column];
    // The position columns differ only by name.
    let position_names = ["start_line", "start_column", "end_line", "end_column"];
    columns.extend(position_names.iter().map(|position_name| dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::Int,
        name: position_name.to_string(),
        ql_type: dbscheme::QlColumnType::Int,
        ql_type_is_ref: true,
    }));

    dbscheme::Entry::Table(dbscheme::Table {
        name: "location".to_string(),
        keysets: Vec::new(),
        columns,
    })
}
|
||||
|
||||
/// Builds the `sourceLocationPrefix` table, which has a single string column
/// called `prefix`.
fn create_source_location_prefix_entry() -> dbscheme::Entry {
    let prefix_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: "prefix".to_string(),
        ql_type: dbscheme::QlColumnType::String,
        ql_type_is_ref: true,
    };
    let table = dbscheme::Table {
        name: "sourceLocationPrefix".to_string(),
        keysets: Vec::new(),
        columns: vec![prefix_column],
    };
    dbscheme::Entry::Table(table)
}
|
||||
|
||||
fn main() {
|
||||
match read_node_types() {
|
||||
None => {
|
||||
println!("Failed to read node types");
|
||||
std::process::exit(1);
|
||||
}
|
||||
Some(nodes) => {
|
||||
let mut dbscheme_entries = convert_nodes(&nodes);
|
||||
dbscheme_entries.push(create_location_entry());
|
||||
dbscheme_entries.push(create_source_location_prefix_entry());
|
||||
match write_dbscheme(&dbscheme_entries) {
|
||||
Err(e) => {
|
||||
println!("Failed to write dbscheme: {}", e);
|
||||
std::process::exit(2);
|
||||
}
|
||||
Ok(()) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
39
generator/src/node_types.rs
Normal file
39
generator/src/node_types.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
use serde::Deserialize;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeInfo {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub fields: Option<BTreeMap<String, FieldInfo>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub children: Option<FieldInfo>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub subtypes: Option<Vec<NodeType>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct NodeType {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct FieldInfo {
|
||||
pub multiple: bool,
|
||||
pub required: bool,
|
||||
pub types: Vec<NodeType>,
|
||||
}
|
||||
|
||||
impl Default for FieldInfo {
|
||||
fn default() -> Self {
|
||||
FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
1864
ruby.dbscheme
Normal file
1864
ruby.dbscheme
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user