mirror of
https://github.com/github/codeql.git
synced 2026-02-08 19:21:07 +01:00
Includes better QLDoc generation and better escaping in the generated files (the latter has changed the dbscheme, so rebuilding databases will be required).
405 lines
13 KiB
Rust
405 lines
13 KiB
Rust
use serde::Deserialize;
|
|
use std::collections::BTreeMap;
|
|
use std::path::Path;
|
|
|
|
use std::collections::BTreeSet as Set;
|
|
use std::fs;
|
|
|
|
/// A lookup table from TypeName to Entry.
|
|
pub type NodeTypeMap = BTreeMap<TypeName, Entry>;
|
|
|
|
#[derive(Debug)]
|
|
pub struct Entry {
|
|
pub dbscheme_name: String,
|
|
pub ql_class_name: String,
|
|
pub kind: EntryKind,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum EntryKind {
|
|
Union { members: Set<TypeName> },
|
|
Table { name: String, fields: Vec<Field> },
|
|
Token { kind_id: usize },
|
|
}
|
|
|
|
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
|
|
pub struct TypeName {
|
|
pub kind: String,
|
|
pub named: bool,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum FieldTypeInfo {
|
|
/// The field has a single type.
|
|
Single(TypeName),
|
|
|
|
/// The field can take one of several types, so we also provide the name of
|
|
/// the database union type that wraps them, and the corresponding QL class
|
|
/// name.
|
|
Multiple {
|
|
types: Set<TypeName>,
|
|
dbscheme_union: String,
|
|
ql_class: String,
|
|
},
|
|
|
|
/// The field can be one of several tokens, so the db type will be an `int`
|
|
/// with a `case @foo.kind` for each possiblity.
|
|
ReservedWordInt(BTreeMap<String, (usize, String)>),
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct Field {
|
|
pub parent: TypeName,
|
|
pub type_info: FieldTypeInfo,
|
|
/// The name of the field or None for the anonymous 'children'
|
|
/// entry from node_types.json
|
|
pub name: Option<String>,
|
|
/// The name of the predicate to get this field.
|
|
pub getter_name: String,
|
|
pub storage: Storage,
|
|
}
|
|
|
|
fn name_for_field_or_child(name: &Option<String>) -> String {
|
|
match name {
|
|
Some(name) => name.clone(),
|
|
None => "child".to_owned(),
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum Storage {
|
|
/// the field is stored as a column in the parent table
|
|
Column { name: String },
|
|
/// the field is stored in a link table
|
|
Table {
|
|
/// the name of the table
|
|
name: String,
|
|
/// the name of the column for the field in the dbscheme
|
|
column_name: String,
|
|
/// does it have an associated index column?
|
|
has_index: bool,
|
|
},
|
|
}
|
|
|
|
pub fn read_node_types(node_types_path: &Path) -> std::io::Result<NodeTypeMap> {
|
|
let file = fs::File::open(node_types_path)?;
|
|
let node_types = serde_json::from_reader(file)?;
|
|
Ok(convert_nodes(&node_types))
|
|
}
|
|
|
|
pub fn read_node_types_str(node_types_json: &str) -> std::io::Result<NodeTypeMap> {
|
|
let node_types = serde_json::from_str(node_types_json)?;
|
|
Ok(convert_nodes(&node_types))
|
|
}
|
|
|
|
fn convert_type(node_type: &NodeType) -> TypeName {
|
|
TypeName {
|
|
kind: node_type.kind.to_string(),
|
|
named: node_type.named,
|
|
}
|
|
}
|
|
|
|
fn convert_types(node_types: &Vec<NodeType>) -> Set<TypeName> {
|
|
let iter = node_types.iter().map(convert_type).collect();
|
|
std::collections::BTreeSet::from(iter)
|
|
}
|
|
|
|
pub fn convert_nodes(nodes: &Vec<NodeInfo>) -> NodeTypeMap {
|
|
let mut entries = NodeTypeMap::new();
|
|
let mut token_kinds = Set::new();
|
|
|
|
// First, find all the token kinds
|
|
for node in nodes {
|
|
if node.subtypes.is_none() {
|
|
if node.fields.as_ref().map_or(0, |x| x.len()) == 0 && node.children.is_none() {
|
|
let type_name = TypeName {
|
|
kind: node.kind.clone(),
|
|
named: node.named,
|
|
};
|
|
token_kinds.insert(type_name);
|
|
}
|
|
}
|
|
}
|
|
|
|
for node in nodes {
|
|
let flattened_name = &node_type_name(&node.kind, node.named);
|
|
let dbscheme_name = escape_name(&flattened_name);
|
|
let ql_class_name = dbscheme_name_to_class_name(&dbscheme_name);
|
|
if let Some(subtypes) = &node.subtypes {
|
|
// It's a tree-sitter supertype node, for which we create a union
|
|
// type.
|
|
entries.insert(
|
|
TypeName {
|
|
kind: node.kind.clone(),
|
|
named: node.named,
|
|
},
|
|
Entry {
|
|
dbscheme_name,
|
|
ql_class_name,
|
|
kind: EntryKind::Union {
|
|
members: convert_types(&subtypes),
|
|
},
|
|
},
|
|
);
|
|
} else if node.fields.as_ref().map_or(0, |x| x.len()) == 0 && node.children.is_none() {
|
|
// Token kind, handled above.
|
|
} else {
|
|
// It's a product type, defined by a table.
|
|
let type_name = TypeName {
|
|
kind: node.kind.clone(),
|
|
named: node.named,
|
|
};
|
|
let table_name = escape_name(&(format!("{}_def", &flattened_name)));
|
|
let mut fields = Vec::new();
|
|
|
|
// If the type also has fields or children, then we create either
|
|
// auxiliary tables or columns in the defining table for them.
|
|
if let Some(node_fields) = &node.fields {
|
|
for (field_name, field_info) in node_fields {
|
|
add_field(
|
|
&type_name,
|
|
Some(field_name.to_string()),
|
|
field_info,
|
|
&mut fields,
|
|
&token_kinds,
|
|
);
|
|
}
|
|
}
|
|
if let Some(children) = &node.children {
|
|
// Treat children as if they were a field called 'child'.
|
|
add_field(&type_name, None, children, &mut fields, &token_kinds);
|
|
}
|
|
entries.insert(
|
|
type_name,
|
|
Entry {
|
|
dbscheme_name,
|
|
ql_class_name,
|
|
kind: EntryKind::Table {
|
|
name: table_name,
|
|
fields,
|
|
},
|
|
},
|
|
);
|
|
}
|
|
}
|
|
let mut counter = 0;
|
|
for type_name in token_kinds {
|
|
let entry = if type_name.named {
|
|
counter += 1;
|
|
let unprefixed_name = node_type_name(&type_name.kind, true);
|
|
Entry {
|
|
dbscheme_name: escape_name(&format!("token_{}", &unprefixed_name)),
|
|
ql_class_name: dbscheme_name_to_class_name(&escape_name(&unprefixed_name)),
|
|
kind: EntryKind::Token { kind_id: counter },
|
|
}
|
|
} else {
|
|
Entry {
|
|
dbscheme_name: "reserved_word".to_owned(),
|
|
ql_class_name: "ReservedWord".to_owned(),
|
|
kind: EntryKind::Token { kind_id: 0 },
|
|
}
|
|
};
|
|
entries.insert(type_name, entry);
|
|
}
|
|
entries
|
|
}
|
|
|
|
fn add_field(
|
|
parent_type_name: &TypeName,
|
|
field_name: Option<String>,
|
|
field_info: &FieldInfo,
|
|
fields: &mut Vec<Field>,
|
|
token_kinds: &Set<TypeName>,
|
|
) {
|
|
let parent_flattened_name = node_type_name(&parent_type_name.kind, parent_type_name.named);
|
|
let column_name = escape_name(&name_for_field_or_child(&field_name));
|
|
let storage = if !field_info.multiple && field_info.required {
|
|
// This field must appear exactly once, so we add it as
|
|
// a column to the main table for the node type.
|
|
Storage::Column { name: column_name }
|
|
} else {
|
|
// Put the field in an auxiliary table.
|
|
let has_index = field_info.multiple;
|
|
let field_table_name = escape_name(&format!(
|
|
"{}_{}",
|
|
parent_flattened_name,
|
|
&name_for_field_or_child(&field_name)
|
|
));
|
|
Storage::Table {
|
|
has_index,
|
|
name: field_table_name,
|
|
column_name,
|
|
}
|
|
};
|
|
let converted_types = convert_types(&field_info.types);
|
|
let type_info = if field_info
|
|
.types
|
|
.iter()
|
|
.all(|t| !t.named && token_kinds.contains(&convert_type(t)))
|
|
{
|
|
// All possible types for this field are reserved words. The db
|
|
// representation will be an `int` with a `case @foo.field = ...` to
|
|
// enumerate the possible values.
|
|
let mut counter = 0;
|
|
let mut field_token_ints: BTreeMap<String, (usize, String)> = BTreeMap::new();
|
|
for t in converted_types {
|
|
let dbscheme_variant_name =
|
|
escape_name(&format!("{}_{}", parent_flattened_name, t.kind));
|
|
field_token_ints.insert(t.kind.to_owned(), (counter, dbscheme_variant_name));
|
|
counter += 1;
|
|
}
|
|
FieldTypeInfo::ReservedWordInt(field_token_ints)
|
|
} else if field_info.types.len() == 1 {
|
|
FieldTypeInfo::Single(converted_types.into_iter().next().unwrap())
|
|
} else {
|
|
// The dbscheme type for this field will be a union. In QL, it'll just be AstNode.
|
|
FieldTypeInfo::Multiple {
|
|
types: converted_types,
|
|
dbscheme_union: format!(
|
|
"{}_{}_type",
|
|
&parent_flattened_name,
|
|
&name_for_field_or_child(&field_name)
|
|
),
|
|
ql_class: "AstNode".to_owned(),
|
|
}
|
|
};
|
|
let getter_name = format!(
|
|
"get{}",
|
|
dbscheme_name_to_class_name(&escape_name(&name_for_field_or_child(&field_name)))
|
|
);
|
|
fields.push(Field {
|
|
parent: TypeName {
|
|
kind: parent_type_name.kind.to_string(),
|
|
named: parent_type_name.named,
|
|
},
|
|
type_info,
|
|
name: field_name,
|
|
getter_name,
|
|
storage,
|
|
});
|
|
}
|
|
#[derive(Deserialize)]
|
|
pub struct NodeInfo {
|
|
#[serde(rename = "type")]
|
|
pub kind: String,
|
|
pub named: bool,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub fields: Option<BTreeMap<String, FieldInfo>>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub children: Option<FieldInfo>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub subtypes: Option<Vec<NodeType>>,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct NodeType {
|
|
#[serde(rename = "type")]
|
|
pub kind: String,
|
|
pub named: bool,
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct FieldInfo {
|
|
pub multiple: bool,
|
|
pub required: bool,
|
|
pub types: Vec<NodeType>,
|
|
}
|
|
|
|
/// Given a tree-sitter node type's (kind, named) pair, returns a single string
|
|
/// representing the (unescaped) name we'll use to refer to corresponding QL
|
|
/// type.
|
|
fn node_type_name(kind: &str, named: bool) -> String {
|
|
if named {
|
|
kind.to_string()
|
|
} else {
|
|
format!("{}_unnamed", kind)
|
|
}
|
|
}
|
|
|
|
const RESERVED_KEYWORDS: [&'static str; 14] = [
|
|
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
|
|
"type", "unique", "varchar",
|
|
];
|
|
|
|
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
|
|
/// QL identifier.
|
|
fn escape_name(name: &str) -> String {
|
|
let mut result = String::new();
|
|
|
|
// If there's a leading underscore, replace it with 'underscore_'.
|
|
if let Some(c) = name.chars().next() {
|
|
if c == '_' {
|
|
result.push_str("underscore");
|
|
}
|
|
}
|
|
for c in name.chars() {
|
|
match c {
|
|
'{' => result.push_str("lbrace"),
|
|
'}' => result.push_str("rbrace"),
|
|
'<' => result.push_str("langle"),
|
|
'>' => result.push_str("rangle"),
|
|
'[' => result.push_str("lbracket"),
|
|
']' => result.push_str("rbracket"),
|
|
'(' => result.push_str("lparen"),
|
|
')' => result.push_str("rparen"),
|
|
'|' => result.push_str("pipe"),
|
|
'=' => result.push_str("equal"),
|
|
'~' => result.push_str("tilde"),
|
|
'?' => result.push_str("question"),
|
|
'`' => result.push_str("backtick"),
|
|
'^' => result.push_str("caret"),
|
|
'!' => result.push_str("bang"),
|
|
'#' => result.push_str("hash"),
|
|
'%' => result.push_str("percent"),
|
|
'&' => result.push_str("ampersand"),
|
|
'.' => result.push_str("dot"),
|
|
',' => result.push_str("comma"),
|
|
'/' => result.push_str("slash"),
|
|
':' => result.push_str("colon"),
|
|
';' => result.push_str("semicolon"),
|
|
'"' => result.push_str("dquote"),
|
|
'*' => result.push_str("star"),
|
|
'+' => result.push_str("plus"),
|
|
'-' => result.push_str("minus"),
|
|
'@' => result.push_str("at"),
|
|
_ if c.is_uppercase() => {
|
|
result.push('_');
|
|
result.push_str(&c.to_lowercase().to_string())
|
|
}
|
|
_ => result.push(c),
|
|
}
|
|
}
|
|
|
|
for &keyword in &RESERVED_KEYWORDS {
|
|
if result == keyword {
|
|
result.push_str("__");
|
|
break;
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Given a valid dbscheme name (i.e. in snake case), produces the equivalent QL
|
|
/// name (i.e. in CamelCase). For example, "foo_bar_baz" becomes "FooBarBaz".
|
|
fn dbscheme_name_to_class_name(dbscheme_name: &str) -> String {
|
|
fn to_title_case(word: &str) -> String {
|
|
let mut first = true;
|
|
let mut result = String::new();
|
|
for c in word.chars() {
|
|
if first {
|
|
first = false;
|
|
result.push(c.to_ascii_uppercase());
|
|
} else {
|
|
result.push(c);
|
|
}
|
|
}
|
|
result
|
|
}
|
|
dbscheme_name
|
|
.split('_')
|
|
.map(|word| to_title_case(word))
|
|
.collect::<Vec<String>>()
|
|
.join("")
|
|
}
|