Refactor to move naming decisions to shared library

This commit is contained in:
Nick Rolfe
2020-11-10 19:08:21 +00:00
parent 505d5c04d8
commit 83a0e5fea6
6 changed files with 400 additions and 406 deletions

View File

@@ -11,71 +11,51 @@ use std::io::LineWriter;
use std::path::PathBuf;
use tracing::{error, info};
/// Picks the dbscheme type name for the child node type `t`.
///
/// Unnamed tokens all share `reserved_word`; a named type whose kind is a key
/// of `token_types` gets the `token_` prefix; anything else is named by
/// `node_types::node_type_name`.
fn child_node_type_name(token_types: &Map<String, usize>, t: &node_types::TypeName) -> String {
    if !t.named {
        // An unnamed token.
        return "reserved_word".to_owned();
    }
    match token_types.get(&t.kind) {
        // A named token.
        Some(_) => format!("token_{}", t.kind),
        // A normal node.
        None => node_types::node_type_name(&t.kind, t.named),
    }
}
/// Given the name of the parent node, and its field information, returns the
/// name of the field's type. This may be an ad-hoc union of all the possible
/// types the field can take, in which case the union is added to `entries`.
// NOTE(review): this text comes from a rendered diff with the +/- markers
// stripped, so two versions of the signature and body are interleaved below.
// The `if types.len() == 1 { .. } else { .. }` part takes `token_types`,
// `parent_name`, `field_name` and `types`; the `match &field.type_info` part
// takes `field` and `nodes`. Confirm against the real file before editing.
fn make_field_type(
token_types: &Map<String, usize>,
parent_name: &str,
field_name: &str,
types: &Set<node_types::TypeName>,
field: &node_types::Field,
entries: &mut Vec<dbscheme::Entry>,
nodes: &node_types::NodeTypeMap,
) -> String {
if types.len() == 1 {
// This field can only have a single type.
let t = types.iter().next().unwrap();
// This variant returns an already-escaped name.
node_types::escape_name(&child_node_type_name(token_types, t))
} else {
// This field can have one of several types. Create an ad-hoc QL union
// type to represent them.
// The union is named `<parent_name>_<field_name>_type`, then escaped.
let field_union_name = format!("{}_{}_type", parent_name, field_name);
let field_union_name = node_types::escape_name(&field_union_name);
let members: Set<String> = types
.iter()
.map(|t| node_types::escape_name(&child_node_type_name(token_types, t)))
.collect();
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: field_union_name.clone(),
members,
}));
field_union_name
match &field.type_info {
node_types::FieldTypeInfo::Multiple {
types,
dbscheme_union,
ql_class: _,
} => {
// This field can have one of several types. Create an ad-hoc QL union
// type to represent them.
// Each member is looked up in `nodes`; the `unwrap` presumably panics
// if a referenced type is missing from the map — verify.
let members: Set<String> = types
.iter()
.map(|t| node_types::escape_name(&nodes.get(t).unwrap().flattened_name))
.collect();
// The pushed union's name is escaped, but the value returned below is
// the unescaped `dbscheme_union`; the visible callers in `add_field`
// apply `escape_name` to the result themselves.
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: node_types::escape_name(&dbscheme_union),
members,
}));
dbscheme_union.clone()
}
// A single type: return its flattened name as-is (unescaped).
node_types::FieldTypeInfo::Single(t) => nodes.get(&t).unwrap().flattened_name.clone(),
}
}
/// Adds the appropriate dbscheme information for the given field, either as a
/// column on `main_table`, or as an auxiliary table.
fn add_field(
token_types: &Map<String, usize>,
main_table: &mut dbscheme::Table,
field: &node_types::Field,
entries: &mut Vec<dbscheme::Entry>,
nodes: &node_types::NodeTypeMap,
) {
let field_name = field.get_name();
let parent_name = node_types::node_type_name(&field.parent.kind, field.parent.named);
let parent_name = &nodes.get(&field.parent).unwrap().flattened_name;
match &field.storage {
node_types::Storage::Table(has_index) => {
// This field can appear zero or multiple times, so put
// it in an auxiliary table.
let field_type = make_field_type(
token_types,
&parent_name,
&field_name,
&field.types,
entries,
);
let field_type = node_types::escape_name(&make_field_type(&field, entries, nodes));
let parent_column = dbscheme::Column {
unique: !*has_index,
db_type: dbscheme::DbColumnType::Int,
@@ -93,12 +73,12 @@ fn add_field(
let field_column = dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: node_types::escape_name(&field_type),
name: field_type.clone(),
ql_type: ql::Type::AtType(field_type),
ql_type_is_ref: true,
};
let field_table = dbscheme::Table {
name: format!("{}_{}", parent_name, field_name),
name: node_types::escape_name(&format!("{}_{}", parent_name, field_name)),
columns: if *has_index {
vec![parent_column, index_column, field_column]
} else {
@@ -120,18 +100,12 @@ fn add_field(
node_types::Storage::Column => {
// This field must appear exactly once, so we add it as
// a column to the main table for the node type.
let field_type = make_field_type(
token_types,
&parent_name,
&field_name,
&field.types,
entries,
);
let field_type = make_field_type(&field, entries, nodes);
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: node_types::escape_name(&field_name),
ql_type: ql::Type::AtType(field_type),
ql_type: ql::Type::AtType(node_types::escape_name(&field_type)),
ql_type_is_ref: true,
});
}
@@ -139,7 +113,7 @@ fn add_field(
}
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<dbscheme::Entry> {
let mut entries: Vec<dbscheme::Entry> = vec![
create_location_union(),
create_locations_default_table(),
@@ -152,59 +126,50 @@ fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
create_source_location_prefix_table(),
];
let mut ast_node_members: Set<String> = Set::new();
let mut token_kinds: Map<String, usize> = Map::new();
ast_node_members.insert(node_types::escape_name("token"));
for node in nodes {
if let node_types::Entry::Token { type_name, kind_id } = node {
if type_name.named {
token_kinds.insert(type_name.kind.to_owned(), *kind_id);
let token_kinds: Map<String, usize> = nodes
.iter()
.filter_map(|(_, node)| match &node.kind {
node_types::EntryKind::Token { kind_id } => {
Some((node.flattened_name.clone(), *kind_id))
}
}
}
_ => None,
})
.collect();
ast_node_members.insert(node_types::escape_name("token"));
for node in nodes {
match &node {
node_types::Entry::Union {
type_name,
members: n_members,
} => {
for (_, node) in nodes {
match &node.kind {
node_types::EntryKind::Union { members: n_members } => {
// It's a tree-sitter supertype node, for which we create a union
// type.
let mut members: Set<String> = Set::new();
for n_member in n_members {
members.insert(node_types::escape_name(&child_node_type_name(
&token_kinds,
n_member,
)));
}
let members: Set<String> = n_members
.iter()
.map(|n| node_types::escape_name(&nodes.get(n).unwrap().flattened_name))
.collect();
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: node_types::escape_name(&node_types::node_type_name(
&type_name.kind,
type_name.named,
)),
name: node_types::escape_name(&node.flattened_name),
members,
}));
}
node_types::Entry::Table { type_name, fields } => {
node_types::EntryKind::Table { fields } => {
// It's a product type, defined by a table.
let name = node_types::node_type_name(&type_name.kind, type_name.named);
let mut main_table = dbscheme::Table {
name: node_types::escape_name(&(format!("{}_def", name))),
name: node_types::escape_name(&(format!("{}_def", &node.flattened_name))),
columns: vec![dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "id".to_string(),
unique: true,
ql_type: ql::Type::AtType(node_types::escape_name(&name)),
ql_type: ql::Type::AtType(node_types::escape_name(&node.flattened_name)),
ql_type_is_ref: false,
}],
keysets: None,
};
ast_node_members.insert(node_types::escape_name(&name));
ast_node_members.insert(node_types::escape_name(&node.flattened_name));
// If the type also has fields or children, then we create either
// auxiliary tables or columns in the defining table for them.
for field in fields {
add_field(&token_kinds, &mut main_table, &field, &mut entries);
add_field(&mut main_table, &field, &mut entries, nodes);
}
if fields.is_empty() {
@@ -230,7 +195,7 @@ fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
entries.push(dbscheme::Entry::Table(main_table));
}
node_types::Entry::Token { .. } => {}
node_types::EntryKind::Token { .. } => {}
}
}
@@ -295,15 +260,10 @@ fn add_tokeninfo_table(entries: &mut Vec<dbscheme::Entry>, token_kinds: Map<Stri
},
],
}));
let mut branches: Vec<(usize, String)> = Vec::new();
branches.push((0, "reserved_word".to_owned()));
for (token_kind, idx) in token_kinds.iter() {
branches.push((
*idx,
node_types::escape_name(&format!("token_{}", token_kind)),
));
}
let branches: Vec<(usize, String)> = token_kinds
.iter()
.map(|(name, kind_id)| (*kind_id, node_types::escape_name(name)))
.collect();
entries.push(dbscheme::Entry::Case(dbscheme::Case {
name: "token".to_owned(),
column: "kind".to_owned(),

View File

@@ -1,3 +1,4 @@
use std::collections::BTreeSet;
use std::fmt;
pub enum TopLevel {
@@ -14,10 +15,11 @@ impl fmt::Display for TopLevel {
}
}
/// Describes a QL class: its name, whether it is abstract, its supertypes,
/// an optional characteristic predicate, and its member predicates.
// NOTE(review): `supertypes` is declared twice because this text is a rendered
// diff with the +/- markers stripped. The `BTreeSet<Type>` form matches the
// call sites elsewhere in this diff that build the field with
// `.into_iter().collect()` or an explicit `BTreeSet<ql::Type>`.
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Class {
/// Name of the class.
pub name: String,
/// Whether the class is abstract.
pub is_abstract: bool,
pub supertypes: Vec<Type>,
/// Types this class extends.
pub supertypes: BTreeSet<Type>,
/// Optional characteristic predicate body; `None` when the class has none.
pub characteristic_predicate: Option<Expression>,
/// Member predicates of the class.
pub predicates: Vec<Predicate>,
}
@@ -61,7 +63,7 @@ impl fmt::Display for Class {
}
// The QL type of a column.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum Type {
/// Primitive `int` type.
Int,
@@ -69,11 +71,11 @@ pub enum Type {
/// Primitive `string` type.
String,
/// A user-defined type.
Normal(String),
/// A database type that will need to be referred to with an `@` prefix.
AtType(String),
/// A user-defined type.
Normal(String),
}
impl fmt::Display for Type {
@@ -87,15 +89,13 @@ impl fmt::Display for Type {
}
}
// NOTE(review): two derive lines and all eight variants appear here because
// this text is a rendered diff with the +/- markers stripped; confirm which
// derive and which variants the real file keeps before editing.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq, Hash)]
/// A QL expression or formula that can be rendered through `fmt::Display`.
pub enum Expression {
Var(String),
String(String),
Pred(String, Vec<Expression>),
Or(Vec<Expression>),
/// Conjunction: displayed as `any()` when empty, otherwise the conjuncts
/// joined by ` and `.
And(Vec<Expression>),
/// Equality: displayed as `a = b`.
Equals(Box<Expression>, Box<Expression>),
/// Existential: displayed as `exists(` + comma-separated parameters +
/// ` | formula)`.
Exists(Vec<FormalParameter>, Box<Expression>),
/// Member call: displayed as `x.member_pred(` followed by the arguments.
Dot(Box<Expression>, String, Vec<Expression>),
}
@@ -127,30 +127,7 @@ impl fmt::Display for Expression {
Ok(())
}
}
Expression::And(conjuncts) => {
if conjuncts.is_empty() {
write!(f, "any()")
} else {
for (index, conjunct) in conjuncts.iter().enumerate() {
if index > 0 {
write!(f, " and ")?;
}
write!(f, "{}", conjunct)?;
}
Ok(())
}
}
Expression::Equals(a, b) => write!(f, "{} = {}", a, b),
Expression::Exists(params, formula) => {
write!(f, "exists(")?;
for (index, param) in params.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", param)?;
}
write!(f, " | {})", formula)
}
Expression::Dot(x, member_pred, args) => {
write!(f, "{}.{}(", x, member_pred)?;
for (index, arg) in args.iter().enumerate() {
@@ -165,7 +142,7 @@ impl fmt::Display for Expression {
}
}
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Predicate {
pub name: String,
pub overridden: bool,
@@ -196,7 +173,7 @@ impl fmt::Display for Predicate {
}
}
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct FormalParameter {
pub name: String,
pub param_type: Type,

View File

@@ -67,7 +67,9 @@ fn create_ast_node_class() -> ql::Class {
ql::Class {
name: "AstNode".to_owned(),
is_abstract: false,
supertypes: vec![ql::Type::AtType("ast_node".to_owned())],
supertypes: vec![ql::Type::AtType("ast_node".to_owned())]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
to_string,
@@ -77,6 +79,7 @@ fn create_ast_node_class() -> ql::Class {
],
}
}
fn create_token_class() -> ql::Class {
let get_value = ql::Predicate {
name: "getValue".to_owned(),
@@ -128,7 +131,9 @@ fn create_token_class() -> ql::Class {
supertypes: vec![
ql::Type::AtType("token".to_owned()),
ql::Type::Normal("AstNode".to_owned()),
],
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
get_value,
@@ -141,16 +146,18 @@ fn create_token_class() -> ql::Class {
// Creates the `ReservedWord` class.
fn create_reserved_word_class() -> ql::Class {
let db_name = "reserved_word".to_owned();
let class_name = dbscheme_name_to_class_name(&db_name);
let db_name = "reserved_word";
let class_name = "ReservedWord".to_owned();
let describe_ql_class = create_describe_ql_class(&class_name);
ql::Class {
name: class_name,
is_abstract: false,
supertypes: vec![
ql::Type::AtType(db_name.to_owned()),
ql::Type::Normal("Token".to_owned()),
ql::Type::AtType(db_name),
],
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![describe_ql_class],
}
@@ -172,47 +179,6 @@ fn create_none_predicate(
}
}
/// Returns the name of the type used for `field`.
///
/// A field with exactly one possible type yields `Token` when that type is
/// unnamed or its kind is in `token_kinds`, and the escaped node type name
/// otherwise. Any other number of possible types yields `AstNode`.
fn create_field_class(token_kinds: &BTreeSet<String>, field: &node_types::Field) -> String {
    if field.types.len() != 1 {
        // Zero or several candidate types: fall back to the common base.
        return "AstNode".to_owned();
    }

    // This field can only have a single type.
    let only_type = field.types.iter().next().unwrap();
    let is_token = !only_type.named || token_kinds.contains(&only_type.kind);
    if is_token {
        return "Token".to_owned();
    }

    let node_name = node_types::node_type_name(&only_type.kind, only_type.named);
    node_types::escape_name(&node_name)
}
/// Turns a snake_case dbscheme name into the matching CamelCase QL name:
/// the first character of every `_`-separated word is ASCII-upper-cased and
/// the words are concatenated. For example, "foo_bar_baz" becomes "FooBarBaz".
fn dbscheme_name_to_class_name(dbscheme_name: &str) -> String {
    let mut class_name = String::new();
    for segment in dbscheme_name.split('_') {
        let mut letters = segment.chars();
        // Empty segments (from leading, trailing or doubled underscores)
        // contribute nothing.
        if let Some(initial) = letters.next() {
            class_name.push(initial.to_ascii_uppercase());
            // The remainder of the word is copied through unchanged.
            class_name.push_str(letters.as_str());
        }
    }
    class_name
}
/// Creates an overridden `describeQlClass` predicate that returns the given
/// name.
fn create_describe_ql_class(class_name: &str) -> ql::Predicate {
@@ -345,20 +311,24 @@ fn create_field_getters(
main_table_column_index: &mut usize,
parent_name: &str,
field: &node_types::Field,
field_type: &str,
nodes: &node_types::NodeTypeMap,
) -> (ql::Predicate, ql::Expression) {
let predicate_name = format!(
"get{}",
dbscheme_name_to_class_name(&node_types::escape_name(&field.get_name()))
);
let return_type = Some(ql::Type::Normal(dbscheme_name_to_class_name(field_type)));
let predicate_name = field.get_getter_name();
let return_type = Some(ql::Type::Normal(match &field.type_info {
node_types::FieldTypeInfo::Single(t) => nodes.get(&t).unwrap().ql_class_name.clone(),
node_types::FieldTypeInfo::Multiple {
types: _,
dbscheme_union: _,
ql_class,
} => ql_class.clone(),
}));
match &field.storage {
node_types::Storage::Column => {
let result = (
ql::Predicate {
name: predicate_name,
overridden: false,
return_type: return_type,
return_type,
formal_parameters: vec![],
body: create_get_field_expr_for_column_storage(
&main_table_name,
@@ -381,7 +351,7 @@ fn create_field_getters(
ql::Predicate {
name: predicate_name,
overridden: false,
return_type: return_type,
return_type,
formal_parameters: if *has_index {
vec![ql::FormalParameter {
name: "i".to_owned(),
@@ -405,7 +375,7 @@ fn create_field_getters(
}
/// Converts the given node types into CodeQL classes wrapping the dbscheme.
pub fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<ql::TopLevel> {
pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<ql::TopLevel> {
let mut classes: Vec<ql::TopLevel> = vec![
ql::TopLevel::Import("codeql.files.FileSystem".to_owned()),
ql::TopLevel::Import("codeql.Locations".to_owned()),
@@ -414,61 +384,48 @@ pub fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<ql::TopLevel> {
ql::TopLevel::Class(create_reserved_word_class()),
];
let mut token_kinds = BTreeSet::new();
for node in nodes {
if let node_types::Entry::Token { type_name, .. } = node {
for (type_name, node) in nodes {
if let node_types::EntryKind::Token { .. } = &node.kind {
if type_name.named {
token_kinds.insert(type_name.kind.to_owned());
}
}
}
for node in nodes {
match &node {
node_types::Entry::Token {
type_name,
kind_id: _,
} => {
for (type_name, node) in nodes {
match &node.kind {
node_types::EntryKind::Token { kind_id: _ } => {
if type_name.named {
let db_name = format!("token_{}", &type_name.kind);
let db_name = node_types::escape_name(&db_name);
let class_name =
dbscheme_name_to_class_name(&node_types::escape_name(&type_name.kind));
let describe_ql_class = create_describe_ql_class(&class_name);
let describe_ql_class = create_describe_ql_class(&node.ql_class_name);
let mut supertypes: BTreeSet<ql::Type> = BTreeSet::new();
supertypes.insert(ql::Type::AtType(node.flattened_name.to_owned()));
supertypes.insert(ql::Type::Normal("Token".to_owned()));
classes.push(ql::TopLevel::Class(ql::Class {
name: class_name,
name: node.ql_class_name.clone(),
is_abstract: false,
supertypes: vec![
ql::Type::Normal("Token".to_owned()),
ql::Type::AtType(db_name),
],
supertypes,
characteristic_predicate: None,
predicates: vec![describe_ql_class],
}));
}
}
node_types::Entry::Union {
type_name,
members: _,
} => {
node_types::EntryKind::Union { members: _ } => {
// It's a tree-sitter supertype node, so we're wrapping a dbscheme
// union type.
let union_name = node_types::escape_name(&node_types::node_type_name(
&type_name.kind,
type_name.named,
));
let class_name = dbscheme_name_to_class_name(&union_name);
classes.push(ql::TopLevel::Class(ql::Class {
name: class_name.clone(),
name: node.ql_class_name.clone(),
is_abstract: false,
supertypes: vec![
ql::Type::AtType(union_name),
ql::Type::AtType(node_types::escape_name(&node.flattened_name)),
ql::Type::Normal("AstNode".to_owned()),
],
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![],
}));
}
node_types::Entry::Table { type_name, fields } => {
node_types::EntryKind::Table { fields } => {
// Count how many columns there will be in the main table.
// There will be:
// - one for the id
@@ -484,15 +441,19 @@ pub fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<ql::TopLevel> {
.count()
};
let name = node_types::node_type_name(&type_name.kind, type_name.named);
let dbscheme_name = node_types::escape_name(&name);
let ql_type = ql::Type::AtType(dbscheme_name.clone());
let main_table_name = node_types::escape_name(&(format!("{}_def", name)));
let main_class_name = dbscheme_name_to_class_name(&dbscheme_name);
let escaped_name = node_types::escape_name(&node.flattened_name);
let main_class_name = &node.ql_class_name;
let main_table_name =
node_types::escape_name(&format!("{}_def", &node.flattened_name));
let mut main_class = ql::Class {
name: main_class_name.clone(),
is_abstract: false,
supertypes: vec![ql_type, ql::Type::Normal("AstNode".to_owned())],
supertypes: vec![
ql::Type::AtType(escaped_name),
ql::Type::Normal("AstNode".to_owned()),
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
create_describe_ql_class(&main_class_name),
@@ -513,14 +474,13 @@ pub fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<ql::TopLevel> {
// - predicates to access the fields,
// - the QL expressions to access the fields that will be part of getAFieldOrChild.
for field in fields {
let field_type = create_field_class(&token_kinds, field);
let (get_pred, get_child_expr) = create_field_getters(
&main_table_name,
main_table_arity,
&mut main_table_column_index,
&name,
&node.flattened_name,
field,
&field_type,
nodes,
);
main_class.predicates.push(get_pred);
get_child_exprs.push(get_child_expr);