Merge remote-tracking branch 'rc/3.3' into 'main'

This commit is contained in:
Arthur Baars
2021-10-15 15:21:48 +02:00
552 changed files with 102 additions and 279 deletions

View File

@@ -0,0 +1,130 @@
use crate::ql;
use std::collections::BTreeSet as Set;
use std::fmt;
/// Represents a distinct entry in the database schema.
pub enum Entry<'a> {
/// An entry defining a database table.
Table(Table<'a>),
/// An entry defining a database table.
Case(Case<'a>),
/// An entry defining type that is a union of other types.
Union(Union<'a>),
}
/// A table in the database schema.
pub struct Table<'a> {
pub name: &'a str,
pub columns: Vec<Column<'a>>,
pub keysets: Option<Vec<&'a str>>,
}
/// A union in the database schema.
pub struct Union<'a> {
pub name: &'a str,
pub members: Set<&'a str>,
}
/// A table in the database schema.
pub struct Case<'a> {
pub name: &'a str,
pub column: &'a str,
pub branches: Vec<(usize, &'a str)>,
}
/// A column in a table.
pub struct Column<'a> {
pub db_type: DbColumnType,
pub name: &'a str,
pub unique: bool,
pub ql_type: ql::Type<'a>,
pub ql_type_is_ref: bool,
}
/// The database column type.
pub enum DbColumnType {
Int,
String,
}
impl<'a> fmt::Display for Case<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "case @{}.{} of", &self.name, &self.column)?;
let mut sep = " ";
for (c, tp) in &self.branches {
writeln!(f, "{} {} = @{}", sep, c, tp)?;
sep = "|";
}
writeln!(f, ";")
}
}
impl<'a> fmt::Display for Table<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(keyset) = &self.keysets {
write!(f, "#keyset[")?;
for (key_index, key) in keyset.iter().enumerate() {
if key_index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", key)?;
}
writeln!(f, "]")?;
}
writeln!(f, "{}(", self.name)?;
for (column_index, column) in self.columns.iter().enumerate() {
write!(f, " ")?;
if column.unique {
write!(f, "unique ")?;
}
write!(
f,
"{} ",
match column.db_type {
DbColumnType::Int => "int",
DbColumnType::String => "string",
}
)?;
write!(f, "{}: {}", column.name, column.ql_type)?;
if column.ql_type_is_ref {
write!(f, " ref")?;
}
if column_index + 1 != self.columns.len() {
write!(f, ",")?;
}
writeln!(f)?;
}
write!(f, ");")?;
Ok(())
}
}
impl<'a> fmt::Display for Union<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "@{} = ", self.name)?;
let mut first = true;
for member in &self.members {
if first {
first = false;
} else {
write!(f, " | ")?;
}
write!(f, "@{}", member)?;
}
Ok(())
}
}
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
pub fn write<'a>(file: &mut dyn std::io::Write, entries: &'a [Entry]) -> std::io::Result<()> {
for entry in entries {
match entry {
Entry::Case(case) => write!(file, "{}\n\n", case)?,
Entry::Table(table) => write!(file, "{}\n\n", table)?,
Entry::Union(union) => write!(file, "{}\n\n", union)?,
}
}
Ok(())
}

View File

@@ -0,0 +1,4 @@
pub struct Language {
pub name: String,
pub node_types: &'static str,
}

671
ruby/generator/src/main.rs Normal file
View File

@@ -0,0 +1,671 @@
mod dbscheme;
mod language;
mod ql;
mod ql_gen;
use language::Language;
use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::fs::File;
use std::io::LineWriter;
use std::io::Write;
use std::path::PathBuf;
/// Given the name of the parent node, and its field information, returns a pair,
/// the first of which is the field's type. The second is an optional dbscheme
/// entry that should be added.
fn make_field_type<'a>(
parent_name: &'a str,
field: &'a node_types::Field,
nodes: &'a node_types::NodeTypeMap,
) -> (ql::Type<'a>, Option<dbscheme::Entry<'a>>) {
match &field.type_info {
node_types::FieldTypeInfo::Multiple {
types,
dbscheme_union,
ql_class: _,
} => {
// This field can have one of several types. Create an ad-hoc QL union
// type to represent them.
let members: Set<&str> = types
.iter()
.map(|t| nodes.get(t).unwrap().dbscheme_name.as_str())
.collect();
(
ql::Type::At(dbscheme_union),
Some(dbscheme::Entry::Union(dbscheme::Union {
name: dbscheme_union,
members,
})),
)
}
node_types::FieldTypeInfo::Single(t) => {
let dbscheme_name = &nodes.get(t).unwrap().dbscheme_name;
(ql::Type::At(dbscheme_name), None)
}
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
// The field will be an `int` in the db, and we add a case split to
// create other db types for each integer value.
let mut branches: Vec<(usize, &'a str)> = Vec::new();
for (value, name) in int_mapping.values() {
branches.push((*value, name));
}
let case = dbscheme::Entry::Case(dbscheme::Case {
name: parent_name,
column: match &field.storage {
node_types::Storage::Column { name } => name,
node_types::Storage::Table { name, .. } => name,
},
branches,
});
(ql::Type::Int, Some(case))
}
}
}
fn add_field_for_table_storage<'a>(
field: &'a node_types::Field,
table_name: &'a str,
column_name: &'a str,
has_index: bool,
nodes: &'a node_types::NodeTypeMap,
) -> (dbscheme::Table<'a>, Option<dbscheme::Entry<'a>>) {
let parent_name = &nodes.get(&field.parent).unwrap().dbscheme_name;
// This field can appear zero or multiple times, so put
// it in an auxiliary table.
let (field_ql_type, field_type_entry) = make_field_type(parent_name, field, nodes);
let parent_column = dbscheme::Column {
unique: !has_index,
db_type: dbscheme::DbColumnType::Int,
name: parent_name,
ql_type: ql::Type::At(parent_name),
ql_type_is_ref: true,
};
let index_column = dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "index",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
};
let field_column = dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: column_name,
ql_type: field_ql_type,
ql_type_is_ref: true,
};
let field_table = dbscheme::Table {
name: table_name,
columns: if has_index {
vec![parent_column, index_column, field_column]
} else {
vec![parent_column, field_column]
},
// In addition to the field being unique, the combination of
// parent+index is unique, so add a keyset for them.
keysets: if has_index {
Some(vec![parent_name, "index"])
} else {
None
},
};
(field_table, field_type_entry)
}
fn add_field_for_column_storage<'a>(
parent_name: &'a str,
field: &'a node_types::Field,
column_name: &'a str,
nodes: &'a node_types::NodeTypeMap,
) -> (dbscheme::Column<'a>, Option<dbscheme::Entry<'a>>) {
// This field must appear exactly once, so we add it as
// a column to the main table for the node type.
let (field_ql_type, field_type_entry) = make_field_type(parent_name, field, nodes);
(
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: column_name,
ql_type: field_ql_type,
ql_type_is_ref: true,
},
field_type_entry,
)
}
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
/// Returns a tuple containing:
///
/// 1. A vector of dbscheme entries.
/// 2. A set of names of the members of the `<lang>_ast_node` union.
/// 3. A map where the keys are the dbscheme names for token kinds, and the
/// values are their integer representations.
fn convert_nodes(
nodes: &node_types::NodeTypeMap,
) -> (Vec<dbscheme::Entry>, Set<&str>, Map<&str, usize>) {
let mut entries: Vec<dbscheme::Entry> = Vec::new();
let mut ast_node_members: Set<&str> = Set::new();
let token_kinds: Map<&str, usize> = nodes
.iter()
.filter_map(|(_, node)| match &node.kind {
node_types::EntryKind::Token { kind_id } => {
Some((node.dbscheme_name.as_str(), *kind_id))
}
_ => None,
})
.collect();
for node in nodes.values() {
match &node.kind {
node_types::EntryKind::Union { members: n_members } => {
// It's a tree-sitter supertype node, for which we create a union
// type.
let members: Set<&str> = n_members
.iter()
.map(|n| nodes.get(n).unwrap().dbscheme_name.as_str())
.collect();
entries.push(dbscheme::Entry::Union(dbscheme::Union {
name: &node.dbscheme_name,
members,
}));
}
node_types::EntryKind::Table { name, fields } => {
// It's a product type, defined by a table.
let mut main_table = dbscheme::Table {
name,
columns: vec![dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "id",
unique: true,
ql_type: ql::Type::At(&node.dbscheme_name),
ql_type_is_ref: false,
}],
keysets: None,
};
ast_node_members.insert(&node.dbscheme_name);
// If the type also has fields or children, then we create either
// auxiliary tables or columns in the defining table for them.
for field in fields {
match &field.storage {
node_types::Storage::Column { name: column_name } => {
let (field_column, field_type_entry) = add_field_for_column_storage(
&node.dbscheme_name,
field,
column_name,
nodes,
);
if let Some(field_type_entry) = field_type_entry {
entries.push(field_type_entry);
}
main_table.columns.push(field_column);
}
node_types::Storage::Table {
name,
has_index,
column_name,
} => {
let (field_table, field_type_entry) = add_field_for_table_storage(
field,
name,
column_name,
*has_index,
nodes,
);
if let Some(field_type_entry) = field_type_entry {
entries.push(field_type_entry);
}
entries.push(dbscheme::Entry::Table(field_table));
}
}
}
if fields.is_empty() {
// There were no fields and no children, so it's a leaf node in
// the TS grammar. Add a column for the node text.
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "text",
ql_type: ql::Type::String,
ql_type_is_ref: true,
});
}
// Finally, the type's defining table also includes the location.
main_table.columns.push(dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "loc",
ql_type: ql::Type::At("location"),
ql_type_is_ref: true,
});
entries.push(dbscheme::Entry::Table(main_table));
}
node_types::EntryKind::Token { .. } => {}
}
}
(entries, ast_node_members, token_kinds)
}
/// Creates a dbscheme table entry representing the parent relation for AST nodes.
///
/// # Arguments
/// - `name` - the name of both the table to create and the node parent type.
/// - `ast_node_name` - the name of the node child type.
fn create_ast_node_parent_table<'a>(name: &'a str, ast_node_name: &'a str) -> dbscheme::Table<'a> {
dbscheme::Table {
name,
columns: vec![
dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "child",
unique: false,
ql_type: ql::Type::At(ast_node_name),
ql_type_is_ref: true,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "parent",
unique: false,
ql_type: ql::Type::At(name),
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "parent_index",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
],
keysets: Some(vec!["parent", "parent_index"]),
}
}
fn create_tokeninfo<'a>(name: &'a str, type_name: &'a str) -> dbscheme::Table<'a> {
dbscheme::Table {
name,
keysets: None,
columns: vec![
dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "id",
unique: true,
ql_type: ql::Type::At(type_name),
ql_type_is_ref: false,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "kind",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "value",
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "loc",
ql_type: ql::Type::At("location"),
ql_type_is_ref: true,
},
],
}
}
fn create_token_case<'a>(name: &'a str, token_kinds: Map<&'a str, usize>) -> dbscheme::Case<'a> {
let branches: Vec<(usize, &str)> = token_kinds
.iter()
.map(|(&name, kind_id)| (*kind_id, name))
.collect();
dbscheme::Case {
name,
column: "kind",
branches,
}
}
fn create_location_union<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Union(dbscheme::Union {
name: "location",
members: vec!["location_default"].into_iter().collect(),
})
}
fn create_files_table<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Table(dbscheme::Table {
name: "files",
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id",
ql_type: ql::Type::At("file"),
ql_type_is_ref: false,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "name",
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
],
})
}
fn create_folders_table<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Table(dbscheme::Table {
name: "folders",
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id",
ql_type: ql::Type::At("folder"),
ql_type_is_ref: false,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "name",
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
],
})
}
fn create_locations_default_table<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Table(dbscheme::Table {
name: "locations_default",
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id",
ql_type: ql::Type::At("location_default"),
ql_type_is_ref: false,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "file",
ql_type: ql::Type::At("file"),
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "start_line",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "start_column",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "end_line",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "end_column",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
],
})
}
fn create_container_union<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Union(dbscheme::Union {
name: "container",
members: vec!["folder", "file"].into_iter().collect(),
})
}
fn create_containerparent_table<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Table(dbscheme::Table {
name: "containerparent",
columns: vec![
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "parent",
ql_type: ql::Type::At("container"),
ql_type_is_ref: true,
},
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "child",
ql_type: ql::Type::At("container"),
ql_type_is_ref: true,
},
],
keysets: None,
})
}
fn create_source_location_prefix_table<'a>() -> dbscheme::Entry<'a> {
dbscheme::Entry::Table(dbscheme::Table {
name: "sourceLocationPrefix",
keysets: None,
columns: vec![dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "prefix",
ql_type: ql::Type::String,
ql_type_is_ref: true,
}],
})
}
fn create_diagnostics<'a>() -> (dbscheme::Case<'a>, dbscheme::Table<'a>) {
let table = dbscheme::Table {
name: "diagnostics",
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id",
ql_type: ql::Type::At("diagnostic"),
ql_type_is_ref: false,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "severity",
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "error_tag",
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "error_message",
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "full_error_message",
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::Int,
name: "location",
ql_type: ql::Type::At("location_default"),
ql_type_is_ref: true,
},
],
};
let severities: Vec<(usize, &str)> = vec![
(10, "diagnostic_debug"),
(20, "diagnostic_info"),
(30, "diagnostic_warning"),
(40, "diagnostic_error"),
];
let case = dbscheme::Case {
name: "diagnostic",
column: "severity",
branches: severities,
};
(case, table)
}
fn main() -> std::io::Result<()> {
tracing_subscriber::fmt()
.with_target(false)
.without_time()
.with_level(true)
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let matches = clap::App::new("Ruby dbscheme generator")
.version("1.0")
.author("GitHub")
.about("CodeQL Ruby dbscheme generator")
.args_from_usage(
"--dbscheme=<FILE> 'Path of the generated dbscheme file'
--library=<FILE> 'Path of the generated QLL file'",
)
.get_matches();
let dbscheme_path = matches.value_of("dbscheme").expect("missing --dbscheme");
let dbscheme_path = PathBuf::from(dbscheme_path);
let ql_library_path = matches.value_of("library").expect("missing --library");
let ql_library_path = PathBuf::from(ql_library_path);
let languages = vec![
Language {
name: "Ruby".to_owned(),
node_types: tree_sitter_ruby::NODE_TYPES,
},
Language {
name: "Erb".to_owned(),
node_types: tree_sitter_embedded_template::NODE_TYPES,
},
];
let mut dbscheme_writer = LineWriter::new(File::create(dbscheme_path)?);
write!(
dbscheme_writer,
"// CodeQL database schema for {}\n\
// Automatically generated from the tree-sitter grammar; do not edit\n\n",
languages[0].name
)?;
let (diagnostics_case, diagnostics_table) = create_diagnostics();
dbscheme::write(
&mut dbscheme_writer,
&[
create_location_union(),
create_locations_default_table(),
create_files_table(),
create_folders_table(),
create_container_union(),
create_containerparent_table(),
create_source_location_prefix_table(),
dbscheme::Entry::Table(diagnostics_table),
dbscheme::Entry::Case(diagnostics_case),
],
)?;
let mut ql_writer = LineWriter::new(File::create(ql_library_path)?);
write!(
ql_writer,
"/*\n\
* CodeQL library for {}
* Automatically generated from the tree-sitter grammar; do not edit\n\
*/\n\n",
languages[0].name
)?;
ql::write(
&mut ql_writer,
&[
ql::TopLevel::Import("codeql.files.FileSystem"),
ql::TopLevel::Import("codeql.Locations"),
],
)?;
for language in languages {
let prefix = node_types::to_snake_case(&language.name);
let ast_node_name = format!("{}_ast_node", &prefix);
let ast_node_parent_name = format!("{}_ast_node_parent", &prefix);
let token_name = format!("{}_token", &prefix);
let tokeninfo_name = format!("{}_tokeninfo", &prefix);
let reserved_word_name = format!("{}_reserved_word", &prefix);
let nodes = node_types::read_node_types_str(&prefix, language.node_types)?;
let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes);
ast_node_members.insert(&token_name);
dbscheme::write(&mut dbscheme_writer, &dbscheme_entries)?;
let token_case = create_token_case(&token_name, token_kinds);
dbscheme::write(
&mut dbscheme_writer,
&[
dbscheme::Entry::Table(create_tokeninfo(&tokeninfo_name, &token_name)),
dbscheme::Entry::Case(token_case),
dbscheme::Entry::Union(dbscheme::Union {
name: &ast_node_name,
members: ast_node_members,
}),
dbscheme::Entry::Union(dbscheme::Union {
name: &ast_node_parent_name,
members: [&ast_node_name, "file"].iter().cloned().collect(),
}),
dbscheme::Entry::Table(create_ast_node_parent_table(
&ast_node_parent_name,
&ast_node_name,
)),
],
)?;
let mut body = vec![
ql::TopLevel::Class(ql_gen::create_ast_node_class(
&ast_node_name,
&ast_node_parent_name,
)),
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)),
];
body.append(&mut ql_gen::convert_nodes(&nodes));
ql::write(
&mut ql_writer,
&[ql::TopLevel::Module(ql::Module {
qldoc: None,
name: &language.name,
body,
})],
)?;
}
Ok(())
}

275
ruby/generator/src/ql.rs Normal file
View File

@@ -0,0 +1,275 @@
use std::collections::BTreeSet;
use std::fmt;
#[derive(Clone, Eq, PartialEq, Hash)]
pub enum TopLevel<'a> {
Class(Class<'a>),
Import(&'a str),
Module(Module<'a>),
}
impl<'a> fmt::Display for TopLevel<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TopLevel::Import(x) => write!(f, "private import {}", x),
TopLevel::Class(cls) => write!(f, "{}", cls),
TopLevel::Module(m) => write!(f, "{}", m),
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Class<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub is_abstract: bool,
pub supertypes: BTreeSet<Type<'a>>,
pub characteristic_predicate: Option<Expression<'a>>,
pub predicates: Vec<Predicate<'a>>,
}
impl<'a> fmt::Display for Class<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
if self.is_abstract {
write!(f, "abstract ")?;
}
write!(f, "class {} extends ", &self.name)?;
for (index, supertype) in self.supertypes.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", supertype)?;
}
writeln!(f, " {{ ")?;
if let Some(charpred) = &self.characteristic_predicate {
writeln!(
f,
" {}",
Predicate {
qldoc: None,
name: self.name,
overridden: false,
return_type: None,
formal_parameters: vec![],
body: charpred.clone(),
}
)?;
}
for predicate in &self.predicates {
writeln!(f, " {}", predicate)?;
}
write!(f, "}}")?;
Ok(())
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Module<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub body: Vec<TopLevel<'a>>,
}
impl<'a> fmt::Display for Module<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
writeln!(f, "module {} {{ ", self.name)?;
for decl in &self.body {
writeln!(f, " {}", decl)?;
}
write!(f, "}}")?;
Ok(())
}
}
// The QL type of a column.
#[derive(Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum Type<'a> {
/// Primitive `int` type.
Int,
/// Primitive `string` type.
String,
/// A database type that will need to be referred to with an `@` prefix.
At(&'a str),
/// A user-defined type.
Normal(&'a str),
}
impl<'a> fmt::Display for Type<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Type::Int => write!(f, "int"),
Type::String => write!(f, "string"),
Type::Normal(name) => write!(f, "{}", name),
Type::At(name) => write!(f, "@{}", name),
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub enum Expression<'a> {
Var(&'a str),
String(&'a str),
Integer(usize),
Pred(&'a str, Vec<Expression<'a>>),
And(Vec<Expression<'a>>),
Or(Vec<Expression<'a>>),
Equals(Box<Expression<'a>>, Box<Expression<'a>>),
Dot(Box<Expression<'a>>, &'a str, Vec<Expression<'a>>),
Aggregate {
name: &'a str,
vars: Vec<FormalParameter<'a>>,
range: Option<Box<Expression<'a>>>,
expr: Box<Expression<'a>>,
second_expr: Option<Box<Expression<'a>>>,
},
}
impl<'a> fmt::Display for Expression<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expression::Var(x) => write!(f, "{}", x),
Expression::String(s) => write!(f, "\"{}\"", s),
Expression::Integer(n) => write!(f, "{}", n),
Expression::Pred(n, args) => {
write!(f, "{}(", n)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Expression::And(conjuncts) => {
if conjuncts.is_empty() {
write!(f, "any()")
} else {
for (index, conjunct) in conjuncts.iter().enumerate() {
if index > 0 {
write!(f, " and ")?;
}
write!(f, "({})", conjunct)?;
}
Ok(())
}
}
Expression::Or(disjuncts) => {
if disjuncts.is_empty() {
write!(f, "none()")
} else {
for (index, disjunct) in disjuncts.iter().enumerate() {
if index > 0 {
write!(f, " or ")?;
}
write!(f, "({})", disjunct)?;
}
Ok(())
}
}
Expression::Equals(a, b) => write!(f, "{} = {}", a, b),
Expression::Dot(x, member_pred, args) => {
write!(f, "{}.{}(", x, member_pred)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Expression::Aggregate {
name,
vars,
range,
expr,
second_expr,
} => {
write!(f, "{}(", name)?;
if !vars.is_empty() {
for (index, var) in vars.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", var)?;
}
write!(f, " | ")?;
}
if let Some(range) = range {
write!(f, "{} | ", range)?;
}
write!(f, "{}", expr)?;
if let Some(second_expr) = second_expr {
write!(f, ", {}", second_expr)?;
}
write!(f, ")")
}
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Predicate<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub overridden: bool,
pub return_type: Option<Type<'a>>,
pub formal_parameters: Vec<FormalParameter<'a>>,
pub body: Expression<'a>,
}
impl<'a> fmt::Display for Predicate<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
if self.overridden {
write!(f, "override ")?;
}
match &self.return_type {
None => write!(f, "predicate ")?,
Some(return_type) => write!(f, "{} ", return_type)?,
}
write!(f, "{}(", self.name)?;
for (index, param) in self.formal_parameters.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", param)?;
}
write!(f, ") {{ {} }}", self.body)?;
Ok(())
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct FormalParameter<'a> {
pub name: &'a str,
pub param_type: Type<'a>,
}
impl<'a> fmt::Display for FormalParameter<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.param_type, self.name)
}
}
/// Generates a QL library by writing the given `elements` to the `file`.
pub fn write<'a>(file: &mut dyn std::io::Write, elements: &'a [TopLevel]) -> std::io::Result<()> {
for element in elements {
write!(file, "{}\n\n", &element)?;
}
Ok(())
}

View File

@@ -0,0 +1,610 @@
use crate::ql;
use std::collections::BTreeSet;
/// Creates the hard-coded `AstNode` class that acts as a supertype of all
/// classes we generate.
pub fn create_ast_node_class<'a>(ast_node: &'a str, ast_node_parent: &'a str) -> ql::Class<'a> {
// Default implementation of `toString` calls `this.getAPrimaryQlClass()`
let to_string = ql::Predicate {
qldoc: Some(String::from(
"Gets a string representation of this element.",
)),
name: "toString",
overridden: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getAPrimaryQlClass",
vec![],
)),
),
};
let get_location = create_none_predicate(
Some(String::from("Gets the location of this element.")),
"getLocation",
false,
Some(ql::Type::Normal("Location")),
);
let get_a_field_or_child = create_none_predicate(
Some(String::from("Gets a field or child node of this node.")),
"getAFieldOrChild",
false,
Some(ql::Type::Normal("AstNode")),
);
let get_parent = ql::Predicate {
qldoc: Some(String::from("Gets the parent of this element.")),
name: "getParent",
overridden: false,
return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![],
body: ql::Expression::Pred(
ast_node_parent,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("result"),
ql::Expression::Var("_"),
],
),
};
let get_parent_index = ql::Predicate {
qldoc: Some(String::from(
"Gets the index of this node among the children of its parent.",
)),
name: "getParentIndex",
overridden: false,
return_type: Some(ql::Type::Int),
formal_parameters: vec![],
body: ql::Expression::Pred(
ast_node_parent,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("_"),
ql::Expression::Var("result"),
],
),
};
let get_a_primary_ql_class = ql::Predicate {
qldoc: Some(String::from(
"Gets the name of the primary QL class for this element.",
)),
name: "getAPrimaryQlClass",
overridden: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String("???")),
),
};
let get_primary_ql_classes = ql::Predicate {
qldoc: Some(
"Gets a comma-separated list of the names of the primary CodeQL \
classes to which this element belongs."
.to_owned(),
),
name: "getPrimaryQlClasses",
overridden: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Aggregate {
name: "concat",
vars: vec![],
range: None,
expr: Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getAPrimaryQlClass",
vec![],
)),
second_expr: Some(Box::new(ql::Expression::String(","))),
}),
),
};
ql::Class {
qldoc: Some(String::from("The base class for all AST nodes")),
name: "AstNode",
is_abstract: false,
supertypes: vec![ql::Type::At(ast_node)].into_iter().collect(),
characteristic_predicate: None,
predicates: vec![
to_string,
get_location,
get_parent,
get_parent_index,
get_a_field_or_child,
get_a_primary_ql_class,
get_primary_ql_classes,
],
}
}
pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Class<'a> {
let tokeninfo_arity = 4;
let get_value = ql::Predicate {
qldoc: Some(String::from("Gets the value of this token.")),
name: "getValue",
overridden: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: create_get_field_expr_for_column_storage("result", tokeninfo, 1, tokeninfo_arity),
};
let get_location = ql::Predicate {
qldoc: Some(String::from("Gets the location of this token.")),
name: "getLocation",
overridden: true,
return_type: Some(ql::Type::Normal("Location")),
formal_parameters: vec![],
body: create_get_field_expr_for_column_storage("result", tokeninfo, 2, tokeninfo_arity),
};
let to_string = ql::Predicate {
qldoc: Some(String::from(
"Gets a string representation of this element.",
)),
name: "toString",
overridden: true,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getValue",
vec![],
)),
),
};
ql::Class {
qldoc: Some(String::from("A token.")),
name: "Token",
is_abstract: false,
supertypes: vec![ql::Type::At(token_type), ql::Type::Normal("AstNode")]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
get_value,
get_location,
to_string,
create_get_a_primary_ql_class("Token"),
],
}
}
// Creates the `ReservedWord` class.
pub fn create_reserved_word_class(db_name: &str) -> ql::Class {
let class_name = "ReservedWord";
let get_a_primary_ql_class = create_get_a_primary_ql_class(class_name);
ql::Class {
qldoc: Some(String::from("A reserved word.")),
name: class_name,
is_abstract: false,
supertypes: vec![ql::Type::At(db_name), ql::Type::Normal("Token")]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![get_a_primary_ql_class],
}
}
/// Creates a predicate whose body is `none()`.
fn create_none_predicate<'a>(
qldoc: Option<String>,
name: &'a str,
overridden: bool,
return_type: Option<ql::Type<'a>>,
) -> ql::Predicate<'a> {
ql::Predicate {
qldoc,
name,
overridden,
return_type,
formal_parameters: Vec::new(),
body: ql::Expression::Pred("none", vec![]),
}
}
/// Creates an overridden `getAPrimaryQlClass` predicate that returns the given
/// name.
fn create_get_a_primary_ql_class(class_name: &str) -> ql::Predicate {
ql::Predicate {
qldoc: Some(String::from(
"Gets the name of the primary QL class for this element.",
)),
name: "getAPrimaryQlClass",
overridden: true,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String(class_name)),
),
}
}
/// Creates the `getLocation` predicate.
///
/// # Arguments
///
/// `def_table` - the name of the table that defines the entity and its location.
/// `arity` - the total number of columns in the table
fn create_get_location_predicate(def_table: &str, arity: usize) -> ql::Predicate {
ql::Predicate {
qldoc: Some(String::from("Gets the location of this element.")),
name: "getLocation",
overridden: true,
return_type: Some(ql::Type::Normal("Location")),
formal_parameters: vec![],
// body of the form: foo_bar_def(_, _, ..., result)
body: ql::Expression::Pred(
def_table,
[
vec![ql::Expression::Var("this")],
vec![ql::Expression::Var("_"); arity - 2],
vec![ql::Expression::Var("result")],
]
.concat(),
),
}
}
/// Creates the `getText` predicate for a leaf node.
///
/// # Arguments
///
/// `def_table` - the name of the table that defines the entity and its text.
fn create_get_text_predicate(def_table: &str) -> ql::Predicate {
ql::Predicate {
qldoc: Some(String::from("Gets the text content of this element.")),
name: "getText",
overridden: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Pred(
def_table,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("result"),
ql::Expression::Var("_"),
],
),
}
}
/// Returns an expression to get a field that's defined as a column in the parent's table.
///
/// # Arguments
///
/// * `result_var_name` - the name of the variable to which the resulting value should be bound
/// * `table_name` - the name of parent's defining table
/// * `column_index` - the index in that table that defines the field
/// * `arity` - the total number of columns in the table
fn create_get_field_expr_for_column_storage<'a>(
result_var_name: &'a str,
table_name: &'a str,
column_index: usize,
arity: usize,
) -> ql::Expression<'a> {
let num_underscores_before = column_index;
let num_underscores_after = arity - 2 - num_underscores_before;
ql::Expression::Pred(
table_name,
[
vec![ql::Expression::Var("this")],
vec![ql::Expression::Var("_"); num_underscores_before],
vec![ql::Expression::Var(result_var_name)],
vec![ql::Expression::Var("_"); num_underscores_after],
]
.concat(),
)
}
/// Returns an expression to get the field with the given index from its
/// auxiliary table. The index name can be "_" so the expression will hold for
/// all indices.
fn create_get_field_expr_for_table_storage<'a>(
result_var_name: &'a str,
table_name: &'a str,
index_var_name: Option<&'a str>,
) -> ql::Expression<'a> {
ql::Expression::Pred(
table_name,
match index_var_name {
Some(index_var_name) => vec![
ql::Expression::Var("this"),
ql::Expression::Var(index_var_name),
ql::Expression::Var(result_var_name),
],
None => vec![ql::Expression::Var("this"), ql::Expression::Var("result")],
},
)
}
/// Creates a pair consisting of a predicate to get the given field, and an
/// optional expression that will get the same field. When the field can occur
/// multiple times, the predicate will take an index argument, while the
/// expression will use the "don't care" expression to hold for all occurrences.
///
/// # Arguments
///
/// `main_table_name` - the name of the defining table for the parent node
/// `main_table_arity` - the number of columns in the main table
/// `main_table_column_index` - a mutable reference to a column index indicating
/// where the field is in the main table. If this is used (i.e. the field has
/// column storage), then the index is incremented.
/// `parent_name` - the name of the parent node
/// `field` - the field whose getters we are creating
/// `field_type` - the db name of the field's type (possibly being a union we created)
fn create_field_getters<'a>(
main_table_name: &'a str,
main_table_arity: usize,
main_table_column_index: &mut usize,
field: &'a node_types::Field,
nodes: &'a node_types::NodeTypeMap,
) -> (ql::Predicate<'a>, Option<ql::Expression<'a>>) {
let return_type = match &field.type_info {
node_types::FieldTypeInfo::Single(t) => {
Some(ql::Type::Normal(&nodes.get(t).unwrap().ql_class_name))
}
node_types::FieldTypeInfo::Multiple {
types: _,
dbscheme_union: _,
ql_class,
} => Some(ql::Type::Normal(ql_class)),
node_types::FieldTypeInfo::ReservedWordInt(_) => Some(ql::Type::String),
};
let formal_parameters = match &field.storage {
node_types::Storage::Column { .. } => vec![],
node_types::Storage::Table { has_index, .. } => {
if *has_index {
vec![ql::FormalParameter {
name: "i",
param_type: ql::Type::Int,
}]
} else {
vec![]
}
}
};
// For the expression to get a value, what variable name should the result
// be bound to?
let get_value_result_var_name = match &field.type_info {
node_types::FieldTypeInfo::ReservedWordInt(_) => "value",
node_types::FieldTypeInfo::Single(_) => "result",
node_types::FieldTypeInfo::Multiple { .. } => "result",
};
// Two expressions for getting the value. One that's suitable use in the
// getter predicate (where there may be a specific index), and another for
// use in `getAFieldOrChild` (where we use a "don't care" expression to
// match any index).
let (get_value, get_value_any_index) = match &field.storage {
node_types::Storage::Column { name: _ } => {
let column_index = *main_table_column_index;
*main_table_column_index += 1;
(
create_get_field_expr_for_column_storage(
get_value_result_var_name,
main_table_name,
column_index,
main_table_arity,
),
create_get_field_expr_for_column_storage(
get_value_result_var_name,
main_table_name,
column_index,
main_table_arity,
),
)
}
node_types::Storage::Table {
name: field_table_name,
has_index,
column_name: _,
} => (
create_get_field_expr_for_table_storage(
get_value_result_var_name,
field_table_name,
if *has_index { Some("i") } else { None },
),
create_get_field_expr_for_table_storage(
get_value_result_var_name,
field_table_name,
if *has_index { Some("_") } else { None },
),
),
};
let (body, optional_expr) = match &field.type_info {
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
// Create an expression that binds the corresponding string to `result` for each `value`, e.g.:
// result = "foo" and value = 0 or
// result = "bar" and value = 1 or
// result = "baz" and value = 2
let disjuncts = int_mapping
.iter()
.map(|(token_str, (value, _))| {
ql::Expression::And(vec![
ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String(token_str)),
),
ql::Expression::Equals(
Box::new(ql::Expression::Var("value")),
Box::new(ql::Expression::Integer(*value)),
),
])
})
.collect();
(
ql::Expression::Aggregate {
name: "exists",
vars: vec![ql::FormalParameter {
name: "value",
param_type: ql::Type::Int,
}],
range: Some(Box::new(get_value)),
expr: Box::new(ql::Expression::Or(disjuncts)),
second_expr: None,
},
// Since the getter returns a string and not an AstNode, it won't be part of getAFieldOrChild:
None,
)
}
node_types::FieldTypeInfo::Single(_) | node_types::FieldTypeInfo::Multiple { .. } => {
(get_value, Some(get_value_any_index))
}
};
let qldoc = match &field.name {
Some(name) => format!("Gets the node corresponding to the field `{}`.", name),
None => {
if formal_parameters.is_empty() {
"Gets the child of this node.".to_owned()
} else {
"Gets the `i`th child of this node.".to_owned()
}
}
};
(
ql::Predicate {
qldoc: Some(qldoc),
name: &field.getter_name,
overridden: false,
return_type,
formal_parameters,
body,
},
optional_expr,
)
}
/// Converts the given node types into CodeQL classes wrapping the dbscheme.
pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<ql::TopLevel> {
let mut classes: Vec<ql::TopLevel> = Vec::new();
let mut token_kinds = BTreeSet::new();
for (type_name, node) in nodes {
if let node_types::EntryKind::Token { .. } = &node.kind {
if type_name.named {
token_kinds.insert(&type_name.kind);
}
}
}
for (type_name, node) in nodes {
match &node.kind {
node_types::EntryKind::Token { kind_id: _ } => {
if type_name.named {
let get_a_primary_ql_class = create_get_a_primary_ql_class(&node.ql_class_name);
let mut supertypes: BTreeSet<ql::Type> = BTreeSet::new();
supertypes.insert(ql::Type::At(&node.dbscheme_name));
supertypes.insert(ql::Type::Normal("Token"));
classes.push(ql::TopLevel::Class(ql::Class {
qldoc: Some(format!("A class representing `{}` tokens.", type_name.kind)),
name: &node.ql_class_name,
is_abstract: false,
supertypes,
characteristic_predicate: None,
predicates: vec![get_a_primary_ql_class],
}));
}
}
node_types::EntryKind::Union { members: _ } => {
// It's a tree-sitter supertype node, so we're wrapping a dbscheme
// union type.
classes.push(ql::TopLevel::Class(ql::Class {
qldoc: None,
name: &node.ql_class_name,
is_abstract: false,
supertypes: vec![
ql::Type::At(&node.dbscheme_name),
ql::Type::Normal("AstNode"),
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![],
}));
}
node_types::EntryKind::Table {
name: main_table_name,
fields,
} => {
// Count how many columns there will be in the main table.
// There will be:
// - one for the id
// - one for the location
// - one for each field that's stored as a column
// - if there are no fields, one for the text column.
let main_table_arity = 2 + if fields.is_empty() {
1
} else {
fields
.iter()
.filter(|&f| matches!(f.storage, node_types::Storage::Column { .. }))
.count()
};
let main_class_name = &node.ql_class_name;
let mut main_class = ql::Class {
qldoc: Some(format!("A class representing `{}` nodes.", type_name.kind)),
name: main_class_name,
is_abstract: false,
supertypes: vec![
ql::Type::At(&node.dbscheme_name),
ql::Type::Normal("AstNode"),
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
create_get_a_primary_ql_class(main_class_name),
create_get_location_predicate(main_table_name, main_table_arity),
],
};
if fields.is_empty() {
main_class
.predicates
.push(create_get_text_predicate(main_table_name));
} else {
let mut main_table_column_index: usize = 0;
let mut get_child_exprs: Vec<ql::Expression> = Vec::new();
// Iterate through the fields, creating:
// - classes to wrap union types if fields need them,
// - predicates to access the fields,
// - the QL expressions to access the fields that will be part of getAFieldOrChild.
for field in fields {
let (get_pred, get_child_expr) = create_field_getters(
main_table_name,
main_table_arity,
&mut main_table_column_index,
field,
nodes,
);
main_class.predicates.push(get_pred);
if let Some(get_child_expr) = get_child_expr {
get_child_exprs.push(get_child_expr)
}
}
main_class.predicates.push(ql::Predicate {
qldoc: Some(String::from("Gets a field or child node of this node.")),
name: "getAFieldOrChild",
overridden: true,
return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![],
body: ql::Expression::Or(get_child_exprs),
});
}
classes.push(ql::TopLevel::Class(main_class));
}
}
}
classes
}