mirror of
https://github.com/github/codeql.git
synced 2026-06-06 05:57:07 +02:00
Compare commits
7 Commits
codeql-cli
...
nickrolfe/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8146a1089 | ||
|
|
ea5d696d55 | ||
|
|
6908a0dc12 | ||
|
|
189e75bfe2 | ||
|
|
b502e68783 | ||
|
|
6d28e87f57 | ||
|
|
5cada400f1 |
BIN
ruby/Cargo.lock
generated
BIN
ruby/Cargo.lock
generated
Binary file not shown.
@@ -18,3 +18,4 @@ tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
|
|||||||
rayon = "1.5.0"
|
rayon = "1.5.0"
|
||||||
num_cpus = "1.13.0"
|
num_cpus = "1.13.0"
|
||||||
regex = "1.4.3"
|
regex = "1.4.3"
|
||||||
|
indexmap = "1.7.0"
|
||||||
@@ -1,161 +1,111 @@
|
|||||||
|
use crate::trap;
|
||||||
|
use indexmap::IndexMap;
|
||||||
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
|
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::collections::BTreeMap as Map;
|
|
||||||
use std::collections::BTreeSet as Set;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io::Write;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use tracing::{error, info, span, Level};
|
use tracing::{error, info, span, Level};
|
||||||
use tree_sitter::{Language, Node, Parser, Range, Tree};
|
use tree_sitter::{Language, Node, Parser, Range, Tree};
|
||||||
|
|
||||||
pub struct TrapWriter {
|
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
|
||||||
/// The accumulated trap entries
|
let (file_label, fresh) =
|
||||||
trap_output: Vec<TrapEntry>,
|
writer.global_id(trap::full_id_for_file(&normalize_path(absolute_path)));
|
||||||
/// A counter for generating fresh labels
|
if fresh {
|
||||||
counter: u32,
|
writer.add_tuple(
|
||||||
/// cache of global keys
|
"files",
|
||||||
global_keys: std::collections::HashMap<String, Label>,
|
vec![
|
||||||
|
trap::Arg::Label(file_label),
|
||||||
|
trap::Arg::String(normalize_path(absolute_path)),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
populate_parent_folders(writer, file_label, absolute_path.parent());
|
||||||
|
}
|
||||||
|
file_label
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_trap_writer() -> TrapWriter {
|
fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
|
||||||
TrapWriter {
|
let (file_label, fresh) = writer.global_id("empty;sourcefile".to_owned());
|
||||||
counter: 0,
|
if fresh {
|
||||||
trap_output: Vec::new(),
|
writer.add_tuple(
|
||||||
global_keys: std::collections::HashMap::new(),
|
"files",
|
||||||
|
vec![
|
||||||
|
trap::Arg::Label(file_label),
|
||||||
|
trap::Arg::String("".to_string()),
|
||||||
|
],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
file_label
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TrapWriter {
|
pub fn populate_empty_location(writer: &mut trap::Writer) {
|
||||||
/// Gets a label that will hold the unique ID of the passed string at import time.
|
let file_label = populate_empty_file(writer);
|
||||||
/// This can be used for incrementally importable TRAP files -- use globally unique
|
location(writer, file_label, 0, 0, 0, 0);
|
||||||
/// strings to compute a unique ID for table tuples.
|
}
|
||||||
///
|
|
||||||
/// Note: You probably want to make sure that the key strings that you use are disjoint
|
|
||||||
/// for disjoint column types; the standard way of doing this is to prefix (or append)
|
|
||||||
/// the column type name to the ID. Thus, you might identify methods in Java by the
|
|
||||||
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
|
|
||||||
|
|
||||||
fn fresh_id(&mut self) -> Label {
|
pub fn populate_parent_folders(
|
||||||
let label = Label(self.counter);
|
writer: &mut trap::Writer,
|
||||||
self.counter += 1;
|
child_label: trap::Label,
|
||||||
self.trap_output.push(TrapEntry::FreshId(label));
|
path: Option<&Path>,
|
||||||
label
|
) {
|
||||||
}
|
let mut path = path;
|
||||||
|
let mut child_label = child_label;
|
||||||
fn global_id(&mut self, key: &str) -> (Label, bool) {
|
loop {
|
||||||
if let Some(label) = self.global_keys.get(key) {
|
match path {
|
||||||
return (*label, false);
|
None => break,
|
||||||
}
|
Some(folder) => {
|
||||||
let label = Label(self.counter);
|
let (folder_label, fresh) =
|
||||||
self.counter += 1;
|
writer.global_id(trap::full_id_for_folder(&normalize_path(folder)));
|
||||||
self.global_keys.insert(key.to_owned(), label);
|
writer.add_tuple(
|
||||||
self.trap_output
|
"containerparent",
|
||||||
.push(TrapEntry::MapLabelToKey(label, key.to_owned()));
|
vec![
|
||||||
(label, true)
|
trap::Arg::Label(folder_label),
|
||||||
}
|
trap::Arg::Label(child_label),
|
||||||
|
],
|
||||||
fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
|
);
|
||||||
self.trap_output
|
if fresh {
|
||||||
.push(TrapEntry::GenericTuple(table_name.to_owned(), args))
|
writer.add_tuple(
|
||||||
}
|
"folders",
|
||||||
|
vec![
|
||||||
fn populate_file(&mut self, absolute_path: &Path) -> Label {
|
trap::Arg::Label(folder_label),
|
||||||
let (file_label, fresh) = self.global_id(&full_id_for_file(absolute_path));
|
trap::Arg::String(normalize_path(folder)),
|
||||||
if fresh {
|
],
|
||||||
self.add_tuple(
|
|
||||||
"files",
|
|
||||||
vec![
|
|
||||||
Arg::Label(file_label),
|
|
||||||
Arg::String(normalize_path(absolute_path)),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
self.populate_parent_folders(file_label, absolute_path.parent());
|
|
||||||
}
|
|
||||||
file_label
|
|
||||||
}
|
|
||||||
|
|
||||||
fn populate_empty_file(&mut self) -> Label {
|
|
||||||
let (file_label, fresh) = self.global_id("empty;sourcefile");
|
|
||||||
if fresh {
|
|
||||||
self.add_tuple(
|
|
||||||
"files",
|
|
||||||
vec![Arg::Label(file_label), Arg::String("".to_string())],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
file_label
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn populate_empty_location(&mut self) {
|
|
||||||
let file_label = self.populate_empty_file();
|
|
||||||
self.location(file_label, 0, 0, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) {
|
|
||||||
let mut path = path;
|
|
||||||
let mut child_label = child_label;
|
|
||||||
loop {
|
|
||||||
match path {
|
|
||||||
None => break,
|
|
||||||
Some(folder) => {
|
|
||||||
let (folder_label, fresh) = self.global_id(&full_id_for_folder(folder));
|
|
||||||
self.add_tuple(
|
|
||||||
"containerparent",
|
|
||||||
vec![Arg::Label(folder_label), Arg::Label(child_label)],
|
|
||||||
);
|
);
|
||||||
if fresh {
|
path = folder.parent();
|
||||||
self.add_tuple(
|
child_label = folder_label;
|
||||||
"folders",
|
} else {
|
||||||
vec![
|
break;
|
||||||
Arg::Label(folder_label),
|
|
||||||
Arg::String(normalize_path(folder)),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
path = folder.parent();
|
|
||||||
child_label = folder_label;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn location(
|
fn location(
|
||||||
&mut self,
|
writer: &mut trap::Writer,
|
||||||
file_label: Label,
|
file_label: trap::Label,
|
||||||
start_line: usize,
|
start_line: usize,
|
||||||
start_column: usize,
|
start_column: usize,
|
||||||
end_line: usize,
|
end_line: usize,
|
||||||
end_column: usize,
|
end_column: usize,
|
||||||
) -> Label {
|
) -> trap::Label {
|
||||||
let (loc_label, fresh) = self.global_id(&format!(
|
let (loc_label, fresh) = writer.global_id(format!(
|
||||||
"loc,{{{}}},{},{},{},{}",
|
"loc,{{{}}},{},{},{},{}",
|
||||||
file_label, start_line, start_column, end_line, end_column
|
file_label, start_line, start_column, end_line, end_column
|
||||||
));
|
));
|
||||||
if fresh {
|
if fresh {
|
||||||
self.add_tuple(
|
writer.add_tuple(
|
||||||
"locations_default",
|
"locations_default",
|
||||||
vec![
|
vec![
|
||||||
Arg::Label(loc_label),
|
trap::Arg::Label(loc_label),
|
||||||
Arg::Label(file_label),
|
trap::Arg::Label(file_label),
|
||||||
Arg::Int(start_line),
|
trap::Arg::Int(start_line),
|
||||||
Arg::Int(start_column),
|
trap::Arg::Int(start_column),
|
||||||
Arg::Int(end_line),
|
trap::Arg::Int(end_line),
|
||||||
Arg::Int(end_column),
|
trap::Arg::Int(end_column),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
|
||||||
loc_label
|
|
||||||
}
|
|
||||||
|
|
||||||
fn comment(&mut self, text: String) {
|
|
||||||
self.trap_output.push(TrapEntry::Comment(text));
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> {
|
|
||||||
write!(writer, "{}", Program(self.trap_output))
|
|
||||||
}
|
}
|
||||||
|
loc_label
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||||
@@ -163,71 +113,42 @@ pub fn extract(
|
|||||||
language: Language,
|
language: Language,
|
||||||
language_prefix: &str,
|
language_prefix: &str,
|
||||||
schema: &NodeTypeMap,
|
schema: &NodeTypeMap,
|
||||||
trap_writer: &mut TrapWriter,
|
trap_writer: &mut trap::Writer,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
source: &[u8],
|
source: &[u8],
|
||||||
ranges: &[Range],
|
ranges: &[Range],
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
|
let path_str = format!("{}", path.display());
|
||||||
let span = span!(
|
let span = span!(
|
||||||
Level::TRACE,
|
Level::TRACE,
|
||||||
"extract",
|
"extract",
|
||||||
file = %path.display()
|
file = %path_str
|
||||||
);
|
);
|
||||||
|
|
||||||
let _enter = span.enter();
|
let _enter = span.enter();
|
||||||
|
|
||||||
info!("extracting: {}", path.display());
|
info!("extracting: {}", path_str);
|
||||||
|
|
||||||
let mut parser = Parser::new();
|
let mut parser = Parser::new();
|
||||||
parser.set_language(language).unwrap();
|
parser.set_language(language).unwrap();
|
||||||
parser.set_included_ranges(ranges).unwrap();
|
parser.set_included_ranges(ranges).unwrap();
|
||||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
let file_label = populate_file(trap_writer, path);
|
||||||
let file_label = &trap_writer.populate_file(path);
|
let mut visitor = Visitor::new(
|
||||||
let mut visitor = Visitor {
|
|
||||||
source,
|
source,
|
||||||
trap_writer,
|
trap_writer,
|
||||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||||
path: format!("{}", path.display()),
|
&path_str,
|
||||||
file_label: *file_label,
|
file_label,
|
||||||
toplevel_child_counter: 0,
|
|
||||||
stack: Vec::new(),
|
|
||||||
language_prefix,
|
language_prefix,
|
||||||
schema,
|
schema,
|
||||||
};
|
);
|
||||||
traverse(&tree, &mut visitor);
|
traverse(&tree, &mut visitor);
|
||||||
|
|
||||||
parser.reset();
|
parser.reset();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escapes a string for use in a TRAP key, by replacing special characters with
|
|
||||||
/// HTML entities.
|
|
||||||
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
|
|
||||||
fn needs_escaping(c: char) -> bool {
|
|
||||||
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
|
|
||||||
}
|
|
||||||
|
|
||||||
let key = key.into();
|
|
||||||
if key.contains(needs_escaping) {
|
|
||||||
let mut escaped = String::with_capacity(2 * key.len());
|
|
||||||
for c in key.chars() {
|
|
||||||
match c {
|
|
||||||
'&' => escaped.push_str("&"),
|
|
||||||
'{' => escaped.push_str("{"),
|
|
||||||
'}' => escaped.push_str("}"),
|
|
||||||
'"' => escaped.push_str("""),
|
|
||||||
'@' => escaped.push_str("@"),
|
|
||||||
'#' => escaped.push_str("#"),
|
|
||||||
_ => escaped.push(c),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Cow::Owned(escaped)
|
|
||||||
} else {
|
|
||||||
key
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Normalizes the path according the common CodeQL specification. Assumes that
|
/// Normalizes the path according the common CodeQL specification. Assumes that
|
||||||
/// `path` has already been canonicalized using `std::fs::canonicalize`.
|
/// `path` has already been canonicalized using `std::fs::canonicalize`.
|
||||||
fn normalize_path(path: &Path) -> String {
|
fn normalize_path(path: &Path) -> String {
|
||||||
@@ -267,34 +188,28 @@ fn normalize_path(path: &Path) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn full_id_for_file(path: &Path) -> String {
|
|
||||||
format!("{};sourcefile", escape_key(&normalize_path(path)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn full_id_for_folder(path: &Path) -> String {
|
|
||||||
format!("{};folder", escape_key(&normalize_path(path)))
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ChildNode {
|
struct ChildNode {
|
||||||
field_name: Option<&'static str>,
|
field_name: Option<&'static str>,
|
||||||
label: Label,
|
label: trap::Label,
|
||||||
type_name: TypeName,
|
type_name: TypeName,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Visitor<'a> {
|
struct Visitor<'a> {
|
||||||
/// The file path of the source code (as string)
|
/// The file path of the source code (as string)
|
||||||
path: String,
|
path: &'a str,
|
||||||
/// The label to use whenever we need to refer to the `@file` entity of this
|
/// The label to use whenever we need to refer to the `@file` entity of this
|
||||||
/// source file.
|
/// source file.
|
||||||
file_label: Label,
|
file_label: trap::Label,
|
||||||
/// The source code as a UTF-8 byte array
|
/// The source code as a UTF-8 byte array
|
||||||
source: &'a [u8],
|
source: &'a [u8],
|
||||||
/// A TrapWriter to accumulate trap entries
|
/// A trap::Writer to accumulate trap entries
|
||||||
trap_writer: &'a mut TrapWriter,
|
trap_writer: &'a mut trap::Writer,
|
||||||
/// A counter for top-level child nodes
|
/// A counter for top-level child nodes
|
||||||
toplevel_child_counter: usize,
|
toplevel_child_counter: usize,
|
||||||
/// Language prefix
|
/// Language-specific name of the AST parent table
|
||||||
language_prefix: &'a str,
|
ast_node_parent_table_name: String,
|
||||||
|
/// Language-specific name of the tokeninfo table
|
||||||
|
tokeninfo_table_name: String,
|
||||||
/// A lookup table from type name to node types
|
/// A lookup table from type name to node types
|
||||||
schema: &'a NodeTypeMap,
|
schema: &'a NodeTypeMap,
|
||||||
/// A stack for gathering information from child nodes. Whenever a node is
|
/// A stack for gathering information from child nodes. Whenever a node is
|
||||||
@@ -303,39 +218,62 @@ struct Visitor<'a> {
|
|||||||
/// node the list containing the child data is popped from the stack and
|
/// node the list containing the child data is popped from the stack and
|
||||||
/// matched against the dbscheme for the node. If the expectations are met
|
/// matched against the dbscheme for the node. If the expectations are met
|
||||||
/// the corresponding row definitions are added to the trap_output.
|
/// the corresponding row definitions are added to the trap_output.
|
||||||
stack: Vec<(Label, usize, Vec<ChildNode>)>,
|
stack: Vec<(trap::Label, usize, Vec<ChildNode>)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Visitor<'_> {
|
impl<'a> Visitor<'a> {
|
||||||
|
fn new(
|
||||||
|
source: &'a [u8],
|
||||||
|
trap_writer: &'a mut trap::Writer,
|
||||||
|
path: &'a str,
|
||||||
|
file_label: trap::Label,
|
||||||
|
language_prefix: &str,
|
||||||
|
schema: &'a NodeTypeMap,
|
||||||
|
) -> Visitor<'a> {
|
||||||
|
Visitor {
|
||||||
|
path,
|
||||||
|
file_label,
|
||||||
|
source,
|
||||||
|
trap_writer,
|
||||||
|
toplevel_child_counter: 0,
|
||||||
|
ast_node_parent_table_name: format!("{}_ast_node_parent", language_prefix),
|
||||||
|
tokeninfo_table_name: format!("{}_tokeninfo", language_prefix),
|
||||||
|
schema,
|
||||||
|
stack: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn record_parse_error(
|
fn record_parse_error(
|
||||||
&mut self,
|
&mut self,
|
||||||
error_message: String,
|
error_message: String,
|
||||||
full_error_message: String,
|
full_error_message: String,
|
||||||
loc: Label,
|
loc: trap::Label,
|
||||||
) {
|
) {
|
||||||
error!("{}", full_error_message);
|
error!("{}", full_error_message);
|
||||||
let id = self.trap_writer.fresh_id();
|
let id = self.trap_writer.fresh_id();
|
||||||
self.trap_writer.add_tuple(
|
self.trap_writer.add_tuple(
|
||||||
"diagnostics",
|
"diagnostics",
|
||||||
vec![
|
vec![
|
||||||
Arg::Label(id),
|
trap::Arg::Label(id),
|
||||||
Arg::Int(40), // severity 40 = error
|
trap::Arg::Int(40), // severity 40 = error
|
||||||
Arg::String("parse_error".to_string()),
|
trap::Arg::String("parse_error".to_string()),
|
||||||
Arg::String(error_message),
|
trap::Arg::String(error_message),
|
||||||
Arg::String(full_error_message),
|
trap::Arg::String(full_error_message),
|
||||||
Arg::Label(loc),
|
trap::Arg::Label(loc),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn record_parse_error_for_node(
|
fn record_parse_error_for_node(&mut self, error_message: String, node: &Node) {
|
||||||
&mut self,
|
let full_error_message = format!(
|
||||||
error_message: String,
|
"{}:{}: {}",
|
||||||
full_error_message: String,
|
&self.path,
|
||||||
node: Node,
|
node.start_position().row + 1,
|
||||||
) {
|
&error_message
|
||||||
|
);
|
||||||
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
||||||
let loc = self.trap_writer.location(
|
let loc = location(
|
||||||
|
self.trap_writer,
|
||||||
self.file_label,
|
self.file_label,
|
||||||
start_line,
|
start_line,
|
||||||
start_column,
|
start_column,
|
||||||
@@ -345,20 +283,14 @@ impl Visitor<'_> {
|
|||||||
self.record_parse_error(error_message, full_error_message, loc);
|
self.record_parse_error(error_message, full_error_message, loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn enter_node(&mut self, node: Node) -> bool {
|
fn enter_node(&mut self, node: &Node) -> bool {
|
||||||
if node.is_error() || node.is_missing() {
|
if node.is_error() || node.is_missing() {
|
||||||
let error_message = if node.is_missing() {
|
let error_message = if node.is_missing() {
|
||||||
format!("parse error: expecting '{}'", node.kind())
|
format!("parse error: expecting '{}'", node.kind())
|
||||||
} else {
|
} else {
|
||||||
"parse error".to_string()
|
"parse error".to_string()
|
||||||
};
|
};
|
||||||
let full_error_message = format!(
|
self.record_parse_error_for_node(error_message, node);
|
||||||
"{}:{}: {}",
|
|
||||||
&self.path,
|
|
||||||
node.start_position().row + 1,
|
|
||||||
error_message
|
|
||||||
);
|
|
||||||
self.record_parse_error_for_node(error_message, full_error_message, node);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -368,13 +300,14 @@ impl Visitor<'_> {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) {
|
fn leave_node(&mut self, field_name: Option<&'static str>, node: &Node) {
|
||||||
if node.is_error() || node.is_missing() {
|
if node.is_error() || node.is_missing() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
|
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
|
||||||
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
||||||
let loc = self.trap_writer.location(
|
let loc = location(
|
||||||
|
self.trap_writer,
|
||||||
self.file_label,
|
self.file_label,
|
||||||
start_line,
|
start_line,
|
||||||
start_column,
|
start_column,
|
||||||
@@ -402,20 +335,20 @@ impl Visitor<'_> {
|
|||||||
match &table.kind {
|
match &table.kind {
|
||||||
EntryKind::Token { kind_id, .. } => {
|
EntryKind::Token { kind_id, .. } => {
|
||||||
self.trap_writer.add_tuple(
|
self.trap_writer.add_tuple(
|
||||||
&format!("{}_ast_node_parent", self.language_prefix),
|
&self.ast_node_parent_table_name,
|
||||||
vec![
|
vec![
|
||||||
Arg::Label(id),
|
trap::Arg::Label(id),
|
||||||
Arg::Label(parent_id),
|
trap::Arg::Label(parent_id),
|
||||||
Arg::Int(parent_index),
|
trap::Arg::Int(parent_index),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
self.trap_writer.add_tuple(
|
self.trap_writer.add_tuple(
|
||||||
&format!("{}_tokeninfo", self.language_prefix),
|
&self.tokeninfo_table_name,
|
||||||
vec![
|
vec![
|
||||||
Arg::Label(id),
|
trap::Arg::Label(id),
|
||||||
Arg::Int(*kind_id),
|
trap::Arg::Int(*kind_id),
|
||||||
sliced_source_arg(self.source, node),
|
sliced_source_arg(self.source, node),
|
||||||
Arg::Label(loc),
|
trap::Arg::Label(loc),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -423,18 +356,18 @@ impl Visitor<'_> {
|
|||||||
fields,
|
fields,
|
||||||
name: table_name,
|
name: table_name,
|
||||||
} => {
|
} => {
|
||||||
if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) {
|
if let Some(args) = self.complex_node(node, fields, &child_nodes, id) {
|
||||||
self.trap_writer.add_tuple(
|
self.trap_writer.add_tuple(
|
||||||
&format!("{}_ast_node_parent", self.language_prefix),
|
&self.ast_node_parent_table_name,
|
||||||
vec![
|
vec![
|
||||||
Arg::Label(id),
|
trap::Arg::Label(id),
|
||||||
Arg::Label(parent_id),
|
trap::Arg::Label(parent_id),
|
||||||
Arg::Int(parent_index),
|
trap::Arg::Int(parent_index),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
let mut all_args = vec![Arg::Label(id)];
|
let mut all_args = vec![trap::Arg::Label(id)];
|
||||||
all_args.extend(args);
|
all_args.extend(args);
|
||||||
all_args.push(Arg::Label(loc));
|
all_args.push(trap::Arg::Label(loc));
|
||||||
self.trap_writer.add_tuple(table_name, all_args);
|
self.trap_writer.add_tuple(table_name, all_args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -472,9 +405,9 @@ impl Visitor<'_> {
|
|||||||
node: &Node,
|
node: &Node,
|
||||||
fields: &[Field],
|
fields: &[Field],
|
||||||
child_nodes: &[ChildNode],
|
child_nodes: &[ChildNode],
|
||||||
parent_id: Label,
|
parent_id: trap::Label,
|
||||||
) -> Option<Vec<Arg>> {
|
) -> Option<Vec<trap::Arg>> {
|
||||||
let mut map: Map<&Option<String>, (&Field, Vec<Arg>)> = Map::new();
|
let mut map: IndexMap<&Option<String>, (&Field, Vec<trap::Arg>)> = IndexMap::new();
|
||||||
for field in fields {
|
for field in fields {
|
||||||
map.insert(&field.name, (field, Vec::new()));
|
map.insert(&field.name, (field, Vec::new()));
|
||||||
}
|
}
|
||||||
@@ -482,46 +415,47 @@ impl Visitor<'_> {
|
|||||||
if let Some((field, values)) = map.get_mut(&child_node.field_name.map(|x| x.to_owned()))
|
if let Some((field, values)) = map.get_mut(&child_node.field_name.map(|x| x.to_owned()))
|
||||||
{
|
{
|
||||||
//TODO: handle error and missing nodes
|
//TODO: handle error and missing nodes
|
||||||
if self.type_matches(&child_node.type_name, &field.type_info) {
|
if field.type_info.valid_types.contains(&child_node.type_name) {
|
||||||
if let node_types::FieldTypeInfo::ReservedWordInt(int_mapping) =
|
if let node_types::FieldTypeKind::ReservedWordInt(int_mapping) =
|
||||||
&field.type_info
|
&field.type_info.kind
|
||||||
{
|
{
|
||||||
// We can safely unwrap because type_matches checks the key is in the map.
|
match int_mapping.get(&child_node.type_name.kind) {
|
||||||
let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap();
|
Some((int_value, _)) => values.push(trap::Arg::Int(*int_value)),
|
||||||
values.push(Arg::Int(*int_value));
|
None => self.record_parse_error_for_node(
|
||||||
|
format!(
|
||||||
|
"could not map field {}::{} with type {:?} to an integer value",
|
||||||
|
node.kind(),
|
||||||
|
child_node.field_name.unwrap_or("child"),
|
||||||
|
child_node.type_name
|
||||||
|
),
|
||||||
|
node,
|
||||||
|
),
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
values.push(Arg::Label(child_node.label));
|
values.push(trap::Arg::Label(child_node.label));
|
||||||
}
|
}
|
||||||
} else if field.name.is_some() {
|
} else if field.name.is_some() {
|
||||||
let error_message = format!(
|
self.record_parse_error_for_node(
|
||||||
"type mismatch for field {}::{} with type {:?} != {:?}",
|
format!(
|
||||||
node.kind(),
|
"type mismatch for field {}::{} with type {:?} != {:?}",
|
||||||
child_node.field_name.unwrap_or("child"),
|
node.kind(),
|
||||||
child_node.type_name,
|
child_node.field_name.unwrap_or("child"),
|
||||||
field.type_info
|
child_node.type_name,
|
||||||
|
field.type_info
|
||||||
|
),
|
||||||
|
node,
|
||||||
);
|
);
|
||||||
let full_error_message = format!(
|
|
||||||
"{}:{}: {}",
|
|
||||||
&self.path,
|
|
||||||
node.start_position().row + 1,
|
|
||||||
error_message
|
|
||||||
);
|
|
||||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
|
||||||
}
|
}
|
||||||
} else if child_node.field_name.is_some() || child_node.type_name.named {
|
} else if child_node.field_name.is_some() || child_node.type_name.named {
|
||||||
let error_message = format!(
|
self.record_parse_error_for_node(
|
||||||
"value for unknown field: {}::{} and type {:?}",
|
format!(
|
||||||
node.kind(),
|
"value for unknown field: {}::{} and type {:?}",
|
||||||
&child_node.field_name.unwrap_or("child"),
|
node.kind(),
|
||||||
&child_node.type_name
|
&child_node.field_name.unwrap_or("child"),
|
||||||
|
&child_node.type_name
|
||||||
|
),
|
||||||
|
node,
|
||||||
);
|
);
|
||||||
let full_error_message = format!(
|
|
||||||
"{}:{}: {}",
|
|
||||||
&self.path,
|
|
||||||
node.start_position().row + 1,
|
|
||||||
error_message
|
|
||||||
);
|
|
||||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut args = Vec::new();
|
let mut args = Vec::new();
|
||||||
@@ -534,23 +468,19 @@ impl Visitor<'_> {
|
|||||||
args.push(child_values.first().unwrap().clone());
|
args.push(child_values.first().unwrap().clone());
|
||||||
} else {
|
} else {
|
||||||
is_valid = false;
|
is_valid = false;
|
||||||
let error_message = format!(
|
self.record_parse_error_for_node(
|
||||||
"{} for field: {}::{}",
|
format!(
|
||||||
if child_values.is_empty() {
|
"{} for field: {}::{}",
|
||||||
"missing value"
|
if child_values.is_empty() {
|
||||||
} else {
|
"missing value"
|
||||||
"too many values"
|
} else {
|
||||||
},
|
"too many values"
|
||||||
node.kind(),
|
},
|
||||||
column_name
|
node.kind(),
|
||||||
|
column_name
|
||||||
|
),
|
||||||
|
node,
|
||||||
);
|
);
|
||||||
let full_error_message = format!(
|
|
||||||
"{}:{}: {}",
|
|
||||||
&self.path,
|
|
||||||
node.start_position().row + 1,
|
|
||||||
error_message
|
|
||||||
);
|
|
||||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Storage::Table {
|
Storage::Table {
|
||||||
@@ -569,9 +499,9 @@ impl Visitor<'_> {
|
|||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let mut args = vec![Arg::Label(parent_id)];
|
let mut args = vec![trap::Arg::Label(parent_id)];
|
||||||
if *has_index {
|
if *has_index {
|
||||||
args.push(Arg::Int(index))
|
args.push(trap::Arg::Int(index))
|
||||||
}
|
}
|
||||||
args.push(child_value.clone());
|
args.push(child_value.clone());
|
||||||
self.trap_writer.add_tuple(table_name, args);
|
self.trap_writer.add_tuple(table_name, args);
|
||||||
@@ -585,55 +515,18 @@ impl Visitor<'_> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn type_matches(&self, tp: &TypeName, type_info: &node_types::FieldTypeInfo) -> bool {
|
|
||||||
match type_info {
|
|
||||||
node_types::FieldTypeInfo::Single(single_type) => {
|
|
||||||
if tp == single_type {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if let EntryKind::Union { members } = &self.schema.get(single_type).unwrap().kind {
|
|
||||||
if self.type_matches_set(tp, members) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
node_types::FieldTypeInfo::Multiple { types, .. } => {
|
|
||||||
return self.type_matches_set(tp, types);
|
|
||||||
}
|
|
||||||
|
|
||||||
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
|
|
||||||
return !tp.named && int_mapping.contains_key(&tp.kind)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
fn type_matches_set(&self, tp: &TypeName, types: &Set<TypeName>) -> bool {
|
|
||||||
if types.contains(tp) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
for other in types.iter() {
|
|
||||||
if let EntryKind::Union { members } = &self.schema.get(other).unwrap().kind {
|
|
||||||
if self.type_matches_set(tp, members) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit a slice of a source file as an Arg.
|
// Emit a slice of a source file as a trap::Arg.
|
||||||
fn sliced_source_arg(source: &[u8], n: Node) -> Arg {
|
fn sliced_source_arg(source: &[u8], n: &Node) -> trap::Arg {
|
||||||
let range = n.byte_range();
|
let range = n.byte_range();
|
||||||
Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
|
trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
|
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
|
||||||
// The first is the location and label definition, and the second is the
|
// The first is the location and label definition, and the second is the
|
||||||
// 'Located' entry.
|
// 'Located' entry.
|
||||||
fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
fn location_for(source: &[u8], n: &Node) -> (usize, usize, usize, usize) {
|
||||||
// Tree-sitter row, column values are 0-based while CodeQL starts
|
// Tree-sitter row, column values are 0-based while CodeQL starts
|
||||||
// counting at 1. In addition Tree-sitter's row and column for the
|
// counting at 1. In addition Tree-sitter's row and column for the
|
||||||
// end position are exclusive while CodeQL's end positions are inclusive.
|
// end position are exclusive while CodeQL's end positions are inclusive.
|
||||||
@@ -680,16 +573,16 @@ fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
|||||||
|
|
||||||
fn traverse(tree: &Tree, visitor: &mut Visitor) {
|
fn traverse(tree: &Tree, visitor: &mut Visitor) {
|
||||||
let cursor = &mut tree.walk();
|
let cursor = &mut tree.walk();
|
||||||
visitor.enter_node(cursor.node());
|
visitor.enter_node(&cursor.node());
|
||||||
let mut recurse = true;
|
let mut recurse = true;
|
||||||
loop {
|
loop {
|
||||||
if recurse && cursor.goto_first_child() {
|
if recurse && cursor.goto_first_child() {
|
||||||
recurse = visitor.enter_node(cursor.node());
|
recurse = visitor.enter_node(&cursor.node());
|
||||||
} else {
|
} else {
|
||||||
visitor.leave_node(cursor.field_name(), cursor.node());
|
visitor.leave_node(cursor.field_name(), &cursor.node());
|
||||||
|
|
||||||
if cursor.goto_next_sibling() {
|
if cursor.goto_next_sibling() {
|
||||||
recurse = visitor.enter_node(cursor.node());
|
recurse = visitor.enter_node(&cursor.node());
|
||||||
} else if cursor.goto_parent() {
|
} else if cursor.goto_parent() {
|
||||||
recurse = false;
|
recurse = false;
|
||||||
} else {
|
} else {
|
||||||
@@ -699,59 +592,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Program(Vec<TrapEntry>);
|
|
||||||
|
|
||||||
impl fmt::Display for Program {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
let mut text = String::new();
|
|
||||||
for trap_entry in &self.0 {
|
|
||||||
text.push_str(&format!("{}\n", trap_entry));
|
|
||||||
}
|
|
||||||
write!(f, "{}", text)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
enum TrapEntry {
|
|
||||||
/// Maps the label to a fresh id, e.g. `#123=*`.
|
|
||||||
FreshId(Label),
|
|
||||||
/// Maps the label to a key, e.g. `#7=@"foo"`.
|
|
||||||
MapLabelToKey(Label, String),
|
|
||||||
/// foo_bar(arg*)
|
|
||||||
GenericTuple(String, Vec<Arg>),
|
|
||||||
Comment(String),
|
|
||||||
}
|
|
||||||
impl fmt::Display for TrapEntry {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
TrapEntry::FreshId(label) => write!(f, "{}=*", label),
|
|
||||||
TrapEntry::MapLabelToKey(label, key) => {
|
|
||||||
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
|
|
||||||
}
|
|
||||||
TrapEntry::GenericTuple(name, args) => {
|
|
||||||
write!(f, "{}(", name)?;
|
|
||||||
for (index, arg) in args.iter().enumerate() {
|
|
||||||
if index > 0 {
|
|
||||||
write!(f, ",")?;
|
|
||||||
}
|
|
||||||
write!(f, "{}", arg)?;
|
|
||||||
}
|
|
||||||
write!(f, ")")
|
|
||||||
}
|
|
||||||
TrapEntry::Comment(line) => write!(f, "// {}", line),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
|
||||||
// Identifiers of the form #0, #1...
|
|
||||||
struct Label(u32);
|
|
||||||
|
|
||||||
impl fmt::Display for Label {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
write!(f, "#{:x}", self.0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Numeric indices.
|
// Numeric indices.
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
struct Index(usize);
|
struct Index(usize);
|
||||||
@@ -761,69 +601,3 @@ impl fmt::Display for Index {
|
|||||||
write!(f, "{}", self.0)
|
write!(f, "{}", self.0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some untyped argument to a TrapEntry.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
enum Arg {
|
|
||||||
Label(Label),
|
|
||||||
Int(usize),
|
|
||||||
String(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
const MAX_STRLEN: usize = 1048576;
|
|
||||||
|
|
||||||
impl fmt::Display for Arg {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Arg::Label(x) => write!(f, "{}", x),
|
|
||||||
Arg::Int(x) => write!(f, "{}", x),
|
|
||||||
Arg::String(x) => write!(
|
|
||||||
f,
|
|
||||||
"\"{}\"",
|
|
||||||
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Limit the length (in bytes) of a string. If the string's length in bytes is
|
|
||||||
/// less than or equal to the limit then the entire string is returned. Otherwise
|
|
||||||
/// the string is sliced at the provided limit. If there is a multi-byte character
|
|
||||||
/// at the limit then the returned slice will be slightly shorter than the limit to
|
|
||||||
/// avoid splitting that multi-byte character.
|
|
||||||
fn limit_string(string: &str, max_size: usize) -> &str {
|
|
||||||
if string.len() <= max_size {
|
|
||||||
return string;
|
|
||||||
}
|
|
||||||
let p = string.as_bytes();
|
|
||||||
let mut index = max_size;
|
|
||||||
// We want to clip the string at [max_size]; however, the character at that position
|
|
||||||
// may span several bytes. We need to find the first byte of the character. In UTF-8
|
|
||||||
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
|
|
||||||
// Therefore we decrement the index as long as there are bytes matching this pattern.
|
|
||||||
// This ensures we cut the string at the border between one character and another.
|
|
||||||
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
|
|
||||||
index -= 1;
|
|
||||||
}
|
|
||||||
&string[0..index]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn limit_string_test() {
|
|
||||||
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
|
|
||||||
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
|
|
||||||
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn escape_key_test() {
|
|
||||||
assert_eq!("foo!", escape_key("foo!"));
|
|
||||||
assert_eq!("foo{}", escape_key("foo{}"));
|
|
||||||
assert_eq!("{}", escape_key("{}"));
|
|
||||||
assert_eq!("", escape_key(""));
|
|
||||||
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
|
|
||||||
assert_eq!(
|
|
||||||
"/path/to/foo&{}"@#.rb",
|
|
||||||
escape_key("/path/to/foo&{}\"@#.rb")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,51 +1,15 @@
|
|||||||
mod extractor;
|
mod extractor;
|
||||||
|
mod trap;
|
||||||
|
|
||||||
extern crate num_cpus;
|
extern crate num_cpus;
|
||||||
|
|
||||||
use clap::arg;
|
use clap::arg;
|
||||||
use flate2::write::GzEncoder;
|
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{BufRead, BufWriter};
|
use std::io::BufRead;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use tree_sitter::{Language, Parser, Range};
|
use tree_sitter::{Language, Parser, Range};
|
||||||
|
|
||||||
enum TrapCompression {
|
|
||||||
None,
|
|
||||||
Gzip,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TrapCompression {
|
|
||||||
fn from_env() -> TrapCompression {
|
|
||||||
match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") {
|
|
||||||
Ok(method) => match TrapCompression::from_string(&method) {
|
|
||||||
Some(c) => c,
|
|
||||||
None => {
|
|
||||||
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
|
|
||||||
TrapCompression::Gzip
|
|
||||||
}
|
|
||||||
},
|
|
||||||
// Default compression method if the env var isn't set:
|
|
||||||
Err(_) => TrapCompression::Gzip,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn from_string(s: &str) -> Option<TrapCompression> {
|
|
||||||
match s.to_lowercase().as_ref() {
|
|
||||||
"none" => Some(TrapCompression::None),
|
|
||||||
"gzip" => Some(TrapCompression::Gzip),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn extension(&self) -> &str {
|
|
||||||
match self {
|
|
||||||
TrapCompression::None => "trap",
|
|
||||||
TrapCompression::Gzip => "trap.gz",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of threads the extractor should use, by reading the
|
* Gets the number of threads the extractor should use, by reading the
|
||||||
* CODEQL_THREADS environment variable and using it as described in the
|
* CODEQL_THREADS environment variable and using it as described in the
|
||||||
@@ -118,7 +82,7 @@ fn main() -> std::io::Result<()> {
|
|||||||
.value_of("output-dir")
|
.value_of("output-dir")
|
||||||
.expect("missing --output-dir");
|
.expect("missing --output-dir");
|
||||||
let trap_dir = PathBuf::from(trap_dir);
|
let trap_dir = PathBuf::from(trap_dir);
|
||||||
let trap_compression = TrapCompression::from_env();
|
let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION");
|
||||||
|
|
||||||
let file_list = matches.value_of("file-list").expect("missing --file-list");
|
let file_list = matches.value_of("file-list").expect("missing --file-list");
|
||||||
let file_list = fs::File::open(file_list)?;
|
let file_list = fs::File::open(file_list)?;
|
||||||
@@ -141,7 +105,7 @@ fn main() -> std::io::Result<()> {
|
|||||||
let src_archive_file = path_for(&src_archive_dir, &path, "");
|
let src_archive_file = path_for(&src_archive_dir, &path, "");
|
||||||
let mut source = std::fs::read(&path)?;
|
let mut source = std::fs::read(&path)?;
|
||||||
let code_ranges;
|
let code_ranges;
|
||||||
let mut trap_writer = extractor::new_trap_writer();
|
let mut trap_writer = trap::Writer::new();
|
||||||
if path.extension().map_or(false, |x| x == "erb") {
|
if path.extension().map_or(false, |x| x == "erb") {
|
||||||
tracing::info!("scanning: {}", path.display());
|
tracing::info!("scanning: {}", path.display());
|
||||||
extractor::extract(
|
extractor::extract(
|
||||||
@@ -186,28 +150,20 @@ fn main() -> std::io::Result<()> {
|
|||||||
.expect("failed to extract files");
|
.expect("failed to extract files");
|
||||||
|
|
||||||
let path = PathBuf::from("extras");
|
let path = PathBuf::from("extras");
|
||||||
let mut trap_writer = extractor::new_trap_writer();
|
let mut trap_writer = trap::Writer::new();
|
||||||
trap_writer.populate_empty_location();
|
extractor::populate_empty_location(&mut trap_writer);
|
||||||
write_trap(&trap_dir, path, trap_writer, &trap_compression)
|
write_trap(&trap_dir, path, trap_writer, &trap_compression)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_trap(
|
fn write_trap(
|
||||||
trap_dir: &Path,
|
trap_dir: &Path,
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
trap_writer: extractor::TrapWriter,
|
trap_writer: trap::Writer,
|
||||||
trap_compression: &TrapCompression,
|
trap_compression: &trap::Compression,
|
||||||
) -> std::io::Result<()> {
|
) -> std::io::Result<()> {
|
||||||
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
|
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
|
||||||
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
|
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
|
||||||
let trap_file = std::fs::File::create(&trap_file)?;
|
trap_writer.write_to_file(&trap_file, trap_compression)
|
||||||
let mut trap_file = BufWriter::new(trap_file);
|
|
||||||
match trap_compression {
|
|
||||||
TrapCompression::None => trap_writer.output(&mut trap_file),
|
|
||||||
TrapCompression::Gzip => {
|
|
||||||
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
|
|
||||||
trap_writer.output(&mut compressed_writer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scan_erb(
|
fn scan_erb(
|
||||||
|
|||||||
247
ruby/extractor/src/trap.rs
Normal file
247
ruby/extractor/src/trap.rs
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::BufWriter;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use flate2::write::GzEncoder;
|
||||||
|
use indexmap::IndexMap;
|
||||||
|
|
||||||
|
pub struct Writer {
|
||||||
|
/// Labels that should be assigned fresh ids, e.g. `#123=*`.
|
||||||
|
fresh_ids: Vec<Label>,
|
||||||
|
|
||||||
|
/// Labels that should be assigned trap keys, e.g. `#7=@"foo"`.
|
||||||
|
global_keys: IndexMap<String, Label>,
|
||||||
|
|
||||||
|
/// Database rows to emit. Each key is the tuple name, each value is a list.
|
||||||
|
/// Each member of *that* list represents an instance of that tuple,
|
||||||
|
/// containing a list of the arguments/column values.
|
||||||
|
tuples: IndexMap<String, Vec<Vec<Arg>>>,
|
||||||
|
|
||||||
|
/// A counter for generating fresh labels
|
||||||
|
counter: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Writer {
|
||||||
|
pub fn new() -> Writer {
|
||||||
|
Writer {
|
||||||
|
fresh_ids: Vec::new(),
|
||||||
|
tuples: IndexMap::new(),
|
||||||
|
global_keys: IndexMap::new(),
|
||||||
|
counter: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gets a label that will hold the unique ID of the passed string at import time.
|
||||||
|
/// This can be used for incrementally importable TRAP files -- use globally unique
|
||||||
|
/// strings to compute a unique ID for table tuples.
|
||||||
|
///
|
||||||
|
/// Note: You probably want to make sure that the key strings that you use are disjoint
|
||||||
|
/// for disjoint column types; the standard way of doing this is to prefix (or append)
|
||||||
|
/// the column type name to the ID. Thus, you might identify methods in Java by the
|
||||||
|
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
|
||||||
|
pub fn fresh_id(&mut self) -> Label {
|
||||||
|
let label = Label(self.counter);
|
||||||
|
self.counter += 1;
|
||||||
|
self.fresh_ids.push(label);
|
||||||
|
label
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn global_id(&mut self, key: String) -> (Label, bool) {
|
||||||
|
if let Some(label) = self.global_keys.get(&key) {
|
||||||
|
return (*label, false);
|
||||||
|
}
|
||||||
|
let label = Label(self.counter);
|
||||||
|
self.counter += 1;
|
||||||
|
self.global_keys.insert(key, label);
|
||||||
|
(label, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
|
||||||
|
self.tuples
|
||||||
|
.entry(table_name.to_owned())
|
||||||
|
.or_insert_with(Vec::new)
|
||||||
|
.push(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write<T: std::io::Write>(&self, dest: &mut T) -> std::io::Result<()> {
|
||||||
|
for label in &self.fresh_ids {
|
||||||
|
writeln!(dest, "{}=*", label)?;
|
||||||
|
}
|
||||||
|
for (key, label) in &self.global_keys {
|
||||||
|
writeln!(dest, "{}=@\"{}\"", label, key.replace("\"", "\"\""))?;
|
||||||
|
}
|
||||||
|
for (name, instances) in &self.tuples {
|
||||||
|
for instance in instances {
|
||||||
|
write!(dest, "{}(", name)?;
|
||||||
|
for (index, arg) in instance.iter().enumerate() {
|
||||||
|
if index > 0 {
|
||||||
|
write!(dest, ",")?;
|
||||||
|
}
|
||||||
|
write!(dest, "{}", arg)?;
|
||||||
|
}
|
||||||
|
writeln!(dest, ")")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_to_file(&self, path: &Path, compression: &Compression) -> std::io::Result<()> {
|
||||||
|
let trap_file = std::fs::File::create(path)?;
|
||||||
|
let mut trap_file = BufWriter::new(trap_file);
|
||||||
|
match compression {
|
||||||
|
Compression::None => self.write(&mut trap_file),
|
||||||
|
Compression::Gzip => {
|
||||||
|
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
|
||||||
|
self.write(&mut compressed_writer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
// Identifiers of the form #0, #1...
|
||||||
|
pub struct Label(u32);
|
||||||
|
|
||||||
|
impl fmt::Display for Label {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "#{:x}", self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some untyped argument to a TrapEntry.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum Arg {
|
||||||
|
Label(Label),
|
||||||
|
Int(usize),
|
||||||
|
String(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
const MAX_STRLEN: usize = 1048576;
|
||||||
|
|
||||||
|
impl fmt::Display for Arg {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Arg::Label(x) => write!(f, "{}", x),
|
||||||
|
Arg::Int(x) => write!(f, "{}", x),
|
||||||
|
Arg::String(x) => write!(
|
||||||
|
f,
|
||||||
|
"\"{}\"",
|
||||||
|
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn full_id_for_file(normalized_path: &str) -> String {
|
||||||
|
format!("{};sourcefile", escape_key(normalized_path))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn full_id_for_folder(normalized_path: &str) -> String {
|
||||||
|
format!("{};folder", escape_key(normalized_path))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escapes a string for use in a TRAP key, by replacing special characters with
|
||||||
|
/// HTML entities.
|
||||||
|
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
|
||||||
|
fn needs_escaping(c: char) -> bool {
|
||||||
|
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
|
||||||
|
}
|
||||||
|
|
||||||
|
let key = key.into();
|
||||||
|
if key.contains(needs_escaping) {
|
||||||
|
let mut escaped = String::with_capacity(2 * key.len());
|
||||||
|
for c in key.chars() {
|
||||||
|
match c {
|
||||||
|
'&' => escaped.push_str("&"),
|
||||||
|
'{' => escaped.push_str("{"),
|
||||||
|
'}' => escaped.push_str("}"),
|
||||||
|
'"' => escaped.push_str("""),
|
||||||
|
'@' => escaped.push_str("@"),
|
||||||
|
'#' => escaped.push_str("#"),
|
||||||
|
_ => escaped.push(c),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Cow::Owned(escaped)
|
||||||
|
} else {
|
||||||
|
key
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Limit the length (in bytes) of a string. If the string's length in bytes is
|
||||||
|
/// less than or equal to the limit then the entire string is returned. Otherwise
|
||||||
|
/// the string is sliced at the provided limit. If there is a multi-byte character
|
||||||
|
/// at the limit then the returned slice will be slightly shorter than the limit to
|
||||||
|
/// avoid splitting that multi-byte character.
|
||||||
|
fn limit_string(string: &str, max_size: usize) -> &str {
|
||||||
|
if string.len() <= max_size {
|
||||||
|
return string;
|
||||||
|
}
|
||||||
|
let p = string.as_bytes();
|
||||||
|
let mut index = max_size;
|
||||||
|
// We want to clip the string at [max_size]; however, the character at that position
|
||||||
|
// may span several bytes. We need to find the first byte of the character. In UTF-8
|
||||||
|
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
|
||||||
|
// Therefore we decrement the index as long as there are bytes matching this pattern.
|
||||||
|
// This ensures we cut the string at the border between one character and another.
|
||||||
|
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
|
||||||
|
index -= 1;
|
||||||
|
}
|
||||||
|
&string[0..index]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Compression {
|
||||||
|
None,
|
||||||
|
Gzip,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Compression {
|
||||||
|
pub fn from_env(var_name: &str) -> Compression {
|
||||||
|
match std::env::var(var_name) {
|
||||||
|
Ok(method) => match Compression::from_string(&method) {
|
||||||
|
Some(c) => c,
|
||||||
|
None => {
|
||||||
|
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
|
||||||
|
Compression::Gzip
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// Default compression method if the env var isn't set:
|
||||||
|
Err(_) => Compression::Gzip,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_string(s: &str) -> Option<Compression> {
|
||||||
|
match s.to_lowercase().as_ref() {
|
||||||
|
"none" => Some(Compression::None),
|
||||||
|
"gzip" => Some(Compression::Gzip),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extension(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Compression::None => "trap",
|
||||||
|
Compression::Gzip => "trap.gz",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn limit_string_test() {
|
||||||
|
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
|
||||||
|
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
|
||||||
|
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn escape_key_test() {
|
||||||
|
assert_eq!("foo!", escape_key("foo!"));
|
||||||
|
assert_eq!("foo{}", escape_key("foo{}"));
|
||||||
|
assert_eq!("{}", escape_key("{}"));
|
||||||
|
assert_eq!("", escape_key(""));
|
||||||
|
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
|
||||||
|
assert_eq!(
|
||||||
|
"/path/to/foo&{}"@#.rb",
|
||||||
|
escape_key("/path/to/foo&{}\"@#.rb")
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -20,8 +20,8 @@ fn make_field_type<'a>(
|
|||||||
field: &'a node_types::Field,
|
field: &'a node_types::Field,
|
||||||
nodes: &'a node_types::NodeTypeMap,
|
nodes: &'a node_types::NodeTypeMap,
|
||||||
) -> (ql::Type<'a>, Option<dbscheme::Entry<'a>>) {
|
) -> (ql::Type<'a>, Option<dbscheme::Entry<'a>>) {
|
||||||
match &field.type_info {
|
match &field.type_info.kind {
|
||||||
node_types::FieldTypeInfo::Multiple {
|
node_types::FieldTypeKind::Multiple {
|
||||||
types,
|
types,
|
||||||
dbscheme_union,
|
dbscheme_union,
|
||||||
ql_class: _,
|
ql_class: _,
|
||||||
@@ -40,11 +40,11 @@ fn make_field_type<'a>(
|
|||||||
})),
|
})),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
node_types::FieldTypeInfo::Single(t) => {
|
node_types::FieldTypeKind::Single(t) => {
|
||||||
let dbscheme_name = &nodes.get(t).unwrap().dbscheme_name;
|
let dbscheme_name = &nodes.get(t).unwrap().dbscheme_name;
|
||||||
(ql::Type::At(dbscheme_name), None)
|
(ql::Type::At(dbscheme_name), None)
|
||||||
}
|
}
|
||||||
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
|
node_types::FieldTypeKind::ReservedWordInt(int_mapping) => {
|
||||||
// The field will be an `int` in the db, and we add a case split to
|
// The field will be an `int` in the db, and we add a case split to
|
||||||
// create other db types for each integer value.
|
// create other db types for each integer value.
|
||||||
let mut branches: Vec<(usize, &'a str)> = Vec::new();
|
let mut branches: Vec<(usize, &'a str)> = Vec::new();
|
||||||
|
|||||||
@@ -345,16 +345,16 @@ fn create_field_getters<'a>(
|
|||||||
field: &'a node_types::Field,
|
field: &'a node_types::Field,
|
||||||
nodes: &'a node_types::NodeTypeMap,
|
nodes: &'a node_types::NodeTypeMap,
|
||||||
) -> (ql::Predicate<'a>, Option<ql::Expression<'a>>) {
|
) -> (ql::Predicate<'a>, Option<ql::Expression<'a>>) {
|
||||||
let return_type = match &field.type_info {
|
let return_type = match &field.type_info.kind {
|
||||||
node_types::FieldTypeInfo::Single(t) => {
|
node_types::FieldTypeKind::Single(t) => {
|
||||||
Some(ql::Type::Normal(&nodes.get(t).unwrap().ql_class_name))
|
Some(ql::Type::Normal(&nodes.get(t).unwrap().ql_class_name))
|
||||||
}
|
}
|
||||||
node_types::FieldTypeInfo::Multiple {
|
node_types::FieldTypeKind::Multiple {
|
||||||
types: _,
|
types: _,
|
||||||
dbscheme_union: _,
|
dbscheme_union: _,
|
||||||
ql_class,
|
ql_class,
|
||||||
} => Some(ql::Type::Normal(ql_class)),
|
} => Some(ql::Type::Normal(ql_class)),
|
||||||
node_types::FieldTypeInfo::ReservedWordInt(_) => Some(ql::Type::String),
|
node_types::FieldTypeKind::ReservedWordInt(_) => Some(ql::Type::String),
|
||||||
};
|
};
|
||||||
let formal_parameters = match &field.storage {
|
let formal_parameters = match &field.storage {
|
||||||
node_types::Storage::Column { .. } => vec![],
|
node_types::Storage::Column { .. } => vec![],
|
||||||
@@ -372,10 +372,10 @@ fn create_field_getters<'a>(
|
|||||||
|
|
||||||
// For the expression to get a value, what variable name should the result
|
// For the expression to get a value, what variable name should the result
|
||||||
// be bound to?
|
// be bound to?
|
||||||
let get_value_result_var_name = match &field.type_info {
|
let get_value_result_var_name = match &field.type_info.kind {
|
||||||
node_types::FieldTypeInfo::ReservedWordInt(_) => "value",
|
node_types::FieldTypeKind::ReservedWordInt(_) => "value",
|
||||||
node_types::FieldTypeInfo::Single(_) => "result",
|
node_types::FieldTypeKind::Single(_) => "result",
|
||||||
node_types::FieldTypeInfo::Multiple { .. } => "result",
|
node_types::FieldTypeKind::Multiple { .. } => "result",
|
||||||
};
|
};
|
||||||
|
|
||||||
// Two expressions for getting the value. One that's suitable use in the
|
// Two expressions for getting the value. One that's suitable use in the
|
||||||
@@ -418,8 +418,8 @@ fn create_field_getters<'a>(
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
let (body, optional_expr) = match &field.type_info {
|
let (body, optional_expr) = match &field.type_info.kind {
|
||||||
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
|
node_types::FieldTypeKind::ReservedWordInt(int_mapping) => {
|
||||||
// Create an expression that binds the corresponding string to `result` for each `value`, e.g.:
|
// Create an expression that binds the corresponding string to `result` for each `value`, e.g.:
|
||||||
// result = "foo" and value = 0 or
|
// result = "foo" and value = 0 or
|
||||||
// result = "bar" and value = 1 or
|
// result = "bar" and value = 1 or
|
||||||
@@ -454,7 +454,7 @@ fn create_field_getters<'a>(
|
|||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
node_types::FieldTypeInfo::Single(_) | node_types::FieldTypeInfo::Multiple { .. } => {
|
node_types::FieldTypeKind::Single(_) | node_types::FieldTypeKind::Multiple { .. } => {
|
||||||
(get_value, Some(get_value_any_index))
|
(get_value, Some(get_value_any_index))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use std::collections::BTreeSet as Set;
|
use std::collections::BTreeSet as Set;
|
||||||
@@ -22,14 +22,23 @@ pub enum EntryKind {
|
|||||||
Token { kind_id: usize },
|
Token { kind_id: usize },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
|
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Hash, Clone)]
|
||||||
pub struct TypeName {
|
pub struct TypeName {
|
||||||
pub kind: String,
|
pub kind: String,
|
||||||
pub named: bool,
|
pub named: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Eq, PartialEq)]
|
||||||
pub enum FieldTypeInfo {
|
pub struct FieldTypeInfo {
|
||||||
|
pub kind: FieldTypeKind,
|
||||||
|
|
||||||
|
/// The set of types this field is allowed to take, after recursively
|
||||||
|
/// expanding subtypes.
|
||||||
|
pub valid_types: HashSet<TypeName>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Eq, PartialEq)]
|
||||||
|
pub enum FieldTypeKind {
|
||||||
/// The field has a single type.
|
/// The field has a single type.
|
||||||
Single(TypeName),
|
Single(TypeName),
|
||||||
|
|
||||||
@@ -103,7 +112,49 @@ fn convert_types(node_types: &[NodeType]) -> Set<TypeName> {
|
|||||||
node_types.iter().map(convert_type).collect()
|
node_types.iter().map(convert_type).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_matching_types(
|
||||||
|
node: &NodeInfo,
|
||||||
|
type_map: &HashMap<NodeType, &NodeInfo>,
|
||||||
|
) -> HashSet<TypeName> {
|
||||||
|
let mut result = HashSet::new();
|
||||||
|
let node_type_name = TypeName {
|
||||||
|
kind: node.kind.clone(),
|
||||||
|
named: node.named,
|
||||||
|
};
|
||||||
|
result.insert(node_type_name);
|
||||||
|
if let Some(subtypes) = &node.subtypes {
|
||||||
|
for subtype in subtypes {
|
||||||
|
let subtype = type_map.get(subtype).unwrap();
|
||||||
|
result.extend(get_matching_types(subtype, type_map));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
pub fn convert_nodes(prefix: &str, nodes: &[NodeInfo]) -> NodeTypeMap {
|
pub fn convert_nodes(prefix: &str, nodes: &[NodeInfo]) -> NodeTypeMap {
|
||||||
|
// Since the nodes contain only weak references to their subtypes, build a
|
||||||
|
// map so we can resolve them.
|
||||||
|
let mut type_map: HashMap<NodeType, &NodeInfo> = HashMap::new();
|
||||||
|
for node in nodes {
|
||||||
|
let node_type = NodeType {
|
||||||
|
kind: node.kind.clone(),
|
||||||
|
named: node.named,
|
||||||
|
};
|
||||||
|
type_map.insert(node_type, node);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now recursively expand subtypes so that for each tree-sitter node type,
|
||||||
|
// we have a set of all matching types.
|
||||||
|
let mut transitive_type_map = HashMap::new();
|
||||||
|
for node in nodes {
|
||||||
|
let type_name = TypeName {
|
||||||
|
kind: node.kind.clone(),
|
||||||
|
named: node.named,
|
||||||
|
};
|
||||||
|
let matching_types = get_matching_types(node, &type_map);
|
||||||
|
transitive_type_map.insert(type_name, matching_types);
|
||||||
|
}
|
||||||
|
|
||||||
let mut entries = NodeTypeMap::new();
|
let mut entries = NodeTypeMap::new();
|
||||||
let mut token_kinds = Set::new();
|
let mut token_kinds = Set::new();
|
||||||
|
|
||||||
@@ -166,6 +217,7 @@ pub fn convert_nodes(prefix: &str, nodes: &[NodeInfo]) -> NodeTypeMap {
|
|||||||
field_info,
|
field_info,
|
||||||
&mut fields,
|
&mut fields,
|
||||||
&token_kinds,
|
&token_kinds,
|
||||||
|
&transitive_type_map,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -178,6 +230,7 @@ pub fn convert_nodes(prefix: &str, nodes: &[NodeInfo]) -> NodeTypeMap {
|
|||||||
children,
|
children,
|
||||||
&mut fields,
|
&mut fields,
|
||||||
&token_kinds,
|
&token_kinds,
|
||||||
|
&transitive_type_map,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
entries.insert(
|
entries.insert(
|
||||||
@@ -222,6 +275,7 @@ fn add_field(
|
|||||||
field_info: &FieldInfo,
|
field_info: &FieldInfo,
|
||||||
fields: &mut Vec<Field>,
|
fields: &mut Vec<Field>,
|
||||||
token_kinds: &Set<TypeName>,
|
token_kinds: &Set<TypeName>,
|
||||||
|
transitive_type_map: &HashMap<TypeName, HashSet<TypeName>>,
|
||||||
) {
|
) {
|
||||||
let parent_flattened_name = node_type_name(&parent_type_name.kind, parent_type_name.named);
|
let parent_flattened_name = node_type_name(&parent_type_name.kind, parent_type_name.named);
|
||||||
let column_name = escape_name(&name_for_field_or_child(&field_name));
|
let column_name = escape_name(&name_for_field_or_child(&field_name));
|
||||||
@@ -245,6 +299,18 @@ fn add_field(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let converted_types = convert_types(&field_info.types);
|
let converted_types = convert_types(&field_info.types);
|
||||||
|
|
||||||
|
// Use the transitive type map we built earlier to create the set of all
|
||||||
|
// possible types this field could take.
|
||||||
|
let mut valid_types: HashSet<TypeName> = HashSet::new();
|
||||||
|
for type_name in &converted_types {
|
||||||
|
if let Some(types) = transitive_type_map.get(type_name) {
|
||||||
|
for t in types {
|
||||||
|
valid_types.insert(t.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let type_info = if field_info
|
let type_info = if field_info
|
||||||
.types
|
.types
|
||||||
.iter()
|
.iter()
|
||||||
@@ -259,20 +325,29 @@ fn add_field(
|
|||||||
escape_name(&format!("{}_{}_{}", &prefix, parent_flattened_name, t.kind));
|
escape_name(&format!("{}_{}_{}", &prefix, parent_flattened_name, t.kind));
|
||||||
field_token_ints.insert(t.kind.to_owned(), (counter, dbscheme_variant_name));
|
field_token_ints.insert(t.kind.to_owned(), (counter, dbscheme_variant_name));
|
||||||
}
|
}
|
||||||
FieldTypeInfo::ReservedWordInt(field_token_ints)
|
FieldTypeInfo {
|
||||||
|
kind: FieldTypeKind::ReservedWordInt(field_token_ints),
|
||||||
|
valid_types,
|
||||||
|
}
|
||||||
} else if field_info.types.len() == 1 {
|
} else if field_info.types.len() == 1 {
|
||||||
FieldTypeInfo::Single(converted_types.into_iter().next().unwrap())
|
FieldTypeInfo {
|
||||||
|
kind: FieldTypeKind::Single(converted_types.into_iter().next().unwrap()),
|
||||||
|
valid_types,
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// The dbscheme type for this field will be a union. In QL, it'll just be AstNode.
|
// The dbscheme type for this field will be a union. In QL, it'll just be AstNode.
|
||||||
FieldTypeInfo::Multiple {
|
FieldTypeInfo {
|
||||||
types: converted_types,
|
kind: FieldTypeKind::Multiple {
|
||||||
dbscheme_union: format!(
|
types: converted_types,
|
||||||
"{}_{}_{}_type",
|
dbscheme_union: format!(
|
||||||
&prefix,
|
"{}_{}_{}_type",
|
||||||
&parent_flattened_name,
|
&prefix,
|
||||||
&name_for_field_or_child(&field_name)
|
&parent_flattened_name,
|
||||||
),
|
&name_for_field_or_child(&field_name)
|
||||||
ql_class: "AstNode".to_owned(),
|
),
|
||||||
|
ql_class: "AstNode".to_owned(),
|
||||||
|
},
|
||||||
|
valid_types,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let getter_name = format!(
|
let getter_name = format!(
|
||||||
@@ -303,7 +378,7 @@ pub struct NodeInfo {
|
|||||||
pub subtypes: Option<Vec<NodeType>>,
|
pub subtypes: Option<Vec<NodeType>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize, Hash, Eq, PartialEq)]
|
||||||
pub struct NodeType {
|
pub struct NodeType {
|
||||||
#[serde(rename = "type")]
|
#[serde(rename = "type")]
|
||||||
pub kind: String,
|
pub kind: String,
|
||||||
|
|||||||
Reference in New Issue
Block a user