Sort TRAP output

First, emit labels with fresh ids, then other labels, then tuples
grouped by name. Hopefully this will help with both the compression
ratio and branch prediction in the TRAP importer.
This commit is contained in:
Nick Rolfe
2021-11-01 17:03:44 +00:00
parent b502e68783
commit 189e75bfe2
2 changed files with 50 additions and 75 deletions

View File

@@ -9,7 +9,7 @@ use tree_sitter::{Language, Node, Parser, Range, Tree};
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
let (file_label, fresh) =
writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
writer.global_id(trap::full_id_for_file(&normalize_path(absolute_path)));
if fresh {
writer.add_tuple(
"files",
@@ -24,7 +24,7 @@ pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::L
}
fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
let (file_label, fresh) = writer.global_id("empty;sourcefile");
let (file_label, fresh) = writer.global_id("empty;sourcefile".to_owned());
if fresh {
writer.add_tuple(
"files",
@@ -54,7 +54,7 @@ pub fn populate_parent_folders(
None => break,
Some(folder) => {
let (folder_label, fresh) =
writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
writer.global_id(trap::full_id_for_folder(&normalize_path(folder)));
writer.add_tuple(
"containerparent",
vec![
@@ -88,7 +88,7 @@ fn location(
end_line: usize,
end_column: usize,
) -> trap::Label {
let (loc_label, fresh) = writer.global_id(&format!(
let (loc_label, fresh) = writer.global_id(format!(
"loc,{{{}}},{},{},{},{}",
file_label, start_line, start_column, end_line, end_column
));
@@ -133,7 +133,6 @@ pub fn extract(
parser.set_language(language).unwrap();
parser.set_included_ranges(ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
let file_label = populate_file(trap_writer, path);
let mut visitor = Visitor {
source,

View File

@@ -1,25 +1,34 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fmt;
use std::io::{BufWriter, Write};
use std::io::BufWriter;
use std::path::Path;
use flate2::write::GzEncoder;
pub struct Writer {
/// The accumulated trap entries
trap_output: Vec<Entry>,
/// Labels that should be assigned fresh ids, e.g. `#123=*`.
fresh_ids: Vec<Label>,
/// Labels that should be assigned trap keys, e.g. `#7=@"foo"`.
global_keys: BTreeMap<String, Label>,
/// Database rows to emit. Each key is the tuple name, each value is a list.
/// Each member of *that* list represents an instance of that tuple,
/// containing a list of the arguments/column values.
tuples: BTreeMap<String, Vec<Vec<Arg>>>,
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
}
impl Writer {
pub fn new() -> Writer {
Writer {
fresh_ids: Vec::new(),
tuples: BTreeMap::new(),
global_keys: BTreeMap::new(),
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
}
}
@@ -34,80 +43,59 @@ impl Writer {
pub fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(Entry::FreshId(label));
self.fresh_ids.push(label);
label
}
pub fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
pub fn global_id(&mut self, key: String) -> (Label, bool) {
if let Some(label) = self.global_keys.get(&key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(Entry::MapLabelToKey(label, key.to_owned()));
self.global_keys.insert(key, label);
(label, true)
}
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(Entry::GenericTuple(table_name.to_owned(), args))
self.tuples
.entry(table_name.to_owned())
.or_insert_with(Vec::new)
.push(args);
}
pub fn comment(&mut self, text: String) {
self.trap_output.push(Entry::Comment(text));
fn write<T: std::io::Write>(&self, dest: &mut T) -> std::io::Result<()> {
for label in &self.fresh_ids {
writeln!(dest, "{}=*", label)?;
}
for (key, label) in &self.global_keys {
writeln!(dest, "{}=@\"{}\"", label, key.replace("\"", "\"\""))?;
}
for (name, instances) in &self.tuples {
for instance in instances {
write!(dest, "{}(", name)?;
for (index, arg) in instance.iter().enumerate() {
if index > 0 {
write!(dest, ",")?;
}
write!(dest, "{}", arg)?;
}
writeln!(dest, ")")?;
}
}
Ok(())
}
pub fn write_to_file(&self, path: &Path, compression: &Compression) -> std::io::Result<()> {
let trap_file = std::fs::File::create(path)?;
let mut trap_file = BufWriter::new(trap_file);
match compression {
Compression::None => {
for trap_entry in &self.trap_output {
writeln!(trap_file, "{}", trap_entry)?;
}
}
Compression::None => self.write(&mut trap_file),
Compression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
for trap_entry in &self.trap_output {
writeln!(compressed_writer, "{}", trap_entry)?;
}
self.write(&mut compressed_writer)
}
}
std::io::Result::Ok(())
}
}
/// A single entry of TRAP output; its `fmt::Display` impl renders the textual form.
pub enum Entry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// A tuple for the named table with its arguments, rendered as
/// `name(arg,arg,...)`, e.g. `foo_bar(arg*)`.
GenericTuple(String, Vec<Arg>),
/// A comment, rendered as `// text`.
Comment(String),
}
impl fmt::Display for Entry {
    /// Renders this entry in its textual TRAP form, without a trailing newline.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Entry::Comment(text) => write!(f, "// {}", text),
            Entry::FreshId(id) => write!(f, "{}=*", id),
            Entry::MapLabelToKey(id, key) => {
                // A double quote inside the key is escaped by doubling it.
                let escaped = key.replace("\"", "\"\"");
                write!(f, "{}=@\"{}\"", id, escaped)
            }
            Entry::GenericTuple(table, columns) => {
                write!(f, "{}(", table)?;
                // Emit the first column bare, then every later one preceded by a comma.
                let mut remaining = columns.iter();
                if let Some(first) = remaining.next() {
                    write!(f, "{}", first)?;
                    for column in remaining {
                        write!(f, ",")?;
                        write!(f, "{}", column)?;
                    }
                }
                write!(f, ")")
            }
        }
    }
}
@@ -145,18 +133,6 @@ impl fmt::Display for Arg {
}
}
/// An ordered list of TRAP entries that can be rendered as text.
pub struct Program(Vec<Entry>);

impl fmt::Display for Program {
    /// Writes every entry in order, each one followed by a newline.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Stream each entry straight into the formatter rather than
        // accumulating the whole program in a temporary `String` (and
        // allocating another `String` per entry via `format!`).
        for trap_entry in &self.0 {
            writeln!(f, "{}", trap_entry)?;
        }
        Ok(())
    }
}
/// Builds the full id string for a file: the path passed through
/// `escape_key`, followed by the `;sourcefile` suffix.
pub fn full_id_for_file(normalized_path: &str) -> String {
    let escaped_path = escape_key(normalized_path);
    format!("{};sourcefile", escaped_path)
}