mirror of
https://github.com/github/codeql.git
synced 2026-05-24 16:17:07 +02:00
Sort TRAP output
First, emit labels with fresh ids, then the other labels, then tuples, grouped by name. Hopefully this will help both with the compression ratio and with branch prediction in the TRAP importer.
This commit is contained in:
@@ -9,7 +9,7 @@ use tree_sitter::{Language, Node, Parser, Range, Tree};
|
||||
|
||||
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
|
||||
let (file_label, fresh) =
|
||||
writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
|
||||
writer.global_id(trap::full_id_for_file(&normalize_path(absolute_path)));
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
"files",
|
||||
@@ -24,7 +24,7 @@ pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::L
|
||||
}
|
||||
|
||||
fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
|
||||
let (file_label, fresh) = writer.global_id("empty;sourcefile");
|
||||
let (file_label, fresh) = writer.global_id("empty;sourcefile".to_owned());
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
"files",
|
||||
@@ -54,7 +54,7 @@ pub fn populate_parent_folders(
|
||||
None => break,
|
||||
Some(folder) => {
|
||||
let (folder_label, fresh) =
|
||||
writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
|
||||
writer.global_id(trap::full_id_for_folder(&normalize_path(folder)));
|
||||
writer.add_tuple(
|
||||
"containerparent",
|
||||
vec![
|
||||
@@ -88,7 +88,7 @@ fn location(
|
||||
end_line: usize,
|
||||
end_column: usize,
|
||||
) -> trap::Label {
|
||||
let (loc_label, fresh) = writer.global_id(&format!(
|
||||
let (loc_label, fresh) = writer.global_id(format!(
|
||||
"loc,{{{}}},{},{},{},{}",
|
||||
file_label, start_line, start_column, end_line, end_column
|
||||
));
|
||||
@@ -133,7 +133,6 @@ pub fn extract(
|
||||
parser.set_language(language).unwrap();
|
||||
parser.set_included_ranges(ranges).unwrap();
|
||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
|
||||
let file_label = populate_file(trap_writer, path);
|
||||
let mut visitor = Visitor {
|
||||
source,
|
||||
|
||||
@@ -1,25 +1,34 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::io::BufWriter;
|
||||
use std::path::Path;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
|
||||
pub struct Writer {
|
||||
/// The accumulated trap entries
|
||||
trap_output: Vec<Entry>,
|
||||
/// Labels that should be assigned fresh ids, e.g. `#123=*`.
|
||||
fresh_ids: Vec<Label>,
|
||||
|
||||
/// Labels that should be assigned trap keys, e.g. `#7=@"foo"`.
|
||||
global_keys: BTreeMap<String, Label>,
|
||||
|
||||
/// Database rows to emit. Each key is the tuple name, each value is a list.
|
||||
/// Each member of *that* list represents an instance of that tuple,
|
||||
/// containing a list of the arguments/column values.
|
||||
tuples: BTreeMap<String, Vec<Vec<Arg>>>,
|
||||
|
||||
/// A counter for generating fresh labels
|
||||
counter: u32,
|
||||
/// cache of global keys
|
||||
global_keys: std::collections::HashMap<String, Label>,
|
||||
}
|
||||
|
||||
impl Writer {
|
||||
pub fn new() -> Writer {
|
||||
Writer {
|
||||
fresh_ids: Vec::new(),
|
||||
tuples: BTreeMap::new(),
|
||||
global_keys: BTreeMap::new(),
|
||||
counter: 0,
|
||||
trap_output: Vec::new(),
|
||||
global_keys: std::collections::HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,80 +43,59 @@ impl Writer {
|
||||
pub fn fresh_id(&mut self) -> Label {
|
||||
let label = Label(self.counter);
|
||||
self.counter += 1;
|
||||
self.trap_output.push(Entry::FreshId(label));
|
||||
self.fresh_ids.push(label);
|
||||
label
|
||||
}
|
||||
|
||||
pub fn global_id(&mut self, key: &str) -> (Label, bool) {
|
||||
if let Some(label) = self.global_keys.get(key) {
|
||||
pub fn global_id(&mut self, key: String) -> (Label, bool) {
|
||||
if let Some(label) = self.global_keys.get(&key) {
|
||||
return (*label, false);
|
||||
}
|
||||
let label = Label(self.counter);
|
||||
self.counter += 1;
|
||||
self.global_keys.insert(key.to_owned(), label);
|
||||
self.trap_output
|
||||
.push(Entry::MapLabelToKey(label, key.to_owned()));
|
||||
self.global_keys.insert(key, label);
|
||||
(label, true)
|
||||
}
|
||||
|
||||
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
|
||||
self.trap_output
|
||||
.push(Entry::GenericTuple(table_name.to_owned(), args))
|
||||
self.tuples
|
||||
.entry(table_name.to_owned())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(args);
|
||||
}
|
||||
|
||||
pub fn comment(&mut self, text: String) {
|
||||
self.trap_output.push(Entry::Comment(text));
|
||||
fn write<T: std::io::Write>(&self, dest: &mut T) -> std::io::Result<()> {
|
||||
for label in &self.fresh_ids {
|
||||
writeln!(dest, "{}=*", label)?;
|
||||
}
|
||||
for (key, label) in &self.global_keys {
|
||||
writeln!(dest, "{}=@\"{}\"", label, key.replace("\"", "\"\""))?;
|
||||
}
|
||||
for (name, instances) in &self.tuples {
|
||||
for instance in instances {
|
||||
write!(dest, "{}(", name)?;
|
||||
for (index, arg) in instance.iter().enumerate() {
|
||||
if index > 0 {
|
||||
write!(dest, ",")?;
|
||||
}
|
||||
write!(dest, "{}", arg)?;
|
||||
}
|
||||
writeln!(dest, ")")?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_to_file(&self, path: &Path, compression: &Compression) -> std::io::Result<()> {
|
||||
let trap_file = std::fs::File::create(path)?;
|
||||
let mut trap_file = BufWriter::new(trap_file);
|
||||
match compression {
|
||||
Compression::None => {
|
||||
for trap_entry in &self.trap_output {
|
||||
writeln!(trap_file, "{}", trap_entry)?;
|
||||
}
|
||||
}
|
||||
Compression::None => self.write(&mut trap_file),
|
||||
Compression::Gzip => {
|
||||
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
|
||||
for trap_entry in &self.trap_output {
|
||||
writeln!(compressed_writer, "{}", trap_entry)?;
|
||||
}
|
||||
self.write(&mut compressed_writer)
|
||||
}
|
||||
}
|
||||
std::io::Result::Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Entry {
|
||||
/// Maps the label to a fresh id, e.g. `#123=*`.
|
||||
FreshId(Label),
|
||||
/// Maps the label to a key, e.g. `#7=@"foo"`.
|
||||
MapLabelToKey(Label, String),
|
||||
/// foo_bar(arg*)
|
||||
GenericTuple(String, Vec<Arg>),
|
||||
Comment(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for Entry {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Entry::FreshId(label) => write!(f, "{}=*", label),
|
||||
Entry::MapLabelToKey(label, key) => {
|
||||
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
|
||||
}
|
||||
Entry::GenericTuple(name, args) => {
|
||||
write!(f, "{}(", name)?;
|
||||
for (index, arg) in args.iter().enumerate() {
|
||||
if index > 0 {
|
||||
write!(f, ",")?;
|
||||
}
|
||||
write!(f, "{}", arg)?;
|
||||
}
|
||||
write!(f, ")")
|
||||
}
|
||||
Entry::Comment(line) => write!(f, "// {}", line),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,18 +133,6 @@ impl fmt::Display for Arg {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Program(Vec<Entry>);
|
||||
|
||||
impl fmt::Display for Program {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut text = String::new();
|
||||
for trap_entry in &self.0 {
|
||||
text.push_str(&format!("{}\n", trap_entry));
|
||||
}
|
||||
write!(f, "{}", text)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn full_id_for_file(normalized_path: &str) -> String {
|
||||
format!("{};sourcefile", escape_key(normalized_path))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user