mirror of
https://github.com/github/codeql.git
synced 2026-04-27 01:35:13 +02:00
Tree-sitter extractors: use fresh IDs for locations
Since locations for any given source file are never referenced in any TRAP files besides the one for that particular source file, it's not necessary to use global IDs. Using fresh IDs will reduce the size of the ID pool (both on disk and in memory) and the speed of multi-threaded TRAP import. The one exception is the empty location, which still uses a global ID.
This commit is contained in:
@@ -43,7 +43,16 @@ fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
|
||||
|
||||
pub fn populate_empty_location(writer: &mut trap::Writer) {
|
||||
let file_label = populate_empty_file(writer);
|
||||
location(writer, file_label, 0, 0, 0, 0);
|
||||
global_location(
|
||||
writer,
|
||||
file_label,
|
||||
trap::Location {
|
||||
start_line: 0,
|
||||
start_column: 0,
|
||||
end_line: 0,
|
||||
end_column: 0,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
pub fn populate_parent_folders(
|
||||
@@ -85,17 +94,19 @@ pub fn populate_parent_folders(
|
||||
}
|
||||
}
|
||||
|
||||
fn location(
|
||||
/** Get the label for the given location, defining it a global ID if it doesn't exist yet. */
|
||||
fn global_location(
|
||||
writer: &mut trap::Writer,
|
||||
file_label: trap::Label,
|
||||
start_line: usize,
|
||||
start_column: usize,
|
||||
end_line: usize,
|
||||
end_column: usize,
|
||||
location: trap::Location,
|
||||
) -> trap::Label {
|
||||
let (loc_label, fresh) = writer.global_id(&format!(
|
||||
"loc,{{{}}},{},{},{},{}",
|
||||
file_label, start_line, start_column, end_line, end_column
|
||||
file_label,
|
||||
location.start_line,
|
||||
location.start_column,
|
||||
location.end_line,
|
||||
location.end_column
|
||||
));
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
@@ -103,10 +114,34 @@ fn location(
|
||||
vec![
|
||||
trap::Arg::Label(loc_label),
|
||||
trap::Arg::Label(file_label),
|
||||
trap::Arg::Int(start_line),
|
||||
trap::Arg::Int(start_column),
|
||||
trap::Arg::Int(end_line),
|
||||
trap::Arg::Int(end_column),
|
||||
trap::Arg::Int(location.start_line),
|
||||
trap::Arg::Int(location.start_column),
|
||||
trap::Arg::Int(location.end_line),
|
||||
trap::Arg::Int(location.end_column),
|
||||
],
|
||||
);
|
||||
}
|
||||
loc_label
|
||||
}
|
||||
|
||||
/** Get the label for the given location, creating it as a fresh ID if we haven't seen the location
|
||||
* yet for this file. */
|
||||
fn location_label(
|
||||
writer: &mut trap::Writer,
|
||||
file_label: trap::Label,
|
||||
location: trap::Location,
|
||||
) -> trap::Label {
|
||||
let (loc_label, fresh) = writer.location_label(location);
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
"locations_default",
|
||||
vec![
|
||||
trap::Arg::Label(loc_label),
|
||||
trap::Arg::Label(file_label),
|
||||
trap::Arg::Int(location.start_line),
|
||||
trap::Arg::Int(location.start_column),
|
||||
trap::Arg::Int(location.end_line),
|
||||
trap::Arg::Int(location.end_column),
|
||||
],
|
||||
);
|
||||
}
|
||||
@@ -245,26 +280,25 @@ impl<'a> Visitor<'a> {
|
||||
node: Node,
|
||||
status_page: bool,
|
||||
) {
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self, node);
|
||||
let loc = location(
|
||||
self.trap_writer,
|
||||
self.file_label,
|
||||
start_line,
|
||||
start_column,
|
||||
end_line,
|
||||
end_column,
|
||||
);
|
||||
let loc = location_for(self, node);
|
||||
let loc_label = location_label(self.trap_writer, self.file_label, loc);
|
||||
let mut mesg = self.diagnostics_writer.new_entry(
|
||||
"parse-error",
|
||||
"Could not process some files due to syntax errors",
|
||||
);
|
||||
mesg.severity(diagnostics::Severity::Warning)
|
||||
.location(self.path, start_line, start_column, end_line, end_column)
|
||||
.location(
|
||||
self.path,
|
||||
loc.start_line,
|
||||
loc.start_column,
|
||||
loc.end_line,
|
||||
loc.end_column,
|
||||
)
|
||||
.message(message, args);
|
||||
if status_page {
|
||||
mesg.status_page();
|
||||
}
|
||||
self.record_parse_error(loc, &mesg);
|
||||
self.record_parse_error(loc_label, &mesg);
|
||||
}
|
||||
|
||||
fn enter_node(&mut self, node: Node) -> bool {
|
||||
@@ -298,15 +332,8 @@ impl<'a> Visitor<'a> {
|
||||
return;
|
||||
}
|
||||
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self, node);
|
||||
let loc = location(
|
||||
self.trap_writer,
|
||||
self.file_label,
|
||||
start_line,
|
||||
start_column,
|
||||
end_line,
|
||||
end_column,
|
||||
);
|
||||
let loc = location_for(self, node);
|
||||
let loc_label = location_label(self.trap_writer, self.file_label, loc);
|
||||
let table = self
|
||||
.schema
|
||||
.get(&TypeName {
|
||||
@@ -333,7 +360,7 @@ impl<'a> Visitor<'a> {
|
||||
trap::Arg::Label(id),
|
||||
trap::Arg::Label(parent_id),
|
||||
trap::Arg::Int(parent_index),
|
||||
trap::Arg::Label(loc),
|
||||
trap::Arg::Label(loc_label),
|
||||
],
|
||||
);
|
||||
self.trap_writer.add_tuple(
|
||||
@@ -356,7 +383,7 @@ impl<'a> Visitor<'a> {
|
||||
trap::Arg::Label(id),
|
||||
trap::Arg::Label(parent_id),
|
||||
trap::Arg::Int(parent_index),
|
||||
trap::Arg::Label(loc),
|
||||
trap::Arg::Label(loc_label),
|
||||
],
|
||||
);
|
||||
let mut all_args = vec![trap::Arg::Label(id)];
|
||||
@@ -366,14 +393,20 @@ impl<'a> Visitor<'a> {
|
||||
}
|
||||
_ => {
|
||||
self.record_parse_error(
|
||||
loc,
|
||||
loc_label,
|
||||
self.diagnostics_writer
|
||||
.new_entry(
|
||||
"parse-error",
|
||||
"Could not process some files due to syntax errors",
|
||||
)
|
||||
.severity(diagnostics::Severity::Warning)
|
||||
.location(self.path, start_line, start_column, end_line, end_column)
|
||||
.location(
|
||||
self.path,
|
||||
loc.start_line,
|
||||
loc.start_column,
|
||||
loc.end_line,
|
||||
loc.end_column,
|
||||
)
|
||||
.message(
|
||||
"Unknown table type: {}",
|
||||
&[diagnostics::MessageArg::Code(node.kind())],
|
||||
@@ -555,7 +588,7 @@ fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
|
||||
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
|
||||
// The first is the location and label definition, and the second is the
|
||||
// 'Located' entry.
|
||||
fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize) {
|
||||
fn location_for(visitor: &mut Visitor, n: Node) -> trap::Location {
|
||||
// Tree-sitter row, column values are 0-based while CodeQL starts
|
||||
// counting at 1. In addition Tree-sitter's row and column for the
|
||||
// end position are exclusive while CodeQL's end positions are inclusive.
|
||||
@@ -565,16 +598,16 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
|
||||
// the end column is 0 (start of a line). In such cases the end position must be
|
||||
// set to the end of the previous line.
|
||||
let start_line = n.start_position().row + 1;
|
||||
let start_col = n.start_position().column + 1;
|
||||
let start_column = n.start_position().column + 1;
|
||||
let mut end_line = n.end_position().row + 1;
|
||||
let mut end_col = n.end_position().column;
|
||||
if start_line > end_line || start_line == end_line && start_col > end_col {
|
||||
let mut end_column = n.end_position().column;
|
||||
if start_line > end_line || start_line == end_line && start_column > end_column {
|
||||
// the range is empty, clip it to sensible values
|
||||
end_line = start_line;
|
||||
end_col = start_col - 1;
|
||||
} else if end_col == 0 {
|
||||
end_column = start_column - 1;
|
||||
} else if end_column == 0 {
|
||||
let source = visitor.source;
|
||||
// end_col = 0 means that we are at the start of a line
|
||||
// end_column = 0 means that we are at the start of a line
|
||||
// unfortunately 0 is invalid as column number, therefore
|
||||
// we should update the end location to be the end of the
|
||||
// previous line
|
||||
@@ -591,10 +624,10 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
|
||||
);
|
||||
}
|
||||
end_line -= 1;
|
||||
end_col = 1;
|
||||
end_column = 1;
|
||||
while index > 0 && source[index - 1] != b'\n' {
|
||||
index -= 1;
|
||||
end_col += 1;
|
||||
end_column += 1;
|
||||
}
|
||||
} else {
|
||||
visitor.diagnostics_writer.write(
|
||||
@@ -612,7 +645,12 @@ fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize)
|
||||
);
|
||||
}
|
||||
}
|
||||
(start_line, start_col, end_line, end_col)
|
||||
trap::Location {
|
||||
start_line,
|
||||
start_column,
|
||||
end_line,
|
||||
end_column,
|
||||
}
|
||||
}
|
||||
|
||||
fn traverse(tree: &Tree, visitor: &mut Visitor) {
|
||||
|
||||
@@ -5,6 +5,14 @@ use std::path::Path;
|
||||
|
||||
use flate2::write::GzEncoder;
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||
pub struct Location {
|
||||
pub start_line: usize,
|
||||
pub start_column: usize,
|
||||
pub end_line: usize,
|
||||
pub end_column: usize,
|
||||
}
|
||||
|
||||
pub struct Writer {
|
||||
/// The accumulated trap entries
|
||||
trap_output: Vec<Entry>,
|
||||
@@ -12,6 +20,8 @@ pub struct Writer {
|
||||
counter: u32,
|
||||
/// cache of global keys
|
||||
global_keys: std::collections::HashMap<String, Label>,
|
||||
/// Labels for locations, which don't use global keys
|
||||
location_labels: std::collections::HashMap<Location, Label>,
|
||||
}
|
||||
|
||||
impl Writer {
|
||||
@@ -20,6 +30,7 @@ impl Writer {
|
||||
counter: 0,
|
||||
trap_output: Vec::new(),
|
||||
global_keys: std::collections::HashMap::new(),
|
||||
location_labels: std::collections::HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +61,17 @@ impl Writer {
|
||||
(label, true)
|
||||
}
|
||||
|
||||
/// Gets the label for the given location. The first call for a given location will define it as
|
||||
/// a fresh (star) ID.
|
||||
pub fn location_label(&mut self, loc: Location) -> (Label, bool) {
|
||||
if let Some(label) = self.location_labels.get(&loc) {
|
||||
return (*label, false);
|
||||
}
|
||||
let label = self.fresh_id();
|
||||
self.location_labels.insert(loc, label);
|
||||
(label, true)
|
||||
}
|
||||
|
||||
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
|
||||
self.trap_output
|
||||
.push(Entry::GenericTuple(table_name.to_owned(), args))
|
||||
|
||||
Reference in New Issue
Block a user