Parallelize extraction

Use the Rayon library to do parallel iteration over the file list. The
number of threads used respects the CODEQL_THREADS environment variable.
This commit is contained in:
Nick Rolfe
2020-12-22 19:04:24 +00:00
parent c35283cefb
commit bf4eac5113
4 changed files with 206 additions and 52 deletions

View File

@@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set;
use std::fmt;
use std::path::Path;
use tracing::{error, info, span, Level};
use tree_sitter::{Language, Node, Parser, Tree};
use tree_sitter::{Node, Parser, Tree};
struct TrapWriter {
/// The accumulated trap entries
@@ -148,55 +148,38 @@ impl TrapWriter {
}
}
/// Bundles a tree-sitter `Parser` together with a `NodeTypeMap` schema so the
/// pair can be passed around as one value.
pub struct Extractor {
/// The tree-sitter parser stored in this extractor.
pub parser: Parser,
/// The node-type map stored alongside the parser (presumably the schema the
/// parsed nodes are checked against; confirm where `NodeTypeMap` is defined).
pub schema: NodeTypeMap,
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract(parser: &mut Parser, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
let span = span!(
Level::TRACE,
"extract",
file = %path.display()
);
pub fn create(language: Language, schema: NodeTypeMap) -> Extractor {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let _enter = span.enter();
Extractor { parser, schema }
}
info!("extracting: {}", path.display());
impl Extractor {
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract<'a>(&'a mut self, path: &Path) -> std::io::Result<Program> {
let span = span!(
Level::TRACE,
"extract",
file = %path.display()
);
let source = std::fs::read(&path)?;
let tree = parser.parse(&source, None).expect("Failed to parse file");
let mut trap_writer = new_trap_writer();
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
let file_label = &trap_writer.populate_file(path);
let mut visitor = Visitor {
source: &source,
trap_writer: trap_writer,
// TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()),
file_label: *file_label,
token_counter: 0,
toplevel_child_counter: 0,
stack: Vec::new(),
schema,
};
traverse(&tree, &mut visitor);
let _enter = span.enter();
info!("extracting: {}", path.display());
let source = std::fs::read(&path)?;
let tree = &self
.parser
.parse(&source, None)
.expect("Failed to parse file");
let mut trap_writer = new_trap_writer();
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
let file_label = &trap_writer.populate_file(path);
let mut visitor = Visitor {
source: &source,
trap_writer: trap_writer,
// TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()),
file_label: *file_label,
token_counter: 0,
toplevel_child_counter: 0,
stack: Vec::new(),
schema: &self.schema,
};
traverse(&tree, &mut visitor);
&self.parser.reset();
Ok(Program(visitor.trap_writer.trap_output))
}
parser.reset();
Ok(Program(visitor.trap_writer.trap_output))
}
/// Normalizes the path according to the common CodeQL specification. Assumes that