mirror of
https://github.com/github/codeql.git
synced 2026-04-20 14:34:04 +02:00
Parallelize extraction
Use the Rayon library to do parallel iteration over the file list. The number of threads used respects the CODEQL_THREADS environment variable.
This commit is contained in:
@@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use tracing::{error, info, span, Level};
|
||||
use tree_sitter::{Language, Node, Parser, Tree};
|
||||
use tree_sitter::{Node, Parser, Tree};
|
||||
|
||||
struct TrapWriter {
|
||||
/// The accumulated trap entries
|
||||
@@ -148,55 +148,38 @@ impl TrapWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Extractor {
|
||||
pub parser: Parser,
|
||||
pub schema: NodeTypeMap,
|
||||
}
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
pub fn extract(parser: &mut Parser, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
file = %path.display()
|
||||
);
|
||||
|
||||
pub fn create(language: Language, schema: NodeTypeMap) -> Extractor {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let _enter = span.enter();
|
||||
|
||||
Extractor { parser, schema }
|
||||
}
|
||||
info!("extracting: {}", path.display());
|
||||
|
||||
impl Extractor {
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
pub fn extract<'a>(&'a mut self, path: &Path) -> std::io::Result<Program> {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
file = %path.display()
|
||||
);
|
||||
let source = std::fs::read(&path)?;
|
||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||
let mut trap_writer = new_trap_writer();
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
||||
let file_label = &trap_writer.populate_file(path);
|
||||
let mut visitor = Visitor {
|
||||
source: &source,
|
||||
trap_writer: trap_writer,
|
||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||
path: format!("{}", path.display()),
|
||||
file_label: *file_label,
|
||||
token_counter: 0,
|
||||
toplevel_child_counter: 0,
|
||||
stack: Vec::new(),
|
||||
schema,
|
||||
};
|
||||
traverse(&tree, &mut visitor);
|
||||
|
||||
let _enter = span.enter();
|
||||
|
||||
info!("extracting: {}", path.display());
|
||||
|
||||
let source = std::fs::read(&path)?;
|
||||
let tree = &self
|
||||
.parser
|
||||
.parse(&source, None)
|
||||
.expect("Failed to parse file");
|
||||
let mut trap_writer = new_trap_writer();
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
||||
let file_label = &trap_writer.populate_file(path);
|
||||
let mut visitor = Visitor {
|
||||
source: &source,
|
||||
trap_writer: trap_writer,
|
||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||
path: format!("{}", path.display()),
|
||||
file_label: *file_label,
|
||||
token_counter: 0,
|
||||
toplevel_child_counter: 0,
|
||||
stack: Vec::new(),
|
||||
schema: &self.schema,
|
||||
};
|
||||
traverse(&tree, &mut visitor);
|
||||
|
||||
&self.parser.reset();
|
||||
Ok(Program(visitor.trap_writer.trap_output))
|
||||
}
|
||||
parser.reset();
|
||||
Ok(Program(visitor.trap_writer.trap_output))
|
||||
}
|
||||
|
||||
/// Normalizes the path according to the common CodeQL specification. Assumes that
|
||||
|
||||
Reference in New Issue
Block a user