Extract ERB files

This commit is contained in:
Arthur Baars
2021-01-20 17:39:52 +01:00
parent bc55fa861e
commit 9d974bd56d
7 changed files with 128 additions and 123 deletions

View File

@@ -10,9 +10,11 @@ edition = "2018"
flate2 = "1.0"
node-types = { path = "../node-types" }
tree-sitter = "0.17"
tree-sitter-embedded-template = { git = "https://github.com/aibaars/tree-sitter-embedded-template", rev = "d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "add8cb36d5fc0a00d4499ba2e8eedc04a38a2488" }
clap = "2.33"
tracing = "0.1"
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
rayon = "1.5.0"
num_cpus = "1.13.0"
regex = "1.4.3"

View File

@@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set;
use std::fmt;
use std::path::Path;
use tracing::{error, info, span, Level};
use tree_sitter::{Language, Node, Parser, Tree};
use tree_sitter::{Language, Node, Parser, Range, Tree};
struct TrapWriter {
/// The accumulated trap entries
@@ -149,7 +149,13 @@ impl TrapWriter {
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
pub fn extract(
language: Language,
schema: &NodeTypeMap,
path: &Path,
source: &Vec<u8>,
ranges: &[Range],
) -> std::io::Result<Program> {
let span = span!(
Level::TRACE,
"extract",
@@ -162,7 +168,7 @@ pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let source = std::fs::read(&path)?;
parser.set_included_ranges(&ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
let mut trap_writer = new_trap_writer();
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));

View File

@@ -8,6 +8,7 @@ use rayon::prelude::*;
use std::fs;
use std::io::{BufRead, BufWriter, Write};
use std::path::{Path, PathBuf};
use tree_sitter::{Language, Parser, Range};
enum TrapCompression {
None,
@@ -126,6 +127,7 @@ fn main() -> std::io::Result<()> {
let file_list = fs::File::open(file_list)?;
let language = tree_sitter_ruby::language();
let erb = tree_sitter_embedded_template::language();
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
let lines = lines?;
@@ -133,16 +135,22 @@ fn main() -> std::io::Result<()> {
let path = PathBuf::from(line).canonicalize()?;
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
let src_archive_file = path_for(&src_archive_dir, &path, "");
let trap = extractor::extract(language, &schema, &path)?;
let source = std::fs::read(&path)?;
let code_ranges;
if path.extension().map_or(false, |x| x == "erb") {
tracing::info!("scanning: {}", path.display());
code_ranges = scan_erb(erb, &source);
} else {
code_ranges = vec![];
}
let trap = extractor::extract(language, &schema, &path, &source, &code_ranges)?;
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
let trap_file = std::fs::File::create(&trap_file)?;
let mut trap_file = BufWriter::new(trap_file);
match trap_compression {
TrapCompression::None => {
write!(trap_file, "{}", trap)
}
TrapCompression::None => write!(trap_file, "{}", trap),
TrapCompression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
write!(compressed_writer, "{}", trap)
@@ -151,6 +159,35 @@ fn main() -> std::io::Result<()> {
})
}
/// Scans an ERB template and returns the source ranges of its embedded code.
///
/// `source` is parsed with the `erb` grammar. For every top-level `directive`
/// or `output_directive` node, the range of each direct `code` child is
/// collected, in document order.
///
/// When no `code` node is found, the result holds a single zero-length range
/// positioned at the end of the root node, so the returned vector is never
/// empty.
///
/// # Panics
///
/// Panics if the `erb` language cannot be set on the parser, or if the parser
/// fails to produce a tree.
fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> Vec<Range> {
    let mut parser = Parser::new();
    parser.set_language(erb).unwrap();
    let tree = parser.parse(&source, None).expect("Failed to parse file");
    let root = tree.root_node();

    let mut code_ranges = Vec::new();
    let mut directive_cursor = tree.walk();
    for directive in root.children(&mut directive_cursor) {
        // Only (output) directives carry code; skip every other node kind.
        if !matches!(directive.kind(), "directive" | "output_directive") {
            continue;
        }
        let mut child_cursor = tree.walk();
        code_ranges.extend(
            directive
                .children(&mut child_cursor)
                .filter(|child| child.kind() == "code")
                .map(|child| child.range()),
        );
    }

    if code_ranges.is_empty() {
        // Add an empty range at the end of the file.
        // NOTE(review): presumably this keeps a consumer that treats an empty
        // range list as "whole document" from parsing the template text —
        // confirm against the caller.
        let end_byte = root.end_byte();
        let end_point = root.end_position();
        code_ranges.push(Range {
            start_byte: end_byte,
            end_byte,
            start_point: end_point,
            end_point,
        });
    }
    code_ranges
}
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {