mirror of
https://github.com/github/codeql.git
synced 2026-05-05 21:55:19 +02:00
Extract ERB files
This commit is contained in:
@@ -10,9 +10,11 @@ edition = "2018"
|
||||
flate2 = "1.0"
|
||||
node-types = { path = "../node-types" }
|
||||
tree-sitter = "0.17"
|
||||
tree-sitter-embedded-template = { git = "https://github.com/aibaars/tree-sitter-embedded-template", rev = "d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" }
|
||||
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "add8cb36d5fc0a00d4499ba2e8eedc04a38a2488" }
|
||||
clap = "2.33"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
|
||||
rayon = "1.5.0"
|
||||
num_cpus = "1.13.0"
|
||||
regex = "1.4.3"
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use tracing::{error, info, span, Level};
|
||||
use tree_sitter::{Language, Node, Parser, Tree};
|
||||
use tree_sitter::{Language, Node, Parser, Range, Tree};
|
||||
|
||||
struct TrapWriter {
|
||||
/// The accumulated trap entries
|
||||
@@ -149,7 +149,13 @@ impl TrapWriter {
|
||||
}
|
||||
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
|
||||
pub fn extract(
|
||||
language: Language,
|
||||
schema: &NodeTypeMap,
|
||||
path: &Path,
|
||||
source: &Vec<u8>,
|
||||
ranges: &[Range],
|
||||
) -> std::io::Result<Program> {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
@@ -162,7 +168,7 @@ pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let source = std::fs::read(&path)?;
|
||||
parser.set_included_ranges(&ranges).unwrap();
|
||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||
let mut trap_writer = new_trap_writer();
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
||||
|
||||
@@ -8,6 +8,7 @@ use rayon::prelude::*;
|
||||
use std::fs;
|
||||
use std::io::{BufRead, BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tree_sitter::{Language, Parser, Range};
|
||||
|
||||
enum TrapCompression {
|
||||
None,
|
||||
@@ -126,6 +127,7 @@ fn main() -> std::io::Result<()> {
|
||||
let file_list = fs::File::open(file_list)?;
|
||||
|
||||
let language = tree_sitter_ruby::language();
|
||||
let erb = tree_sitter_embedded_template::language();
|
||||
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
|
||||
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
|
||||
let lines = lines?;
|
||||
@@ -133,16 +135,22 @@ fn main() -> std::io::Result<()> {
|
||||
let path = PathBuf::from(line).canonicalize()?;
|
||||
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
|
||||
let src_archive_file = path_for(&src_archive_dir, &path, "");
|
||||
let trap = extractor::extract(language, &schema, &path)?;
|
||||
let source = std::fs::read(&path)?;
|
||||
let code_ranges;
|
||||
if path.extension().map_or(false, |x| x == "erb") {
|
||||
tracing::info!("scanning: {}", path.display());
|
||||
code_ranges = scan_erb(erb, &source);
|
||||
} else {
|
||||
code_ranges = vec![];
|
||||
}
|
||||
let trap = extractor::extract(language, &schema, &path, &source, &code_ranges)?;
|
||||
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
|
||||
std::fs::copy(&path, &src_archive_file)?;
|
||||
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
|
||||
let trap_file = std::fs::File::create(&trap_file)?;
|
||||
let mut trap_file = BufWriter::new(trap_file);
|
||||
match trap_compression {
|
||||
TrapCompression::None => {
|
||||
write!(trap_file, "{}", trap)
|
||||
}
|
||||
TrapCompression::None => write!(trap_file, "{}", trap),
|
||||
TrapCompression::Gzip => {
|
||||
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
|
||||
write!(compressed_writer, "{}", trap)
|
||||
@@ -151,6 +159,35 @@ fn main() -> std::io::Result<()> {
|
||||
})
|
||||
}
|
||||
|
||||
fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> Vec<Range> {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(erb).unwrap();
|
||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||
let mut result = Vec::new();
|
||||
|
||||
for n in tree.root_node().children(&mut tree.walk()) {
|
||||
let kind = n.kind();
|
||||
if kind == "directive" || kind == "output_directive" {
|
||||
for c in n.children(&mut tree.walk()) {
|
||||
if c.kind() == "code" {
|
||||
result.push(c.range());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if result.len() == 0 {
|
||||
let root = tree.root_node();
|
||||
// Add an empty range at the end of the file
|
||||
result.push(Range {
|
||||
start_byte: root.end_byte(),
|
||||
end_byte: root.end_byte(),
|
||||
start_point: root.end_position(),
|
||||
end_point: root.end_position(),
|
||||
});
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
|
||||
let mut result = PathBuf::from(dir);
|
||||
for component in path.components() {
|
||||
|
||||
Reference in New Issue
Block a user