Merge pull request #98 from github/aibaars/erb-extractor

Quick and dirty ERB extraction
This commit is contained in:
Arthur Baars
2021-02-05 18:45:47 +01:00
committed by GitHub
7 changed files with 141 additions and 123 deletions

184
Cargo.lock generated
View File

@@ -58,21 +58,15 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "byteorder"
version = "1.3.4"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b"
[[package]]
name = "cc"
version = "1.0.65"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95752358c8f7552394baf48cd82695b345628ad3f170d607de3ca03b8dacca15"
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
[[package]]
name = "cfg-if"
@@ -110,9 +104,9 @@ dependencies = [
[[package]]
name = "const_fn"
version = "0.4.4"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826"
checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6"
[[package]]
name = "crc32fast"
@@ -120,7 +114,7 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
]
[[package]]
@@ -129,7 +123,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
"crossbeam-utils",
]
@@ -139,7 +133,7 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
@@ -150,7 +144,7 @@ version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
"const_fn",
"crossbeam-utils",
"lazy_static",
@@ -165,7 +159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
dependencies = [
"autocfg",
"cfg-if 1.0.0",
"cfg-if",
"lazy_static",
]
@@ -177,43 +171,30 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "flate2"
version = "1.0.19"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7411863d55df97a419aa64cb4d2f167103ea9d767e2c54a1868b7ac3f6b47129"
checksum = "cd3aec53de10fe96d7d8c565eb17f2c687bb5518a2ec453b5b1252964526abe0"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]]
name = "generator"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cdc09201b2e8ca1b19290cf7e65de2246b8e91fb6874279722189c4de7b94dc"
dependencies = [
"cc",
"libc",
"log",
"rustc_version",
"winapi",
]
[[package]]
name = "hermit-abi"
version = "0.1.17"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8"
checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c"
dependencies = [
"libc",
]
[[package]]
name = "itoa"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "lazy_static"
@@ -223,30 +204,17 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.80"
version = "0.2.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3"
[[package]]
name = "log"
version = "0.4.11"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if 0.1.10",
]
[[package]]
name = "loom"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0e8460f2f2121162705187214720353c517b97bdfb3494c0b1e33d83ebe4bed"
dependencies = [
"cfg-if 0.1.10",
"generator",
"scoped-tls",
"serde",
"serde_json",
"cfg-if",
]
[[package]]
@@ -321,10 +289,16 @@ dependencies = [
]
[[package]]
name = "pin-project-lite"
version = "0.2.0"
name = "once_cell"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b063f57ec186e6140e2b8b6921e5f1bd89c7356dda5b33acc5401203ca6131c"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
[[package]]
name = "pin-project-lite"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439697af366c49a6d0a010c56a0d97685bc140ce0d377b13a2ea2aa42d64a827"
[[package]]
name = "proc-macro2"
@@ -337,9 +311,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.7"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
dependencies = [
"proc-macro2",
]
@@ -371,9 +345,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.4.2"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a"
dependencies = [
"aho-corasick",
"memchr",
@@ -393,9 +367,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.6.21"
version = "0.6.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581"
[[package]]
name = "ruby-extractor"
@@ -406,9 +380,11 @@ dependencies = [
"node-types",
"num_cpus",
"rayon",
"regex",
"tracing",
"tracing-subscriber",
"tree-sitter",
"tree-sitter-embedded-template",
"tree-sitter-ruby",
]
@@ -422,62 +398,32 @@ dependencies = [
"tree-sitter-ruby",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "scoped-tls"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.117"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.117"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
dependencies = [
"proc-macro2",
"quote",
@@ -486,9 +432,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.60"
version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1500e84d27fe482ed1dc791a56eddc2f230046a040fa908c08bda1d9fb615779"
checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a"
dependencies = [
"itoa",
"ryu",
@@ -497,19 +443,18 @@ dependencies = [
[[package]]
name = "sharded-slab"
version = "0.1.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4921be914e16899a80adefb821f8ddb7974e3f1250223575a44ed994882127"
checksum = "79c719719ee05df97490f80a45acfc99e5a30ce98a1e4fb67aee422745ae14e3"
dependencies = [
"lazy_static",
"loom",
]
[[package]]
name = "smallvec"
version = "1.5.0"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85"
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]]
name = "strsim"
@@ -519,9 +464,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "syn"
version = "1.0.53"
version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8833e20724c24de12bbaba5ad230ea61c3eafb05b881c7c9d3cfe8638b187e68"
checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081"
dependencies = [
"proc-macro2",
"quote",
@@ -539,11 +484,11 @@ dependencies = [
[[package]]
name = "thread_local"
version = "1.0.1"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"lazy_static",
"once_cell",
]
[[package]]
@@ -559,11 +504,11 @@ dependencies = [
[[package]]
name = "tracing"
version = "0.1.22"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f47026cdc4080c07e49b37087de021820269d996f581aac150ef9e5583eefe3"
checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3"
dependencies = [
"cfg-if 1.0.0",
"cfg-if",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
@@ -571,9 +516,9 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.11"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80e0ccfc3378da0cce270c946b676a376943f5cd16aeba64568e7939806f4ada"
checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47"
dependencies = [
"proc-macro2",
"quote",
@@ -642,6 +587,15 @@ dependencies = [
"regex",
]
[[package]]
name = "tree-sitter-embedded-template"
version = "0.17.0"
source = "git+https://github.com/aibaars/tree-sitter-embedded-template?rev=d4aac29c08aa7c596633d00b5ec2dd2d247eafe4#d4aac29c08aa7c596633d00b5ec2dd2d247eafe4"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-ruby"
version = "0.17.0"

View File

@@ -7,4 +7,8 @@ file_types:
- name: ruby
display_name: Ruby files
extensions:
- .rb
- .rb
- name: ERB
display_name: Ruby templates
extensions:
- .erb

View File

@@ -10,9 +10,11 @@ edition = "2018"
flate2 = "1.0"
node-types = { path = "../node-types" }
tree-sitter = "0.17"
tree-sitter-embedded-template = { git = "https://github.com/aibaars/tree-sitter-embedded-template", rev = "d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "add8cb36d5fc0a00d4499ba2e8eedc04a38a2488" }
clap = "2.33"
tracing = "0.1"
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
rayon = "1.5.0"
num_cpus = "1.13.0"
regex = "1.4.3"

View File

@@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set;
use std::fmt;
use std::path::Path;
use tracing::{error, info, span, Level};
use tree_sitter::{Language, Node, Parser, Tree};
use tree_sitter::{Language, Node, Parser, Range, Tree};
struct TrapWriter {
/// The accumulated trap entries
@@ -149,7 +149,13 @@ impl TrapWriter {
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
pub fn extract(
language: Language,
schema: &NodeTypeMap,
path: &Path,
source: &Vec<u8>,
ranges: &[Range],
) -> std::io::Result<Program> {
let span = span!(
Level::TRACE,
"extract",
@@ -162,7 +168,7 @@ pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let source = std::fs::read(&path)?;
parser.set_included_ranges(&ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
let mut trap_writer = new_trap_writer();
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));

View File

@@ -8,6 +8,7 @@ use rayon::prelude::*;
use std::fs;
use std::io::{BufRead, BufWriter, Write};
use std::path::{Path, PathBuf};
use tree_sitter::{Language, Parser, Range};
enum TrapCompression {
None,
@@ -126,6 +127,7 @@ fn main() -> std::io::Result<()> {
let file_list = fs::File::open(file_list)?;
let language = tree_sitter_ruby::language();
let erb = tree_sitter_embedded_template::language();
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
let lines = lines?;
@@ -133,16 +135,28 @@ fn main() -> std::io::Result<()> {
let path = PathBuf::from(line).canonicalize()?;
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
let src_archive_file = path_for(&src_archive_dir, &path, "");
let trap = extractor::extract(language, &schema, &path)?;
let mut source = std::fs::read(&path)?;
let code_ranges;
if path.extension().map_or(false, |x| x == "erb") {
tracing::info!("scanning: {}", path.display());
let (ranges, line_breaks) = scan_erb(erb, &source);
for i in line_breaks {
if i < source.len() {
source[i] = b'\n';
}
}
code_ranges = ranges;
} else {
code_ranges = vec![];
}
let trap = extractor::extract(language, &schema, &path, &source, &code_ranges)?;
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
let trap_file = std::fs::File::create(&trap_file)?;
let mut trap_file = BufWriter::new(trap_file);
match trap_compression {
TrapCompression::None => {
write!(trap_file, "{}", trap)
}
TrapCompression::None => write!(trap_file, "{}", trap),
TrapCompression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
write!(compressed_writer, "{}", trap)
@@ -151,6 +165,42 @@ fn main() -> std::io::Result<()> {
})
}
fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> (Vec<Range>, Vec<usize>) {
let mut parser = Parser::new();
parser.set_language(erb).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
let mut result = Vec::new();
let mut line_breaks = vec![];
for n in tree.root_node().children(&mut tree.walk()) {
let kind = n.kind();
if kind == "directive" || kind == "output_directive" {
for c in n.children(&mut tree.walk()) {
if c.kind() == "code" {
let mut range = c.range();
if range.end_byte < source.len() {
line_breaks.push(range.end_byte);
range.end_byte += 1;
range.end_point.column += 1;
}
result.push(range);
}
}
}
}
if result.len() == 0 {
let root = tree.root_node();
// Add an empty range at the end of the file
result.push(Range {
start_byte: root.end_byte(),
end_byte: root.end_byte(),
start_point: root.end_position(),
end_point: root.end_position(),
});
}
(result, line_breaks)
}
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {

View File

@@ -2,6 +2,7 @@
type NUL && "%CODEQL_DIST%\codeql.exe" database index-files ^
--include-extension=.rb ^
--include-extension=.erb ^
--size-limit=5m ^
--language=ruby ^
"%CODEQL_EXTRACTOR_RUBY_WIP_DATABASE%"

View File

@@ -4,6 +4,7 @@ set -eu
exec "${CODEQL_DIST}/codeql" database index-files \
--include-extension=.rb \
--include-extension=.erb \
--size-limit=5m \
--language=ruby \
--working-dir=.\