diff --git a/Cargo.lock b/Cargo.lock index 61c02ba2aa2..d84c0336ea5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" + [[package]] name = "aho-corasick" version = "0.7.14" @@ -68,6 +74,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "chrono" version = "0.4.19" @@ -96,6 +108,27 @@ dependencies = [ "vec_map", ] +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "flate2" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da80be589a72651dcda34d8b35bcdc9b7254ad06325611074d9cc0fbb19f60ee" +dependencies = [ + "cfg-if 0.1.10", + "crc32fast", + "libc", + "miniz_oxide", +] + [[package]] name = "generator" version = "0.6.23" @@ -142,7 +175,7 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", ] [[package]] @@ -151,7 +184,7 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0e8460f2f2121162705187214720353c517b97bdfb3494c0b1e33d83ebe4bed" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "generator", 
"scoped-tls", "serde", @@ -173,6 +206,16 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +[[package]] +name = "miniz_oxide" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" +dependencies = [ + "adler", + "autocfg", +] + [[package]] name = "node-types" version = "0.1.0" @@ -257,6 +300,7 @@ name = "ruby-extractor" version = "0.1.0" dependencies = [ "clap", + "flate2", "node-types", "tracing", "tracing-subscriber", @@ -409,7 +453,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27" dependencies = [ - "cfg-if", + "cfg-if 0.1.10", "pin-project-lite", "tracing-attributes", "tracing-core", diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index cb9c4f54ddf..6aabc942542 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +flate2 = "1.0" node-types = { path = "../node-types" } tree-sitter = "0.17.0" tree-sitter-ruby = "0.16" diff --git a/extractor/src/main.rs b/extractor/src/main.rs index 4dd1aa1e9fc..ca9747fb673 100644 --- a/extractor/src/main.rs +++ b/extractor/src/main.rs @@ -1,10 +1,47 @@ mod extractor; use clap; +use flate2::write::GzEncoder; use std::fs; -use std::io::BufRead; +use std::io::{BufRead, BufWriter, Write}; use std::path::{Path, PathBuf}; +enum TrapCompression { + None, + Gzip, +} + +impl TrapCompression { + fn from_env() -> TrapCompression { + match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") { + Ok(method) => match TrapCompression::from_string(&method) { + Some(c) => c, + None => { + tracing::error!("Unknown compression method '{}'; using gzip.", &method); 
+ TrapCompression::Gzip + } + }, + // Default compression method if the env var isn't set: + Err(_) => TrapCompression::Gzip, + } + } + + fn from_string(s: &str) -> Option<TrapCompression> { + match s.to_lowercase().as_ref() { + "none" => Some(TrapCompression::None), + "gzip" => Some(TrapCompression::Gzip), + _ => None, + } + } + + fn extension(&self) -> &str { + match self { + TrapCompression::None => ".trap", + TrapCompression::Gzip => ".trap.gz", + } + } +} + fn main() -> std::io::Result<()> { tracing_subscriber::fmt() .with_target(false) @@ -32,6 +69,7 @@ fn main() -> std::io::Result<()> { .value_of("output-dir") .expect("missing --output-dir"); let trap_dir = PathBuf::from(trap_dir); + let trap_compression = TrapCompression::from_env(); let file_list = matches.value_of("file-list").expect("missing --file-list"); let file_list = fs::File::open(file_list)?; @@ -41,18 +79,27 @@ fn main() -> std::io::Result<()> { let mut extractor = extractor::create(language, schema); for line in std::io::BufReader::new(file_list).lines() { let path = PathBuf::from(line?).canonicalize()?; - let trap_file = path_for(&trap_dir, &path, ".trap"); + let trap_file = path_for(&trap_dir, &path, trap_compression.extension()); let src_archive_file = path_for(&src_archive_dir, &path, ""); let trap = extractor.extract(&path)?; std::fs::create_dir_all(&src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; std::fs::create_dir_all(&trap_file.parent().unwrap())?; - let mut trap_file = std::fs::File::create(&trap_file)?; - let trap_file: &mut dyn std::io::Write = &mut trap_file; - write!(trap_file, "{}", trap)?; + let trap_file = std::fs::File::create(&trap_file)?; + let mut trap_file = BufWriter::new(trap_file); + match trap_compression { + TrapCompression::None => { + write!(trap_file, "{}", trap)?; + } + TrapCompression::Gzip => { + let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); + write!(compressed_writer, "{}", trap)?; + } + } } return Ok(()); } + 
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf { let mut result = PathBuf::from(dir); for component in path.components() {