Merge pull request #24 from github/gzip

Add buffered writing and gzip compression for trap files
This commit is contained in:
Arthur Baars
2020-11-03 13:45:19 +01:00
committed by GitHub
3 changed files with 100 additions and 8 deletions

50
Cargo.lock generated
View File

@@ -1,5 +1,11 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "adler"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
[[package]]
name = "aho-corasick"
version = "0.7.14"
@@ -68,6 +74,12 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
@@ -96,6 +108,27 @@ dependencies = [
"vec_map",
]
[[package]]
name = "crc32fast"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "flate2"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da80be589a72651dcda34d8b35bcdc9b7254ad06325611074d9cc0fbb19f60ee"
dependencies = [
"cfg-if 0.1.10",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]]
name = "generator"
version = "0.6.23"
@@ -142,7 +175,7 @@ version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
dependencies = [
"cfg-if",
"cfg-if 0.1.10",
]
[[package]]
@@ -151,7 +184,7 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0e8460f2f2121162705187214720353c517b97bdfb3494c0b1e33d83ebe4bed"
dependencies = [
"cfg-if",
"cfg-if 0.1.10",
"generator",
"scoped-tls",
"serde",
@@ -173,6 +206,16 @@ version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
[[package]]
name = "miniz_oxide"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d"
dependencies = [
"adler",
"autocfg",
]
[[package]]
name = "node-types"
version = "0.1.0"
@@ -257,6 +300,7 @@ name = "ruby-extractor"
version = "0.1.0"
dependencies = [
"clap",
"flate2",
"node-types",
"tracing",
"tracing-subscriber",
@@ -409,7 +453,7 @@ version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
dependencies = [
"cfg-if",
"cfg-if 0.1.10",
"pin-project-lite",
"tracing-attributes",
"tracing-core",

View File

@@ -7,6 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
flate2 = "1.0"
node-types = { path = "../node-types" }
tree-sitter = "0.17.0"
tree-sitter-ruby = "0.16"

View File

@@ -1,10 +1,47 @@
mod extractor;
use clap;
use flate2::write::GzEncoder;
use std::fs;
use std::io::BufRead;
use std::io::{BufRead, BufWriter, Write};
use std::path::{Path, PathBuf};
/// Compression applied to the TRAP files written by the extractor.
///
/// The common traits are derived so values can be logged, compared and
/// passed around by value without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrapCompression {
    /// Write plain, uncompressed `.trap` files.
    None,
    /// Write gzip-compressed `.trap.gz` files.
    Gzip,
}
impl TrapCompression {
fn from_env() -> TrapCompression {
match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") {
Ok(method) => match TrapCompression::from_string(&method) {
Some(c) => c,
None => {
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
TrapCompression::Gzip
}
},
// Default compression method if the env var isn't set:
Err(_) => TrapCompression::Gzip,
}
}
fn from_string(s: &str) -> Option<TrapCompression> {
match s.to_lowercase().as_ref() {
"none" => Some(TrapCompression::None),
"gzip" => Some(TrapCompression::Gzip),
_ => None,
}
}
fn extension(&self) -> &str {
match self {
TrapCompression::None => ".trap",
TrapCompression::Gzip => ".trap.gz",
}
}
}
fn main() -> std::io::Result<()> {
tracing_subscriber::fmt()
.with_target(false)
@@ -32,6 +69,7 @@ fn main() -> std::io::Result<()> {
.value_of("output-dir")
.expect("missing --output-dir");
let trap_dir = PathBuf::from(trap_dir);
let trap_compression = TrapCompression::from_env();
let file_list = matches.value_of("file-list").expect("missing --file-list");
let file_list = fs::File::open(file_list)?;
@@ -41,18 +79,27 @@ fn main() -> std::io::Result<()> {
let mut extractor = extractor::create(language, schema);
for line in std::io::BufReader::new(file_list).lines() {
let path = PathBuf::from(line?).canonicalize()?;
let trap_file = path_for(&trap_dir, &path, ".trap");
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
let src_archive_file = path_for(&src_archive_dir, &path, "");
let trap = extractor.extract(&path)?;
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
std::fs::copy(&path, &src_archive_file)?;
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
let mut trap_file = std::fs::File::create(&trap_file)?;
let trap_file: &mut dyn std::io::Write = &mut trap_file;
write!(trap_file, "{}", trap)?;
let trap_file = std::fs::File::create(&trap_file)?;
let mut trap_file = BufWriter::new(trap_file);
match trap_compression {
TrapCompression::None => {
write!(trap_file, "{}", trap)?;
}
TrapCompression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
write!(compressed_writer, "{}", trap)?;
}
}
}
return Ok(());
}
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {