diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4ddc19183b8..e9fc6349e87 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -203,7 +203,7 @@ jobs:
         shell: bash
         run: |
           echo "import ruby select count(File f)" > "test.ql"
-          echo "| 3 |" > "test.expected"
+          echo "| 4 |" > "test.expected"
           echo 'name: sample-tests
           version: 0.0.0
           dependencies:
diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs
index 933854af217..bdfc31df688 100644
--- a/extractor/src/extractor.rs
+++ b/extractor/src/extractor.rs
@@ -75,6 +75,25 @@ impl TrapWriter {
         file_label
     }
 
+    /// Returns the label for the global id "empty;sourcefile". When `global_id` reports
+    /// that id as fresh, a `files` tuple pairing the label with an empty path is added.
+    fn populate_empty_file(&mut self) -> Label {
+        let (file_label, fresh) = self.global_id("empty;sourcefile");
+        if fresh {
+            self.add_tuple(
+                "files",
+                vec![Arg::Label(file_label), Arg::String("".to_string())],
+            );
+        }
+        file_label
+    }
+
+    /// Calls `location` on the empty file's label with 0 for all four position arguments.
+    pub fn populate_empty_location(&mut self) {
+        let file_label = self.populate_empty_file();
+        self.location(file_label, 0, 0, 0, 0);
+    }
+
     fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) {
         let mut path = path;
         let mut child_label = child_label;
diff --git a/extractor/src/main.rs b/extractor/src/main.rs
index 47cde58584d..5e9fe618726 100644
--- a/extractor/src/main.rs
+++ b/extractor/src/main.rs
@@ -129,57 +129,78 @@ fn main() -> std::io::Result<()> {
         node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?;
     let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
     let lines = lines?;
-    lines.par_iter().try_for_each(|line| {
-        let path = PathBuf::from(line).canonicalize()?;
-        let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
-        let src_archive_file = path_for(&src_archive_dir, &path, "");
-        let mut source = std::fs::read(&path)?;
-        let code_ranges;
-        let mut trap_writer = extractor::new_trap_writer();
-        if path.extension().map_or(false, |x| x == "erb") {
-            tracing::info!("scanning: {}", path.display());
+    lines
+        .par_iter()
+        .try_for_each(|line| {
+            let path = PathBuf::from(line).canonicalize()?;
+            let src_archive_file = path_for(&src_archive_dir, &path, "");
+            let mut source = std::fs::read(&path)?;
+            let code_ranges;
+            let mut trap_writer = extractor::new_trap_writer();
+            if path.extension().map_or(false, |x| x == "erb") {
+                tracing::info!("scanning: {}", path.display());
+                extractor::extract(
+                    erb,
+                    "erb",
+                    &erb_schema,
+                    &mut trap_writer,
+                    &path,
+                    &source,
+                    &[],
+                )?;
+
+                let (ranges, line_breaks) = scan_erb(erb, &source);
+                for i in line_breaks {
+                    if i < source.len() {
+                        source[i] = b'\n';
+                    }
+                }
+                code_ranges = ranges;
+            } else {
+                code_ranges = vec![];
+            }
             extractor::extract(
-                erb,
-                "erb",
-                &erb_schema,
+                language,
+                "ruby",
+                &schema,
                 &mut trap_writer,
                 &path,
                 &source,
-                &[],
+                &code_ranges,
             )?;
+            std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
+            std::fs::copy(&path, &src_archive_file)?;
+            write_trap(&trap_dir, path, trap_writer, &trap_compression)
+        })
+        .expect("failed to extract files");
 
-            let (ranges, line_breaks) = scan_erb(erb, &source);
-            for i in line_breaks {
-                if i < source.len() {
-                    source[i] = b'\n';
-                }
-            }
-            code_ranges = ranges;
-        } else {
-            code_ranges = vec![];
+    // Write one more TRAP file, for the path "extras", containing only the empty location.
+    let path = PathBuf::from("extras");
+    let mut trap_writer = extractor::new_trap_writer();
+    trap_writer.populate_empty_location();
+    write_trap(&trap_dir, path, trap_writer, &trap_compression)
+}
+
+/// Writes `trap_writer`'s output to the TRAP file that `path_for` derives from
+/// `trap_dir`, `path` and the compression's extension, creating parent directories
+/// first. The output is gzip-compressed when `trap_compression` is `Gzip`.
+fn write_trap(
+    trap_dir: &PathBuf,
+    path: PathBuf,
+    trap_writer: extractor::TrapWriter,
+    trap_compression: &TrapCompression,
+) -> std::io::Result<()> {
+    let trap_file = path_for(trap_dir, &path, trap_compression.extension());
+    std::fs::create_dir_all(&trap_file.parent().unwrap())?;
+    let trap_file = std::fs::File::create(&trap_file)?;
+    let mut trap_file = BufWriter::new(trap_file);
+    match trap_compression {
+        TrapCompression::None => trap_writer.output(&mut trap_file),
+        TrapCompression::Gzip => {
+            let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
+            trap_writer.output(&mut compressed_writer)
         }
-        extractor::extract(
-            language,
-            "ruby",
-            &schema,
-            &mut trap_writer,
-            &path,
-            &source,
-            &code_ranges,
-        )?;
-        std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
-        std::fs::copy(&path, &src_archive_file)?;
-        std::fs::create_dir_all(&trap_file.parent().unwrap())?;
-        let trap_file = std::fs::File::create(&trap_file)?;
-        let mut trap_file = BufWriter::new(trap_file);
-        match trap_compression {
-            TrapCompression::None => trap_writer.output(&mut trap_file),
-            TrapCompression::Gzip => {
-                let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
-                trap_writer.output(&mut compressed_writer)
-            }
-        }
-    })
+    }
 }
 
 fn scan_erb(erb: Language, source: &std::vec::Vec<u8>) -> (Vec<Range>, Vec<usize>) {