From 41a53ec109999f4da1e4b6de47875f4fafcf29eb Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Mon, 13 Mar 2023 11:50:03 +0100 Subject: [PATCH] Address comments --- ruby/extractor/src/extractor.rs | 59 +++----------- ruby/extractor/src/file_paths.rs | 135 +++++++++++++++++++++++++++++++ ruby/extractor/src/main.rs | 111 ++----------------------- 3 files changed, 154 insertions(+), 151 deletions(-) create mode 100644 ruby/extractor/src/file_paths.rs diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index b891c003151..36337d6a43f 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -1,4 +1,5 @@ use crate::diagnostics; +use crate::file_paths; use crate::trap; use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName}; use std::collections::BTreeMap as Map; @@ -9,14 +10,15 @@ use std::path::Path; use tree_sitter::{Language, Node, Parser, Range, Tree}; pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label { - let (file_label, fresh) = - writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path))); + let (file_label, fresh) = writer.global_id(&trap::full_id_for_file( + &file_paths::normalize_path(absolute_path), + )); if fresh { writer.add_tuple( "files", vec![ trap::Arg::Label(file_label), - trap::Arg::String(normalize_path(absolute_path)), + trap::Arg::String(file_paths::normalize_path(absolute_path)), ], ); populate_parent_folders(writer, file_label, absolute_path.parent()); @@ -54,8 +56,9 @@ pub fn populate_parent_folders( match path { None => break, Some(folder) => { - let (folder_label, fresh) = - writer.global_id(&trap::full_id_for_folder(&normalize_path(folder))); + let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder( + &file_paths::normalize_path(folder), + )); writer.add_tuple( "containerparent", vec![ @@ -68,7 +71,7 @@ pub fn populate_parent_folders( "folders", vec![ trap::Arg::Label(folder_label), - trap::Arg::String(normalize_path(folder)), + trap::Arg::String(file_paths::normalize_path(folder)), ], ); path = folder.parent(); @@ -119,8 +122,8 @@ pub fn extract( path: &Path, source: &[u8], ranges: &[Range], -) -> () { - let path_str = normalize_path(&path); +) { + let path_str = file_paths::normalize_path(&path); let span = tracing::span!( tracing::Level::TRACE, "extract", @@ -150,46 +153,6 @@ pub fn extract( traverse(&tree, &mut visitor); parser.reset(); - () -} - -/// Normalizes the path according the common CodeQL specification. Assumes that -/// `path` has already been canonicalized using `std::fs::canonicalize`. -pub fn normalize_path(path: &Path) -> String { - if cfg!(windows) { - // The way Rust canonicalizes paths doesn't match the CodeQL spec, so we - // have to do a bit of work removing certain prefixes and replacing - // backslashes. - let mut components: Vec = Vec::new(); - for component in path.components() { - match component { - std::path::Component::Prefix(prefix) => match prefix.kind() { - std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => { - components.push(format!("{}:", letter as char)); - } - std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => { - components.push(x.to_string_lossy().to_string()); - } - std::path::Prefix::UNC(server, share) - | std::path::Prefix::VerbatimUNC(server, share) => { - components.push(server.to_string_lossy().to_string()); - components.push(share.to_string_lossy().to_string()); - } - }, - std::path::Component::Normal(n) => { - components.push(n.to_string_lossy().to_string()); - } - std::path::Component::RootDir => {} - std::path::Component::CurDir => {} - std::path::Component::ParentDir => {} - } - } - components.join("/") - } else { - // For other operating systems, we can use the canonicalized path - // without modifications. - format!("{}", path.display()) - } } struct ChildNode { diff --git a/ruby/extractor/src/file_paths.rs b/ruby/extractor/src/file_paths.rs new file mode 100644 index 00000000000..aa27bbec552 --- /dev/null +++ b/ruby/extractor/src/file_paths.rs @@ -0,0 +1,135 @@ +use std::path::{Path, PathBuf}; + +/// Normalizes the path according the common CodeQL specification. Assumes that +/// `path` has already been canonicalized using `std::fs::canonicalize`. +pub fn normalize_path(path: &Path) -> String { + if cfg!(windows) { + // The way Rust canonicalizes paths doesn't match the CodeQL spec, so we + // have to do a bit of work removing certain prefixes and replacing + // backslashes. + let mut components: Vec = Vec::new(); + for component in path.components() { + match component { + std::path::Component::Prefix(prefix) => match prefix.kind() { + std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => { + components.push(format!("{}:", letter as char)); + } + std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => { + components.push(x.to_string_lossy().to_string()); + } + std::path::Prefix::UNC(server, share) + | std::path::Prefix::VerbatimUNC(server, share) => { + components.push(server.to_string_lossy().to_string()); + components.push(share.to_string_lossy().to_string()); + } + }, + std::path::Component::Normal(n) => { + components.push(n.to_string_lossy().to_string()); + } + std::path::Component::RootDir => {} + std::path::Component::CurDir => {} + std::path::Component::ParentDir => {} + } + } + components.join("/") + } else { + // For other operating systems, we can use the canonicalized path + // without modifications. + format!("{}", path.display()) + } +} + +/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows. +pub fn path_from_string(path: &str) -> PathBuf { + let mut path = PathBuf::from(path); + // make path absolute + if path.is_relative() { + path = std::env::current_dir().unwrap().join(path) + }; + let mut components = path.components(); + + // make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths. + let mut result = match components.next() { + None => unreachable!("empty path"), + + Some(component) => match component { + std::path::Component::Prefix(prefix) => match prefix.kind() { + std::path::Prefix::Disk(drive) => { + let root = format!(r"\\?\{}:\", drive as char); + PathBuf::from(root) + } + std::path::Prefix::UNC(server, share) => { + let mut root = std::ffi::OsString::from(r"\\?\UNC\"); + root.push(server); + root.push(r"\"); + root.push(share); + PathBuf::from(root) + } + std::path::Prefix::Verbatim(_) + | std::path::Prefix::VerbatimUNC(_, _) + | std::path::Prefix::VerbatimDisk(_) + | std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(), + }, + _ => Path::new(&component).to_path_buf(), + }, + }; + // remove `.` and `..` components + for component in components { + match component { + std::path::Component::CurDir => continue, + std::path::Component::ParentDir => { + result.pop(); + } + _ => result.push(component), + } + } + result +} + +pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf { + let mut result = PathBuf::from(dir); + for component in path.components() { + match component { + std::path::Component::Prefix(prefix) => match prefix.kind() { + std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => { + result.push(format!("{}_", letter as char)) + } + std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => { + result.push(x); + } + std::path::Prefix::UNC(server, share) + | std::path::Prefix::VerbatimUNC(server, share) => { + result.push("unc"); + result.push(server); + result.push(share); + } + }, + std::path::Component::RootDir => { + // skip + } + std::path::Component::Normal(_) => { + result.push(component); + } + std::path::Component::CurDir => { + // skip + } + std::path::Component::ParentDir => { + result.pop(); + } + } + } + if !ext.is_empty() { + match result.extension() { + Some(x) => { + let mut new_ext = x.to_os_string(); + new_ext.push("."); + new_ext.push(ext); + result.set_extension(new_ext); + } + None => { + result.set_extension(ext); + } + } + } + result +} diff --git a/ruby/extractor/src/main.rs b/ruby/extractor/src/main.rs index 856fcd1fe7e..3268cb5726c 100644 --- a/ruby/extractor/src/main.rs +++ b/ruby/extractor/src/main.rs @@ -1,5 +1,6 @@ mod diagnostics; mod extractor; +mod file_paths; mod trap; #[macro_use] @@ -56,53 +57,6 @@ fn encoding_from_name(encoding_name: &str) -> Option<&(dyn encoding::Encoding + } } -/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows. -fn path_from_string(path: &str) -> PathBuf { - let mut path = PathBuf::from(path); - // make path absolute - if path.is_relative() { - path = std::env::current_dir().unwrap().join(path) - }; - let mut components = path.components(); - - // make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths. - let mut result = match components.next() { - None => unimplemented!("empty path"), - - Some(component) => match component { - std::path::Component::Prefix(prefix) => match prefix.kind() { - std::path::Prefix::Disk(drive) => { - let root = format!(r"\\?\{}:\", drive as char); - PathBuf::from(root) - } - std::path::Prefix::UNC(server, share) => { - let mut root = std::ffi::OsString::from(r"\\?\UNC\"); - root.push(server); - root.push(r"\"); - root.push(share); - PathBuf::from(root) - } - std::path::Prefix::Verbatim(_) - | std::path::Prefix::VerbatimUNC(_, _) - | std::path::Prefix::VerbatimDisk(_) - | std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(), - }, - _ => Path::new(&component).to_path_buf(), - }, - }; - // remove `.` and `..` components - for component in components { - match component { - std::path::Component::CurDir => continue, - std::path::Component::ParentDir => { - result.pop(); - } - _ => result.push(component), - } - } - result -} - fn main() -> std::io::Result<()> { tracing_subscriber::fmt() .with_target(false) @@ -169,15 +123,15 @@ fn main() -> std::io::Result<()> { let src_archive_dir = matches .value_of("source-archive-dir") .expect("missing --source-archive-dir"); - let src_archive_dir = path_from_string(src_archive_dir); + let src_archive_dir = file_paths::path_from_string(src_archive_dir); let trap_dir = matches .value_of("output-dir") .expect("missing --output-dir"); - let trap_dir = path_from_string(trap_dir); + let trap_dir = file_paths::path_from_string(trap_dir); let file_list = matches.value_of("file-list").expect("missing --file-list"); - let file_list = fs::File::open(path_from_string(file_list))?; + let file_list = fs::File::open(file_paths::path_from_string(file_list))?; let language = tree_sitter_ruby::language(); let erb = tree_sitter_embedded_template::language(); @@ -195,7 +149,7 @@ fn main() -> std::io::Result<()> { .try_for_each(|line| { let mut diagnostics_writer = diagnostics.logger(); let path = PathBuf::from(line).canonicalize()?; - let src_archive_file = path_for(&src_archive_dir, &path, ""); + let src_archive_file = file_paths::path_for(&src_archive_dir, &path, ""); let mut source = std::fs::read(&path)?; let mut needs_conversion = false; let code_ranges; @@ -252,7 +206,7 @@ fn main() -> std::io::Result<()> { "character-decoding-error", "Character decoding error", ) - .file(&extractor::normalize_path(&path)) + .file(&file_paths::normalize_path(&path)) .message( "Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.", &[ @@ -272,7 +226,7 @@ fn main() -> std::io::Result<()> { diagnostics_writer.write( diagnostics_writer .new_entry("unknown-character-encoding", "Unknown character encoding") - .file(&extractor::normalize_path(&path)) + .file(&file_paths::normalize_path(&path)) .message( "Unknown character encoding {} in {} {}.", &[ @@ -321,7 +275,7 @@ fn write_trap( trap_writer: &trap::Writer, trap_compression: trap::Compression, ) -> std::io::Result<()> { - let trap_file = path_for(trap_dir, &path, trap_compression.extension()); + let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension()); std::fs::create_dir_all(&trap_file.parent().unwrap())?; trap_writer.write_to_file(&trap_file, trap_compression) } @@ -368,54 +322,6 @@ fn scan_erb( (result, line_breaks) } -fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf { - let mut result = PathBuf::from(dir); - for component in path.components() { - match component { - std::path::Component::Prefix(prefix) => match prefix.kind() { - std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => { - result.push(format!("{}_", letter as char)) - } - std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => { - result.push(x); - } - std::path::Prefix::UNC(server, share) - | std::path::Prefix::VerbatimUNC(server, share) => { - result.push("unc"); - result.push(server); - result.push(share); - } - }, - std::path::Component::RootDir => { - // skip - } - std::path::Component::Normal(_) => { - result.push(component); - } - std::path::Component::CurDir => { - // skip - } - std::path::Component::ParentDir => { - result.pop(); - } - } - } - if !ext.is_empty() { - match result.extension() { - Some(x) => { - let mut new_ext = x.to_os_string(); - new_ext.push("."); - new_ext.push(ext); - result.set_extension(new_ext); - } - None => { - result.set_extension(ext); - } - } - } - result -} - fn skip_space(content: &[u8], index: usize) -> usize { let mut index = index; while index < content.len() { @@ -429,7 +335,6 @@ fn skip_space(content: &[u8], index: usize) -> usize { } index } - fn scan_coding_comment(content: &[u8]) -> std::option::Option> { let mut index = 0; // skip UTF-8 BOM marker if there is one