Address comments

This commit is contained in:
Arthur Baars
2023-03-13 11:50:03 +01:00
parent c67bfff33b
commit 41a53ec109
3 changed files with 154 additions and 151 deletions

View File

@@ -1,4 +1,5 @@
use crate::diagnostics;
use crate::file_paths;
use crate::trap;
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
use std::collections::BTreeMap as Map;
@@ -9,14 +10,15 @@ use std::path::Path;
use tree_sitter::{Language, Node, Parser, Range, Tree};
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
let (file_label, fresh) =
writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
&file_paths::normalize_path(absolute_path),
));
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String(normalize_path(absolute_path)),
trap::Arg::String(file_paths::normalize_path(absolute_path)),
],
);
populate_parent_folders(writer, file_label, absolute_path.parent());
@@ -54,8 +56,9 @@ pub fn populate_parent_folders(
match path {
None => break,
Some(folder) => {
let (folder_label, fresh) =
writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
&file_paths::normalize_path(folder),
));
writer.add_tuple(
"containerparent",
vec![
@@ -68,7 +71,7 @@ pub fn populate_parent_folders(
"folders",
vec![
trap::Arg::Label(folder_label),
trap::Arg::String(normalize_path(folder)),
trap::Arg::String(file_paths::normalize_path(folder)),
],
);
path = folder.parent();
@@ -119,8 +122,8 @@ pub fn extract(
path: &Path,
source: &[u8],
ranges: &[Range],
) -> () {
let path_str = normalize_path(&path);
) {
let path_str = file_paths::normalize_path(&path);
let span = tracing::span!(
tracing::Level::TRACE,
"extract",
@@ -150,46 +153,6 @@ pub fn extract(
traverse(&tree, &mut visitor);
parser.reset();
()
}
/// Normalizes the path according the common CodeQL specification. Assumes that
/// `path` has already been canonicalized using `std::fs::canonicalize`.
pub fn normalize_path(path: &Path) -> String {
if cfg!(windows) {
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
// have to do a bit of work removing certain prefixes and replacing
// backslashes.
let mut components: Vec<String> = Vec::new();
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
components.push(format!("{}:", letter as char));
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
components.push(x.to_string_lossy().to_string());
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
components.push(server.to_string_lossy().to_string());
components.push(share.to_string_lossy().to_string());
}
},
std::path::Component::Normal(n) => {
components.push(n.to_string_lossy().to_string());
}
std::path::Component::RootDir => {}
std::path::Component::CurDir => {}
std::path::Component::ParentDir => {}
}
}
components.join("/")
} else {
// For other operating systems, we can use the canonicalized path
// without modifications.
format!("{}", path.display())
}
}
struct ChildNode {

View File

@@ -0,0 +1,135 @@
use std::path::{Path, PathBuf};
/// Normalizes the path according the common CodeQL specification. Assumes that
/// `path` has already been canonicalized using `std::fs::canonicalize`.
pub fn normalize_path(path: &Path) -> String {
if cfg!(windows) {
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
// have to do a bit of work removing certain prefixes and replacing
// backslashes.
let mut components: Vec<String> = Vec::new();
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
components.push(format!("{}:", letter as char));
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
components.push(x.to_string_lossy().to_string());
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
components.push(server.to_string_lossy().to_string());
components.push(share.to_string_lossy().to_string());
}
},
std::path::Component::Normal(n) => {
components.push(n.to_string_lossy().to_string());
}
std::path::Component::RootDir => {}
std::path::Component::CurDir => {}
std::path::Component::ParentDir => {}
}
}
components.join("/")
} else {
// For other operating systems, we can use the canonicalized path
// without modifications.
format!("{}", path.display())
}
}
/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows.
pub fn path_from_string(path: &str) -> PathBuf {
let mut path = PathBuf::from(path);
// make path absolute
if path.is_relative() {
path = std::env::current_dir().unwrap().join(path)
};
let mut components = path.components();
// make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths.
let mut result = match components.next() {
None => unreachable!("empty path"),
Some(component) => match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(drive) => {
let root = format!(r"\\?\{}:\", drive as char);
PathBuf::from(root)
}
std::path::Prefix::UNC(server, share) => {
let mut root = std::ffi::OsString::from(r"\\?\UNC\");
root.push(server);
root.push(r"\");
root.push(share);
PathBuf::from(root)
}
std::path::Prefix::Verbatim(_)
| std::path::Prefix::VerbatimUNC(_, _)
| std::path::Prefix::VerbatimDisk(_)
| std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(),
},
_ => Path::new(&component).to_path_buf(),
},
};
// remove `.` and `..` components
for component in components {
match component {
std::path::Component::CurDir => continue,
std::path::Component::ParentDir => {
result.pop();
}
_ => result.push(component),
}
}
result
}
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
result.push(format!("{}_", letter as char))
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
result.push(x);
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
result.push("unc");
result.push(server);
result.push(share);
}
},
std::path::Component::RootDir => {
// skip
}
std::path::Component::Normal(_) => {
result.push(component);
}
std::path::Component::CurDir => {
// skip
}
std::path::Component::ParentDir => {
result.pop();
}
}
}
if !ext.is_empty() {
match result.extension() {
Some(x) => {
let mut new_ext = x.to_os_string();
new_ext.push(".");
new_ext.push(ext);
result.set_extension(new_ext);
}
None => {
result.set_extension(ext);
}
}
}
result
}

View File

@@ -1,5 +1,6 @@
mod diagnostics;
mod extractor;
mod file_paths;
mod trap;
#[macro_use]
@@ -56,53 +57,6 @@ fn encoding_from_name(encoding_name: &str) -> Option<&(dyn encoding::Encoding +
}
}
/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows.
fn path_from_string(path: &str) -> PathBuf {
let mut path = PathBuf::from(path);
// make path absolute
if path.is_relative() {
path = std::env::current_dir().unwrap().join(path)
};
let mut components = path.components();
// make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths.
let mut result = match components.next() {
None => unimplemented!("empty path"),
Some(component) => match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(drive) => {
let root = format!(r"\\?\{}:\", drive as char);
PathBuf::from(root)
}
std::path::Prefix::UNC(server, share) => {
let mut root = std::ffi::OsString::from(r"\\?\UNC\");
root.push(server);
root.push(r"\");
root.push(share);
PathBuf::from(root)
}
std::path::Prefix::Verbatim(_)
| std::path::Prefix::VerbatimUNC(_, _)
| std::path::Prefix::VerbatimDisk(_)
| std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(),
},
_ => Path::new(&component).to_path_buf(),
},
};
// remove `.` and `..` components
for component in components {
match component {
std::path::Component::CurDir => continue,
std::path::Component::ParentDir => {
result.pop();
}
_ => result.push(component),
}
}
result
}
fn main() -> std::io::Result<()> {
tracing_subscriber::fmt()
.with_target(false)
@@ -169,15 +123,15 @@ fn main() -> std::io::Result<()> {
let src_archive_dir = matches
.value_of("source-archive-dir")
.expect("missing --source-archive-dir");
let src_archive_dir = path_from_string(src_archive_dir);
let src_archive_dir = file_paths::path_from_string(src_archive_dir);
let trap_dir = matches
.value_of("output-dir")
.expect("missing --output-dir");
let trap_dir = path_from_string(trap_dir);
let trap_dir = file_paths::path_from_string(trap_dir);
let file_list = matches.value_of("file-list").expect("missing --file-list");
let file_list = fs::File::open(path_from_string(file_list))?;
let file_list = fs::File::open(file_paths::path_from_string(file_list))?;
let language = tree_sitter_ruby::language();
let erb = tree_sitter_embedded_template::language();
@@ -195,7 +149,7 @@ fn main() -> std::io::Result<()> {
.try_for_each(|line| {
let mut diagnostics_writer = diagnostics.logger();
let path = PathBuf::from(line).canonicalize()?;
let src_archive_file = path_for(&src_archive_dir, &path, "");
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
let mut source = std::fs::read(&path)?;
let mut needs_conversion = false;
let code_ranges;
@@ -252,7 +206,7 @@ fn main() -> std::io::Result<()> {
"character-decoding-error",
"Character decoding error",
)
.file(&extractor::normalize_path(&path))
.file(&file_paths::normalize_path(&path))
.message(
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
&[
@@ -272,7 +226,7 @@ fn main() -> std::io::Result<()> {
diagnostics_writer.write(
diagnostics_writer
.new_entry("unknown-character-encoding", "Unknown character encoding")
.file(&extractor::normalize_path(&path))
.file(&file_paths::normalize_path(&path))
.message(
"Unknown character encoding {} in {} {}.",
&[
@@ -321,7 +275,7 @@ fn write_trap(
trap_writer: &trap::Writer,
trap_compression: trap::Compression,
) -> std::io::Result<()> {
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
trap_writer.write_to_file(&trap_file, trap_compression)
}
@@ -368,54 +322,6 @@ fn scan_erb(
(result, line_breaks)
}
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
result.push(format!("{}_", letter as char))
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
result.push(x);
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
result.push("unc");
result.push(server);
result.push(share);
}
},
std::path::Component::RootDir => {
// skip
}
std::path::Component::Normal(_) => {
result.push(component);
}
std::path::Component::CurDir => {
// skip
}
std::path::Component::ParentDir => {
result.pop();
}
}
}
if !ext.is_empty() {
match result.extension() {
Some(x) => {
let mut new_ext = x.to_os_string();
new_ext.push(".");
new_ext.push(ext);
result.set_extension(new_ext);
}
None => {
result.set_extension(ext);
}
}
}
result
}
fn skip_space(content: &[u8], index: usize) -> usize {
let mut index = index;
while index < content.len() {
@@ -429,7 +335,6 @@ fn skip_space(content: &[u8], index: usize) -> usize {
}
index
}
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
let mut index = 0;
// skip UTF-8 BOM marker if there is one