Ruby: add minimal path transformer support

Supports only a minimal subset of the project layout specification;
enough to work with the transformers produced by the CLI when building
an overlay database.
This commit is contained in:
Nick Rolfe
2025-06-06 11:36:27 +01:00
parent 1bd7c4f11c
commit 665df4baef
6 changed files with 145 additions and 31 deletions

View File

@@ -1,4 +1,5 @@
use clap::Args;
use codeql_extractor::file_paths::PathTransformer;
use lazy_static::lazy_static;
use rayon::prelude::*;
use serde_json;
@@ -81,6 +82,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
let path_transformer = file_paths::load_path_transformer()?;
let language: Language = tree_sitter_ruby::LANGUAGE.into();
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
@@ -105,7 +107,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
}
_ => {},
}
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "", path_transformer.as_ref());
let mut source = std::fs::read(&path)?;
let mut needs_conversion = false;
let code_ranges;
@@ -118,6 +120,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
&erb_schema,
&mut diagnostics_writer,
&mut trap_writer,
path_transformer.as_ref(),
&path,
&source,
&[],
@@ -162,7 +165,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
"character-decoding-error",
"Character decoding error",
)
.file(&file_paths::normalize_path(&path))
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
.message(
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
&[
@@ -182,7 +185,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
diagnostics_writer.write(
diagnostics_writer
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
.file(&file_paths::normalize_path(&path))
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
.message(
"Unknown character encoding {} in {} {}.",
&[
@@ -205,6 +208,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
&schema,
&mut diagnostics_writer,
&mut trap_writer,
path_transformer.as_ref(),
&path,
&source,
&code_ranges,
@@ -215,14 +219,20 @@ pub fn run(options: Options) -> std::io::Result<()> {
} else {
std::fs::copy(&path, &src_archive_file)?;
}
write_trap(&trap_dir, path, &trap_writer, trap_compression)
write_trap(&trap_dir, path, &trap_writer, trap_compression, path_transformer.as_ref())
})
.expect("failed to extract files");
let path = PathBuf::from("extras");
let mut trap_writer = trap::Writer::new();
extractor::populate_empty_location(&mut trap_writer);
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
let res = write_trap(
&trap_dir,
path,
&trap_writer,
trap_compression,
path_transformer.as_ref(),
);
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
// We're extracting an overlay base. For now, we don't have any metadata we need to store
// that would get read when extracting the overlay, but the CLI expects us to write
@@ -254,8 +264,14 @@ fn write_trap(
path: PathBuf,
trap_writer: &trap::Writer,
trap_compression: trap::Compression,
path_transformer: Option<&PathTransformer>,
) -> std::io::Result<()> {
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
let trap_file = file_paths::path_for(
trap_dir,
&path,
trap_compression.extension(),
path_transformer,
);
std::fs::create_dir_all(trap_file.parent().unwrap())?;
trap_writer.write_to_file(&trap_file, trap_compression)
}

View File

@@ -15,7 +15,7 @@ impl Archiver {
}
fn try_archive(&self, source: &Path) -> std::io::Result<()> {
let dest = file_paths::path_for(&self.root, source, "");
let dest = file_paths::path_for(&self.root, source, "", None);
if fs::metadata(&dest).is_ok() {
return Ok(());
}

View File

@@ -212,7 +212,7 @@ impl TrapFile {
);
}
pub fn emit_file(&mut self, absolute_path: &Path) -> Label<generated::File> {
let untyped = extractor::populate_file(&mut self.writer, absolute_path);
let untyped = extractor::populate_file(&mut self.writer, absolute_path, None);
// SAFETY: populate_file emits `@file` typed labels
unsafe { Label::from_untyped(untyped) }
}
@@ -268,6 +268,7 @@ impl TrapFileProvider {
&self.trap_dir.join(category),
key.as_ref(),
self.compression.extension(),
None,
);
debug!("creating trap file {}", path.display());
let mut writer = trap::Writer::new();

View File

@@ -67,19 +67,26 @@ pub fn default_subscriber_with_level(
),
)
}
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
pub fn populate_file(
writer: &mut trap::Writer,
absolute_path: &Path,
transformer: Option<&file_paths::PathTransformer>,
) -> trap::Label {
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
&file_paths::normalize_path(absolute_path),
&file_paths::normalize_and_transform_path(absolute_path, transformer),
));
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String(file_paths::normalize_path(absolute_path)),
trap::Arg::String(file_paths::normalize_and_transform_path(
absolute_path,
transformer,
)),
],
);
populate_parent_folders(writer, file_label, absolute_path.parent());
populate_parent_folders(writer, file_label, absolute_path.parent(), transformer);
}
file_label
}
@@ -117,6 +124,7 @@ pub fn populate_parent_folders(
writer: &mut trap::Writer,
child_label: trap::Label,
path: Option<&Path>,
transformer: Option<&file_paths::PathTransformer>,
) {
let mut path = path;
let mut child_label = child_label;
@@ -124,9 +132,9 @@ pub fn populate_parent_folders(
match path {
None => break,
Some(folder) => {
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
&file_paths::normalize_path(folder),
));
let parent = folder.parent();
let folder = file_paths::normalize_and_transform_path(folder, transformer);
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(&folder));
writer.add_tuple(
"containerparent",
vec![
@@ -137,12 +145,9 @@ pub fn populate_parent_folders(
if fresh {
writer.add_tuple(
"folders",
vec![
trap::Arg::Label(folder_label),
trap::Arg::String(file_paths::normalize_path(folder)),
],
vec![trap::Arg::Label(folder_label), trap::Arg::String(folder)],
);
path = folder.parent();
path = parent;
child_label = folder_label;
} else {
break;
@@ -205,11 +210,12 @@ pub fn extract(
schema: &NodeTypeMap,
diagnostics_writer: &mut diagnostics::LogWriter,
trap_writer: &mut trap::Writer,
transformer: Option<&file_paths::PathTransformer>,
path: &Path,
source: &[u8],
ranges: &[Range],
) {
let path_str = file_paths::normalize_path(path);
let path_str = file_paths::normalize_and_transform_path(path, transformer);
let span = tracing::span!(
tracing::Level::TRACE,
"extract",
@@ -225,7 +231,7 @@ pub fn extract(
parser.set_included_ranges(ranges).unwrap();
let tree = parser.parse(source, None).expect("Failed to parse file");
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
let file_label = populate_file(trap_writer, path);
let file_label = populate_file(trap_writer, path, transformer);
let mut visitor = Visitor::new(
source,
diagnostics_writer,

View File

@@ -1,4 +1,4 @@
use crate::trap;
use crate::{file_paths, trap};
use globset::{GlobBuilder, GlobSetBuilder};
use rayon::prelude::*;
use std::fs::File;
@@ -111,6 +111,8 @@ impl Extractor {
)
};
let path_transformer = file_paths::load_path_transformer()?;
let lines: std::io::Result<Vec<String>> = file_lists
.iter()
.flat_map(|file_list| std::io::BufReader::new(file_list).lines())
@@ -122,8 +124,12 @@ impl Extractor {
.try_for_each(|line| {
let mut diagnostics_writer = diagnostics.logger();
let path = PathBuf::from(line).canonicalize()?;
let src_archive_file =
crate::file_paths::path_for(&self.source_archive_dir, &path, "");
let src_archive_file = crate::file_paths::path_for(
&self.source_archive_dir,
&path,
"",
path_transformer.as_ref(),
);
let source = std::fs::read(&path)?;
let mut trap_writer = trap::Writer::new();
@@ -152,6 +158,7 @@ impl Extractor {
&schemas[i],
&mut diagnostics_writer,
&mut trap_writer,
None,
&path,
&source,
&[],
@@ -183,7 +190,7 @@ fn write_trap(
trap_writer: &trap::Writer,
trap_compression: trap::Compression,
) -> std::io::Result<()> {
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension());
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension(), None);
std::fs::create_dir_all(trap_file.parent().unwrap())?;
trap_writer.write_to_file(&trap_file, trap_compression)
}

View File

@@ -1,8 +1,81 @@
use std::path::{Path, PathBuf};
use std::{
fs,
path::{Path, PathBuf},
};
/// Normalizes the path according the common CodeQL specification. Assumes that
/// `path` has already been canonicalized using `std::fs::canonicalize`.
pub fn normalize_path(path: &Path) -> String {
/// This represents the minimum supported path transformation that is needed to support extracting
/// overlay databases. Specifically, it represents a transformer where one path prefix is replaced
/// with a different prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathTransformer {
    /// The path prefix to look for at the start of a normalized path.
    pub original: String,
    /// The text substituted for `original` when a path starts with it.
    pub replacement: String,
}
/// Normalizes the path according to the common CodeQL specification and applies the given path
/// transformer, if any. Assumes that `path` has already been canonicalized using
/// `std::fs::canonicalize`.
///
/// When a transformer is given and the normalized path starts with the transformer's `original`
/// prefix, that prefix is replaced with `replacement`. The prefix only counts as a match when it
/// ends on a path-component boundary, so a prefix of `/src/app` rewrites `/src/app` and
/// `/src/app/main.rb` but leaves `/src/application/main.rb` untouched. Paths that do not match
/// are returned normalized but otherwise unchanged.
pub fn normalize_and_transform_path(path: &Path, transformer: Option<&PathTransformer>) -> String {
    let path = normalize_path(path);
    let transformer = match transformer {
        Some(transformer) => transformer,
        None => return path,
    };
    let original = transformer.original.as_str();
    // NOTE(review): assumes normalized paths use `/` as the component separator on every
    // platform, as required by the CodeQL path specification -- confirm against `normalize_path`.
    let transformed = path
        .strip_prefix(original)
        .filter(|suffix| {
            // An empty prefix, or one that already ends in a separator, always ends on a
            // boundary; otherwise the remainder must be empty or begin a new component.
            original.is_empty()
                || original.ends_with('/')
                || suffix.is_empty()
                || suffix.starts_with('/')
        })
        .map(|suffix| format!("{}{}", transformer.replacement, suffix));
    transformed.unwrap_or(path)
}
/**
* Attempts to load a path transformer.
*
* If the `CODEQL_PATH_TRANSFORMER` environment variable is not set, no transformer has been
* specified and the function returns `Ok(None)`.
*
* If the environment variable is set, the function attempts to load the transformer from the file
* at the specified path. If this is successful, it returns `Ok(Some(PathTransformer))`.
*
* If the file cannot be read, or if it does not match the minimal subset of the path-transformer
* syntax supported by this extractor, the function returns an error.
*/
pub fn load_path_transformer() -> std::io::Result<Option<PathTransformer>> {
let path = match std::env::var("CODEQL_PATH_TRANSFORMER") {
Ok(p) => p,
Err(_) => return Ok(None),
};
let file_content = fs::read_to_string(path)?;
let lines = file_content
.lines()
.map(|line| line.trim().to_owned())
.filter(|line| !line.is_empty())
.collect::<Vec<String>>();
if lines.len() != 2 {
return Err(unsupported_transformer_error());
}
let replacement = lines[0]
.strip_prefix('#')
.ok_or(unsupported_transformer_error())?;
let original = lines[1]
.strip_suffix("//")
.ok_or(unsupported_transformer_error())?;
Ok(Some(PathTransformer {
original: original.to_owned(),
replacement: replacement.to_owned(),
}))
}
/// Builds the `InvalidData` I/O error reported when a path-transformer file does not have the
/// two-line shape this extractor understands.
fn unsupported_transformer_error() -> std::io::Error {
    const MESSAGE: &str = "This extractor only supports path transformers specifying a single \
        path-prefix rewrite, with the first line starting with a # and the second line ending \
        with //.";
    let kind = std::io::ErrorKind::InvalidData;
    std::io::Error::new(kind, MESSAGE)
}
/// Normalizes the path according to the common CodeQL specification. Assumes that `path` has
/// already been canonicalized using `std::fs::canonicalize`.
fn normalize_path(path: &Path) -> String {
if cfg!(windows) {
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
// have to do a bit of work removing certain prefixes and replacing
@@ -93,7 +166,18 @@ pub fn path_from_string(path: &str) -> PathBuf {
result
}
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
pub fn path_for(
dir: &Path,
path: &Path,
ext: &str,
transformer: Option<&PathTransformer>,
) -> PathBuf {
let path = if transformer.is_some() {
let transformed = normalize_and_transform_path(path, transformer);
PathBuf::from(transformed)
} else {
path.to_path_buf()
};
let mut result = PathBuf::from(dir);
for component in path.components() {
match component {