mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Ruby: add minimal path transformer support
Supports only a minimal subset of the project layout specification; enough to work with the transformers produced by the CLI when building an overlay database.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use clap::Args;
|
||||
use codeql_extractor::file_paths::PathTransformer;
|
||||
use lazy_static::lazy_static;
|
||||
use rayon::prelude::*;
|
||||
use serde_json;
|
||||
@@ -81,6 +82,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
|
||||
|
||||
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
|
||||
let path_transformer = file_paths::load_path_transformer()?;
|
||||
|
||||
let language: Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
|
||||
@@ -105,7 +107,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
}
|
||||
_ => {},
|
||||
}
|
||||
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
|
||||
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "", path_transformer.as_ref());
|
||||
let mut source = std::fs::read(&path)?;
|
||||
let mut needs_conversion = false;
|
||||
let code_ranges;
|
||||
@@ -118,6 +120,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
&erb_schema,
|
||||
&mut diagnostics_writer,
|
||||
&mut trap_writer,
|
||||
path_transformer.as_ref(),
|
||||
&path,
|
||||
&source,
|
||||
&[],
|
||||
@@ -162,7 +165,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
"character-decoding-error",
|
||||
"Character decoding error",
|
||||
)
|
||||
.file(&file_paths::normalize_path(&path))
|
||||
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
|
||||
.message(
|
||||
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
|
||||
&[
|
||||
@@ -182,7 +185,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
diagnostics_writer.write(
|
||||
diagnostics_writer
|
||||
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
|
||||
.file(&file_paths::normalize_path(&path))
|
||||
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
|
||||
.message(
|
||||
"Unknown character encoding {} in {} {}.",
|
||||
&[
|
||||
@@ -205,6 +208,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
&schema,
|
||||
&mut diagnostics_writer,
|
||||
&mut trap_writer,
|
||||
path_transformer.as_ref(),
|
||||
&path,
|
||||
&source,
|
||||
&code_ranges,
|
||||
@@ -215,14 +219,20 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
} else {
|
||||
std::fs::copy(&path, &src_archive_file)?;
|
||||
}
|
||||
write_trap(&trap_dir, path, &trap_writer, trap_compression)
|
||||
write_trap(&trap_dir, path, &trap_writer, trap_compression, path_transformer.as_ref())
|
||||
})
|
||||
.expect("failed to extract files");
|
||||
|
||||
let path = PathBuf::from("extras");
|
||||
let mut trap_writer = trap::Writer::new();
|
||||
extractor::populate_empty_location(&mut trap_writer);
|
||||
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
|
||||
let res = write_trap(
|
||||
&trap_dir,
|
||||
path,
|
||||
&trap_writer,
|
||||
trap_compression,
|
||||
path_transformer.as_ref(),
|
||||
);
|
||||
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
|
||||
// We're extracting an overlay base. For now, we don't have any metadata we need to store
|
||||
// that would get read when extracting the overlay, but the CLI expects us to write
|
||||
@@ -254,8 +264,14 @@ fn write_trap(
|
||||
path: PathBuf,
|
||||
trap_writer: &trap::Writer,
|
||||
trap_compression: trap::Compression,
|
||||
path_transformer: Option<&PathTransformer>,
|
||||
) -> std::io::Result<()> {
|
||||
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
|
||||
let trap_file = file_paths::path_for(
|
||||
trap_dir,
|
||||
&path,
|
||||
trap_compression.extension(),
|
||||
path_transformer,
|
||||
);
|
||||
std::fs::create_dir_all(trap_file.parent().unwrap())?;
|
||||
trap_writer.write_to_file(&trap_file, trap_compression)
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ impl Archiver {
|
||||
}
|
||||
|
||||
fn try_archive(&self, source: &Path) -> std::io::Result<()> {
|
||||
let dest = file_paths::path_for(&self.root, source, "");
|
||||
let dest = file_paths::path_for(&self.root, source, "", None);
|
||||
if fs::metadata(&dest).is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -212,7 +212,7 @@ impl TrapFile {
|
||||
);
|
||||
}
|
||||
pub fn emit_file(&mut self, absolute_path: &Path) -> Label<generated::File> {
|
||||
let untyped = extractor::populate_file(&mut self.writer, absolute_path);
|
||||
let untyped = extractor::populate_file(&mut self.writer, absolute_path, None);
|
||||
// SAFETY: populate_file emits `@file` typed labels
|
||||
unsafe { Label::from_untyped(untyped) }
|
||||
}
|
||||
@@ -268,6 +268,7 @@ impl TrapFileProvider {
|
||||
&self.trap_dir.join(category),
|
||||
key.as_ref(),
|
||||
self.compression.extension(),
|
||||
None,
|
||||
);
|
||||
debug!("creating trap file {}", path.display());
|
||||
let mut writer = trap::Writer::new();
|
||||
|
||||
@@ -67,19 +67,26 @@ pub fn default_subscriber_with_level(
|
||||
),
|
||||
)
|
||||
}
|
||||
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
|
||||
pub fn populate_file(
|
||||
writer: &mut trap::Writer,
|
||||
absolute_path: &Path,
|
||||
transformer: Option<&file_paths::PathTransformer>,
|
||||
) -> trap::Label {
|
||||
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
|
||||
&file_paths::normalize_path(absolute_path),
|
||||
&file_paths::normalize_and_transform_path(absolute_path, transformer),
|
||||
));
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
"files",
|
||||
vec![
|
||||
trap::Arg::Label(file_label),
|
||||
trap::Arg::String(file_paths::normalize_path(absolute_path)),
|
||||
trap::Arg::String(file_paths::normalize_and_transform_path(
|
||||
absolute_path,
|
||||
transformer,
|
||||
)),
|
||||
],
|
||||
);
|
||||
populate_parent_folders(writer, file_label, absolute_path.parent());
|
||||
populate_parent_folders(writer, file_label, absolute_path.parent(), transformer);
|
||||
}
|
||||
file_label
|
||||
}
|
||||
@@ -117,6 +124,7 @@ pub fn populate_parent_folders(
|
||||
writer: &mut trap::Writer,
|
||||
child_label: trap::Label,
|
||||
path: Option<&Path>,
|
||||
transformer: Option<&file_paths::PathTransformer>,
|
||||
) {
|
||||
let mut path = path;
|
||||
let mut child_label = child_label;
|
||||
@@ -124,9 +132,9 @@ pub fn populate_parent_folders(
|
||||
match path {
|
||||
None => break,
|
||||
Some(folder) => {
|
||||
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
|
||||
&file_paths::normalize_path(folder),
|
||||
));
|
||||
let parent = folder.parent();
|
||||
let folder = file_paths::normalize_and_transform_path(folder, transformer);
|
||||
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(&folder));
|
||||
writer.add_tuple(
|
||||
"containerparent",
|
||||
vec![
|
||||
@@ -137,12 +145,9 @@ pub fn populate_parent_folders(
|
||||
if fresh {
|
||||
writer.add_tuple(
|
||||
"folders",
|
||||
vec![
|
||||
trap::Arg::Label(folder_label),
|
||||
trap::Arg::String(file_paths::normalize_path(folder)),
|
||||
],
|
||||
vec![trap::Arg::Label(folder_label), trap::Arg::String(folder)],
|
||||
);
|
||||
path = folder.parent();
|
||||
path = parent;
|
||||
child_label = folder_label;
|
||||
} else {
|
||||
break;
|
||||
@@ -205,11 +210,12 @@ pub fn extract(
|
||||
schema: &NodeTypeMap,
|
||||
diagnostics_writer: &mut diagnostics::LogWriter,
|
||||
trap_writer: &mut trap::Writer,
|
||||
transformer: Option<&file_paths::PathTransformer>,
|
||||
path: &Path,
|
||||
source: &[u8],
|
||||
ranges: &[Range],
|
||||
) {
|
||||
let path_str = file_paths::normalize_path(path);
|
||||
let path_str = file_paths::normalize_and_transform_path(path, transformer);
|
||||
let span = tracing::span!(
|
||||
tracing::Level::TRACE,
|
||||
"extract",
|
||||
@@ -225,7 +231,7 @@ pub fn extract(
|
||||
parser.set_included_ranges(ranges).unwrap();
|
||||
let tree = parser.parse(source, None).expect("Failed to parse file");
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
|
||||
let file_label = populate_file(trap_writer, path);
|
||||
let file_label = populate_file(trap_writer, path, transformer);
|
||||
let mut visitor = Visitor::new(
|
||||
source,
|
||||
diagnostics_writer,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::trap;
|
||||
use crate::{file_paths, trap};
|
||||
use globset::{GlobBuilder, GlobSetBuilder};
|
||||
use rayon::prelude::*;
|
||||
use std::fs::File;
|
||||
@@ -111,6 +111,8 @@ impl Extractor {
|
||||
)
|
||||
};
|
||||
|
||||
let path_transformer = file_paths::load_path_transformer()?;
|
||||
|
||||
let lines: std::io::Result<Vec<String>> = file_lists
|
||||
.iter()
|
||||
.flat_map(|file_list| std::io::BufReader::new(file_list).lines())
|
||||
@@ -122,8 +124,12 @@ impl Extractor {
|
||||
.try_for_each(|line| {
|
||||
let mut diagnostics_writer = diagnostics.logger();
|
||||
let path = PathBuf::from(line).canonicalize()?;
|
||||
let src_archive_file =
|
||||
crate::file_paths::path_for(&self.source_archive_dir, &path, "");
|
||||
let src_archive_file = crate::file_paths::path_for(
|
||||
&self.source_archive_dir,
|
||||
&path,
|
||||
"",
|
||||
path_transformer.as_ref(),
|
||||
);
|
||||
let source = std::fs::read(&path)?;
|
||||
let mut trap_writer = trap::Writer::new();
|
||||
|
||||
@@ -152,6 +158,7 @@ impl Extractor {
|
||||
&schemas[i],
|
||||
&mut diagnostics_writer,
|
||||
&mut trap_writer,
|
||||
None,
|
||||
&path,
|
||||
&source,
|
||||
&[],
|
||||
@@ -183,7 +190,7 @@ fn write_trap(
|
||||
trap_writer: &trap::Writer,
|
||||
trap_compression: trap::Compression,
|
||||
) -> std::io::Result<()> {
|
||||
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension());
|
||||
let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension(), None);
|
||||
std::fs::create_dir_all(trap_file.parent().unwrap())?;
|
||||
trap_writer.write_to_file(&trap_file, trap_compression)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,81 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Normalizes the path according the common CodeQL specification. Assumes that
|
||||
/// `path` has already been canonicalized using `std::fs::canonicalize`.
|
||||
pub fn normalize_path(path: &Path) -> String {
|
||||
/// This represents the minimum supported path transformation that is needed to support extracting
/// overlay databases. Specifically, it represents a transformer where one path prefix is replaced
/// with a different prefix.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathTransformer {
    /// The path prefix that is looked for at the start of a normalized path.
    pub original: String,
    /// The prefix that is substituted for `original` when a path starts with it.
    pub replacement: String,
}
|
||||
|
||||
/// Normalizes the path according to the common CodeQL specification, and, applies the given path
|
||||
/// transformer, if any. Assumes that `path` has already been canonicalized using
|
||||
/// `std::fs::canonicalize`.
|
||||
pub fn normalize_and_transform_path(path: &Path, transformer: Option<&PathTransformer>) -> String {
|
||||
let path = normalize_path(path);
|
||||
match transformer {
|
||||
Some(transformer) => match path.strip_prefix(&transformer.original) {
|
||||
Some(suffix) => format!("{}{}", transformer.replacement, suffix),
|
||||
None => path,
|
||||
},
|
||||
None => path,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to load a path transformer.
|
||||
*
|
||||
* If the `CODEQL_PATH_TRANSFORMER` environment variable is not set, no transformer has been
|
||||
* specified and the function returns `Ok(None)`.
|
||||
*
|
||||
* If the environment variable is set, the function attempts to load the transformer from the file
|
||||
* at the specified path. If this is successful, it returns `Ok(Some(PathTransformer))`.
|
||||
*
|
||||
* If the file cannot be read, or if it does not match the minimal subset of the path-transformer
|
||||
* syntax supported by this extractor, the function returns an error.
|
||||
*/
|
||||
pub fn load_path_transformer() -> std::io::Result<Option<PathTransformer>> {
|
||||
let path = match std::env::var("CODEQL_PATH_TRANSFORMER") {
|
||||
Ok(p) => p,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let file_content = fs::read_to_string(path)?;
|
||||
let lines = file_content
|
||||
.lines()
|
||||
.map(|line| line.trim().to_owned())
|
||||
.filter(|line| !line.is_empty())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
if lines.len() != 2 {
|
||||
return Err(unsupported_transformer_error());
|
||||
}
|
||||
let replacement = lines[0]
|
||||
.strip_prefix('#')
|
||||
.ok_or(unsupported_transformer_error())?;
|
||||
let original = lines[1]
|
||||
.strip_suffix("//")
|
||||
.ok_or(unsupported_transformer_error())?;
|
||||
|
||||
Ok(Some(PathTransformer {
|
||||
original: original.to_owned(),
|
||||
replacement: replacement.to_owned(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Builds the `InvalidData` error reported when a path-transformer file does not have the one
/// shape this extractor understands: a single prefix rewrite written as a `#`-prefixed line
/// followed by a line ending in `//`.
fn unsupported_transformer_error() -> std::io::Error {
    std::io::Error::new(
        std::io::ErrorKind::InvalidData,
        "This extractor only supports path transformers specifying a single path-prefix rewrite, \
         with the first line starting with a # and the second line ending with //.",
    )
}
|
||||
|
||||
/// Normalizes the path according to the common CodeQL specification. Assumes that `path` has
|
||||
/// already been canonicalized using `std::fs::canonicalize`.
|
||||
fn normalize_path(path: &Path) -> String {
|
||||
if cfg!(windows) {
|
||||
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
|
||||
// have to do a bit of work removing certain prefixes and replacing
|
||||
@@ -93,7 +166,18 @@ pub fn path_from_string(path: &str) -> PathBuf {
|
||||
result
|
||||
}
|
||||
|
||||
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
|
||||
pub fn path_for(
|
||||
dir: &Path,
|
||||
path: &Path,
|
||||
ext: &str,
|
||||
transformer: Option<&PathTransformer>,
|
||||
) -> PathBuf {
|
||||
let path = if transformer.is_some() {
|
||||
let transformed = normalize_and_transform_path(path, transformer);
|
||||
PathBuf::from(transformed)
|
||||
} else {
|
||||
path.to_path_buf()
|
||||
};
|
||||
let mut result = PathBuf::from(dir);
|
||||
for component in path.components() {
|
||||
match component {
|
||||
|
||||
Reference in New Issue
Block a user