mirror of
https://github.com/github/codeql.git
synced 2026-05-14 03:09:26 +02:00
Use relative paths in tree-sitter extractor diagnostics
Diagnostic `location.file` entries were using absolute paths (e.g. `/home/runner/work/...`), causing broken links in the GitHub UI. Now relativize against CWD (the source root during extraction), falling back to a properly percent-encoded `file:` URI for paths outside it. Fixes https://github.com/github/codeql/issues/21802 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -456,6 +456,7 @@ dependencies = [
|
||||
"tree-sitter",
|
||||
"tree-sitter-json",
|
||||
"tree-sitter-ql",
|
||||
"url",
|
||||
"yeast",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
BIN
ql/Cargo.lock
generated
BIN
ql/Cargo.lock
generated
Binary file not shown.
@@ -94,11 +94,14 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?;
|
||||
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
|
||||
let lines = lines?;
|
||||
let source_root = std::env::current_dir().ok();
|
||||
lines
|
||||
.par_iter()
|
||||
.try_for_each(|line| {
|
||||
let mut diagnostics_writer = diagnostics.logger();
|
||||
let path = PathBuf::from(line).canonicalize()?;
|
||||
let diagnostics_file_path =
|
||||
file_paths::relativize_for_diagnostic(&path, source_root.as_deref());
|
||||
match &overlay_changed_files {
|
||||
Some(changed_files) if !changed_files.contains(&path) => {
|
||||
// We are extracting an overlay and this file is not in the list of changes files, so we should skip it.
|
||||
@@ -165,7 +168,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
"character-decoding-error",
|
||||
"Character decoding error",
|
||||
)
|
||||
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
|
||||
.file(&diagnostics_file_path)
|
||||
.message(
|
||||
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
|
||||
&[
|
||||
@@ -185,7 +188,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
||||
diagnostics_writer.write(
|
||||
diagnostics_writer
|
||||
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
|
||||
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
|
||||
.file(&diagnostics_file_path)
|
||||
.message(
|
||||
"Unknown character encoding {} in {} {}.",
|
||||
&[
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"location": {
|
||||
"endColumn": 5,
|
||||
"endLine": 1,
|
||||
"file": "<test-root-directory>/bad.rb",
|
||||
"file": "bad.rb",
|
||||
"startColumn": 4,
|
||||
"startLine": 1
|
||||
},
|
||||
@@ -28,7 +28,7 @@
|
||||
"location": {
|
||||
"endColumn": 7,
|
||||
"endLine": 3,
|
||||
"file": "<test-root-directory>/bad.rb",
|
||||
"file": "bad.rb",
|
||||
"startColumn": 8,
|
||||
"startLine": 3
|
||||
},
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
"https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive"
|
||||
],
|
||||
"location": {
|
||||
"file": "<test-root-directory>/encoding.rb"
|
||||
"file": "encoding.rb"
|
||||
},
|
||||
"markdownMessage": "Unknown character encoding `silly` in `#encoding:` [directive](https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive).",
|
||||
"plaintextMessage": "Unknown character encoding silly in #encoding: directive.",
|
||||
|
||||
@@ -18,6 +18,7 @@ lazy_static = "1.5.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
chrono = { version = "0.4.42", features = ["serde"] }
|
||||
url = "2.5"
|
||||
num_cpus = "1.17.0"
|
||||
zstd = "0.13.3"
|
||||
yeast = { path = "../yeast" }
|
||||
|
||||
@@ -298,6 +298,9 @@ pub fn extract(
|
||||
yeast_runner: Option<&yeast::Runner<'_>>,
|
||||
) {
|
||||
let path_str = file_paths::normalize_and_transform_path(path, transformer);
|
||||
let source_root = std::env::current_dir().ok();
|
||||
let diagnostics_path =
|
||||
file_paths::relativize_for_diagnostic(path, source_root.as_deref());
|
||||
let span = tracing::span!(
|
||||
tracing::Level::TRACE,
|
||||
"extract",
|
||||
@@ -318,7 +321,7 @@ pub fn extract(
|
||||
source,
|
||||
diagnostics_writer,
|
||||
trap_writer,
|
||||
&path_str,
|
||||
&diagnostics_path,
|
||||
file_label,
|
||||
language_prefix,
|
||||
schema,
|
||||
@@ -343,8 +346,9 @@ struct ChildNode {
|
||||
}
|
||||
|
||||
struct Visitor<'a> {
|
||||
/// The file path of the source code (as string)
|
||||
path: &'a str,
|
||||
/// A path suitable for diagnostic locations: relative to the source root if possible,
|
||||
/// otherwise a file: URI
|
||||
diagnostics_path: &'a str,
|
||||
/// The label to use whenever we need to refer to the `@file` entity of this
|
||||
/// source file.
|
||||
file_label: trap::Label,
|
||||
@@ -376,13 +380,13 @@ impl<'a> Visitor<'a> {
|
||||
source: &'a [u8],
|
||||
diagnostics_writer: &'a mut diagnostics::LogWriter,
|
||||
trap_writer: &'a mut trap::Writer,
|
||||
path: &'a str,
|
||||
diagnostics_path: &'a str,
|
||||
file_label: trap::Label,
|
||||
language_prefix: &str,
|
||||
schema: &'a NodeTypeMap,
|
||||
) -> Visitor<'a> {
|
||||
Visitor {
|
||||
path,
|
||||
diagnostics_path,
|
||||
file_label,
|
||||
source,
|
||||
diagnostics_writer,
|
||||
@@ -433,7 +437,7 @@ impl<'a> Visitor<'a> {
|
||||
);
|
||||
mesg.severity(diagnostics::Severity::Warning)
|
||||
.location(
|
||||
self.path,
|
||||
self.diagnostics_path,
|
||||
loc.start_line,
|
||||
loc.start_column,
|
||||
loc.end_line,
|
||||
@@ -553,7 +557,7 @@ impl<'a> Visitor<'a> {
|
||||
)
|
||||
.severity(diagnostics::Severity::Warning)
|
||||
.location(
|
||||
self.path,
|
||||
self.diagnostics_path,
|
||||
loc.start_line,
|
||||
loc.start_column,
|
||||
loc.end_line,
|
||||
|
||||
@@ -3,6 +3,25 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
/// Given an absolute path, returns a relative path if it's under `source_root`,
|
||||
/// otherwise a properly-encoded `file:` URI. This is used for diagnostic locations, which
|
||||
/// should use relative paths per the CodeQL diagnostic message format spec.
|
||||
pub fn relativize_for_diagnostic(path: &Path, source_root: Option<&Path>) -> String {
|
||||
source_root
|
||||
.and_then(|root| path.strip_prefix(root).ok())
|
||||
.and_then(|rel| rel.to_str())
|
||||
.map(|s| s.to_owned())
|
||||
.unwrap_or_else(|| path_to_file_uri(path))
|
||||
}
|
||||
|
||||
/// Convert a path to a `file:` URI, using the `url` crate for proper percent-encoding.
|
||||
/// Falls back to a simple `file://` prefix if the `url` crate can't handle the path.
|
||||
fn path_to_file_uri(path: &Path) -> String {
|
||||
url::Url::from_file_path(path)
|
||||
.map(|u| u.to_string())
|
||||
.unwrap_or_else(|()| format!("file://{}", path.display()))
|
||||
}
|
||||
|
||||
/// This represents the minimum supported path transformation that is needed to support extracting
|
||||
/// overlay databases. Specifically, it represents a transformer where one path prefix is replaced
|
||||
/// with a different prefix.
|
||||
@@ -224,3 +243,44 @@ pub fn path_for(
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn relativize_under_source_root() {
|
||||
let path = Path::new("/home/runner/work/repo/src/foo.rb");
|
||||
let result =
|
||||
relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo")));
|
||||
assert_eq!(result, "src/foo.rb");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relativize_outside_source_root_produces_file_uri() {
|
||||
let path = Path::new("/other/location/foo.rb");
|
||||
let result =
|
||||
relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo")));
|
||||
assert_eq!(result, "file:///other/location/foo.rb");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relativize_no_source_root_produces_file_uri() {
|
||||
let path = Path::new("/home/runner/work/repo/src/foo.rb");
|
||||
let result = relativize_for_diagnostic(path, None);
|
||||
assert_eq!(result, "file:///home/runner/work/repo/src/foo.rb");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn path_to_file_uri_encodes_spaces() {
|
||||
let result = path_to_file_uri(Path::new("/home/user/my project/foo.rb"));
|
||||
assert_eq!(result, "file:///home/user/my%20project/foo.rb");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relativize_exact_root_path() {
|
||||
let path = Path::new("/repo/foo.rb");
|
||||
let result = relativize_for_diagnostic(path, Some(Path::new("/repo")));
|
||||
assert_eq!(result, "foo.rb");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user