Use relative paths in tree-sitter extractor diagnostics

Diagnostic `location.file` entries were using absolute paths (e.g.
`/home/runner/work/...`), causing broken links in the GitHub UI.
Now relativize against CWD (the source root during extraction), falling
back to a properly percent-encoded `file:` URI for paths outside it.

Fixes https://github.com/github/codeql/issues/21802

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Paolo Tranquilli
2026-05-13 09:32:11 +02:00
parent 0b808e1170
commit d16bc36e83
8 changed files with 81 additions and 12 deletions

1
Cargo.lock generated
View File

@@ -456,6 +456,7 @@ dependencies = [
"tree-sitter",
"tree-sitter-json",
"tree-sitter-ql",
"url",
"yeast",
"zstd",
]

BIN
ql/Cargo.lock generated

Binary file not shown.

View File

@@ -94,11 +94,14 @@ pub fn run(options: Options) -> std::io::Result<()> {
node_types::read_node_types_str("erb", tree_sitter_embedded_template::NODE_TYPES)?;
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
let lines = lines?;
let source_root = std::env::current_dir().ok();
lines
.par_iter()
.try_for_each(|line| {
let mut diagnostics_writer = diagnostics.logger();
let path = PathBuf::from(line).canonicalize()?;
let diagnostics_file_path =
file_paths::relativize_for_diagnostic(&path, source_root.as_deref());
match &overlay_changed_files {
Some(changed_files) if !changed_files.contains(&path) => {
// We are extracting an overlay and this file is not in the list of changes files, so we should skip it.
@@ -165,7 +168,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
"character-decoding-error",
"Character decoding error",
)
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
.file(&diagnostics_file_path)
.message(
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
&[
@@ -185,7 +188,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
diagnostics_writer.write(
diagnostics_writer
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
.file(&diagnostics_file_path)
.message(
"Unknown character encoding {} in {} {}.",
&[

View File

@@ -5,7 +5,7 @@
"location": {
"endColumn": 5,
"endLine": 1,
"file": "<test-root-directory>/bad.rb",
"file": "bad.rb",
"startColumn": 4,
"startLine": 1
},
@@ -28,7 +28,7 @@
"location": {
"endColumn": 7,
"endLine": 3,
"file": "<test-root-directory>/bad.rb",
"file": "bad.rb",
"startColumn": 8,
"startLine": 3
},

View File

@@ -3,7 +3,7 @@
"https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive"
],
"location": {
"file": "<test-root-directory>/encoding.rb"
"file": "encoding.rb"
},
"markdownMessage": "Unknown character encoding `silly` in `#encoding:` [directive](https://docs.ruby-lang.org/en/master/syntax/comments_rdoc.html#label-encoding+Directive).",
"plaintextMessage": "Unknown character encoding silly in #encoding: directive.",

View File

@@ -18,6 +18,7 @@ lazy_static = "1.5.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4.42", features = ["serde"] }
url = "2.5"
num_cpus = "1.17.0"
zstd = "0.13.3"
yeast = { path = "../yeast" }

View File

@@ -298,6 +298,9 @@ pub fn extract(
yeast_runner: Option<&yeast::Runner<'_>>,
) {
let path_str = file_paths::normalize_and_transform_path(path, transformer);
let source_root = std::env::current_dir().ok();
let diagnostics_path =
file_paths::relativize_for_diagnostic(path, source_root.as_deref());
let span = tracing::span!(
tracing::Level::TRACE,
"extract",
@@ -318,7 +321,7 @@ pub fn extract(
source,
diagnostics_writer,
trap_writer,
&path_str,
&diagnostics_path,
file_label,
language_prefix,
schema,
@@ -343,8 +346,9 @@ struct ChildNode {
}
struct Visitor<'a> {
/// The file path of the source code (as string)
path: &'a str,
/// A path suitable for diagnostic locations: relative to the source root if possible,
/// otherwise a file: URI
diagnostics_path: &'a str,
/// The label to use whenever we need to refer to the `@file` entity of this
/// source file.
file_label: trap::Label,
@@ -376,13 +380,13 @@ impl<'a> Visitor<'a> {
source: &'a [u8],
diagnostics_writer: &'a mut diagnostics::LogWriter,
trap_writer: &'a mut trap::Writer,
path: &'a str,
diagnostics_path: &'a str,
file_label: trap::Label,
language_prefix: &str,
schema: &'a NodeTypeMap,
) -> Visitor<'a> {
Visitor {
path,
diagnostics_path,
file_label,
source,
diagnostics_writer,
@@ -433,7 +437,7 @@ impl<'a> Visitor<'a> {
);
mesg.severity(diagnostics::Severity::Warning)
.location(
self.path,
self.diagnostics_path,
loc.start_line,
loc.start_column,
loc.end_line,
@@ -553,7 +557,7 @@ impl<'a> Visitor<'a> {
)
.severity(diagnostics::Severity::Warning)
.location(
self.path,
self.diagnostics_path,
loc.start_line,
loc.start_column,
loc.end_line,

View File

@@ -3,6 +3,25 @@ use std::{
path::{Path, PathBuf},
};
/// Given an absolute path, returns a relative path if it's under `source_root`,
/// otherwise a properly-encoded `file:` URI. This is used for diagnostic locations, which
/// should use relative paths per the CodeQL diagnostic message format spec.
pub fn relativize_for_diagnostic(path: &Path, source_root: Option<&Path>) -> String {
source_root
.and_then(|root| path.strip_prefix(root).ok())
.and_then(|rel| rel.to_str())
.map(|s| s.to_owned())
.unwrap_or_else(|| path_to_file_uri(path))
}
/// Convert a path to a `file:` URI, using the `url` crate for proper percent-encoding.
/// Falls back to a simple `file://` prefix if the `url` crate can't handle the path.
fn path_to_file_uri(path: &Path) -> String {
url::Url::from_file_path(path)
.map(|u| u.to_string())
.unwrap_or_else(|()| format!("file://{}", path.display()))
}
/// This represents the minimum supported path transformation that is needed to support extracting
/// overlay databases. Specifically, it represents a transformer where one path prefix is replaced
/// with a different prefix.
@@ -224,3 +243,44 @@ pub fn path_for(
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn relativize_under_source_root() {
let path = Path::new("/home/runner/work/repo/src/foo.rb");
let result =
relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo")));
assert_eq!(result, "src/foo.rb");
}
#[test]
fn relativize_outside_source_root_produces_file_uri() {
let path = Path::new("/other/location/foo.rb");
let result =
relativize_for_diagnostic(path, Some(Path::new("/home/runner/work/repo")));
assert_eq!(result, "file:///other/location/foo.rb");
}
#[test]
fn relativize_no_source_root_produces_file_uri() {
let path = Path::new("/home/runner/work/repo/src/foo.rb");
let result = relativize_for_diagnostic(path, None);
assert_eq!(result, "file:///home/runner/work/repo/src/foo.rb");
}
#[test]
fn path_to_file_uri_encodes_spaces() {
let result = path_to_file_uri(Path::new("/home/user/my project/foo.rb"));
assert_eq!(result, "file:///home/user/my%20project/foo.rb");
}
#[test]
fn relativize_exact_root_path() {
let path = Path::new("/repo/foo.rb");
let result = relativize_for_diagnostic(path, Some(Path::new("/repo")));
assert_eq!(result, "foo.rb");
}
}