// File: codeql/rust/extractor/src/rust_analyzer.rs
// Snapshot: 2025-03-21 11:00:30 +01:00 (305 lines, 10 KiB, Rust)

use itertools::Itertools;
use ra_ap_base_db::{EditionedFileId, RootQueryDb, SourceDatabase};
use ra_ap_hir::Semantics;
use ra_ap_ide_db::RootDatabase;
use ra_ap_load_cargo::{LoadCargoConfig, load_workspace_at};
use ra_ap_paths::{AbsPath, Utf8PathBuf};
use ra_ap_project_model::ProjectManifest;
use ra_ap_project_model::{CargoConfig, ManifestPath};
use ra_ap_span::Edition;
use ra_ap_span::EditionedFileId as SpanEditionedFileId;
use ra_ap_span::TextRange;
use ra_ap_span::TextSize;
use ra_ap_syntax::SourceFile;
use ra_ap_syntax::SyntaxError;
use ra_ap_vfs::Vfs;
use ra_ap_vfs::VfsPath;
use ra_ap_vfs::{AbsPathBuf, FileId};
use serde::Deserialize;
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use tracing::{debug, error, info, trace, warn};
use triomphe::Arc;
/// Entry point for parsing Rust files, either backed by a loaded project
/// (VFS plus semantics) or running without any semantic information.
pub enum RustAnalyzer<'a> {
/// A project is available: files are looked up in `vfs` and parsed through `semantics`.
WithSemantics {
vfs: &'a Vfs,
semantics: &'a Semantics<'a, RootDatabase>,
},
/// No semantics are available; `reason` is reported back in
/// `ParseResult::semantics_info` as the error value.
WithoutSemantics {
reason: &'a str,
},
}
/// Semantic context attached to a successfully resolved file: the editioned
/// file id the file was parsed under and the semantics handle it came from.
pub struct FileSemanticInformation<'a> {
pub file_id: EditionedFileId,
pub semantics: &'a Semantics<'a, RootDatabase>,
}
/// Outcome of `RustAnalyzer::parse` for a single file.
pub struct ParseResult<'a> {
/// Parsed syntax tree of the file.
pub ast: SourceFile,
/// Text that was parsed (taken from the database, or read from disk on fallback).
pub text: Arc<str>,
/// Errors collected while parsing; on the disk fallback this also includes
/// file-read and invalid UTF-8 errors.
pub errors: Vec<SyntaxError>,
/// `Ok` with semantic information when the file was found in the project,
/// otherwise `Err` with a short reason why semantics are unavailable.
pub semantics_info: Result<FileSemanticInformation<'a>, &'a str>,
}
impl<'a> RustAnalyzer<'a> {
pub fn load_workspace(
project: &ProjectManifest,
config: &CargoConfig,
load_config: &LoadCargoConfig,
) -> Option<(RootDatabase, Vfs)> {
let progress = |t| (trace!("progress: {}", t));
let manifest = project.manifest_path();
match load_workspace_at(manifest.as_ref(), config, load_config, &progress) {
Ok((db, vfs, _macro_server)) => Some((db, vfs)),
Err(err) => {
error!("failed to load workspace for {}: {}", manifest, err);
None
}
}
}
pub fn new(vfs: &'a Vfs, semantics: &'a Semantics<'a, RootDatabase>) -> Self {
RustAnalyzer::WithSemantics { vfs, semantics }
}
pub fn parse(&self, path: &Path) -> ParseResult {
let no_semantics_reason;
match self {
RustAnalyzer::WithSemantics { vfs, semantics } => {
if let Some(file_id) = path_to_file_id(path, vfs) {
if let Ok(input) = std::panic::catch_unwind(|| semantics.db.file_text(file_id))
{
let file_id = EditionedFileId::new(
semantics.db,
SpanEditionedFileId::current_edition(file_id),
);
let source_file = semantics.parse(file_id);
let errors = semantics
.db
.parse_errors(file_id)
.into_iter()
.flat_map(|x| x.to_vec())
.collect();
return ParseResult {
ast: source_file,
text: input.text(semantics.db),
errors,
semantics_info: Ok(FileSemanticInformation { file_id, semantics }),
};
}
debug!(
"No text available for file_id '{:?}', falling back to loading file '{}' from disk.",
file_id,
path.to_string_lossy()
);
no_semantics_reason = "no text available for the file in the project";
} else {
no_semantics_reason = "file not found in project";
}
}
RustAnalyzer::WithoutSemantics { reason } => {
no_semantics_reason = reason;
}
}
let mut errors = Vec::new();
let input = match std::fs::read(path) {
Ok(data) => data,
Err(e) => {
errors.push(SyntaxError::new(
format!("Could not read {}: {}", path.to_string_lossy(), e),
TextRange::empty(TextSize::default()),
));
vec![]
}
};
let (input, err) = from_utf8_lossy(&input);
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
errors.extend(parse.errors());
errors.extend(err);
ParseResult {
ast: parse.tree(),
text: input.as_ref().into(),
errors,
semantics_info: Err(no_semantics_reason),
}
}
}
/// The part of a manifest's `workspace` table that this module reads:
/// the `members` and `exclude` lists. Either list defaults to empty when absent.
#[derive(Deserialize)]
struct CargoManifestMembersSlice {
#[serde(default)]
members: Vec<String>,
#[serde(default)]
exclude: Vec<String>,
}
/// Minimal view of a manifest file: only the optional `workspace` table is deserialized.
#[derive(Deserialize)]
struct CargoManifestSlice {
// `None` when the manifest has no `workspace` table.
workspace: Option<CargoManifestMembersSlice>,
}
/// Reads and parses manifest files, caching each parsed result by manifest path.
struct TomlReader {
// Parsed manifests keyed by path; values are shared via `Rc` so lookups hand out cheap clones.
cache: HashMap<ManifestPath, Rc<CargoManifestSlice>>,
}
impl TomlReader {
    /// Creates a reader with an empty cache.
    fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Returns the parsed slice of `manifest`, reading and parsing the file
    /// only the first time a given path is requested.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be read or is not valid TOML for
    /// `CargoManifestSlice`; the failure is logged before being returned.
    /// Failed reads are not cached.
    fn read(&mut self, manifest: &ManifestPath) -> anyhow::Result<Rc<CargoManifestSlice>> {
        if let Some(cached) = self.cache.get(manifest) {
            return Ok(Rc::clone(cached));
        }

        let content = match fs::read_to_string(manifest) {
            Ok(text) => text,
            Err(e) => {
                error!("failed to read {} ({e})", manifest.as_str());
                return Err(e.into());
            }
        };
        let parsed: CargoManifestSlice = match toml::from_str(&content) {
            Ok(slice) => slice,
            Err(e) => {
                error!("failed to parse {} ({e})", manifest.as_str());
                return Err(e.into());
            }
        };

        let table = Rc::new(parsed);
        self.cache.insert(manifest.clone(), Rc::clone(&table));
        Ok(table)
    }
}
fn workspace_members_match(workspace_dir: &AbsPath, members: &[String], target: &AbsPath) -> bool {
members.iter().any(|p| {
glob::Pattern::new(workspace_dir.join(p).as_str()).is_ok_and(|p| p.matches(target.as_str()))
})
}
/// Finds the workspace manifest that `manifest` belongs to.
///
/// Returns `manifest` itself when it declares a `workspace` table. Otherwise
/// manifests are discovered starting from the directory above the one holding
/// `manifest`, and the first cargo manifest is returned that is located in an
/// ancestor directory and whose workspace `members` match (and whose `exclude`
/// does not match) the directory of `manifest`.
///
/// Returns `None` for non-cargo manifests, when the manifest cannot be read,
/// when discovery fails, or when no enclosing workspace is found.
fn find_workspace(reader: &mut TomlReader, manifest: &ProjectManifest) -> Option<ProjectManifest> {
    let ProjectManifest::CargoToml(cargo) = manifest else {
        return None;
    };
    let Ok(parsed_cargo) = reader.read(cargo) else {
        return None;
    };
    if parsed_cargo.workspace.is_some() {
        debug!("{cargo} is a workspace");
        return Some(manifest.clone());
    }

    // Start the search one level above the directory that contains `cargo`.
    let Some(parent_dir) = cargo.parent().parent() else {
        warn!("no parent dir for {cargo}");
        return None;
    };
    let discovered = match ProjectManifest::discover(parent_dir) {
        Ok(found) => found,
        Err(e) => {
            error!(
                "encountered error while searching for manifests under {}: {e}",
                parent_dir.as_str()
            );
            return None;
        }
    };

    let member_dir = cargo.parent();
    for candidate in discovered.iter() {
        let ProjectManifest::CargoToml(other) = candidate else {
            continue;
        };
        let workspace_dir = other.parent();
        // Only manifests in an ancestor directory can own this package.
        if !cargo.starts_with(workspace_dir) {
            continue;
        }
        let Ok(parsed_other) = reader.read(other) else {
            continue;
        };
        let Some(workspace) = parsed_other.workspace.as_ref() else {
            continue;
        };
        if workspace_members_match(workspace_dir, &workspace.members, member_dir)
            && !workspace_members_match(workspace_dir, &workspace.exclude, member_dir)
        {
            debug!("found workspace {other} containing {cargo}");
            return Some(candidate.clone());
        }
    }

    debug!("no workspace found for {cargo}");
    None
}
/// Discovers the project manifests relevant for `files`.
///
/// Each path is resolved against the current working directory, manifests are
/// discovered for the resulting absolute paths, and every discovered manifest
/// is replaced by its enclosing workspace manifest when one is found. The
/// result is de-duplicated. It is also logged: up to `LOG_LIMIT` entries at
/// `info` level, the remainder at `debug` level.
///
/// # Errors
///
/// Fails when the current working directory cannot be determined.
pub fn find_project_manifests(
    files: &[PathBuf],
) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
    const LOG_LIMIT: usize = 10;

    let cwd = std::env::current_dir()?;
    let mut abs_files = Vec::with_capacity(files.len());
    for file in files {
        abs_files.push(AbsPathBuf::assert_utf8(cwd.join(file)));
    }

    let mut reader = TomlReader::new();
    let manifests: HashSet<_> = ra_ap_project_model::ProjectManifest::discover_all(&abs_files)
        .into_iter()
        // Prefer the enclosing workspace; fall back to the manifest itself.
        .map(|manifest| find_workspace(&mut reader, &manifest).unwrap_or(manifest))
        .collect();

    let lines = || manifests.iter().map(|m| format!(" {m}"));
    let overflow = manifests.len().saturating_sub(LOG_LIMIT);
    if overflow == 0 {
        info!("found manifests:\n{}", lines().join("\n"));
    } else {
        info!(
            "found manifests:\n{}\nand {} more",
            lines().take(LOG_LIMIT).join("\n"),
            overflow
        );
        debug!(
            "rest of the manifests found:\n{}",
            lines().skip(LOG_LIMIT).join("\n")
        );
    }

    Ok(manifests.into_iter().collect())
}
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
let mut iter = v.utf8_chunks();
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
let valid = chunk.valid();
let invalid = chunk.invalid();
if invalid.is_empty() {
debug_assert_eq!(valid.len(), v.len());
return (Cow::Borrowed(valid), None);
}
(valid, invalid)
} else {
return (Cow::Borrowed(""), None);
};
const REPLACEMENT: &str = "\u{FFFD}";
let error_start = first_valid.len() as u32;
let error_end = error_start + first_invalid.len() as u32;
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
let mut res = String::with_capacity(v.len());
res.push_str(first_valid);
res.push_str(REPLACEMENT);
for chunk in iter {
res.push_str(chunk.valid());
if !chunk.invalid().is_empty() {
res.push_str(REPLACEMENT);
}
}
(Cow::Owned(res), Some(error))
}
/// Looks up the VFS file id for `path`.
///
/// Returns `None` when the path is not valid UTF-8, is not absolute, or is
/// not known to `vfs`. The exclusion flag reported by the VFS is ignored.
pub(crate) fn path_to_file_id(path: &Path, vfs: &Vfs) -> Option<FileId> {
    let utf8_path = Utf8PathBuf::from_path_buf(path.to_path_buf()).ok()?;
    let abs_path = AbsPathBuf::try_from(utf8_path).ok()?;
    let (id, _excluded) = vfs.file_id(&VfsPath::from(abs_path))?;
    Some(id)
}