mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Rust: extract files on a per-project basis
This way, only one "project" database is held in memory at a time, which should avoid running out of memory when analyzing large monorepos.
This commit is contained in:
@@ -1,5 +1,13 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use archive::Archiver;
|
||||
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
|
||||
use ra_ap_project_model::ProjectManifest;
|
||||
use rust_analyzer::RustAnalyzer;
|
||||
mod archive;
|
||||
mod config;
|
||||
pub mod generated;
|
||||
@@ -9,10 +17,13 @@ pub mod trap;
|
||||
|
||||
fn extract(
|
||||
rust_analyzer: &mut rust_analyzer::RustAnalyzer,
|
||||
archiver: &Archiver,
|
||||
traps: &trap::TrapFileProvider,
|
||||
file: std::path::PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(&file);
|
||||
file: &std::path::Path,
|
||||
) -> () {
|
||||
archiver.archive(&file);
|
||||
|
||||
let (ast, input, parse_errors, file_id, semi) = rust_analyzer.parse(file);
|
||||
let line_index = LineIndex::new(input.as_ref());
|
||||
let display_path = file.to_string_lossy();
|
||||
let mut trap = traps.create("source", &file);
|
||||
@@ -40,8 +51,13 @@ fn extract(
|
||||
);
|
||||
}
|
||||
translator.emit_source_file(ast);
|
||||
translator.trap.commit()?;
|
||||
Ok(())
|
||||
translator.trap.commit().unwrap_or_else(|err| {
|
||||
log::error!(
|
||||
"Failed to write trap file for: {}: {}",
|
||||
display_path,
|
||||
err.to_string()
|
||||
)
|
||||
});
|
||||
}
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let cfg = config::Config::extract().context("failed to load configuration")?;
|
||||
@@ -49,17 +65,49 @@ fn main() -> anyhow::Result<()> {
|
||||
.module(module_path!())
|
||||
.verbosity(2 + cfg.verbose as usize)
|
||||
.init()?;
|
||||
let mut rust_analyzer = rust_analyzer::RustAnalyzer::new(&cfg)?;
|
||||
|
||||
let traps = trap::TrapFileProvider::new(&cfg).context("failed to set up trap files")?;
|
||||
let archiver = archive::Archiver {
|
||||
root: cfg.source_archive_dir,
|
||||
};
|
||||
for file in cfg.inputs {
|
||||
let file = std::path::absolute(&file).unwrap_or(file);
|
||||
let file = std::fs::canonicalize(&file).unwrap_or(file);
|
||||
archiver.archive(&file);
|
||||
extract(&mut rust_analyzer, &traps, file)?;
|
||||
let files: Vec<PathBuf> = cfg
|
||||
.inputs
|
||||
.iter()
|
||||
.map(|file| {
|
||||
let file = std::path::absolute(&file).unwrap_or(file.to_path_buf());
|
||||
std::fs::canonicalize(&file).unwrap_or(file)
|
||||
})
|
||||
.collect();
|
||||
let manifests = rust_analyzer::find_project_manifests(&files)?;
|
||||
let mut map: HashMap<&Path, (&ProjectManifest, Vec<&Path>)> = manifests
|
||||
.iter()
|
||||
.map(|x| (x.manifest_path().parent().as_ref(), (x, Vec::new())))
|
||||
.collect();
|
||||
let mut other_files = Vec::new();
|
||||
|
||||
'outer: for file in &files {
|
||||
let mut p = file.as_path();
|
||||
while let Some(parent) = p.parent() {
|
||||
p = parent;
|
||||
if let Some((_, files)) = map.get_mut(parent) {
|
||||
files.push(file);
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
other_files.push(file);
|
||||
}
|
||||
for (manifest, files) in map.values() {
|
||||
if files.is_empty() {
|
||||
break;
|
||||
}
|
||||
let mut rust_analyzer = RustAnalyzer::new(manifest, &cfg.scratch_dir);
|
||||
for file in files {
|
||||
extract(&mut rust_analyzer, &archiver, &traps, file);
|
||||
}
|
||||
}
|
||||
let mut rust_analyzer = RustAnalyzer::WithoutDatabase();
|
||||
for file in other_files {
|
||||
extract(&mut rust_analyzer, &archiver, &traps, file);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use crate::config::Config;
|
||||
use anyhow::Context;
|
||||
use itertools::Itertools;
|
||||
use log::info;
|
||||
use ra_ap_base_db::SourceDatabase;
|
||||
@@ -9,6 +7,7 @@ use ra_ap_ide_db::RootDatabase;
|
||||
use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
|
||||
use ra_ap_paths::Utf8PathBuf;
|
||||
use ra_ap_project_model::CargoConfig;
|
||||
use ra_ap_project_model::ProjectManifest;
|
||||
use ra_ap_project_model::RustLibSource;
|
||||
use ra_ap_span::Edition;
|
||||
use ra_ap_span::EditionedFileId;
|
||||
@@ -20,19 +19,18 @@ use ra_ap_vfs::AbsPathBuf;
|
||||
use ra_ap_vfs::Vfs;
|
||||
use ra_ap_vfs::VfsPath;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use triomphe::Arc;
|
||||
pub struct RustAnalyzer {
|
||||
workspace: HashMap<PathBuf, (Vfs, RootDatabase)>,
|
||||
pub enum RustAnalyzer {
|
||||
WithDatabase { db: RootDatabase, vfs: Vfs },
|
||||
WithoutDatabase(),
|
||||
}
|
||||
|
||||
impl RustAnalyzer {
|
||||
pub fn new(cfg: &Config) -> anyhow::Result<RustAnalyzer> {
|
||||
let mut workspace = HashMap::new();
|
||||
pub fn new(project: &ProjectManifest, scratch_dir: &Path) -> Self {
|
||||
let config = CargoConfig {
|
||||
sysroot: Some(RustLibSource::Discover),
|
||||
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir.to_path_buf())
|
||||
target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(scratch_dir.to_path_buf())
|
||||
.map(|x| x.join("target"))
|
||||
.ok(),
|
||||
..Default::default()
|
||||
@@ -43,25 +41,19 @@ impl RustAnalyzer {
|
||||
with_proc_macro_server: ProcMacroServerChoice::Sysroot,
|
||||
prefill_caches: false,
|
||||
};
|
||||
let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?;
|
||||
for project in projects {
|
||||
let manifest = project.manifest_path();
|
||||
let manifest = project.manifest_path();
|
||||
|
||||
match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
|
||||
Ok((db, vfs, _macro_server)) => {
|
||||
let path: &Path = manifest.parent().as_ref();
|
||||
workspace.insert(path.to_path_buf(), (vfs, db));
|
||||
}
|
||||
Err(err) => {
|
||||
log::error!("failed to load workspace for {}: {}", manifest, err);
|
||||
}
|
||||
match load_workspace_at(manifest.as_ref(), &config, &load_config, &progress) {
|
||||
Ok((db, vfs, _macro_server)) => RustAnalyzer::WithDatabase { db, vfs },
|
||||
Err(err) => {
|
||||
log::error!("failed to load workspace for {}: {}", manifest, err);
|
||||
RustAnalyzer::WithoutDatabase()
|
||||
}
|
||||
}
|
||||
Ok(RustAnalyzer { workspace })
|
||||
}
|
||||
pub fn parse(
|
||||
&mut self,
|
||||
path: &PathBuf,
|
||||
path: &Path,
|
||||
) -> (
|
||||
SourceFile,
|
||||
Arc<str>,
|
||||
@@ -82,37 +74,30 @@ impl RustAnalyzer {
|
||||
};
|
||||
let (input, err) = from_utf8_lossy(&input);
|
||||
|
||||
let mut p = path.as_path();
|
||||
while let Some(parent) = p.parent() {
|
||||
p = parent;
|
||||
if self.workspace.contains_key(parent) {
|
||||
let (vfs, db) = self.workspace.get_mut(parent).unwrap();
|
||||
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
|
||||
.ok()
|
||||
.and_then(|x| AbsPathBuf::try_from(x).ok())
|
||||
.map(VfsPath::from)
|
||||
.and_then(|x| vfs.file_id(&x))
|
||||
{
|
||||
db.set_file_text(file_id, &input);
|
||||
let semi = Semantics::new(db);
|
||||
if let RustAnalyzer::WithDatabase { vfs, db } = self {
|
||||
if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
|
||||
.ok()
|
||||
.and_then(|x| AbsPathBuf::try_from(x).ok())
|
||||
.map(VfsPath::from)
|
||||
.and_then(|x| vfs.file_id(&x))
|
||||
{
|
||||
db.set_file_text(file_id, &input);
|
||||
let semi = Semantics::new(db);
|
||||
|
||||
let file_id = EditionedFileId::current_edition(file_id);
|
||||
let source_file = semi.parse(file_id);
|
||||
errors.extend(
|
||||
db.parse_errors(file_id)
|
||||
.into_iter()
|
||||
.flat_map(|x| x.to_vec()),
|
||||
);
|
||||
return (
|
||||
source_file,
|
||||
input.as_ref().into(),
|
||||
errors,
|
||||
Some(file_id),
|
||||
Some(semi),
|
||||
);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
let file_id = EditionedFileId::current_edition(file_id);
|
||||
let source_file = semi.parse(file_id);
|
||||
errors.extend(
|
||||
db.parse_errors(file_id)
|
||||
.into_iter()
|
||||
.flat_map(|x| x.to_vec()),
|
||||
);
|
||||
return (
|
||||
source_file,
|
||||
input.as_ref().into(),
|
||||
errors,
|
||||
Some(file_id),
|
||||
Some(semi),
|
||||
);
|
||||
}
|
||||
}
|
||||
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
|
||||
@@ -122,7 +107,7 @@ impl RustAnalyzer {
|
||||
}
|
||||
}
|
||||
|
||||
fn find_project_manifests(
|
||||
pub fn find_project_manifests(
|
||||
files: &[PathBuf],
|
||||
) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
|
||||
let current = std::env::current_dir()?;
|
||||
|
||||
Reference in New Issue
Block a user