Merge branch 'main' into redsun82/rust-less-canonical-paths

This commit is contained in:
Paolo Tranquilli
2024-12-04 14:35:10 +01:00
677 changed files with 1661 additions and 18192 deletions

View File

@@ -33,4 +33,6 @@ codeql-extractor = { path = "../../shared/tree-sitter-extractor" }
rust-extractor-macros = { path = "macros" }
itertools = "0.13.0"
glob = "0.3.1"
chrono = { version = "0.4.38", features = ["serde"] }
serde_json = "1.0.133"
dunce = "1.0.5"

View File

@@ -45,6 +45,7 @@ pub struct Config {
pub scratch_dir: PathBuf,
pub trap_dir: PathBuf,
pub source_archive_dir: PathBuf,
pub diagnostic_dir: PathBuf,
pub cargo_target_dir: Option<PathBuf>,
pub cargo_target: Option<String>,
pub cargo_features: Vec<String>,

View File

@@ -0,0 +1,255 @@
use crate::config::Config;
use anyhow::Context;
use chrono::{DateTime, Utc};
use log::{debug, info};
use ra_ap_project_model::ProjectManifest;
use serde::ser::SerializeMap;
use serde::Serialize;
use std::collections::HashMap;
use std::fmt::Display;
use std::fs::File;
use std::path::{Path, PathBuf};
use std::time::Instant;
/// Severity level reported with a diagnostic.
///
/// Variant names are serialized in camelCase and `Note` is the default.
#[derive(Default, Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "camelCase")]
// `Warning` and `Error` are not constructed anywhere in the code visible in this file.
#[allow(dead_code)]
enum Severity {
#[default]
Note,
Warning,
Error,
}
/// Per-destination visibility flags of a diagnostic.
///
/// Every flag defaults to `false`; field names are serialized in camelCase
/// (`statusPage`, `cliSummaryTable`, `telemetry`).
/// NOTE(review): the exact meaning of each destination is defined by the consumer of the
/// diagnostics file, not by this module — confirm there.
#[derive(Default, Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "camelCase")]
struct Visibility {
status_page: bool,
cli_summary_table: bool,
telemetry: bool,
}
/// Body of a diagnostic, either plain text or markdown.
///
/// Variant names are serialized in camelCase (`textMessage` / `markdownMessage`).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
// `MarkdownMessage` is not constructed anywhere in the code visible in this file.
#[allow(dead_code)]
enum Message {
TextMessage(String),
MarkdownMessage(String),
}
impl Default for Message {
fn default() -> Self {
Message::TextMessage("".to_string())
}
}
/// Origin of a diagnostic: an id, a display name and the name of the extractor.
///
/// Field names are serialized in camelCase (e.g. `extractorName`).
#[derive(Default, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct Source {
id: String,
name: String,
extractor_name: String,
}
/// Source span a diagnostic can point at: a file plus start/end line and column.
///
/// Field names are serialized in camelCase.
/// NOTE(review): whether lines and columns are 0- or 1-based is not established by this
/// file — confirm against the consumer of the diagnostics.
#[derive(Default, Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct Location {
file: PathBuf,
start_line: u32,
start_column: u32,
end_line: u32,
end_column: u32,
}
/// A diagnostic record, generic over the type `T` of its `attributes` payload.
///
/// When serialized, `message` is flattened into the record itself and `location` is left
/// out when it is `None`.
#[derive(Default, Debug, Clone, Serialize)]
pub struct Diagnostics<T> {
source: Source,
visibility: Visibility,
severity: Severity,
// Flattened: the message's own key appears directly among the record's keys.
#[serde(flatten)]
message: Message,
timestamp: DateTime<Utc>,
// Omitted from the output entirely when there is no location.
#[serde(skip_serializing_if = "Option::is_none")]
location: Option<Location>,
attributes: T,
}
/// Kind of a timed extraction step.
///
/// Implements `Eq` and `Hash` so it can serve as a hash-map key; variant names are
/// serialized in camelCase and `LoadManifest` is the default.
#[derive(Default, Debug, Clone, Copy, Serialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "camelCase")]
pub enum ExtractionStepKind {
#[default]
LoadManifest,
LoadSource,
Parse,
Extract,
}
/// One timed extraction step: what was done, on which file, and how long it took.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ExtractionStep {
/// Kind of work this step performed.
pub action: ExtractionStepKind,
/// File (or manifest) the step operated on.
pub file: PathBuf,
/// Elapsed time of the step in milliseconds.
pub ms: u128,
}
impl ExtractionStep {
    /// Builds a step of kind `action` for `file`, timed from `start` until now, and logs it
    /// at debug level.
    fn new(start: Instant, action: ExtractionStepKind, file: PathBuf) -> Self {
        let ms = start.elapsed().as_millis();
        let ret = Self { action, file, ms };
        debug!("{ret:?}");
        ret
    }

    /// Records the loading of the manifest `target`, started at `start`.
    pub fn load_manifest(start: Instant, target: &ProjectManifest) -> Self {
        let manifest = PathBuf::from(target.manifest_path());
        Self::new(start, ExtractionStepKind::LoadManifest, manifest)
    }

    /// Records the parsing of `target`, started at `start`.
    pub fn parse(start: Instant, target: &Path) -> Self {
        Self::new(start, ExtractionStepKind::Parse, target.to_path_buf())
    }

    /// Records the extraction of `target`, started at `start`.
    pub fn extract(start: Instant, target: &Path) -> Self {
        Self::new(start, ExtractionStepKind::Extract, target.to_path_buf())
    }

    /// Records the source loading of `target`, started at `start`.
    pub fn load_source(start: Instant, target: &Path) -> Self {
        Self::new(start, ExtractionStepKind::LoadSource, target.to_path_buf())
    }
}
/// A duration stored as a number of milliseconds.
///
/// Its `Serialize` implementation emits both the raw `ms` value and a human-readable
/// `pretty` rendering.
#[derive(Debug, Default, Clone)]
struct HumanReadableDuration(u128);
/// Serializes as a two-entry map: `ms` (raw milliseconds) and `pretty` (formatted text).
impl Serialize for HumanReadableDuration {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Self(ms) = self;
        let pretty = self.pretty();
        let mut entries = serializer.serialize_map(Some(2))?;
        entries.serialize_entry("ms", ms)?;
        entries.serialize_entry("pretty", &pretty)?;
        entries.end()
    }
}
impl HumanReadableDuration {
    /// Adds `other` milliseconds to this duration.
    pub fn add(&mut self, other: u128) {
        self.0 += other;
    }

    /// Renders the duration as `S.mmms`, `MminSS.mmms` or `HhMMminSS.mmms`, picking the
    /// shortest form that fits (hours are never broken down into days).
    pub fn pretty(&self) -> String {
        let milliseconds = self.0 % 1000;
        let whole_seconds = self.0 / 1000;
        let whole_minutes = whole_seconds / 60;

        let seconds = whole_seconds % 60;
        let minutes = whole_minutes % 60;
        let hours = whole_minutes / 60;

        if whole_minutes == 0 {
            format!("{seconds}.{milliseconds:03}s")
        } else if hours == 0 {
            format!("{minutes}min{seconds:02}.{milliseconds:03}s")
        } else {
            format!("{hours}h{minutes:02}min{seconds:02}.{milliseconds:03}s")
        }
    }
}
impl From<u128> for HumanReadableDuration {
fn from(val: u128) -> Self {
HumanReadableDuration(val)
}
}
/// Displays the duration using its `pretty` rendering; width/fill flags are not applied.
impl Display for HumanReadableDuration {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let rendered = self.pretty();
        write!(f, "{}", rendered)
    }
}
/// Accumulated durations per step kind together with the overall total.
///
/// The per-kind map is flattened on serialization, so every step kind becomes a sibling
/// key of `total`.
#[derive(Debug, Default, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct DurationsSummary {
#[serde(flatten)]
durations: HashMap<ExtractionStepKind, HumanReadableDuration>,
total: HumanReadableDuration,
}
/// Summary of an extraction run: manifest and file counts plus the measured durations.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Default, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct ExtractionSummary {
number_of_manifests: usize,
number_of_files: usize,
durations: DurationsSummary,
}
/// Diagnostic record whose `attributes` payload is an [`ExtractionSummary`].
type ExtractionDiagnostics = Diagnostics<ExtractionSummary>;
fn summary(start: Instant, steps: &[ExtractionStep]) -> ExtractionSummary {
let mut number_of_manifests = 0;
let mut number_of_files = 0;
let mut durations = HashMap::new();
for step in steps {
match &step.action {
ExtractionStepKind::LoadManifest => {
number_of_manifests += 1;
}
ExtractionStepKind::Parse => {
number_of_files += 1;
}
_ => {}
}
durations
.entry(step.action)
.or_insert(HumanReadableDuration(0))
.add(step.ms);
}
let total = start.elapsed().as_millis().into();
for (key, value) in &durations {
info!("total duration ({key:?}): {value}");
}
info!("total duration: {total}");
ExtractionSummary {
number_of_manifests,
number_of_files,
durations: DurationsSummary { durations, total },
}
}
pub fn emit_extraction_diagnostics(
start: Instant,
config: &Config,
steps: &[ExtractionStep],
) -> anyhow::Result<()> {
let summary = summary(start, steps);
let diagnostics = ExtractionDiagnostics {
source: Source {
id: "rust/extractor/telemetry".to_owned(),
name: "telemetry".to_string(),
extractor_name: "rust".to_string(),
},
visibility: Visibility {
telemetry: true,
..Default::default()
},
timestamp: Utc::now(),
attributes: summary,
..Default::default()
};
std::fs::create_dir_all(&config.diagnostic_dir).with_context(|| {
format!(
"creating diagnostics directory {}",
config.diagnostic_dir.display()
)
})?;
let target = config.diagnostic_dir.join("extraction.jsonc");
let mut output = File::create(&target)
.with_context(|| format!("creating diagnostics file {}", target.display()))?;
serde_json::to_writer_pretty(&mut output, &diagnostics)
.with_context(|| format!("writing to diagnostics file {}", target.display()))?;
Ok(())
}

View File

@@ -1,2 +1,2 @@
mod.rs 4bcb9def847469aae9d8649461546b7c21ec97cf6e63d3cf394e339915ce65d7 4bcb9def847469aae9d8649461546b7c21ec97cf6e63d3cf394e339915ce65d7
top.rs e0e5e208e1fa42245e8d76bb420a89cb7f357fcc6228683db25aeb9ac057a5b9 e0e5e208e1fa42245e8d76bb420a89cb7f357fcc6228683db25aeb9ac057a5b9
top.rs d09cf25daa06fc9bc802e438231e0f038443d2ede3972a0dd829f322b390c4e4 d09cf25daa06fc9bc802e438231e0f038443d2ede3972a0dd829f322b390c4e4

View File

@@ -4,6 +4,15 @@
use crate::trap;
#[derive(Debug)]
pub struct File {
_unused: ()
}
impl trap::TrapClass for File {
fn class_name() -> &'static str { "File" }
}
#[derive(Debug)]
pub struct Element {
_unused: ()
@@ -13,6 +22,37 @@ impl trap::TrapClass for Element {
fn class_name() -> &'static str { "Element" }
}
#[derive(Debug)]
pub struct ExtractorStep {
pub id: trap::TrapId<ExtractorStep>,
pub action: String,
pub file: trap::Label<File>,
pub duration_ms: usize,
}
impl trap::TrapEntry for ExtractorStep {
fn extract_id(&mut self) -> trap::TrapId<Self> {
std::mem::replace(&mut self.id, trap::TrapId::Star)
}
fn emit(self, id: trap::Label<Self>, out: &mut trap::Writer) {
out.add_tuple("extractor_steps", vec![id.into(), self.action.into(), self.file.into(), self.duration_ms.into()]);
}
}
impl trap::TrapClass for ExtractorStep {
fn class_name() -> &'static str { "ExtractorStep" }
}
impl From<trap::Label<ExtractorStep>> for trap::Label<Element> {
fn from(value: trap::Label<ExtractorStep>) -> Self {
// SAFETY: this is safe because in the dbscheme ExtractorStep is a subclass of Element
unsafe {
Self::from_untyped(value.as_untyped())
}
}
}
#[derive(Debug)]
pub struct Locatable {
_unused: ()

View File

@@ -1,13 +1,16 @@
use crate::diagnostics::{emit_extraction_diagnostics, ExtractionStep};
use crate::rust_analyzer::path_to_file_id;
use crate::trap::TrapId;
use anyhow::Context;
use archive::Archiver;
use log::info;
use log::{info, warn};
use ra_ap_hir::Semantics;
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
use ra_ap_ide_db::RootDatabase;
use ra_ap_project_model::ProjectManifest;
use ra_ap_project_model::{CargoConfig, ProjectManifest};
use ra_ap_vfs::Vfs;
use rust_analyzer::{ParseResult, RustAnalyzer};
use std::time::Instant;
use std::{
collections::HashMap,
path::{Path, PathBuf},
@@ -15,6 +18,7 @@ use std::{
mod archive;
mod config;
mod diagnostics;
pub mod generated;
mod qltest;
mod rust_analyzer;
@@ -24,18 +28,31 @@ pub mod trap;
struct Extractor<'a> {
archiver: &'a Archiver,
traps: &'a trap::TrapFileProvider,
steps: Vec<ExtractionStep>,
}
impl Extractor<'_> {
fn extract(&self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) {
impl<'a> Extractor<'a> {
/// Creates an extractor using `archiver` and `traps`, with no extraction steps recorded yet.
pub fn new(archiver: &'a Archiver, traps: &'a trap::TrapFileProvider) -> Self {
    let steps = Vec::new();
    Self {
        archiver,
        traps,
        steps,
    }
}
fn extract(&mut self, rust_analyzer: &rust_analyzer::RustAnalyzer, file: &std::path::Path) {
self.archiver.archive(file);
let before_parse = Instant::now();
let ParseResult {
ast,
text,
errors,
semantics_info,
} = rust_analyzer.parse(file);
self.steps.push(ExtractionStep::parse(before_parse, file));
let before_extract = Instant::now();
let line_index = LineIndex::new(text.as_ref());
let display_path = file.to_string_lossy();
let mut trap = self.traps.create("source", file);
@@ -73,22 +90,79 @@ impl Extractor<'_> {
err.to_string()
)
});
self.steps
.push(ExtractionStep::extract(before_extract, file));
}
pub fn extract_with_semantics(
&self,
&mut self,
file: &Path,
semantics: &Semantics<'_, RootDatabase>,
vfs: &Vfs,
) {
self.extract(&RustAnalyzer::new(vfs, semantics), file);
}
pub fn extract_without_semantics(&self, file: &Path, reason: &str) {
pub fn extract_without_semantics(&mut self, file: &Path, reason: &str) {
self.extract(&RustAnalyzer::WithoutSemantics { reason }, file);
}
/// Loads the workspace described by `project` and records how long that took as a
/// load-manifest step, whether or not loading succeeded.
pub fn load_manifest(
    &mut self,
    project: &ProjectManifest,
    config: &CargoConfig,
) -> Option<(RootDatabase, Vfs)> {
    let started = Instant::now();
    let workspace = RustAnalyzer::load_workspace(project, config);
    let step = ExtractionStep::load_manifest(started, project);
    self.steps.push(step);
    workspace
}
/// Checks that `file` is known to `vfs` and maps to a module in `semantics`.
///
/// On success a load-source step is recorded and `Ok(())` is returned. Otherwise the error
/// carries the reason the file is unavailable, and no step is recorded.
pub fn load_source(
    &mut self,
    file: &Path,
    semantics: &Semantics<'_, RootDatabase>,
    vfs: &Vfs,
) -> Result<(), String> {
    let started = Instant::now();
    let id = path_to_file_id(file, vfs)
        .ok_or_else(|| "not included in files loaded from manifest".to_string())?;
    let has_module = semantics.file_to_module_def(id).is_some();
    if !has_module {
        return Err("not included as a module".to_string());
    }
    let step = ExtractionStep::load_source(started, file);
    self.steps.push(step);
    Ok(())
}
/// Consumes the extractor, writing the diagnostics summary and then one `ExtractorStep`
/// trap entry per recorded step into the `diagnostics`/`extraction` trap file.
///
/// A step duration that does not fit into `usize` is logged as a warning and replaced by
/// `i32::MAX`.
pub fn emit_extraction_diagnostics(
    self,
    start: Instant,
    cfg: &config::Config,
) -> anyhow::Result<()> {
    emit_extraction_diagnostics(start, cfg, &self.steps)?;
    let mut trap = self.traps.create("diagnostics", "extraction");
    for step in self.steps {
        let file = trap.emit_file(&step.file);
        let duration_ms = match usize::try_from(step.ms) {
            Ok(ms) => ms,
            Err(_) => {
                warn!("extraction step duration overflowed ({step:?})");
                i32::MAX as usize
            }
        };
        let action = format!("{:?}", step.action);
        let entry = generated::ExtractorStep {
            id: TrapId::Star,
            action,
            file,
            duration_ms,
        };
        trap.emit(entry);
    }
    trap.commit()?;
    Ok(())
}
}
fn main() -> anyhow::Result<()> {
let start = Instant::now();
let mut cfg = config::Config::extract().context("failed to load configuration")?;
stderrlog::new()
.module(module_path!())
@@ -103,10 +177,7 @@ fn main() -> anyhow::Result<()> {
let archiver = archive::Archiver {
root: cfg.source_archive_dir.clone(),
};
let extractor = Extractor {
archiver: &archiver,
traps: &traps,
};
let mut extractor = Extractor::new(&archiver, &traps);
let files: Vec<PathBuf> = cfg
.inputs
.iter()
@@ -132,21 +203,13 @@ fn main() -> anyhow::Result<()> {
}
let cargo_config = cfg.to_cargo_config();
for (manifest, files) in map.values().filter(|(_, files)| !files.is_empty()) {
if let Some((ref db, ref vfs)) = RustAnalyzer::load_workspace(manifest, &cargo_config) {
if let Some((ref db, ref vfs)) = extractor.load_manifest(manifest, &cargo_config) {
let semantics = Semantics::new(db);
for file in files {
let Some(id) = path_to_file_id(file, vfs) else {
extractor.extract_without_semantics(
file,
"not included in files loaded from manifest",
);
continue;
match extractor.load_source(file, &semantics, vfs) {
Ok(()) => extractor.extract_with_semantics(file, &semantics, vfs),
Err(reason) => extractor.extract_without_semantics(file, &reason),
};
if semantics.file_to_module_def(id).is_none() {
extractor.extract_without_semantics(file, "not included as a module");
continue;
}
extractor.extract_with_semantics(file, &semantics, vfs);
}
} else {
for file in files {
@@ -155,5 +218,5 @@ fn main() -> anyhow::Result<()> {
}
}
Ok(())
extractor.emit_extraction_diagnostics(start, &cfg)
}

View File

@@ -4,7 +4,6 @@ use crate::generated::{self};
use crate::rust_analyzer::FileSemanticInformation;
use crate::trap::{DiagnosticSeverity, TrapFile, TrapId};
use crate::trap::{Label, TrapClass};
use codeql_extractor::trap::{self};
use itertools::Either;
use log::Level;
use ra_ap_base_db::salsa::InternKey;
@@ -77,7 +76,7 @@ macro_rules! emit_detached {
pub struct Translator<'a> {
pub trap: TrapFile,
path: &'a str,
label: trap::Label,
label: Label<generated::File>,
line_index: LineIndex,
file_id: Option<EditionedFileId>,
pub semantics: Option<&'a Semantics<'a, RootDatabase>>,
@@ -87,7 +86,7 @@ impl<'a> Translator<'a> {
pub fn new(
trap: TrapFile,
path: &'a str,
label: trap::Label,
label: Label<generated::File>,
line_index: LineIndex,
semantic_info: Option<&FileSemanticInformation<'a>>,
) -> Translator<'a> {

View File

@@ -1,5 +1,5 @@
use crate::config;
use crate::config::Compression;
use crate::{config, generated};
use codeql_extractor::{extractor, file_paths, trap};
use log::debug;
use ra_ap_ide_db::line_index::LineCol;
@@ -138,7 +138,7 @@ pub enum DiagnosticSeverity {
impl TrapFile {
pub fn emit_location_label(
&mut self,
file_label: UntypedLabel,
file_label: Label<generated::File>,
start: LineCol,
end: LineCol,
) -> UntypedLabel {
@@ -149,7 +149,7 @@ impl TrapFile {
extractor::location_label(
&mut self.writer,
trap::Location {
file_label,
file_label: file_label.as_untyped(),
start_line,
start_column,
end_line,
@@ -159,7 +159,7 @@ impl TrapFile {
}
pub fn emit_location<E: TrapClass>(
&mut self,
file_label: UntypedLabel,
file_label: Label<generated::File>,
entity_label: Label<E>,
start: LineCol,
end: LineCol,
@@ -192,8 +192,10 @@ impl TrapFile {
],
);
}
pub fn emit_file(&mut self, absolute_path: &Path) -> trap::Label {
extractor::populate_file(&mut self.writer, absolute_path)
pub fn emit_file(&mut self, absolute_path: &Path) -> Label<generated::File> {
let untyped = extractor::populate_file(&mut self.writer, absolute_path);
// SAFETY: populate_file emits `@file` typed labels
unsafe { Label::from_untyped(untyped) }
}
pub fn label<T: TrapEntry>(&mut self, id: TrapId<T>) -> Label<T> {
@@ -243,8 +245,8 @@ impl TrapFileProvider {
})
}
pub fn create(&self, category: &str, key: &Path) -> TrapFile {
let path = file_paths::path_for(&self.trap_dir.join(category), key, "trap");
pub fn create(&self, category: &str, key: impl AsRef<Path>) -> TrapFile {
let path = file_paths::path_for(&self.trap_dir.join(category), key.as_ref(), "trap");
debug!("creating trap file {}", path.display());
let mut writer = trap::Writer::new();
extractor::populate_empty_location(&mut writer);