mirror of https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00

Rust: integrate Rust Analyzer's Semantic module into extractor

Cargo.lock (generated): 2 changed lines
@@ -390,11 +390,13 @@ dependencies = [
  "ra_ap_base_db",
  "ra_ap_hir",
  "ra_ap_hir_def",
+ "ra_ap_hir_expand",
  "ra_ap_ide_db",
  "ra_ap_load-cargo",
  "ra_ap_parser",
  "ra_ap_paths",
  "ra_ap_project_model",
+ "ra_ap_span",
  "ra_ap_syntax",
  "ra_ap_vfs",
  "rust-extractor-macros",
@@ -428,7 +428,7 @@ use ra_ap_syntax::ast::{{
 }};
 use ra_ap_syntax::{{ast, AstNode}};

-impl Translator {{
+impl Translator<'_> {{
     fn emit_else_branch(&mut self, node: ast::ElseBranch) -> Label<generated::Expr> {{
         match node {{
             ast::ElseBranch::IfExpr(inner) => self.emit_if_expr(inner).into(),
@@ -13,12 +13,14 @@ ra_ap_base_db = "0.0.232"
 ra_ap_hir = "0.0.232"
 ra_ap_hir_def = "0.0.232"
 ra_ap_ide_db = "0.0.232"
+ra_ap_hir_expand = "0.0.232"
 ra_ap_load-cargo = "0.0.232"
 ra_ap_paths = "0.0.232"
 ra_ap_project_model = "0.0.232"
 ra_ap_syntax = "0.0.232"
 ra_ap_vfs = "0.0.232"
 ra_ap_parser = "0.0.232"
+ra_ap_span = "0.0.232"
 serde = "1.0.209"
 serde_with = "3.9.0"
 stderrlog = "0.6.0"
@@ -1,76 +1,28 @@
 use anyhow::Context;
 use ra_ap_ide_db::line_index::LineIndex;
-use ra_ap_parser::Edition;
-use std::borrow::Cow;
 mod archive;
 mod config;
 pub mod generated;
+mod rust_analyzer;
 mod translate;
 pub mod trap;
-use ra_ap_syntax::ast::SourceFile;
-use ra_ap_syntax::{AstNode, SyntaxError, TextRange, TextSize};
-
-fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
-    let mut iter = v.utf8_chunks();
-    let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
-        let valid = chunk.valid();
-        let invalid = chunk.invalid();
-        if invalid.is_empty() {
-            debug_assert_eq!(valid.len(), v.len());
-            return (Cow::Borrowed(valid), None);
-        }
-        (valid, invalid)
-    } else {
-        return (Cow::Borrowed(""), None);
-    };
-
-    const REPLACEMENT: &str = "\u{FFFD}";
-    let error_start = first_valid.len() as u32;
-    let error_end = error_start + first_invalid.len() as u32;
-    let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
-    let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
-    let mut res = String::with_capacity(v.len());
-    res.push_str(first_valid);
-
-    res.push_str(REPLACEMENT);
-
-    for chunk in iter {
-        res.push_str(chunk.valid());
-        if !chunk.invalid().is_empty() {
-            res.push_str(REPLACEMENT);
-        }
-    }
-
-    (Cow::Owned(res), Some(error))
-}

 fn extract(
-    archiver: &archive::Archiver,
+    rust_analyzer: &rust_analyzer::RustAnalyzer,
     traps: &trap::TrapFileProvider,
     file: std::path::PathBuf,
 ) -> anyhow::Result<()> {
-    let file = std::path::absolute(&file).unwrap_or(file);
-    let file = std::fs::canonicalize(&file).unwrap_or(file);
-    archiver.archive(&file);
-    let input = std::fs::read(&file)?;
-    let (input, err) = from_utf8_lossy(&input);
-    let line_index = LineIndex::new(&input);
+    let (ast, input, parse_errors, semi) = rust_analyzer.parse(&file);
+    let line_index = LineIndex::new(input.as_ref());
     let display_path = file.to_string_lossy();
     let mut trap = traps.create("source", &file);
     let label = trap.emit_file(&file);
-    let mut translator = translate::Translator::new(trap, label, line_index);
-    if let Some(err) = err {
+    let mut translator = translate::Translator::new(trap, label, line_index, semi);
+    for err in parse_errors {
         translator.emit_parse_error(display_path.as_ref(), err);
     }
-    let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
-    for err in parse.errors() {
-        translator.emit_parse_error(display_path.as_ref(), err);
-    }
-    if let Some(ast) = SourceFile::cast(parse.syntax_node()) {
-        translator.emit_source_file(ast);
-    } else {
-        log::warn!("Skipped {}", display_path);
-    }
+    translator.emit_source_file(ast);
     translator.trap.commit()?;
     Ok(())
 }
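With this change, extract no longer reads or decodes the file itself: the source text and the syntax errors come back from rust_analyzer.parse, and the function only builds the LineIndex used to map byte offsets to line/column positions. Below is a minimal sketch of that offset-to-position mapping, assuming the ra_ap_ide_db 0.0.232 API already used in this diff; it is an illustration, not part of the commit.

use ra_ap_ide_db::line_index::{LineCol, LineIndex};
use ra_ap_syntax::TextSize;

fn main() {
    // Build an index over the raw text once; offset lookups are then cheap.
    let text = "fn main() {\n    println!(\"hi\");\n}\n";
    let index = LineIndex::new(text);

    // Offset 16 points into the second line, at the `p` of `println!`.
    let LineCol { line, col } = index.line_col(TextSize::new(16));

    // LineCol is zero-based, so this prints "line 1, col 4".
    println!("line {line}, col {col}");
}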
@@ -81,12 +33,17 @@ fn main() -> anyhow::Result<()> {
         .verbosity(2 + cfg.verbose as usize)
         .init()?;
     log::info!("{cfg:?}");
+    let rust_analyzer = rust_analyzer::RustAnalyzer::new(&cfg)?;

     let traps = trap::TrapFileProvider::new(&cfg).context("failed to set up trap files")?;
     let archiver = archive::Archiver {
         root: cfg.source_archive_dir,
     };
     for file in cfg.inputs {
-        extract(&archiver, &traps, file)?;
+        let file = std::path::absolute(&file).unwrap_or(file);
+        let file = std::fs::canonicalize(&file).unwrap_or(file);
+        archiver.archive(&file);
+        extract(&rust_analyzer, &traps, file)?;
     }

     Ok(())
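The per-file path handling (make the path absolute, then canonicalize it, each step falling back to the unmodified path on error) now happens in main before archiving, so extract and the archiver see the same path. A standalone, standard-library-only illustration of that fallback pattern (not part of the commit):

use std::path::PathBuf;

fn normalize(file: PathBuf) -> PathBuf {
    // `std::path::absolute` only fails on unusual inputs (e.g. an empty path),
    // so keep the original path if it does.
    let file = std::path::absolute(&file).unwrap_or(file);
    // `canonicalize` resolves symlinks but requires the file to exist;
    // again fall back to the previous value rather than bailing out.
    std::fs::canonicalize(&file).unwrap_or(file)
}

fn main() {
    println!("{}", normalize(PathBuf::from("Cargo.toml")).display());
    // A non-existent file is left as an absolute but non-canonical path.
    println!("{}", normalize(PathBuf::from("does/not/exist.rs")).display());
}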
rust/extractor/src/rust_analyzer.rs (new file): 144 added lines

@@ -0,0 +1,144 @@
+use crate::config::Config;
+use anyhow::Context;
+use itertools::Itertools;
+use log::info;
+use ra_ap_base_db::SourceDatabase;
+use ra_ap_hir::Semantics;
+use ra_ap_ide_db::RootDatabase;
+use ra_ap_load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
+use ra_ap_paths::Utf8PathBuf;
+use ra_ap_project_model::CargoConfig;
+use ra_ap_project_model::RustLibSource;
+use ra_ap_span::Edition;
+use ra_ap_span::EditionedFileId;
+use ra_ap_span::TextRange;
+use ra_ap_span::TextSize;
+use ra_ap_syntax::SourceFile;
+use ra_ap_syntax::SyntaxError;
+use ra_ap_vfs::AbsPathBuf;
+use ra_ap_vfs::Vfs;
+use ra_ap_vfs::VfsPath;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use triomphe::Arc;
+
+pub struct RustAnalyzer {
+    workspace: HashMap<PathBuf, (Vfs, RootDatabase)>,
+}
+
+impl RustAnalyzer {
+    pub fn new(cfg: &Config) -> anyhow::Result<RustAnalyzer> {
+        let mut workspace = HashMap::new();
+        let config = CargoConfig {
+            sysroot: Some(RustLibSource::Discover),
+            target_dir: ra_ap_paths::Utf8PathBuf::from_path_buf(cfg.scratch_dir.to_path_buf())
+                .map(|x| x.join("target"))
+                .ok(),
+            ..Default::default()
+        };
+        let progress = |t| (log::info!("progress: {}", t));
+        let load_config = LoadCargoConfig {
+            load_out_dirs_from_check: true,
+            with_proc_macro_server: ProcMacroServerChoice::Sysroot,
+            prefill_caches: false,
+        };
+        let projects = find_project_manifests(&cfg.inputs).context("loading inputs")?;
+        for project in projects {
+            let manifest = project.manifest_path();
+            let (db, vfs, _macro_server) =
+                load_workspace_at(manifest.as_ref(), &config, &load_config, &progress)?;
+            let path: &Path = manifest.parent().as_ref();
+            workspace.insert(path.to_path_buf(), (vfs, db));
+        }
+        Ok(RustAnalyzer { workspace })
+    }
+    pub fn parse(
+        &self,
+        path: &PathBuf,
+    ) -> (
+        SourceFile,
+        Arc<str>,
+        Vec<SyntaxError>,
+        Option<Semantics<'_, RootDatabase>>,
+    ) {
+        let mut p = path.as_path();
+        while let Some(parent) = p.parent() {
+            p = parent;
+            if let Some((vfs, db)) = self.workspace.get(parent) {
+                if let Some(file_id) = Utf8PathBuf::from_path_buf(path.to_path_buf())
+                    .ok()
+                    .and_then(|x| AbsPathBuf::try_from(x).ok())
+                    .map(VfsPath::from)
+                    .and_then(|x| vfs.file_id(&x))
+                {
+                    let semi = Semantics::new(db);
+                    let file_id = EditionedFileId::current_edition(file_id);
+
+                    return (
+                        semi.parse(file_id),
+                        db.file_text(file_id.into()),
+                        db.parse_errors(file_id)
+                            .map(|x| x.to_vec())
+                            .unwrap_or_default(),
+                        Some(semi),
+                    );
+                }
+            }
+        }
+        let input = std::fs::read(&path).unwrap();
+        let (input, err) = from_utf8_lossy(&input);
+        let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
+        let mut errors = parse.errors();
+        errors.extend(err.into_iter());
+        (parse.tree(), input.as_ref().into(), errors, None)
+    }
+}
+
+fn find_project_manifests(
+    files: &[PathBuf],
+) -> anyhow::Result<Vec<ra_ap_project_model::ProjectManifest>> {
+    let current = std::env::current_dir()?;
+    let abs_files: Vec<_> = files
+        .iter()
+        .map(|path| AbsPathBuf::assert_utf8(current.join(path)))
+        .collect();
+    let ret = ra_ap_project_model::ProjectManifest::discover_all(&abs_files);
+    info!(
+        "found manifests: {}",
+        ret.iter().map(|m| format!("{m}")).join(", ")
+    );
+    Ok(ret)
+}
+fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
+    let mut iter = v.utf8_chunks();
+    let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
+        let valid = chunk.valid();
+        let invalid = chunk.invalid();
+        if invalid.is_empty() {
+            debug_assert_eq!(valid.len(), v.len());
+            return (Cow::Borrowed(valid), None);
+        }
+        (valid, invalid)
+    } else {
+        return (Cow::Borrowed(""), None);
+    };
+
+    const REPLACEMENT: &str = "\u{FFFD}";
+    let error_start = first_valid.len() as u32;
+    let error_end = error_start + first_invalid.len() as u32;
+    let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
+    let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
+    let mut res = String::with_capacity(v.len());
+    res.push_str(first_valid);
+
+    res.push_str(REPLACEMENT);
+
+    for chunk in iter {
+        res.push_str(chunk.valid());
+        if !chunk.invalid().is_empty() {
+            res.push_str(REPLACEMENT);
+        }
+    }
+
+    (Cow::Owned(res), Some(error))
+}
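RustAnalyzer::parse chooses between the semantic path and the plain syntax path by walking the file's ancestors and looking each one up in the workspace map, so the nearest loaded workspace root wins, and files outside every workspace fall back to a bare parse. A self-contained sketch of that lookup, with a plain String standing in for the (Vfs, RootDatabase) pair; the names here are illustrative, not part of the commit:

use std::collections::HashMap;
use std::path::{Path, PathBuf};

/// Walk `path`'s ancestors (nearest first) and return the first one that is
/// a known workspace root, mirroring the lookup loop in `RustAnalyzer::parse`.
fn find_workspace<'a>(
    workspaces: &'a HashMap<PathBuf, String>,
    path: &Path,
) -> Option<(&'a Path, &'a String)> {
    let mut p = path;
    while let Some(parent) = p.parent() {
        p = parent;
        if let Some(ws) = workspaces.get(parent) {
            return Some((parent, ws));
        }
    }
    None
}

fn main() {
    let mut workspaces = HashMap::new();
    workspaces.insert(PathBuf::from("/repo"), "outer workspace".to_string());
    workspaces.insert(PathBuf::from("/repo/crates/foo"), "foo workspace".to_string());

    // The deepest matching root is found first: /repo/crates/foo, not /repo.
    let hit = find_workspace(&workspaces, Path::new("/repo/crates/foo/src/lib.rs"));
    assert_eq!(hit.map(|(root, _)| root), Some(Path::new("/repo/crates/foo")));

    // A file outside every workspace yields None, i.e. the syntax-only fallback.
    assert!(find_workspace(&workspaces, Path::new("/tmp/scratch.rs")).is_none());
}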
@@ -2,7 +2,9 @@ use crate::generated::{self, AstNode};
 use crate::trap::{DiagnosticSeverity, TrapFile, TrapId};
 use crate::trap::{Label, TrapClass};
 use codeql_extractor::trap::{self};
+use ra_ap_hir::Semantics;
 use ra_ap_ide_db::line_index::{LineCol, LineIndex};
+use ra_ap_ide_db::RootDatabase;
 use ra_ap_parser::SyntaxKind;
 use ra_ap_syntax::ast::RangeItem;
 use ra_ap_syntax::{ast, NodeOrToken, SyntaxElementChildren, SyntaxError, SyntaxToken, TextRange};
@@ -56,18 +58,25 @@ impl TextValue for ast::RangePat {
         self.op_token().map(|x| x.text().to_string())
     }
 }
-pub struct Translator {
+pub struct Translator<'a> {
     pub trap: TrapFile,
     label: trap::Label,
     line_index: LineIndex,
+    semi: Option<Semantics<'a, RootDatabase>>,
 }

-impl Translator {
-    pub fn new(trap: TrapFile, label: trap::Label, line_index: LineIndex) -> Translator {
+impl Translator<'_> {
+    pub fn new(
+        trap: TrapFile,
+        label: trap::Label,
+        line_index: LineIndex,
+        semi: Option<Semantics<'_, RootDatabase>>,
+    ) -> Translator {
         Translator {
             trap,
             label,
             line_index,
+            semi,
         }
     }
     pub fn location(&self, range: TextRange) -> (LineCol, LineCol) {
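The structural change to the translator is that it now carries an optional Semantics handle, which borrows from the rust-analyzer database; that borrow is why the struct gains a lifetime parameter and why the impl blocks (including the generated one below) become impl Translator<'_>. A minimal, standard-library-only analogue of that pattern, with stand-in types that are illustrative only and not the extractor's:

// A stand-in for ra_ap_ide_db::RootDatabase.
struct Database {
    name: String,
}

// Like Translator<'a> in the diff: holding an optional borrow forces a
// lifetime parameter onto the struct, even though every other field is owned.
struct Translator<'a> {
    label: u64,
    semantics: Option<&'a Database>,
}

// `'_` lets the impl stay generic over the borrow without naming it.
impl Translator<'_> {
    fn describe(&self) -> String {
        match self.semantics {
            Some(db) => format!("label {} with semantics from {}", self.label, db.name),
            None => format!("label {} (syntax only)", self.label),
        }
    }
}

fn main() {
    let db = Database { name: "workspace db".to_string() };
    let with_db = Translator { label: 1, semantics: Some(&db) };
    let without_db = Translator { label: 2, semantics: None };
    println!("{}", with_db.describe());
    println!("{}", without_db.describe());
}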
rust/extractor/src/translate/generated.rs (generated): 2 changed lines

@@ -11,7 +11,7 @@ use ra_ap_syntax::ast::{
 };
 use ra_ap_syntax::{ast, AstNode};

-impl Translator {
+impl Translator<'_> {
     fn emit_else_branch(&mut self, node: ast::ElseBranch) -> Label<generated::Expr> {
         match node {
             ast::ElseBranch::IfExpr(inner) => self.emit_if_expr(inner).into(),