mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Merge pull request #17552 from github/aibaars/diagnostics
Rust: extract parse errors as diagnostics
This commit is contained in:
@@ -355,5 +355,9 @@
|
||||
"Python model summaries test extension": [
|
||||
"python/ql/test/library-tests/dataflow/model-summaries/InlineTaintTest.ext.yml",
|
||||
"python/ql/test/library-tests/dataflow/model-summaries/NormalDataflowTest.ext.yml"
|
||||
],
|
||||
"Diagnostics.qll": [
|
||||
"ruby/ql/lib/codeql/ruby/Diagnostics.qll",
|
||||
"rust/ql/lib/codeql/rust/Diagnostics.qll"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
/** Provides classes relating to extraction diagnostics. */
|
||||
|
||||
private import codeql.Locations
|
||||
|
||||
/** A diagnostic emitted during extraction, such as a parse error */
|
||||
|
||||
@@ -1,13 +1,48 @@
|
||||
use anyhow::Context;
|
||||
use ra_ap_ide_db::line_index::LineIndex;
|
||||
use ra_ap_parser::Edition;
|
||||
use std::borrow::Cow;
|
||||
mod archive;
|
||||
mod config;
|
||||
pub mod generated;
|
||||
mod translate;
|
||||
pub mod trap;
|
||||
use ra_ap_syntax::ast::SourceFile;
|
||||
use ra_ap_syntax::AstNode;
|
||||
use ra_ap_syntax::{AstNode, SyntaxError, TextRange, TextSize};
|
||||
|
||||
fn from_utf8_lossy(v: &[u8]) -> (Cow<'_, str>, Option<SyntaxError>) {
|
||||
let mut iter = v.utf8_chunks();
|
||||
let (first_valid, first_invalid) = if let Some(chunk) = iter.next() {
|
||||
let valid = chunk.valid();
|
||||
let invalid = chunk.invalid();
|
||||
if invalid.is_empty() {
|
||||
debug_assert_eq!(valid.len(), v.len());
|
||||
return (Cow::Borrowed(valid), None);
|
||||
}
|
||||
(valid, invalid)
|
||||
} else {
|
||||
return (Cow::Borrowed(""), None);
|
||||
};
|
||||
|
||||
const REPLACEMENT: &str = "\u{FFFD}";
|
||||
let error_start = first_valid.len() as u32;
|
||||
let error_end = error_start + first_invalid.len() as u32;
|
||||
let error_range = TextRange::new(TextSize::new(error_start), TextSize::new(error_end));
|
||||
let error = SyntaxError::new("invalid utf-8 sequence".to_owned(), error_range);
|
||||
let mut res = String::with_capacity(v.len());
|
||||
res.push_str(first_valid);
|
||||
|
||||
res.push_str(REPLACEMENT);
|
||||
|
||||
for chunk in iter {
|
||||
res.push_str(chunk.valid());
|
||||
if !chunk.invalid().is_empty() {
|
||||
res.push_str(REPLACEMENT);
|
||||
}
|
||||
}
|
||||
|
||||
(Cow::Owned(res), Some(error))
|
||||
}
|
||||
|
||||
fn extract(
|
||||
archiver: &archive::Archiver,
|
||||
@@ -18,24 +53,25 @@ fn extract(
|
||||
let file = std::fs::canonicalize(&file).unwrap_or(file);
|
||||
archiver.archive(&file);
|
||||
let input = std::fs::read(&file)?;
|
||||
let input = String::from_utf8(input)?;
|
||||
let (input, err) = from_utf8_lossy(&input);
|
||||
let line_index = LineIndex::new(&input);
|
||||
let display_path = file.to_string_lossy();
|
||||
let mut trap = traps.create("source", &file);
|
||||
let label = trap.emit_file(&file);
|
||||
let mut translator = translate::Translator::new(trap, label, line_index);
|
||||
|
||||
if let Some(err) = err {
|
||||
translator.emit_parse_error(display_path.as_ref(), err);
|
||||
}
|
||||
let parse = ra_ap_syntax::ast::SourceFile::parse(&input, Edition::CURRENT);
|
||||
for err in parse.errors() {
|
||||
let (start, _) = translator.location(err.range());
|
||||
log::warn!("{}:{}:{}: {}", display_path, start.line, start.col, err);
|
||||
translator.emit_parse_error(display_path.as_ref(), err);
|
||||
}
|
||||
if let Some(ast) = SourceFile::cast(parse.syntax_node()) {
|
||||
translator.emit_source_file(ast);
|
||||
translator.trap.commit()?
|
||||
} else {
|
||||
log::warn!("Skipped {}", display_path);
|
||||
}
|
||||
translator.trap.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
fn main() -> anyhow::Result<()> {
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
use crate::trap::TrapFile;
|
||||
use crate::trap::{DiagnosticSeverity, TrapFile};
|
||||
use crate::trap::{Label, TrapClass};
|
||||
use codeql_extractor::trap::{self};
|
||||
use ra_ap_ide_db::line_index::{LineCol, LineIndex};
|
||||
use ra_ap_syntax::ast::RangeItem;
|
||||
use ra_ap_syntax::TextSize;
|
||||
use ra_ap_syntax::{ast, TextRange};
|
||||
use ra_ap_syntax::{ast, SyntaxError, TextRange};
|
||||
pub trait TextValue {
|
||||
fn try_get_text(&self) -> Option<String>;
|
||||
}
|
||||
@@ -71,16 +70,38 @@ impl Translator {
|
||||
}
|
||||
pub fn location(&self, range: TextRange) -> (LineCol, LineCol) {
|
||||
let start = self.line_index.line_col(range.start());
|
||||
let end = self.line_index.line_col(
|
||||
range
|
||||
.end()
|
||||
.checked_sub(TextSize::new(1))
|
||||
.unwrap_or(range.end()),
|
||||
);
|
||||
let range_end = range.end();
|
||||
// QL end positions are inclusive, while TextRange offsets are exclusive and point at the position
|
||||
// right after the last character of the range. We need to shift the end offset one character to the left to
|
||||
// get the right inclusive QL position. Unfortunately, simply subtracting `1` from the end-offset may cause
|
||||
// the offset to point in the middle of a multi-byte character, resulting in a `panic`. Therefore we use `try_line_col`
|
||||
// with decreasing offsets to find the start of the last character included in the range.
|
||||
for i in 1..4 {
|
||||
if let Some(end) = range_end
|
||||
.checked_sub(i.into())
|
||||
.and_then(|x| self.line_index.try_line_col(x))
|
||||
{
|
||||
return (start, end);
|
||||
}
|
||||
}
|
||||
let end = self.line_index.line_col(range_end);
|
||||
(start, end)
|
||||
}
|
||||
pub fn emit_location<T: TrapClass>(&mut self, label: Label<T>, node: impl ast::AstNode) {
|
||||
let (start, end) = self.location(node.syntax().text_range());
|
||||
self.trap.emit_location(self.label, label, start, end)
|
||||
}
|
||||
pub fn emit_parse_error(&mut self, path: &str, err: SyntaxError) {
|
||||
let (start, end) = self.location(err.range());
|
||||
log::warn!("{}:{}:{}: {}", path, start.line + 1, start.col + 1, err);
|
||||
let message = err.to_string();
|
||||
let location = self.trap.emit_location_label(self.label, start, end);
|
||||
self.trap.emit_diagnostic(
|
||||
DiagnosticSeverity::Warning,
|
||||
"parse_error".to_owned(),
|
||||
message.clone(),
|
||||
message,
|
||||
location,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,19 +128,25 @@ pub struct TrapFile {
|
||||
compression: Compression,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub enum DiagnosticSeverity {
|
||||
Debug = 10,
|
||||
Info = 20,
|
||||
Warning = 30,
|
||||
Error = 40,
|
||||
}
|
||||
impl TrapFile {
|
||||
pub fn emit_location<E: TrapClass>(
|
||||
pub fn emit_location_label(
|
||||
&mut self,
|
||||
file_label: UntypedLabel,
|
||||
entity_label: Label<E>,
|
||||
start: LineCol,
|
||||
end: LineCol,
|
||||
) {
|
||||
) -> UntypedLabel {
|
||||
let start_line = 1 + start.line as usize;
|
||||
let start_column = 1 + start.col as usize;
|
||||
let end_line = 1 + end.line as usize;
|
||||
let end_column = 1 + end.col as usize;
|
||||
let location_label = extractor::location_label(
|
||||
extractor::location_label(
|
||||
&mut self.writer,
|
||||
trap::Location {
|
||||
file_label,
|
||||
@@ -149,13 +155,43 @@ impl TrapFile {
|
||||
end_line,
|
||||
end_column,
|
||||
},
|
||||
);
|
||||
)
|
||||
}
|
||||
pub fn emit_location<E: TrapClass>(
|
||||
&mut self,
|
||||
file_label: UntypedLabel,
|
||||
entity_label: Label<E>,
|
||||
start: LineCol,
|
||||
end: LineCol,
|
||||
) {
|
||||
let location_label = self.emit_location_label(file_label, start, end);
|
||||
self.writer.add_tuple(
|
||||
"locatable_locations",
|
||||
vec![entity_label.into(), location_label.into()],
|
||||
);
|
||||
}
|
||||
|
||||
pub fn emit_diagnostic(
|
||||
&mut self,
|
||||
severity: DiagnosticSeverity,
|
||||
error_tag: String,
|
||||
error_message: String,
|
||||
full_error_message: String,
|
||||
location: UntypedLabel,
|
||||
) {
|
||||
let label = self.writer.fresh_id();
|
||||
self.writer.add_tuple(
|
||||
"diagnostics",
|
||||
vec![
|
||||
trap::Arg::Label(label),
|
||||
trap::Arg::Int(severity as usize),
|
||||
trap::Arg::String(error_tag),
|
||||
trap::Arg::String(error_message),
|
||||
trap::Arg::String(full_error_message),
|
||||
trap::Arg::Label(location),
|
||||
],
|
||||
);
|
||||
}
|
||||
pub fn emit_file(&mut self, absolute_path: &Path) -> trap::Label {
|
||||
extractor::populate_file(&mut self.writer, absolute_path)
|
||||
}
|
||||
|
||||
54
rust/ql/lib/codeql/rust/Diagnostics.qll
Normal file
54
rust/ql/lib/codeql/rust/Diagnostics.qll
Normal file
@@ -0,0 +1,54 @@
|
||||
/** Provides classes relating to extraction diagnostics. */
|
||||
|
||||
private import codeql.Locations
|
||||
|
||||
/** A diagnostic emitted during extraction, such as a parse error. */
|
||||
class Diagnostic extends @diagnostic {
|
||||
int severity;
|
||||
string tag;
|
||||
string message;
|
||||
string fullMessage;
|
||||
Location location;
|
||||
|
||||
Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) }
|
||||
|
||||
/**
|
||||
* Gets the numerical severity level associated with this diagnostic.
|
||||
*/
|
||||
int getSeverity() { result = severity }
|
||||
|
||||
/** Gets a string representation of the severity of this diagnostic. */
|
||||
string getSeverityText() {
|
||||
severity = 10 and result = "Debug"
|
||||
or
|
||||
severity = 20 and result = "Info"
|
||||
or
|
||||
severity = 30 and result = "Warning"
|
||||
or
|
||||
severity = 40 and result = "Error"
|
||||
}
|
||||
|
||||
/** Gets the error code associated with this diagnostic, e.g. parse_error. */
|
||||
string getTag() { result = tag }
|
||||
|
||||
/**
|
||||
* Gets the error message text associated with this diagnostic.
|
||||
*/
|
||||
string getMessage() { result = message }
|
||||
|
||||
/**
|
||||
* Gets the full error message text associated with this diagnostic.
|
||||
*/
|
||||
string getFullMessage() { result = fullMessage }
|
||||
|
||||
/** Gets the source location of this diagnostic. */
|
||||
Location getLocation() { result = location }
|
||||
|
||||
/** Gets a textual representation of this diagnostic. */
|
||||
string toString() { result = this.getMessage() }
|
||||
}
|
||||
|
||||
/** A diagnostic relating to a particular error in extracting a file. */
|
||||
class ExtractionError extends Diagnostic {
|
||||
ExtractionError() { this.getTag() = "parse_error" }
|
||||
}
|
||||
18
rust/ql/src/queries/diagnostics/ExtractionErrors.ql
Normal file
18
rust/ql/src/queries/diagnostics/ExtractionErrors.ql
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* @name Extraction errors
|
||||
* @description List all extraction errors for files in the source code directory.
|
||||
* @kind diagnostic
|
||||
* @id rust/diagnostics/extraction-errors
|
||||
*/
|
||||
|
||||
import codeql.rust.Diagnostics
|
||||
import codeql.files.FileSystem
|
||||
|
||||
/** Gets the SARIF severity to associate with an error. */
|
||||
int getSeverity() { result = 2 }
|
||||
|
||||
from ExtractionError error, File f
|
||||
where
|
||||
f = error.getLocation().getFile() and
|
||||
exists(f.getRelativePath())
|
||||
select error, "Extraction failed in " + f + " with error " + error.getMessage(), getSeverity()
|
||||
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* @id rust/summary/number-of-files-extracted-with-errors
|
||||
* @name Total number of Rust files that were extracted with errors
|
||||
* @description The total number of Rust files in the source code directory that
|
||||
* were extracted, but where at least one extraction error occurred in the process.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
*/
|
||||
|
||||
import codeql.files.FileSystem
|
||||
import codeql.rust.Diagnostics
|
||||
|
||||
select count(File f |
|
||||
exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath())
|
||||
)
|
||||
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* @id rust/summary/number-of-successfully-extracted-files
|
||||
* @name Total number of Rust files that were extracted without error
|
||||
* @description The total number of Rust files in the source code directory that
|
||||
* were extracted without encountering any extraction errors.
|
||||
* @kind metric
|
||||
* @tags summary
|
||||
*/
|
||||
|
||||
import codeql.rust.Diagnostics
|
||||
import codeql.files.FileSystem
|
||||
|
||||
select count(File f |
|
||||
not exists(ExtractionError e | e.getLocation().getFile() = f) and exists(f.getRelativePath())
|
||||
)
|
||||
39
rust/ql/test/extractor-tests/utf8/ast.expected
Normal file
39
rust/ql/test/extractor-tests/utf8/ast.expected
Normal file
@@ -0,0 +1,39 @@
|
||||
| lib.rs:1:1:3:22 | SourceFile |
|
||||
| lib.rs:2:1:2:8 | Module |
|
||||
| lib.rs:2:5:2:7 | Name |
|
||||
| lib.rs:3:1:3:8 | Module |
|
||||
| lib.rs:3:5:3:8 | Name |
|
||||
| lib.rs:3:10:3:20 | NameRef |
|
||||
| lib.rs:3:10:3:20 | Path |
|
||||
| lib.rs:3:10:3:20 | PathSegment |
|
||||
| lib.rs:3:10:3:21 | MacroCall |
|
||||
| utf8-identifiers.rs:1:1:4:6 | foo |
|
||||
| utf8-identifiers.rs:1:1:12:2 | SourceFile |
|
||||
| utf8-identifiers.rs:1:4:1:6 | Name |
|
||||
| utf8-identifiers.rs:1:7:4:1 | GenericParamList |
|
||||
| utf8-identifiers.rs:2:5:2:6 | Lifetime |
|
||||
| utf8-identifiers.rs:2:5:2:6 | LifetimeParam |
|
||||
| utf8-identifiers.rs:3:5:3:5 | Name |
|
||||
| utf8-identifiers.rs:3:5:3:5 | TypeParam |
|
||||
| utf8-identifiers.rs:4:2:4:3 | ParamList |
|
||||
| utf8-identifiers.rs:4:5:4:6 | BlockExpr |
|
||||
| utf8-identifiers.rs:4:5:4:6 | StmtList |
|
||||
| utf8-identifiers.rs:6:1:8:1 | Struct |
|
||||
| utf8-identifiers.rs:6:8:6:8 | Name |
|
||||
| utf8-identifiers.rs:6:10:8:1 | RecordFieldList |
|
||||
| utf8-identifiers.rs:7:5:7:5 | Name |
|
||||
| utf8-identifiers.rs:7:5:7:13 | RecordField |
|
||||
| utf8-identifiers.rs:7:9:7:13 | NameRef |
|
||||
| utf8-identifiers.rs:7:9:7:13 | Path |
|
||||
| utf8-identifiers.rs:7:9:7:13 | PathSegment |
|
||||
| utf8-identifiers.rs:7:9:7:13 | PathType |
|
||||
| utf8-identifiers.rs:10:1:10:3 | Visibility |
|
||||
| utf8-identifiers.rs:10:1:12:1 | main |
|
||||
| utf8-identifiers.rs:10:8:10:11 | Name |
|
||||
| utf8-identifiers.rs:10:12:10:13 | ParamList |
|
||||
| utf8-identifiers.rs:10:15:12:1 | BlockExpr |
|
||||
| utf8-identifiers.rs:10:15:12:1 | StmtList |
|
||||
| utf8-identifiers.rs:11:5:11:24 | LetStmt |
|
||||
| utf8-identifiers.rs:11:9:11:9 | IdentPat |
|
||||
| utf8-identifiers.rs:11:9:11:9 | Name |
|
||||
| utf8-identifiers.rs:11:14:11:23 | LiteralExpr |
|
||||
3
rust/ql/test/extractor-tests/utf8/ast.ql
Normal file
3
rust/ql/test/extractor-tests/utf8/ast.ql
Normal file
@@ -0,0 +1,3 @@
|
||||
import codeql.rust.elements
|
||||
|
||||
select any(AstNode n)
|
||||
12
rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs
Normal file
12
rust/ql/test/extractor-tests/utf8/utf8-identifiers.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
fn foo<
|
||||
'β,
|
||||
γ
|
||||
>() {}
|
||||
|
||||
struct X {
|
||||
δ: usize
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let α = 0.00001f64;
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
| does_not_compile.rs:2:6:2:5 | expected SEMICOLON | Extraction failed in does_not_compile.rs with error expected SEMICOLON | 2 |
|
||||
| does_not_compile.rs:2:9:2:8 | expected SEMICOLON | Extraction failed in does_not_compile.rs with error expected SEMICOLON | 2 |
|
||||
| does_not_compile.rs:2:13:2:12 | expected SEMICOLON | Extraction failed in does_not_compile.rs with error expected SEMICOLON | 2 |
|
||||
| does_not_compile.rs:2:21:2:20 | expected SEMICOLON | Extraction failed in does_not_compile.rs with error expected SEMICOLON | 2 |
|
||||
| does_not_compile.rs:2:26:2:25 | expected SEMICOLON | Extraction failed in does_not_compile.rs with error expected SEMICOLON | 2 |
|
||||
| does_not_compile.rs:2:32:2:31 | expected field name or number | Extraction failed in does_not_compile.rs with error expected field name or number | 2 |
|
||||
@@ -0,0 +1 @@
|
||||
queries/diagnostics/ExtractionErrors.ql
|
||||
Reference in New Issue
Block a user