mirror of
https://github.com/github/codeql.git
synced 2026-04-30 19:26:02 +02:00
Merge pull request #11956 from aibaars/json-log
Ruby: structured logging
This commit is contained in:
BIN
ruby/Cargo.lock
generated
BIN
ruby/Cargo.lock
generated
Binary file not shown.
@@ -20,3 +20,6 @@ num_cpus = "1.14.0"
|
||||
regex = "1.7.1"
|
||||
encoding = "0.2"
|
||||
lazy_static = "1.4.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
|
||||
268
ruby/extractor/src/diagnostics.rs
Normal file
268
ruby/extractor/src/diagnostics.rs
Normal file
@@ -0,0 +1,268 @@
|
||||
use serde::Serialize;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
/** SARIF severity */
|
||||
#[derive(Serialize)]
|
||||
pub enum Severity {
|
||||
Error,
|
||||
Warning,
|
||||
#[allow(unused)]
|
||||
Note,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Source {
|
||||
/** An identifier under which it makes sense to group this diagnostic message. This is used to build the SARIF reporting descriptor object.*/
|
||||
pub id: String,
|
||||
/** Display name for the ID. This is used to build the SARIF reporting descriptor object. */
|
||||
pub name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/** Name of the CodeQL extractor. This is used to identify which tool component the reporting descriptor object should be nested under in SARIF.*/
|
||||
pub extractor_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Visibility {
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
/** True if the message should be displayed on the status page (defaults to false) */
|
||||
pub status_page: bool,
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
/** True if the message should be counted in the diagnostics summary table printed by `codeql database analyze` (defaults to false) */
|
||||
pub cli_summary_table: bool,
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
/** True if the message should be sent to telemetry (defaults to false) */
|
||||
pub telemetry: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Location {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/** Path to the affected file if appropriate, relative to the source root */
|
||||
pub file: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub start_line: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub start_column: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub end_line: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub end_column: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DiagnosticMessage {
|
||||
/** Unix timestamp */
|
||||
pub timestamp: chrono::DateTime<chrono::Utc>,
|
||||
pub source: Source,
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
/** GitHub flavored Markdown formatted message. Should include inline links to any help pages. */
|
||||
pub markdown_message: String,
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
/** Plain text message. Used by components where the string processing needed to support Markdown is cumbersome. */
|
||||
pub plaintext_message: String,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
/** List of help links intended to supplement the `plaintextMessage`. */
|
||||
pub help_links: Vec<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub severity: Option<Severity>,
|
||||
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
||||
pub internal: bool,
|
||||
#[serde(skip_serializing_if = "is_default_visibility")]
|
||||
pub visibility: Visibility,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub location: Option<Location>,
|
||||
}
|
||||
|
||||
fn is_default_visibility(v: &Visibility) -> bool {
|
||||
!v.cli_summary_table && !v.status_page && !v.telemetry
|
||||
}
|
||||
|
||||
pub struct LogWriter {
|
||||
extractor: String,
|
||||
path: Option<PathBuf>,
|
||||
inner: Option<std::io::BufWriter<std::fs::File>>,
|
||||
}
|
||||
|
||||
impl LogWriter {
|
||||
pub fn message(&self, id: &str, name: &str) -> DiagnosticMessage {
|
||||
DiagnosticMessage {
|
||||
timestamp: chrono::Utc::now(),
|
||||
source: Source {
|
||||
id: format!("{}/{}", self.extractor, id),
|
||||
name: name.to_owned(),
|
||||
extractor_name: Some(self.extractor.to_owned()),
|
||||
},
|
||||
markdown_message: String::new(),
|
||||
plaintext_message: String::new(),
|
||||
help_links: vec![],
|
||||
severity: None,
|
||||
internal: false,
|
||||
visibility: Visibility {
|
||||
cli_summary_table: false,
|
||||
status_page: false,
|
||||
telemetry: false,
|
||||
},
|
||||
location: None,
|
||||
}
|
||||
}
|
||||
pub fn write(&mut self, mesg: &DiagnosticMessage) {
|
||||
let full_error_message = mesg.full_error_message();
|
||||
|
||||
match mesg.severity {
|
||||
Some(Severity::Error) => tracing::error!("{}", full_error_message),
|
||||
Some(Severity::Warning) => tracing::warn!("{}", full_error_message),
|
||||
Some(Severity::Note) => tracing::info!("{}", full_error_message),
|
||||
None => tracing::debug!("{}", full_error_message),
|
||||
}
|
||||
if self.inner.is_none() {
|
||||
if let Some(path) = self.path.as_ref() {
|
||||
match std::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.write(true)
|
||||
.open(&path)
|
||||
{
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
"Could not open log file '{}': {}",
|
||||
&path.to_string_lossy(),
|
||||
e
|
||||
);
|
||||
self.path = None;
|
||||
self.inner = None
|
||||
}
|
||||
Ok(file) => self.inner = Some(std::io::BufWriter::new(file)),
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(mut writer) = self.inner.as_mut() {
|
||||
serde_json::to_writer(&mut writer, mesg)
|
||||
.unwrap_or_else(|e| tracing::debug!("Failed to write log entry: {}", e));
|
||||
&mut writer
|
||||
.write_all(b"\n")
|
||||
.unwrap_or_else(|e| tracing::debug!("Failed to write log entry: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DiagnosticLoggers {
|
||||
extractor: String,
|
||||
root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl DiagnosticLoggers {
|
||||
pub fn new(extractor: &str) -> Self {
|
||||
let env_var = format!(
|
||||
"CODEQL_EXTRACTOR_{}_DIAGNOSTIC_DIR",
|
||||
extractor.to_ascii_uppercase()
|
||||
);
|
||||
|
||||
let root = match std::env::var(&env_var) {
|
||||
Err(e) => {
|
||||
tracing::error!("{}: {}", e, &env_var);
|
||||
None
|
||||
}
|
||||
Ok(dir) => {
|
||||
if let Err(e) = std::fs::create_dir_all(&dir) {
|
||||
tracing::error!("Failed to create log directory {}: {}", &dir, e);
|
||||
None
|
||||
} else {
|
||||
Some(PathBuf::from(dir))
|
||||
}
|
||||
}
|
||||
};
|
||||
DiagnosticLoggers {
|
||||
extractor: extractor.to_owned(),
|
||||
root,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn logger(&self) -> LogWriter {
|
||||
thread_local! {
|
||||
static THREAD_NUM: usize = {
|
||||
static COUNT: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
|
||||
COUNT.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||
};
|
||||
}
|
||||
THREAD_NUM.with(|n| LogWriter {
|
||||
extractor: self.extractor.to_owned(),
|
||||
inner: None,
|
||||
path: self
|
||||
.root
|
||||
.as_ref()
|
||||
.map(|root| root.to_owned().join(format!("extractor_{}.jsonl", n))),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl DiagnosticMessage {
|
||||
pub fn full_error_message(&self) -> String {
|
||||
match &self.location {
|
||||
Some(Location {
|
||||
file: Some(path),
|
||||
start_line: Some(line),
|
||||
..
|
||||
}) => format!("{}:{}: {}", path, line, self.plaintext_message),
|
||||
_ => self.plaintext_message.to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn text(&mut self, text: &str) -> &mut Self {
|
||||
self.plaintext_message = text.to_owned();
|
||||
self
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn markdown(&mut self, text: &str) -> &mut Self {
|
||||
self.markdown_message = text.to_owned();
|
||||
self
|
||||
}
|
||||
pub fn severity(&mut self, severity: Severity) -> &mut Self {
|
||||
self.severity = Some(severity);
|
||||
self
|
||||
}
|
||||
#[allow(unused)]
|
||||
pub fn help_link(&mut self, link: &str) -> &mut Self {
|
||||
self.help_links.push(link.to_owned());
|
||||
self
|
||||
}
|
||||
#[allow(unused)]
|
||||
pub fn internal(&mut self) -> &mut Self {
|
||||
self.internal = true;
|
||||
self
|
||||
}
|
||||
#[allow(unused)]
|
||||
pub fn cli_summary_table(&mut self) -> &mut Self {
|
||||
self.visibility.cli_summary_table = true;
|
||||
self
|
||||
}
|
||||
pub fn status_page(&mut self) -> &mut Self {
|
||||
self.visibility.status_page = true;
|
||||
self
|
||||
}
|
||||
#[allow(unused)]
|
||||
pub fn telemetry(&mut self) -> &mut Self {
|
||||
self.visibility.telemetry = true;
|
||||
self
|
||||
}
|
||||
pub fn location(
|
||||
&mut self,
|
||||
path: &str,
|
||||
start_line: usize,
|
||||
start_column: usize,
|
||||
end_line: usize,
|
||||
end_column: usize,
|
||||
) -> &mut Self {
|
||||
let loc = self.location.get_or_insert(Default::default());
|
||||
loc.file = Some(path.to_owned());
|
||||
loc.start_line = Some(start_line);
|
||||
loc.start_column = Some(start_column);
|
||||
loc.end_line = Some(end_line);
|
||||
loc.end_column = Some(end_column);
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::diagnostics;
|
||||
use crate::trap;
|
||||
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
|
||||
use std::collections::BTreeMap as Map;
|
||||
@@ -5,7 +6,6 @@ use std::collections::BTreeSet as Set;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use tracing::{error, info, span, Level};
|
||||
use tree_sitter::{Language, Node, Parser, Range, Tree};
|
||||
|
||||
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
|
||||
@@ -114,21 +114,22 @@ pub fn extract(
|
||||
language: Language,
|
||||
language_prefix: &str,
|
||||
schema: &NodeTypeMap,
|
||||
diagnostics_writer: &mut diagnostics::LogWriter,
|
||||
trap_writer: &mut trap::Writer,
|
||||
path: &Path,
|
||||
source: &[u8],
|
||||
ranges: &[Range],
|
||||
) -> std::io::Result<()> {
|
||||
let path_str = format!("{}", path.display());
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
let span = tracing::span!(
|
||||
tracing::Level::TRACE,
|
||||
"extract",
|
||||
file = %path_str
|
||||
);
|
||||
|
||||
let _enter = span.enter();
|
||||
|
||||
info!("extracting: {}", path_str);
|
||||
tracing::info!("extracting: {}", path_str);
|
||||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
@@ -138,6 +139,7 @@ pub fn extract(
|
||||
let file_label = populate_file(trap_writer, path);
|
||||
let mut visitor = Visitor::new(
|
||||
source,
|
||||
diagnostics_writer,
|
||||
trap_writer,
|
||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||
&path_str,
|
||||
@@ -204,6 +206,8 @@ struct Visitor<'a> {
|
||||
file_label: trap::Label,
|
||||
/// The source code as a UTF-8 byte array
|
||||
source: &'a [u8],
|
||||
/// A diagnostics::LogWriter to write diagnostic messages
|
||||
diagnostics_writer: &'a mut diagnostics::LogWriter,
|
||||
/// A trap::Writer to accumulate trap entries
|
||||
trap_writer: &'a mut trap::Writer,
|
||||
/// A counter for top-level child nodes
|
||||
@@ -226,6 +230,7 @@ struct Visitor<'a> {
|
||||
impl<'a> Visitor<'a> {
|
||||
fn new(
|
||||
source: &'a [u8],
|
||||
diagnostics_writer: &'a mut diagnostics::LogWriter,
|
||||
trap_writer: &'a mut trap::Writer,
|
||||
path: &'a str,
|
||||
file_label: trap::Label,
|
||||
@@ -236,6 +241,7 @@ impl<'a> Visitor<'a> {
|
||||
path,
|
||||
file_label,
|
||||
source,
|
||||
diagnostics_writer,
|
||||
trap_writer,
|
||||
toplevel_child_counter: 0,
|
||||
ast_node_info_table_name: format!("{}_ast_node_info", language_prefix),
|
||||
@@ -245,34 +251,31 @@ impl<'a> Visitor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn record_parse_error(
|
||||
&mut self,
|
||||
error_message: String,
|
||||
full_error_message: String,
|
||||
loc: trap::Label,
|
||||
) {
|
||||
error!("{}", full_error_message);
|
||||
fn record_parse_error(&mut self, loc: trap::Label, mesg: &diagnostics::DiagnosticMessage) {
|
||||
self.diagnostics_writer.write(mesg);
|
||||
let id = self.trap_writer.fresh_id();
|
||||
let full_error_message = mesg.full_error_message();
|
||||
let severity_code = match mesg.severity {
|
||||
Some(diagnostics::Severity::Error) => 40,
|
||||
Some(diagnostics::Severity::Warning) => 30,
|
||||
Some(diagnostics::Severity::Note) => 20,
|
||||
None => 10,
|
||||
};
|
||||
self.trap_writer.add_tuple(
|
||||
"diagnostics",
|
||||
vec![
|
||||
trap::Arg::Label(id),
|
||||
trap::Arg::Int(40), // severity 40 = error
|
||||
trap::Arg::Int(severity_code),
|
||||
trap::Arg::String("parse_error".to_string()),
|
||||
trap::Arg::String(error_message),
|
||||
trap::Arg::String(mesg.plaintext_message.to_owned()),
|
||||
trap::Arg::String(full_error_message),
|
||||
trap::Arg::Label(loc),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
fn record_parse_error_for_node(
|
||||
&mut self,
|
||||
error_message: String,
|
||||
full_error_message: String,
|
||||
node: Node,
|
||||
) {
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
||||
fn record_parse_error_for_node(&mut self, error_message: String, node: Node) {
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self, node);
|
||||
let loc = location(
|
||||
self.trap_writer,
|
||||
self.file_label,
|
||||
@@ -281,7 +284,14 @@ impl<'a> Visitor<'a> {
|
||||
end_line,
|
||||
end_column,
|
||||
);
|
||||
self.record_parse_error(error_message, full_error_message, loc);
|
||||
self.record_parse_error(
|
||||
loc,
|
||||
self.diagnostics_writer
|
||||
.message("parse-error", "Parse error")
|
||||
.severity(diagnostics::Severity::Error)
|
||||
.location(self.path, start_line, start_column, end_line, end_column)
|
||||
.text(&error_message),
|
||||
);
|
||||
}
|
||||
|
||||
fn enter_node(&mut self, node: Node) -> bool {
|
||||
@@ -291,13 +301,7 @@ impl<'a> Visitor<'a> {
|
||||
} else {
|
||||
"parse error".to_string()
|
||||
};
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
);
|
||||
self.record_parse_error_for_node(error_message, full_error_message, node);
|
||||
self.record_parse_error_for_node(error_message, node);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -312,7 +316,7 @@ impl<'a> Visitor<'a> {
|
||||
return;
|
||||
}
|
||||
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
|
||||
let (start_line, start_column, end_line, end_column) = location_for(self, node);
|
||||
let loc = location(
|
||||
self.trap_writer,
|
||||
self.file_label,
|
||||
@@ -380,13 +384,15 @@ impl<'a> Visitor<'a> {
|
||||
}
|
||||
_ => {
|
||||
let error_message = format!("unknown table type: '{}'", node.kind());
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
self.record_parse_error(
|
||||
loc,
|
||||
self.diagnostics_writer
|
||||
.message("parse-error", "Parse error")
|
||||
.severity(diagnostics::Severity::Error)
|
||||
.location(self.path, start_line, start_column, end_line, end_column)
|
||||
.text(&error_message)
|
||||
.status_page(),
|
||||
);
|
||||
self.record_parse_error(error_message, full_error_message, loc);
|
||||
|
||||
valid = false;
|
||||
}
|
||||
@@ -440,13 +446,7 @@ impl<'a> Visitor<'a> {
|
||||
child_node.type_name,
|
||||
field.type_info
|
||||
);
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
);
|
||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
||||
self.record_parse_error_for_node(error_message, *node);
|
||||
}
|
||||
} else if child_node.field_name.is_some() || child_node.type_name.named {
|
||||
let error_message = format!(
|
||||
@@ -455,13 +455,7 @@ impl<'a> Visitor<'a> {
|
||||
&child_node.field_name.unwrap_or("child"),
|
||||
&child_node.type_name
|
||||
);
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
);
|
||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
||||
self.record_parse_error_for_node(error_message, *node);
|
||||
}
|
||||
}
|
||||
let mut args = Vec::new();
|
||||
@@ -484,13 +478,7 @@ impl<'a> Visitor<'a> {
|
||||
node.kind(),
|
||||
column_name
|
||||
);
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
);
|
||||
self.record_parse_error_for_node(error_message, full_error_message, *node);
|
||||
self.record_parse_error_for_node(error_message, *node);
|
||||
}
|
||||
}
|
||||
Storage::Table {
|
||||
@@ -505,17 +493,8 @@ impl<'a> Visitor<'a> {
|
||||
node.kind(),
|
||||
table_name,
|
||||
);
|
||||
let full_error_message = format!(
|
||||
"{}:{}: {}",
|
||||
&self.path,
|
||||
node.start_position().row + 1,
|
||||
error_message
|
||||
);
|
||||
self.record_parse_error_for_node(
|
||||
error_message,
|
||||
full_error_message,
|
||||
*node,
|
||||
);
|
||||
|
||||
self.record_parse_error_for_node(error_message, *node);
|
||||
break;
|
||||
}
|
||||
let mut args = vec![trap::Arg::Label(parent_id)];
|
||||
@@ -582,7 +561,7 @@ fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
|
||||
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
|
||||
// The first is the location and label definition, and the second is the
|
||||
// 'Located' entry.
|
||||
fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
||||
fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize) {
|
||||
// Tree-sitter row, column values are 0-based while CodeQL starts
|
||||
// counting at 1. In addition Tree-sitter's row and column for the
|
||||
// end position are exclusive while CodeQL's end positions are inclusive.
|
||||
@@ -600,6 +579,7 @@ fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
||||
end_line = start_line;
|
||||
end_col = start_col - 1;
|
||||
} else if end_col == 0 {
|
||||
let source = visitor.source;
|
||||
// end_col = 0 means that we are at the start of a line
|
||||
// unfortunately 0 is invalid as column number, therefore
|
||||
// we should update the end location to be the end of the
|
||||
@@ -608,7 +588,14 @@ fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
||||
if index > 0 && index <= source.len() {
|
||||
index -= 1;
|
||||
if source[index] != b'\n' {
|
||||
error!("expecting a line break symbol, but none found while correcting end column value");
|
||||
visitor.diagnostics_writer.write(
|
||||
visitor
|
||||
.diagnostics_writer
|
||||
.message("internal-error", "Internal error")
|
||||
.text("expecting a line break symbol, but none found while correcting end column value")
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Error),
|
||||
);
|
||||
}
|
||||
end_line -= 1;
|
||||
end_col = 1;
|
||||
@@ -617,10 +604,17 @@ fn location_for(source: &[u8], n: Node) -> (usize, usize, usize, usize) {
|
||||
end_col += 1;
|
||||
}
|
||||
} else {
|
||||
error!(
|
||||
"cannot correct end column value: end_byte index {} is not in range [1,{}]",
|
||||
index,
|
||||
source.len()
|
||||
visitor.diagnostics_writer.write(
|
||||
visitor
|
||||
.diagnostics_writer
|
||||
.message("internal-error", "Internal error")
|
||||
.text(&format!(
|
||||
"cannot correct end column value: end_byte index {} is not in range [1,{}]",
|
||||
index,
|
||||
source.len()
|
||||
))
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Error),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
mod diagnostics;
|
||||
mod extractor;
|
||||
mod trap;
|
||||
|
||||
@@ -24,22 +25,19 @@ use tree_sitter::{Language, Parser, Range};
|
||||
* of cores available on the machine to determine how many threads to use
|
||||
* (minimum of 1). If unspecified, should be considered as set to -1."
|
||||
*/
|
||||
fn num_codeql_threads() -> usize {
|
||||
fn num_codeql_threads() -> Result<usize, String> {
|
||||
let threads_str = std::env::var("CODEQL_THREADS").unwrap_or_else(|_| "-1".to_owned());
|
||||
match threads_str.parse::<i32>() {
|
||||
Ok(num) if num <= 0 => {
|
||||
let reduction = -num as usize;
|
||||
std::cmp::max(1, num_cpus::get() - reduction)
|
||||
Ok(std::cmp::max(1, num_cpus::get() - reduction))
|
||||
}
|
||||
Ok(num) => num as usize,
|
||||
Ok(num) => Ok(num as usize),
|
||||
|
||||
Err(_) => {
|
||||
tracing::error!(
|
||||
"Unable to parse CODEQL_THREADS value '{}'; defaulting to 1 thread.",
|
||||
&threads_str
|
||||
);
|
||||
1
|
||||
}
|
||||
Err(_) => Err(format!(
|
||||
"Unable to parse CODEQL_THREADS value '{}'",
|
||||
&threads_str
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +66,21 @@ fn main() -> std::io::Result<()> {
|
||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("ruby_extractor=warn")),
|
||||
)
|
||||
.init();
|
||||
let num_threads = num_codeql_threads();
|
||||
let diagnostics = diagnostics::DiagnosticLoggers::new("ruby");
|
||||
let mut main_thread_logger = diagnostics.logger();
|
||||
let num_threads = match num_codeql_threads() {
|
||||
Ok(num) => num,
|
||||
Err(e) => {
|
||||
main_thread_logger.write(
|
||||
main_thread_logger
|
||||
.message("configuration-error", "Configuration error")
|
||||
.text(&format!("{}; defaulting to 1 thread.", e))
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Warning),
|
||||
);
|
||||
1
|
||||
}
|
||||
};
|
||||
tracing::info!(
|
||||
"Using {} {}",
|
||||
num_threads,
|
||||
@@ -78,6 +90,20 @@ fn main() -> std::io::Result<()> {
|
||||
"threads"
|
||||
}
|
||||
);
|
||||
let trap_compression = match trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION") {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
main_thread_logger.write(
|
||||
main_thread_logger
|
||||
.message("configuration-error", "Configuration error")
|
||||
.text(&format!("{}; using gzip.", e))
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Warning),
|
||||
);
|
||||
trap::Compression::Gzip
|
||||
}
|
||||
};
|
||||
drop(main_thread_logger);
|
||||
rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(num_threads)
|
||||
.build_global()
|
||||
@@ -100,7 +126,6 @@ fn main() -> std::io::Result<()> {
|
||||
.value_of("output-dir")
|
||||
.expect("missing --output-dir");
|
||||
let trap_dir = PathBuf::from(trap_dir);
|
||||
let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION");
|
||||
|
||||
let file_list = matches.value_of("file-list").expect("missing --file-list");
|
||||
let file_list = fs::File::open(file_list)?;
|
||||
@@ -119,6 +144,7 @@ fn main() -> std::io::Result<()> {
|
||||
lines
|
||||
.par_iter()
|
||||
.try_for_each(|line| {
|
||||
let mut diagnostics_writer = diagnostics.logger();
|
||||
let path = PathBuf::from(line).canonicalize()?;
|
||||
let src_archive_file = path_for(&src_archive_dir, &path, "");
|
||||
let mut source = std::fs::read(&path)?;
|
||||
@@ -131,6 +157,7 @@ fn main() -> std::io::Result<()> {
|
||||
erb,
|
||||
"erb",
|
||||
&erb_schema,
|
||||
&mut diagnostics_writer,
|
||||
&mut trap_writer,
|
||||
&path,
|
||||
&source,
|
||||
@@ -170,20 +197,35 @@ fn main() -> std::io::Result<()> {
|
||||
Ok(str) => source = str.as_bytes().to_owned(),
|
||||
Err(msg) => {
|
||||
needs_conversion = false;
|
||||
tracing::warn!(
|
||||
"{}: character decoding failure: {} ({})",
|
||||
&path.to_string_lossy(),
|
||||
msg,
|
||||
&encoding_name
|
||||
diagnostics_writer.write(
|
||||
diagnostics_writer
|
||||
.message(
|
||||
"character-encoding-error",
|
||||
"Character encoding error",
|
||||
)
|
||||
.text(&format!(
|
||||
"{}: character decoding failure: {} ({})",
|
||||
&path.to_string_lossy(),
|
||||
msg,
|
||||
&encoding_name
|
||||
))
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Warning),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"{}: unknown character encoding: '{}'",
|
||||
&path.to_string_lossy(),
|
||||
&encoding_name
|
||||
diagnostics_writer.write(
|
||||
diagnostics_writer
|
||||
.message("character-encoding-error", "Character encoding error")
|
||||
.text(&format!(
|
||||
"{}: unknown character encoding: '{}'",
|
||||
&path.to_string_lossy(),
|
||||
&encoding_name
|
||||
))
|
||||
.status_page()
|
||||
.severity(diagnostics::Severity::Warning),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -194,6 +236,7 @@ fn main() -> std::io::Result<()> {
|
||||
language,
|
||||
"ruby",
|
||||
&schema,
|
||||
&mut diagnostics_writer,
|
||||
&mut trap_writer,
|
||||
&path,
|
||||
&source,
|
||||
|
||||
@@ -224,17 +224,14 @@ pub enum Compression {
|
||||
}
|
||||
|
||||
impl Compression {
|
||||
pub fn from_env(var_name: &str) -> Compression {
|
||||
pub fn from_env(var_name: &str) -> Result<Compression, String> {
|
||||
match std::env::var(var_name) {
|
||||
Ok(method) => match Compression::from_string(&method) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
|
||||
Compression::Gzip
|
||||
}
|
||||
Some(c) => Ok(c),
|
||||
None => Err(format!("Unknown compression method '{}'", &method)),
|
||||
},
|
||||
// Default compression method if the env var isn't set:
|
||||
Err(_) => Compression::Gzip,
|
||||
Err(_) => Ok(Compression::Gzip),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user