mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Ruby: add support for extracting overlays
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -405,6 +405,7 @@ dependencies = [
|
|||||||
"lazy_static",
|
"lazy_static",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
|
"serde_json",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
"tree-sitter",
|
"tree-sitter",
|
||||||
|
|||||||
1
misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl
generated
vendored
1
misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl
generated
vendored
@@ -301,6 +301,7 @@ _NORMAL_DEPENDENCIES = {
|
|||||||
"lazy_static": Label("@vendor_ts__lazy_static-1.5.0//:lazy_static"),
|
"lazy_static": Label("@vendor_ts__lazy_static-1.5.0//:lazy_static"),
|
||||||
"rayon": Label("@vendor_ts__rayon-1.10.0//:rayon"),
|
"rayon": Label("@vendor_ts__rayon-1.10.0//:rayon"),
|
||||||
"regex": Label("@vendor_ts__regex-1.11.1//:regex"),
|
"regex": Label("@vendor_ts__regex-1.11.1//:regex"),
|
||||||
|
"serde_json": Label("@vendor_ts__serde_json-1.0.140//:serde_json"),
|
||||||
"tracing": Label("@vendor_ts__tracing-0.1.41//:tracing"),
|
"tracing": Label("@vendor_ts__tracing-0.1.41//:tracing"),
|
||||||
"tracing-subscriber": Label("@vendor_ts__tracing-subscriber-0.3.19//:tracing_subscriber"),
|
"tracing-subscriber": Label("@vendor_ts__tracing-subscriber-0.3.19//:tracing_subscriber"),
|
||||||
"tree-sitter": Label("@vendor_ts__tree-sitter-0.24.6//:tree_sitter"),
|
"tree-sitter": Label("@vendor_ts__tree-sitter-0.24.6//:tree_sitter"),
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ display_name: "Ruby"
|
|||||||
version: 0.1.0
|
version: 0.1.0
|
||||||
column_kind: "utf8"
|
column_kind: "utf8"
|
||||||
legacy_qltest_extraction: true
|
legacy_qltest_extraction: true
|
||||||
|
overlay_support_version: 20250108
|
||||||
build_modes:
|
build_modes:
|
||||||
- none
|
- none
|
||||||
github_api_languages:
|
github_api_languages:
|
||||||
|
|||||||
@@ -17,5 +17,6 @@ rayon = "1.10.0"
|
|||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
encoding = "0.2"
|
encoding = "0.2"
|
||||||
lazy_static = "1.5.0"
|
lazy_static = "1.5.0"
|
||||||
|
serde_json = "1.0.140"
|
||||||
|
|
||||||
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }
|
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
use clap::Args;
|
use clap::Args;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
use serde_json;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::BufRead;
|
use std::io::BufRead;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
@@ -78,6 +80,8 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
|||||||
|
|
||||||
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
|
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
|
||||||
|
|
||||||
|
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
|
||||||
|
|
||||||
let language: Language = tree_sitter_ruby::LANGUAGE.into();
|
let language: Language = tree_sitter_ruby::LANGUAGE.into();
|
||||||
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
|
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
|
||||||
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
|
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
|
||||||
@@ -94,6 +98,13 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
|||||||
.try_for_each(|line| {
|
.try_for_each(|line| {
|
||||||
let mut diagnostics_writer = diagnostics.logger();
|
let mut diagnostics_writer = diagnostics.logger();
|
||||||
let path = PathBuf::from(line).canonicalize()?;
|
let path = PathBuf::from(line).canonicalize()?;
|
||||||
|
match &overlay_changed_files {
|
||||||
|
Some(changed_files) if !changed_files.contains(&path) => {
|
||||||
|
// We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
|
||||||
|
return Result::Ok(());
|
||||||
|
}
|
||||||
|
_ => {},
|
||||||
|
}
|
||||||
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
|
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
|
||||||
let mut source = std::fs::read(&path)?;
|
let mut source = std::fs::read(&path)?;
|
||||||
let mut needs_conversion = false;
|
let mut needs_conversion = false;
|
||||||
@@ -212,6 +223,12 @@ pub fn run(options: Options) -> std::io::Result<()> {
|
|||||||
let mut trap_writer = trap::Writer::new();
|
let mut trap_writer = trap::Writer::new();
|
||||||
extractor::populate_empty_location(&mut trap_writer);
|
extractor::populate_empty_location(&mut trap_writer);
|
||||||
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
|
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
|
||||||
|
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
|
||||||
|
// We're extracting an overlay base. For now, we don't have any metadata we need to store
|
||||||
|
// that would get read when extracting the overlay, but the CLI expects us to write
|
||||||
|
// *something*. An empty file will do.
|
||||||
|
std::fs::write(output_path, b"")?;
|
||||||
|
}
|
||||||
tracing::info!("Extraction complete");
|
tracing::info!("Extraction complete");
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
@@ -302,6 +319,41 @@ fn skip_space(content: &[u8], index: usize) -> usize {
|
|||||||
}
|
}
|
||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the relevant environment variable has been set by the CLI, indicating that we are extracting
|
||||||
|
* an overlay, this function reads the JSON file at the path given by its value, and returns a set
|
||||||
|
* of canonicalized paths of source files that have changed and should therefore be extracted.
|
||||||
|
*
|
||||||
|
* If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
|
||||||
|
* cannot be read, this function returns `None`. In that case, all files should be extracted.
|
||||||
|
*/
|
||||||
|
fn get_overlay_changed_files() -> Option<HashSet<PathBuf>> {
|
||||||
|
let path = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES").ok()?;
|
||||||
|
let file_content = fs::read_to_string(path).ok()?;
|
||||||
|
let json_value: serde_json::Value = serde_json::from_str(&file_content).ok()?;
|
||||||
|
|
||||||
|
// The JSON file is expected to have the following structure:
|
||||||
|
// {
|
||||||
|
// "changes": [
|
||||||
|
// "relative/path/to/changed/file1.rb",
|
||||||
|
// "relative/path/to/changed/file2.rb",
|
||||||
|
// ...
|
||||||
|
// ]
|
||||||
|
// }
|
||||||
|
json_value
|
||||||
|
.get("changes")?
|
||||||
|
.as_array()?
|
||||||
|
.iter()
|
||||||
|
.map(|change| {
|
||||||
|
change
|
||||||
|
.as_str()
|
||||||
|
.map(|s| PathBuf::from(s).canonicalize().ok())
|
||||||
|
.flatten()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
|
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
// skip UTF-8 BOM marker if there is one
|
// skip UTF-8 BOM marker if there is one
|
||||||
|
|||||||
Reference in New Issue
Block a user