mirror of
https://github.com/github/codeql.git
synced 2026-02-20 08:53:49 +01:00
2
.github/workflows/dataset_measure.yml
vendored
2
.github/workflows/dataset_measure.yml
vendored
@@ -27,6 +27,7 @@ jobs:
|
||||
unzip -q codeql-linux64.zip
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
CODEQL_THREADS: 4 # TODO: remove this once it's set by the CLI
|
||||
- uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
@@ -46,6 +47,7 @@ jobs:
|
||||
run: |
|
||||
codeql/codeql database create \
|
||||
--search-path "${{ github.workspace }}" \
|
||||
--threads 4 \
|
||||
--language ruby --source-root "${{ github.workspace }}/repo" \
|
||||
"${{ runner.temp }}/database"
|
||||
- name: Measure database
|
||||
|
||||
110
Cargo.lock
generated
110
Cargo.lock
generated
@@ -108,6 +108,12 @@ dependencies = [
|
||||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_fn"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.2.1"
|
||||
@@ -117,6 +123,58 @@ dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"const_fn",
|
||||
"crossbeam-utils",
|
||||
"lazy_static",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if 1.0.0",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.19"
|
||||
@@ -206,6 +264,15 @@ version = "2.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.4.3"
|
||||
@@ -243,6 +310,16 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.0"
|
||||
@@ -267,6 +344,31 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"crossbeam-deque",
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"lazy_static",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.2"
|
||||
@@ -302,6 +404,8 @@ dependencies = [
|
||||
"clap",
|
||||
"flate2",
|
||||
"node-types",
|
||||
"num_cpus",
|
||||
"rayon",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter",
|
||||
@@ -339,6 +443,12 @@ version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
|
||||
@@ -14,3 +14,5 @@ tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git"
|
||||
clap = "2.33"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.2", features = ["env-filter"] }
|
||||
rayon = "1.5.0"
|
||||
num_cpus = "1.13.0"
|
||||
|
||||
@@ -148,55 +148,40 @@ impl TrapWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Extractor {
|
||||
pub parser: Parser,
|
||||
pub schema: NodeTypeMap,
|
||||
}
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io::Result<Program> {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
file = %path.display()
|
||||
);
|
||||
|
||||
let _enter = span.enter();
|
||||
|
||||
info!("extracting: {}", path.display());
|
||||
|
||||
pub fn create(language: Language, schema: NodeTypeMap) -> Extractor {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let source = std::fs::read(&path)?;
|
||||
let tree = parser.parse(&source, None).expect("Failed to parse file");
|
||||
let mut trap_writer = new_trap_writer();
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
||||
let file_label = &trap_writer.populate_file(path);
|
||||
let mut visitor = Visitor {
|
||||
source: &source,
|
||||
trap_writer: trap_writer,
|
||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||
path: format!("{}", path.display()),
|
||||
file_label: *file_label,
|
||||
token_counter: 0,
|
||||
toplevel_child_counter: 0,
|
||||
stack: Vec::new(),
|
||||
schema,
|
||||
};
|
||||
traverse(&tree, &mut visitor);
|
||||
|
||||
Extractor { parser, schema }
|
||||
}
|
||||
|
||||
impl Extractor {
|
||||
/// Extracts the source file at `path`, which is assumed to be canonicalized.
|
||||
pub fn extract<'a>(&'a mut self, path: &Path) -> std::io::Result<Program> {
|
||||
let span = span!(
|
||||
Level::TRACE,
|
||||
"extract",
|
||||
file = %path.display()
|
||||
);
|
||||
|
||||
let _enter = span.enter();
|
||||
|
||||
info!("extracting: {}", path.display());
|
||||
|
||||
let source = std::fs::read(&path)?;
|
||||
let tree = &self
|
||||
.parser
|
||||
.parse(&source, None)
|
||||
.expect("Failed to parse file");
|
||||
let mut trap_writer = new_trap_writer();
|
||||
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
|
||||
let file_label = &trap_writer.populate_file(path);
|
||||
let mut visitor = Visitor {
|
||||
source: &source,
|
||||
trap_writer: trap_writer,
|
||||
// TODO: should we handle path strings that are not valid UTF8 better?
|
||||
path: format!("{}", path.display()),
|
||||
file_label: *file_label,
|
||||
token_counter: 0,
|
||||
toplevel_child_counter: 0,
|
||||
stack: Vec::new(),
|
||||
schema: &self.schema,
|
||||
};
|
||||
traverse(&tree, &mut visitor);
|
||||
|
||||
&self.parser.reset();
|
||||
Ok(Program(visitor.trap_writer.trap_output))
|
||||
}
|
||||
parser.reset();
|
||||
Ok(Program(visitor.trap_writer.trap_output))
|
||||
}
|
||||
|
||||
/// Normalizes the path according the common CodeQL specification. Assumes that
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
mod extractor;
|
||||
|
||||
extern crate num_cpus;
|
||||
|
||||
use clap;
|
||||
use flate2::write::GzEncoder;
|
||||
use rayon::prelude::*;
|
||||
use std::fs;
|
||||
use std::io::{BufRead, BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -42,6 +45,39 @@ impl TrapCompression {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the number of threads the extractor should use, by reading the
|
||||
* CODEQL_THREADS environment variable and using it as described in the
|
||||
* extractor spec:
|
||||
*
|
||||
* "If the number is positive, it indicates the number of threads that should
|
||||
* be used. If the number is negative or zero, it should be added to the number
|
||||
* of cores available on the machine to determine how many threads to use
|
||||
* (minimum of 1). If unspecified, should be considered as set to 1."
|
||||
*/
|
||||
fn num_codeql_threads() -> usize {
|
||||
match std::env::var("CODEQL_THREADS") {
|
||||
// Use 1 thread if the environment variable isn't set.
|
||||
Err(_) => 1,
|
||||
|
||||
Ok(num) => match num.parse::<i32>() {
|
||||
Ok(num) if num <= 0 => {
|
||||
let reduction = -num as usize;
|
||||
num_cpus::get() - reduction
|
||||
}
|
||||
Ok(num) => num as usize,
|
||||
|
||||
Err(_) => {
|
||||
tracing::error!(
|
||||
"Unable to parse CODEQL_THREADS value '{}'; defaulting to 1 thread.",
|
||||
&num
|
||||
);
|
||||
1
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_target(false)
|
||||
@@ -50,6 +86,21 @@ fn main() -> std::io::Result<()> {
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let num_threads = num_codeql_threads();
|
||||
tracing::info!(
|
||||
"Using {} {}",
|
||||
num_threads,
|
||||
if num_threads == 1 {
|
||||
"thread"
|
||||
} else {
|
||||
"threads"
|
||||
}
|
||||
);
|
||||
rayon::ThreadPoolBuilder::new()
|
||||
.num_threads(num_threads)
|
||||
.build_global()
|
||||
.unwrap();
|
||||
|
||||
let matches = clap::App::new("Ruby extractor")
|
||||
.version("1.0")
|
||||
.author("GitHub")
|
||||
@@ -76,12 +127,13 @@ fn main() -> std::io::Result<()> {
|
||||
|
||||
let language = tree_sitter_ruby::language();
|
||||
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
|
||||
let mut extractor = extractor::create(language, schema);
|
||||
for line in std::io::BufReader::new(file_list).lines() {
|
||||
let path = PathBuf::from(line?).canonicalize()?;
|
||||
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
|
||||
let lines = lines?;
|
||||
lines.par_iter().try_for_each(|line| {
|
||||
let path = PathBuf::from(line).canonicalize()?;
|
||||
let trap_file = path_for(&trap_dir, &path, trap_compression.extension());
|
||||
let src_archive_file = path_for(&src_archive_dir, &path, "");
|
||||
let trap = extractor.extract(&path)?;
|
||||
let trap = extractor::extract(language, &schema, &path)?;
|
||||
std::fs::create_dir_all(&src_archive_file.parent().unwrap())?;
|
||||
std::fs::copy(&path, &src_archive_file)?;
|
||||
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
|
||||
@@ -89,15 +141,14 @@ fn main() -> std::io::Result<()> {
|
||||
let mut trap_file = BufWriter::new(trap_file);
|
||||
match trap_compression {
|
||||
TrapCompression::None => {
|
||||
write!(trap_file, "{}", trap)?;
|
||||
write!(trap_file, "{}", trap)
|
||||
}
|
||||
TrapCompression::Gzip => {
|
||||
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
|
||||
write!(compressed_writer, "{}", trap)?;
|
||||
write!(compressed_writer, "{}", trap)
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
})
|
||||
}
|
||||
|
||||
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
|
||||
|
||||
Reference in New Issue
Block a user