QL: Add tool for extracting blame info

I had some trouble getting this to work with version 1.54 of the Rust
toolchain, so I had to bump it up to 1.59.
This commit is contained in:
Taus
2023-02-17 13:20:06 +00:00
parent 430b432add
commit 0e0ec89e60
6 changed files with 144 additions and 1 deletions

BIN
ql/Cargo.lock generated

Binary file not shown.

View File

@@ -4,4 +4,5 @@ members = [
"extractor",
"generator",
"node-types",
"buramu",
]

12
ql/buramu/Cargo.toml Normal file
View File

@@ -0,0 +1,12 @@
[package]
name = "buramu"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
lazy_static = "1.4.0"
chrono = "0.4.23"
rayon = "1.5.0"
regex = "1.7.1"

22
ql/buramu/README.md Normal file
View File

@@ -0,0 +1,22 @@
Gathers up `git blame` information for all lines with `deprecated` annotations in QL files in the
codebase.
## Usage
From within the root of the `codeql` repo (having first run the `create-extractor-pack.sh` script):
```
./ql/target/release/buramu > deprecated.blame
```
## Output
The contents of the `deprecated.blame` file will look something like this:
```
today: 2023-02-17
file: cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll
last_modified: 2022-11-25 124 167 173 184 188 329 358 400 415 546 553 584 593
file: go/ql/lib/semmle/go/security/FlowSources.qll
last_modified: 2022-12-19 33
file: python/ql/src/experimental/semmle/python/Concepts.qll
last_modified: 2022-08-18 172 202
last_modified: 2022-03-11 94 110 129 145 177 206 225 241 258 272 289 303 454 485 529 570
```

108
ql/buramu/src/main.rs Normal file
View File

@@ -0,0 +1,108 @@
use lazy_static::lazy_static;
use rayon::prelude::*;
use regex::Regex;
use std::collections::HashMap;
use std::{io::BufRead, process::Command};
// A map from filenames to lists of line numbers (for just the lines with deprecations).
// Line numbers are kept as the strings split out of the `git grep -n` output; they are
// never parsed into integers.
type FileDeprecations = HashMap<String, Vec<String>>;
/// Splits one `file:lineno:content` line into its `(file, lineno)` prefix.
///
/// Only the first two `:`-separated fields are returned; everything after the
/// second colon (the matched source text, which may itself contain colons) is
/// ignored.
///
/// # Panics
///
/// Panics if `line` contains no `:` at all.
fn get_filename_and_lineno(line: &str) -> (String, String) {
    // At most three pieces: path, line number, and the untouched remainder.
    let mut fields = line.splitn(3, ':').map(String::from);
    let path = fields.next().unwrap();
    let number = fields.next().unwrap();
    (path, number)
}
// Checks that a typical `git grep -n` line is split into its path and line number.
#[test]
fn test_get_filename_and_lineno() {
    let (file, lineno) =
        get_filename_and_lineno("path/to/file.ql:61:deprecated class Foo = Bar;");
    assert_eq!(
        (file.as_str(), lineno.as_str()),
        ("path/to/file.ql", "61")
    );
}
fn get_files_with_deprecations() -> FileDeprecations {
let output = Command::new("git")
.args(&[
"grep",
"-n",
"-E",
"^[^*]*deprecated", // skip lines that have a `*` before `deprecated`, as they are probably comments
"--",
"*.ql",
"*.qll",
])
.output()
.expect("failed to execute process");
let mut file_deprecations: FileDeprecations = HashMap::new();
for line in output.stdout.lines() {
let (file, lineno) = get_filename_and_lineno(&line.unwrap());
file_deprecations
.entry(file)
.or_insert_with(Vec::new)
.push(lineno);
}
file_deprecations
}
/// The pieces of a single `git blame` output line that this tool cares about.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LastModifiedLine {
    /// Date of the blamed commit, as the `YYYY-MM-DD` text captured from the blame line.
    date: String,
    /// Line number within the file, kept as the text captured from the blame line.
    lineno: String,
}
// A map from a blame date (`YYYY-MM-DD`) to the line numbers whose blame reports that date.
type LastModifiedMap = HashMap<String, Vec<String>>;
fn get_blame_dates_for_filedeprecation(file: &str, linenos: &[String]) -> LastModifiedMap {
let mut command = Command::new("git");
command.arg("blame");
for lineno in linenos {
command.arg("-L").arg(format!("{},{}", lineno, lineno));
}
command.arg(file);
let output = command.output().expect("failed to execute process");
let mut blame_dates = HashMap::new();
for line in output.stdout.lines() {
let line = line.unwrap();
let LastModifiedLine { date, lineno } = get_last_modified(&line);
blame_dates
.entry(date)
.or_insert_with(Vec::new)
.push(lineno);
}
blame_dates
}
lazy_static! {
    /// Matches the header of a default-format `git blame` line, e.g.
    /// `cc7a9ef97a78 (john doe 2022-08-24 12:59:07 +0200 61) ...`.
    ///
    /// Capture group 1 is the commit date (`YYYY-MM-DD`) and group 2 is the
    /// line number that precedes the header's closing `)`.
    ///
    /// The gap between the date and the timezone offset is `[^)]*` rather
    /// than `.*`, so the match can never extend past the header's closing
    /// parenthesis into the blamed source text. With a greedy `.*`, source
    /// text that happened to end in something like `-1234 56)` would have
    /// been captured as the line number.
    static ref BLAME_RE: Regex =
        Regex::new(r"(\d{4}-\d{2}-\d{2})[^)]*[+-]\d{4}\s+(\d+)\)").unwrap();
}
/// Extracts the commit date and line number from one line of `git blame` output.
///
/// # Panics
///
/// Panics if `line` does not match `BLAME_RE`.
fn get_last_modified(line: &str) -> LastModifiedLine {
    let groups = BLAME_RE.captures(line).unwrap();
    // Both capture groups are mandatory parts of the pattern.
    let field = |idx: usize| -> String { groups.get(idx).unwrap().as_str().to_owned() };
    LastModifiedLine {
        date: field(1),
        lineno: field(2),
    }
}
// Checks that the date and line number are pulled out of a typical `git blame` line.
#[test]
fn test_get_date_and_lineno() {
    let parsed = get_last_modified(
        "cc7a9ef97a78 (john doe 2022-08-24 12:59:07 +0200 61) deprecated class Foo = Bar;",
    );
    assert_eq!(parsed.date, "2022-08-24");
    assert_eq!(parsed.lineno, "61");
}
/// Prints today's date followed by, for every file containing deprecations,
/// the blame dates of its deprecated lines.
///
/// The per-file `git blame` calls run in parallel via rayon. Each file's
/// report is assembled into one string and written with a single `print!`
/// call, so reports from different worker threads cannot interleave. Writing
/// the `file:` header and its `last_modified:` lines with separate `println!`
/// calls would let another thread's output land between them and attach
/// dates to the wrong file.
fn main() {
    let filedeprecations: Vec<(String, Vec<String>)> =
        get_files_with_deprecations().into_iter().collect();
    println!("today: {}", chrono::Local::now().format("%Y-%m-%d"));
    filedeprecations.par_iter().for_each(|(file, linenos)| {
        let linenos_and_dates = get_blame_dates_for_filedeprecation(file, linenos);
        let mut report = format!("file: {}\n", file);
        for (date, linenos) in linenos_and_dates.iter() {
            report.push_str(&format!(" last_modified: {} {}\n", date, linenos.join(" ")));
        }
        // One `print!` holds the stdout lock for the whole report.
        print!("{}", report);
    });
}

View File

@@ -2,6 +2,6 @@
# extractor. It is set to the lowest version of Rust we want to support.
[toolchain]
channel = "1.54"
channel = "1.59"
profile = "minimal"
components = [ "rustfmt" ]