From 0e0ec89e60653fb83ec06334f6a75b926fc295f8 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 17 Feb 2023 13:20:06 +0000 Subject: [PATCH] QL: Add tool for extracting blame info I had some trouble getting this to work with version 1.54 of the Rust toolchain, so I had to bump it up to 1.59. --- ql/Cargo.lock | Bin 15321 -> 21930 bytes ql/Cargo.toml | 1 + ql/buramu/Cargo.toml | 12 +++++ ql/buramu/README.md | 22 +++++++++ ql/buramu/src/main.rs | 108 +++++++++++++++++++++++++++++++++++++++++ ql/rust-toolchain.toml | 2 +- 6 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 ql/buramu/Cargo.toml create mode 100644 ql/buramu/README.md create mode 100644 ql/buramu/src/main.rs diff --git a/ql/Cargo.lock b/ql/Cargo.lock index b0c217453ed40151e64bd34a251109d4d6ae64c4..8bcae25b4aa2f4b96581f96c22f382a1dfc51b9f 100644 GIT binary patch delta 5257 zcmZ`-OKhB171r3TiJhd0TRV2@v~k>|Rh!P}z8|8MjYK67h$cm_t?uL8WT-vk&e%=T z6X%?k7F_@Wweqq+L5NKy)K+3sgjj;arecWz!FT`3!;_>^66gOtbMLw5obPe& z=huGr^UnL8Tbw$yxaX6fwJ(Qq)_cz$7hV>`TgE_klllv!D?rHo3u;;iyAN)}Yq#rj~F z&CwN|ISpOyswZUgwYitKxQQ&t>~SzIgo;8iZLPyQ8>34)$3i@k zi$OVK9obm4rKC`9tvGh!E6cC+!yt6``a9Hpb2y;(JUHICWbQ^rMSt;m^s?zF*PfpUsGGGSd{ zs(9y%wdhWbPuS>ls?|DJJksfOOs&gi!nc{qITYuFv^mM>vMWBw#2G`$nTZ-zkX$Mz zy;4l8EM#KNvZMjlM^>6Ijx97FE-W;EJ$j`1)4XloahEm$6>?mQXuOCqh%7A@Lt{Cs zmD19_ zgd8xby-O}=%VN|SNES*a8+`Uzhnn@~*GGNx(6N`AM`tf>sS=Tw60CPt)ZWh#iz-pL zRgCc*umjeOM%ch}c7|BQk|KDWz3^>6HlH3jw|xEjRHN`7>v`U`Jl2%Tp_Pr%!J@Ba zL`w@Da|Va7Kv7@^!w@y1S0v~hRwNlp%f z=ul~xGYNbPPQ?+NQqpKcIu#2PC9jiFLUC~CkdI2bB?;=-0H!fkAKo> zK7Qo%tmw7a+ycLIVH7JQ0gPB8`rcaTlPN4a1_DR|%mRTV>7!vBNg-Jj02Bz@w@v@{ ziL>j|w0SruRi*Ril zZY2!9OmV9iJ&5M&`)OY9UJd=%_U~o)ds%zwL0@W~Jsq}?7#T=71&uUTL0t_8Z2(v~ zND3Aj2mOKAA_4NO{$RP({w>gij`HC+waGn0k_N9Ig| zLtZrOj*~@Zr*Iz7orf?ws&oxFT38^9&TvoxO(uXtn3WZl$EH$N!>5yIgnCD z+a!;kkx4?3bYLDl4~6pZSJ;hl@D2mhDi2>pL8$B@0h9^))i%{kN;UZhe_Q;~%&ffy zI552PSW=`+x=3(q2r2;)8o&a>R<0^pf#0FdnPLXnLKqbsPSB+^Y~>X6VBw&<{LPuQ zJGNoE7`&%+dkP^m^d4ForDh%>!kreKgr{gCs-)2b-eq4vjz*eXigwv1Q>->V0p1h! zIW}J{N^40#&1+BGd7921>xmwpY^>pgP$!iN69rAALWBg{g02uC03B79sJ}2^Y9W;- zOH5h;)09lgDc{<7q3JHKG~<=kj-FUfus0Yr2M$T1h!q2nF0=Ha2pNQj-XU+`JjY`J znmATNz3|p*n8CX2$*b@%{k?98eR$nUSud=ey`J+NU6>iwkP1I(&t)2!%EPq+E&kfK((!t zAYV)cz&wOW2C1^q3fhXkGHo7ybMeT*cVSakFok5HMF7t!I0Fga&n2pBBr;g-ACRaM$A|Gxu`s>sXaSIt zw@Okr&Sr(DM7el`K_OTP*##L8R~T+m3=CZd>!vCx1N~^-bDh=Z`1#+jPOQUy_GMC} zoD=-629ZOkBtt97Lhx5m^lE4;6?#x3q!T1sK-Fl<2wklf&hRMOM*HOaA7^JctI*gC zn%PK&F#jlspf*yW4B&86qccui)&BpQAh zjff4AMR@wkR_`wyZT@xasg-tWI(&xe7;e!bkC;jWv=HJ1OHlOzo(*#$>?}70^D!C} zj}gk`&^J&IQ&|*sfUBFB^5fh6II*};tA0CE>&P%@NJ!dRiuo&`q&MhL;A*edIZ+t(}YeaG<1t)XLmd3|Fl zl=0OtqFh6~F!lcaN#Rg{u-KS0ZDO#CN#f!MP{AAA1p&T6rvTTD8w`%&6Z1c^WwTo(};q!xxR!!=^ot&pTOJ5RdaLpRP&da_2%)f Ge)NCXYy2Sq delta 1879 zcmYk7ORHT)5QW({qREX$#oU`Kl0>7ykY`u*qq<4RJy8&Kpx1~_M5MdA+XUhxBIwA+ zPe>a7fZ{|a${9KkrxH+SIuI2>bfDl|!F9F>!r{Q#XYcB&Ro`0sqmQTW-uUF%X^rCe zX6wlXH7nYyZxX!Znw{_BxQ3RJW-l@NXo^?Qsj9@DJ5kdX@WXKMp^rahWBfH8S?=BS z!L;Yd$@P1WT)A9yERG&s{(RHh(-^nxA6a%5+= z*j_`9P9vcKHE3ehx@M<1$m=}7-`jh~_qVT)KlW`;G?xv|do`LVbyK;QE}lv7^x-I<2d(4fw zyB;eQ$lQCj(t6Owk?LIV$2-!VS6lVc zn{}O|=Tf*8kHuM2tfFMM*5kT(Z>36e1Q06#*cxvh{C48;_14b9x>SAkiJ)IBE;XuT ztT>@Sk}cP&6#!doEx8pM(L}J#tA}i~VO4JV-QoAvc7@Y^Ts^Y0owE&8UC%8i8;mu< zWy6BgJA|kaCwL642{eRQx(^xMJC40d(tkJp;~neE!}mY7HqJe`F|HlFc)kbYV++o* zQ=w+KC>Qdkxo(R_j-eJvX-#6X@JNOa3bkBJPqmH3*#qmB+G{Uee*IJ~m&fj7=O(+_ z&Vpjblg%1|J5rCf6=Srt?6SgHAz0(P&jQLibcVoP1)o;yJj%*W%ioWEIIRd;gw~~c z!Q%>xa|MYF;8g`OO@wLH6WJ2cxYFBVB<;4DJ~ zL1IfHp%*P&**IaOSRY>dd2S>EF8SBf7<{ z8fvaCxgN;u^5X#OhgK@i)Xc&0rTKFcF7S2$GOfC zBejip=wEC=5Q&8Z>@r*FCU8i8TyuAiU+ovmOa8{LDb4(Fucj73MkJ{T(HH|RfpFh4 z!dapRJkB9F*~9y&QbN~NQQVX(q2|`|hxFOBeCymaji=+zw)Yei@h;I4sW%1KsS!d2 z^}%G7ge{S-TuzV&@FKGV;siO0*AYFiISxK_>o|LUYq{8d_2%XNNABD;Cx_QSB_qiZ z5gu`ynJpPci<|&TKrjmkz%GIVX4PcI`w6mfZ7s_z=8eY=%{gpaUA^EOm@C+y1OyQh ygB6peoV#}}h9+4}iWLO(-U8l!4e9EUZFV-!^~c|tmd7ssHjO<`Z;sv1y#Eg$1tn7e diff --git a/ql/Cargo.toml b/ql/Cargo.toml index 7acbd36afa6..4bc60c3333d 100644 --- a/ql/Cargo.toml +++ b/ql/Cargo.toml @@ -4,4 +4,5 @@ members = [ "extractor", "generator", "node-types", + "buramu", ] diff --git a/ql/buramu/Cargo.toml b/ql/buramu/Cargo.toml new file mode 100644 index 00000000000..8e09c6c1395 --- /dev/null +++ b/ql/buramu/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "buramu" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lazy_static = "1.4.0" +chrono = "0.4.23" +rayon = "1.5.0" +regex = "1.7.1" diff --git a/ql/buramu/README.md b/ql/buramu/README.md new file mode 100644 index 00000000000..0c2bcb7ed3c --- /dev/null +++ b/ql/buramu/README.md @@ -0,0 +1,22 @@ +Gathers up `git blame` information for all lines with `deprecated` annotations in QL files in the +codebase. + +## Usage + +From within the root of the `codeql` repo (having first run the `create-extractor-pack.sh` script): +``` + ./ql/target/release/buramu > deprecated.blame +``` + +## Output +The contents of the `deprecated.blame` file will look something like this: +``` +today: 2023-02-17 +file: cpp/ql/lib/semmle/code/cpp/security/TaintTrackingImpl.qll + last_modified: 2022-11-25 124 167 173 184 188 329 358 400 415 546 553 584 593 +file: go/ql/lib/semmle/go/security/FlowSources.qll + last_modified: 2022-12-19 33 +file: python/ql/src/experimental/semmle/python/Concepts.qll + last_modified: 2022-08-18 172 202 + last_modified: 2022-03-11 94 110 129 145 177 206 225 241 258 272 289 303 454 485 529 570 +``` diff --git a/ql/buramu/src/main.rs b/ql/buramu/src/main.rs new file mode 100644 index 00000000000..f91ba8352ef --- /dev/null +++ b/ql/buramu/src/main.rs @@ -0,0 +1,108 @@ +use lazy_static::lazy_static; +use rayon::prelude::*; +use regex::Regex; +use std::collections::HashMap; +use std::{io::BufRead, process::Command}; + +// A map from filenames to lists of line numbers (for just the lines with deprecations) +type FileDeprecations = HashMap>; + +fn get_filename_and_lineno(line: &str) -> (String, String) { + let mut parts = line.splitn(3, ':'); + let file = parts.next().unwrap().to_string(); + let lineno = parts.next().unwrap().to_string(); + (file, lineno) +} + +#[test] +fn test_get_filename_and_lineno() { + let line = "path/to/file.ql:61:deprecated class Foo = Bar;"; + let (file, lineno) = get_filename_and_lineno(line); + assert_eq!(file, "path/to/file.ql"); + assert_eq!(lineno, "61"); +} + +fn get_files_with_deprecations() -> FileDeprecations { + let output = Command::new("git") + .args(&[ + "grep", + "-n", + "-E", + "^[^*]*deprecated", // skip lines that have a `*` before `deprecated`, as they are probably comments + "--", + "*.ql", + "*.qll", + ]) + .output() + .expect("failed to execute process"); + let mut file_deprecations: FileDeprecations = HashMap::new(); + for line in output.stdout.lines() { + let (file, lineno) = get_filename_and_lineno(&line.unwrap()); + file_deprecations + .entry(file) + .or_insert_with(Vec::new) + .push(lineno); + } + file_deprecations +} + +struct LastModifiedLine { + date: String, + lineno: String, +} +type LastModifiedMap = HashMap>; + +fn get_blame_dates_for_filedeprecation(file: &str, linenos: &[String]) -> LastModifiedMap { + let mut command = Command::new("git"); + command.arg("blame"); + for lineno in linenos { + command.arg("-L").arg(format!("{},{}", lineno, lineno)); + } + command.arg(file); + let output = command.output().expect("failed to execute process"); + let mut blame_dates = HashMap::new(); + for line in output.stdout.lines() { + let line = line.unwrap(); + let LastModifiedLine { date, lineno } = get_last_modified(&line); + blame_dates + .entry(date) + .or_insert_with(Vec::new) + .push(lineno); + } + blame_dates +} + +lazy_static! { + static ref BLAME_RE: Regex = + Regex::new("(\\d{4}-\\d{2}-\\d{2}).*[+-]\\d{4}\\s+(\\d+)\\)").unwrap(); +} + +fn get_last_modified(line: &str) -> LastModifiedLine { + let caps = BLAME_RE.captures(line).unwrap(); + let date = caps.get(1).unwrap().as_str().into(); + let lineno = caps.get(2).unwrap().as_str().into(); + LastModifiedLine { date, lineno } +} + +#[test] +fn test_get_date_and_lineno() { + let line = "cc7a9ef97a78 (john doe 2022-08-24 12:59:07 +0200 61) deprecated class Foo = Bar;"; + let LastModifiedLine { date, lineno } = get_last_modified(line); + assert_eq!(date, "2022-08-24"); + assert_eq!(lineno, "61"); +} + +fn main() { + let filedeprecations = get_files_with_deprecations(); + let filedeprecations: Vec<(String, Vec)> = filedeprecations.into_iter().collect(); + println!("today: {}", chrono::Local::now().format("%Y-%m-%d")); + let deprecations = filedeprecations + .par_iter() + .map(|(file, linenos)| (file, get_blame_dates_for_filedeprecation(file, linenos))); + deprecations.for_each(|(file, linenos_and_dates)| { + println!("file: {}", file); + for (date, linenos) in linenos_and_dates.iter() { + println!(" last_modified: {} {}", date, linenos.join(" ")); + } + }); +} diff --git a/ql/rust-toolchain.toml b/ql/rust-toolchain.toml index c0ca7a2593a..38ca5da4f14 100644 --- a/ql/rust-toolchain.toml +++ b/ql/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.54" +channel = "1.59" profile = "minimal" components = [ "rustfmt" ]