diff --git a/Cargo.lock b/Cargo.lock index fcd7d806843..d3b54343499 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,6 +96,16 @@ version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" +[[package]] +name = "argfile" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1cc0ba69de57db40674c66f7cf2caee3981ddef084388482c95c0e2133e5e8" +dependencies = [ + "fs-err", + "os_str_bytes", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -360,6 +370,7 @@ name = "codeql-rust" version = "0.1.0" dependencies = [ "anyhow", + "argfile", "clap", "codeql-extractor", "figment", @@ -374,6 +385,7 @@ dependencies = [ "ra_ap_project_model", "ra_ap_syntax", "ra_ap_vfs", + "rust-extractor-macros", "serde", "serde_with", "stderrlog", @@ -643,6 +655,15 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "fs-err" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" +dependencies = [ + "autocfg", +] + [[package]] name = "fsevent-sys" version = "4.1.0" @@ -1064,6 +1085,15 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +[[package]] +name = "os_str_bytes" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ac44c994af577c799b1b4bd80dc214701e349873ad894d6cdf96f4f7526e0b9" +dependencies = [ + "memchr", +] + [[package]] name = "overload" version = "0.1.1" @@ -1875,6 +1905,14 @@ dependencies = [ "text-size", ] +[[package]] +name = "rust-extractor-macros" +version = "0.1.0" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "rustc-hash" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 5f095736c8a..4aacef79adc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "shared/tree-sitter-extractor", "ruby/extractor", "rust/extractor", + "rust/extractor/macros", ] [patch.crates-io] diff --git a/rust/extractor/Cargo.toml b/rust/extractor/Cargo.toml index c849ea4aa46..3b474f90f98 100644 --- a/rust/extractor/Cargo.toml +++ b/rust/extractor/Cargo.toml @@ -22,7 +22,6 @@ serde = "1.0.209" serde_with = "3.9.0" stderrlog = "0.6.0" triomphe = "0.1.13" -# Ideally, we'd like to pull this in via a relative path. -# However, our bazel/rust tooling chokes on this, c.f. https://github.com/bazelbuild/rules_rust/issues/1525 -# Therefore, we have a pretty bad hack in place instead, see README.md in the codeql-extractor-fake-crate directory. +argfile = "0.2.1" codeql-extractor = { path = "../../shared/tree-sitter-extractor" } +rust-extractor-macros = { path = "macros" } diff --git a/rust/extractor/macros/Cargo.toml b/rust/extractor/macros/Cargo.toml new file mode 100644 index 00000000000..d4d10bc3bde --- /dev/null +++ b/rust/extractor/macros/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust-extractor-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +quote = "1.0.37" +syn = { version = "2.0.77", features = ["full"] } diff --git a/rust/extractor/macros/src/lib.rs b/rust/extractor/macros/src/lib.rs new file mode 100644 index 00000000000..13472665454 --- /dev/null +++ b/rust/extractor/macros/src/lib.rs @@ -0,0 +1,52 @@ +use proc_macro::TokenStream; +use quote::{quote, format_ident}; +use syn; + + +/// Allow all fields in the extractor config to be also overrideable by extractor CLI flags +#[proc_macro_attribute] +pub fn extractor_cli_config(_attr: TokenStream, item: TokenStream) -> TokenStream { + let ast = syn::parse_macro_input!(item as syn::ItemStruct); + let name = &ast.ident; + let new_name = format_ident!("Cli{}", name); + let fields: Vec<_> = ast.fields.iter().map(|f| { + let id = f.ident.as_ref().unwrap(); + let ty = &f.ty; + if let syn::Type::Path(p) = ty { + if p.path.is_ident(&format_ident!("bool")) { + return quote! { + #[arg(long)] + #id: bool, + }; + } + } + if id == &format_ident!("verbose") { + quote! { + #[arg(long, short, action=clap::ArgAction::Count)] + #id: u8, + } + } else if id == &format_ident!("inputs") { + quote! { + #id: #ty, + } + } else { + quote! { + #[arg(long)] + #id: Option<#ty>, + } + } + }).collect(); + let gen = quote! { + #[serde_with::apply(_ => #[serde(default)])] + #[derive(Debug, Deserialize, Default)] + #ast + + #[serde_with::skip_serializing_none] + #[derive(clap::Parser, Serialize)] + #[command(about, long_about = None)] + struct #new_name { + #(#fields)* + } + }; + gen.into() +} diff --git a/rust/extractor/src/config.rs b/rust/extractor/src/config.rs index 399c2bb9e7e..310ca2c3649 100644 --- a/rust/extractor/src/config.rs +++ b/rust/extractor/src/config.rs @@ -1,14 +1,15 @@ use anyhow::Context; -use clap::{ArgAction, Parser, ValueEnum}; +use clap::Parser; use codeql_extractor::trap; use figment::{ providers::{Env, Serialized}, Figment, }; +use rust_extractor_macros::extractor_cli_config; use serde::{Deserialize, Serialize}; use std::path::PathBuf; -#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, ValueEnum)] +#[derive(Debug, PartialEq, Eq, Default, Serialize, Deserialize, Clone, Copy, clap::ValueEnum)] #[serde(rename_all = "lowercase")] #[clap(rename_all = "lowercase")] pub enum Compression { @@ -26,8 +27,7 @@ impl From for trap::Compression { } } -#[serde_with::apply(_ => #[serde(default)])] -#[derive(Debug, Deserialize, Default)] +#[extractor_cli_config] pub struct Config { pub scratch_dir: PathBuf, pub trap_dir: PathBuf, @@ -38,39 +38,10 @@ pub struct Config { pub inputs: Vec, } -#[serde_with::apply(_ => #[serde(skip_serializing_if = "is_default")])] -#[derive(clap::Parser, Serialize)] -#[command(about, long_about = None)] -struct CliArgs { - #[arg(long)] - scratch_dir: Option, - #[arg(long)] - trap_dir: Option, - #[arg(long)] - source_archive_dir: Option, - #[arg(long)] - compression: Option, - #[arg(short, long, action = ArgAction::Count)] - verbose: u8, - #[arg(long)] - inputs_file: Option, - - inputs: Vec, -} - -fn is_default(t: &T) -> bool { - *t == Default::default() -} - impl Config { pub fn extract() -> anyhow::Result { - let mut cli_args = CliArgs::parse(); - if let Some(inputs_file) = cli_args.inputs_file.take() { - let inputs_list = std::fs::read_to_string(inputs_file).context("reading file list")?; - cli_args - .inputs - .extend(inputs_list.split_terminator("\n").map(PathBuf::from)); - } + let args = argfile::expand_args(argfile::parse_fromfile, argfile::PREFIX)?; + let cli_args = CliConfig::parse_from(args); Figment::new() .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_")) .merge(Env::prefixed("CODEQL_EXTRACTOR_RUST_OPTION_")) diff --git a/rust/tools/index-files.sh b/rust/tools/index-files.sh index da4b841b692..f3d93fbaf4a 100755 --- a/rust/tools/index-files.sh +++ b/rust/tools/index-files.sh @@ -2,4 +2,4 @@ set -eu -exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" --inputs-file="$1" +exec "$CODEQL_EXTRACTOR_RUST_ROOT/tools/$CODEQL_PLATFORM/extractor" @"$1"