mirror of
https://github.com/github/codeql.git
synced 2026-05-02 12:15:17 +02:00
Rust: fetch ungram and rust-analyzer code instead of checking it in
* The ungram file is now taken from the rust-analyzer dependencies pulled in by bazel. * The grammar parsing code is not published, so it must be taken directly from the rust-analyzer source. That part should be less prone to updates than the ungram file, so it does not necessarily need to be in sync with the rust-analyzer version used elsewhere. * Both need some patches: the former is patched during build, the latter during loading in `MODULE.bazel`.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,287 +0,0 @@
|
||||
//! Defines input for code generation process.
|
||||
|
||||
use quote::ToTokens;
|
||||
|
||||
use crate::codegen::grammar::to_upper_snake_case;
|
||||
|
||||
/// Static input for the syntax-kind code generation step: every punctuation,
/// keyword, literal, token and node kind, as `'static` string tables.
#[derive(Copy, Clone, Debug)]
pub(crate) struct KindsSrc {
    /// Punctuation as `(source text, SCREAMING_SNAKE_CASE name)` pairs.
    pub(crate) punct: &'static [(&'static str, &'static str)],
    /// Keywords that are keywords in every context and edition.
    pub(crate) keywords: &'static [&'static str],
    /// Keywords only in specific parse contexts (see `CONTEXTUAL_KEYWORDS`).
    pub(crate) contextual_keywords: &'static [&'static str],
    /// Literal kinds, derived from `@`-prefixed grammar token names.
    pub(crate) literals: &'static [&'static str],
    /// Remaining token kinds (trivia, errors, `#`-prefixed grammar tokens).
    pub(crate) tokens: &'static [&'static str],
    /// Syntax node kinds, derived from the grammar's nodes and enums.
    pub(crate) nodes: &'static [&'static str],
    /// Keywords that only become keywords from a given [`Edition`] onwards.
    pub(crate) edition_dependent_keywords: &'static [(&'static str, Edition)],
}
|
||||
|
||||
/// Rust edition in which an edition-dependent keyword becomes active
/// (see `EDITION_DEPENDENT_KEYWORDS`). Ordered so editions compare
/// chronologically via the derived `Ord`.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(super) enum Edition {
    Edition2015,
    Edition2018,
    Edition2021,
    Edition2024,
}
|
||||
|
||||
impl ToTokens for Edition {
|
||||
fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
|
||||
match self {
|
||||
Edition::Edition2015 => {
|
||||
tokens.extend(quote::quote! { Edition::Edition2015 });
|
||||
}
|
||||
Edition::Edition2018 => {
|
||||
tokens.extend(quote::quote! { Edition::Edition2018 });
|
||||
}
|
||||
Edition::Edition2021 => {
|
||||
tokens.extend(quote::quote! { Edition::Edition2021 });
|
||||
}
|
||||
Edition::Edition2024 => {
|
||||
tokens.extend(quote::quote! { Edition::Edition2024 });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The punctuations of the language, as `(source text, kind name)` pairs.
const PUNCT: &[(&str, &str)] = &[
    // KEEP THE DOLLAR AT THE TOP — IT'S SPECIAL: `generate_kind_src` marks
    // index 0 as used unconditionally, since `$` never appears in the grammar.
    ("$", "DOLLAR"),
    (";", "SEMICOLON"),
    (",", "COMMA"),
    ("(", "L_PAREN"),
    (")", "R_PAREN"),
    ("{", "L_CURLY"),
    ("}", "R_CURLY"),
    ("[", "L_BRACK"),
    ("]", "R_BRACK"),
    ("<", "L_ANGLE"),
    (">", "R_ANGLE"),
    ("@", "AT"),
    ("#", "POUND"),
    ("~", "TILDE"),
    ("?", "QUESTION"),
    ("&", "AMP"),
    ("|", "PIPE"),
    ("+", "PLUS"),
    ("*", "STAR"),
    ("/", "SLASH"),
    ("^", "CARET"),
    ("%", "PERCENT"),
    ("_", "UNDERSCORE"),
    (".", "DOT"),
    ("..", "DOT2"),
    ("...", "DOT3"),
    ("..=", "DOT2EQ"),
    (":", "COLON"),
    ("::", "COLON2"),
    ("=", "EQ"),
    ("==", "EQ2"),
    ("=>", "FAT_ARROW"),
    ("!", "BANG"),
    ("!=", "NEQ"),
    ("-", "MINUS"),
    ("->", "THIN_ARROW"),
    ("<=", "LTEQ"),
    (">=", "GTEQ"),
    ("+=", "PLUSEQ"),
    ("-=", "MINUSEQ"),
    ("|=", "PIPEEQ"),
    ("&=", "AMPEQ"),
    ("^=", "CARETEQ"),
    ("/=", "SLASHEQ"),
    ("*=", "STAREQ"),
    ("%=", "PERCENTEQ"),
    ("&&", "AMP2"),
    ("||", "PIPE2"),
    ("<<", "SHL"),
    (">>", "SHR"),
    ("<<=", "SHLEQ"),
    (">>=", "SHREQ"),
];
|
||||
/// Token kinds that exist outside the grammar proper (trivia and errors);
/// `generate_kind_src` extends this list with `#`-prefixed grammar tokens.
const TOKENS: &[&str] = &["ERROR", "WHITESPACE", "NEWLINE", "COMMENT"];
// &["ERROR", "IDENT", "WHITESPACE", "LIFETIME_IDENT", "COMMENT", "SHEBANG"],;

/// Sentinel token name skipped when walking the grammar's tokens.
const EOF: &str = "EOF";

/// Identifiers always added to the keyword set even though the grammar never
/// references them (see `keywords.extend(RESERVED...)` in `generate_kind_src`).
const RESERVED: &[&str] = &[
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
    "virtual", "yield",
];
|
||||
// Keywords that are keywords only in specific parse contexts; grammar tokens
// matching one of these are excluded from the regular keyword set.
#[doc(alias = "WEAK_KEYWORDS")]
const CONTEXTUAL_KEYWORDS: &[&str] = &[
    "macro_rules",
    "union",
    "default",
    "raw",
    "dyn",
    "auto",
    "yeet",
    "safe",
];
|
||||
// Keywords we use for special macro expansions (asm!, format_args!, ...);
// merged with CONTEXTUAL_KEYWORDS into one contextual-keyword list.
const CONTEXTUAL_BUILTIN_KEYWORDS: &[&str] = &[
    "asm",
    "att_syntax",
    "builtin",
    "clobber_abi",
    "format_args",
    // "in",
    "inlateout",
    "inout",
    "label",
    "lateout",
    "may_unwind",
    "nomem",
    "noreturn",
    "nostack",
    "offset_of",
    "options",
    "out",
    "preserves_flags",
    "pure",
    // "raw",
    "readonly",
    "sym",
];
|
||||
|
||||
// Keywords that are keywords only from the given edition onwards; any grammar
// keyword appearing here is removed from the always-keyword set.
const EDITION_DEPENDENT_KEYWORDS: &[(&str, Edition)] = &[
    ("try", Edition::Edition2018),
    ("dyn", Edition::Edition2018),
    ("async", Edition::Edition2018),
    ("await", Edition::Edition2018),
    ("gen", Edition::Edition2024),
];
|
||||
|
||||
/// Classifies every token of `grammar` and combines the result with the static
/// tables above into a [`KindsSrc`].
///
/// Token names are dispatched on their first character: `@name` becomes a
/// literal kind, `#name` a plain token kind, purely alphabetic names become
/// keywords (unless listed as contextual), and everything else must match an
/// entry in [`PUNCT`]. Node kinds come from `nodes` and `enums`, upper-snake-
/// cased. All resulting strings are intentionally leaked so the returned
/// `KindsSrc` can hold `&'static` slices.
///
/// # Panics
/// Panics if the grammar references punctuation missing from [`PUNCT`], or if
/// an entry of [`PUNCT`] (other than `$`) is never used by the grammar.
pub(crate) fn generate_kind_src(
    nodes: &[AstNodeSrc],
    enums: &[AstEnumSrc],
    grammar: &ungrammar::Grammar,
) -> KindsSrc {
    // Contextual keywords = hand-maintained list + macro-builtin list.
    let mut contextual_keywords: Vec<&_> = CONTEXTUAL_KEYWORDS
        .iter()
        .chain(CONTEXTUAL_BUILTIN_KEYWORDS)
        .copied()
        .collect();

    let mut keywords: Vec<&_> = Vec::new();
    let mut tokens: Vec<&_> = TOKENS.to_vec();
    let mut literals: Vec<&_> = Vec::new();
    // Tracks which PUNCT entries the grammar actually references, by index.
    let mut used_puncts = vec![false; PUNCT.len()];
    // Mark $ as used: it is special-cased at PUNCT index 0 and never appears
    // as a grammar token.
    used_puncts[0] = true;
    grammar.tokens().for_each(|token| {
        let name = &*grammar[token].name;
        if name == EOF {
            return;
        }
        // Dispatch on the first character of the token name.
        match name.split_at(1) {
            ("@", lit) if !lit.is_empty() => {
                literals.push(String::leak(to_upper_snake_case(lit)));
            }
            ("#", token) if !token.is_empty() => {
                tokens.push(String::leak(to_upper_snake_case(token)));
            }
            // Contextual keywords are already accounted for; skip them here.
            _ if contextual_keywords.contains(&name) => {}
            _ if name.chars().all(char::is_alphabetic) => {
                keywords.push(String::leak(name.to_owned()));
            }
            _ => {
                // Anything else must be a known punctuation token.
                let idx = PUNCT
                    .iter()
                    .position(|(punct, _)| punct == &name)
                    .unwrap_or_else(|| panic!("Grammar references unknown punctuation {name:?}"));
                used_puncts[idx] = true;
            }
        }
    });
    // Every PUNCT entry (except the pre-marked `$`) must be used, otherwise
    // the table is stale relative to the grammar.
    PUNCT
        .iter()
        .zip(used_puncts)
        .filter(|(_, used)| !used)
        .for_each(|((punct, _), _)| {
            panic!("Punctuation {punct:?} is not used in grammar");
        });
    keywords.extend(RESERVED.iter().copied());
    keywords.sort();
    keywords.dedup();
    contextual_keywords.sort();
    contextual_keywords.dedup();
    let mut edition_dependent_keywords: Vec<(&_, _)> = EDITION_DEPENDENT_KEYWORDS.to_vec();
    edition_dependent_keywords.sort();
    edition_dependent_keywords.dedup();

    // A keyword that is contextual or edition-dependent must not also appear
    // in the unconditional keyword list.
    keywords.retain(|&it| !contextual_keywords.contains(&it));
    keywords.retain(|&it| !edition_dependent_keywords.iter().any(|&(kw, _)| kw == it));

    // we leak things here for simplicity, that way we don't have to deal with lifetimes
    // The execution is a one shot job so thats fine
    let nodes = nodes
        .iter()
        .map(|it| &it.name)
        .chain(enums.iter().map(|it| &it.name))
        .map(|it| to_upper_snake_case(it))
        .map(String::leak)
        .map(|it| &*it)
        .collect();
    let nodes = Vec::leak(nodes);
    nodes.sort();
    let keywords = Vec::leak(keywords);
    let contextual_keywords = Vec::leak(contextual_keywords);
    let edition_dependent_keywords = Vec::leak(edition_dependent_keywords);
    let literals = Vec::leak(literals);
    literals.sort();
    let tokens = Vec::leak(tokens);
    tokens.sort();

    KindsSrc {
        punct: PUNCT,
        nodes,
        keywords,
        contextual_keywords,
        edition_dependent_keywords,
        literals,
        tokens,
    }
}
|
||||
|
||||
/// Lowered AST description: the full set of tokens, nodes and enums extracted
/// from the grammar, as owned data.
#[derive(Default, Debug)]
pub(crate) struct AstSrc {
    // Token kind names.
    pub(crate) tokens: Vec<String>,
    // Struct-like AST nodes with their fields.
    pub(crate) nodes: Vec<AstNodeSrc>,
    // Enum-like AST nodes with their variants.
    pub(crate) enums: Vec<AstEnumSrc>,
}
|
||||
|
||||
/// A struct-like AST node: its doc comments, name, implemented traits and
/// accessor fields.
#[derive(Debug)]
pub(crate) struct AstNodeSrc {
    // Doc-comment lines attached to the node.
    pub(crate) doc: Vec<String>,
    pub(crate) name: String,
    // Names of traits this node implements in the generated code.
    pub(crate) traits: Vec<String>,
    pub(crate) fields: Vec<Field>,
}
|
||||
|
||||
/// One field of an [`AstNodeSrc`]: either a token or a child node
/// (with accessor name, node type and cardinality).
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Field {
    Token(String),
    Node {
        name: String,
        ty: String,
        cardinality: Cardinality,
    },
}
|
||||
|
||||
/// How many child nodes a [`Field::Node`] may hold: at most one, or any
/// number.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum Cardinality {
    Optional,
    Many,
}
|
||||
|
||||
/// An enum-like AST node: its doc comments, name, implemented traits and the
/// names of its variant nodes.
#[derive(Debug)]
pub(crate) struct AstEnumSrc {
    // Doc-comment lines attached to the enum.
    pub(crate) doc: Vec<String>,
    pub(crate) name: String,
    // Names of traits this enum implements in the generated code.
    pub(crate) traits: Vec<String>,
    pub(crate) variants: Vec<String>,
}
|
||||
@@ -10,7 +10,7 @@ use ungrammar::Grammar;
|
||||
|
||||
/// Returns the parent of the crate's manifest directory.
///
/// Reads `CARGO_MANIFEST_DIR` at runtime only (no compile-time `env!`
/// fallback, so the binary does not bake in the build machine's path).
///
/// # Panics
/// Panics when `CARGO_MANIFEST_DIR` is unset, i.e. when the binary is run
/// outside a build that provides it.
fn project_root() -> PathBuf {
    // NOTE: the scraped diff had the old (env! fallback) and new bodies
    // merged into invalid code; this is the coherent post-change version.
    let dir = env::var("CARGO_MANIFEST_DIR")
        .expect("CARGO_MANIFEST_DIR must be set to locate the project root");
    PathBuf::from(dir).parent().unwrap().to_owned()
}
|
||||
|
||||
@@ -591,10 +591,11 @@ impl Translator<'_> {{
|
||||
}
|
||||
|
||||
fn main() -> std::io::Result<()> {
|
||||
let grammar: Grammar = fs::read_to_string(project_root().join("ast-generator/rust.ungram"))
|
||||
.unwrap()
|
||||
let grammar = PathBuf::from("..").join(env::args().nth(1).expect("grammar file path required"));
|
||||
let grammar: Grammar = fs::read_to_string(&grammar)
|
||||
.expect(&format!("Failed to parse grammar file: {}", grammar.display()))
|
||||
.parse()
|
||||
.unwrap();
|
||||
.expect("Failed to parse grammar");
|
||||
let mut grammar = codegen::grammar::lower(&grammar);
|
||||
|
||||
grammar.enums.retain(|x| x.name != "Adt");
|
||||
|
||||
Reference in New Issue
Block a user