mirror of
https://github.com/github/codeql.git
synced 2026-06-06 05:57:07 +02:00
Merge pull request #21924 from asgerf/asgerf/yeast-changes
Yeast: some fixes
This commit is contained in:
@@ -305,7 +305,18 @@ fn convert_nodes(
|
||||
// type.
|
||||
let members: Set<&str> = n_members
|
||||
.iter()
|
||||
.map(|n| nodes.get(n).unwrap().dbscheme_name.as_str())
|
||||
.map(|n| {
|
||||
nodes
|
||||
.get(n)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"union type '{}' references unknown member node type {:?}",
|
||||
node.dbscheme_name, n
|
||||
)
|
||||
})
|
||||
.dbscheme_name
|
||||
.as_str()
|
||||
})
|
||||
.collect();
|
||||
entries.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: &node.dbscheme_name,
|
||||
|
||||
@@ -411,7 +411,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
// Named fields — compute each value into a temp, then reference it
|
||||
while peek_is_field(tokens) {
|
||||
let field_name = expect_ident(tokens, "expected field name")?;
|
||||
let field_str = field_name.to_string();
|
||||
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
|
||||
expect_punct(tokens, ':', "expected `:` after field name")?;
|
||||
let temp = Ident::new(
|
||||
&format!("__field_{field_str}_{field_counter}"),
|
||||
@@ -437,7 +437,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
.map(::std::convert::Into::<usize>::into)
|
||||
.collect();
|
||||
});
|
||||
field_args.push(quote! { (#field_str, #temp) });
|
||||
// An empty splice means the field is absent — skip it
|
||||
// entirely rather than emitting an empty named field.
|
||||
field_args.push(quote! {
|
||||
if !#temp.is_empty() { __fields.push((#field_str, #temp)); }
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -445,7 +449,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
|
||||
let value = parse_direct_node(tokens, ctx)?;
|
||||
stmts.push(quote! { let #temp: usize = #value; });
|
||||
field_args.push(quote! { (#field_str, vec![#temp]) });
|
||||
field_args.push(quote! { __fields.push((#field_str, vec![#temp])); });
|
||||
}
|
||||
|
||||
// After all named fields, no other tokens are allowed.
|
||||
@@ -461,7 +465,9 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
Ok(quote! {
|
||||
{
|
||||
#(#stmts)*
|
||||
#ctx.node(#kind_str, vec![#(#field_args),*])
|
||||
let mut __fields: Vec<(&str, Vec<usize>)> = Vec::new();
|
||||
#(#field_args)*
|
||||
#ctx.node(#kind_str, __fields)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -475,6 +481,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
|
||||
let group = expect_group(tokens, Delimiter::Parenthesis)?;
|
||||
let mut inner = group.stream().into_iter().peekable();
|
||||
|
||||
// Empty `()` represents an empty sequence — emit nothing.
|
||||
if inner.peek().is_none() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Regular node
|
||||
let node = parse_direct_node_inner(&mut inner, ctx)?;
|
||||
items.push(quote! { __nodes.push(#node); });
|
||||
|
||||
@@ -63,16 +63,20 @@ impl Captures {
|
||||
}
|
||||
|
||||
/// Apply a fallible function to every captured id (across all keys),
|
||||
/// replacing each id with the result. Stops and returns the error on
|
||||
/// the first failure.
|
||||
/// replacing each id with the results. A function returning an empty
|
||||
/// vector removes the capture; returning multiple ids splices them
|
||||
/// into the capture's value list (suitable for `*`/`+` captures).
|
||||
/// Stops and returns the error on the first failure.
|
||||
pub fn try_map_all_captures<E>(
|
||||
&mut self,
|
||||
mut f: impl FnMut(Id) -> Result<Id, E>,
|
||||
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
|
||||
) -> Result<(), E> {
|
||||
for ids in self.captures.values_mut() {
|
||||
for id in ids {
|
||||
*id = f(*id)?;
|
||||
let mut new_ids = Vec::with_capacity(ids.len());
|
||||
for &id in ids.iter() {
|
||||
new_ids.extend(f(id)?);
|
||||
}
|
||||
*ids = new_ids;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -273,6 +273,16 @@ fn dump_node(
|
||||
}
|
||||
}
|
||||
|
||||
// Check for required fields that are absent
|
||||
if let Some((schema, _, _)) = type_check {
|
||||
for (field_id, field_name) in schema.required_fields_for_kind(node.kind_name()) {
|
||||
if !node.fields.contains_key(&field_id) {
|
||||
let name = field_name.unwrap_or("child");
|
||||
writeln!(out, "{prefix} <-- ERROR: missing required field '{name}'").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unnamed children — skip unnamed tokens (keywords, punctuation)
|
||||
if let Some(children) = node.fields.get(&CHILD_FIELD) {
|
||||
let child_type_check = type_check.map(|(schema, _, _)| {
|
||||
|
||||
@@ -563,6 +563,15 @@ impl Node {
|
||||
NodeContent::DynamicString(s) => Some(s.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the child ids stored under a given field, or an empty slice if
|
||||
/// no such field is present on this node.
|
||||
pub fn field_children(&self, field_id: FieldId) -> &[Id] {
|
||||
self.fields
|
||||
.get(&field_id)
|
||||
.map(|v| v.as_slice())
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
}
|
||||
|
||||
/// The contents of a node is either a range in the original source file,
|
||||
@@ -836,17 +845,9 @@ fn apply_one_shot_rules_inner(
|
||||
// pattern root): re-analyzing it would match the same rule
|
||||
// again indefinitely.
|
||||
if captured_id == id {
|
||||
return Ok(captured_id);
|
||||
return Ok(vec![captured_id]);
|
||||
}
|
||||
let result =
|
||||
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
|
||||
if result.len() != 1 {
|
||||
return Err(format!(
|
||||
"OneShot: recursion on captured node produced {} results, expected exactly 1",
|
||||
result.len()
|
||||
));
|
||||
}
|
||||
Ok(result[0])
|
||||
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
|
||||
})?;
|
||||
return Ok(rule.run_transform(ast, captures, id, fresh));
|
||||
}
|
||||
|
||||
@@ -314,6 +314,14 @@ fn apply_yaml_to_schema(
|
||||
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
|
||||
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
|
||||
schema.set_field_types(parent_kind, field_id, node_types);
|
||||
schema.set_field_cardinality(
|
||||
parent_kind,
|
||||
field_id,
|
||||
crate::schema::FieldCardinality {
|
||||
multiple: spec.multiple,
|
||||
required: spec.required,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,11 +178,15 @@ impl QueryListElem {
|
||||
let Some(child) = remaining_children.next() else {
|
||||
return Ok(false);
|
||||
};
|
||||
if skip_unnamed {
|
||||
let node = ast.get_node(child).unwrap();
|
||||
if !node.is_named() {
|
||||
continue;
|
||||
}
|
||||
let node = ast.get_node(child).unwrap();
|
||||
// Skip tree-sitter `extras` (e.g. comments) during
|
||||
// positional matching: they are conceptually invisible
|
||||
// between siblings, mirroring tree-sitter query semantics.
|
||||
if node.is_extra() {
|
||||
continue;
|
||||
}
|
||||
if skip_unnamed && !node.is_named() {
|
||||
continue;
|
||||
}
|
||||
let snapshot = matches.clone();
|
||||
if sub_query.do_match(ast, child, matches)? {
|
||||
|
||||
@@ -8,6 +8,15 @@ pub struct NodeType {
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
/// Multiplicity/optionality of a field declaration.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct FieldCardinality {
|
||||
/// Whether the field may hold more than one child.
|
||||
pub multiple: bool,
|
||||
/// Whether at least one child must be present.
|
||||
pub required: bool,
|
||||
}
|
||||
|
||||
/// A schema defining node kinds and field names for the output AST.
|
||||
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
|
||||
///
|
||||
@@ -32,6 +41,7 @@ pub struct Schema {
|
||||
kind_names: BTreeMap<KindId, &'static str>,
|
||||
next_kind_id: KindId,
|
||||
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
|
||||
field_cardinalities: BTreeMap<(String, FieldId), FieldCardinality>,
|
||||
supertypes: BTreeMap<String, Vec<NodeType>>,
|
||||
}
|
||||
|
||||
@@ -52,6 +62,7 @@ impl Schema {
|
||||
kind_names: BTreeMap::new(),
|
||||
next_kind_id: 1, // 0 is reserved
|
||||
field_types: BTreeMap::new(),
|
||||
field_cardinalities: BTreeMap::new(),
|
||||
supertypes: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
@@ -196,6 +207,42 @@ impl Schema {
|
||||
.get(&(parent_kind.to_string(), field_id))
|
||||
}
|
||||
|
||||
pub fn set_field_cardinality(
|
||||
&mut self,
|
||||
parent_kind: &str,
|
||||
field_id: FieldId,
|
||||
cardinality: FieldCardinality,
|
||||
) {
|
||||
self.field_cardinalities
|
||||
.insert((parent_kind.to_string(), field_id), cardinality);
|
||||
}
|
||||
|
||||
/// Returns the declared cardinality for a field, if known.
|
||||
pub fn field_cardinality(
|
||||
&self,
|
||||
parent_kind: &str,
|
||||
field_id: FieldId,
|
||||
) -> Option<FieldCardinality> {
|
||||
self.field_cardinalities
|
||||
.get(&(parent_kind.to_string(), field_id))
|
||||
.copied()
|
||||
}
|
||||
|
||||
/// Returns an iterator over all `(field_id, field_name)` pairs that are
|
||||
/// declared as required (`required: true`) for the given `parent_kind`.
|
||||
pub fn required_fields_for_kind<'a>(
|
||||
&'a self,
|
||||
parent_kind: &'a str,
|
||||
) -> impl Iterator<Item = (FieldId, Option<&'static str>)> + 'a {
|
||||
self.field_cardinalities
|
||||
.iter()
|
||||
.filter(move |((kind, _), card)| kind == parent_kind && card.required)
|
||||
.map(move |((_, field_id), _)| {
|
||||
let name = self.field_name_for_id(*field_id);
|
||||
(*field_id, name)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
|
||||
self.supertypes.insert(supertype.to_string(), node_types);
|
||||
}
|
||||
|
||||
@@ -274,6 +274,44 @@ fn test_query_no_match() {
|
||||
assert!(!matched);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_query_skips_extras_in_positional_match() {
|
||||
// Regression test: positional wildcards `(_)` must not bind to
|
||||
// tree-sitter `extras` (e.g. comments) during forward-scan; extras
|
||||
// are conceptually invisible between siblings, matching tree-sitter
|
||||
// query semantics. Without this, a later rule that translates a
|
||||
// captured comment to nothing (a common idiom, e.g.
|
||||
// `(comment) => ()` in Swift) leaves the capture's match-list empty
|
||||
// and causes the transform to fail with "Variable X has 0 matches".
|
||||
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
|
||||
let ast = runner.run("[1, # comment\n2]").unwrap();
|
||||
|
||||
// Navigate to the `array` node: program -> array.
|
||||
let mut cursor = AstCursor::new(&ast);
|
||||
cursor.goto_first_child();
|
||||
let array_id = cursor.node_id();
|
||||
assert_eq!(ast.get_node(array_id).unwrap().kind(), "array");
|
||||
|
||||
// Two positional wildcards should bind to the two integers, skipping
|
||||
// the comment that sits between them.
|
||||
let query = yeast::query!((array (_) @a (_) @b));
|
||||
let mut captures = yeast::captures::Captures::new();
|
||||
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
|
||||
assert!(matched);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("a").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
"integer"
|
||||
);
|
||||
assert_eq!(
|
||||
ast.get_node(captures.get_var("b").unwrap())
|
||||
.unwrap()
|
||||
.kind(),
|
||||
"integer"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
|
||||
@@ -26,6 +26,13 @@ fn is_header_rule(line: &str) -> bool {
|
||||
trimmed.len() >= 3 && trimmed.chars().all(|c| c == '=')
|
||||
}
|
||||
|
||||
fn is_next_case_header(lines: &[&str], i: usize) -> bool {
|
||||
is_header_rule(lines[i])
|
||||
&& i + 2 < lines.len()
|
||||
&& !lines[i + 1].trim().is_empty()
|
||||
&& is_header_rule(lines[i + 2])
|
||||
}
|
||||
|
||||
fn parse_corpus(content: &str) -> Vec<CorpusCase> {
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
let mut i = 0;
|
||||
@@ -58,48 +65,51 @@ fn parse_corpus(content: &str) -> Vec<CorpusCase> {
|
||||
|
||||
let input_start = i;
|
||||
while i < lines.len() && lines[i].trim() != "---" {
|
||||
if is_next_case_header(&lines, i) {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
assert!(i < lines.len(), "Missing --- separator for case {name}");
|
||||
let input = lines[input_start..i].join("\n").trim_end().to_string();
|
||||
i += 1;
|
||||
|
||||
// Raw tree-sitter parse section. New-format files have a second
|
||||
// `---` separator between the raw tree and the mapped AST. Legacy
|
||||
// files (with only one separator) have no raw section — in that
|
||||
// case `raw` stays empty and update mode will populate it.
|
||||
let raw_start = i;
|
||||
let mut next_sep = i;
|
||||
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
|
||||
if is_header_rule(lines[next_sep])
|
||||
&& next_sep + 2 < lines.len()
|
||||
&& !lines[next_sep + 1].trim().is_empty()
|
||||
&& is_header_rule(lines[next_sep + 2])
|
||||
{
|
||||
break;
|
||||
}
|
||||
next_sep += 1;
|
||||
}
|
||||
let raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
|
||||
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
|
||||
i = next_sep + 1;
|
||||
raw_text
|
||||
let raw;
|
||||
let expected;
|
||||
if i >= lines.len() || lines[i].trim() != "---" {
|
||||
// No `---` separator before next case (or EOF). Treat the
|
||||
// remaining sections as empty.
|
||||
raw = String::new();
|
||||
expected = String::new();
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let expected_start = i;
|
||||
while i < lines.len() {
|
||||
if is_header_rule(lines[i])
|
||||
&& i + 2 < lines.len()
|
||||
&& !lines[i + 1].trim().is_empty()
|
||||
&& is_header_rule(lines[i + 2])
|
||||
{
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
|
||||
// Raw tree-sitter parse section. New-format files have a second
|
||||
// `---` separator between the raw tree and the mapped AST. Legacy
|
||||
// files (with only one separator) have no raw section — in that
|
||||
// case `raw` stays empty and update mode will populate it.
|
||||
let raw_start = i;
|
||||
let mut next_sep = i;
|
||||
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
|
||||
if is_next_case_header(&lines, next_sep) {
|
||||
break;
|
||||
}
|
||||
next_sep += 1;
|
||||
}
|
||||
raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
|
||||
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
|
||||
i = next_sep + 1;
|
||||
raw_text
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let expected_start = i;
|
||||
while i < lines.len() {
|
||||
if is_next_case_header(&lines, i) {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
expected = lines[expected_start..i].join("\n").trim().to_string();
|
||||
}
|
||||
let expected = lines[expected_start..i].join("\n").trim().to_string();
|
||||
|
||||
cases.push(CorpusCase {
|
||||
name,
|
||||
|
||||
Reference in New Issue
Block a user