Merge pull request #21848 from asgerf/asgerf/swift-yeast

Unified: Add schema checking and corpus-style tests
This commit is contained in:
Asger F
2026-05-26 22:00:21 +02:00
committed by GitHub
36 changed files with 5469 additions and 4732 deletions

View File

@@ -330,7 +330,7 @@ pub fn extract(
if let Some(yeast_runner) = yeast_runner {
let ast = yeast_runner
.run_from_tree(&tree)
.run_from_tree(&tree, source)
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
traverse_yeast(&ast, &mut visitor);
} else {

View File

@@ -115,8 +115,19 @@ pub fn generate(
&node_parent_table_name,
)),
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)),
];
// Only emit the ReservedWord class when there are actually unnamed token
// types in the schema (i.e., @{prefix}_reserved_word exists in the dbscheme).
// When converting from a YEAST YAML schema that has no unnamed tokens, this
// type is absent and referencing it would cause a QL compilation error.
let has_reserved_words = nodes
.values()
.any(|n| n.dbscheme_name == reserved_word_name);
if has_reserved_words {
body.push(ql::TopLevel::Class(ql_gen::create_reserved_word_class(
&reserved_word_name,
)));
}
// Overlay discard predicates
body.push(ql::TopLevel::Predicate(

View File

@@ -113,8 +113,24 @@ fn parse_query_node_inner(tokens: &mut Tokens) -> Result<TokenStream> {
/// appear in any order; bare patterns are accumulated and emitted as a
/// single `("child", ...)` entry.
fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
let mut fields = Vec::new();
// Accumulate per-field elems in declaration order; multiple uses of the
// same field name extend the same list (so e.g. `cond: (foo) cond: (bar)`
// matches a `cond` field whose first child is `foo` and second is `bar`).
let mut field_order: Vec<String> = Vec::new();
let mut field_elems: std::collections::HashMap<String, Vec<TokenStream>> =
std::collections::HashMap::new();
let mut bare_children: Vec<TokenStream> = Vec::new();
let push_field_elem = |order: &mut Vec<String>,
map: &mut std::collections::HashMap<String, Vec<TokenStream>>,
name: String,
elem: TokenStream| {
if !map.contains_key(&name) {
order.push(name.clone());
map.insert(name, vec![elem]);
} else {
map.get_mut(&name).unwrap().push(elem);
}
};
while tokens.peek().is_some() {
if peek_is_field(tokens) {
let field_name = expect_ident(tokens, "expected field name")?;
@@ -122,10 +138,40 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
expect_punct(tokens, ':', "expected `:` after field name")?;
let child = parse_query_node(tokens)?;
fields.push(quote! {
(#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)])
});
// Parse the field's pattern. To support repetition like
// `field: (kind)* @cap`, parse the atom first, then check for
// a quantifier, and lastly handle a trailing `@capture`.
let atom = parse_query_atom(tokens)?;
if peek_is_repetition(tokens) {
let rep = expect_repetition(tokens)?;
let elem = quote! {
yeast::query::QueryListElem::Repeated {
children: vec![yeast::query::QueryListElem::SingleNode(#atom)],
rep: #rep,
}
};
let elem = maybe_wrap_list_capture(tokens, elem)?;
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
} else {
let child = if peek_is_at(tokens) {
tokens.next();
let capture_name =
expect_ident(tokens, "expected capture name after @")?;
let name_str = capture_name.to_string();
quote! {
yeast::query::QueryNode::Capture {
capture: #name_str,
node: Box::new(#atom),
}
}
} else {
atom
};
let elem = quote! {
yeast::query::QueryListElem::SingleNode(#child)
};
push_field_elem(&mut field_order, &mut field_elems, field_str, elem);
}
} else {
// Bare patterns — accumulate into the implicit `child` field.
// We don't break here, so we can interleave with named fields.
@@ -137,6 +183,13 @@ fn parse_query_fields(tokens: &mut Tokens) -> Result<Vec<TokenStream>> {
bare_children.extend(elems);
}
}
let mut fields: Vec<TokenStream> = Vec::new();
for name in field_order {
let elems = field_elems.remove(&name).unwrap();
fields.push(quote! {
(#name, vec![#(#elems),*])
});
}
if !bare_children.is_empty() {
fields.push(quote! {
("child", vec![#(#bare_children),*])
@@ -299,7 +352,7 @@ fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStream> {
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => {
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
Ok(quote! { #expr })
Ok(quote! { ::std::convert::Into::<usize>::into(#expr) })
}
Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => {
let group = expect_group(tokens, Delimiter::Parenthesis)?;
@@ -329,12 +382,17 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
return Ok(quote! { #ctx.literal(#kind_str, #lit) });
}
// Check for (kind #{expr}) — computed literal, expr converted via .to_string()
// Check for (kind #{expr}) — computed literal, expr converted via YeastDisplay
if peek_is_hash(tokens) {
tokens.next(); // consume #
let group = expect_group(tokens, Delimiter::Brace)?;
let expr = group.stream();
return Ok(quote! { #ctx.literal(#kind_str, &(#expr).to_string()) });
return Ok(quote! {
{
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
#ctx.literal(#kind_str, &__value)
}
});
}
// Check for (kind $fresh)
@@ -374,7 +432,11 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
inner.next(); // consume first .
inner.next(); // consume second .
let expr: proc_macro2::TokenStream = inner.collect();
stmts.push(quote! { let #temp: Vec<usize> = #expr; });
stmts.push(quote! {
let #temp: Vec<usize> = (#expr).into_iter()
.map(::std::convert::Into::<usize>::into)
.collect();
});
field_args.push(quote! { (#field_str, #temp) });
continue;
}
@@ -382,7 +444,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
}
let value = parse_direct_node(tokens, ctx)?;
stmts.push(quote! { let #temp = #value; });
stmts.push(quote! { let #temp: usize = #value; });
field_args.push(quote! { (#field_str, vec![#temp]) });
}
@@ -427,10 +489,16 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
inner.next(); // consume first .
inner.next(); // consume second .
let expr: TokenStream = inner.collect();
items.push(quote! { __nodes.extend(#expr); });
items.push(quote! {
__nodes.extend(
(#expr).into_iter().map(::std::convert::Into::<usize>::into)
);
});
} else {
let expr = group.stream();
items.push(quote! { __nodes.push(#expr); });
items.push(quote! {
__nodes.push(::std::convert::Into::<usize>::into(#expr));
});
}
continue;
}
@@ -580,13 +648,24 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
let name_str = &cap.name;
match cap.multiplicity {
CaptureMultiplicity::Repeated => {
quote! { let #name: Vec<usize> = __captures.get_all(#name_str); }
quote! {
let #name: Vec<yeast::NodeRef> = __captures.get_all(#name_str)
.into_iter()
.map(yeast::NodeRef)
.collect();
}
}
CaptureMultiplicity::Optional => {
quote! { let #name: Option<usize> = __captures.get_opt(#name_str); }
quote! {
let #name: Option<yeast::NodeRef> =
__captures.get_opt(#name_str).map(yeast::NodeRef);
}
}
CaptureMultiplicity::Single => {
quote! { let #name: usize = __captures.get_var(#name_str).unwrap(); }
quote! {
let #name: yeast::NodeRef =
yeast::NodeRef(__captures.get_var(#name_str).unwrap());
}
}
}
})
@@ -613,19 +692,26 @@ pub fn parse_rule_top(input: TokenStream) -> Result<TokenStream> {
CaptureMultiplicity::Repeated => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.insert(__field_id, #name);
__fields.insert(
__field_id,
#name.into_iter()
.map(::std::convert::Into::<usize>::into)
.collect(),
);
},
CaptureMultiplicity::Optional => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
if let Some(__id) = #name {
__fields.entry(__field_id).or_insert_with(Vec::new).push(__id);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(__id));
}
},
CaptureMultiplicity::Single => quote! {
let __field_id = #ctx_ident.ast.field_id_for_name(#name_str)
.unwrap_or_else(|| panic!("field '{}' not found", #name_str));
__fields.entry(__field_id).or_insert_with(Vec::new).push(#name);
__fields.entry(__field_id).or_insert_with(Vec::new)
.push(::std::convert::Into::<usize>::into(#name));
},
}
})

View File

@@ -349,8 +349,8 @@ to enable rewriting:
```rust
let desugar = yeast::DesugaringConfig::new()
.add_phase("cleanup", cleanup_rules())
.add_phase("desugar", desugar_rules())
.add_phase("cleanup", yeast::PhaseKind::Repeating, cleanup_rules())
.add_phase("translate", yeast::PhaseKind::OneShot, translate_rules())
.with_output_node_types_yaml(include_str!("output-node-types.yml"));
let lang = simple::LanguageSpec {
@@ -365,6 +365,15 @@ let lang = simple::LanguageSpec {
A single-phase config is just `.add_phase(...)` called once. Phase names
appear in error messages so you can tell which phase failed.
There are two kinds of phases:
- **Repeating**:
Each node is re-processed until none of the rules in the phase matches.
When a node no longer matches any rules, its children are recursively processed. In practice this is used to desugar or simplify an AST, while staying mostly within the same schema.
- **One-shot**:
Each node is processed by the first matching rule, and the phase fails with an error if no rule matches (a caller may choose to turn that error into a panic).
Rules are then recursively applied to every captured node.
In practice this is used when translating from one AST schema to another, where an exhaustive match is required.
The same YAML node-types file is used for both the runtime yeast `Schema` (so
rules can refer to output-only kinds and fields) and TRAP validation (it
is converted to JSON internally).

View File

@@ -61,6 +61,21 @@ impl Captures {
}
}
}
/// Rewrite every captured id in place by passing it through `f`.
///
/// All capture lists are visited, regardless of the key they are stored
/// under. The first `Err` returned by `f` aborts the walk and is returned
/// to the caller; ids rewritten before that point keep their new values.
pub fn try_map_all_captures<E>(
    &mut self,
    mut f: impl FnMut(Id) -> Result<Id, E>,
) -> Result<(), E> {
    self.captures
        .values_mut()
        .flatten()
        .try_for_each(|slot| {
            *slot = f(*slot)?;
            Ok(())
        })
}
pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) {
if let Some(from_ids) = self.captures.get(from) {
let new_values = from_ids.iter().copied().map(f).collect();

View File

@@ -1,6 +1,6 @@
use std::fmt::Write;
use crate::{Ast, Node, NodeContent, CHILD_FIELD};
use crate::{schema::Schema, Ast, Node, NodeContent, CHILD_FIELD};
/// Options for controlling AST dump output.
pub struct DumpOptions {
@@ -45,16 +45,143 @@ pub fn dump_ast_with_options(
options: &DumpOptions,
) -> String {
let mut out = String::new();
dump_node(ast, root, source, options, 0, &mut out);
dump_node(ast, root, source, options, 0, None, &mut out);
out
}
/// Render the tree rooted at `root` with schema violations flagged inline,
/// using the default [`DumpOptions`].
///
/// This is a convenience wrapper around
/// [`dump_ast_with_type_errors_and_options`]; see that function for how
/// mismatches are reported.
pub fn dump_ast_with_type_errors(
    ast: &Ast,
    root: usize,
    source: &str,
    schema: &Schema,
) -> String {
    let default_options = DumpOptions::default();
    dump_ast_with_type_errors_and_options(ast, root, source, schema, &default_options)
}
/// Render the tree rooted at `root` with schema violations flagged inline,
/// formatting the dump according to `options`.
///
/// The root node is dumped with no expected type set and no parent field,
/// so only its children are checked against the field types that `schema`
/// declares for their parent. Reported problems are appended to the
/// offending node's line as `" <-- ERROR: ..."`.
pub fn dump_ast_with_type_errors_and_options(
    ast: &Ast,
    root: usize,
    source: &str,
    schema: &Schema,
    options: &DumpOptions,
) -> String {
    let mut rendered = String::new();
    // The root has no parent field, hence no expected types and no context.
    let root_check = Some((schema, None, None));
    dump_node(ast, root, source, options, 0, root_check, &mut rendered);
    rendered
}
/// Render a set of node types as a human-readable alternative list.
///
/// Named kinds are printed bare, unnamed kinds are wrapped in double quotes,
/// and entries are separated by `" | "`. An empty slice yields an empty
/// string.
fn format_node_types(node_types: &[crate::schema::NodeType]) -> String {
    let mut rendered = String::new();
    for (position, node_type) in node_types.iter().enumerate() {
        if position > 0 {
            rendered.push_str(" | ");
        }
        if node_type.named {
            rendered.push_str(&node_type.kind);
        } else {
            rendered.push('"');
            rendered.push_str(&node_type.kind);
            rendered.push('"');
        }
    }
    rendered
}
/// Empty expected-type set. `dump_node` substitutes it when the schema has
/// no entry for a field, which `type_error_for_node` reports as an
/// undeclared field.
const EMPTY_NODE_TYPES: &[crate::schema::NodeType] = &[];
/// Describe how `node` violates `schema`, if it does.
///
/// Checks are applied in this order:
/// 1. The node's kind must be known to the schema, either as a named or as
///    an unnamed kind. This check runs even when `expected` is `None`.
/// 2. If `expected` is `None`, no further checking is done.
/// 3. An empty `expected` slice means the parent does not declare the field
///    the node sits in.
/// 4. Otherwise the node must match one of the `expected` types.
///
/// # Arguments
/// - `schema`: The schema to validate against.
/// - `node`: The node being checked.
/// - `expected`: Allowed types for the node, or `None` to skip steps 3-4.
/// - `parent_field`: Optional `(parent_kind, field_name)` pair used to give
///   the message more context.
///
/// # Returns
/// `Some(message)` describing the first violation found, or `None` if the
/// node passes every applicable check.
fn type_error_for_node(
    schema: &Schema,
    node: &Node,
    expected: Option<&[crate::schema::NodeType]>,
    parent_field: Option<(&str, &str)>,
) -> Option<String> {
    let kind_name = node.kind_name();
    let kind_is_known = schema.id_for_node_kind(kind_name).is_some()
        || schema.id_for_unnamed_node_kind(kind_name).is_some();
    if !kind_is_known {
        return Some(format!("node kind '{}' not in schema", kind_name));
    }

    let allowed = expected?;
    if allowed.is_empty() {
        let message = match parent_field {
            Some((kind, field)) => format!("the node '{kind}' has no field '{field}'"),
            None => "field not declared in schema for this parent node".to_string(),
        };
        return Some(message);
    }

    if schema.node_matches_types(kind_name, node.is_named(), allowed) {
        return None;
    }

    // Unnamed kinds are quoted, mirroring `format_node_types`.
    let actual = if node.is_named() {
        kind_name.to_string()
    } else {
        format!("\"{}\"", kind_name)
    };
    let message = match parent_field {
        Some((kind, field)) => format!(
            "The field {}.{} should contain {}, but got {}",
            kind,
            field,
            format_node_types(allowed),
            actual
        ),
        None => format!(
            "expected {}, got {}",
            format_node_types(allowed),
            actual
        ),
    };
    Some(message)
}
/// Fetch the types that `schema` allows in one field of a parent kind.
///
/// # Arguments
/// - `schema`: The schema to query.
/// - `parent_kind`: Kind name of the node that owns the field.
/// - `field_id`: Id of the field within that parent.
///
/// # Returns
/// Whatever `Schema::field_types` reports for the pair, viewed as a slice;
/// `None` when the schema has no entry for it.
fn expected_for_field<'a>(
    schema: &'a Schema,
    parent_kind: &str,
    field_id: u16,
) -> Option<&'a [crate::schema::NodeType]> {
    let declared = schema.field_types(parent_kind, field_id)?;
    Some(declared.as_slice())
}
fn dump_node(
ast: &Ast,
id: usize,
source: &str,
options: &DumpOptions,
indent: usize,
type_check: Option<(
&Schema,
Option<&[crate::schema::NodeType]>,
Option<(&str, &str)>,
)>,
out: &mut String,
) {
let node = match ast.get_node(id) {
@@ -90,6 +217,12 @@ fn dump_node(
}
}
if let Some((schema, expected, parent_field)) = type_check {
if let Some(err) = type_error_for_node(schema, node, expected, parent_field) {
write!(out, " <-- ERROR: {err}").unwrap();
}
}
writeln!(out).unwrap();
// Named fields first
@@ -98,31 +231,68 @@ fn dump_node(
continue; // Handle unnamed children last
}
let field_name = ast.field_name_for_id(field_id).unwrap_or("?");
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), field_id)
.or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), field_name));
(schema, expected, parent_field)
});
if children.len() == 1 {
write!(out, "{prefix} {field_name}:").unwrap();
// Inline single child
let child = ast.get_node(children[0]);
if child.is_some_and(is_leaf) {
write!(out, " ").unwrap();
dump_node_inline(ast, children[0], source, options, out);
dump_node_inline(ast, children[0], source, options, child_type_check, out);
} else {
writeln!(out).unwrap();
dump_node(ast, children[0], source, options, indent + 2, out);
dump_node(
ast,
children[0],
source,
options,
indent + 2,
child_type_check,
out,
);
}
} else {
writeln!(out, "{prefix} {field_name}:").unwrap();
for &child_id in children {
dump_node(ast, child_id, source, options, indent + 2, out);
dump_node(
ast,
child_id,
source,
options,
indent + 2,
child_type_check,
out,
);
}
}
}
// Unnamed children — skip unnamed tokens (keywords, punctuation)
if let Some(children) = node.fields.get(&CHILD_FIELD) {
let child_type_check = type_check.map(|(schema, _, _)| {
let expected = expected_for_field(schema, node.kind_name(), CHILD_FIELD)
.or(Some(EMPTY_NODE_TYPES));
let parent_field = Some((node.kind_name(), "children"));
(schema, expected, parent_field)
});
for &child_id in children {
if let Some(child) = ast.get_node(child_id) {
if child.is_named() {
dump_node(ast, child_id, source, options, indent + 1, out);
dump_node(
ast,
child_id,
source,
options,
indent + 1,
child_type_check,
out,
);
}
}
}
@@ -130,7 +300,18 @@ fn dump_node(
}
/// Dump a leaf node inline (no newline prefix, caller provides context).
fn dump_node_inline(ast: &Ast, id: usize, source: &str, options: &DumpOptions, out: &mut String) {
fn dump_node_inline(
ast: &Ast,
id: usize,
source: &str,
options: &DumpOptions,
type_check: Option<(
&Schema,
Option<&[crate::schema::NodeType]>,
Option<(&str, &str)>,
)>,
out: &mut String,
) {
let node = match ast.get_node(id) {
Some(n) => n,
None => return,
@@ -159,6 +340,12 @@ fn dump_node_inline(ast: &Ast, id: usize, source: &str, options: &DumpOptions, o
}
}
if let Some((schema, expected, parent_field)) = type_check {
if let Some(err) = type_error_for_node(schema, node, expected, parent_field) {
write!(out, " <-- ERROR: {err}").unwrap();
}
}
writeln!(out).unwrap();
}

View File

@@ -23,12 +23,73 @@ pub use cursor::Cursor;
use query::QueryNode;
/// Node ids are indexes into the arena
type Id = usize;
pub type Id = usize;
/// Field and Kind ids are provided by tree-sitter
type FieldId = u16;
type KindId = u16;
/// A typed handle to a node stored in an [`Ast`] arena.
///
/// It is a thin wrapper around an [`Id`]. [`std::fmt::Display`] is
/// intentionally not implemented: turning a node into text needs the
/// [`Ast`] that owns it (so that [`NodeContent::Range`] can be mapped back
/// to source text). Format it with [`YeastDisplay::yeast_to_string`].
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
pub struct NodeRef(pub Id);

impl NodeRef {
    /// Returns the raw arena id wrapped by this reference.
    pub fn id(self) -> Id {
        let NodeRef(raw) = self;
        raw
    }
}

/// Unwraps a [`NodeRef`] back into its raw arena id.
impl From<NodeRef> for Id {
    fn from(NodeRef(raw): NodeRef) -> Self {
        raw
    }
}
/// Like [`std::fmt::Display`], but the formatting routine is given access to
/// the [`Ast`] so that node references can resolve to their source text.
///
/// All standard primitive and string types implement [`YeastDisplay`] via
/// the [`impl_yeast_display_via_display`] macro below. Coherence prevents a
/// blanket `impl<T: Display>`, so additional types must be added explicitly.
pub trait YeastDisplay {
/// Renders `self` as an owned string; `ast` is available to implementors
/// that need to look up node data while formatting.
fn yeast_to_string(&self, ast: &Ast) -> String;
}
impl YeastDisplay for NodeRef {
fn yeast_to_string(&self, ast: &Ast) -> String {
ast.source_text(self.0)
}
}
// Implements `YeastDisplay` for every listed type by forwarding to that
// type's `ToString::to_string`; the `Ast` argument is ignored. Accepts a
// comma-separated list of types with an optional trailing comma.
macro_rules! impl_yeast_display_via_display {
($($t:ty),* $(,)?) => {
$(
impl YeastDisplay for $t {
fn yeast_to_string(&self, _ast: &Ast) -> String {
::std::string::ToString::to_string(self)
}
}
)*
};
}
// Types whose rendering does not depend on the `Ast`: integers, floats,
// `bool`, `char`, and the string types.
impl_yeast_display_via_display! {
i8, i16, i32, i64, i128, isize,
u8, u16, u32, u64, u128, usize,
f32, f64,
bool, char,
str, String,
}
/// A shared reference formats exactly like the value it points to.
impl<T: YeastDisplay + ?Sized> YeastDisplay for &T {
    fn yeast_to_string(&self, ast: &Ast) -> String {
        // `self` is `&&T`; peel one layer and dispatch to `T`'s own impl.
        <T as YeastDisplay>::yeast_to_string(*self, ast)
    }
}
pub const CHILD_FIELD: u16 = u16::MAX;
#[derive(Debug)]
@@ -160,6 +221,9 @@ pub struct Ast {
root: Id,
nodes: Vec<Node>,
schema: schema::Schema,
/// Original source bytes the tree was parsed from. Used to resolve
/// `NodeContent::Range` to text for synthesized literal nodes.
source: Vec<u8>,
}
impl std::fmt::Debug for Ast {
@@ -182,21 +246,93 @@ impl Ast {
schema: schema::Schema,
tree: &tree_sitter::Tree,
language: &tree_sitter::Language,
) -> Self {
Self::from_tree_with_schema_and_source(schema, tree, language, Vec::new())
}
pub fn from_tree_with_schema_and_source(
schema: schema::Schema,
tree: &tree_sitter::Tree,
language: &tree_sitter::Language,
source: Vec<u8>,
) -> Self {
let mut visitor = visitor::Visitor::new(language.clone());
visitor.visit(tree);
visitor.build_with_schema(schema)
let mut ast = visitor.build_with_schema(schema);
ast.source = source;
ast
}
/// Produce the text associated with node `id`.
///
/// - An unknown `id` yields an empty string.
/// - `NodeContent::Range` is sliced out of the stored source bytes
///   (lossily decoded as UTF-8); a range that does not fit inside the
///   stored source yields an empty string.
/// - `NodeContent::String` and a non-empty `NodeContent::DynamicString`
///   are returned as-is.
/// - An empty `NodeContent::DynamicString` falls back to the node's
///   `source_range`, if it has one, and to an empty string otherwise.
pub fn source_text(&self, id: Id) -> String {
    let Some(node) = self.get_node(id) else {
        return String::new();
    };

    // `get` rejects both out-of-bounds and inverted ranges.
    let slice_source = |range: &tree_sitter::Range| -> String {
        self.source
            .get(range.start_byte..range.end_byte)
            .map(|bytes| String::from_utf8_lossy(bytes).into_owned())
            .unwrap_or_default()
    };

    match &node.content {
        NodeContent::Range(range) => slice_source(range),
        NodeContent::String(text) => text.to_string(),
        NodeContent::DynamicString(text) if !text.is_empty() => text.clone(),
        // Synthesized nodes carry an empty `DynamicString`; use the source
        // range they inherited so `#{capture}` still yields source text.
        NodeContent::DynamicString(_) => node
            .source_range
            .map(|range| slice_source(&range))
            .unwrap_or_default(),
    }
}
/// Creates an [`AstCursor`] over this AST.
pub fn walk(&self) -> AstCursor {
AstCursor::new(self)
}
/// Borrow every node held in the AST arena.
///
/// Nodes that desugaring rewrites have detached from `get_root()` are
/// still included. For traversal or validation of the effective output
/// tree, use `reachable_node_ids()` instead.
pub fn nodes(&self) -> &[Node] {
    self.nodes.as_slice()
}
/// Collect the ids of all nodes reachable from `get_root()` via child edges.
///
/// The walk uses an explicit stack and records each id the first time it is
/// popped, so every id appears at most once. Ids that fall outside the
/// arena are ignored. Orphaned arena nodes left behind by rewrites are not
/// part of the result.
pub fn reachable_node_ids(&self) -> Vec<usize> {
    let mut visited = vec![false; self.nodes.len()];
    let mut order = Vec::new();
    let mut pending = vec![self.root];

    while let Some(current) = pending.pop() {
        // Out-of-range ids are treated like already-visited ones: skipped.
        let skip = visited.get(current).copied().unwrap_or(true);
        if skip {
            continue;
        }
        visited[current] = true;
        order.push(current);

        if let Some(node) = self.get_node(current) {
            pending.extend(node.fields.values().flatten().copied());
        }
    }

    order
}
/// Returns the id currently stored as this AST's root.
pub fn get_root(&self) -> Id {
self.root
}
@@ -493,18 +629,39 @@ impl Rule {
node: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Option<Vec<Id>>, String> {
match self.try_match(ast, node)? {
Some(captures) => Ok(Some(self.run_transform(ast, captures, node, fresh))),
None => Ok(None),
}
}
/// Attempt to match this rule's query against `node`, returning the
/// resulting captures on success. Does not invoke the transform.
fn try_match(&self, ast: &Ast, node: Id) -> Result<Option<Captures>, String> {
let mut captures = Captures::new();
if self.query.do_match(ast, node, &mut captures)? {
fresh.next_scope();
let source_range = ast.get_node(node).and_then(|n| match n.content {
NodeContent::Range(r) => Some(r),
_ => n.source_range,
});
Ok(Some((self.transform)(ast, captures, fresh, source_range)))
Ok(Some(captures))
} else {
Ok(None)
}
}
/// Invoke this rule's transform on an already-matched node.
///
/// A new fresh-name scope is opened first. The transform then receives
/// `captures` together with the source range taken from `node`: the node's
/// own range when its content is a `NodeContent::Range`, otherwise its
/// `source_range`. The range is `None` if `node` does not resolve in `ast`.
/// Returns the ids the transform produced.
fn run_transform(
    &self,
    ast: &mut Ast,
    captures: Captures,
    node: Id,
    fresh: &tree_builder::FreshScope,
) -> Vec<Id> {
    fresh.next_scope();
    let inherited_range = match ast.get_node(node) {
        Some(matched) => match matched.content {
            NodeContent::Range(own_range) => Some(own_range),
            _ => matched.source_range,
        },
        None => None,
    };
    (self.transform)(ast, captures, fresh, inherited_range)
}
}
const MAX_REWRITE_DEPTH: usize = 100;
@@ -539,17 +696,17 @@ impl<'a> RuleIndex<'a> {
}
}
fn apply_rules(
fn apply_repeating_rules(
rules: &[Rule],
ast: &mut Ast,
id: Id,
fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
let index = RuleIndex::new(rules);
apply_rules_inner(&index, ast, id, fresh, 0, None)
apply_repeating_rules_inner(&index, ast, id, fresh, 0, None)
}
fn apply_rules_inner(
fn apply_repeating_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
id: Id,
@@ -578,7 +735,7 @@ fn apply_rules_inner(
let next_skip = if rule.repeated { None } else { Some(rule_ptr) };
let mut results = Vec::new();
for node in result_node {
results.extend(apply_rules_inner(
results.extend(apply_repeating_rules_inner(
index,
ast,
node,
@@ -603,7 +760,7 @@ fn apply_rules_inner(
for children in fields.values_mut() {
let mut new_children: Option<Vec<Id>> = None;
for (i, &child_id) in children.iter().enumerate() {
let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let result = apply_repeating_rules_inner(index, ast, child_id, fresh, rewrite_depth, None)?;
let unchanged = result.len() == 1 && result[0] == child_id;
match (&mut new_children, unchanged) {
(None, true) => {} // unchanged so far, no allocation needed
@@ -628,6 +785,92 @@ fn apply_rules_inner(
Ok(vec![id])
}
/// Run `rules` over the tree at `id` with `OneShot` semantics.
///
/// On each visited node the first matching rule fires. Recursion follows
/// the nodes captured by that rule rather than the node's children, and a
/// visited node that no rule matches results in an error. This wrapper
/// builds the rule index and starts the recursion at depth zero.
fn apply_one_shot_rules(
    rules: &[Rule],
    ast: &mut Ast,
    id: Id,
    fresh: &tree_builder::FreshScope,
) -> Result<Vec<Id>, String> {
    const INITIAL_DEPTH: usize = 0;
    let rule_index = RuleIndex::new(rules);
    apply_one_shot_rules_inner(&rule_index, ast, id, fresh, INITIAL_DEPTH)
}
/// Recursive worker for `OneShot` rule application on the node `id`.
///
/// Steps, in the order implemented below:
/// - Fails with an error once `rewrite_depth` exceeds `MAX_REWRITE_DEPTH`.
/// - Returns unnamed nodes unchanged as `vec![id]`.
/// - Tries the rules that `index` lists for the node's kind, in order; the
///   first rule whose query matches is the one that fires.
/// - Before that rule's transform runs, every captured id other than `id`
///   itself is replaced by the result of recursing on it with
///   `rewrite_depth + 1`. A recursion that yields anything other than
///   exactly one id is an error.
/// - Returns the ids produced by the transform, or an error naming the node
///   kind when no rule matched.
///
/// An `id` that does not resolve in `ast` is looked up under the kind `""`.
fn apply_one_shot_rules_inner(
index: &RuleIndex,
ast: &mut Ast,
id: Id,
fresh: &tree_builder::FreshScope,
rewrite_depth: usize,
) -> Result<Vec<Id>, String> {
if rewrite_depth > MAX_REWRITE_DEPTH {
return Err(format!(
"Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \
This likely indicates a non-terminating rule cycle."
));
}
let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or("");
// Don't rewrite unnamed nodes (punctuation, keywords, etc.); leave them
// as-is. Rules target named nodes only.
if let Some(node) = ast.get_node(id) {
if !node.is_named() {
return Ok(vec![id]);
}
}
for rule in index.rules_for_kind(node_kind) {
if let Some(mut captures) = rule.try_match(ast, id)? {
// Recursively translate every captured node before invoking the
// transform. The transform's output uses output-schema kinds, so
// we must translate captured input-schema nodes to their
// output-schema equivalents first.
captures.try_map_all_captures(|captured_id| {
// Avoid infinite recursion when a capture refers to the root
// node of the matched tree (e.g. an `@_` capture on the
// pattern root): re-analyzing it would match the same rule
// again indefinitely.
if captured_id == id {
return Ok(captured_id);
}
let result =
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
if result.len() != 1 {
return Err(format!(
"OneShot: recursion on captured node produced {} results, expected exactly 1",
result.len()
));
}
Ok(result[0])
})?;
// Only the first matching rule fires; later rules are not tried.
return Ok(rule.run_transform(ast, captures, id, fresh));
}
}
Err(format!(
"OneShot: no rule matched node of kind '{node_kind}'"
))
}
/// Selects how the rules of a phase are applied to the tree.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PhaseKind {
/// A node is re-processed until none of the rules in the phase matches,
/// although a single rule cannot be applied twice in a row unless that rule is also marked as repeating.
/// When a node no longer matches any rules, its children are recursively processed (top down).
Repeating,
/// A named node is processed by the first matching rule, and the phase fails with an error if no rule matches.
/// Unnamed nodes are left as-is.
/// Rules are then recursively applied to every captured node.
/// In practice this is used when translating from one AST schema to another, where every node must be rewritten,
/// and it would be a type error to match the rule patterns (based on the input schema) against the output nodes (which conform to the output schema).
OneShot,
}
/// One phase of a desugaring pass: a named bundle of rules that runs to
/// completion (a full traversal applying its rules) before the next phase
/// starts. Rules within a phase compete for matches as usual; rules in
@@ -637,13 +880,15 @@ pub struct Phase {
/// Name used in error messages.
pub name: String,
pub rules: Vec<Rule>,
pub kind: PhaseKind,
}
impl Phase {
pub fn new(name: impl Into<String>, rules: Vec<Rule>) -> Self {
pub fn new(name: impl Into<String>, kind: PhaseKind, rules: Vec<Rule>) -> Self {
Self {
name: name.into(),
rules,
kind,
}
}
}
@@ -661,8 +906,8 @@ impl Phase {
///
/// ```ignore
/// let config = yeast::DesugaringConfig::new()
/// .add_phase("cleanup", cleanup_rules)
/// .add_phase("desugar", desugar_rules)
/// .add_phase("cleanup", PhaseKind::Repeating, cleanup_rules)
/// .add_phase("desugar", PhaseKind::Repeating, desugar_rules)
/// .with_output_node_types_yaml(yaml);
/// ```
#[derive(Default)]
@@ -682,9 +927,14 @@ impl DesugaringConfig {
Self::default()
}
/// Append a new phase with the given name and rules.
pub fn add_phase(mut self, name: impl Into<String>, rules: Vec<Rule>) -> Self {
self.phases.push(Phase::new(name, rules));
/// Append a new phase with the given name, kind, and rules.
pub fn add_phase(
mut self,
name: impl Into<String>,
kind: PhaseKind,
rules: Vec<Rule>,
) -> Self {
self.phases.push(Phase::new(name, kind, rules));
self
}
@@ -747,8 +997,17 @@ impl<'a> Runner<'a> {
})
}
pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language);
pub fn run_from_tree(
&self,
tree: &tree_sitter::Tree,
source: &[u8],
) -> Result<Ast, String> {
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
tree,
&self.language,
source.to_vec(),
);
self.run_phases(&mut ast)?;
Ok(ast)
}
@@ -761,7 +1020,12 @@ impl<'a> Runner<'a> {
let tree = parser
.parse(input, None)
.ok_or_else(|| "Failed to parse input".to_string())?;
let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language);
let mut ast = Ast::from_tree_with_schema_and_source(
self.schema.clone(),
&tree,
&self.language,
input.as_bytes().to_vec(),
);
self.run_phases(&mut ast)?;
Ok(ast)
}
@@ -773,8 +1037,11 @@ impl<'a> Runner<'a> {
let fresh = tree_builder::FreshScope::new();
let mut root = ast.get_root();
for phase in self.phases {
let res = apply_rules(&phase.rules, ast, root, &fresh)
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
let res = match phase.kind {
PhaseKind::Repeating => apply_repeating_rules(&phase.rules, ast, root, &fresh),
PhaseKind::OneShot => apply_one_shot_rules(&phase.rules, ast, root, &fresh),
}
.map_err(|e| format!("Phase `{}`: {e}", phase.name))?;
if res.len() != 1 {
return Err(format!(
"Phase `{}`: expected exactly one result node, got {}",

View File

@@ -23,6 +23,7 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write;
use crate::CHILD_FIELD;
use serde::Deserialize;
use serde_json::json;
@@ -100,30 +101,36 @@ fn parse_field_name(raw: &str) -> FieldSpec {
/// Turn a `TypeRef` into a `(kind, named)` pair.
///
/// An explicit reference is always unnamed. A bare name is unnamed only
/// when it appears in `unnamed_types` and not in `named_types`; names that
/// are in both sets, or in neither, are taken to be named.
fn resolve_type_ref_pair(
    type_ref: &TypeRef,
    named_types: &BTreeSet<String>,
    unnamed_types: &BTreeSet<String>,
) -> (String, bool) {
    match type_ref {
        TypeRef::Explicit { unnamed } => (unnamed.clone(), false),
        TypeRef::Name(name) => {
            let only_unnamed = unnamed_types.contains(name) && !named_types.contains(name);
            (name.clone(), !only_unnamed)
        }
    }
}
/// Resolve a TypeRef to a {type, named} JSON record, given the sets of known named
/// and unnamed types.
fn resolve_type_ref(
type_ref: &TypeRef,
named_types: &BTreeSet<String>,
unnamed_types: &BTreeSet<String>,
) -> serde_json::Value {
match type_ref {
TypeRef::Explicit { unnamed } => {
json!({"type": unnamed, "named": false})
}
TypeRef::Name(name) => {
let is_named = named_types.contains(name);
let is_unnamed = unnamed_types.contains(name);
if is_named && is_unnamed {
// Ambiguous: default to named
json!({"type": name, "named": true})
} else if is_unnamed {
json!({"type": name, "named": false})
} else {
// Named, or unknown (assume named)
json!({"type": name, "named": true})
}
}
}
let (kind, named) = resolve_type_ref_pair(type_ref, named_types, unnamed_types);
json!({"type": kind, "named": named})
}
/// Convert YAML string to node-types JSON string.
@@ -233,14 +240,12 @@ pub fn convert(yaml_input: &str) -> Result<String, String> {
serde_json::to_string_pretty(&output).map_err(|e| format!("Failed to serialize JSON: {e}"))
}
/// Build a Schema from a YAML node-types string.
/// Registers all node kinds and field names found in the YAML.
pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, String> {
let yaml: YamlNodeTypes =
serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?;
let mut schema = crate::schema::Schema::new();
/// Apply YAML node-type definitions to a mutable Schema.
/// Registers all types, fields, and allowed types from the YAML into the schema.
fn apply_yaml_to_schema(
yaml: &YamlNodeTypes,
schema: &mut crate::schema::Schema,
) {
// Register all supertypes as node kinds
for name in yaml.supertypes.keys() {
schema.register_kind(name);
@@ -264,6 +269,62 @@ pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, Strin
schema.register_unnamed_kind(name);
}
let mut named_types = BTreeSet::new();
for name in yaml.supertypes.keys() {
named_types.insert(name.clone());
}
for name in yaml.named.keys() {
named_types.insert(name.clone());
}
let unnamed_types: BTreeSet<String> = yaml.unnamed.iter().cloned().collect();
for (supertype, members) in &yaml.supertypes {
let node_types = members
.iter()
.map(|m| {
let (kind, named) = resolve_type_ref_pair(m, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect();
schema.set_supertype_members(supertype, node_types);
}
// Register allowed field child types for type checking.
for (parent_kind, fields_opt) in &yaml.named {
let Some(fields) = fields_opt else {
continue;
};
for (raw_field_name, type_refs) in fields {
let spec = parse_field_name(raw_field_name);
let field_id = match &spec.name {
Some(name) => schema.register_field(name),
None => CHILD_FIELD,
};
let mut node_types = type_refs
.clone()
.into_vec()
.into_iter()
.map(|type_ref| {
let (kind, named) = resolve_type_ref_pair(&type_ref, &named_types, &unnamed_types);
crate::schema::NodeType { kind, named }
})
.collect::<Vec<_>>();
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
schema.set_field_types(parent_kind, field_id, node_types);
}
}
}
/// Build a fresh `Schema` from a YAML node-types string.
///
/// The YAML is deserialized into `YamlNodeTypes` and then applied to a new,
/// empty schema via `apply_yaml_to_schema`.
///
/// # Errors
///
/// Returns `Err("Failed to parse YAML: …")` when the input cannot be
/// deserialized.
pub fn schema_from_yaml(yaml_input: &str) -> Result<crate::schema::Schema, String> {
    let parsed = serde_yaml::from_str::<YamlNodeTypes>(yaml_input)
        .map_err(|e| format!("Failed to parse YAML: {e}"))?;
    let mut result = crate::schema::Schema::new();
    apply_yaml_to_schema(&parsed, &mut result);
    Ok(result)
}
@@ -278,29 +339,7 @@ pub fn schema_from_yaml_with_language(
serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?;
let mut schema = crate::schema::Schema::from_language(language);
// Register supertypes
for name in yaml.supertypes.keys() {
schema.register_kind(name);
}
// Register named node kinds and their fields
for (name, fields_opt) in &yaml.named {
schema.register_kind(name);
if let Some(fields) = fields_opt {
for raw_field_name in fields.keys() {
let spec = parse_field_name(raw_field_name);
if let Some(field_name) = &spec.name {
schema.register_field(field_name);
}
}
}
}
// Register unnamed tokens
for name in &yaml.unnamed {
schema.register_unnamed_kind(name);
}
apply_yaml_to_schema(&yaml, &mut schema);
Ok(schema)
}

View File

@@ -1,7 +1,13 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use crate::{FieldId, KindId, CHILD_FIELD};
/// A reference to a node type: the kind name plus whether it is a named kind
/// or an unnamed token.
///
/// The derived ordering compares `kind` first and `named` second (with
/// `false < true`), so a list of `NodeType`s can be normalized with plain
/// `sort()` followed by `dedup()`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NodeType {
    /// Name of the node kind.
    pub kind: String,
    /// `true` for a named kind, `false` for an unnamed token.
    pub named: bool,
}
/// A schema defining node kinds and field names for the output AST.
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
///
@@ -25,6 +31,8 @@ pub struct Schema {
unnamed_kind_ids: BTreeMap<String, KindId>,
kind_names: BTreeMap<KindId, &'static str>,
next_kind_id: KindId,
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
supertypes: BTreeMap<String, Vec<NodeType>>,
}
impl Default for Schema {
@@ -43,6 +51,8 @@ impl Schema {
unnamed_kind_ids: BTreeMap::new(),
kind_names: BTreeMap::new(),
next_kind_id: 1, // 0 is reserved
field_types: BTreeMap::new(),
supertypes: BTreeMap::new(),
}
}
@@ -166,4 +176,68 @@ impl Schema {
pub fn node_kind_for_id(&self, id: KindId) -> Option<&'static str> {
self.kind_names.get(&id).copied()
}
/// Record the node types allowed in field `field_id` of `parent_kind`,
/// replacing any list previously stored for that (kind, field) pair.
pub fn set_field_types(
    &mut self,
    parent_kind: &str,
    field_id: FieldId,
    node_types: Vec<NodeType>,
) {
    let key = (parent_kind.to_string(), field_id);
    self.field_types.insert(key, node_types);
}
/// Look up the node types allowed in field `field_id` of `parent_kind`.
///
/// Returns `None` when nothing has been registered for that (kind, field)
/// pair.
pub fn field_types(
    &self,
    parent_kind: &str,
    field_id: FieldId,
) -> Option<&Vec<NodeType>> {
    let key = (parent_kind.to_string(), field_id);
    self.field_types.get(&key)
}
/// Record the member types of `supertype`, replacing any previously stored
/// member list for that name.
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
    let name = supertype.to_string();
    self.supertypes.insert(name, node_types);
}
/// Decide whether `node_type` admits a node of kind `node_kind` with the
/// given namedness, either directly or through (possibly nested) supertype
/// membership.
///
/// `active` holds the supertypes currently being expanded on this path; a
/// supertype that is already in the set is not expanded again, which keeps
/// cyclic supertype definitions from recursing forever.
fn allows_node(
    &self,
    node_type: &NodeType,
    node_kind: &str,
    node_named: bool,
    active: &mut BTreeSet<String>,
) -> bool {
    let is_direct_match = node_type.kind == node_kind && node_type.named == node_named;
    if is_direct_match {
        return true;
    }
    // Only named types are looked up as supertypes.
    if !node_type.named {
        return false;
    }
    match self.supertypes.get(&node_type.kind) {
        None => false,
        Some(members) => {
            // Already expanding this supertype further up the call stack.
            if !active.insert(node_type.kind.clone()) {
                return false;
            }
            let mut found = false;
            for member in members {
                if self.allows_node(member, node_kind, node_named, active) {
                    found = true;
                    break;
                }
            }
            active.remove(&node_type.kind);
            found
        }
    }
}
/// Return `true` when a node of kind `node_kind` (named or unnamed as given
/// by `node_named`) is admitted by at least one entry of `node_types`,
/// directly or via supertype membership.
pub fn node_matches_types(
    &self,
    node_kind: &str,
    node_named: bool,
    node_types: &[NodeType],
) -> bool {
    for candidate in node_types {
        // Each candidate gets its own, initially empty, expansion set.
        let mut visiting = BTreeSet::new();
        if self.allows_node(candidate, node_kind, node_named, &mut visiting) {
            return true;
        }
    }
    false
}
}

View File

@@ -52,6 +52,7 @@ impl Visitor {
root: 0,
schema,
nodes: self.nodes.into_iter().map(|n| n.inner).collect(),
source: Vec::new(),
}
}

View File

@@ -1,6 +1,6 @@
#![cfg(test)]
use yeast::dump::dump_ast;
use yeast::dump::{dump_ast, dump_ast_with_type_errors};
use yeast::*;
const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml");
@@ -15,7 +15,7 @@ fn parse_and_dump(input: &str) -> String {
/// Helper: parse Ruby source with a custom output schema and a single
/// phase of rules, return dump.
fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
run_phased_and_dump(input, vec![Phase::new("test", rules)])
run_phased_and_dump(input, vec![Phase::new("test", PhaseKind::Repeating, rules)])
}
/// Helper: parse Ruby source with a custom output schema and multiple
@@ -35,13 +35,42 @@ fn run_and_get_error(input: &str, rules: Vec<Rule>) -> String {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("test", rules)];
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
runner
.run(input)
.expect_err("expected runner to return an error")
}
/// Helper: parse Ruby source without any rewrite rules, then dump the tree
/// with inline type errors checked against a schema built from `schema_yaml`
/// (via `schema_from_yaml`, i.e. without language ids).
fn parse_and_dump_typed(input: &str, schema_yaml: &str) -> String {
    let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
    let tree = runner.run(input).unwrap();
    let checked_schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &checked_schema)
}
/// Helper: parse Ruby source without any rewrite rules, then dump the tree
/// with inline type errors. The schema is built with the Ruby language's ids
/// (`schema_from_yaml_with_language`) so field checks align with the fields
/// produced by the parser.
fn parse_and_dump_typed_with_language(input: &str, schema_yaml: &str) -> String {
    let ruby: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let runner = Runner::new(ruby.clone(), &[]);
    let tree = runner.run(input).unwrap();
    let checked_schema =
        yeast::node_types_yaml::schema_from_yaml_with_language(schema_yaml, &ruby).unwrap();
    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &checked_schema)
}
/// Helper: run `rules` as a single repeating phase named `"test"` over Ruby
/// source, then dump the result with inline type errors checked against the
/// schema built from `schema_yaml`. The same schema is used as the runner's
/// output schema.
fn run_and_dump_typed(input: &str, rules: Vec<Rule>, schema_yaml: &str) -> String {
    let ruby: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
    let output_schema = yeast::node_types_yaml::schema_from_yaml(schema_yaml).unwrap();
    let single_phase = vec![Phase::new("test", PhaseKind::Repeating, rules)];
    let runner = Runner::with_schema(ruby, &output_schema, &single_phase);
    let tree = runner.run(input).unwrap();
    let root = tree.get_root();
    dump_ast_with_type_errors(&tree, root, input, &output_schema)
}
/// Assert that a dump equals the expected string, treating the expected
/// string as an indented multiline literal: leading/trailing blank lines
/// are stripped, and the common leading indentation is removed from every
@@ -125,6 +154,85 @@ fn test_parse_for_loop() {
);
}
/// The typed dump must mark schema violations inline: parsing `x = 1` against
/// a schema that declares no `integer` kind flags the integer literal.
#[test]
fn test_dump_highlights_type_errors_inline() {
// Schema under test: `assignment.right` only allows `identifier`, and
// `integer` is not declared as a kind at all.
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
// No rewrite rules run here; the raw parse tree is checked against the schema.
let dump = parse_and_dump_typed("x = 1", schema_yaml);
assert!(dump.contains("integer \"1\" <-- ERROR:"));
}
/// A node kind that is absent from the schema must still be reported when a
/// rewrite rule carries the node through unchanged via a capture.
#[test]
fn test_dump_reports_preserved_unknown_kind_after_transformation() {
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
"#;
// This rewrite runs and preserves the RHS node kind via capture.
// With schema above, preserving `integer` should be reported inline.
let rules = vec![yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(assignment
left: {left}
right: {right}
)
)];
let dump = run_and_dump_typed("x = 1", rules, schema_yaml);
// The error is attached to the offending node and names the unknown kind.
assert!(dump.contains("integer \"1\" <-- ERROR:"));
assert!(dump.contains("node kind 'integer' not in schema"));
}
/// A field that the parser produces but the schema does not declare on the
/// parent kind must be reported on the child occupying that field.
#[test]
fn test_dump_reports_undeclared_field_on_node() {
// `assignment` declares only `left`; the parsed `right` field is undeclared.
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
identifier:
"#;
// Uses the language-aware schema helper so field checks line up with the
// fields emitted by the Ruby parser.
let dump = parse_and_dump_typed_with_language("x = y", schema_yaml);
assert!(dump.contains("right: identifier \"y\" <-- ERROR:"));
assert!(dump.contains("the node 'assignment' has no field 'right'"));
}
/// A node whose kind exists in the schema but is not among the types allowed
/// for the field it sits in must be reported as a field type mismatch.
#[test]
fn test_dump_reports_disallowed_kind_in_field_type() {
// `integer` is a declared kind here, but `assignment.right` only allows
// `identifier`.
let schema_yaml = r#"
named:
program:
$children*: assignment
assignment:
left: identifier
right: identifier
identifier:
integer:
"#;
let dump = parse_and_dump_typed_with_language("x = 1", schema_yaml);
assert!(dump.contains("right: integer \"1\" <-- ERROR:"));
// The message states both the expected types and the kind actually found.
assert!(dump.contains("should contain"));
assert!(dump.contains("but got integer"));
}
// ---- Query tests ----
#[test]
@@ -166,6 +274,32 @@ fn test_query_no_match() {
assert!(!matched);
}
/// After a rewrite replaces a node, the replaced node must not be counted as
/// reachable from the root and must not appear in the dump.
#[test]
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema = yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang)
.unwrap();
// Single rule: every `integer` becomes an `identifier` with text "replaced".
let phases = vec![Phase::new(
"test",
PhaseKind::Repeating,
vec![yeast::rule!((integer) => (identifier "replaced"))],
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let reachable_ids = ast.reachable_node_ids();
// The full node list is expected to be strictly larger than the reachable set.
assert!(
ast.nodes().len() > reachable_ids.len(),
"expected rewrite to leave orphaned arena nodes"
);
let dump = dump_ast(&ast, ast.get_root(), input);
assert!(dump.contains("identifier \"replaced\""));
assert!(!dump.contains("integer \"1\""));
}
#[test]
fn test_query_repeated_capture() {
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
@@ -653,8 +787,8 @@ fn test_phased_desugaring() {
let dump = run_phased_and_dump(
"x = 1",
vec![
Phase::new("cleanup", cleanup),
Phase::new("desugar", desugar),
Phase::new("cleanup", PhaseKind::Repeating, cleanup),
Phase::new("desugar", PhaseKind::Repeating, desugar),
],
);
assert_dump_eq(
@@ -675,7 +809,11 @@ fn test_phase_error_includes_phase_name() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new("buggy", vec![swap_assignment_rule().repeated()])];
let phases = vec![Phase::new(
"buggy",
PhaseKind::Repeating,
vec![swap_assignment_rule().repeated()],
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
@@ -690,6 +828,168 @@ fn test_phase_error_includes_phase_name() {
);
}
/// Helper: an exhaustive set of OneShot rules covering every node reachable
/// (via captures) when translating `"x = 1"`.
///
/// The `integer` rule is deliberately last so that a test can remove it with
/// `Vec::pop` to provoke a "no rule matched" failure.
fn one_shot_xeq1_rules() -> Vec<Rule> {
vec![
// Root: collect all children of `program` into its `stmt` field.
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// `assignment` becomes `first_node`, keeping both captured operands.
yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(first_node left: {left} right: {right})
),
// Leaves are replaced by fixed marker text so the dump shows they were rewritten.
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
]
}
/// Happy path for a `PhaseKind::OneShot` phase: with a rule for every node
/// reached while translating `x = 1`, the whole tree is rewritten.
#[test]
fn test_one_shot_phase() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let phases = vec![Phase::new(
"translate",
PhaseKind::OneShot,
one_shot_xeq1_rules(),
)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// Both leaves carry the marker text produced by their rules.
assert_dump_eq(
&dump,
r#"
program
stmt:
first_node
left: identifier "ID"
right: integer "INT"
"#,
);
}
/// A OneShot phase must fail, naming both the phase and the node kind, when
/// it reaches a node for which no rule matches.
#[test]
fn test_one_shot_phase_errors_when_no_rule_matches() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
// Drop the `integer` rule so the recursion has no rule for `integer`.
let mut rules = one_shot_xeq1_rules();
rules.pop();
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let err = runner
.run("x = 1")
.expect_err("expected OneShot to error on unmatched node");
// The error is prefixed with the phase name.
assert!(
err.contains("Phase `translate`"),
"error should name the phase, got: {err}"
);
assert!(
err.contains("no rule matched") && err.contains("integer"),
"error should describe the unmatched node kind, got: {err}"
);
}
/// OneShot recursion must apply rules to *captured* nodes, even if the rule
/// returns a captured child verbatim. A buggy implementation that only
/// recurses into the children of the rule's output (rather than into the
/// captures) would leave the returned capture untransformed.
#[test]
fn test_one_shot_recurses_into_returned_capture() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// Returns the captured `left` verbatim, discarding `right`.
yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
{left}
),
// Marker-text rules: a rewritten leaf shows up as "ID" / "INT" in the dump.
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `left` is an `identifier`; OneShot must apply the identifier rule to
// it before the assignment transform returns it verbatim.
assert_dump_eq(
&dump,
r#"
program
stmt: identifier "ID"
"#,
);
}
/// OneShot recursion must NOT descend into the children of the rule's output.
/// A rule may legitimately wrap a captured node in fresh output-schema nodes
/// that have no matching rule of their own (since rule patterns target the
/// input schema). Recursing into the output would erroneously try to find
/// rules for those wrapper kinds and fail.
#[test]
fn test_one_shot_does_not_recurse_into_wrapper_output() {
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
let schema =
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
let rules = vec![
yeast::rule!(
(program (_)* @stmts)
=>
(program stmt: {..stmts})
),
// Wraps `left` in nested `first_node`/`second_node` output kinds.
// Neither wrapper kind has a matching rule, so a buggy implementation
// that recurses into the wrapper's children would error.
yeast::rule!(
(assignment left: (_) @left right: (_) @right)
=>
(first_node
left: (second_node left: {left} right: {right})
right: {left}
)
),
yeast::rule!((identifier) => (identifier "ID")),
yeast::rule!((integer) => (integer "INT")),
];
let phases = vec![Phase::new("translate", PhaseKind::OneShot, rules)];
let runner = Runner::with_schema(lang, &schema, &phases);
let input = "x = 1";
let ast = runner.run(input).unwrap();
let dump = dump_ast(&ast, ast.get_root(), input);
// `{left}` is used twice in the template, so the rewritten identifier
// appears in both positions of the expected dump.
assert_dump_eq(
&dump,
r#"
program
stmt:
first_node
left:
second_node
left: identifier "ID"
right: integer "INT"
right: identifier "ID"
"#,
);
}
// ---- Cursor tests ----
#[test]
@@ -760,3 +1060,54 @@ fn test_desugar_for_with_multiple_assignment() {
"#,
);
}
/// Regression test: `#{capture}` in a template must render the *source text*
/// of the captured node, not its arena `Id`. Previously, captures were bound
/// as `usize`, so `#{cap}` printed the integer id (e.g. `"3"`) via `Display`.
/// Captures are now bound as `NodeRef`, which has no `Display` impl and
/// resolves to the captured node's source text via `YeastDisplay`.
#[test]
fn test_hash_brace_renders_capture_source_text() {
// Rebuild the call with fresh `identifier` nodes whose text comes from the
// captured method and receiver nodes.
let rule = rule!(
(call
method: (identifier) @name
receiver: (identifier) @recv
)
=>
(call
method: (identifier #{name})
receiver: (identifier #{recv})
arguments: (argument_list)
)
);
let dump = run_and_dump("foo.bar()", vec![rule]);
// Expect the captured source text ("bar", "foo"), not numeric node ids.
assert_dump_eq(
&dump,
r#"
program
call
arguments: argument_list "foo.bar()"
method: identifier "bar"
receiver: identifier "foo"
"#,
);
}
/// Regression test: non-`NodeRef` values in `#{expr}` still render via their
/// `Display` impl (covered by `YeastDisplay`'s blanket impls for primitives).
#[test]
fn test_hash_brace_renders_integer_expression() {
// `#{1 + 2}` holds an integer expression rather than a capture.
let rule = rule!(
(identifier) @_
=>
(identifier #{1 + 2})
);
let dump = run_and_dump("foo", vec![rule]);
// The expression's value is rendered as the node text "3".
assert_dump_eq(
&dump,
r#"
program
identifier "3"
"#,
);
}

View File

@@ -20,10 +20,15 @@ grammar source), run `scripts/regenerate-grammar.sh` to:
it shows the impact of a grammar tweak on the named node kinds, fields,
and child types in a form much easier to read than the raw JSON.
## Testing
- If you changed the extractor code, always rebuild it before running tests.
## Extractor Testing
- To run extractor tests, run `cargo test` in the `extractor` directory.
- To run all tests, run `codeql test run --search-path extractor-pack ql/test`
- Do not edit the printed ASTs in `extractor/test/corpus` directly. To regenerate the ASTs, run `scripts/update-corpus.sh`.
## CodeQL Testing
- If you changed the extractor code, always rebuild it before running CodeQL tests.
- To run all CodeQL tests, run `codeql test run --search-path extractor-pack ql/test`
- Do not edit `.expected` files manually. To update the expected output, pass `--learn` to the `codeql test run` command.

View File

@@ -0,0 +1,144 @@
supertypes:
expr:
- name_expr
- int_literal
- string_literal
- binary_expr
- unary_expr
- call_expr
- member_access_expr
- lambda_expr
- unsupported_node
stmt:
- empty_stmt
- block_stmt
- expr_stmt
- if_stmt
- variable_declaration_stmt
- guard_if_stmt
- unsupported_node
condition:
- expr_condition
- let_pattern_condition
- sequence_condition
- unsupported_node
pattern:
- var_pattern
- apply_pattern
- tuple_pattern
- ignore_pattern
- unsupported_node
named:
# Top-level is the root node, currently containing a list of expressions and statements
top_level:
body*: [expr, stmt]
# An identifier used in the context of an expression
name_expr:
identifier: identifier
# An integer literal
int_literal:
# A string literal
string_literal:
# Application of a binary operator, such as `a + b`
binary_expr:
left: expr
operator: operator
right: expr
# Application of a unary operator, such as `!x`
unary_expr:
operand: expr
operator: operator
# A function or method call, such as `f(x)` or `obj.m(x)`. Method calls
# are represented as a call whose `function` is a `member_access_expr`.
call_expr:
function: expr
argument*: expr
# Member access, such as `obj.member`.
member_access_expr:
target: expr
member: identifier
lambda_expr:
parameter*: parameter
body: [expr, stmt]
# A parameter
parameter:
pattern: pattern
empty_stmt:
block_stmt:
body*: stmt
expr_stmt:
expr: expr
if_stmt:
condition: condition
then?: stmt
else?: stmt
variable_declaration_stmt:
variable_declarator+: variable_declarator
# A variable declaration, or assignment to a pattern.
# The initializer is optional (but typically only possible in combination with a simple variable pattern).
variable_declarator:
pattern: pattern
value?: expr
# Evaluate 'condition', and if false, execute 'else' which must break from the enclosing block scope (return, break, etc).
# Any variables bound by 'condition' will be in scope for the remainder of the enclosing block scope
# (which differs from how if_stmt works).
guard_if_stmt:
condition: condition
else: stmt
# Evaluates the given condition and interprets it as a boolean (by language conventions)
expr_condition:
expr: expr
# A series of statements that are executed before evaluating the trailing condition.
# Useful for languages where a conditional clause may be preceded by side-effecting
# syntactic elements (e.g. binding clauses) that don't themselves form a condition.
sequence_condition:
stmt*: stmt
condition: condition
# Evaluate 'expr' and match its result against 'pattern', and return true if it matches.
# Variables bound by the pattern will be in scope within the 'true' branch controlled by this condition.
let_pattern_condition:
pattern: pattern
value: expr
# A pattern matching anything, binding its value to the given variable
var_pattern:
identifier: identifier
# A pattern matching anything, binding no variables, usually using the syntax "_"
ignore_pattern:
# A pattern such as `Some(x)` where `Some` is the constructor and `x` is an argument
apply_pattern:
constructor: expr
argument*: pattern
# A tuple pattern such as `(a, b)` in `let (a, b) = pair`.
tuple_pattern:
element*: pattern
# A simple unqualified identifier token
identifier:
# A node that we don't yet translate
unsupported_node:
operator:

View File

@@ -3,9 +3,7 @@ use std::path::PathBuf;
use codeql_extractor::extractor::simple;
use codeql_extractor::trap;
#[path = "languages/swift/swift.rs"]
mod swift;
use crate::languages;
#[derive(Args)]
pub struct Options {
@@ -25,11 +23,17 @@ pub struct Options {
pub fn run(options: Options) -> std::io::Result<()> {
codeql_extractor::extractor::set_tracing_level("unified");
// The generated dbscheme/QL library uses the unified_* relation namespace.
// Keep per-language specs for parser/rules/file globs, but normalize the
// extraction table prefix so emitted TRAP relations match the dbscheme.
let mut languages = languages::all_language_specs();
for lang in &mut languages {
lang.prefix = "unified";
}
let extractor = simple::Extractor {
prefix: "unified".to_string(),
languages: vec![
swift::language_spec(),
],
languages,
trap_dir: options.output_dir,
trap_compression: trap::Compression::from_env("CODEQL_EXTRACTOR_UNIFIED_OPTION_TRAP_COMPRESSION"),
source_archive_dir: options.source_archive_dir,

View File

@@ -3,6 +3,8 @@ use std::path::PathBuf;
use codeql_extractor::generator::{generate, language::Language};
use crate::languages;
#[derive(Args)]
pub struct Options {
/// Path of the generated dbscheme file
@@ -17,10 +19,16 @@ pub struct Options {
pub fn run(options: Options) -> std::io::Result<()> {
codeql_extractor::extractor::set_tracing_level("unified");
// The QL-visible schema is the unified output AST, not the per-language
// input grammars. Pass it via `desugar.output_node_types_yaml` so the
// generator converts the YAML to JSON node-types.
let desugar = yeast::DesugaringConfig::new()
.with_output_node_types_yaml(languages::OUTPUT_AST_SCHEMA);
let languages = vec![Language {
name: "Swift".to_owned(),
node_types: tree_sitter_swift::NODE_TYPES,
desugar: None,
name: "Unified".to_owned(),
node_types: "", // unused: generator picks up output_node_types_yaml above
desugar: Some(desugar),
}];
generate(languages, options.dbscheme, options.library, "run unified/scripts/create-extractor-pack.sh")

View File

@@ -0,0 +1,11 @@
use codeql_extractor::extractor::simple;
#[path = "swift/swift.rs"]
mod swift;
/// Shared YEAST output AST schema for all languages.
pub(crate) const OUTPUT_AST_SCHEMA: &str = include_str!("../../ast_types.yml");
/// Return the language specs for every supported language. Currently this is
/// only Swift, configured with the shared `OUTPUT_AST_SCHEMA`.
pub fn all_language_specs() -> Vec<simple::LanguageSpec> {
    let swift_spec = swift::language_spec(OUTPUT_AST_SCHEMA);
    vec![swift_spec]
}

View File

@@ -1,18 +1,358 @@
use codeql_extractor::extractor::simple;
use yeast::{rule, DesugaringConfig};
use yeast::{build::BuildCtx, rule, DesugaringConfig, PhaseKind};
fn desugaring_rules() -> Vec<yeast::Rule> {
/// Names of output AST kinds that belong to the `expr` supertype. Kept in
/// sync with `ast_types.yml`. `unsupported_node` is intentionally omitted
/// because it is also a member of the `stmt` supertype.
const EXPR_KINDS: &[&str] = &[
"name_expr",
"int_literal",
"string_literal",
"binary_expr",
"unary_expr",
"call_expr",
"member_access_expr",
"lambda_expr",
];
/// Make node `id` usable in a `stmt` position.
///
/// When the node's kind is listed in `EXPR_KINDS`, a new `expr_stmt` node
/// wrapping it is built and its id returned. Any other node, including an id
/// for which no node can be looked up, is returned unchanged.
fn wrap_expr_in_stmt(ctx: &mut BuildCtx, id: usize) -> usize {
    let is_expr = ctx
        .ast
        .get_node(id)
        .map_or(false, |node| EXPR_KINDS.contains(&node.kind()));
    if !is_expr {
        return id;
    }
    yeast::tree!(ctx, (expr_stmt expr: {id}))
}
fn translation_rules() -> Vec<yeast::Rule> {
vec![
rule!(
(additive_expression)
(source_file (_)* @children)
=>
(simple_identifier "blah")
(top_level
body: {..children}
)
),
// ---- Binary expressions ----
// Swift's parser produces a different node kind for each operator
// family, but the field shape (`lhs` / `op` / `rhs`) is uniform, so
// each maps onto `binary_expr`.
rule!(
(additive_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(multiplicative_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(comparison_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(equality_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(conjunction_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(disjunction_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(nil_coalescing_expression
lhs: (_) @left
op: _ @operator
rhs: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
rule!(
(range_expression
start: (_) @left
op: _ @operator
end: (_) @right)
=>
(binary_expr
left: {left}
operator: (operator #{operator})
right: {right})
),
// ---- Unary expressions ----
rule!(
(prefix_expression
operation: _ @operator
target: (_) @operand)
=>
(unary_expr
operand: {operand}
operator: (operator #{operator}))
),
// ---- Identifiers / name expressions ----
rule!(
(simple_identifier) @name
=>
(name_expr
identifier: (identifier #{name}))
),
// ---- Literals ----
rule!(
(integer_literal) @lit
=>
(int_literal #{lit})
),
// String literals: render the *raw* source text, including the
// surrounding quotes. Interpolations (e.g. `"hi \(x)"`) are not
// yet broken out into structured pieces — they show up as part
// of the literal's source text.
rule!(
(line_string_literal) @lit
=>
(string_literal #{lit})
), // ---- Lambdas / closures ----
// Map a `lambda_literal` whose body is a single statement to
// `lambda_expr`. Multi-statement bodies fall through to
// `unsupported_node` because `lambda_expr.body` is single-valued
// in the current `ast_types.yml`. Parameters from explicit-typed
// closures (`{ (x: Int) -> Int in ... }`) are not yet captured.
rule!(
(lambda_literal
(statements (_) @body))
=>
(lambda_expr
body: {body})
),
// ---- Block / statement wrapping ----
// A `(statements ...)` node corresponds to a brace-delimited block.
// Each child is mapped through translation; bare expression results
// get wrapped in `expr_stmt` so they fit the `body*: stmt` field.
rule!(
(statements (_)* @stmts)
=>
(block_stmt body: {..stmts.iter().copied().map(|n|
wrap_expr_in_stmt(&mut __yeast_ctx, n.into())
).collect::<Vec<usize>>()})
),
// ---- Calls and member access ----
// Member access, e.g. `obj.member`. The Swift parser wraps the
// member name as `(navigation_suffix suffix: (simple_identifier))`.
rule!(
(navigation_expression
target: (_) @target
suffix: (navigation_suffix
suffix: (simple_identifier) @member))
=>
(member_access_expr
target: {target}
member: (identifier #{member}))
),
// Function / method call. The callee is the first child of
// `call_expression`; the second is a `call_suffix` whose
// `value_arguments` (if present) hold the parenthesized args. A
// trailing closure (`call_suffix` with a `lambda_literal` child)
// is appended as a final argument.
rule!(
(call_expression
(_) @callee
(call_suffix
(value_arguments
(value_argument value: (_) @args)*)?
(lambda_literal)? @trailing))
=>
(call_expr
function: {callee}
argument: {..args}
argument: {..trailing}
)
),
// ---- Guard statement ----
// `guard let x = e else { ... }` — currently only handles the
// let-binding form. The Swift parser models the `let` keyword as a
// `value_binding_pattern` child of `condition`, followed by an
// unnamed `=` and the source expression.
rule!(
(guard_statement
bound_identifier: (simple_identifier) @id
condition: (value_binding_pattern)
condition: (_) @value
(else)
(statements) @else_branch)
=>
(guard_if_stmt
condition: (let_pattern_condition
pattern: (var_pattern identifier: (identifier #{id}))
value: {value})
else: {else_branch})
),
// ---- If statement ----
// if-let binding (with optional else branch). The Swift parser puts
// the bound name in `bound_identifier`, the `let` keyword as a
// `value_binding_pattern` child of `condition`, and the source
// expression as a separate child of `condition`.
rule!(
(if_statement
bound_identifier: (simple_identifier) @id
condition: (value_binding_pattern)
condition: (_) @value
(statements) @then
(else)
(_) @else_branch)
=>
(if_stmt
condition: (let_pattern_condition
pattern: (var_pattern identifier: (identifier #{id}))
value: {value})
then: {then}
else: {else_branch})
),
rule!(
(if_statement
bound_identifier: (simple_identifier) @id
condition: (value_binding_pattern)
condition: (_) @value
(statements) @then)
=>
(if_stmt
condition: (let_pattern_condition
pattern: (var_pattern identifier: (identifier #{id}))
value: {value})
then: {then})
),
// With explicit else branch (block or chained if).
rule!(
(if_statement
condition: (_) @cond
(statements) @then
(else)
(_) @else_branch)
=>
(if_stmt
condition: (expr_condition expr: {cond})
then: {then}
else: {else_branch})
),
// Without else branch.
rule!(
(if_statement
condition: (_) @cond
(statements) @then)
=>
(if_stmt
condition: (expr_condition expr: {cond})
then: {then})
), // ---- Patterns ----
// The Swift parser uses a `pattern` node with a `bound_identifier`
// field for simple bindings such as `let x = ...`.
rule!(
(pattern bound_identifier: (simple_identifier) @id)
=>
(var_pattern
identifier: (identifier #{id}))
),
// Inside tuple patterns, the inner `pattern` node holds a bare
// `simple_identifier` (with no `bound_identifier` field).
rule!(
(pattern (simple_identifier) @id)
=>
(var_pattern
identifier: (identifier #{id}))
),
// Tuple destructuring pattern, e.g. `let (a, b) = pair`. The parser
// emits a `pattern` node whose unnamed children are themselves
// `pattern` nodes.
rule!(
(pattern (pattern)+ @parts)
=>
(tuple_pattern element: {..parts})
),
// ---- Variable declarations ----
// Handles single (`let x = e`), multiple (`let x = 1, y = 2`),
// and uninitialized (`var x: T`) bindings.
rule!(
(property_declaration
name: (_)* @pats
value: (_)* @vals)
=>
(variable_declaration_stmt
variable_declarator: {..pats.iter().enumerate().map(|(i, &pat)| {
match vals.get(i).copied() {
Some(val) => yeast::tree!(
(variable_declarator
pattern: {pat}
value: {val})),
None => yeast::tree!(
(variable_declarator
pattern: {pat})),
}
})})
),
// ---- Fallbacks ----
rule!(
(_)
=>
(unsupported_node)
),
rule!(
_ @node
=>
{node}
),
]
}
pub fn language_spec() -> simple::LanguageSpec {
let desugar = DesugaringConfig::new().add_phase("desugar", desugaring_rules());
pub fn language_spec(desugared_ast_schema: &'static str) -> simple::LanguageSpec {
let desugar = DesugaringConfig::new()
.add_phase("translate", PhaseKind::OneShot, translation_rules())
.with_output_node_types_yaml(desugared_ast_schema);
simple::LanguageSpec {
prefix: "swift",
ts_language: tree_sitter_swift::LANGUAGE.into(),

View File

@@ -3,6 +3,7 @@ use clap::Parser;
mod autobuilder;
mod extractor;
mod generator;
mod languages;
#[derive(Parser)]
#[command(author, version, about)]

View File

@@ -0,0 +1,281 @@
===
Closure with explicit parameters
===
let f = { (x: Int) -> Int in x * 2 }
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "f"
value:
lambda_literal
type:
lambda_function_type
return_type:
type
name:
user_type
type_identifier "Int"
lambda_function_type_parameters
lambda_parameter
name: simple_identifier "x"
type:
type
name:
user_type
type_identifier "Int"
statements
multiplicative_expression
lhs: simple_identifier "x"
op: *
rhs: integer_literal "2"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
lambda_expr
body:
binary_expr
operator: operator "*"
left:
name_expr
identifier: identifier "x"
right: int_literal "2"
pattern:
var_pattern
identifier: identifier "f"
===
Closure with shorthand parameters
===
let f = { $0 + $1 }
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "f"
value:
lambda_literal
statements
additive_expression
lhs: simple_identifier "$0"
op: +
rhs: simple_identifier "$1"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
lambda_expr
body:
binary_expr
operator: operator "+"
left:
name_expr
identifier: identifier "$0"
right:
name_expr
identifier: identifier "$1"
pattern:
var_pattern
identifier: identifier "f"
===
Trailing closure
===
xs.map { $0 * 2 }
---
source_file
call_expression
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "map"
target: simple_identifier "xs"
call_suffix
lambda_literal
statements
multiplicative_expression
lhs: simple_identifier "$0"
op: *
rhs: integer_literal "2"
---
top_level
body:
call_expr
argument:
lambda_expr
body:
binary_expr
operator: operator "*"
left:
name_expr
identifier: identifier "$0"
right: int_literal "2"
function:
member_access_expr
target:
name_expr
identifier: identifier "xs"
member: identifier "map"
===
Closure with capture list
===
let f = { [weak self] in self?.doThing() }
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "f"
value:
lambda_literal
captures:
capture_list
capture_list_item
name: simple_identifier "self"
ownership_modifier
statements
call_expression
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "doThing"
target:
optional_chain_marker
self_expression
call_suffix
value_arguments
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
lambda_expr
body:
call_expr
argument:
function:
member_access_expr
target: unsupported_node "self?"
member: identifier "doThing"
pattern:
var_pattern
identifier: identifier "f"
===
Multi-statement closure
===
let f = { (x: Int) -> Int in
let y = x + 1
return y * 2
}
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "f"
value:
lambda_literal
type:
lambda_function_type
return_type:
type
name:
user_type
type_identifier "Int"
lambda_function_type_parameters
lambda_parameter
name: simple_identifier "x"
type:
type
name:
user_type
type_identifier "Int"
statements
property_declaration
name:
pattern
bound_identifier: simple_identifier "y"
value:
additive_expression
lhs: simple_identifier "x"
op: +
rhs: integer_literal "1"
value_binding_pattern
mutability: let
control_transfer_statement
result:
multiplicative_expression
lhs: simple_identifier "y"
op: *
rhs: integer_literal "2"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
lambda_expr
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
binary_expr
operator: operator "+"
left:
name_expr
identifier: identifier "x"
right: int_literal "1"
pattern:
var_pattern
identifier: identifier "y"
pattern:
var_pattern
identifier: identifier "f"

View File

@@ -0,0 +1,322 @@
===
Array literal
===
let xs = [1, 2, 3]
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "xs"
value:
array_literal
element:
integer_literal "1"
integer_literal "2"
integer_literal "3"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "[1, 2, 3]"
pattern:
var_pattern
identifier: identifier "xs"
===
Empty array literal with type
===
let xs: [Int] = []
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "xs"
value:
array_literal
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
array_type
element:
type
name:
user_type
type_identifier "Int"
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "[]"
pattern:
var_pattern
identifier: identifier "xs"
===
Dictionary literal
===
let d = ["a": 1, "b": 2]
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "d"
value:
dictionary_literal
key:
line_string_literal
text: line_str_text "a"
line_string_literal
text: line_str_text "b"
value:
integer_literal "1"
integer_literal "2"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "[\"a\": 1, \"b\": 2]"
pattern:
var_pattern
identifier: identifier "d"
===
Set literal
===
let s: Set<Int> = [1, 2, 3]
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "s"
value:
array_literal
element:
integer_literal "1"
integer_literal "2"
integer_literal "3"
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
user_type
type_identifier "Set"
type_arguments
type
name:
user_type
type_identifier "Int"
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "[1, 2, 3]"
pattern:
var_pattern
identifier: identifier "s"
===
Tuple literal
===
let t = (1, "two", 3.0)
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "t"
value:
tuple_expression
value:
integer_literal "1"
line_string_literal
text: line_str_text "two"
real_literal "3.0"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "(1, \"two\", 3.0)"
pattern:
var_pattern
identifier: identifier "t"
===
Subscript access
===
// TODO: tree-sitter-swift parses `xs[0]` as a call_expression (same shape
// as `xs(0)`), so the mapping currently produces a call_expr. Update the
// parser / add a separate subscript_expr node and remap when fixed.
let first = xs[0]
---
source_file
comment "// TODO: tree-sitter-swift parses `xs[0]` as a call_expression (same shape"
comment "// as `xs(0)`), so the mapping currently produces a call_expr. Update the"
comment "// parser / add a separate subscript_expr node and remap when fixed."
property_declaration
name:
pattern
bound_identifier: simple_identifier "first"
value:
call_expression
simple_identifier "xs"
call_suffix
value_arguments
value_argument
value: integer_literal "0"
value_binding_pattern
mutability: let
---
top_level
body:
unsupported_node "// TODO: tree-sitter-swift parses `xs[0]` as a call_expression (same shape"
unsupported_node "// as `xs(0)`), so the mapping currently produces a call_expr. Update the"
unsupported_node "// parser / add a separate subscript_expr node and remap when fixed."
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
call_expr
argument: int_literal "0"
function:
name_expr
identifier: identifier "xs"
pattern:
var_pattern
identifier: identifier "first"
===
Dictionary subscript
===
// TODO: same parser issue as the array subscript case above —
// `d["key"]` is parsed as `call_expression(d, ("key"))`.
let v = d["key"]
---
source_file
comment "// TODO: same parser issue as the array subscript case above —"
comment "// `d[\"key\"]` is parsed as `call_expression(d, (\"key\"))`."
property_declaration
name:
pattern
bound_identifier: simple_identifier "v"
value:
call_expression
simple_identifier "d"
call_suffix
value_arguments
value_argument
value:
line_string_literal
text: line_str_text "key"
value_binding_pattern
mutability: let
---
top_level
body:
unsupported_node "// TODO: same parser issue as the array subscript case above —"
unsupported_node "// `d[\"key\"]` is parsed as `call_expression(d, (\"key\"))`."
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
call_expr
argument: string_literal "\"key\""
function:
name_expr
identifier: identifier "d"
pattern:
var_pattern
identifier: identifier "v"
===
Tuple member access
===
let n = t.0
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "n"
value:
navigation_expression
suffix:
navigation_suffix
suffix: integer_literal "0"
target: simple_identifier "t"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "t.0"
pattern:
var_pattern
identifier: identifier "n"

View File

@@ -0,0 +1,448 @@
===
If statement
===
if x > 0 {
print(x)
}
---
source_file
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "x"
---
top_level
body:
if_stmt
condition:
expr_condition
expr: unsupported_node "x > 0"
then:
block_stmt
body:
expr_stmt
expr:
call_expr
argument:
name_expr
identifier: identifier "x"
function:
name_expr
identifier: identifier "print"
===
If-else
===
if x > 0 {
print(x)
} else {
print(-x)
}
---
source_file
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "x"
else "else"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value:
prefix_expression
operation: -
target: simple_identifier "x"
---
top_level
body:
if_stmt
condition:
expr_condition
expr: unsupported_node "x > 0"
else:
block_stmt
body:
expr_stmt
expr:
call_expr
argument:
unary_expr
operator: operator "-"
operand:
name_expr
identifier: identifier "x"
function:
name_expr
identifier: identifier "print"
then:
block_stmt
body:
expr_stmt
expr:
call_expr
argument:
name_expr
identifier: identifier "x"
function:
name_expr
identifier: identifier "print"
===
If-else-if chain
===
if x > 0 {
print(1)
} else if x < 0 {
print(2)
} else {
print(3)
}
---
source_file
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: integer_literal "1"
else "else"
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: <
rhs: integer_literal "0"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: integer_literal "2"
else "else"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: integer_literal "3"
---
top_level
body:
if_stmt
condition:
expr_condition
expr: unsupported_node "x > 0"
else:
if_stmt
condition:
expr_condition
expr: unsupported_node "x < 0"
else:
block_stmt
body:
expr_stmt
expr:
call_expr
argument: int_literal "3"
function:
name_expr
identifier: identifier "print"
then:
block_stmt
body:
expr_stmt
expr:
call_expr
argument: int_literal "2"
function:
name_expr
identifier: identifier "print"
then:
block_stmt
body:
expr_stmt
expr:
call_expr
argument: int_literal "1"
function:
name_expr
identifier: identifier "print"
===
If-let optional binding
===
if let value = optional {
print(value)
}
---
source_file
if_statement
condition:
if_condition
if_let_binding
bound_identifier: simple_identifier "value"
value_binding_pattern
mutability: let
simple_identifier "optional"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "value"
---
top_level
body:
if_stmt
condition:
expr_condition
expr: unsupported_node "let value = optional"
then:
block_stmt
body:
expr_stmt
expr:
call_expr
argument:
name_expr
identifier: identifier "value"
function:
name_expr
identifier: identifier "print"
===
Guard let
===
guard let value = optional else { return }
---
source_file
guard_statement
condition:
if_condition
if_let_binding
bound_identifier: simple_identifier "value"
value_binding_pattern
mutability: let
simple_identifier "optional"
else "else"
statements
control_transfer_statement
---
top_level
body: unsupported_node "guard let value = optional else { return }"
===
Ternary expression
===
let y = x > 0 ? 1 : -1
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "y"
value:
ternary_expression
condition:
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
if_false:
prefix_expression
operation: -
target: integer_literal "1"
if_true: integer_literal "1"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "x > 0 ? 1 : -1"
pattern:
var_pattern
identifier: identifier "y"
===
Switch statement
===
switch x {
case 1:
print("one")
case 2, 3:
print("two or three")
default:
print("other")
}
---
source_file
switch_statement
expr: simple_identifier "x"
switch_entry
switch_pattern
pattern
integer_literal "1"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value:
line_string_literal
text: line_str_text "one"
switch_entry
switch_pattern
pattern
integer_literal "2"
switch_pattern
pattern
integer_literal "3"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value:
line_string_literal
text: line_str_text "two or three"
switch_entry
default_keyword "default"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value:
line_string_literal
text: line_str_text "other"
---
top_level
body: unsupported_node "switch x {\ncase 1:\n print(\"one\")\ncase 2, 3:\n print(\"two or three\")\ndefault:\n print(\"other\")\n}"
===
Switch with binding pattern
===
switch shape {
case .circle(let r):
print(r)
case .square(let s):
print(s)
}
---
source_file
switch_statement
expr: simple_identifier "shape"
switch_entry
switch_pattern
pattern
simple_identifier "circle"
pattern
bound_identifier: simple_identifier "r"
value_binding_pattern
mutability: let
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "r"
switch_entry
switch_pattern
pattern
simple_identifier "square"
pattern
bound_identifier: simple_identifier "s"
value_binding_pattern
mutability: let
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "s"
---
top_level
body: unsupported_node "switch shape {\ncase .circle(let r):\n print(r)\ncase .square(let s):\n print(s)\n}"

View File

@@ -0,0 +1,49 @@
===
Additive expression is desugared
===
1 + 2
---
source_file
additive_expression
lhs: integer_literal "1"
op: +
rhs: integer_literal "2"
---
top_level
body:
binary_expr
operator: operator "+"
left: int_literal "1"
right: int_literal "2"
===
Another additive expression is desugared
===
foo + bar
---
source_file
additive_expression
lhs: simple_identifier "foo"
op: +
rhs: simple_identifier "bar"
---
top_level
body:
binary_expr
operator: operator "+"
left:
name_expr
identifier: identifier "foo"
right:
name_expr
identifier: identifier "bar"

View File

@@ -0,0 +1,336 @@
===
Function with no parameters
===
func greet() {
print("hello")
}
---
source_file
function_declaration
body:
function_body
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value:
line_string_literal
text: line_str_text "hello"
name: simple_identifier "greet"
---
top_level
body: unsupported_node "func greet() {\n print(\"hello\")\n}"
===
Function with parameters and return type
===
func add(_ a: Int, _ b: Int) -> Int {
return a + b
}
---
source_file
function_declaration
body:
function_body
statements
control_transfer_statement
result:
additive_expression
lhs: simple_identifier "a"
op: +
rhs: simple_identifier "b"
name: simple_identifier "add"
return_type:
type
name:
user_type
type_identifier "Int"
parameter
external_name: simple_identifier "_"
name: simple_identifier "a"
type:
type
name:
user_type
type_identifier "Int"
parameter
external_name: simple_identifier "_"
name: simple_identifier "b"
type:
type
name:
user_type
type_identifier "Int"
---
top_level
body: unsupported_node "func add(_ a: Int, _ b: Int) -> Int {\n return a + b\n}"
===
Function with named parameters
===
func greet(person name: String) {
print(name)
}
---
source_file
function_declaration
body:
function_body
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "name"
name: simple_identifier "greet"
parameter
external_name: simple_identifier "person"
name: simple_identifier "name"
type:
type
name:
user_type
type_identifier "String"
---
top_level
body: unsupported_node "func greet(person name: String) {\n print(name)\n}"
===
Function with default parameter value
===
func greet(name: String = "world") {
print(name)
}
---
source_file
function_declaration
body:
function_body
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "name"
default_value:
line_string_literal
text: line_str_text "world"
name: simple_identifier "greet"
parameter
name: simple_identifier "name"
type:
type
name:
user_type
type_identifier "String"
---
top_level
body: unsupported_node "func greet(name: String = \"world\") {\n print(name)\n}"
===
Variadic function
===
func sum(_ values: Int...) -> Int {
return values.reduce(0, +)
}
---
source_file
function_declaration
body:
function_body
statements
control_transfer_statement
result:
call_expression
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "reduce"
target: simple_identifier "values"
call_suffix
value_arguments
value_argument
value: integer_literal "0"
value_argument
value:
referenceable_operator
name: simple_identifier "sum"
return_type:
type
name:
user_type
type_identifier "Int"
parameter
external_name: simple_identifier "_"
name: simple_identifier "values"
type:
type
name:
user_type
type_identifier "Int"
---
top_level
body: unsupported_node "func sum(_ values: Int...) -> Int {\n return values.reduce(0, +)\n}"
===
Function call
===
foo(1, 2)
---
source_file
call_expression
simple_identifier "foo"
call_suffix
value_arguments
value_argument
value: integer_literal "1"
value_argument
value: integer_literal "2"
---
top_level
body:
call_expr
argument:
int_literal "1"
int_literal "2"
function:
name_expr
identifier: identifier "foo"
===
Function call with labelled arguments
===
greet(person: "Bob")
---
source_file
call_expression
simple_identifier "greet"
call_suffix
value_arguments
value_argument
name:
value_argument_label
simple_identifier "person"
value:
line_string_literal
text: line_str_text "Bob"
---
top_level
body:
call_expr
argument: string_literal "\"Bob\""
function:
name_expr
identifier: identifier "greet"
===
Method call
===
list.append(1)
---
source_file
call_expression
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "append"
target: simple_identifier "list"
call_suffix
value_arguments
value_argument
value: integer_literal "1"
---
top_level
body:
call_expr
argument: int_literal "1"
function:
member_access_expr
target:
name_expr
identifier: identifier "list"
member: identifier "append"
===
Generic function
===
func identity<T>(_ x: T) -> T {
return x
}
---
source_file
function_declaration
body:
function_body
statements
control_transfer_statement
result: simple_identifier "x"
name: simple_identifier "identity"
return_type:
type
name:
user_type
type_identifier "T"
type_parameters
type_parameter
type_identifier "T"
parameter
external_name: simple_identifier "_"
name: simple_identifier "x"
type:
type
name:
user_type
type_identifier "T"
---
top_level
body: unsupported_node "func identity<T>(_ x: T) -> T {\n return x\n}"

View File

@@ -0,0 +1,124 @@
===
Integer literal
===
42
---
source_file
integer_literal "42"
---
top_level
body: int_literal "42"
===
Negative integer literal
===
-7
---
source_file
prefix_expression
operation: -
target: integer_literal "7"
---
top_level
body:
unary_expr
operator: operator "-"
operand: int_literal "7"
===
Floating-point literal
===
3.14
---
source_file
real_literal "3.14"
---
top_level
body: unsupported_node "3.14"
===
Boolean literals
===
true
false
---
source_file
boolean_literal
boolean_literal
---
top_level
body:
unsupported_node "true"
unsupported_node "false"
===
Nil literal
===
nil
---
source_file
---
top_level
body:
===
String literal
===
"hello"
---
source_file
line_string_literal
text: line_str_text "hello"
---
top_level
body: string_literal "\"hello\""
===
String with interpolation
===
"hello \(name)"
---
source_file
line_string_literal
interpolation:
interpolated_expression
value: simple_identifier "name"
text: line_str_text "hello "
---
top_level
body: string_literal "\"hello \\(name)\""

View File

@@ -0,0 +1,212 @@
===
For-in over array literal
===
for x in [1, 2, 3] {
print(x)
}
---
source_file
for_statement
collection:
array_literal
element:
integer_literal "1"
integer_literal "2"
integer_literal "3"
item:
pattern
bound_identifier: simple_identifier "x"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "x"
---
top_level
body: unsupported_node "for x in [1, 2, 3] {\n print(x)\n}"
===
For-in over range
===
for i in 0..<10 {
print(i)
}
---
source_file
for_statement
collection:
range_expression
end: integer_literal "10"
op: ..<
start: integer_literal "0"
item:
pattern
bound_identifier: simple_identifier "i"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "i"
---
top_level
body: unsupported_node "for i in 0..<10 {\n print(i)\n}"
===
For-in with where clause
===
for x in xs where x > 0 {
print(x)
}
---
source_file
for_statement
collection: simple_identifier "xs"
item:
pattern
bound_identifier: simple_identifier "x"
where_clause
where_keyword "where"
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "x"
---
top_level
body: unsupported_node "for x in xs where x > 0 {\n print(x)\n}"
===
While loop
===
while x > 0 {
x -= 1
}
---
source_file
while_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
assignment
operator: -=
result: integer_literal "1"
target:
directly_assignable_expression
simple_identifier "x"
---
top_level
body: unsupported_node "while x > 0 {\n x -= 1\n}"
===
Repeat-while loop
===
repeat {
x -= 1
} while x > 0
---
source_file
repeat_while_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "0"
statements
assignment
operator: -=
result: integer_literal "1"
target:
directly_assignable_expression
simple_identifier "x"
---
top_level
body: unsupported_node "repeat {\n x -= 1\n} while x > 0"
===
Break and continue
===
for x in xs {
if x < 0 { continue }
if x > 100 { break }
print(x)
}
---
source_file
for_statement
collection: simple_identifier "xs"
item:
pattern
bound_identifier: simple_identifier "x"
statements
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: <
rhs: integer_literal "0"
statements
control_transfer_statement
if_statement
condition:
if_condition
comparison_expression
lhs: simple_identifier "x"
op: >
rhs: integer_literal "100"
statements
control_transfer_statement
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "x"
---
top_level
body: unsupported_node "for x in xs {\n if x < 0 { continue }\n if x > 100 { break }\n print(x)\n}"

View File

@@ -0,0 +1,329 @@
===
Addition
===
a + b
---
source_file
additive_expression
lhs: simple_identifier "a"
op: +
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "+"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Subtraction
===
a - b
---
source_file
additive_expression
lhs: simple_identifier "a"
op: -
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "-"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Multiplication
===
a * b
---
source_file
multiplicative_expression
lhs: simple_identifier "a"
op: *
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "*"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Division
===
a / b
---
source_file
multiplicative_expression
lhs: simple_identifier "a"
op: /
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "/"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Operator precedence: addition and multiplication
===
a + b * c
---
source_file
additive_expression
lhs: simple_identifier "a"
op: +
rhs:
multiplicative_expression
lhs: simple_identifier "b"
op: *
rhs: simple_identifier "c"
---
top_level
body:
binary_expr
operator: operator "+"
left:
name_expr
identifier: identifier "a"
right:
binary_expr
operator: operator "*"
left:
name_expr
identifier: identifier "b"
right:
name_expr
identifier: identifier "c"
===
Parenthesised expression
===
(a + b) * c
---
source_file
multiplicative_expression
lhs:
tuple_expression
value:
additive_expression
lhs: simple_identifier "a"
op: +
rhs: simple_identifier "b"
op: *
rhs: simple_identifier "c"
---
top_level
body:
binary_expr
operator: operator "*"
left: unsupported_node "(a + b)"
right:
name_expr
identifier: identifier "c"
===
Comparison
===
a < b
---
source_file
comparison_expression
lhs: simple_identifier "a"
op: <
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "<"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Equality
===
a == b
---
source_file
equality_expression
lhs: simple_identifier "a"
op: ==
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "=="
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Logical and
===
a && b
---
source_file
conjunction_expression
lhs: simple_identifier "a"
op: &&
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "&&"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Logical or
===
a || b
---
source_file
disjunction_expression
lhs: simple_identifier "a"
op: ||
rhs: simple_identifier "b"
---
top_level
body:
binary_expr
operator: operator "||"
left:
name_expr
identifier: identifier "a"
right:
name_expr
identifier: identifier "b"
===
Logical not
===
!a
---
source_file
prefix_expression
operation: bang "!"
target: simple_identifier "a"
---
top_level
body:
unary_expr
operator: operator "!"
operand:
name_expr
identifier: identifier "a"
===
Range operator
===
1...10
---
source_file
range_expression
end: integer_literal "10"
op: ...
start: integer_literal "1"
---
top_level
body:
binary_expr
operator: operator "..."
left: int_literal "1"
right: int_literal "10"

View File

@@ -0,0 +1,284 @@
===
Optional type annotation
===
let x: Int? = nil
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value: nil
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
optional_type
wrapped:
user_type
type_identifier "Int"
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
pattern:
var_pattern
identifier: identifier "x"
===
Optional chaining
===
let n = obj?.foo?.bar
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "n"
value:
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "bar"
target:
optional_chain_marker
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "foo"
target:
optional_chain_marker
simple_identifier "obj"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
member_access_expr
target: unsupported_node "obj?.foo?"
member: identifier "bar"
pattern:
var_pattern
identifier: identifier "n"
===
Force unwrap
===
let n = opt!
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "n"
value:
postfix_expression
operation: bang "!"
target: simple_identifier "opt"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "opt!"
pattern:
var_pattern
identifier: identifier "n"
===
Nil-coalescing
===
let n = opt ?? 0
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "n"
value:
nil_coalescing_expression
if_nil: integer_literal "0"
value: simple_identifier "opt"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "opt ?? 0"
pattern:
var_pattern
identifier: identifier "n"
===
Throwing function
===
func read() throws -> String {
return ""
}
---
source_file
function_declaration
body:
function_body
statements
control_transfer_statement
result:
line_string_literal
name: simple_identifier "read"
return_type:
type
name:
user_type
type_identifier "String"
throws "throws"
---
top_level
body: unsupported_node "func read() throws -> String {\n return \"\"\n}"
===
Do-catch
===
do {
try foo()
} catch {
print(error)
}
---
source_file
do_statement
statements
try_expression
expr:
call_expression
simple_identifier "foo"
call_suffix
value_arguments
try_operator
catch_block
catch_keyword "catch"
statements
call_expression
simple_identifier "print"
call_suffix
value_arguments
value_argument
value: simple_identifier "error"
---
top_level
body: unsupported_node "do {\n try foo()\n} catch {\n print(error)\n}"
===
Try? expression
===
let result = try? foo()
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "result"
value:
try_expression
expr:
call_expression
simple_identifier "foo"
call_suffix
value_arguments
try_operator
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "try? foo()"
pattern:
var_pattern
identifier: identifier "result"
===
Try! expression
===
let result = try! foo()
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "result"
value:
try_expression
expr:
call_expression
simple_identifier "foo"
call_suffix
value_arguments
try_operator
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: unsupported_node "try! foo()"
pattern:
var_pattern
identifier: identifier "result"

View File

@@ -0,0 +1,514 @@
===
Empty class
===
class Foo {}
---
source_file
class_declaration
body:
class_body
declaration_kind: class
name: type_identifier "Foo"
---
top_level
body: unsupported_node "class Foo {}"
===
Class with stored properties
===
class Point {
var x: Int
var y: Int
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Int"
property_declaration
name:
pattern
bound_identifier: simple_identifier "y"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Int"
declaration_kind: class
name: type_identifier "Point"
---
top_level
body: unsupported_node "class Point {\n var x: Int\n var y: Int\n}"
===
Class with initializer
===
class Point {
var x: Int
init(x: Int) {
self.x = x
}
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Int"
init_declaration
body:
function_body
statements
assignment
operator: =
result: simple_identifier "x"
target:
directly_assignable_expression
navigation_expression
suffix:
navigation_suffix
suffix: simple_identifier "x"
target:
self_expression
name: init
parameter
name: simple_identifier "x"
type:
type
name:
user_type
type_identifier "Int"
declaration_kind: class
name: type_identifier "Point"
---
top_level
body: unsupported_node "class Point {\n var x: Int\n init(x: Int) {\n self.x = x\n }\n}"
===
Class with method
===
class Counter {
var n = 0
func bump() {
n += 1
}
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "n"
value: integer_literal "0"
value_binding_pattern
mutability: var
function_declaration
body:
function_body
statements
assignment
operator: +=
result: integer_literal "1"
target:
directly_assignable_expression
simple_identifier "n"
name: simple_identifier "bump"
declaration_kind: class
name: type_identifier "Counter"
---
top_level
body: unsupported_node "class Counter {\n var n = 0\n func bump() {\n n += 1\n }\n}"
===
Class inheritance
===
class Dog: Animal {}
---
source_file
class_declaration
body:
class_body
declaration_kind: class
name: type_identifier "Dog"
inheritance_specifier
inherits_from:
user_type
type_identifier "Animal"
---
top_level
body: unsupported_node "class Dog: Animal {}"
===
Struct
===
struct Point {
let x: Int
let y: Int
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
user_type
type_identifier "Int"
property_declaration
name:
pattern
bound_identifier: simple_identifier "y"
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
user_type
type_identifier "Int"
declaration_kind: struct
name: type_identifier "Point"
---
top_level
body: unsupported_node "struct Point {\n let x: Int\n let y: Int\n}"
===
Enum with cases
===
enum Direction {
case north
case south
case east
case west
}
---
source_file
class_declaration
body:
enum_class_body
enum_entry
name: simple_identifier "north"
enum_entry
name: simple_identifier "south"
enum_entry
name: simple_identifier "east"
enum_entry
name: simple_identifier "west"
declaration_kind: enum
name: type_identifier "Direction"
---
top_level
body: unsupported_node "enum Direction {\n case north\n case south\n case east\n case west\n}"
===
Enum with associated values
===
enum Shape {
case circle(radius: Double)
case square(side: Double)
}
---
source_file
class_declaration
body:
enum_class_body
enum_entry
data_contents:
enum_type_parameters
simple_identifier "radius"
type
name:
user_type
type_identifier "Double"
name: simple_identifier "circle"
enum_entry
data_contents:
enum_type_parameters
simple_identifier "side"
type
name:
user_type
type_identifier "Double"
name: simple_identifier "square"
declaration_kind: enum
name: type_identifier "Shape"
---
top_level
body: unsupported_node "enum Shape {\n case circle(radius: Double)\n case square(side: Double)\n}"
===
Protocol declaration
===
protocol Drawable {
func draw()
}
---
source_file
protocol_declaration
body:
protocol_body
protocol_function_declaration
name: simple_identifier "draw"
declaration_kind: protocol
name: type_identifier "Drawable"
---
top_level
body: unsupported_node "protocol Drawable {\n func draw()\n}"
===
Extension
===
extension Int {
func squared() -> Int { return self * self }
}
---
source_file
class_declaration
body:
class_body
function_declaration
body:
function_body
statements
control_transfer_statement
result:
multiplicative_expression
lhs:
self_expression
op: *
rhs:
self_expression
name: simple_identifier "squared"
return_type:
type
name:
user_type
type_identifier "Int"
declaration_kind: extension
name:
user_type
type_identifier "Int"
---
top_level
body: unsupported_node "extension Int {\n func squared() -> Int { return self * self }\n}"
===
Computed property
===
class Rect {
var w: Double
var h: Double
var area: Double {
return w * h
}
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "w"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Double"
property_declaration
name:
pattern
bound_identifier: simple_identifier "h"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Double"
property_declaration
computed_value:
computed_property
statements
control_transfer_statement
result:
multiplicative_expression
lhs: simple_identifier "w"
op: *
rhs: simple_identifier "h"
name:
pattern
bound_identifier: simple_identifier "area"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Double"
declaration_kind: class
name: type_identifier "Rect"
---
top_level
body: unsupported_node "class Rect {\n var w: Double\n var h: Double\n var area: Double {\n return w * h\n }\n}"
===
Property with getter and setter
===
class Box {
private var _v = 0
var v: Int {
get { return _v }
set { _v = newValue }
}
}
---
source_file
class_declaration
body:
class_body
property_declaration
name:
pattern
bound_identifier: simple_identifier "_v"
value: integer_literal "0"
modifiers
visibility_modifier
value_binding_pattern
mutability: var
property_declaration
computed_value:
computed_property
computed_getter
getter_specifier
statements
control_transfer_statement
result: simple_identifier "_v"
computed_setter
setter_specifier
statements
assignment
operator: =
result: simple_identifier "newValue"
target:
directly_assignable_expression
simple_identifier "_v"
name:
pattern
bound_identifier: simple_identifier "v"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Int"
declaration_kind: class
name: type_identifier "Box"
---
top_level
body: unsupported_node "class Box {\n private var _v = 0\n var v: Int {\n get { return _v }\n set { _v = newValue }\n }\n}"

View File

@@ -0,0 +1,243 @@
===
Let binding
===
let x = 1
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value: integer_literal "1"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: int_literal "1"
pattern:
var_pattern
identifier: identifier "x"
===
Var binding
===
var x = 1
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value: integer_literal "1"
value_binding_pattern
mutability: var
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: int_literal "1"
pattern:
var_pattern
identifier: identifier "x"
===
Let with type annotation
===
let x: Int = 1
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value: integer_literal "1"
value_binding_pattern
mutability: let
type_annotation
type:
type
name:
user_type
type_identifier "Int"
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: int_literal "1"
pattern:
var_pattern
identifier: identifier "x"
===
Var without initialiser
===
var x: Int
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
value_binding_pattern
mutability: var
type_annotation
type:
type
name:
user_type
type_identifier "Int"
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
pattern:
var_pattern
identifier: identifier "x"
===
Tuple destructuring binding
===
let (a, b) = pair
---
source_file
property_declaration
name:
pattern
pattern
simple_identifier "a"
pattern
simple_identifier "b"
value: simple_identifier "pair"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value:
name_expr
identifier: identifier "pair"
pattern:
tuple_pattern
element:
var_pattern
identifier: identifier "a"
var_pattern
identifier: identifier "b"
===
Multiple bindings on one line
===
let x = 1, y = 2
---
source_file
property_declaration
name:
pattern
bound_identifier: simple_identifier "x"
pattern
bound_identifier: simple_identifier "y"
value:
integer_literal "1"
integer_literal "2"
value_binding_pattern
mutability: let
---
top_level
body:
variable_declaration_stmt
variable_declarator:
variable_declarator
value: int_literal "1"
pattern:
var_pattern
identifier: identifier "x"
variable_declarator
value: int_literal "2"
pattern:
var_pattern
identifier: identifier "y"
===
Assignment
===
x = 1
---
source_file
assignment
operator: =
result: integer_literal "1"
target:
directly_assignable_expression
simple_identifier "x"
---
top_level
body: unsupported_node "x = 1"
===
Compound assignment
===
x += 1
---
source_file
assignment
operator: +=
result: integer_literal "1"
target:
directly_assignable_expression
simple_identifier "x"
---
top_level
body: unsupported_node "x += 1"

View File

@@ -0,0 +1,283 @@
use std::fs;
use std::path::Path;
use codeql_extractor::extractor::simple;
use yeast::{dump::dump_ast, dump::dump_ast_with_type_errors, Runner};
#[path = "../src/languages/mod.rs"]
mod languages;
/// One test case read from a corpus file, split into its four text sections.
#[derive(Debug)]
struct CorpusCase {
/// Test name: the line between the two `===` header rules.
name: String,
/// Source text handed to the parser: everything between the header and the
/// first `---` separator.
input: String,
/// Expected dump of the raw parse tree; empty when the case has only one
/// `---` separator (legacy layout).
raw: String,
/// Expected dump of the tree produced by desugaring (the final section).
expected: String,
}
/// Reports whether the corpus tests should rewrite their expectation files.
///
/// Update mode is on when the `UNIFIED_UPDATE_CORPUS` environment variable is
/// set to `1`, `true`, `yes` or `on`, compared ASCII case-insensitively. If the
/// variable cannot be read, or holds any other value, update mode is off.
fn update_mode_enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    match std::env::var("UNIFIED_UPDATE_CORPUS") {
        Ok(value) => TRUTHY.iter().any(|word| value.eq_ignore_ascii_case(word)),
        Err(_) => false,
    }
}
/// Tells whether `line` is a header rule: once surrounding whitespace is
/// removed it must be at least three characters long and consist only of `=`.
fn is_header_rule(line: &str) -> bool {
    let rule = line.trim();
    // `=` is ASCII, so for an all-`=` string the byte length is the character
    // count and a byte-wise comparison is equivalent to a char-wise one.
    rule.len() >= 3 && rule.bytes().all(|byte| byte == b'=')
}
/// Returns true when a new case header starts at `lines[idx]`: a header rule,
/// then a non-blank name line, then a second header rule.
///
/// `idx` must be a valid index into `lines`.
fn is_case_header_at(lines: &[&str], idx: usize) -> bool {
    is_header_rule(lines[idx])
        && idx + 2 < lines.len()
        && !lines[idx + 1].trim().is_empty()
        && is_header_rule(lines[idx + 2])
}

/// Parses the text of a corpus file into its test cases.
///
/// Each case has the layout
///
/// ```text
/// ===
/// <name>
/// ===
/// <input>
/// ---
/// <raw parse tree>
/// ---
/// <expected output>
/// ```
///
/// Blank lines before a header are skipped. A case ends where the next case
/// header begins (see [`is_case_header_at`]) or at the end of the file. Legacy
/// cases with a single `---` separator have no raw section; their `raw` field
/// is left empty. `name`, `raw` and `expected` are trimmed on both sides,
/// `input` only at the end.
///
/// # Panics
///
/// Panics when a case does not start with a header rule, when the name line or
/// the closing header rule is missing, when the name is blank, or when no
/// `---` separator follows the input.
fn parse_corpus(content: &str) -> Vec<CorpusCase> {
    let lines: Vec<&str> = content.lines().collect();
    let mut i = 0;
    let mut cases = Vec::new();

    while i < lines.len() {
        // Skip blank lines between cases (and trailing ones at end of file).
        while i < lines.len() && lines[i].trim().is_empty() {
            i += 1;
        }
        if i >= lines.len() {
            break;
        }

        // Header: rule, name, rule.
        assert!(
            is_header_rule(lines[i]),
            "Expected header delimiter at line {}",
            i + 1
        );
        i += 1;
        assert!(i < lines.len(), "Missing test name at line {}", i + 1);
        let name = lines[i].trim().to_string();
        // The lookahead that finds the start of the next case only accepts
        // headers with a non-blank name, so an unnamed case would be silently
        // swallowed by the preceding case's expected section. Reject it here.
        assert!(!name.is_empty(), "Empty test name at line {}", i + 1);
        i += 1;
        assert!(
            i < lines.len() && is_header_rule(lines[i]),
            "Missing closing header delimiter for case {name}"
        );
        i += 1;

        // Input: everything up to the first `---`.
        let input_start = i;
        while i < lines.len() && lines[i].trim() != "---" {
            i += 1;
        }
        assert!(i < lines.len(), "Missing --- separator for case {name}");
        let input = lines[input_start..i].join("\n").trim_end().to_string();
        i += 1;

        // Raw tree-sitter parse section. New-format files have a second
        // `---` separator between the raw tree and the mapped AST. Legacy
        // files (with only one separator) have no raw section — in that
        // case `raw` stays empty and update mode will populate it. The search
        // stops at the next case header so that a later case's separator is
        // never mistaken for this case's.
        let raw_start = i;
        let mut next_sep = i;
        while next_sep < lines.len()
            && lines[next_sep].trim() != "---"
            && !is_case_header_at(&lines, next_sep)
        {
            next_sep += 1;
        }
        let raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
            let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
            i = next_sep + 1;
            raw_text
        } else {
            String::new()
        };

        // Expected output: everything up to the next case header or EOF.
        let expected_start = i;
        while i < lines.len() && !is_case_header_at(&lines, i) {
            i += 1;
        }
        let expected = lines[expected_start..i].join("\n").trim().to_string();

        cases.push(CorpusCase {
            name,
            input,
            raw,
            expected,
        });
    }

    cases
}
/// Serialises `cases` back into corpus-file text.
///
/// Every case is written as a `===`-delimited name followed by the input, the
/// raw parse tree and the expected output, with a `---` line between the three
/// sections and blank lines around the delimiters. All four fields are trimmed
/// before being written. Consecutive cases are separated by one blank line and
/// each case ends with a single `\n`; an empty slice renders as an empty
/// string.
fn render_corpus(cases: &[CorpusCase]) -> String {
    let mut rendered = String::new();
    for case in cases {
        // Anything already written means this is not the first case, so put
        // the blank separator line in front of it.
        if !rendered.is_empty() {
            rendered.push('\n');
        }
        rendered.extend([
            "===\n",
            case.name.trim(),
            "\n===\n\n",
            case.input.trim(),
            "\n\n---\n\n",
            case.raw.trim(),
            "\n\n---\n\n",
            case.expected.trim(),
            // Exactly one newline closes the case (and therefore the file).
            "\n",
        ]);
    }
    rendered
}
/// Parses `input` with `lang`'s grammar and returns the tree produced by the
/// language's yeast runner.
///
/// When `lang.desugar` holds a configuration the runner is built from it;
/// otherwise a runner with an empty rule list is used.
///
/// # Errors
///
/// Returns a message when the runner cannot be built from the configuration
/// or when running it on `input` fails.
fn run_desugaring(lang: &simple::LanguageSpec, input: &str) -> Result<yeast::Ast, String> {
    let language = lang.ts_language.clone();
    let runner = if let Some(config) = lang.desugar.as_ref() {
        Runner::from_config(language, config)
            .map_err(|err| format!("Failed to create yeast runner: {err}"))?
    } else {
        Runner::new(language, &[])
    };

    match runner.run(input) {
        Ok(ast) => Ok(ast),
        Err(err) => Err(format!("Failed to parse input: {err}")),
    }
}
/// Dumps the tree that `lang`'s grammar produces for `input` without applying
/// any desugaring rules.
///
/// The tree comes from a `Runner` built with an empty rule list and is
/// rendered with `dump_ast`, starting at the root node.
///
/// # Errors
///
/// Returns a message when running the parser on `input` fails.
fn dump_raw_parse(lang: &simple::LanguageSpec, input: &str) -> Result<String, String> {
    let parser_only = Runner::new(lang.ts_language.clone(), &[]);
    match parser_only.run(input) {
        Ok(tree) => Ok(dump_ast(&tree, tree.get_root(), input)),
        Err(err) => Err(format!("Failed to parse input: {err}")),
    }
}
/// Runs every `.txt` corpus file found under `tests/corpus/<language prefix>/`.
///
/// For each case the raw parse dump and the dump of the desugared tree
/// (rendered by `dump_ast_with_type_errors` against the output schema) are
/// compared with the sections stored in the file. All mismatches and errors of
/// one file are collected and reported together. In update mode the stored
/// sections are replaced with the actual output instead, and the file is
/// rewritten once every case in it parsed and desugared successfully.
/// Languages without a corpus directory are skipped.
#[test]
fn test_corpus() {
    let refresh = update_mode_enabled();
    let corpus_root = Path::new("tests/corpus");

    for lang in languages::all_language_specs() {
        let output_schema = yeast::node_types_yaml::schema_from_yaml_with_language(
            languages::OUTPUT_AST_SCHEMA,
            &lang.ts_language,
        )
        .expect("Failed to parse OUTPUT_AST_SCHEMA YAML");

        let lang_dir = corpus_root.join(&lang.prefix);
        if !lang_dir.exists() {
            continue;
        }

        // Gather the `.txt` files of this language in a stable (sorted) order.
        let entries = match fs::read_dir(&lang_dir) {
            Ok(entries) => entries,
            Err(e) => panic!(
                "Failed to read corpus directory {}: {e}",
                lang_dir.display()
            ),
        };
        let mut corpus_files = Vec::new();
        for entry in entries {
            let path = entry.expect("Failed to read corpus entry").path();
            if path.extension().is_some_and(|ext| ext == "txt") {
                corpus_files.push(path);
            }
        }
        corpus_files.sort();

        for corpus_path in corpus_files {
            let content = match fs::read_to_string(&corpus_path) {
                Ok(text) => text,
                Err(e) => panic!("Failed to read {}: {e}", corpus_path.display()),
            };
            let mut cases = parse_corpus(&content);
            assert!(
                !cases.is_empty(),
                "No corpus cases found in {}",
                corpus_path.display()
            );

            let mut failures = Vec::new();
            for case in &mut cases {
                // Section 1: the raw parse tree.
                match dump_raw_parse(&lang, &case.input) {
                    Ok(actual_raw) if refresh => {
                        case.raw = actual_raw.trim().to_string();
                    }
                    Ok(actual_raw) => {
                        if case.raw.trim() != actual_raw.trim() {
                            failures.push(format!(
                                "Raw parse mismatch in {}: \"{}\"\nEXPECTED:\n\n{}\n\nACTUAL:\n\n{}",
                                corpus_path.display(),
                                case.name,
                                case.raw.trim(),
                                actual_raw.trim()
                            ));
                        }
                    }
                    Err(e) => failures.push(format!(
                        "Raw parse failed for {} in {}: {}",
                        case.name,
                        corpus_path.display(),
                        e
                    )),
                }

                // Section 2: the desugared tree.
                match run_desugaring(&lang, &case.input) {
                    Ok(actual) => {
                        let actual_dump = dump_ast_with_type_errors(
                            &actual,
                            actual.get_root(),
                            &case.input,
                            &output_schema,
                        );
                        if refresh {
                            case.expected = actual_dump.trim().to_string();
                        } else if case.expected.trim() != actual_dump.trim() {
                            failures.push(format!(
                                "Test failed in {}: \"{}\"\nEXPECTED:\n\n{}\n\nACTUAL:\n\n{}",
                                corpus_path.display(),
                                case.name,
                                case.expected.trim(),
                                actual_dump.trim()
                            ));
                        }
                    }
                    Err(e) => failures.push(format!(
                        "Desugaring failed for {} in {}: {}",
                        case.name,
                        corpus_path.display(),
                        e
                    )),
                }
            }

            // Report everything that went wrong in this file at once.
            if !failures.is_empty() {
                panic!("{}", failures.join("\n\n") + "\n\n");
            }

            if refresh {
                if let Err(e) = fs::write(&corpus_path, render_corpus(&cases)) {
                    panic!(
                        "Failed to update corpus file {}: {}",
                        corpus_path.display(),
                        e
                    );
                }
            }
        }
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
var x = y + 2;

View File

@@ -1,101 +1,18 @@
identifier
| test.swift:1:8:1:17 | Foundation | Foundation |
| test.swift:5:9:5:13 | items | items |
| test.swift:7:19:7:21 | add | add |
| test.swift:7:23:7:23 | _ | _ |
| test.swift:7:25:7:28 | item | item |
| test.swift:8:9:8:13 | items | items |
| test.swift:8:15:8:20 | append | append |
| test.swift:8:22:8:25 | item | item |
| test.swift:11:10:11:17 | contains | contains |
| test.swift:11:19:11:19 | _ | _ |
| test.swift:11:21:11:24 | item | item |
| test.swift:12:16:12:20 | items | items |
| test.swift:12:22:12:29 | contains | contains |
| test.swift:12:31:12:34 | item | item |
| test.swift:19:9:19:13 | count | count |
| test.swift:20:10:20:13 | item | item |
| test.swift:20:15:20:16 | at | at |
| test.swift:20:18:20:22 | index | index |
| test.swift:24:6:24:10 | merge | merge |
| test.swift:24:27:24:27 | _ | _ |
| test.swift:24:29:24:33 | first | first |
| test.swift:24:39:24:39 | _ | _ |
| test.swift:24:41:24:46 | second | second |
| test.swift:24:73:24:73 | T | T |
| test.swift:24:75:24:81 | Element | Element |
| test.swift:25:9:25:14 | result | result |
| test.swift:25:18:25:22 | Array | Array |
| test.swift:25:24:25:28 | first | first |
| test.swift:26:9:26:12 | item | item |
| test.swift:26:17:26:22 | second | second |
| test.swift:27:13:27:18 | result | result |
| test.swift:27:20:27:27 | contains | contains |
| test.swift:27:29:27:32 | item | item |
| test.swift:28:13:28:18 | result | result |
| test.swift:28:20:28:25 | append | append |
| test.swift:28:27:28:30 | item | item |
| test.swift:31:12:31:17 | result | result |
| test.swift:37:17:37:20 | data | data |
| test.swift:39:9:39:13 | count | count |
| test.swift:40:16:40:19 | data | data |
| test.swift:40:21:40:25 | count | count |
| test.swift:43:9:43:15 | isEmpty | isEmpty |
| test.swift:44:9:44:12 | data | data |
| test.swift:44:14:44:20 | isEmpty | isEmpty |
| test.swift:47:10:47:13 | item | item |
| test.swift:47:15:47:16 | at | at |
| test.swift:47:18:47:22 | index | index |
| test.swift:48:15:48:19 | index | index |
| test.swift:48:29:48:33 | index | index |
| test.swift:48:37:48:40 | data | data |
| test.swift:48:42:48:46 | count | count |
| test.swift:49:16:49:19 | data | data |
| test.swift:49:21:49:25 | index | index |
| test.swift:52:10:52:12 | add | add |
| test.swift:52:14:52:14 | _ | _ |
| test.swift:52:16:52:19 | item | item |
| test.swift:53:9:53:12 | data | data |
| test.swift:53:14:53:19 | append | append |
| test.swift:53:21:53:24 | item | item |
| test.swift:59:10:59:16 | success | success |
| test.swift:60:10:60:16 | failure | failure |
| test.swift:62:10:62:12 | map | map |
| test.swift:62:17:62:17 | _ | _ |
| test.swift:62:19:62:27 | transform | transform |
| test.swift:64:15:64:21 | success | success |
| test.swift:64:27:64:31 | value | value |
| test.swift:65:21:65:27 | success | success |
| test.swift:65:29:65:37 | transform | transform |
| test.swift:65:39:65:43 | value | value |
| test.swift:66:15:66:21 | failure | failure |
| test.swift:66:27:66:31 | error | error |
| test.swift:67:21:67:27 | failure | failure |
| test.swift:67:29:67:33 | error | error |
| test.swift:73:23:73:29 | Element | Element |
| test.swift:74:10:74:17 | isSorted | isSorted |
| test.swift:75:13:75:13 | i | i |
| test.swift:75:23:75:31 | blah | blah |
| test.swift:76:21:76:21 | i | i |
| test.swift:76:31:76:35 | blah | blah |
| test.swift:85:6:85:12 | combine | combine |
| test.swift:85:17:85:17 | _ | _ |
| test.swift:85:19:85:24 | values | values |
| test.swift:85:32:85:40 | transform | transform |
| test.swift:86:12:86:17 | values | values |
| test.swift:86:19:86:25 | isEmpty | isEmpty |
| test.swift:87:12:87:17 | values | values |
| test.swift:87:19:87:27 | dropFirst | dropFirst |
| test.swift:87:31:87:36 | reduce | reduce |
| test.swift:87:38:87:43 | values | values |
| test.swift:87:49:87:57 | transform | transform |
func
| test.swift:7:5:9:5 | FunctionDeclaration |
| test.swift:11:5:13:5 | FunctionDeclaration |
| test.swift:24:1:32:1 | FunctionDeclaration |
| test.swift:47:5:50:5 | FunctionDeclaration |
| test.swift:52:5:54:5 | FunctionDeclaration |
| test.swift:62:5:69:5 | FunctionDeclaration |
| test.swift:74:5:81:5 | FunctionDeclaration |
| test.swift:85:1:88:1 | FunctionDeclaration |
add
nameExpr
| name_expr.swift:1:9:1:9 | NameExpr | y |
unsupported
| test.swift:1:1:1:17 | | |
| test.swift:3:1:3:38 | | |
| test.swift:4:1:14:1 | | |
| test.swift:16:1:16:32 | | |
| test.swift:17:1:21:1 | | |
| test.swift:23:1:23:37 | | |
| test.swift:24:1:32:1 | | |
| test.swift:34:1:34:49 | | |
| test.swift:35:1:55:1 | | |
| test.swift:57:1:57:30 | | |
| test.swift:58:1:70:1 | | |
| test.swift:72:1:72:37 | | |
| test.swift:73:1:82:1 | | |
| test.swift:84:1:84:24 | | |
| test.swift:85:1:88:1 | | |

View File

@@ -1,9 +1,5 @@
import codeql.unified.Ast
import codeql.unified.Ast::Unified
/** Holds if `node` is a Swift simple identifier whose value is `name`. */
query predicate identifier(Swift::SimpleIdentifier node, string name) { name = node.getValue() }
/** Holds if `node` is a name expression and `value` is the value of its identifier. */
query predicate nameExpr(NameExpr node, string value) { value = node.getIdentifier().getValue() }
/** Holds for every Swift function declaration `node`. */
query predicate func(Swift::FunctionDeclaration node) { any() }
/**
 * Holds if `node` is a Swift additive expression, `lhs` is its `getLhs(0)`
 * result and `rhs` is its `getRhs(0)` result.
 */
query predicate add(Swift::AdditiveExpression node, Swift::AstNode lhs, Swift::AstNode rhs) {
lhs = node.getLhs(0) and rhs = node.getRhs(0)
}
/** Holds if `node` is an unsupported node whose value is `value`. */
query predicate unsupported(UnsupportedNode node, string value) { value = node.getValue() }

View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Runs the extractor's test suite with UNIFIED_UPDATE_CORPUS=1 so that the
# corpus test rewrites its corpus files with the actual output.
# Abort on a failing command, an unset variable or a failing pipeline stage.
set -euo pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Move to the parent of the directory containing this script, then into its
# `extractor` subdirectory, so the script works from any working directory.
cd "$(dirname "$0")/.."
cd extractor
# NOTE(review): this runs every test in the crate, not just the corpus test —
# confirm that is intended.
UNIFIED_UPDATE_CORPUS=1 cargo test