diff --git a/python/extractor/tsg-python/python.tsg b/python/extractor/tsg-python/python.tsg index 8dec9ad5d3e..963df06fd7c 100644 --- a/python/extractor/tsg-python/python.tsg +++ b/python/extractor/tsg-python/python.tsg @@ -6,16 +6,6 @@ (module) @mod { let @mod.node = (ast-node @mod "Module") } -(_) @anynode -{ - scan (node-type @anynode) { - "^(ERROR|MISSING)$" { - let @anynode.node = (ast-node @anynode "SyntaxErrorNode") - attr (@anynode.node) source = (source-text @anynode) - } - } -} - (parenthesized_expression) @nd { let @nd.node = (ast-node @nd "Expr") } diff --git a/python/extractor/tsg-python/src/main.rs b/python/extractor/tsg-python/src/main.rs index c99145132f7..a94d93ba209 100644 --- a/python/extractor/tsg-python/src/main.rs +++ b/python/extractor/tsg-python/src/main.rs @@ -480,6 +480,99 @@ pub mod extra_functions { } } +struct TreeIterator<'a> { + nodes_to_visit: Vec>, +} + +impl<'a> TreeIterator<'a> { + fn new(root: tree_sitter::Node<'a>) -> Self { + Self { + nodes_to_visit: vec![root], + } + } +} + +impl<'a> Iterator for TreeIterator<'a> { + type Item = tree_sitter::Node<'a>; + + fn next(&mut self) -> Option { + if let Some(node) = self.nodes_to_visit.pop() { + // Add all children to the queue for processing + self.nodes_to_visit + .extend((0..node.child_count()).rev().filter_map(|i| node.child(i))); + Some(node) + } else { + None + } + } +} + +#[derive(Debug, Clone)] +struct SyntaxError { + start_pos: tree_sitter::Point, + end_pos: tree_sitter::Point, + source: String, +} + +fn syntax_errors_from_tree<'a>( + root: tree_sitter::Node<'a>, + source: &'a str, +) -> impl Iterator + 'a { + TreeIterator::new(root) + .filter(|&node| node.is_error() || node.is_missing()) + .map(move |node| { + let start_pos = node.start_position(); + let end_pos = node.end_position(); + let text = &source[node.byte_range()]; + SyntaxError { + start_pos, + end_pos, + source: text.to_string(), + } + }) +} + +fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) { + for error in errors { + let error_node = graph.add_graph_node(); + + // Add _kind attribute + graph[error_node] + .attributes + .add( + tree_sitter_graph::Identifier::from("_kind"), + tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()), + ) + .expect("Fresh node should not have duplicate attributes"); + + // Add _location attribute + let location = tree_sitter_graph::graph::Value::List( + vec![ + error.start_pos.row, + error.start_pos.column, + error.end_pos.row, + error.end_pos.column, + ] + .into_iter() + .map(|v| tree_sitter_graph::graph::Value::from(v as u32)) + .collect(), + ); + graph[error_node] + .attributes + .add(tree_sitter_graph::Identifier::from("_location"), location) + .expect("Fresh node should not have duplicate attributes"); + + // Add source attribute + graph[error_node] + .attributes + .add( + tree_sitter_graph::Identifier::from("source"), + tree_sitter_graph::graph::Value::String(error.source.clone()), + ) + .expect("Fresh node should not have duplicate attributes"); + } +} + fn main() -> Result<()> { let matches = Command::new("tsg-python") .version(BUILD_VERSION) @@ -581,10 +674,18 @@ fn main() -> Result<()> { ); let globals = Variables::new(); - let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false); - let graph = file - .execute(&tree, &source, &mut config, &NoCancellation) + let config = ExecutionConfig::new(&functions, &globals).lazy(false); + let mut graph = file + .execute(&tree, &source, &config, &NoCancellation) .with_context(|| format!("Could not execute TSG file {}", tsg_path))?; + + // Collect and add syntax error nodes to the graph + if tree.root_node().has_error() { + let syntax_errors: Vec = + syntax_errors_from_tree(tree.root_node(), &source).collect(); + add_syntax_error_nodes(&mut graph, &syntax_errors); + } + print!("{}", graph.pretty_print()); Ok(()) }