mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Improve handling of syntax errors
Rather than relying on matching arbitrary nodes inside tree-sitter-graph and then checking whether they are of type ERROR or MISSING (which seems to have stopped working in later versions of tree-sitter), we now explicitly go through the tree-sitter tree, locating all of the error and missing nodes along the way. We then add these on to the graph output in the same format as was previously produced by tree-sitter-graph. Note that it's very likely that some of the syntax errors will move around a bit as a consequence of this change. In general, we don't expect syntax errors to have stable locations, as small changes in the grammar can cause an error to appear in a different position, even if the underlying (erroneous) code has not changed.
This commit is contained in:
@@ -6,16 +6,6 @@
|
||||
(module) @mod
|
||||
{ let @mod.node = (ast-node @mod "Module") }
|
||||
|
||||
(_) @anynode
|
||||
{
|
||||
scan (node-type @anynode) {
|
||||
"^(ERROR|MISSING)$" {
|
||||
let @anynode.node = (ast-node @anynode "SyntaxErrorNode")
|
||||
attr (@anynode.node) source = (source-text @anynode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(parenthesized_expression) @nd
|
||||
{ let @nd.node = (ast-node @nd "Expr") }
|
||||
|
||||
|
||||
@@ -480,6 +480,99 @@ pub mod extra_functions {
|
||||
}
|
||||
}
|
||||
|
||||
struct TreeIterator<'a> {
|
||||
nodes_to_visit: Vec<tree_sitter::Node<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> TreeIterator<'a> {
|
||||
fn new(root: tree_sitter::Node<'a>) -> Self {
|
||||
Self {
|
||||
nodes_to_visit: vec![root],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TreeIterator<'a> {
|
||||
type Item = tree_sitter::Node<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(node) = self.nodes_to_visit.pop() {
|
||||
// Add all children to the queue for processing
|
||||
self.nodes_to_visit
|
||||
.extend((0..node.child_count()).rev().filter_map(|i| node.child(i)));
|
||||
Some(node)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct SyntaxError {
|
||||
start_pos: tree_sitter::Point,
|
||||
end_pos: tree_sitter::Point,
|
||||
source: String,
|
||||
}
|
||||
|
||||
fn syntax_errors_from_tree<'a>(
|
||||
root: tree_sitter::Node<'a>,
|
||||
source: &'a str,
|
||||
) -> impl Iterator<Item = SyntaxError> + 'a {
|
||||
TreeIterator::new(root)
|
||||
.filter(|&node| node.is_error() || node.is_missing())
|
||||
.map(move |node| {
|
||||
let start_pos = node.start_position();
|
||||
let end_pos = node.end_position();
|
||||
let text = &source[node.byte_range()];
|
||||
SyntaxError {
|
||||
start_pos,
|
||||
end_pos,
|
||||
source: text.to_string(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) {
|
||||
for error in errors {
|
||||
let error_node = graph.add_graph_node();
|
||||
|
||||
// Add _kind attribute
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(
|
||||
tree_sitter_graph::Identifier::from("_kind"),
|
||||
tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()),
|
||||
)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
|
||||
// Add _location attribute
|
||||
let location = tree_sitter_graph::graph::Value::List(
|
||||
vec![
|
||||
error.start_pos.row,
|
||||
error.start_pos.column,
|
||||
error.end_pos.row,
|
||||
error.end_pos.column,
|
||||
]
|
||||
.into_iter()
|
||||
.map(|v| tree_sitter_graph::graph::Value::from(v as u32))
|
||||
.collect(),
|
||||
);
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(tree_sitter_graph::Identifier::from("_location"), location)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
|
||||
// Add source attribute
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(
|
||||
tree_sitter_graph::Identifier::from("source"),
|
||||
tree_sitter_graph::graph::Value::String(error.source.clone()),
|
||||
)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let matches = Command::new("tsg-python")
|
||||
.version(BUILD_VERSION)
|
||||
@@ -581,10 +674,18 @@ fn main() -> Result<()> {
|
||||
);
|
||||
|
||||
let globals = Variables::new();
|
||||
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
|
||||
let graph = file
|
||||
.execute(&tree, &source, &mut config, &NoCancellation)
|
||||
let config = ExecutionConfig::new(&functions, &globals).lazy(false);
|
||||
let mut graph = file
|
||||
.execute(&tree, &source, &config, &NoCancellation)
|
||||
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
|
||||
|
||||
// Collect and add syntax error nodes to the graph
|
||||
if tree.root_node().has_error() {
|
||||
let syntax_errors: Vec<SyntaxError> =
|
||||
syntax_errors_from_tree(tree.root_node(), &source).collect();
|
||||
add_syntax_error_nodes(&mut graph, &syntax_errors);
|
||||
}
|
||||
|
||||
print!("{}", graph.pretty_print());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user