Merge pull request #19929 from github/tausbn/python-update-tree-sitter-dependency

Python: Update `tree-sitter` dependency
This commit is contained in:
Taus
2025-09-17 13:40:13 +02:00
committed by GitHub
17 changed files with 213 additions and 384 deletions

View File

@@ -10,7 +10,7 @@ from io import BytesIO
#Semantic version of extractor.
#Update this if any changes are made
VERSION = "7.1.3"
VERSION = "7.1.4"
PY_EXTENSIONS = ".py", ".pyw"

View File

@@ -308,47 +308,6 @@ Module: [1, 0] - [23, 0]
]
ctx: Load
ctx: Load
TypeAlias: [20, 0] - [20, 41]
name:
Name: [20, 5] - [20, 9]
variable: Variable('Baz2', None)
ctx: Store
type_parameters: [
ParamSpec: [20, 10] - [20, 27]
name:
Name: [20, 12] - [20, 14]
variable: Variable('P2', None)
ctx: Store
default:
List: [20, 17] - [20, 27]
elts: [
Name: [20, 18] - [20, 21]
variable: Variable('int', None)
ctx: Load
Name: [20, 23] - [20, 26]
variable: Variable('str', None)
ctx: Load
]
ctx: Load
]
value:
Subscript: [20, 31] - [20, 41]
value:
Name: [20, 31] - [20, 35]
variable: Variable('Spam', None)
ctx: Load
index:
BinOp: [20, 36] - [20, 40]
left:
Name: [20, 36] - [20, 36]
variable: Variable('', None)
ctx: Load
op: Pow
right:
Name: [20, 38] - [20, 40]
variable: Variable('P2', None)
ctx: Load
ctx: Load
TypeAlias: [21, 0] - [21, 41]
name:
Name: [21, 5] - [21, 9]

View File

@@ -17,6 +17,6 @@ class Qux1[*Ts1 = *tuple[int, bool]]: ...
# TypeAliases
type Foo2[T15, U1 = str] = Bar1[T15, U1]
type Baz2[**P2 = [int, str]] = Spam[**P2]
# type Baz2[**P2 = [int, str]] = Spam[**P2] # From the PEP, but this is not actually valid syntax!
type Qux2[*Ts2 = *tuple[str]] = Ham[*Ts2]
type Rab[U2, T15 = str] = Bar2[T15, U2]

View File

@@ -1,12 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289"
version = 4
[[package]]
name = "aho-corasick"
@@ -82,12 +76,6 @@ dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.30"
@@ -121,15 +109,6 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
dependencies = [
"ahash",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
@@ -258,14 +237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
[[package]]
name = "string-interner"
version = "0.12.2"
name = "streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08"
dependencies = [
"cfg-if",
"hashbrown",
]
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
[[package]]
name = "strsim"
@@ -306,30 +281,39 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.20.4"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a"
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
dependencies = [
"cc",
"regex",
"regex-syntax",
"streaming-iterator",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-graph"
version = "0.7.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a"
checksum = "63f86eb73c7d891c4b9b6fe4d4e63dd94c506e4788af7c2296afdcfbeea626cc"
dependencies = [
"log",
"regex",
"serde",
"serde_json",
"smallvec",
"string-interner",
"streaming-iterator",
"thiserror",
"tree-sitter",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
[[package]]
name = "tsg-python"
version = "0.1.0"

View File

@@ -10,7 +10,7 @@ edition = "2024"
[dependencies]
anyhow = "1.0"
regex = "1"
tree-sitter = "=0.20.4"
tree-sitter-graph = "0.7.0"
tree-sitter = "=0.24.7"
tree-sitter-graph = "0.12.0"
tsp = {path = "tsp"}
clap = "4.5"

View File

@@ -6,16 +6,6 @@
(module) @mod
{ let @mod.node = (ast-node @mod "Module") }
(_) @anynode
{
scan (node-type @anynode) {
"^(ERROR|MISSING)$" {
let @anynode.node = (ast-node @anynode "SyntaxErrorNode")
attr (@anynode.node) source = (source-text @anynode)
}
}
}
(parenthesized_expression) @nd
{ let @nd.node = (ast-node @nd "Expr") }
@@ -416,13 +406,13 @@
attr (@if.node) _location_end = (location-end @expr)
}
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr
{
attr (@child.node) _location_start = (location-start @start)
attr (@child.node) _location_end = (location-end @end)
}
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr
{
attr (@end.node) _location_start = (location-start @start)
attr (@end.node) _location_end = (location-end @end)
@@ -524,7 +514,7 @@
attr (@del.node -> @target.node) targets = (named-child-index @target)
}
(delete_statement target: (_) @target) @del
(delete_statement target: (_) @target) @_del
{
attr (@target.node) ctx = "del"
}
@@ -798,8 +788,8 @@
(dictionary_comprehension
body: (pair
key: (_) @key
value: (_) @value
key: (_) @_key
value: (_) @_value
)
) @genexpr
{
@@ -1299,7 +1289,7 @@
; the index of the left-hand side of the current assignment.
; Base case, for the outermost assignment we set the outermost node to this node, and the index to zero.
(expression_statement (assignment !type) @assign) @expr
(expression_statement (assignment !type) @assign) @_expr
{
let @assign.outermost_assignment = @assign.node
let @assign.target_index = 0
@@ -1358,7 +1348,7 @@
}
(assignment
left: (_) @target
left: (_) @_target
type: (_)
right: (_) @value
) @assign
@@ -2330,7 +2320,7 @@
attr (@operand.node) ctx = "load"
}
(unary_operator "~" @op) @unaryop
(unary_operator "~" @_op) @unaryop
{
attr (@unaryop.node) op = "~"
}
@@ -2614,7 +2604,7 @@
; Async status
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
; as this is the behaviour of the old parser.
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
(with_statement "async" "with" @_with_keyword (with_clause . (with_item) @with))
{
attr (@with.node) is_async = #true
}
@@ -2800,7 +2790,7 @@
(identifier) @obj
.
(identifier) @attr
) @match_value_pattern
) @_match_value_pattern
{
let attribute = (ast-node @attr "Attribute")
attr (@attr.node) _skip_to = attribute
@@ -2814,7 +2804,7 @@
(match_value_pattern
.
(identifier) @id
) @match_value_pattern
) @_match_value_pattern
{
attr (@id.node) ctx = "load"
}
@@ -3267,8 +3257,8 @@
(decorated_definition
(decorator (expression) @exp1) @dec1
. (comment)* .
(decorator (expression) @exp2) @dec2
) @decorator
(decorator (expression) @_exp2) @dec2
) @_decorator
{
attr (@dec1.node) func = @exp1.node
edge @dec1.node -> @dec2.node
@@ -3279,7 +3269,7 @@
(decorator (expression) @exp) @last
. (comment)* .
definition: (function_definition) @funcdef
) @decorator
) @_decorator
{
attr (@last.node) func = @exp.node
edge @last.node -> @funcdef.funcexpr
@@ -3291,7 +3281,7 @@
(decorator (expression) @exp) @last
. (comment)* .
definition: (class_definition) @class
) @decorator
) @_decorator
{
attr (@last.node) func = @exp.node
edge @last.node -> @class.class_expr

View File

@@ -480,6 +480,102 @@ pub mod extra_functions {
}
}
/// Depth-first, pre-order iterator over a node and all of its descendants.
///
/// Pending nodes are kept on an explicit stack instead of using recursion.
struct TreeIterator<'a> {
    /// Stack of nodes still to be yielded; the next node to yield is the last element.
    nodes_to_visit: Vec<tree_sitter::Node<'a>>,
}

impl<'a> TreeIterator<'a> {
    /// Creates an iterator that yields `root` first, followed by its descendants.
    fn new(root: tree_sitter::Node<'a>) -> Self {
        Self {
            nodes_to_visit: vec![root],
        }
    }
}

impl<'a> Iterator for TreeIterator<'a> {
    type Item = tree_sitter::Node<'a>;

    /// Pops the next pending node, schedules its children, and returns it.
    ///
    /// Returns `None` once the stack of pending nodes is empty.
    fn next(&mut self) -> Option<Self::Item> {
        let node = self.nodes_to_visit.pop()?;
        // Push the children in reverse index order so that the first child ends up on top
        // of the stack and is therefore yielded next. The iterator is fed straight into
        // `extend`, avoiding a temporary `Vec` for every visited node.
        self.nodes_to_visit
            .extend((0..node.child_count()).rev().filter_map(|i| node.child(i)));
        Some(node)
    }
}
/// A syntax error found in a parsed tree: the position of the offending node together
/// with the source text it covers.
#[derive(Debug, Clone)]
struct SyntaxError {
/// Start position of the offending node, as reported by `Node::start_position`.
start_pos: tree_sitter::Point,
/// End position of the offending node, as reported by `Node::end_position`.
end_pos: tree_sitter::Point,
/// Source text covered by the node's byte range (empty if that text could not be sliced).
source: String,
}
/// Lazily yields a [`SyntaxError`] for every error or missing node below `root`.
///
/// The tree is walked with [`TreeIterator`], starting at `root` itself. A node is reported
/// when `is_error()` or `is_missing()` is true for it.
///
/// # Parameters
/// - `root`: node at which the traversal starts.
/// - `source`: the text the tree was parsed from; used to extract each node's source text.
///
/// # Returns
/// An iterator of `SyntaxError` values carrying the node's start/end positions and the
/// slice of `source` covered by the node's byte range.
fn syntax_errors_from_tree<'a>(
    root: tree_sitter::Node<'a>,
    source: &'a str,
) -> impl Iterator<Item = SyntaxError> + 'a {
    TreeIterator::new(root)
        .filter(|node| node.is_error() || node.is_missing())
        .map(move |node| SyntaxError {
            start_pos: node.start_position(),
            end_pos: node.end_position(),
            // `str::get` returns `None` instead of panicking when the byte range is out of
            // bounds or does not fall on character boundaries; in that case the error is
            // still reported, with empty source text.
            source: source
                .get(node.byte_range())
                .map(str::to_owned)
                .unwrap_or_default(),
        })
}
fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) {
for error in errors {
let error_node = graph.add_graph_node();
// Add _kind attribute
graph[error_node]
.attributes
.add(
tree_sitter_graph::Identifier::from("_kind"),
tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()),
)
.expect("Fresh node should not have duplicate attributes");
// Add _location attribute
let location = tree_sitter_graph::graph::Value::List(
vec![
error.start_pos.row,
error.start_pos.column,
error.end_pos.row,
error.end_pos.column,
]
.into_iter()
.map(|v| tree_sitter_graph::graph::Value::from(v as u32))
.collect(),
);
graph[error_node]
.attributes
.add(tree_sitter_graph::Identifier::from("_location"), location)
.expect("Fresh node should not have duplicate attributes");
// Add source attribute
graph[error_node]
.attributes
.add(
tree_sitter_graph::Identifier::from("source"),
tree_sitter_graph::graph::Value::String(error.source.clone()),
)
.expect("Fresh node should not have duplicate attributes");
}
}
fn main() -> Result<()> {
let matches = Command::new("tsg-python")
.version(BUILD_VERSION)
@@ -502,7 +598,7 @@ fn main() -> Result<()> {
let source_path = Path::new(matches.get_one::<String>("source").unwrap());
let language = tsp::language();
let mut parser = Parser::new();
parser.set_language(language)?;
parser.set_language(&language)?;
// Statically include `python.tsg`:
let tsg = if matches.contains_id("tsg") {
std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))?
@@ -581,10 +677,18 @@ fn main() -> Result<()> {
);
let globals = Variables::new();
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
let graph = file
.execute(&tree, &source, &mut config, &NoCancellation)
let config = ExecutionConfig::new(&functions, &globals).lazy(false);
let mut graph = file
.execute(&tree, &source, &config, &NoCancellation)
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
// Collect and add syntax error nodes to the graph
if tree.root_node().has_error() {
let syntax_errors: Vec<SyntaxError> =
syntax_errors_from_tree(tree.root_node(), &source).collect();
add_syntax_error_nodes(&mut graph, &syntax_errors);
}
print!("{}", graph.pretty_print());
Ok(())
}

View File

@@ -26,7 +26,7 @@ path = "bindings/rust/lib.rs"
## When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
[dependencies]
tree-sitter = ">= 0.20, < 0.21"
tree-sitter = "=0.24.7"
[build-dependencies]
cc = "1.2"