Python: Update tree-sitter dependency

Updates the Python extractor to depend on version 0.24.7 of tree-sitter
(and 0.12.0 of tree-sitter-graph).

A few changes were needed in order to make the code build and run after
updating the dependencies:

- In `main.rs`, the `Language` is now passed to `Parser::set_language` by reference.
- In `python.tsg`, many queries had captures that were not actually used
in the body of the stanza. The updated tree-sitter-graph no longer allows
this (unless the capture's name starts with an underscore), as an unused
capture may indicate an error. To fix this, I added underscores in the
appropriate places (and verified that none of these unused captures were
in fact bugs).
This commit is contained in:
Taus
2025-06-30 14:23:53 +00:00
parent 31852985e5
commit 76f15a890c
5 changed files with 37 additions and 53 deletions

View File

@@ -1,12 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289"
version = 4
[[package]]
name = "aho-corasick"
@@ -82,12 +76,6 @@ dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.30"
@@ -121,15 +109,6 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
dependencies = [
"ahash",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
@@ -258,14 +237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
[[package]]
name = "string-interner"
version = "0.12.2"
name = "streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08"
dependencies = [
"cfg-if",
"hashbrown",
]
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
[[package]]
name = "strsim"
@@ -306,30 +281,39 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.20.4"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a"
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
dependencies = [
"cc",
"regex",
"regex-syntax",
"streaming-iterator",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-graph"
version = "0.7.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a"
checksum = "63f86eb73c7d891c4b9b6fe4d4e63dd94c506e4788af7c2296afdcfbeea626cc"
dependencies = [
"log",
"regex",
"serde",
"serde_json",
"smallvec",
"string-interner",
"streaming-iterator",
"thiserror",
"tree-sitter",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
[[package]]
name = "tsg-python"
version = "0.1.0"

View File

@@ -10,7 +10,7 @@ edition = "2024"
[dependencies]
anyhow = "1.0"
regex = "1"
tree-sitter = "=0.20.4"
tree-sitter-graph = "0.7.0"
tree-sitter = "=0.24.7"
tree-sitter-graph = "0.12.0"
tsp = {path = "tsp"}
clap = "4.5"

View File

@@ -416,13 +416,13 @@
attr (@if.node) _location_end = (location-end @expr)
}
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr
{
attr (@child.node) _location_start = (location-start @start)
attr (@child.node) _location_end = (location-end @end)
}
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr
{
attr (@end.node) _location_start = (location-start @start)
attr (@end.node) _location_end = (location-end @end)
@@ -524,7 +524,7 @@
attr (@del.node -> @target.node) targets = (named-child-index @target)
}
(delete_statement target: (_) @target) @del
(delete_statement target: (_) @target) @_del
{
attr (@target.node) ctx = "del"
}
@@ -798,8 +798,8 @@
(dictionary_comprehension
body: (pair
key: (_) @key
value: (_) @value
key: (_) @_key
value: (_) @_value
)
) @genexpr
{
@@ -1299,7 +1299,7 @@
; the index of the left-hand side of the current assignment.
; Base case, for the outermost assignment we set the outermost node to this node, and the index to zero.
(expression_statement (assignment !type) @assign) @expr
(expression_statement (assignment !type) @assign) @_expr
{
let @assign.outermost_assignment = @assign.node
let @assign.target_index = 0
@@ -1358,7 +1358,7 @@
}
(assignment
left: (_) @target
left: (_) @_target
type: (_)
right: (_) @value
) @assign
@@ -2330,7 +2330,7 @@
attr (@operand.node) ctx = "load"
}
(unary_operator "~" @op) @unaryop
(unary_operator "~" @_op) @unaryop
{
attr (@unaryop.node) op = "~"
}
@@ -2614,7 +2614,7 @@
; Async status
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
; as this is the behaviour of the old parser.
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
(with_statement "async" "with" @_with_keyword (with_clause . (with_item) @with))
{
attr (@with.node) is_async = #true
}
@@ -2800,7 +2800,7 @@
(identifier) @obj
.
(identifier) @attr
) @match_value_pattern
) @_match_value_pattern
{
let attribute = (ast-node @attr "Attribute")
attr (@attr.node) _skip_to = attribute
@@ -2814,7 +2814,7 @@
(match_value_pattern
.
(identifier) @id
) @match_value_pattern
) @_match_value_pattern
{
attr (@id.node) ctx = "load"
}
@@ -3267,8 +3267,8 @@
(decorated_definition
(decorator (expression) @exp1) @dec1
. (comment)* .
(decorator (expression) @exp2) @dec2
) @decorator
(decorator (expression) @_exp2) @dec2
) @_decorator
{
attr (@dec1.node) func = @exp1.node
edge @dec1.node -> @dec2.node
@@ -3279,7 +3279,7 @@
(decorator (expression) @exp) @last
. (comment)* .
definition: (function_definition) @funcdef
) @decorator
) @_decorator
{
attr (@last.node) func = @exp.node
edge @last.node -> @funcdef.funcexpr
@@ -3291,7 +3291,7 @@
(decorator (expression) @exp) @last
. (comment)* .
definition: (class_definition) @class
) @decorator
) @_decorator
{
attr (@last.node) func = @exp.node
edge @last.node -> @class.class_expr

View File

@@ -502,7 +502,7 @@ fn main() -> Result<()> {
let source_path = Path::new(matches.get_one::<String>("source").unwrap());
let language = tsp::language();
let mut parser = Parser::new();
parser.set_language(language)?;
parser.set_language(&language)?;
// Statically include `python.tsg`:
let tsg = if matches.contains_id("tsg") {
std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))?

View File

@@ -26,7 +26,7 @@ path = "bindings/rust/lib.rs"
## When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
[dependencies]
tree-sitter = ">= 0.20, < 0.21"
tree-sitter = "=0.24.7"
[build-dependencies]
cc = "1.2"