From c5be2a3e2db370066604f64af50bf03e78baae3c Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 6 Feb 2025 14:04:57 +0000 Subject: [PATCH] Python: Allow comments in subscripts Once again, the interaction between anchors and extras (specifically comments) was causing trouble. The root of the problem was the fact that in `a[b]`, we put `b` in the `index` field of the subscript node, whereas in `a[b,c]`, we additionally synthesize a `Tuple` node for `b,c` (which matches the Python AST). To fix this, we refactored the grammar slightly so as to make that tuple explicit, such that a subscript node either contains a single expression or the newly added tuple node. This greatly simplifies the logic. --- python/extractor/tests/parser/subscripts.py | 21 ++++++++ python/extractor/tsg-python/python.tsg | 60 +++------------------ python/extractor/tsg-python/tsp/grammar.js | 11 +++- 3 files changed, 37 insertions(+), 55 deletions(-) create mode 100644 python/extractor/tests/parser/subscripts.py diff --git a/python/extractor/tests/parser/subscripts.py b/python/extractor/tests/parser/subscripts.py new file mode 100644 index 00000000000..02a11c2ad53 --- /dev/null +++ b/python/extractor/tests/parser/subscripts.py @@ -0,0 +1,21 @@ +a[b] + +c[d,e] + +c1[d1,] + +# And now with many comments + +e[ + # comment1 + f + # comment2 +] + +g[ + # comment3 + h, + # comment4 + i + # comment5 +] diff --git a/python/extractor/tsg-python/python.tsg b/python/extractor/tsg-python/python.tsg index 097eb47a6e0..7ad0f3f1424 100644 --- a/python/extractor/tsg-python/python.tsg +++ b/python/extractor/tsg-python/python.tsg @@ -22,7 +22,7 @@ (assignment !type) @assign { let @assign.node = (ast-node @assign "Assign") } -[ (expression_list) (tuple) (tuple_pattern) (pattern_list) ] @tuple +[ (expression_list) (tuple) (tuple_pattern) (pattern_list) (index_expression_list) ] @tuple { let @tuple.node = (ast-node @tuple "Tuple") } (list_pattern) @list @@ -2543,66 +2543,16 @@ (subscript value: (_) @value + subscript: (_) @index ) @subscript { attr (@subscript.node) value = @value.node attr (@value.node) ctx = "load" -} -; Single subscript -(subscript - value: (_) - . - subscript: (_) @index - . -) @subscript -{ attr (@subscript.node) index = @index.node attr (@index.node) ctx = "load" } -; For expressions of the form `a[b, c]` we must explicitly synthesize an internal tuple node -; We do this and also hook it up: -(subscript - value: (_) - . - subscript: (_) @first - . - subscript: (_) -) @subscript -{ - let @subscript.tuple = (ast-node @first "Tuple") - attr (@subscript.tuple) ctx = "load" - attr (@subscript.node) index = @subscript.tuple - edge @subscript.tuple -> @first.node - attr (@subscript.tuple -> @first.node) elts = (named-child-index @first) - attr (@first.node) ctx = "load" -} - -(subscript - value: (_) - . - subscript: (_) - subscript: (_) @elt -) @subscript -{ - edge @subscript.tuple -> @elt.node - attr (@subscript.tuple -> @elt.node) elts = (named-child-index @elt) - attr (@elt.node) ctx = "load" -} - - -; Set the end position correctly -(subscript - value: (_) - . - subscript: (_) - subscript: (_) @last - . -) @subscript -{ - attr (@subscript.tuple) _location_end = (location-end @last) -} @@ -3448,9 +3398,12 @@ ; Left hand side of an assignment such as `[foo, bar] = ...` (list_pattern element: (_) @elt) @parent - ; An unadorned tuple (such as in `x = y, z`) + ; An unadorned tuple such as in `x = y, z` (expression_list element: (_) @elt) @parent + ; An index containing multiple indices such as in `x[y, z]` + (index_expression_list element: (_) @elt) @parent + ; A regular tuple such as `(x, y, z)` (tuple element: (_) @elt) @parent @@ -3486,6 +3439,7 @@ (pattern_list element: (_) @elt) (list_pattern element: (_) @elt) (expression_list element: (_) @elt) + (index_expression_list element: (_) @elt) (parenthesized_expression inner: (_) @elt) (set element: (_) @elt) (match_sequence_pattern (_) @elt) diff --git a/python/extractor/tsg-python/tsp/grammar.js b/python/extractor/tsg-python/tsp/grammar.js index b41906d9d7b..b0eaaba2a3f 100644 --- a/python/extractor/tsg-python/tsp/grammar.js +++ b/python/extractor/tsg-python/tsp/grammar.js @@ -929,11 +929,18 @@ module.exports = grammar({ field('attribute', $.identifier) )), + _index_expression: $ => choice( + $.list_splat, + $.expression, + $.slice + ), + + index_expression_list: $ => open_sequence(field('element', $._index_expression)), + subscript: $ => prec(PREC.call, seq( field('value', $.primary_expression), '[', - commaSep1(field('subscript', choice($.list_splat, $.expression, $.slice))), - optional(','), + field('subscript', choice($._index_expression, $.index_expression_list)), ']' )),