From 91d4cf662436a7f22cb4e16382974a9c176ca7f7 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 10 Apr 2026 16:07:25 +0000 Subject: [PATCH] Python: Update `python.tsg` First, we extend the various location overriding hacks to also accept list and dict splats in various places. Having done this, we then have to tackle how to actually desugar these new comprehension forms (as this is what we currently do for the old forms). As a reminder, a list comprehension like `[x for x in y]` currently gets desugared into a small local function, something like ```python def listcomp(a): for x in a: yield x listcomp(y) ``` For `[*x for x in y]`, the behaviour we want is that we unpack `x` before yielding its elements in turn. This is essentially what we would get if we were to use `yield from x` instead of `yield x` in the above desugaring, so that's what we do. This also works for set comprehensions. For dict comprehensions, it's slightly more complicated. Here, the generator function instead yields a stream of `(key, value)` tuples. (And apparently the old parser got this wrong and emitted `(value, key)` pairs instead, which we faithfully recreated in the new parser as well. We fix that bug in both parsers while we're at it.) So, a bare `yield from` is not enough; we also need a `.items()` call to get the double-starred expression to emit its items as a stream of tuples (that we then `yield from`). To make this (hopefully) less verbose in the implementation, we defer the decision of whether to use `yield` or `yield from` by introducing a `yield_kind` scoped variable that determines the type of the actual AST node. And of course for dict comprehensions with unpacking we need to synthesise the extra machinery mentioned above. On the plus side, this means we don't have to mess with control-flow, as the existing machinery should be able to handle the desugared syntax just fine. 
--- python/extractor/tsg-python/python.tsg | 78 ++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/python/extractor/tsg-python/python.tsg b/python/extractor/tsg-python/python.tsg index 93d6e95a344..e0aadd5432c 100644 --- a/python/extractor/tsg-python/python.tsg +++ b/python/extractor/tsg-python/python.tsg @@ -403,7 +403,7 @@ ;;; GeneratorExp -(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp +(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp { attr (@generatorexp.node) _location_start = (location-start @start) attr (@generatorexp.node) _location_end = (location-end @end) @@ -415,13 +415,13 @@ attr (@if.node) _location_end = (location-end @expr) } -(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr +(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr { attr (@child.node) _location_start = (location-start @start) attr (@child.node) _location_end = (location-end @end) } -(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr +(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start (for_in_clause) @end . (comment)* . ")" .) 
@_genexpr { attr (@end.node) _location_start = (location-start @start) attr (@end.node) _location_end = (location-end @end) @@ -824,6 +824,29 @@ attr (@genexpr.arg_use) ctx = "load" } +; DictComp with unpacking (PEP 798): `{**d for d in dicts}` +(dictionary_comprehension + body: (dictionary_splat) +) @genexpr +{ + let @genexpr.fun = (ast-node @genexpr "Function") + attr (@genexpr.node) function = @genexpr.fun + attr (@genexpr.fun) name = "dictcomp" + + let @genexpr.arg = (ast-node @genexpr "Name") + attr (@genexpr.arg) variable = ".0" + attr (@genexpr.arg) ctx = "param" + + edge @genexpr.fun -> @genexpr.arg + attr (@genexpr.fun -> @genexpr.arg) args = 0 + attr (@genexpr.fun) kwonlyargs = #null + attr (@genexpr.fun) kwarg = #null + + let @genexpr.arg_use = (ast-node @genexpr "Name") + attr (@genexpr.arg_use) variable = ".0" + attr (@genexpr.arg_use) ctx = "load" +} + ;;;;;; End of DictComp (`{a: b for c in d if e}`) ;;;;;; GeneratorExp (`(a for b in c if d)`) @@ -862,7 +885,7 @@ ; information for the entire generator expression (yes, it is a wide parameter!) and so we must recreate the logic for ; setting this location information correctly. -(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr +(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr { ; Synthesize the `genexpr` function let @genexpr.fun = (ast-node @genexpr "Function") @@ -1034,12 +1057,25 @@ ; For everything except dictionary comprehensions, the innermost expression is just the `body` of the ; comprehension. 
[ - (generator_expression body: (_) @body) @genexpr - (list_comprehension body: (_) @body) @genexpr - (set_comprehension body: (_) @body) @genexpr + (generator_expression body: (expression) @body) @genexpr + (list_comprehension body: (expression) @body) @genexpr + (set_comprehension body: (expression) @body) @genexpr ] { let @genexpr.result = @body.node + let @genexpr.yield_kind = "Yield" +} + +; For starred comprehensions (PEP 798), the result is the inner expression (not the Starred +; wrapper), and we use `yield from` instead of `yield` to represent the unpacking semantics. +[ + (generator_expression body: (list_splat (expression) @inner) @_body) @genexpr + (list_comprehension body: (list_splat (expression) @inner) @_body) @genexpr + (set_comprehension body: (list_splat (expression) @inner) @_body) @genexpr +] +{ + let @genexpr.result = @inner.node + let @genexpr.yield_kind = "YieldFrom" } ; For dict comprehensions, we build an explicit tuple using the key and value pair. @@ -1052,13 +1088,31 @@ { let tuple = (ast-node @body "Tuple") edge tuple -> @key.node - attr (tuple -> @key.node) elts = 1 + attr (tuple -> @key.node) elts = 0 edge tuple -> @value.node - attr (tuple -> @value.node) elts = 0 - ; TODO verify that it is correct to use a `(value, key)` tuple, and not a `(key, value)` tuple above. - ; That is what the current parser does... + attr (tuple -> @value.node) elts = 1 attr (tuple) ctx = "load" let @genexpr.result = tuple + let @genexpr.yield_kind = "Yield" +} + +; For dict comprehensions with unpacking (PEP 798), `{**d for d in dicts}` desugars to +; `yield from d.items()` to produce (key, value) tuples consistent with the regular dict comp model. 
+(dictionary_comprehension + body: (dictionary_splat (expression) @inner) @_body +) @genexpr +{ + ; Synthesize `d.items()`: Attribute(value=d, attr='items') then Call(func=attr) + let attr = (ast-node @inner "Attribute") + attr (attr) value = @inner.node + attr (attr) attr = "items" + attr (attr) ctx = "load" + + let call = (ast-node @inner "Call") + attr (call) func = attr + + let @genexpr.result = call + let @genexpr.yield_kind = "YieldFrom" } ; For the final clause, we need to hook it up with the rest of the expression. @@ -1094,7 +1148,7 @@ let last = (get-last-element @last_candidates) let expr = (ast-node @body "Expr") - let yield = (ast-node @body "Yield") + let yield = (ast-node @body @genexpr.yield_kind) let @genexpr.expr = expr let @genexpr.yield = yield