From 68c1a3d38991a74b536411e2a20ddb7ed7c96eac Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 5 Feb 2026 13:45:54 +0000 Subject: [PATCH] Python: Fix syntax error when `=` is used as a format fill character An example (provided by @redsun82) is the string `f"{x:=^20}"`. Parsing this (with unnamed nodes shown) illustrates the problem: ``` module [0, 0] - [2, 0] expression_statement [0, 0] - [0, 11] string [0, 0] - [0, 11] string_start [0, 0] - [0, 2] interpolation [0, 2] - [0, 10] "{" [0, 2] - [0, 3] expression: named_expression [0, 3] - [0, 9] name: identifier [0, 3] - [0, 4] ":=" [0, 4] - [0, 6] ERROR [0, 6] - [0, 7] "^" [0, 6] - [0, 7] value: integer [0, 7] - [0, 9] "}" [0, 9] - [0, 10] string_end [0, 10] - [0, 11] ``` Observe that we've managed to combine the format specifier token `:` and the fill character `=` in a single token (which doesn't match the `:` we expect in the grammar rule), and hence we get a syntax error. If we change the `=` to some other character (e.g. a `-`), we instead get ``` module [0, 0] - [2, 0] expression_statement [0, 0] - [0, 11] string [0, 0] - [0, 11] string_start [0, 0] - [0, 2] interpolation [0, 2] - [0, 10] "{" [0, 2] - [0, 3] expression: identifier [0, 3] - [0, 4] format_specifier: format_specifier [0, 4] - [0, 9] ":" [0, 4] - [0, 5] "}" [0, 9] - [0, 10] string_end [0, 10] - [0, 11] ``` and in particular no syntax error. To fix this, we want to ensure that the `:` is lexed on its own, and the `token(prec(1, ...))` construction can be used to do exactly this. Finally, you may wonder why `=` is special here. I think what's going on is that the lexer knows that `:=` is a token on its own (because it's used in the walrus operator), and so it greedily consumes the following `=` with this in mind. --- python/extractor/tsg-python/tsp/grammar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/extractor/tsg-python/tsp/grammar.js b/python/extractor/tsg-python/tsp/grammar.js index a30ddb9f0a0..c53a67da126 100644 --- a/python/extractor/tsg-python/tsp/grammar.js +++ b/python/extractor/tsg-python/tsp/grammar.js @@ -1168,7 +1168,7 @@ module.exports = grammar({ _not_escape_sequence: $ => token.immediate('\\'), format_specifier: $ => seq( - ':', + token(prec(1,':')), repeat(choice( token(prec(1, /[^{}\n]+/)), alias($.interpolation, $.format_expression)