Python: Support template strings in rest of extractor

Adds three new AST nodes to the mix:

- `TemplateString` represents a t-string in Python 3.14
- `TemplateStringPart` represents one of the string constituents of a
t-string. (The interpolated expressions are represented as `Expr` nodes,
just like f-strings.)
- `JoinedTemplateString` represents an implicit concatenation of
template strings.

Importantly, we _completely avoid_ the complicated construction we
currently do for format strings (as well as the confusing nomenclature).
In particular, we do not inject extra empty strings to force a template
string into a strict alternation of strings and expressions. A
`JoinedTemplateString` simply has a list of template string children,
and a `TemplateString` has a list of "values" which may be either `Expr`
or `TemplateStringPart` nodes.

If we ever find that we actually want the more complicated interface for
these strings, then I would much prefer that we reconstruct it inside of
QL rather than in the parser.
This commit is contained in:
Taus
2025-10-28 22:05:53 +00:00
parent 5928d0ff12
commit 48cd54165a
7 changed files with 323 additions and 4 deletions

View File

@@ -56,6 +56,15 @@ class StringPart(AstBase):
self.text = text
self.s = s
class TemplateStringPart(AstBase):
    '''A string constituent of a template string literal.

    The interpolated expressions of a template string are not
    TemplateStringPart nodes; only the literal text between them is.
    '''
    # `text` and `s` are stored as given. In the parser's expected output,
    # `text` is the quoted source text of the part (e.g. '"Hello, "') and
    # `s` is the corresponding string value (e.g. 'Hello, ').
    __slots__ = "text", "s",
    def __init__(self, text, s):
        self.text = text
        self.s = s
class alias(AstBase):
__slots__ = "value", "asname",
@@ -356,6 +365,19 @@ class JoinedStr(expr):
def __init__(self, values):
self.values = values
class TemplateString(expr):
    '''A single template string literal (a t-string).

    `prefix` is the literal's prefix as recorded by the parser (the parser
    test output shows e.g. 't"'). `values` is the ordered list of the
    literal's constituents: TemplateStringPart nodes for the literal text
    and expression nodes for the interpolations. No empty parts are
    inserted, so `values` may be empty.
    '''
    __slots__ = "prefix", "values",
    def __init__(self, prefix, values):
        self.prefix = prefix
        self.values = values
class JoinedTemplateString(expr):
    '''An implicit concatenation of adjacent template string literals.

    `strings` is the list of TemplateString nodes being concatenated.
    '''
    __slots__ = "strings",
    def __init__(self, strings):
        self.strings = strings
class Lambda(expr):
__slots__ = "args", "inner_scope",

View File

@@ -186,12 +186,20 @@ FormattedStringLiteral.set_name("Fstring")
FormattedValue = ClassNode("FormattedValue", expr, descriptive_name='formatted value')
AnnAssign = ClassNode("AnnAssign", stmt, descriptive_name='annotated assignment')
AssignExpr = ClassNode('AssignExpr', expr, "assignment expression")
SpecialOperation = ClassNode('SpecialOperation', expr, "special operation")
TemplateString = ClassNode('TemplateString', expr, 'template string literal')
template_string_list = ListNode(TemplateString)
JoinedTemplateString = ClassNode("JoinedTemplateString", expr, descriptive_name='joined template string')
TemplateStringPart = ClassNode('TemplateStringPart', expr, "string part of a template string")
type_parameter = ClassNode('type_parameter', descriptive_name='type parameter')
type_parameter.field('location', location)
type_parameter_list = ListNode(type_parameter)
@@ -435,6 +443,9 @@ Subscript.field('value', expr)
Subscript.field('index', expr)
Subscript.field('ctx', expr_context, 'context')
TemplateString.field('prefix', string, 'prefix')
TemplateString.field('values', expr_list, 'values')
Try.field('body', stmt_list)
Try.field('orelse', stmt_list, 'else block')
Try.field('handlers', stmt_list, 'exception handlers')
@@ -484,10 +495,15 @@ PlaceHolder.field('ctx', expr_context, 'context')
StringPart.field('text', string)
StringPart.field('location', location)
TemplateStringPart.field('text', string)
Await.field('value', expr, 'expression waited upon')
FormattedStringLiteral.field('values', expr_list)
JoinedTemplateString.field('strings', template_string_list)
FormattedValue.field('value', expr, "expression to be formatted")
FormattedValue.field('conversion', string, 'type conversion')
FormattedValue.field('format_spec', FormattedStringLiteral, 'format specifier')

View File

@@ -273,6 +273,8 @@ list_fields = {
ast.Print: ("values",),
ast.Set: ("elts",),
ast.Str: ("implicitly_concatenated_parts",),
ast.TemplateString: ("values",),
ast.JoinedTemplateString: ("strings",),
ast.TypeAlias: ("type_parameters",),
ast.Try: ("body", "handlers", "orelse", "finalbody"),
ast.Tuple: ("elts",),

View File

@@ -0,0 +1,194 @@
Module: [1, 0] - [18, 0]
body: [
Assign: [1, 0] - [1, 14]
targets: [
Name: [1, 0] - [1, 4]
variable: Variable('name', None)
ctx: Store
]
value:
Str: [1, 7] - [1, 14]
s: 'World'
prefix: '"'
implicitly_concatenated_parts: None
Assign: [2, 0] - [2, 15]
targets: [
Name: [2, 0] - [2, 5]
variable: Variable('value', None)
ctx: Store
]
value:
Num: [2, 8] - [2, 15]
n: 42.5678
text: '42.5678'
Assign: [3, 0] - [3, 15]
targets: [
Name: [3, 0] - [3, 5]
variable: Variable('first', None)
ctx: Store
]
value:
Str: [3, 8] - [3, 15]
s: 'first'
prefix: '"'
implicitly_concatenated_parts: None
Assign: [4, 0] - [4, 17]
targets: [
Name: [4, 0] - [4, 6]
variable: Variable('second', None)
ctx: Store
]
value:
Str: [4, 9] - [4, 17]
s: 'second'
prefix: '"'
implicitly_concatenated_parts: None
If: [6, 0] - [6, 5]
test:
Num: [6, 3] - [6, 4]
n: 1
text: '1'
body: [
Expr: [7, 4] - [7, 7]
value:
TemplateString: [7, 4] - [7, 7]
prefix: 't"'
values: []
]
orelse: None
If: [8, 0] - [8, 5]
test:
Num: [8, 3] - [8, 4]
n: 2
text: '2'
body: [
Expr: [9, 4] - [9, 21]
value:
TemplateString: [9, 4] - [9, 21]
prefix: 't"'
values: [
TemplateStringPart: [9, 6] - [9, 13]
text: '"Hello, "'
s: 'Hello, '
Name: [9, 14] - [9, 18]
variable: Variable('name', None)
ctx: Load
TemplateStringPart: [9, 19] - [9, 20]
text: '"!"'
s: '!'
]
]
orelse: None
If: [10, 0] - [10, 5]
test:
Num: [10, 3] - [10, 4]
n: 3
text: '3'
body: [
Expr: [11, 4] - [11, 42]
value:
TemplateString: [11, 4] - [11, 42]
prefix: 't"'
values: [
TemplateStringPart: [11, 6] - [11, 13]
text: '"Value: "'
s: 'Value: '
Name: [11, 14] - [11, 19]
variable: Variable('value', None)
ctx: Load
TemplateStringPart: [11, 24] - [11, 31]
text: '", Hex: "'
s: ', Hex: '
Name: [11, 32] - [11, 37]
variable: Variable('value', None)
ctx: Load
]
]
orelse: None
If: [12, 0] - [12, 5]
test:
Num: [12, 3] - [12, 4]
n: 4
text: '4'
body: [
Expr: [13, 4] - [13, 29]
value:
TemplateString: [13, 4] - [13, 29]
prefix: 't"'
values: [
TemplateStringPart: [13, 6] - [13, 28]
text: '"Just a regular string."'
s: 'Just a regular string.'
]
]
orelse: None
If: [14, 0] - [14, 5]
test:
Num: [14, 3] - [14, 4]
n: 5
text: '5'
body: [
Expr: [15, 4] - [15, 50]
value:
TemplateString: [15, 4] - [15, 50]
prefix: 't"'
values: [
TemplateStringPart: [15, 6] - [15, 15]
text: '"Multiple "'
s: 'Multiple '
Name: [15, 16] - [15, 21]
variable: Variable('first', None)
ctx: Load
TemplateStringPart: [15, 22] - [15, 27]
text: '" and "'
s: ' and '
Name: [15, 28] - [15, 34]
variable: Variable('second', None)
ctx: Load
TemplateStringPart: [15, 35] - [15, 49]
text: '" placeholders."'
s: ' placeholders.'
]
]
orelse: None
If: [16, 0] - [16, 5]
test:
Num: [16, 3] - [16, 4]
n: 6
text: '6'
body: [
Expr: [17, 4] - [17, 66]
value:
JoinedTemplateString: [17, 4] - [17, 66]
strings: [
TemplateString: [17, 4] - [17, 31]
prefix: 't"'
values: [
TemplateStringPart: [17, 6] - [17, 30]
text: '"Implicit concatenation: "'
s: 'Implicit concatenation: '
]
TemplateString: [17, 32] - [17, 49]
prefix: 't"'
values: [
TemplateStringPart: [17, 34] - [17, 41]
text: '"Hello, "'
s: 'Hello, '
Name: [17, 42] - [17, 46]
variable: Variable('name', None)
ctx: Load
TemplateStringPart: [17, 47] - [17, 48]
text: '"!"'
s: '!'
]
TemplateString: [17, 50] - [17, 66]
prefix: 't"'
values: [
TemplateStringPart: [17, 52] - [17, 65]
text: '" How are you?"'
s: ' How are you?'
]
]
]
orelse: None
]

View File

@@ -0,0 +1,17 @@
name = "World"
value = 42.5678
first = "first"
second = "second"
if 1:
t""
if 2:
t"Hello, {name}!"
if 3:
t"Value: {value:.2f}, Hex: {value:#x}"
if 4:
t"Just a regular string."
if 5:
t"Multiple {first} and {second} placeholders."
if 6:
t"Implicit concatenation: " t"Hello, {name}!" t" How are you?"

View File

@@ -117,6 +117,9 @@
(string string_content: (_) @part)
{ let @part.node = (ast-node @part "StringPart") }
(template_string string_content: (_) @part)
{ let @part.node = (ast-node @part "TemplateStringPart") }
; A string concatenation that contains no interpolated expressions is just a `Str` (and its children
; will be `StringPart`s). A string concatenation that contains interpolated expressions is a
; `JoinedStr`, however.
@@ -142,6 +145,12 @@
}
}
(template_string) @tstring
{ let @tstring.node = (ast-node @tstring "TemplateString") }
(concatenated_template_string) @tstrings
{ let @tstrings.node = (ast-node @tstrings "JoinedTemplateString") }
(pair) @kvpair
{ let @kvpair.node = (ast-node @kvpair "KeyValuePair") }
@@ -2052,6 +2061,44 @@
;;;;;; End of JoinedStr (`f"foo"`)
;;;;;; JoinedTemplateString / TemplateString (`t"foo"`)
; Record the prefix of the template string.
(template_string) @tstring
{
attr (@tstring.node) prefix = (string-prefix @tstring)
}
; Attach the literal string parts of a template string to the template string node, as entries of
; its `values` list. The list position is the part's index among the named children, so parts and
; interpolations end up in source order.
(template_string (string_content) @part) @tmpl_any
{
  edge @tmpl_any.node -> @part.node
  attr (@tmpl_any.node -> @part.node) values = (named-child-index @part)
  ; NOTE(review): the TemplateStringPart AST class only declares `text` and `s`; confirm that
  ; setting `ctx` on a string part is intended.
  attr (@part.node) ctx = "load"
  ; Rebuild a plain quoted literal around the part: the enclosing string's prefix with the
  ; format/template flags removed, then the part's source text, then the closing quotes.
  let safe_string = (concatenate-strings (string-safe-prefix @tmpl_any) (source-text @part) (string-quotes @tmpl_any))
  ; Both attributes receive the same quoted literal here.
  attr (@part.node) s = safe_string
  attr (@part.node) text = safe_string
}
; Attach each interpolated expression to its template string as an entry of `values`. The list
; position is taken from the enclosing `interpolation` node (not from the expression itself),
; since the `interpolation` is what sits directly under the template string next to the string
; parts. Only the `expression:` field of the interpolation is attached by this stanza.
(template_string (interpolation expression: (_) @part) @interp) @tmpl_any
{
  edge @tmpl_any.node -> @part.node
  attr (@tmpl_any.node -> @part.node) values = (named-child-index @interp)
  attr (@part.node) ctx = "load"
}
; Concatenated template strings simply have a list-like field containing the template strings that
; are concatenated together.
(concatenated_template_string (template_string) @tstring) @tmpl_concat
{
edge @tmpl_concat.node -> @tstring.node
attr (@tmpl_concat.node -> @tstring.node) strings = (named-child-index @tstring)
attr (@tstring.node) ctx = "load"
}
;;;;;; End of JoinedTemplateString / TemplateString (`t"foo"`)
;;;;;; List (`[...]`)

View File

@@ -140,15 +140,22 @@ pub mod extra_functions {
}
fn safe(&self) -> Prefix {
// Remove format (f/F) and template (t/T) flags when generating a safe prefix.
Prefix {
flags: self.flags.clone().replace("f", "").replace("F", ""),
flags: self
.flags
.clone()
.replace("f", "")
.replace("F", "")
.replace("t", "")
.replace("T", ""),
quotes: self.quotes.clone(),
}
}
}
fn get_prefix(s: &str) -> Prefix {
let flags_matcher = regex::Regex::new("^[bfurBFUR]{0,2}").unwrap();
let flags_matcher = regex::Regex::new("^[bfurtBFURT]{0,2}").unwrap();
let mut end = 0;
let flags = match flags_matcher.find(s) {
Some(m) => {
@@ -170,7 +177,7 @@ pub mod extra_functions {
quotes = "}";
}
Prefix {
flags: flags.to_lowercase().to_owned(),
flags: flags.to_owned(),
quotes: quotes.to_owned(),
}
}
@@ -198,6 +205,12 @@ pub mod extra_functions {
let p = get_prefix("\"\"\"\"\"\"");
assert_eq!(p.flags, "");
assert_eq!(p.quotes, "\"\"\"");
let p = get_prefix("t\"hello\"");
assert_eq!(p.flags, "t");
assert_eq!(p.quotes, "\"");
let p = get_prefix("Tr'world'");
assert_eq!(p.flags, "Tr");
assert_eq!(p.quotes, "'");
}
fn get_string_contents(s: String) -> String {
@@ -227,6 +240,10 @@ pub mod extra_functions {
assert_eq!(get_string_contents(s.to_owned()), "");
let s = "''''''";
assert_eq!(get_string_contents(s.to_owned()), "");
let s = "t\"tmpl\"";
assert_eq!(get_string_contents(s.to_owned()), "tmpl");
let s = "Tr'world'";
assert_eq!(get_string_contents(s.to_owned()), "world");
}
pub struct StringPrefix;
@@ -291,7 +308,11 @@ pub mod extra_functions {
let node = graph[parameters.param()?.into_syntax_node_ref()?];
parameters.finish()?;
let prefix = get_prefix(&source[node.byte_range()]).full();
let prefix = prefix.replace("f", "").replace("F", "");
let prefix = prefix
.replace("f", "")
.replace("F", "")
.replace("t", "")
.replace("T", "");
Ok(Value::String(prefix))
}
}