mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Support template strings in rest of extractor
Adds three new AST nodes to the mix:

- `TemplateString` represents a t-string in Python 3.14.
- `TemplateStringPart` represents one of the string constituents of a t-string. (The interpolated expressions are represented as `Expr` nodes, just like in f-strings.)
- `JoinedTemplateString` represents an implicit concatenation of template strings.

Importantly, we _completely avoid_ the complicated construction we currently do for format strings (as well as the confusing nomenclature). In particular, we do not inject extra empty strings, which is what the format string construction does to force a strict alternation of strings and expressions. A `JoinedTemplateString` simply has a list of template string children, and a `TemplateString` has a list of "values", each of which may be either an `Expr` or a `TemplateStringPart` node.

If we ever find that we actually want the more complicated interface for these strings, then I would much rather we reconstruct it inside of QL than in the parser.
This commit is contained in:
@@ -56,6 +56,15 @@ class StringPart(AstBase):
|
||||
self.text = text
|
||||
self.s = s
|
||||
|
||||
class TemplateStringPart(AstBase):
|
||||
'''A string constituent of a template string literal'''
|
||||
|
||||
__slots__ = "text", "s",
|
||||
|
||||
def __init__(self, text, s):
|
||||
self.text = text
|
||||
self.s = s
|
||||
|
||||
class alias(AstBase):
|
||||
__slots__ = "value", "asname",
|
||||
|
||||
@@ -356,6 +365,19 @@ class JoinedStr(expr):
|
||||
def __init__(self, values):
|
||||
self.values = values
|
||||
|
||||
class TemplateString(expr):
|
||||
__slots__ = "prefix", "values",
|
||||
|
||||
def __init__(self, prefix, values):
|
||||
self.prefix = prefix
|
||||
self.values = values
|
||||
|
||||
class JoinedTemplateString(expr):
|
||||
__slots__ = "strings",
|
||||
|
||||
def __init__(self, strings):
|
||||
self.strings = strings
|
||||
|
||||
|
||||
class Lambda(expr):
|
||||
__slots__ = "args", "inner_scope",
|
||||
|
||||
@@ -186,12 +186,20 @@ FormattedStringLiteral.set_name("Fstring")
|
||||
|
||||
FormattedValue = ClassNode("FormattedValue", expr, descriptive_name='formatted value')
|
||||
|
||||
|
||||
AnnAssign = ClassNode("AnnAssign", stmt, descriptive_name='annotated assignment')
|
||||
|
||||
AssignExpr = ClassNode('AssignExpr', expr, "assignment expression")
|
||||
|
||||
SpecialOperation = ClassNode('SpecialOperation', expr, "special operation")
|
||||
|
||||
TemplateString = ClassNode('TemplateString', expr, 'template string literal')
|
||||
|
||||
template_string_list = ListNode(TemplateString)
|
||||
|
||||
JoinedTemplateString = ClassNode("JoinedTemplateString", expr, descriptive_name='joined template string')
|
||||
TemplateStringPart = ClassNode('TemplateStringPart', expr, "string part of a template string")
|
||||
|
||||
type_parameter = ClassNode('type_parameter', descriptive_name='type parameter')
|
||||
type_parameter.field('location', location)
|
||||
type_parameter_list = ListNode(type_parameter)
|
||||
@@ -435,6 +443,9 @@ Subscript.field('value', expr)
|
||||
Subscript.field('index', expr)
|
||||
Subscript.field('ctx', expr_context, 'context')
|
||||
|
||||
TemplateString.field('prefix', string, 'prefix')
|
||||
TemplateString.field('values', expr_list, 'values')
|
||||
|
||||
Try.field('body', stmt_list)
|
||||
Try.field('orelse', stmt_list, 'else block')
|
||||
Try.field('handlers', stmt_list, 'exception handlers')
|
||||
@@ -484,10 +495,15 @@ PlaceHolder.field('ctx', expr_context, 'context')
|
||||
StringPart.field('text', string)
|
||||
StringPart.field('location', location)
|
||||
|
||||
TemplateStringPart.field('text', string)
|
||||
|
||||
|
||||
Await.field('value', expr, 'expression waited upon')
|
||||
|
||||
FormattedStringLiteral.field('values', expr_list)
|
||||
|
||||
JoinedTemplateString.field('strings', template_string_list)
|
||||
|
||||
FormattedValue.field('value', expr, "expression to be formatted")
|
||||
FormattedValue.field('conversion', string, 'type conversion')
|
||||
FormattedValue.field('format_spec', FormattedStringLiteral, 'format specifier')
|
||||
|
||||
@@ -273,6 +273,8 @@ list_fields = {
|
||||
ast.Print: ("values",),
|
||||
ast.Set: ("elts",),
|
||||
ast.Str: ("implicitly_concatenated_parts",),
|
||||
ast.TemplateString: ("values",),
|
||||
ast.JoinedTemplateString: ("strings",),
|
||||
ast.TypeAlias: ("type_parameters",),
|
||||
ast.Try: ("body", "handlers", "orelse", "finalbody"),
|
||||
ast.Tuple: ("elts",),
|
||||
|
||||
194
python/extractor/tests/parser/template_strings_new.expected
Normal file
194
python/extractor/tests/parser/template_strings_new.expected
Normal file
@@ -0,0 +1,194 @@
|
||||
Module: [1, 0] - [18, 0]
|
||||
body: [
|
||||
Assign: [1, 0] - [1, 14]
|
||||
targets: [
|
||||
Name: [1, 0] - [1, 4]
|
||||
variable: Variable('name', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
Str: [1, 7] - [1, 14]
|
||||
s: 'World'
|
||||
prefix: '"'
|
||||
implicitly_concatenated_parts: None
|
||||
Assign: [2, 0] - [2, 15]
|
||||
targets: [
|
||||
Name: [2, 0] - [2, 5]
|
||||
variable: Variable('value', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
Num: [2, 8] - [2, 15]
|
||||
n: 42.5678
|
||||
text: '42.5678'
|
||||
Assign: [3, 0] - [3, 15]
|
||||
targets: [
|
||||
Name: [3, 0] - [3, 5]
|
||||
variable: Variable('first', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
Str: [3, 8] - [3, 15]
|
||||
s: 'first'
|
||||
prefix: '"'
|
||||
implicitly_concatenated_parts: None
|
||||
Assign: [4, 0] - [4, 17]
|
||||
targets: [
|
||||
Name: [4, 0] - [4, 6]
|
||||
variable: Variable('second', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
Str: [4, 9] - [4, 17]
|
||||
s: 'second'
|
||||
prefix: '"'
|
||||
implicitly_concatenated_parts: None
|
||||
If: [6, 0] - [6, 5]
|
||||
test:
|
||||
Num: [6, 3] - [6, 4]
|
||||
n: 1
|
||||
text: '1'
|
||||
body: [
|
||||
Expr: [7, 4] - [7, 7]
|
||||
value:
|
||||
TemplateString: [7, 4] - [7, 7]
|
||||
prefix: 't"'
|
||||
values: []
|
||||
]
|
||||
orelse: None
|
||||
If: [8, 0] - [8, 5]
|
||||
test:
|
||||
Num: [8, 3] - [8, 4]
|
||||
n: 2
|
||||
text: '2'
|
||||
body: [
|
||||
Expr: [9, 4] - [9, 21]
|
||||
value:
|
||||
TemplateString: [9, 4] - [9, 21]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [9, 6] - [9, 13]
|
||||
text: '"Hello, "'
|
||||
s: 'Hello, '
|
||||
Name: [9, 14] - [9, 18]
|
||||
variable: Variable('name', None)
|
||||
ctx: Load
|
||||
TemplateStringPart: [9, 19] - [9, 20]
|
||||
text: '"!"'
|
||||
s: '!'
|
||||
]
|
||||
]
|
||||
orelse: None
|
||||
If: [10, 0] - [10, 5]
|
||||
test:
|
||||
Num: [10, 3] - [10, 4]
|
||||
n: 3
|
||||
text: '3'
|
||||
body: [
|
||||
Expr: [11, 4] - [11, 42]
|
||||
value:
|
||||
TemplateString: [11, 4] - [11, 42]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [11, 6] - [11, 13]
|
||||
text: '"Value: "'
|
||||
s: 'Value: '
|
||||
Name: [11, 14] - [11, 19]
|
||||
variable: Variable('value', None)
|
||||
ctx: Load
|
||||
TemplateStringPart: [11, 24] - [11, 31]
|
||||
text: '", Hex: "'
|
||||
s: ', Hex: '
|
||||
Name: [11, 32] - [11, 37]
|
||||
variable: Variable('value', None)
|
||||
ctx: Load
|
||||
]
|
||||
]
|
||||
orelse: None
|
||||
If: [12, 0] - [12, 5]
|
||||
test:
|
||||
Num: [12, 3] - [12, 4]
|
||||
n: 4
|
||||
text: '4'
|
||||
body: [
|
||||
Expr: [13, 4] - [13, 29]
|
||||
value:
|
||||
TemplateString: [13, 4] - [13, 29]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [13, 6] - [13, 28]
|
||||
text: '"Just a regular string."'
|
||||
s: 'Just a regular string.'
|
||||
]
|
||||
]
|
||||
orelse: None
|
||||
If: [14, 0] - [14, 5]
|
||||
test:
|
||||
Num: [14, 3] - [14, 4]
|
||||
n: 5
|
||||
text: '5'
|
||||
body: [
|
||||
Expr: [15, 4] - [15, 50]
|
||||
value:
|
||||
TemplateString: [15, 4] - [15, 50]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [15, 6] - [15, 15]
|
||||
text: '"Multiple "'
|
||||
s: 'Multiple '
|
||||
Name: [15, 16] - [15, 21]
|
||||
variable: Variable('first', None)
|
||||
ctx: Load
|
||||
TemplateStringPart: [15, 22] - [15, 27]
|
||||
text: '" and "'
|
||||
s: ' and '
|
||||
Name: [15, 28] - [15, 34]
|
||||
variable: Variable('second', None)
|
||||
ctx: Load
|
||||
TemplateStringPart: [15, 35] - [15, 49]
|
||||
text: '" placeholders."'
|
||||
s: ' placeholders.'
|
||||
]
|
||||
]
|
||||
orelse: None
|
||||
If: [16, 0] - [16, 5]
|
||||
test:
|
||||
Num: [16, 3] - [16, 4]
|
||||
n: 6
|
||||
text: '6'
|
||||
body: [
|
||||
Expr: [17, 4] - [17, 66]
|
||||
value:
|
||||
JoinedTemplateString: [17, 4] - [17, 66]
|
||||
strings: [
|
||||
TemplateString: [17, 4] - [17, 31]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [17, 6] - [17, 30]
|
||||
text: '"Implicit concatenation: "'
|
||||
s: 'Implicit concatenation: '
|
||||
]
|
||||
TemplateString: [17, 32] - [17, 49]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [17, 34] - [17, 41]
|
||||
text: '"Hello, "'
|
||||
s: 'Hello, '
|
||||
Name: [17, 42] - [17, 46]
|
||||
variable: Variable('name', None)
|
||||
ctx: Load
|
||||
TemplateStringPart: [17, 47] - [17, 48]
|
||||
text: '"!"'
|
||||
s: '!'
|
||||
]
|
||||
TemplateString: [17, 50] - [17, 66]
|
||||
prefix: 't"'
|
||||
values: [
|
||||
TemplateStringPart: [17, 52] - [17, 65]
|
||||
text: '" How are you?"'
|
||||
s: ' How are you?'
|
||||
]
|
||||
]
|
||||
]
|
||||
orelse: None
|
||||
]
|
||||
17
python/extractor/tests/parser/template_strings_new.py
Normal file
17
python/extractor/tests/parser/template_strings_new.py
Normal file
@@ -0,0 +1,17 @@
|
||||
name = "World"
|
||||
value = 42.5678
|
||||
first = "first"
|
||||
second = "second"
|
||||
|
||||
if 1:
|
||||
t""
|
||||
if 2:
|
||||
t"Hello, {name}!"
|
||||
if 3:
|
||||
t"Value: {value:.2f}, Hex: {value:#x}"
|
||||
if 4:
|
||||
t"Just a regular string."
|
||||
if 5:
|
||||
t"Multiple {first} and {second} placeholders."
|
||||
if 6:
|
||||
t"Implicit concatenation: " t"Hello, {name}!" t" How are you?"
|
||||
@@ -117,6 +117,9 @@
|
||||
(string string_content: (_) @part)
|
||||
{ let @part.node = (ast-node @part "StringPart") }
|
||||
|
||||
(template_string string_content: (_) @part)
|
||||
{ let @part.node = (ast-node @part "TemplateStringPart") }
|
||||
|
||||
; A string concatenation that contains no interpolated expressions is just a `Str` (and its children
|
||||
; will be `StringPart`s). A string concatenation that contains interpolated expressions is a
|
||||
; `JoinedStr`, however.
|
||||
@@ -142,6 +145,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
(template_string) @tstring
|
||||
{ let @tstring.node = (ast-node @tstring "TemplateString") }
|
||||
|
||||
(concatenated_template_string) @tstrings
|
||||
{ let @tstrings.node = (ast-node @tstrings "JoinedTemplateString") }
|
||||
|
||||
(pair) @kvpair
|
||||
{ let @kvpair.node = (ast-node @kvpair "KeyValuePair") }
|
||||
|
||||
@@ -2052,6 +2061,44 @@
|
||||
|
||||
;;;;;; End of JoinedStr (`f"foo"`)
|
||||
|
||||
;;;;;; JoinedTemplateString / TemplateString (`t"foo"`)
|
||||
|
||||
; Record the prefix of the template string.
|
||||
(template_string) @tstring
|
||||
{
|
||||
attr (@tstring.node) prefix = (string-prefix @tstring)
|
||||
}
|
||||
|
||||
; Attach raw children (string parts and interpolations) to the template string node.
|
||||
(template_string (string_content) @part) @tmpl_any
|
||||
{
|
||||
edge @tmpl_any.node -> @part.node
|
||||
attr (@tmpl_any.node -> @part.node) values = (named-child-index @part)
|
||||
attr (@part.node) ctx = "load"
|
||||
let safe_string = (concatenate-strings (string-safe-prefix @tmpl_any) (source-text @part) (string-quotes @tmpl_any))
|
||||
attr (@part.node) s = safe_string
|
||||
attr (@part.node) text = safe_string
|
||||
}
|
||||
|
||||
(template_string (interpolation expression: (_) @part) @interp) @tmpl_any
|
||||
{
|
||||
edge @tmpl_any.node -> @part.node
|
||||
attr (@tmpl_any.node -> @part.node) values = (named-child-index @interp)
|
||||
attr (@part.node) ctx = "load"
|
||||
}
|
||||
|
||||
|
||||
; Concatenated template strings simply have a list-like field containing the template strings that
|
||||
; are concatenated together.
|
||||
(concatenated_template_string (template_string) @tstring) @tmpl_concat
|
||||
{
|
||||
edge @tmpl_concat.node -> @tstring.node
|
||||
attr (@tmpl_concat.node -> @tstring.node) strings = (named-child-index @tstring)
|
||||
attr (@tstring.node) ctx = "load"
|
||||
}
|
||||
|
||||
;;;;;; End of JoinedTemplateString / TemplateString (`t"foo"`)
|
||||
|
||||
|
||||
|
||||
;;;;;; List (`[...]`)
|
||||
|
||||
@@ -140,15 +140,22 @@ pub mod extra_functions {
|
||||
}
|
||||
|
||||
fn safe(&self) -> Prefix {
|
||||
// Remove format (f/F) and template (t/T) flags when generating a safe prefix.
|
||||
Prefix {
|
||||
flags: self.flags.clone().replace("f", "").replace("F", ""),
|
||||
flags: self
|
||||
.flags
|
||||
.clone()
|
||||
.replace("f", "")
|
||||
.replace("F", "")
|
||||
.replace("t", "")
|
||||
.replace("T", ""),
|
||||
quotes: self.quotes.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_prefix(s: &str) -> Prefix {
|
||||
let flags_matcher = regex::Regex::new("^[bfurBFUR]{0,2}").unwrap();
|
||||
let flags_matcher = regex::Regex::new("^[bfurtBFURT]{0,2}").unwrap();
|
||||
let mut end = 0;
|
||||
let flags = match flags_matcher.find(s) {
|
||||
Some(m) => {
|
||||
@@ -170,7 +177,7 @@ pub mod extra_functions {
|
||||
quotes = "}";
|
||||
}
|
||||
Prefix {
|
||||
flags: flags.to_lowercase().to_owned(),
|
||||
flags: flags.to_owned(),
|
||||
quotes: quotes.to_owned(),
|
||||
}
|
||||
}
|
||||
@@ -198,6 +205,12 @@ pub mod extra_functions {
|
||||
let p = get_prefix("\"\"\"\"\"\"");
|
||||
assert_eq!(p.flags, "");
|
||||
assert_eq!(p.quotes, "\"\"\"");
|
||||
let p = get_prefix("t\"hello\"");
|
||||
assert_eq!(p.flags, "t");
|
||||
assert_eq!(p.quotes, "\"");
|
||||
let p = get_prefix("Tr'world'");
|
||||
assert_eq!(p.flags, "Tr");
|
||||
assert_eq!(p.quotes, "'");
|
||||
}
|
||||
|
||||
fn get_string_contents(s: String) -> String {
|
||||
@@ -227,6 +240,10 @@ pub mod extra_functions {
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
let s = "''''''";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
let s = "t\"tmpl\"";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "tmpl");
|
||||
let s = "Tr'world'";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "world");
|
||||
}
|
||||
|
||||
pub struct StringPrefix;
|
||||
@@ -291,7 +308,11 @@ pub mod extra_functions {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]).full();
|
||||
let prefix = prefix.replace("f", "").replace("F", "");
|
||||
let prefix = prefix
|
||||
.replace("f", "")
|
||||
.replace("F", "")
|
||||
.replace("t", "")
|
||||
.replace("T", "");
|
||||
Ok(Value::String(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user