mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Add parser support for template strings
- Extends the scanner with a new token kind representing the start of a template string. This is used to distinguish template strings from regular strings (because only a template string will start with a `_template_string_start` external token).
- Cleans up the logic surrounding interpolations (and the method names) so that format strings and template strings behave the same in this case.

Finally, we add two new node types in the tree-sitter grammar:
- `template_string` behaves like format strings, but is a distinct type (mainly so that an implicit concatenation between template strings and regular strings becomes a syntax error).
- `concatenated_template_string` is the counterpart of `concatenated_string`.

However, internally, the string parts of a template string are just the same `string_content` nodes that are used in regular format strings. We will disambiguate these inside `tsg-python`.
This commit is contained in:
@@ -55,6 +55,7 @@ module.exports = grammar({
|
||||
$._string_start,
|
||||
$._string_content,
|
||||
$._string_end,
|
||||
$._template_string_start,
|
||||
],
|
||||
|
||||
inline: $ => [
|
||||
@@ -423,6 +424,8 @@ module.exports = grammar({
|
||||
),
|
||||
$.string,
|
||||
$.concatenated_string,
|
||||
$.template_string,
|
||||
$.concatenated_template_string,
|
||||
$.none,
|
||||
$.true,
|
||||
$.false
|
||||
@@ -765,6 +768,8 @@ module.exports = grammar({
|
||||
$.keyword_identifier,
|
||||
$.string,
|
||||
$.concatenated_string,
|
||||
$.template_string,
|
||||
$.concatenated_template_string,
|
||||
$.integer,
|
||||
$.float,
|
||||
$.true,
|
||||
@@ -1099,6 +1104,20 @@ module.exports = grammar({
|
||||
field('suffix', alias($._string_end, '"'))
|
||||
),
|
||||
|
||||
// Two or more adjacent `template_string` nodes: one `template_string`
// followed by at least one more (`repeat1`). Only `template_string` nodes
// appear in this rule, so it never matches a mix with other string kinds.
concatenated_template_string: $ => seq(
|
||||
$.template_string,
|
||||
repeat1($.template_string)
|
||||
),
|
||||
|
||||
// A single template string literal:
//   - `prefix`: the external `_template_string_start` token, aliased to '"';
//   - zero or more parts, each either an `interpolation` or a
//     `string_content` node (exposed under fields of the same names);
//   - `suffix`: the external `_string_end` token, aliased to '"'.
template_string: $ => seq(
|
||||
field('prefix', alias($._template_string_start, '"')),
|
||||
repeat(choice(
|
||||
field('interpolation', $.interpolation),
|
||||
field('string_content', $.string_content)
|
||||
)),
|
||||
field('suffix', alias($._string_end, '"'))
|
||||
),
|
||||
|
||||
string_content: $ => prec.right(0, repeat1(
|
||||
choice(
|
||||
$._escape_interpolation,
|
||||
|
||||
@@ -17,6 +17,7 @@ enum TokenType {
|
||||
STRING_START,
|
||||
STRING_CONTENT,
|
||||
STRING_END,
|
||||
TEMPLATE_STRING_START,
|
||||
};
|
||||
|
||||
struct Delimiter {
|
||||
@@ -28,6 +29,7 @@ struct Delimiter {
|
||||
Format = 1 << 4,
|
||||
Triple = 1 << 5,
|
||||
Bytes = 1 << 6,
|
||||
Template = 1 << 7,
|
||||
};
|
||||
|
||||
Delimiter() : flags(0) {}
|
||||
@@ -36,6 +38,14 @@ struct Delimiter {
|
||||
return flags & Format;
|
||||
}
|
||||
|
||||
// Returns true when the Template bit is set in this delimiter's flags.
bool is_template() const {
|
||||
return flags & Template;
|
||||
}
|
||||
|
||||
// Returns true when this delimiter is flagged as a format string or as a
// template string, i.e. when either is_format() or is_template() holds.
bool can_interpolate() const {
|
||||
return is_format() || is_template();
|
||||
}
|
||||
|
||||
// Returns true when the Raw bit is set in this delimiter's flags.
bool is_raw() const {
|
||||
return flags & Raw;
|
||||
}
|
||||
@@ -59,6 +69,10 @@ struct Delimiter {
|
||||
flags |= Format;
|
||||
}
|
||||
|
||||
// Sets the Template bit in this delimiter's flags; other bits are untouched.
void set_template() {
|
||||
flags |= Template;
|
||||
}
|
||||
|
||||
// Sets the Raw bit in this delimiter's flags; other bits are untouched.
void set_raw() {
|
||||
flags |= Raw;
|
||||
}
|
||||
@@ -154,7 +168,7 @@ struct Scanner {
|
||||
int32_t end_character = delimiter.end_character();
|
||||
bool has_content = false;
|
||||
while (lexer->lookahead) {
|
||||
if ((lexer->lookahead == '{' || lexer->lookahead == '}') && delimiter.is_format()) {
|
||||
if ((lexer->lookahead == '{' || lexer->lookahead == '}') && delimiter.can_interpolate()) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
@@ -322,13 +336,17 @@ struct Scanner {
|
||||
}
|
||||
}
|
||||
|
||||
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
|
||||
bool expects_string_start = valid_symbols[STRING_START] || valid_symbols[TEMPLATE_STRING_START];
|
||||
|
||||
if (first_comment_indent_length == -1 && expects_string_start) {
|
||||
Delimiter delimiter;
|
||||
|
||||
bool has_flags = false;
|
||||
while (lexer->lookahead) {
|
||||
if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
|
||||
delimiter.set_format();
|
||||
} else if (lexer->lookahead == 't' || lexer->lookahead == 'T') {
|
||||
delimiter.set_template();
|
||||
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
|
||||
delimiter.set_raw();
|
||||
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
|
||||
@@ -372,7 +390,7 @@ struct Scanner {
|
||||
|
||||
if (delimiter.end_character()) {
|
||||
delimiter_stack.push_back(delimiter);
|
||||
lexer->result_symbol = STRING_START;
|
||||
lexer->result_symbol = delimiter.is_template() ? TEMPLATE_STRING_START : STRING_START;
|
||||
return true;
|
||||
} else if (has_flags) {
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user