mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #17873 from github/tausbn/python-fix-generator-expression-locations
Python: Even more parser fixes
This commit is contained in:
@@ -404,7 +404,7 @@
|
||||
|
||||
;;; GeneratorExp
|
||||
|
||||
(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @generatorexp
|
||||
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp
|
||||
{
|
||||
attr (@generatorexp.node) _location_start = (location-start @start)
|
||||
attr (@generatorexp.node) _location_end = (location-end @end)
|
||||
@@ -416,13 +416,13 @@
|
||||
attr (@if.node) _location_end = (location-end @expr)
|
||||
}
|
||||
|
||||
(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @child (_) @end . (comment)* . ")" .) @genexpr
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
|
||||
{
|
||||
attr (@child.node) _location_start = (location-start @start)
|
||||
attr (@child.node) _location_end = (location-end @end)
|
||||
}
|
||||
|
||||
(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
|
||||
{
|
||||
attr (@end.node) _location_start = (location-start @start)
|
||||
attr (@end.node) _location_end = (location-end @end)
|
||||
@@ -863,7 +863,7 @@
|
||||
; information for the entire generator expression (yes, it is a wide parameter!) and so we must recreate the logic for
|
||||
; setting this location information correctly.
|
||||
|
||||
(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @genexpr
|
||||
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
|
||||
{
|
||||
; Synthesize the `genexpr` function
|
||||
let @genexpr.fun = (ast-node @genexpr "Function")
|
||||
@@ -2661,6 +2661,14 @@
|
||||
let @with.first = @first.node
|
||||
}
|
||||
|
||||
; Async status
|
||||
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
|
||||
; as this is the behaviour of the old parser.
|
||||
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
|
||||
{
|
||||
attr (@with.node) is_async = #true
|
||||
}
|
||||
|
||||
(with_item
|
||||
value: (_) @value
|
||||
) @with
|
||||
@@ -3264,6 +3272,16 @@
|
||||
}
|
||||
}
|
||||
|
||||
; Async status
|
||||
(function_definition "async" "def" @def_keyword) @funcdef
|
||||
{
|
||||
let start = (location-start @def_keyword)
|
||||
attr (@funcdef.function) is_async = #true
|
||||
attr (@funcdef.node) _location_start = start
|
||||
attr (@funcdef.function) _location_start = start
|
||||
attr (@funcdef.funcexpr) _location_start = start
|
||||
}
|
||||
|
||||
;;; Decorators
|
||||
|
||||
(decorated_definition
|
||||
@@ -3478,5 +3496,9 @@
|
||||
|
||||
[(tuple element: (_)) (tuple_pattern)] @tup
|
||||
{
|
||||
attr (@tup.node) parenthesised = #true
|
||||
; In order to avoid writing to the `parenthesised` attribute twice, we only set it here
|
||||
; if the surrounding expression is not a `parenthesized_expression`.
|
||||
if (not (instance-of (get-parent @tup) "parenthesized_expression")) {
|
||||
attr (@tup.node) parenthesised = #true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -751,7 +751,6 @@ module.exports = grammar({
|
||||
$.comparison_operator,
|
||||
$.not_operator,
|
||||
$.boolean_operator,
|
||||
$.await,
|
||||
$.lambda,
|
||||
$.primary_expression,
|
||||
$.conditional_expression,
|
||||
@@ -759,6 +758,7 @@ module.exports = grammar({
|
||||
),
|
||||
|
||||
primary_expression: $ => choice(
|
||||
$.await,
|
||||
$.binary_operator,
|
||||
$.identifier,
|
||||
$.keyword_identifier,
|
||||
@@ -1202,7 +1202,7 @@ module.exports = grammar({
|
||||
|
||||
await: $ => prec(PREC.unary, seq(
|
||||
'await',
|
||||
$.expression
|
||||
$.primary_expression
|
||||
)),
|
||||
|
||||
comment: $ => token(seq('#', /.*/)),
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
{
|
||||
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
|
||||
"name": "python",
|
||||
"word": "identifier",
|
||||
"rules": {
|
||||
@@ -3843,10 +3842,6 @@
|
||||
"type": "SYMBOL",
|
||||
"name": "boolean_operator"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "await"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "lambda"
|
||||
@@ -3868,6 +3863,10 @@
|
||||
"primary_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "await"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "binary_operator"
|
||||
@@ -6586,7 +6585,7 @@
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "expression"
|
||||
"name": "primary_expression"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -6696,3 +6695,4 @@
|
||||
"parameter"
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -115,10 +115,6 @@
|
||||
"type": "expression",
|
||||
"named": true,
|
||||
"subtypes": [
|
||||
{
|
||||
"type": "await",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "boolean_operator",
|
||||
"named": true
|
||||
@@ -229,6 +225,10 @@
|
||||
"type": "attribute",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "await",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "binary_operator",
|
||||
"named": true
|
||||
@@ -587,7 +587,7 @@
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "expression",
|
||||
"type": "primary_expression",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
@@ -2691,7 +2691,6 @@
|
||||
{
|
||||
"type": "module",
|
||||
"named": true,
|
||||
"root": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
@@ -3816,10 +3815,6 @@
|
||||
"type": ":=",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": ";",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "<",
|
||||
"named": false
|
||||
@@ -3876,10 +3871,6 @@
|
||||
"type": "[",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "\\",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "]",
|
||||
"named": false
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -161,6 +161,22 @@ struct Scanner {
|
||||
} else if (lexer->lookahead == '\\') {
|
||||
if (delimiter.is_raw()) {
|
||||
lexer->advance(lexer, false);
|
||||
// In raw strings, backslashes _can_ escape the same kind of quotes as the outer
|
||||
// string, so we must take care to traverse any such escaped quotes now. If we don't do
|
||||
// this, we will mistakenly consider the string to end at that escaped quote.
|
||||
// Likewise, this also extends to escaped backslashes.
|
||||
if (lexer->lookahead == end_character || lexer->lookahead == '\\') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
// Newlines after backslashes also cause issues, so we explicitly step over them here.
|
||||
if (lexer->lookahead == '\r') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
} else if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
continue;
|
||||
} else if (delimiter.is_bytes()) {
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
@@ -13,8 +13,9 @@ extern "C" {
|
||||
#define ts_builtin_sym_end 0
|
||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSStateId;
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
@@ -47,7 +48,6 @@ struct TSLexer {
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
void (*log)(const TSLexer *, const char *, ...);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
@@ -87,11 +87,6 @@ typedef union {
|
||||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
typedef struct {
|
||||
int32_t start;
|
||||
int32_t end;
|
||||
} TSCharacterRange;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
@@ -128,41 +123,15 @@ struct TSLanguage {
|
||||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
const TSStateId *primary_state_ids;
|
||||
};
|
||||
|
||||
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
||||
uint32_t index = 0;
|
||||
uint32_t size = len - index;
|
||||
while (size > 1) {
|
||||
uint32_t half_size = size / 2;
|
||||
uint32_t mid_index = index + half_size;
|
||||
TSCharacterRange *range = &ranges[mid_index];
|
||||
if (lookahead >= range->start && lookahead <= range->end) {
|
||||
return true;
|
||||
} else if (lookahead > range->end) {
|
||||
index = mid_index;
|
||||
}
|
||||
size -= half_size;
|
||||
}
|
||||
TSCharacterRange *range = &ranges[index];
|
||||
return (lookahead >= range->start && lookahead <= range->end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define UNUSED __pragma(warning(suppress : 4101))
|
||||
#else
|
||||
#define UNUSED __attribute__((unused))
|
||||
#endif
|
||||
|
||||
#define START_LEXER() \
|
||||
bool result = false; \
|
||||
bool skip = false; \
|
||||
UNUSED \
|
||||
bool eof = false; \
|
||||
int32_t lookahead; \
|
||||
goto start; \
|
||||
@@ -178,17 +147,6 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ADVANCE_MAP(...) \
|
||||
{ \
|
||||
static const uint16_t map[] = { __VA_ARGS__ }; \
|
||||
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
|
||||
if (map[i] == lookahead) { \
|
||||
state = map[i + 1]; \
|
||||
goto next_state; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SKIP(state_value) \
|
||||
{ \
|
||||
skip = true; \
|
||||
@@ -207,7 +165,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
||||
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
|
||||
|
||||
#define STATE(id) id
|
||||
|
||||
@@ -217,7 +175,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value) \
|
||||
.state = state_value \
|
||||
} \
|
||||
}}
|
||||
|
||||
@@ -225,7 +183,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value), \
|
||||
.state = state_value, \
|
||||
.repetition = true \
|
||||
} \
|
||||
}}
|
||||
@@ -238,15 +196,14 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
||||
} \
|
||||
}}
|
||||
|
||||
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
||||
{{ \
|
||||
.reduce = { \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_name, \
|
||||
.child_count = children, \
|
||||
.dynamic_precedence = precedence, \
|
||||
.production_id = prod_id \
|
||||
}, \
|
||||
#define REDUCE(symbol_val, child_count_val, ...) \
|
||||
{{ \
|
||||
.reduce = { \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
__VA_ARGS__ \
|
||||
}, \
|
||||
}}
|
||||
|
||||
#define RECOVER() \
|
||||
|
||||
Reference in New Issue
Block a user