mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #17822 from github/tausbn/python-more-parser-fixes
Python: A few more parser fixes
This commit is contained in:
@@ -97,9 +97,27 @@ class AstDumper(object):
|
||||
|
||||
|
||||
class StdoutLogger(logging.Logger):
    """Logger that prints every message to stdout and counts problems.

    Each call to :meth:`warn` or :meth:`error` bumps ``error_count`` so a
    caller can later ask, via :meth:`had_errors`, whether anything went
    wrong, and clear the tally with :meth:`reset_error_count`.
    """

    # Number of warn()/error() calls since the last reset. This is a
    # class-level default; the first ``+=`` creates a per-instance value.
    error_count = 0

    def log(self, level, fmt, *args):
        """Write one message line to stdout.

        ``level`` is accepted for signature compatibility but is not used
        here: every message is written regardless of its level.

        ``fmt`` is %-interpolated with ``args`` only when ``args`` is
        non-empty. A message that was already formatted by the caller and
        happens to contain a literal ``%`` is therefore written verbatim
        instead of raising a formatting error.
        """
        message = str(fmt)
        if args:
            message = message % args
        sys.stdout.write(message + "\n")

    def info(self, fmt, *args):
        """Write an informational message; does not affect ``error_count``."""
        self.log(logging.INFO, fmt, *args)

    def warn(self, fmt, *args):
        """Write a warning message and count it as a problem."""
        self.log(logging.WARN, fmt, *args)
        self.error_count += 1

    def error(self, fmt, *args):
        """Write an error message and count it as a problem."""
        self.log(logging.ERROR, fmt, *args)
        self.error_count += 1

    def had_errors(self):
        """Return True if any warning or error was logged since the last reset."""
        return self.error_count > 0

    def reset_error_count(self):
        """Clear the tally of logged warnings and errors."""
        self.error_count = 0
|
||||
|
||||
def old_parser(inputfile, logger):
|
||||
mod = PythonSourceModule(None, inputfile, logger)
|
||||
logger.close()
|
||||
|
||||
@@ -440,7 +440,7 @@ def concatenate_stringparts(stringparts, logger):
|
||||
try:
|
||||
return "".join(decode_str(stringpart.s) for stringpart in stringparts)
|
||||
except Exception as ex:
|
||||
logger.error("Unable to concatenate string %s getting error %s", stringparts, ex)
|
||||
logger.error("Unable to concatenate string {} getting error {}".format(stringparts, ex))
|
||||
return stringparts[0].s
|
||||
|
||||
|
||||
|
||||
@@ -14,4 +14,6 @@ x, y = z, w = 3, 4
|
||||
|
||||
s, *t = u
|
||||
|
||||
[v, *w] = x
|
||||
|
||||
o,p, = q,r,
|
||||
|
||||
@@ -55,3 +55,13 @@
|
||||
t = tuple(x for y in z)
|
||||
|
||||
[( t, ) for v in w]
|
||||
|
||||
[# comment
|
||||
a for b in c # comment
|
||||
# comment
|
||||
] # comment
|
||||
|
||||
[# comment
|
||||
d for e in f if g # comment
|
||||
# comment
|
||||
] # comment
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
Module: [1, 0] - [22, 0]
|
||||
Module: [1, 0] - [27, 0]
|
||||
body: [
|
||||
Try: [1, 0] - [1, 4]
|
||||
body: [
|
||||
@@ -133,4 +133,24 @@ Module: [1, 0] - [22, 0]
|
||||
variable: Variable('v', None)
|
||||
ctx: Load
|
||||
]
|
||||
Try: [23, 0] - [23, 4]
|
||||
body: [
|
||||
Pass: [24, 4] - [24, 8]
|
||||
]
|
||||
orelse: []
|
||||
handlers: [
|
||||
ExceptGroupStmt: [25, 0] - [26, 8]
|
||||
type:
|
||||
Name: [25, 8] - [25, 11]
|
||||
variable: Variable('foo', None)
|
||||
ctx: Load
|
||||
name:
|
||||
Name: [25, 15] - [25, 16]
|
||||
variable: Variable('e', None)
|
||||
ctx: Store
|
||||
body: [
|
||||
Pass: [26, 4] - [26, 8]
|
||||
]
|
||||
]
|
||||
finalbody: []
|
||||
]
|
||||
|
||||
@@ -19,3 +19,8 @@ else:
|
||||
finally:
|
||||
u
|
||||
v
|
||||
|
||||
try:
|
||||
pass
|
||||
except *foo as e:
|
||||
pass
|
||||
|
||||
41
python/extractor/tests/parser/functions_new.expected
Normal file
41
python/extractor/tests/parser/functions_new.expected
Normal file
@@ -0,0 +1,41 @@
|
||||
Module: [1, 0] - [3, 0]
|
||||
body: [
|
||||
Assign: [1, 0] - [1, 42]
|
||||
targets: [
|
||||
Name: [1, 4] - [1, 26]
|
||||
variable: Variable('tuple_typed_list_splat', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
FunctionExpr: [1, 0] - [1, 42]
|
||||
name: 'tuple_typed_list_splat'
|
||||
args:
|
||||
arguments
|
||||
defaults: []
|
||||
kw_defaults: []
|
||||
annotations: []
|
||||
varargannotation:
|
||||
Starred: [1, 35] - [1, 40]
|
||||
value:
|
||||
Name: [1, 36] - [1, 40]
|
||||
variable: Variable('ARGS', None)
|
||||
ctx: Load
|
||||
ctx: Load
|
||||
kwargannotation: None
|
||||
kw_annotations: []
|
||||
returns: None
|
||||
inner_scope:
|
||||
Function: [1, 0] - [1, 42]
|
||||
name: 'tuple_typed_list_splat'
|
||||
type_parameters: []
|
||||
args: []
|
||||
vararg:
|
||||
Name: [1, 28] - [1, 32]
|
||||
variable: Variable('args', None)
|
||||
ctx: Param
|
||||
kwonlyargs: []
|
||||
kwarg: None
|
||||
body: [
|
||||
Pass: [2, 4] - [2, 8]
|
||||
]
|
||||
]
|
||||
2
python/extractor/tests/parser/functions_new.py
Normal file
2
python/extractor/tests/parser/functions_new.py
Normal file
@@ -0,0 +1,2 @@
|
||||
def tuple_typed_list_splat(*args : *ARGS):
|
||||
pass
|
||||
@@ -49,6 +49,8 @@ class ParserTest(unittest.TestCase):
|
||||
diff = e.output
|
||||
if diff:
|
||||
pytest.fail(diff.decode("utf-8"))
|
||||
self.check_for_stdout_errors(logger)
|
||||
|
||||
self.assertEqual(self.capsys.readouterr().err, "")
|
||||
os.remove(oldfile)
|
||||
os.remove(newfile)
|
||||
@@ -84,9 +86,15 @@ class ParserTest(unittest.TestCase):
|
||||
diff = e.output
|
||||
if diff:
|
||||
pytest.fail(diff.decode("utf-8"))
|
||||
|
||||
self.check_for_stdout_errors(logger)
|
||||
self.assertEqual(self.capsys.readouterr().err, "")
|
||||
os.remove(actual)
|
||||
|
||||
def check_for_stdout_errors(self, logger):
    """Fail the current test if ``logger`` recorded any errors or warnings.

    Does nothing when ``logger.had_errors()`` is false. Otherwise the
    logger's tally is cleared and the test is failed via ``pytest.fail``.
    """
    if not logger.had_errors():
        return
    # Reset first: pytest.fail() raises, so nothing after it would run.
    # NOTE(review): presumably this keeps a shared logger clean for the
    # next test -- confirm against how the logger is created.
    logger.reset_error_count()
    pytest.fail("Errors/warnings were logged to stdout during testing.")
|
||||
|
||||
def setup_tests():
|
||||
test_folder = os.path.join(os.path.dirname(__file__), "parser")
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
[ (expression_list) (tuple) (tuple_pattern) (pattern_list) ] @tuple
|
||||
{ let @tuple.node = (ast-node @tuple "Tuple") }
|
||||
|
||||
(list_pattern) @list
|
||||
{ let @list.node = (ast-node @list "List") }
|
||||
|
||||
(call) @call { let @call.node = (ast-node @call "Call") }
|
||||
|
||||
(for_statement) @for
|
||||
@@ -1059,30 +1062,38 @@
|
||||
let @genexpr.result = tuple
|
||||
}
|
||||
|
||||
; For the final `if` clause, we need to hook it up with the `yield` expression and with its associated `for` clause.
|
||||
; For the final clause, we need to hook it up with the rest of the expression.
|
||||
; If it's an `if` clause, we need to hook it up with the `yield` expression and with its associated
|
||||
; `for` clause.
|
||||
; If it's a `for` clause, we only need to create and hook it up with the `yield` expression.
|
||||
;
|
||||
; It would be tempting to use anchors here, but they just don't work. In particular, an anchor of
|
||||
; the form `. (comment)* . )` (which would be needed in order to handle the case where there are
|
||||
; comments after the last clause) causes the `tree-sitter` query engine to match _all_ clauses, not
|
||||
; just the last one.
|
||||
; Instead, we gather up all clauses in a list (these will be in the order they appear in the source
|
||||
; code), and extract the last element using a custom Rust function.
|
||||
[
|
||||
(generator_expression
|
||||
body: (_) @body
|
||||
(if_clause) @last
|
||||
.
|
||||
[(if_clause) (for_in_clause)]+ @last_candidates
|
||||
) @genexpr
|
||||
(list_comprehension
|
||||
body: (_) @body
|
||||
(if_clause) @last
|
||||
.
|
||||
[(if_clause) (for_in_clause)]+ @last_candidates
|
||||
) @genexpr
|
||||
(set_comprehension
|
||||
body: (_) @body
|
||||
(if_clause) @last
|
||||
.
|
||||
[(if_clause) (for_in_clause)]+ @last_candidates
|
||||
) @genexpr
|
||||
(dictionary_comprehension
|
||||
body: (_) @body
|
||||
(if_clause) @last
|
||||
.
|
||||
[(if_clause) (for_in_clause)]+ @last_candidates
|
||||
) @genexpr
|
||||
]
|
||||
{
|
||||
let last = (get-last-element @last_candidates)
|
||||
|
||||
let expr = (ast-node @body "Expr")
|
||||
let yield = (ast-node @body "Yield")
|
||||
|
||||
@@ -1093,50 +1104,19 @@
|
||||
|
||||
attr (yield) value = @genexpr.result
|
||||
attr (@body.node) ctx = "load"
|
||||
edge @last.first_if -> expr
|
||||
attr (@last.first_if -> expr) body = 0
|
||||
|
||||
; Hook up this `if` clause with its `for` clause
|
||||
edge @last.for -> @last.node
|
||||
attr (@last.for -> @last.node) body = 0
|
||||
}
|
||||
if (instance-of last "if_clause") {
|
||||
edge last.first_if -> expr
|
||||
attr (last.first_if -> expr) body = 0
|
||||
|
||||
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
|
||||
[
|
||||
(generator_expression
|
||||
body: (_) @body
|
||||
(for_in_clause) @last
|
||||
.
|
||||
) @genexpr
|
||||
(list_comprehension
|
||||
body: (_) @body
|
||||
(for_in_clause) @last
|
||||
.
|
||||
) @genexpr
|
||||
(set_comprehension
|
||||
body: (_) @body
|
||||
(for_in_clause) @last
|
||||
.
|
||||
) @genexpr
|
||||
(dictionary_comprehension
|
||||
body: (_) @body
|
||||
(for_in_clause) @last
|
||||
.
|
||||
) @genexpr
|
||||
]
|
||||
{
|
||||
let expr = (ast-node @body "Expr")
|
||||
let yield = (ast-node @body "Yield")
|
||||
|
||||
let @genexpr.expr = expr
|
||||
let @genexpr.yield = yield
|
||||
|
||||
attr (expr) value = yield
|
||||
|
||||
attr (yield) value = @genexpr.result
|
||||
attr (@body.node) ctx = "load"
|
||||
edge @last.node -> expr
|
||||
attr (@last.node -> expr) body = 0
|
||||
; Hook up this `if` clause with its `for` clause
|
||||
edge last.for -> last.node
|
||||
attr (last.for -> last.node) body = 0
|
||||
} else {
|
||||
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
|
||||
edge last.node -> expr
|
||||
attr (last.node -> expr) body = 0
|
||||
}
|
||||
}
|
||||
|
||||
; For whatever reason, we do not consider parentheses around the yielded expression if they are present, so
|
||||
@@ -3180,11 +3160,11 @@
|
||||
(typed_parameter
|
||||
(identifier) @name
|
||||
.
|
||||
type: (type (expression) @type)
|
||||
type: (type (_) @type)
|
||||
)
|
||||
(typed_default_parameter
|
||||
name: (_) @name
|
||||
type: (type (expression) @type)
|
||||
type: (type (_) @type)
|
||||
value: (_) @value
|
||||
)
|
||||
] @param
|
||||
@@ -3239,7 +3219,7 @@
|
||||
(list_splat_pattern vararg: (_) @name) @starred
|
||||
(typed_parameter
|
||||
(list_splat_pattern vararg: (_) @name) @starred
|
||||
type: (type (expression) @type)
|
||||
type: (type (_) @type)
|
||||
)
|
||||
]
|
||||
) @params
|
||||
@@ -3256,7 +3236,7 @@
|
||||
|
||||
; Return type
|
||||
(function_definition
|
||||
return_type: (type (expression) @type)
|
||||
return_type: (type (_) @type)
|
||||
) @funcdef
|
||||
{
|
||||
attr (@funcdef.funcexpr) returns = @type.node
|
||||
@@ -3270,7 +3250,7 @@
|
||||
(dictionary_splat_pattern kwarg: (identifier) @name)
|
||||
(typed_parameter
|
||||
(dictionary_splat_pattern kwarg: (identifier) @name)
|
||||
type: (type (expression) @type)
|
||||
type: (type (_) @type)
|
||||
)
|
||||
]
|
||||
) @params
|
||||
@@ -3447,6 +3427,9 @@
|
||||
; Left hand side of an assignment such as `foo, bar = ...`
|
||||
(pattern_list element: (_) @elt) @parent
|
||||
|
||||
; Left hand side of an assignment such as `[foo, bar] = ...`
|
||||
(list_pattern element: (_) @elt) @parent
|
||||
|
||||
; An unadorned tuple (such as in `x = y, z`)
|
||||
(expression_list element: (_) @elt) @parent
|
||||
|
||||
@@ -3483,6 +3466,7 @@
|
||||
(tuple element: (_) @elt)
|
||||
(tuple_pattern element: (_) @elt)
|
||||
(pattern_list element: (_) @elt)
|
||||
(list_pattern element: (_) @elt)
|
||||
(expression_list element: (_) @elt)
|
||||
(parenthesized_expression inner: (_) @elt)
|
||||
(set element: (_) @elt)
|
||||
|
||||
@@ -463,6 +463,22 @@ pub mod extra_functions {
|
||||
Ok(Value::Integer(left % right))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GetLastElement;
|
||||
|
||||
impl Function for GetLastElement {
|
||||
fn call(
|
||||
&self,
|
||||
_graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let list = parameters.param()?.into_list()?;
|
||||
parameters.finish()?;
|
||||
let last = list.last().unwrap_or(&Value::Null).clone();
|
||||
Ok(last)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@@ -562,6 +578,12 @@ fn main() -> Result<()> {
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("mod"), extra_functions::Modulo);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("get-last-element"),
|
||||
extra_functions::GetLastElement,
|
||||
);
|
||||
|
||||
let globals = Variables::new();
|
||||
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
|
||||
let graph = file
|
||||
|
||||
@@ -309,7 +309,8 @@ module.exports = grammar({
|
||||
),
|
||||
|
||||
except_group_clause: $ => seq(
|
||||
'except*',
|
||||
'except',
|
||||
'*',
|
||||
seq(
|
||||
field('type', $.expression),
|
||||
optional(seq(
|
||||
@@ -963,7 +964,7 @@ module.exports = grammar({
|
||||
field('type', $.type)
|
||||
)),
|
||||
|
||||
type: $ => $.expression,
|
||||
type: $ => choice($.list_splat, $.expression),
|
||||
|
||||
keyword_argument: $ => seq(
|
||||
field('name', choice($.identifier, $.keyword_identifier)),
|
||||
|
||||
@@ -1169,7 +1169,11 @@
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "except*"
|
||||
"value": "except"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "*"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
@@ -5289,8 +5293,17 @@
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "SYMBOL",
|
||||
"name": "expression"
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "list_splat"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "expression"
|
||||
}
|
||||
]
|
||||
},
|
||||
"keyword_argument": {
|
||||
"type": "SEQ",
|
||||
|
||||
@@ -3347,6 +3347,10 @@
|
||||
{
|
||||
"type": "expression",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "list_splat",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -3960,10 +3964,6 @@
|
||||
"type": "except",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "except*",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "exec",
|
||||
"named": false
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user