mirror of
https://github.com/github/codeql.git
synced 2026-02-13 13:41:08 +01:00
1492 lines
53 KiB
Python
1492 lines
53 KiB
Python
from blib2to3.pgen2 import token
|
|
from ast import literal_eval
|
|
from semmle.python import ast
|
|
from blib2to3.pgen2.parse import ParseError
|
|
import sys
|
|
|
|
# Shared expression-context objects handed to the `visit_X` methods.
LOAD = ast.Load()
STORE = ast.Store()
PARAM = ast.Param()
DEL = ast.Del()

# Tags returned by `visit_argument` to classify a call argument.
POSITIONAL = 1
KEYWORD = 2
|
|
|
|
|
|
class ParseTreeVisitor(object):
    '''Generic tree-walking visitor that dispatches on `node.name`
    (rather than on `type(node).__name__`).
    '''

    def visit(self, node, extra_arg=None):
        '''Call the `visit_<node.name>` method of this visitor on `node`.

        `extra_arg` is forwarded as a second positional argument unless it
        is None, in which case the handler receives `node` only.
        '''
        handler_name = 'visit_' + node.name
        if extra_arg is not None:
            return getattr(self, handler_name)(node, extra_arg)
        return getattr(self, handler_name)(node)
|
|
|
|
class Convertor(ParseTreeVisitor):
|
|
''' Walk the concrete parse tree, returning an AST.
|
|
The CPT is specified by blib2to3/Grammar.txt.
|
|
The AST is specified by semmle/python/master.py.
|
|
Each `visit_X` method takes a `X` node in the CFG and
|
|
produces some part of the AST, usually a single node.
|
|
'''
|
|
|
|
def __init__(self, logger):
    '''Remember `logger` and start with an empty f-string prefix stack.'''
    # F-strings may be nested inside other f-strings, so while walking the
    # tree we keep a stack of the surrounding string prefixes. Inside an
    # f-string such as `f"hello{f'to{you}dear'}world"`, the string part
    # holding "world" has (in the concrete parse tree) a prefix of `}`,
    # which says nothing about how to interpret it -- in particular whether
    # it is a raw string. The "current prefix" is therefore read from the
    # top of this stack. A stack is required because, as the example shows,
    # the outer `f"` prefix must be restored once the inner `f'`-prefixed
    # string has been handled.
    #
    # The stack is pushed and popped in `visit_FSTRING_START` and
    # `visit_FSTRING_END`; the text handling lives in the `parse_string`
    # helper function.
    self.outer_prefix_stack = []
    self.logger = logger
|
|
|
|
|
|
def visit_file_input(self, node):
    '''Convert the root node into an `ast.Module`.

    Children named ENDMARKER or NEWLINE are ignored; a visited child that
    yields a list contributes each of its elements to the module body.
    '''
    ignored = ("ENDMARKER", "NEWLINE")
    # Visit every statement first, then flatten the results.
    visited = [self.visit(child) for child in node.children if child.name not in ignored]
    body = []
    for stmt in visited:
        if isinstance(stmt, list):
            body.extend(stmt)
        else:
            body.append(stmt)
    module = ast.Module(body)
    set_location(module, node)
    return module
|
|
|
|
def visit_import_from(self, node):
    '''Convert a `from ... import ...` statement.

    Returns an `ast.ImportFrom` for `import *`, otherwise an `ast.Import`
    holding one alias per imported name.
    '''
    children = node.children
    pos = 1
    module_start = children[pos].start
    level = 0
    # Every leading "." adds one level to the import.
    while is_token(children[pos], "."):
        level += 1
        pos += 1
    if is_token(children[pos], "import"):
        # Only dots precede `import`, so there is no module name.
        module_end = children[pos-1].end
        pos += 1
        module_name = None
    else:
        module_end = children[pos].end
        module_name = self.visit(children[pos])
        pos += 2

    def new_module_expr():
        # A fresh ImportExpr is created for every use.
        expr = ast.ImportExpr(level, module_name, False)
        set_location(expr, module_start, module_end)
        return expr

    if is_token(children[pos], "*"):
        result = ast.ImportFrom(new_module_expr())
        set_location(result, node)
        return result
    # Skip an opening parenthesis around the imported names, if present.
    if is_token(children[pos], "("):
        import_as_names = children[pos+1]
    else:
        import_as_names = children[pos]
    aliases = []
    for import_as_name in import_as_names.children[::2]:
        aliases.append(self._import_as_name(import_as_name, new_module_expr()))
    result = ast.Import(aliases)
    set_location(result, node)
    return result
|
|
|
|
#Helper for visit_import_from
|
|
def _import_as_name(self, node, module):
    '''Build the `ast.alias` for one name imported from `module`.'''
    parts = node.children
    imported = parts[0].value
    # With three children the third is the `as` target; otherwise the
    # member is bound under its own name.
    bound = parts[2] if len(parts) == 3 else parts[0]
    member = ast.ImportMember(module, imported)
    set_location(member, node)
    target = make_name(bound.value, STORE, bound.start, bound.end)
    result = ast.alias(member, target)
    set_location(result, node)
    return result
|
|
|
|
def visit_small_stmt(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_simple_stmt(self, node):
    '''Return the visited children, skipping SEMI and NEWLINE children.'''
    statements = []
    for child in node.children:
        if child.name in ("SEMI", "NEWLINE"):
            continue
        statements.append(self.visit(child))
    return statements
|
|
|
|
def visit_stmt(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_compound_stmt(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_pass_stmt(self, node):
    '''Return an `ast.Pass` located at `node`.'''
    result = ast.Pass()
    set_location(result, node)
    return result
|
|
|
|
def visit_classdef(self, node):
    '''Convert a class definition into an assignment of a `ClassExpr`
    to the class name.

    The node has 4 children (no parentheses), 6 (empty parentheses) or
    7 (parentheses containing an argument list).
    '''
    children = node.children
    count = len(children)
    if count == 4:
        cls, name, colon, suite = children
        args, keywords = [], []
    elif count == 7:
        cls, name, _, arglist, _, colon, suite = children
        args, keywords = self.visit(arglist)
    else:
        assert len(node.children) == 6
        cls, name, _, _, colon, suite = children
        args, keywords = [], []
    # The recorded location runs from the `class` keyword to the colon.
    start, end = cls.start, colon.end
    body = self.visit(suite)
    inner = ast.Class(name.value, body)
    set_location(inner, start, end)
    cls_expr = ast.ClassExpr(name.value, [], args, keywords, inner)
    set_location(cls_expr, start, end)
    target = make_name(name.value, STORE, name.start, name.end)
    result = ast.Assign(cls_expr, [target])
    set_location(result, start, end)
    return result
|
|
|
|
def visit_arglist(self, node):
    '''Visit every other child and split the resulting (kind, arg) pairs
    into a list of positional and a list of keyword arguments.
    '''
    args, keywords = [], []
    for kind, arg in self._visit_list(node.children[::2]):
        if kind is POSITIONAL:
            args.append(arg)
        elif kind is KEYWORD:
            keywords.append(arg)
    return args, keywords
|
|
|
|
def visit_argument(self, node):
    '''Convert a single call argument.

    Returns a pair `(kind, expr)` where `kind` is POSITIONAL or KEYWORD.
    '''
    children = node.children
    first = children[0]
    if is_token(first, "*"):
        kind = POSITIONAL
        arg = ast.Starred(self.visit(children[1], LOAD), LOAD)
    elif is_token(first, "**"):
        kind = KEYWORD
        arg = ast.DictUnpacking(self.visit(children[1], LOAD))
    elif len(children) == 3 and is_token(children[1], "="):
        try:
            name = get_node_value(first)
        except Exception:
            # Not a legal name
            name = None
            self.logger.warning("Illegal name for keyword on line %s", first.start[0])
        kind = KEYWORD
        arg = ast.keyword(name, self.visit(children[2], LOAD))
    else:
        arg = self.visit(first, LOAD)
        if len(children) == 1:
            return POSITIONAL, arg
        if len(children) == 3 and is_token(children[1], ":="):
            return POSITIONAL, self.visit_namedexpr_test(node, LOAD)
        # Anything else is a generator expression passed as an argument.
        generators = self.visit(children[1])
        kind = POSITIONAL
        arg = ast.GeneratorExp(arg, generators)
        set_location(arg, node)
        rewrite_comp(arg)
    set_location(arg, node)
    return kind, arg
|
|
|
|
def visit_namedexpr_test(self, node, ctx):
    '''Convert a (possibly trivial) named expression.

    A single child is visited with `ctx` unchanged; otherwise the first
    child is the STORE target and the last child the LOAD value of an
    `ast.AssignExpr`.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    target = self.visit(children[0], STORE)
    value = self.visit(children[-1], LOAD)
    assign_expr = ast.AssignExpr(value, target)
    set_location(assign_expr, node)
    return assign_expr
|
|
|
|
def visit_test(self, node, ctx):
    '''Convert a `test` node: either a pass-through to its only child or
    a conditional expression `body if test else orelse`.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    if ctx is not LOAD:
        context_error(node)
    body = self.visit(children[0], ctx)
    condition = self.visit(children[2], ctx)
    alternative = self.visit(children[4], ctx)
    result = ast.IfExp(condition, body, alternative)
    set_location(result, node)
    return result
|
|
|
|
def visit_or_test(self, node, ctx):
    '''Convert an `or` expression via `_boolop`.'''
    operator_class = ast.Or
    return self._boolop(node, operator_class, ctx)
|
|
|
|
def visit_and_test(self, node, ctx):
    '''Convert an `and` expression via `_boolop`.'''
    operator_class = ast.And
    return self._boolop(node, operator_class, ctx)
|
|
|
|
def visit_not_test(self, node, ctx):
    '''Convert a `not` expression, or pass through a single child.'''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    if ctx is not LOAD:
        context_error(node)
    operator = ast.Not()
    operand = self.visit(children[1], ctx)
    result = ast.UnaryOp(operator, operand)
    set_location(result, node)
    return result
|
|
|
|
# Helper for `or` and `and`.
|
|
def _boolop(self, node, opcls, ctx):
    '''Shared implementation for `or` and `and`.

    `opcls` is instantiated to give the operator of the `ast.BoolOp`;
    the operands are the children at even positions.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    values = []
    for operand in children[::2]:
        values.append(self.visit(operand, ctx))
    result = ast.BoolOp(opcls(), values)
    set_location(result, node)
    return result
|
|
|
|
# Helper for various binary expression visitors.
|
|
def _binary(self, node, opfact, ctx):
    '''Shared implementation for binary-operator chains.

    `opfact` is called with each operator child and returns the AST
    operator. The chain is folded left to right into nested `ast.BinOp`s.
    '''
    if len(node.children) == 1:
        return self.visit(node.children[0], ctx)
    if ctx is not LOAD:
        context_error(node)
    remaining = iter(node.children)
    left = self.visit(next(remaining), LOAD)
    # The iterator alternates operator, operand, operator, operand, ...
    for operator in remaining:
        operand = next(remaining)
        right = self.visit(operand, LOAD)
        left = ast.BinOp(left, opfact(operator), right)
        # Each partial result spans from the start of the chain to the
        # end of the operand just consumed.
        set_location(left, node.start, operand.end)
    return left
|
|
|
|
def visit_suite(self, node):
    '''Convert a suite into its statements.

    A single child is visited and returned directly. Otherwise the first
    two children and the last child are dropped and the remaining
    children are visited, with list results flattened into the output.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0])
    visited = [self.visit(child) for child in children[2:-1]]
    statements = []
    for item in visited:
        if isinstance(item, list):
            statements.extend(item)
        else:
            statements.append(item)
    return statements
|
|
|
|
def visit_expr_stmt(self, node):
    '''Convert an expression statement.

    Depending on the children this yields an `ast.Expr`, a plain
    assignment (via `_assign`), an `ast.AnnAssign` or an `ast.AugAssign`.
    '''
    children = node.children
    if len(children) == 1:
        result = ast.Expr(self.visit(children[0], LOAD))
        set_location(result, node)
        return result
    if len(children) > 1 and is_token(children[1], "="):
        return self._assign(node)
    if len(children) == 2:
        # Annotated assignment
        target = self.visit(children[0], STORE)
        annotation = children[1]
        type_anno = self.visit(annotation.children[1], LOAD)
        value = None
        if len(annotation.children) > 2:
            value = self.visit(annotation.children[3], LOAD)
        result = ast.AnnAssign(value, type_anno, target)
    else:
        # Augmented assignment
        lhs = self.visit(children[0], LOAD)
        op = self.visit(children[1])
        rhs = self.visit(children[2], LOAD)
        operation = ast.BinOp(lhs, op, rhs)
        set_location(operation, node)
        result = ast.AugAssign(operation)
    set_location(result, node)
    return result
|
|
|
|
def visit_augassign(self, node):
    '''Look up the operator class for the token in `AUG_ASSIGN_OPS`
    and return a new instance of it.
    '''
    operator_text = node.children[0].value
    operator_class = AUG_ASSIGN_OPS[operator_text]
    return operator_class()
|
|
|
|
#Helper for visit_expr_stmt (for assignment)
|
|
def _assign(self, node):
    '''Build an `ast.Assign` for a (possibly chained) assignment.

    Every even-positioned child except the last is a STORE target;
    the last child is the assigned value.
    '''
    targets = []
    for target_node in node.children[:-1:2]:
        targets.append(self.visit(target_node, STORE))
    value = self.visit(node.children[-1], LOAD)
    result = ast.Assign(value, targets)
    set_location(result, node)
    return result
|
|
|
|
def visit_testlist(self, node, ctx):
    '''Return the single child's expression, or an `ast.Tuple` of the
    even-positioned children when there are several.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    elements = self._visit_list(children[::2], ctx)
    tuple_expr = ast.Tuple(elements, ctx)
    set_location(tuple_expr, node)
    return tuple_expr

visit_testlist_star_expr = visit_testlist
|
|
|
|
def visit_comparison(self, node, ctx):
    '''Convert a comparison chain into an `ast.Compare`, or pass
    through a single child.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    if ctx is not LOAD:
        context_error(node)
    left = self.visit(children[0], ctx)
    # Operators sit at odd positions, the remaining operands at even ones.
    ops = [self.visit(operator) for operator in children[1::2]]
    comparators = [self.visit(operand, ctx) for operand in children[2::2]]
    result = ast.Compare(left, ops, comparators)
    set_location(result, node)
    return result
|
|
|
|
def visit_comp_op(self, node):
    '''Return the AST operator for a comparison operator node.'''
    parts = node.children
    if len(parts) == 1:
        return COMP_OP_CLASSES[parts[0].value]()
    assert len(node.children) == 2
    # Two-token operators: anything starting with `is` becomes IsNot,
    # everything else NotIn.
    if parts[0].value == "is":
        return ast.IsNot()
    return ast.NotIn()
|
|
|
|
def visit_expr(self, node, ctx):
    '''Convert a chain whose operator is always `ast.BitOr`.'''
    def make_operator(_):
        return ast.BitOr()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_xor_expr(self, node, ctx):
    '''Convert a chain whose operator is always `ast.BitXor`.'''
    def make_operator(_):
        return ast.BitXor()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_and_expr(self, node, ctx):
    '''Convert a chain whose operator is always `ast.BitAnd`.'''
    def make_operator(_):
        return ast.BitAnd()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_shift_expr(self, node, ctx):
    '''Convert a shift chain: `<<` gives LShift, anything else RShift.'''
    def make_operator(op):
        if op.value == "<<":
            return ast.LShift()
        return ast.RShift()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_arith_expr(self, node, ctx):
    '''Convert an additive chain: `+` gives Add, anything else Sub.'''
    def make_operator(op):
        if op.value == "+":
            return ast.Add()
        return ast.Sub()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_term(self, node, ctx):
    '''Convert a `term` chain, mapping each operator token through
    `TERM_OP_CLASSES`.
    '''
    def make_operator(op):
        return TERM_OP_CLASSES[op.value]()
    return self._binary(node, make_operator, ctx)
|
|
|
|
def visit_factor(self, node, ctx):
    '''Convert a prefix unary operation, or pass through a single child.

    The operator class is looked up in `FACTOR_OP_CLASSES`.
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    operator = FACTOR_OP_CLASSES[children[0].value]()
    operand = self.visit(children[1], ctx)
    result = ast.UnaryOp(operator, operand)
    set_location(result, node)
    return result
|
|
|
|
def visit_power(self, node, ctx):
    '''This part of the Grammar is formulated in a slightly awkward way,
    so the `await` prefix is handled here and the rest (the `** factor`
    suffix, the atom and its trailers) is delegated to `_power`.
    '''

    # `await` was a valid identifier in earlier versions of Python, so its
    # presence alone does not indicate an `await` expression; we have to
    # look at what follows. The relevant part of the grammar is
    #
    #   power: ['await'] atom trailer* ['**' factor]
    #
    # The case to identify is 'await' appearing as an `atom` rather than
    # as an `await` token.
    #
    # Because `atom` nodes may no longer be present (see
    # `SKIP_IF_SINGLE_CHILD_NAMES` in `__init__.py`), we inspect the node
    # following the (potentially) skipped `atom` instead. If that node is a
    # `trailer` or a "**" token, the first node cannot be an `await` token
    # and must be an `atom`.
    children = node.children
    try:
        follower = children[1]
        next_is_atom = follower.name != "trailer" and not is_token(follower, "**")
    except (IndexError, AttributeError):
        # IndexError if `node` has at most one child.
        # AttributeError if `follower` is a `Leaf` instead of a `Node`.
        next_is_atom = False
    if not (is_token(children[0], "await") and next_is_atom):
        return self._power(children, ctx)
    if ctx is not LOAD:
        context_error(node)
    awaited = self._power(children[1:], ctx)
    result = ast.Await(awaited)
    set_location(result, node)
    return result
|
|
|
|
#Helper for visit_power
|
|
def _power(self, children, ctx):
    '''Convert `atom trailer* ['**' factor]` given as a list of nodes.

    `children[0]` is the base expression. When the second-to-last child
    is a "**" token, the last child is the exponent and the result is an
    `ast.BinOp` with `ast.Pow`; the nodes in between are trailers applied
    to the base in order. Only the last trailer receives `ctx`; the base
    and earlier trailers are always loaded.
    '''
    start = children[0].start
    if len(children) > 1 and is_token(children[-2], "**"):
        if ctx is not LOAD:
            context_error(children[0])
        trailers = children[1:-2]
        pow_expr = self.visit(children[-1], ctx)
    else:
        trailers = children[1:]
        pow_expr = None
    if trailers:
        expr = self.visit(children[0], LOAD)
        for trailer in trailers[:-1]:
            expr = self._apply_trailer(expr, trailer, start, LOAD)
        expr = self._apply_trailer(expr, trailers[-1], start, ctx)
    else:
        expr = self.visit(children[0], ctx)
    # Compare against None explicitly: whether an exponent exists must not
    # depend on the truthiness of the exponent's AST node.
    if pow_expr is not None:
        expr = ast.BinOp(expr, ast.Pow(), pow_expr)
        set_location(expr, children[0].start, children[-1].end)
    return expr
|
|
|
|
#Helper for _power
|
|
def _atom(self, children, ctx):
    '''Convert a base expression followed by trailers.

    Only the final trailer is applied with `ctx`; the base and all
    earlier trailers are loaded.
    '''
    head = children[0]
    start = head.start
    if len(children) == 1:
        return self.visit(head, ctx)
    expr = self.visit(head, LOAD)
    *inner_trailers, last_trailer = children[1:]
    for trailer in inner_trailers:
        expr = self._apply_trailer(expr, trailer, start, LOAD)
    return self._apply_trailer(expr, last_trailer, start, ctx)
|
|
|
|
#Helper for _atom
|
|
def _apply_trailer(self, atom, trailer, start, ctx):
    '''Wrap `atom` in the call, subscript or attribute access described
    by `trailer`; the result is located from `start` to the trailer's end.
    '''
    parts = trailer.children
    opener = parts[0]
    if is_token(opener, "("):
        if is_token(parts[1], ")"):
            # Call without arguments.
            args, keywords = [], []
            closer = parts[1]
        else:
            args, keywords = self.visit(parts[1])
            closer = parts[2]
        end = closer.end
        result = ast.Call(atom, args, keywords)
    elif is_token(opener, "["):
        result = ast.Subscript(atom, self.visit(parts[1], LOAD), ctx)
        end = parts[2].end
    else:
        assert is_token(opener, ".")
        result = ast.Attribute(atom, parts[1].value, ctx)
        end = parts[1].end
    set_location(result, start, end)
    return result
|
|
|
|
def visit_atom(self, node, ctx):
    '''Convert an `atom`: bracketed expressions and empty containers,
    names, numbers, the `...` ellipsis and backquoted repr expressions.
    '''
    children = node.children
    left = children[0]
    if left.value in "[({":
        inner = children[1]
        if hasattr(inner, "value") and inner.value in "])}":
            # Opening bracket directly followed by a closing one.
            if inner.value == ")":
                result = ast.Tuple([], ctx)
            elif inner.value == "]":
                result = ast.List([], ctx)
            else:
                result = ast.Dict([])
            set_location(result, node)
            return result
        result = self.visit(children[1], ctx)
        if left.value == "(":
            result.parenthesised = True
        else:
            # Meaningful bracketing
            set_location(result, node)
        comprehension_types = (ast.GeneratorExp, ast.ListComp, ast.SetComp, ast.DictComp)
        if isinstance(result, comprehension_types):
            rewrite_comp(result)
        return result
    if left.type == token.NAME:
        return make_name(left.value, ctx, left.start, left.end)
    # Everything below can only be loaded.
    if ctx is not LOAD:
        context_error(node)
    if left.type == token.NUMBER:
        number = get_numeric_value(left)
        result = ast.Num(number, left.value)
        set_location(result, left)
        return result
    if left.value == ".":
        assert len(node.children) == 3 and node.children[2].value == "."
        result = ast.Ellipsis()
        set_location(result, node)
        return result
    assert left.type == token.BACKQUOTE
    result = ast.Repr(self.visit(children[1], LOAD))
    set_location(result, node)
    return result
|
|
|
|
def visit_STRING(self, node, ctx):
    '''Convert a STRING token into an `ast.StringPart`.

    The top of `self.outer_prefix_stack` (None when the stack is empty)
    is passed to `parse_string` and `get_text` as the outer prefix.
    '''
    if ctx is not LOAD:
        context_error(node)
    if self.outer_prefix_stack:
        outer_prefix = self.outer_prefix_stack[-1]
    else:
        outer_prefix = None
    prefix, parsed = parse_string(node.value, self.logger, outer_prefix)
    text = get_text(node.value, outer_prefix)
    part = ast.StringPart(prefix, text, parsed)
    set_location(part, node)
    return part
|
|
|
|
def visit_NUMBER(self, node, ctx):
    '''Convert a NUMBER token into an `ast.Num` carrying both the
    numeric value and the original token text.
    '''
    if ctx is not LOAD:
        context_error(node)
    number = ast.Num(get_numeric_value(node), node.value)
    set_location(number, node)
    return number
|
|
|
|
def visit_funcdef(self, node, is_async=False):
    '''Convert a function definition into an assignment of a
    `FunctionExpr` to the function name.

    The recorded location runs from the first child to the colon that
    precedes the body.
    '''
    # funcdef: 'def' NAME parameters ['->' test] ':' suite
    children = node.children
    name_leaf = children[1]
    name = name_leaf.value
    if children[3].value == "->":
        return_type = self.visit(children[4], LOAD)
        end = children[5].end
        body = self.visit(children[6])
    else:
        return_type = None
        end = children[3].end
        body = self.visit(children[4])
    start = children[0].start
    params = children[2]
    # Two children means just the parentheses, i.e. no parameters.
    no_params = len(params.children) == 2
    if no_params:
        args, vararg, kwonlyargs, kwarg = [], None, [], None
    else:
        args, vararg, kwonlyargs, kwarg = self._get_parameters(params.children[1])
    func = ast.Function(name, [], args, vararg, kwonlyargs, kwarg, body, is_async)
    set_location(func, start, end)
    if no_params:
        arguments = ast.arguments([], [], [], None, None, [])
    else:
        arguments = self._get_defaults_and_annotations(params.children[1])
    funcexpr = ast.FunctionExpr(name, arguments, return_type, func)
    set_location(funcexpr, start, end)
    target = make_name(name, STORE, name_leaf.start, name_leaf.end)
    result = ast.Assign(funcexpr, [target])
    set_location(result, start, end)
    return result
|
|
|
|
#Helper for visit_funcdef and visit_lambdef
|
|
def _get_parameters(self, node):
    '''Returns the quadruple: args, vararg, kwonlyargs, kwarg

    Default values (the node following each "=") are skipped here.
    '''
    args, kwonlyargs = [], []
    vararg = kwarg = None
    children = iter(node.children)
    # Parameters up to (and including) a "*" entry.
    for child in children:
        if is_token(child, "*"):
            try:
                child = next(children)
            except StopIteration:
                pass
            else:
                # A "," right after "*" means there is no vararg name.
                if not is_token(child, ","):
                    vararg = self.visit(child, PARAM)
            break
        if is_token(child, ",") or is_token(child, "/"):
            continue
        if is_token(child, "="):
            next(children)
        elif is_token(child, "**"):
            kwarg = self.visit(next(children), PARAM)
        else:
            args.append(self.visit(child, PARAM))
    # kwonly args: whatever the first loop left in the iterator.
    for child in children:
        if is_token(child, ","):
            continue
        if is_token(child, "="):
            next(children)
        elif is_token(child, "**"):
            kwarg = self.visit(next(children), PARAM)
        else:
            kwonlyargs.append(self.visit(child, PARAM))
    return args, vararg, kwonlyargs, kwarg
|
|
|
|
#Helper for visit_funcdef and visit_lambdef
|
|
def _get_defaults_and_annotations(self, node):
    '''Collect defaults and annotations of a parameter list into an
    `ast.arguments` node located at `node`.
    '''
    defaults, kw_defaults = [], []
    annotations, kw_annotations = [], []
    varargannotation = kwargannotation = None
    children = iter(node.children)
    # The i'th element of `kw_defaults` must be the default value of the
    # i'th keyword-only argument. So on meeting the combined token for the
    # argument name and optional annotation we append a `None`, assuming
    # there is no default; if a default does follow, that `None` is
    # removed and the real default added. Likewise for `defaults`.

    # positional-only args and "normal" args
    for child in children:
        if is_token(child, "*"):
            try:
                child = next(children)
            except StopIteration:
                pass
            else:
                if not is_token(child, ","):
                    varargannotation = self.visit(child, LOAD)
            break
        if is_token(child, ",") or is_token(child, "/"):
            continue
        if is_token(child, "="):
            default_node = next(children)
            defaults.pop()
            defaults.append(self.visit(default_node, LOAD))
        elif is_token(child, "**"):
            kwargannotation = self.visit(next(children), LOAD)
        else:
            # Preemptively assume there is no default argument (indicated by None)
            defaults.append(None)
            annotations.append(self.visit(child, LOAD))

    # kwonly args
    for child in children:
        if is_token(child, ","):
            continue
        if is_token(child, "="):
            default_node = next(children)
            kw_defaults.pop()
            kw_defaults.append(self.visit(default_node, LOAD))
        elif is_token(child, "**"):
            kwargannotation = self.visit(next(children), LOAD)
        else:
            # Preemptively assume there is no default argument (indicated by None)
            kw_defaults.append(None)
            kw_annotations.append(self.visit(child, LOAD))
    result = ast.arguments(defaults, kw_defaults, annotations, varargannotation, kwargannotation, kw_annotations)
    set_location(result, node)
    return result
|
|
|
|
def visit_tfpdef(self, node, ctx):
    '''Visit the first child, or return None when it is a "(" token.'''
    # TO DO Support tuple parameters
    # No one uses them any more, so this isn't super important.
    first = node.children[0]
    if not is_token(first, "("):
        return self.visit(first, ctx)
    return None
|
|
|
|
def visit_tname(self, node, ctx):
    '''Convert a (possibly annotated) name.

    With PARAM context the name itself is returned; with any other
    context the annotation (third child) is visited, or None is returned
    when there is only one child.
    '''
    if ctx is PARAM:
        name_leaf = node.children[0]
        return make_name(name_leaf.value, ctx, name_leaf.start, name_leaf.end)
    if len(node.children) > 1:
        return self.visit(node.children[2], ctx)
    return None
|
|
|
|
def visit_decorated(self, node):
    '''Wrap the value of the visited definition in one `ast.Call` per
    decorator and return the (mutated) assignment.

    Decorators are processed in reverse order, so the last-listed one
    becomes the innermost call.
    '''
    assignment = self.visit(node.children[1])
    wrapped = assignment.value
    for deco in reversed(node.children[0].children):
        decorator = self.visit(deco)
        call = ast.Call(decorator, [wrapped], [])
        copy_location(decorator, call)
        wrapped = call
    assignment.value = wrapped
    return assignment
|
|
|
|
def visit_decorators(self, node):
    '''Visit every child and return the results as a list.'''
    decorator_nodes = node.children
    return self._visit_list(decorator_nodes)
|
|
|
|
def visit_decorator(self, node):
    '''Convert the expression of a decorator (the second child) and
    locate the result at that expression.
    '''
    expression_node = node.children[1]
    decorator = self.visit_namedexpr_test(expression_node, LOAD)
    set_location(decorator, expression_node)
    return decorator
|
|
|
|
def _visit_list(self, items, ctx=None):
    '''Visit each of `items`, passing `ctx` along unless it is None.'''
    if ctx is not None:
        return [self.visit(item, ctx) for item in items]
    return [self.visit(item) for item in items]
|
|
|
|
def visit_dotted_name(self, node):
    '''Return the dotted name as a string, joining the values of the
    even-positioned children with ".".
    '''
    parts = [leaf.value for leaf in node.children[::2]]
    return ".".join(parts)
|
|
|
|
def visit_NAME(self, name, ctx):
    '''Convert a NAME token via `make_name`, using the token's own
    start and end as the location.
    '''
    start, end = name.start, name.end
    return make_name(name.value, ctx, start, end)
|
|
|
|
def visit_listmaker(self, node, ctx):
    '''Convert the contents of list brackets into an `ast.List`, or an
    `ast.ListComp` when the second child is not a comma.
    '''
    children = node.children
    is_plain_list = len(children) == 1 or is_token(children[1], ",")
    if is_plain_list:
        items = [self.visit(child, ctx) for child in children[::2]]
        result = ast.List(items, ctx)
    else:
        if ctx is not LOAD:
            context_error(node)
        elt = self.visit(children[0], ctx)
        generators = self.visit(children[1])
        result = ast.ListComp(elt, generators)
    set_location(result, node)
    return result
|
|
|
|
def visit_testlist_gexp(self, node, ctx):
    '''Convert to the single child's expression, an `ast.Tuple` (second
    child is a comma) or an `ast.GeneratorExp` (anything else).
    '''
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    if is_token(children[1], ","):
        items = [self.visit(child, ctx) for child in children[::2]]
        result = ast.Tuple(items, ctx)
    else:
        if ctx is not LOAD:
            context_error(node)
        elt = self.visit(children[0], ctx)
        generators = self.visit(children[1])
        result = ast.GeneratorExp(elt, generators)
    set_location(result, node)
    return result
|
|
|
|
def visit_comp_for(self, node):
    '''Convert a comprehension `for` clause.

    Returns a list starting with the `ast.comprehension` for this clause,
    followed by the comprehensions of any nested `for` clauses. Conditions
    that come before the next `for` clause are attached to this clause.
    '''
    children = node.children
    is_async = is_token(children[0], "async")
    # A leading `async` token shifts every later child by one.
    target = self.visit(children[1+is_async], STORE)
    iterable = self.visit(children[3+is_async], LOAD)
    if len(children) != 5+is_async:
        comp = ast.comprehension(target, iterable, [])
        comp.is_async = is_async
        set_location(comp, node)
        return [comp]
    ifs = []
    end = iterable._end
    rest = self.visit(children[4+is_async])
    # Consume leading conditions; stop at the first nested comprehension.
    while rest and not isinstance(rest[0], ast.comprehension):
        condition = rest[0]
        ifs.append(condition)
        end = condition._end
        rest = rest[1:]
    comp = ast.comprehension(target, iterable, ifs)
    comp.is_async = is_async
    set_location(comp, children[0].start, end)
    return [comp] + rest

visit_old_comp_for = visit_comp_for
|
|
|
|
def visit_comp_iter(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_comp_if(self, node):
    '''Convert a comprehension `if` clause into a list whose first item
    is the condition, followed by whatever a third child contributes.
    '''
    condition = self.visit(node.children[1], LOAD)
    if len(node.children) != 3:
        return [condition]
    following = self.visit(node.children[2])
    return [condition] + following

visit_old_comp_if = visit_comp_if
|
|
|
|
visit_old_comp_iter = visit_comp_iter
|
|
|
|
def visit_exprlist(self, node, ctx):
    '''Convert to a single expression: the only child's expression, or
    an `ast.Tuple` of the even-positioned children.
    '''
    # Despite the name this returns a single expression
    children = node.children
    if len(children) == 1:
        return self.visit(children[0], ctx)
    elements = self._visit_list(children[::2], ctx)
    tuple_expr = ast.Tuple(elements, ctx)
    set_location(tuple_expr, node)
    return tuple_expr

visit_testlist_safe = visit_exprlist
|
|
|
|
def visit_old_test(self, node, ctx):
    '''Delegate to the first child of `node`, keeping `ctx`.'''
    first = node.children[0]
    return self.visit(first, ctx)
|
|
|
|
def visit_if_stmt(self, node):
    '''Convert an `if`/`elif`/`else` statement into nested `ast.If` nodes.

    Clauses are processed from last to first, so each clause ends up as
    the sole element of the `orelse` of the clause before it.
    '''
    children = node.children
    remaining = len(children)
    if is_token(children[-3], "else"):
        orelse = self.visit(children[-1])
        remaining -= 3
    else:
        orelse = None
    # Each clause occupies four children: keyword, test, colon, suite.
    while remaining:
        test = self.visit(children[remaining-3], LOAD)
        body = self.visit(children[remaining-1])
        result = ast.If(test, body, orelse)
        keyword, colon = children[remaining-4], children[remaining-2]
        set_location(result, keyword.start, colon.end)
        orelse = [result]
        remaining -= 4
    return result
|
|
|
|
def visit_import_stmt(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_import_name(self, node):
    '''Convert an `import ...` statement into an `ast.Import` whose
    aliases come from visiting the second child.
    '''
    import_stmt = ast.Import(self.visit(node.children[1]))
    set_location(import_stmt, node)
    return import_stmt
|
|
|
|
def visit_dotted_as_names(self, node):
    '''Visit the even-positioned children and return the results.'''
    name_nodes = node.children[::2]
    return self._visit_list(name_nodes)
|
|
|
|
def visit_dotted_as_name(self, node):
    '''Build the `ast.alias` for one module in an `import` statement.

    With an `as` clause (three children) the alias target is the given
    name; without one it is the first component of the dotted name and
    the `ImportExpr` is created with its third argument set to True.
    '''
    module_node = node.children[0]
    dotted_name = self.visit(module_node)
    if len(node.children) == 3:
        value = ast.ImportExpr(0, dotted_name, False)
        alias_leaf = node.children[2]
        asname = make_name(alias_leaf.value, STORE, alias_leaf.start, alias_leaf.end)
    else:
        value = ast.ImportExpr(0, dotted_name, True)
        top_level = dotted_name.split(".")[0]
        asname = make_name(top_level, STORE, module_node.start, module_node.end)
    set_location(value, module_node)
    result = ast.alias(value, asname)
    set_location(result, node)
    return result
|
|
|
|
def visit_dictsetmaker(self, node, ctx):
    '''Convert the contents of braces: a dictionary when the first child
    is "**" or the second child is ":", otherwise a set.
    '''
    if ctx is not LOAD:
        context_error(node)
    children = node.children
    looks_like_dict = is_token(children[0], "**") or (
        len(children) > 1 and is_token(children[1], ":")
    )
    if looks_like_dict:
        return self._dictmaker(node)
    return self._setmaker(node)
|
|
|
|
#Helper for visit_dictsetmaker (for dictionaries)
|
|
def _dictmaker(self, node):
    '''Build an `ast.DictComp` or an `ast.Dict` from the contents of
    dictionary braces.
    '''
    children = node.children
    if len(children) == 4 and is_token(children[1], ":") and not is_token(children[3], ","):
        # Comprehension form
        key = self.visit(children[0], LOAD)
        value = self.visit(children[2], LOAD)
        generators = self.visit(children[3])
        result = ast.DictComp(key, value, generators)
        set_location(result, node)
        return result
    items = []
    pos = 0
    while pos < len(children):
        first = children[pos]
        if is_token(first, "**"):
            # `**mapping` plus separator: three children.
            last = children[pos+1]
            item = ast.DictUnpacking(self.visit(last, LOAD))
            step = 3
        else:
            # `key : value` plus separator: four children.
            key = self.visit(first, LOAD)
            last = children[pos+2]
            item = ast.KeyValuePair(key, self.visit(last, LOAD))
            step = 4
        set_location(item, first.start, last.end)
        pos += step
        items.append(item)
    result = ast.Dict(items)
    set_location(result, node)
    return result
|
|
|
|
#Helper for visit_dictsetmaker (for sets)
|
|
def _setmaker(self, node):
    '''Build an `ast.SetComp` or an `ast.Set` from the contents of
    set braces.
    '''
    children = node.children
    if len(children) == 2 and not is_token(children[1], ","):
        # Comprehension form
        elt = self.visit(children[0], LOAD)
        generators = self.visit(children[1])
        result = ast.SetComp(elt, generators)
    else:
        result = ast.Set(self._visit_list(children[::2], LOAD))
    set_location(result, node)
    return result
|
|
|
|
def visit_while_stmt(self, node):
    '''Convert a `while` statement; the location covers the children up
    to and including the third one (index 2).
    '''
    children = node.children
    test = self.visit(children[1], LOAD)
    body = self.visit(children[3])
    # Seven children means an `else` suite is present at index 6.
    orelse = self.visit(children[6]) if len(children) == 7 else None
    loop = ast.While(test, body, orelse)
    set_location(loop, children[0].start, children[2].end)
    return loop
|
|
|
|
def visit_flow_stmt(self, node):
    '''Delegate to the first child of `node`.'''
    first = node.children[0]
    return self.visit(first)
|
|
|
|
def visit_break_stmt(self, node):
    '''Return an `ast.Break` located at `node`.'''
    stmt = ast.Break()
    set_location(stmt, node)
    return stmt
|
|
|
|
def visit_continue_stmt(self, node):
    '''Return an `ast.Continue` located at `node`.'''
    stmt = ast.Continue()
    set_location(stmt, node)
    return stmt
|
|
|
|
def visit_return_stmt(self, node):
    '''Convert a `return` statement; the value is None unless the node
    has exactly two children.
    '''
    value = None
    if len(node.children) == 2:
        value = self.visit(node.children[1], LOAD)
    stmt = ast.Return(value)
    set_location(stmt, node)
    return stmt
|
|
|
|
def visit_raise_stmt(self, node):
    '''Convert a `raise` statement.

    Sets `exc` (and `cause` for the `from` form). When the third child is
    not `from`, `type`, `inst` and, with six children, `tback` are set
    instead and `exc` is removed.
    '''
    children = node.children
    stmt = ast.Raise()
    set_location(stmt, node)
    if len(children) == 1:
        return stmt
    stmt.exc = self.visit(children[1], LOAD)
    if len(children) > 3:
        if is_token(children[2], "from"):
            stmt.cause = self.visit(children[3], LOAD)
        else:
            # No `from`: the first expression is the type, not the instance.
            stmt.type = stmt.exc
            del stmt.exc
            stmt.inst = self.visit(children[3], LOAD)
    if len(children) == 6:
        stmt.tback = self.visit(children[5], LOAD)
    return stmt
|
|
|
|
def visit_yield_stmt(self, node):
    '''Wrap the visited first child in an `ast.Expr` statement.'''
    stmt = ast.Expr(self.visit(node.children[0], LOAD))
    set_location(stmt, node)
    return stmt
|
|
|
|
def visit_yield_expr(self, node, ctx):
    '''Convert a yield expression into `ast.Yield` or `ast.YieldFrom`.'''
    if ctx is not LOAD:
        context_error(node)
    if len(node.children) == 1:
        # Bare `yield`.
        result = ast.Yield(None)
    else:
        argument = node.children[1]
        if is_token(argument.children[0], "from"):
            result = ast.YieldFrom(self.visit(argument.children[1], LOAD))
        else:
            result = ast.Yield(self.visit(argument.children[0], LOAD))
    set_location(result, node)
    return result
|
|
|
|
def visit_try_stmt(self, node):
    '''Convert a `try` statement into an `ast.Try`.

    The location runs from the start of the node to the end of its
    second child.
    '''
    children = node.children
    body = self.visit(children[2])
    handlers = []
    pos = 3
    # Children without a `value` attribute are treated as except clauses;
    # each handler spans three children, with the suite at offset 2.
    while pos < len(children) and not hasattr(children[pos], "value"):
        # Except block.
        clause = children[pos]
        exc_type, exc_name = self.visit(clause)
        handler_body = self.visit(children[pos+2])
        handler = ast.ExceptStmt(exc_type, exc_name, handler_body)
        set_location(handler, clause.start , children[pos+1].end)
        handlers.append(handler)
        pos += 3
    if pos < len(children) and is_token(children[pos], "else"):
        orelse = self.visit(children[pos+2])
    else:
        orelse = []
    if is_token(children[-3], "finally"):
        finalbody = self.visit(children[-1])
    else:
        finalbody = []
    result = ast.Try(body, orelse, handlers, finalbody)
    set_location(result, node.start, children[1].end)
    return result
|
|
|
|
def visit_except_clause(self, node):
    '''Convert an `except_clause` node into a `(type, name)` pair.

    `type` is the second child converted in load context (or `None` when
    the clause has a single child); `name` is the fourth child converted
    in store context (or `None` when there are at most three children).
    '''
    child_count = len(node.children)
    exc_type = self.visit(node.children[1], LOAD) if child_count > 1 else None
    exc_name = self.visit(node.children[3], STORE) if child_count > 3 else None
    return exc_type, exc_name
|
|
|
|
def visit_del_stmt(self, node):
    '''Convert a `del_stmt` node into an `ast.Delete`.

    The targets are every other child of the node's second child,
    converted in `DEL` context. A node with a single child yields an
    `ast.Delete` with no targets.
    '''
    targets = []
    if len(node.children) > 1:
        targets = self._visit_list(node.children[1].children[::2], DEL)
    delete_node = ast.Delete(targets)
    set_location(delete_node, node)
    return delete_node
|
|
|
|
# `subscriptlist` and `testlist1` nodes are converted by the same method
# as `testlist` nodes.
visit_subscriptlist = visit_testlist
visit_testlist1 = visit_testlist
|
|
|
|
def visit_subscript(self, node, ctx):
    '''Convert a `subscript` node into an index expression or an `ast.Slice`.

    A node whose only child is not a `:` token is a plain index and is
    converted in `ctx`. Otherwise the children are scanned left to right:
    every `:` token advances to the next slot and every other child is
    converted in load context into the current slot. The three slots are
    passed positionally to `ast.Slice`.
    '''
    children = node.children
    if len(children) == 1 and not is_token(children[0], ":"):
        return self.visit(children[0], ctx)
    slots = [None, None, None]
    position = 0
    for child in children:
        if not is_token(child, ":"):
            slots[position] = self.visit(child, LOAD)
        else:
            position += 1
    slice_node = ast.Slice(*slots)
    set_location(slice_node, node)
    return slice_node
|
|
|
|
def visit_sliceop(self, node, ctx):
    '''Convert a `sliceop` node.

    Raises a `SyntaxError` (via `context_error`) unless `ctx` is `LOAD`.
    Returns the converted second child when the node has exactly two
    children, and `None` otherwise.
    '''
    if ctx is not LOAD:
        context_error(node)
    if len(node.children) != 2:
        return None
    return self.visit(node.children[1], LOAD)
|
|
|
|
def visit_assert_stmt(self, node):
    '''Convert an `assert_stmt` node into an `ast.Assert`.

    The second child is the tested expression; when the node has more
    than two children, the fourth child is the message, else the message
    is `None`.
    '''
    children = node.children
    test = self.visit(children[1], LOAD)
    msg = self.visit(children[3], LOAD) if len(children) > 2 else None
    assert_node = ast.Assert(test, msg)
    set_location(assert_node, node)
    return assert_node
|
|
|
|
def visit_for_stmt(self, node, is_async=False):
    '''Convert a `for_stmt` node into an `ast.For`.

    Children 1, 3 and 5 are the target (store context), the iterable
    (load context) and the body. A node with exactly nine children also
    has an `else` block as its last child; otherwise `orelse` is `None`.
    The result records `is_async` and is located from the start of the
    first child to the end of the fifth.
    '''
    children = node.children
    target = self.visit(children[1], STORE)
    iterable = self.visit(children[3], LOAD)
    body = self.visit(children[5])
    orelse = self.visit(children[8]) if len(children) == 9 else None
    for_node = ast.For(target, iterable, body, orelse)
    for_node.is_async = is_async
    set_location(for_node, children[0].start, children[4].end)
    return for_node
|
|
|
|
def visit_global_stmt(self, node):
    '''Convert a `global_stmt` node into `ast.Global` or `ast.Nonlocal`.

    The class is chosen from the text of the first child (`global` gives
    `ast.Global`, anything else `ast.Nonlocal`). The names are the values
    of every other child, starting with the second.
    '''
    keyword = node.children[0].value
    if keyword == "global":
        node_class = ast.Global
    else:
        node_class = ast.Nonlocal
    names = [name_token.value for name_token in node.children[1::2]]
    declaration = node_class(names)
    set_location(declaration, node)
    return declaration
|
|
|
|
def visit_lambdef(self, node, ctx):
    '''Convert a `lambdef` node into an `ast.Lambda`.

    Raises a `SyntaxError` (via `context_error`) unless `ctx` is `LOAD`.
    The last child is the lambda body; it is wrapped in an `ast.Return`
    which becomes the only statement of an `ast.Function` named
    "lambda". If the second child is the `:` token the lambda has no
    parameters; otherwise that child is handed to `_get_parameters` (for
    the function) and `_get_defaults_and_annotations` (for the lambda's
    arguments).
    '''
    if ctx is not LOAD:
        context_error(node)
    body_node = node.children[-1]
    return_stmt = ast.Return(self.visit(body_node, LOAD))
    set_location(return_stmt, body_node)
    param_node = node.children[1]
    no_params = is_token(param_node, ":")
    if no_params:
        params, vararg, kwonlyargs, kwarg = [], None, [], None
    else:
        params, vararg, kwonlyargs, kwarg = self._get_parameters(param_node)
    func = ast.Function("lambda", [], params, vararg, kwonlyargs, kwarg, [return_stmt], False)
    set_location(func, node)
    if no_params:
        arguments = ast.arguments([], [], [], None, None, [])
    else:
        arguments = self._get_defaults_and_annotations(param_node)
    lambda_node = ast.Lambda(arguments, func)
    set_location(lambda_node, node)
    return lambda_node
|
|
|
|
# `old_lambdef` nodes are converted exactly like `lambdef` nodes.
visit_old_lambdef = visit_lambdef

# `vfpdef` nodes are converted exactly like `tfpdef` nodes.
visit_vfpdef = visit_tfpdef
|
|
|
|
def visit_vname(self, node, ctx):
    '''Convert a `vname` node.

    In `PARAM` context, returns a `Name` built from the first child's
    value and location. In any other context, returns `None`.
    '''
    if ctx is not PARAM:
        return None
    name_token = node.children[0]
    return make_name(name_token.value, ctx, name_token.start, name_token.end)
|
|
|
|
def visit_star_expr(self, node, ctx):
    '''Convert a `star_expr` node into an `ast.Starred`.

    The second child is converted in `ctx`, and the same `ctx` is given
    to the `ast.Starred` wrapper.
    '''
    inner = self.visit(node.children[1], ctx)
    starred = ast.Starred(inner, ctx)
    set_location(starred, node)
    return starred
|
|
|
|
def visit_with_stmt(self, node, is_async=False):
    '''Convert a `with_stmt` node into (possibly nested) `ast.With` nodes.

    Each with-item becomes its own `ast.With`; an item's statement is the
    sole body of the statement for the item before it. The outermost
    `ast.With` is returned: it is located from the start of the first
    child to the end of the second-last child, and it is the only one on
    which `is_async` is set.
    '''
    children = node.children
    body = self.visit(children[-1])
    # Walk the items from last to first so the first item ends up outermost.
    for item in children[-3:0:-2]:
        ctx_mngr, opt_vars = self.visit(item)
        outer = ast.With(ctx_mngr, opt_vars, body)
        set_location(outer, item)
        body = [outer]
    # Override the item-based location of the outermost statement.
    set_location(outer, children[0].start, children[-2].end)
    outer.is_async = is_async
    return outer
|
|
|
|
def visit_with_item(self, node):
    '''Convert a `with_item` node into a `(context manager, target)` pair.

    The first child is converted in load context. The target is `None`
    when the node has a single child; otherwise it is the third child
    converted in store context.
    '''
    ctx_mngr = self.visit(node.children[0], LOAD)
    opt_vars = None
    if len(node.children) != 1:
        opt_vars = self.visit(node.children[2], STORE)
    return ctx_mngr, opt_vars
|
|
|
|
def visit_async_stmt(self, node):
    '''Convert an `async_stmt` node.

    Visits the second child, passing `True` as the extra argument (the
    `is_async` flag of the statement visitors).
    '''
    wrapped_stmt = node.children[1]
    return self.visit(wrapped_stmt, True)
|
|
|
|
# `async_funcdef` nodes are converted exactly like `async_stmt` nodes.
visit_async_funcdef = visit_async_stmt
|
|
|
|
def visit_print_stmt(self, node):
    '''Convert a `print_stmt` node into an `ast.Print`.

    If the second child is the `>>` token, the third child is the
    destination and the printed items are every other child from index 4;
    otherwise there is no destination and the items are every other child
    from index 1. The newline flag is false exactly when the last child
    is a `,` token.
    '''
    children = node.children
    if len(children) > 1 and is_token(children[1], ">>"):
        dest = self.visit(children[2], LOAD)
        items = children[4::2]
    else:
        dest, items = None, children[1::2]
    values = self._visit_list(items, LOAD)
    trailing_comma = is_token(children[-1], ",")
    print_node = ast.Print(dest, values, not trailing_comma)
    set_location(print_node, node)
    return print_node
|
|
|
|
def visit_exec_stmt(self, node):
    '''Convert an `exec_stmt` node into an `ast.Exec`.

    The second child is the executed body. The fourth child (if the node
    has more than three children) is the globals expression, and the
    sixth child (if more than five) the locals expression; missing ones
    are `None`.
    '''
    children = node.children
    child_count = len(children)
    body = self.visit(children[1], LOAD)
    globals_expr = self.visit(children[3], LOAD) if child_count > 3 else None
    locals_expr = self.visit(children[5], LOAD) if child_count > 5 else None
    exec_node = ast.Exec(body, globals_expr, locals_expr)
    set_location(exec_node, node)
    return exec_node
|
|
|
|
def visit_special_operation(self, node, ctx):
    '''Convert a `special_operation` node into an `ast.SpecialOperation`.

    Raises a `SyntaxError` (via `context_error`) unless `ctx` is `LOAD`.
    The operation name is the value of the first child. A node with
    exactly three children has no arguments; otherwise the arguments are
    every other child of the third child, converted in load context.
    '''
    if ctx is not LOAD:
        context_error(node)
    children = node.children
    name = children[0].value
    if len(children) == 3:
        args = []
    else:
        args = self._visit_list(children[2].children[::2], LOAD)
    operation = ast.SpecialOperation(name, args)
    set_location(operation, node)
    return operation
|
|
|
|
def visit_string(self, node, ctx):
    '''Convert a `string` node into an `ast.Str`, an expression, or an
    `ast.JoinedStr`.

    Raises a `SyntaxError` (via `context_error`) unless `ctx` is `LOAD`.
    The converted children are flattened one level, then runs of adjacent
    `ast.StringPart`s are merged into single `ast.Str` nodes, `None`
    entries are dropped, and other expressions are kept as they are. If
    exactly one expression remains it is returned directly; otherwise all
    of them are wrapped in an `ast.JoinedStr` located at `node`.
    '''
    # String parts seen since the last non-string expression.
    pending = []

    def flush_pending():
        '''Merge the pending parts into one `ast.Str` (or `None` if empty).'''
        if not pending:
            return None
        first = pending[0]
        if len(pending) == 1:
            merged = ast.Str(first.s, first.prefix, None)
        else:
            # Our string parts may be any combination of byte and unicode
            # strings, as this is valid in Python 2. We therefore decode
            # the strings into unicode before concatenating.
            text = "".join(decode_str(p.s) for p in pending)
            merged = ast.Str(text, first.prefix, pending[:])
        set_location(merged, (first.lineno, first.col_offset), pending[-1]._end)
        del pending[:]
        return merged

    if ctx is not LOAD:
        context_error(node)

    # Children may convert to a single part or to a list of parts.
    parts = []
    for visited in self._visit_list(node.children, LOAD):
        if isinstance(visited, list):
            parts.extend(visited)
        else:
            parts.append(visited)

    exprs = []
    for part in parts:
        if part is None:
            # Conversion -- currently ignored.
            continue
        if isinstance(part, ast.StringPart):
            pending.append(part)
            continue
        assert isinstance(part, ast.expr), part
        merged = flush_pending()
        if merged:
            exprs.append(merged)
        exprs.append(part)
    merged = flush_pending()
    if merged:
        exprs.append(merged)

    if len(exprs) == 1:
        return exprs[0]
    joined = ast.JoinedStr(exprs)
    set_location(joined, node)
    return joined
|
|
|
|
def visit_fstring_part(self, node, ctx):
    '''Convert the children of an `fstring_part` node into a list.

    `format_specifier` children are flattened first: they are replaced by
    their own children, minus any `FSTRING_SPEC` nodes. The resulting
    sequence is converted with `_visit_list` in `ctx`.
    '''
    nodes_to_visit = []
    for child in node.children:
        if child.name == 'format_specifier':
            # Flatten format_specifiers first
            nodes_to_visit.extend(
                inner for inner in child.children if inner.name != 'FSTRING_SPEC'
            )
        else:
            nodes_to_visit.append(child)
    return self._visit_list(nodes_to_visit, ctx)
|
|
|
|
def visit_format_specifier(self, node, ctx):
    '''Ignore a `format_specifier` node; always returns `None`.

    This is currently never reached, because `visit_fstring_part`
    flattens format specifiers before visiting. Asserts that `ctx` is
    `LOAD`.
    '''
    assert ctx is LOAD
    # Currently ignored
    return None
|
|
|
|
def visit_CONVERSION(self, node, ctx):
    '''Ignore a `CONVERSION` token; it contributes nothing to the AST.'''
    return None
|
|
|
|
def visit_COLON(self, node, ctx):
    '''Ignore a `COLON` token; it contributes nothing to the AST.'''
    return None
|
|
|
|
def visit_EQUAL(self, node, ctx):
    '''Ignore an `EQUAL` token; it contributes nothing to the AST.'''
    return None
|
|
|
|
def visit_FSTRING_START(self, node, ctx):
    '''Convert an `FSTRING_START` token and enter its f-string.

    The token is converted like a `STRING`; its prefix is then pushed
    onto `self.outer_prefix_stack` so that later parts of the same
    f-string can look it up.
    '''
    start_part = self.visit_STRING(node, ctx)
    # Push the current prefix onto the prefix stack
    prefix_stack = self.outer_prefix_stack
    prefix_stack.append(start_part.prefix)
    return start_part
|
|
|
|
def visit_FSTRING_END(self, node, ctx):
    '''Convert an `FSTRING_END` token and leave its f-string.

    The token is converted like a `STRING` first (while the f-string's
    prefix is still on top of the stack); only afterwards is that prefix
    popped from `self.outer_prefix_stack`.
    '''
    end_part = self.visit_STRING(node, ctx)
    # We're done with this f-string, so pop its prefix off the prefix stack
    prefix_stack = self.outer_prefix_stack
    prefix_stack.pop()
    return end_part
|
|
|
|
# `FSTRING_MID` tokens are converted exactly like `STRING` tokens; unlike the
# start/end tokens they do not touch the prefix stack.
visit_FSTRING_MID = visit_STRING
|
|
|
|
# In the following function, we decode to `latin-1` in order to preserve
|
|
# the byte values present in the string. This is an undocumented feature of
|
|
# this encoding. See also the `test_python_sanity.py` test file in `/tests`.
|
|
|
|
def decode_str(s):
    '''Return `s` as text.

    `bytes` values are decoded as `latin-1`, which maps every byte value
    to the code point with the same number; anything else is returned
    unchanged.
    '''
    if not isinstance(s, bytes):
        return s
    return s.decode('latin-1')
|
|
|
|
def context_error(node):
    '''Raise a `SyntaxError` ("Invalid context") positioned at `node.start`.

    `node.start` is unpacked as a `(lineno, offset)` pair.
    '''
    error = SyntaxError("Invalid context")
    line, column = node.start
    error.lineno = line
    error.offset = column
    raise error
|
|
|
|
def is_token(node, text):
    '''Holds if `node` is a token (terminal) and its textual value is `text`.

    A node counts as a token when it has a `value` attribute.
    '''
    missing = object()
    value = getattr(node, "value", missing)
    return value is not missing and value == text
|
|
|
|
def get_node_value(node):
    '''Get the value from a NAME node, stripping redundant CPT nodes.

    Descends through nodes that have a `children` attribute (each must
    have exactly one child) and returns the `value` of the first node
    without one.
    '''
    current = node
    while hasattr(current, "children"):
        children = current.children
        assert len(children) == 1
        current = children[0]
    return current.value
|
|
|
|
# Mapping from comparison operator strings to ast classes.
# Both spellings of inequality (`<>` and `!=`) map to `ast.NotEq`; the
# two-word operators are keyed with a single space (`not in`, `is not`).
COMP_OP_CLASSES = {
    "<": ast.Lt,
    "<=": ast.LtE,
    ">": ast.Gt,
    ">=": ast.GtE,
    "==": ast.Eq,
    "<>": ast.NotEq,
    "!=": ast.NotEq,
    "in": ast.In,
    "not in": ast.NotIn,
    "is": ast.Is,
    "is not": ast.IsNot,
}
|
|
|
|
# Mapping from multiplicative operator strings (including matrix
# multiplication, `@`) to ast classes.
TERM_OP_CLASSES = {
    '*': ast.Mult,
    '/': ast.Div,
    '%': ast.Mod,
    '//': ast.FloorDiv,
    '@': ast.MatMult,
}
|
|
|
|
# Mapping from unary (prefix) operator strings to ast classes.
FACTOR_OP_CLASSES = {
    '+': ast.UAdd,
    '-': ast.USub,
    '~': ast.Invert,
}
|
|
|
|
# Mapping from augmented assignment operator strings to the ast class of
# the underlying binary operator.
AUG_ASSIGN_OPS = {
    '+=': ast.Add,
    '-=': ast.Sub,
    '*=': ast.Mult,
    '/=': ast.Div,
    '%=': ast.Mod,
    '&=': ast.BitAnd,
    '|=': ast.BitOr,
    '^=': ast.BitXor,
    '<<=': ast.LShift,
    '>>=': ast.RShift,
    '**=': ast.Pow,
    '//=': ast.FloorDiv,
    '@=': ast.MatMult,
}
|
|
|
|
def make_name(name, ctx, start, end):
    '''Create a `Name` ast node.

    The node wraps a fresh `ast.Variable(name)`, uses `ctx` as its
    context, and spans from `start` to `end`.
    '''
    name_node = ast.Name(ast.Variable(name), ctx)
    set_location(name_node, start, end)
    return name_node
|
|
|
|
def set_location(astnode, cptnode_or_start, end=None):
    '''Set the location of `astnode` from either a CPT node or a pair of
    locations.

    With `end` omitted (`None`), `cptnode_or_start` is a CPT node and its
    `start` and `end` are used. Otherwise `cptnode_or_start` is itself the
    `(lineno, col_offset)` start pair and `end` is the end location.
    '''
    from_node = end is None
    start = cptnode_or_start.start if from_node else cptnode_or_start
    astnode.lineno, astnode.col_offset = start
    astnode._end = cptnode_or_start.end if from_node else end
|
|
|
|
def split_full_prefix(s):
    """Splits a prefix (or a string starting with a prefix) into prefix and quote parts.

    The prefix is everything before the first `'`, `"` or `}` character.
    The quote part is three characters long for triple quotes and a
    single character otherwise.
    """
    # Locate the end of the prefix, i.e. the first quote-like character.
    quote_start = 0
    while s[quote_start] not in "'\"}":
        quote_start += 1
    # The quotes are either one character wide, or three for triple-quoted strings.
    is_triple = s[quote_start:quote_start + 3] in ("'''", '"""')
    quote_width = 3 if is_triple else 1
    return s[:quote_start], s[quote_start:quote_start + quote_width]
|
|
|
|
|
|
def split_string(s, outer_prefix):
    """Splits a string into prefix, quotes, and content.

    Returns a `(prefix, quotes, content)` triple in which the prefix has
    been lower-cased.
    """
    own_prefix, own_quotes = split_full_prefix(s)
    content_start = len(own_prefix) + len(own_quotes)

    # If the string starts with `}`, it is a non-initial string part of an f-string. In this case
    # we must use the prefix and quotes from the outer f-string.
    if s[0] == '}':
        prefix, quotes = split_full_prefix(outer_prefix)
    else:
        prefix, quotes = own_prefix, own_quotes

    # The string either ends with a `{` (if it comes before an interpolation inside an f-string)
    # or else it ends with the same quotes as it begins with.
    content_end = -1 if s[-1] == "{" else -len(quotes)

    return prefix.lower(), quotes, s[content_start:content_end]
|
|
|
|
def get_text(s, outer_prefix):
    """Returns a cleaned-up text version of the string, normalizing the quotes and removing any
    format string marker.

    The result is the prefix with leading/trailing `f`/`F` stripped,
    followed by the content wrapped in the effective quotes.
    """
    prefix, quotes, content = split_string(s, outer_prefix)
    pieces = (prefix.strip("fF"), quotes, content, quotes)
    return "".join(pieces)
|
|
|
|
def parse_string(s, logger, outer_prefix):
    '''Gets the prefix and escaped string text.

    Returns a `(prefix + quotes, value)` pair, where `value` is the string
    evaluated with `literal_eval` (escapes processed). If evaluation
    fails, a warning is logged and the raw content is used instead.
    `bytes` results are decoded to text.
    '''
    prefix, quotes, content = split_string(s, outer_prefix)
    raw_content = content
    try:
        padded = False
        # If the content ends with the same quote character as the outer quotes (and/or a
        # backslash) (e.g. the first string part of `f"""hello"{0}"""`), we must take care not to
        # accidentally create the ending quotes at the wrong place. (`literal_eval` would be
        # unhappy with `"""hello""""` as an input.) To do this, we insert an extra space at the
        # end (that we then must remember to remove later on).
        if content.endswith((quotes[0], '\\')):
            padded = True
            content += " "
        text = prefix.strip("fF") + quotes + content + quotes
        value = literal_eval(text)
    except Exception as ex:
        # Something has gone wrong, but we still have the original form - Should be OK.
        logger.warning("Unable to parse string %s: %s", text, ex)
        logger.traceback()
        padded = False
        value = raw_content
    if isinstance(value, bytes):
        try:
            value = value.decode(sys.getfilesystemencoding())
        except UnicodeDecodeError:
            value = decode_str(value)
    if padded:
        # Drop the space that was appended before evaluation.
        value = value[:-1]
    return prefix + quotes, value
|
|
|
|
# NOTE(review): empty string constant; nothing in the visible part of this
# file reads it -- confirm whether it is still needed.
ESCAPES = ""
|
|
|
|
def get_numeric_value(node):
    '''Gets numeric value from a CPT leaf node.

    `node.value` is the source text of a numeric literal. Digit-group
    underscores are removed, old-style octal literals (`0777`) are
    rewritten to the `0o` form, and a trailing Python 2 long suffix
    (`l`/`L`) is dropped before the text is evaluated.

    Returns the `int`, `float` or `complex` value of the literal.
    Raises `ParseError` if `literal_eval` rejects the text with a
    `ValueError`.
    '''
    value = node.value.replace("_", "")
    lowered = value.lower()
    try:
        # Hex digits include `e`, so hex literals must be excluded here:
        # otherwise a long hex literal such as `0xFEL` would be evaluated
        # with its `L` suffix still attached and fail.
        is_hex = lowered.startswith(u'0x')
        if not is_hex and (u'.' in lowered or u'e' in lowered or u'j' in lowered):
            # Float or imaginary
            return literal_eval(value)
        if len(value) > 1 and value[0] == u'0' and value[1] not in u'boxlBOXL':
            # Old-style octal
            value = u'0o' + value[1:]
        if value[-1] in u'lL':
            # Python 2 long suffix
            value = value[:-1]
        return literal_eval(value)
    except ValueError:
        raise ParseError("Not a valid numeric value", node.type, node.value, (node.start, node.end))
|
|
|
|
# This rewriting step is performed separately for two reasons.
# 1. It is complicated.
# 2. In future, we may want to make the AST more like the syntax and less like the semantics.
#    Keeping this step separate should make that a bit easier.
def rewrite_comp(node):
    '''Rewrite a comprehension node in place into function form.

    The comprehension's generators are turned into nested `For`/`If`
    statements around a `Yield` of the element, and those statements
    become the body of a synthesized `ast.Function` taking one parameter
    named `.0`. Afterwards `node.function` holds that function and
    `node.iterable` the iterable of the first generator; `node.generators`
    (and `elt`, or `key`/`value`) are removed from the node.

    Does nothing if `node` already has a `function` attribute.
    '''
    if hasattr(node, "function"):
        # Already rewritten.
        return
    gens = node.generators
    if hasattr(node, "elt"):
        elt = node.elt
        del node.elt
    else:
        # No `elt`: the node has `key` and `value` instead, which are
        # combined into a single tuple (value first) spanning from the
        # start of the key to the end of the value.
        elt = ast.Tuple([node.value, node.key], LOAD)
        elt.lineno = node.key.lineno
        elt.col_offset = node.key.col_offset
        elt._end = node.value._end
        del node.key
        del node.value
    # Innermost statement: yield the element.
    y = ast.Yield(elt)
    copy_location(elt, y)
    stmt = ast.Expr(y)
    copy_location(elt, stmt)
    # Wrap from the last generator outwards, leaving the first one for later.
    for gen in reversed(gens[1:]):
        # NOTE(review): the `if` clauses are wrapped in iteration order, so
        # the last `if` of a generator ends up as the outermost test --
        # confirm this nesting order is intended.
        for if_ in gen.ifs:
            stmt = ast.If(if_, [stmt], None)
            copy_location(if_, stmt)
        stmt = ast.For(gen.target, gen.iter, [stmt], None)
        if getattr(gen, "is_async", False):
            stmt.is_async = True
        copy_location(node, stmt)
    for if_ in gens[0].ifs:
        stmt = ast.If(if_, [stmt], None)
        copy_location(if_, stmt)
    # The first generator iterates over the function's parameter `.0`
    # rather than over its own iterable, which is kept on the node below.
    p0 = ".0"
    pvar = ast.Variable(p0)
    arg = ast.Name(pvar, LOAD)
    copy_location(node, arg)
    stmt = ast.For(gens[0].target, arg, [stmt], None)
    if getattr(gens[0], "is_async", False):
        stmt.is_async = True
    copy_location(node, stmt)
    # A second `Name` for `.0`, this time as the parameter declaration.
    pvar = ast.Variable(p0)
    arg = ast.Name(pvar, PARAM)
    copy_location(node, arg)
    function = ast.Function(COMP_NAMES[type(node).__name__], [],[arg], None, None, None, [ stmt ])
    copy_location(node, function)
    node.function = function
    node.iterable = gens[0].iter
    del node.generators
|
|
|
|
|
|
# Mapping from the class name of a comprehension node to the name given to
# the function synthesized for it by `rewrite_comp`.
COMP_NAMES = {
    'GeneratorExp' : 'genexpr',
    'DictComp' : 'dictcomp',
    'ListComp' : 'listcomp',
    'SetComp' : 'setcomp'
}
|
|
|
|
def copy_location(src, dest):
    '''Copy location from `src` to `dest`.

    The location consists of the `lineno`, `col_offset` and `_end`
    attributes, copied in that order.
    '''
    for attribute in ("lineno", "col_offset", "_end"):
        setattr(dest, attribute, getattr(src, attribute))
|
|
|
|
def convert(logger, cpt):
    '''Convert a concrete parse tree as specified by blib2to3/Grammar.txt
    to the AST specified by semmle/python/master.py.

    A fresh `Convertor` using `logger` is created for each call.
    '''
    convertor = Convertor(logger)
    return convertor.visit(cpt)
|