codeql/python/extractor/semmle/python/parser/ast.py

from blib2to3.pgen2 import token
from ast import literal_eval
from semmle.python import ast
from blib2to3.pgen2.parse import ParseError
import sys

# Shared expression-context instances. Visitors receive one of these as their
# `ctx` argument and compare against them by identity (e.g. `ctx is not LOAD`).
LOAD = ast.Load()
STORE = ast.Store()
PARAM = ast.Param()
DEL = ast.Del()

# Tags that `visit_argument` pairs with each converted call argument;
# `visit_arglist` uses them to split the arguments into two lists.
POSITIONAL = 1
KEYWORD = 2


class ParseTreeVisitor(object):
    '''Generic tree-walking visitor that dispatches on `node.name`
    (rather than on `type(node).__name__`) to a `visit_<name>` method.
    '''

    def visit(self, node, extra_arg=None):
        '''Call the `visit_<node.name>` handler and return its result.

        An `extra_arg` of None is treated as "not supplied": the handler
        is then called with the node alone.
        '''
        handler = getattr(self, 'visit_' + node.name)
        if extra_arg is not None:
            return handler(node, extra_arg)
        return handler(node)

class Convertor(ParseTreeVisitor):
    ''' Walk the concrete parse tree, returning an AST.
    The CPT is specified by  blib2to3/Grammar.txt.
    The AST specified by semmle/python/master.py.
    Each `visit_X` method takes a `X` node in the CFG and
    produces some part of the AST, usually a single node.
    '''

    def __init__(self, logger):
        self.logger = logger
        # To handle f-strings nested inside other f-strings, we must keep track of the stack of
        # surrounding prefixes while walking the tree. This is necessary because inside an f-string
        # like `f"hello{f'to{you}dear'}world"`, the string part containing "world" has (in terms of
        # the concrete parse tree) a prefix of `}`, which doesn't tell us how to interpret it (in
        # particular, we can't tell if it's a raw string or not). So instead we look at the top of
        # the prefix stack to figure out what the "current prefix" is. The nested f-string in the
        # example above demonstrates why we must do this as a stack -- we must restore the outer
        # `f"` prefix when we're done with the inner `f'`-prefix string.
        #
        # The stack manipulation itself takes place in the `visit_FSTRING_START` and
        # `visit_FSTRING_END` methods. The text wrangling takes place in the `parse_string` helper
        # function.

        self.outer_prefix_stack = []


    def visit_file_input(self, node):
        '''Build an `ast.Module` from every child except ENDMARKER and
        NEWLINE. A child that converts to a list has its items spliced
        into the module body.
        '''
        skipped = ("ENDMARKER", "NEWLINE")
        converted = [self.visit(child) for child in node.children if child.name not in skipped]
        statements = []
        for item in converted:
            if isinstance(item, list):
                statements.extend(item)
                continue
            statements.append(item)
        module = ast.Module(statements)
        set_location(module, node)
        return module

    def visit_import_from(self, node):
        '''Convert a `from ... import ...` statement.

        A star import yields an `ast.ImportFrom`; otherwise the result is
        an `ast.Import` holding one alias per imported name, each with its
        own `ast.ImportExpr` for the module.
        '''
        kids = node.children
        pos = 1
        module_start = kids[pos].start
        # Each leading "." adds one level to the import.
        dots = 0
        while is_token(kids[pos], "."):
            dots += 1
            pos += 1
        if is_token(kids[pos], "import"):
            # Nothing but dots before `import`: no module name.
            module_end = kids[pos - 1].end
            module_name = None
            pos += 1
        else:
            module_end = kids[pos].end
            module_name = self.visit(kids[pos])
            # Step over the module name and the token that follows it.
            pos += 2

        def fresh_module():
            expr = ast.ImportExpr(dots, module_name, False)
            set_location(expr, module_start, module_end)
            return expr

        if is_token(kids[pos], "*"):
            star_import = ast.ImportFrom(fresh_module())
            set_location(star_import, node)
            return star_import
        # The names may be wrapped in parentheses.
        if is_token(kids[pos], "("):
            names = kids[pos + 1]
        else:
            names = kids[pos]
        aliases = []
        for entry in names.children[::2]:
            aliases.append(self._import_as_name(entry, fresh_module()))
        result = ast.Import(aliases)
        set_location(result, node)
        return result

    #Helper for visit_import_from
    def _import_as_name(self, node, module):
        '''Build the `ast.alias` for one imported name of `module`.

        The stored name comes from the third child when `node` has
        exactly three children, and from the first child otherwise.
        '''
        kids = node.children
        imported = kids[0]
        bound = kids[2] if len(kids) == 3 else imported
        member = ast.ImportMember(module, imported.value)
        set_location(member, node)
        target = make_name(bound.value, STORE, bound.start, bound.end)
        alias = ast.alias(member, target)
        set_location(alias, node)
        return alias

    def visit_small_stmt(self, node):
        return self.visit(node.children[0])

    def visit_simple_stmt(self, node):
        return [self.visit(s) for s in node.children if s.name not in ("SEMI", "NEWLINE")]

    def visit_stmt(self, node):
        return self.visit(node.children[0])

    def visit_compound_stmt(self, node):
        return self.visit(node.children[0])

    def visit_pass_stmt(self, node):
        '''Return a new `ast.Pass` whose location is set from `node`.'''
        statement = ast.Pass()
        set_location(statement, node)
        return statement

    def visit_classdef(self, node):
        '''Convert a class definition into an `ast.Assign` that stores an
        `ast.ClassExpr` in the class name.

        The node has 4 children (no parentheses), 6 children (empty
        parentheses) or 7 children (parentheses containing arguments).
        All created nodes are located from the first child's start to the
        colon's end.
        '''
        kids = node.children
        count = len(kids)
        bases, keywords = [], []
        if count == 4:
            cls, name, colon, suite = kids
        elif count == 7:
            cls, name, _, arglist, _, colon, suite = kids
            bases, keywords = self.visit(arglist)
        else:
            assert count == 6
            cls, name, _, _, colon, suite = kids
        start, end = cls.start, colon.end
        body = self.visit(suite)
        class_name = name.value
        inner = ast.Class(class_name, body)
        set_location(inner, start, end)
        cls_expr = ast.ClassExpr(class_name, [], bases, keywords, inner)
        set_location(cls_expr, start, end)
        target = make_name(class_name, STORE, name.start, name.end)
        assignment = ast.Assign(cls_expr, [target])
        set_location(assignment, start, end)
        return assignment

    def visit_arglist(self, node):
        all_args = self._visit_list(node.children[::2])
        args = [ arg for kind, arg in all_args if kind is POSITIONAL ]
        keywords  = [ arg for kind, arg in all_args if kind is KEYWORD ]
        return args, keywords

    def visit_argument(self, node):
        '''Convert one call argument and return the pair `(kind, arg)`,
        where `kind` is POSITIONAL or KEYWORD.

        Handles `*x`, `**x`, `name=value`, a plain expression, a `:=`
        expression, and an expression followed by comprehension clauses
        (which becomes an `ast.GeneratorExp`).
        '''
        kids = node.children
        head = kids[0]
        if is_token(head, "*"):
            kind = POSITIONAL
            arg = ast.Starred(self.visit(kids[1], LOAD), LOAD)
        elif is_token(head, "**"):
            kind = KEYWORD
            arg = ast.DictUnpacking(self.visit(kids[1], LOAD))
        elif len(kids) == 3 and is_token(kids[1], "="):
            try:
                keyword_name = get_node_value(head)
            except Exception:
                # Not a legal name: log it and carry on without a name
                keyword_name = None
                self.logger.warning("Illegal name for keyword on line %s", head.start[0])
            kind = KEYWORD
            arg = ast.keyword(keyword_name, self.visit(kids[2], LOAD))
        else:
            value = self.visit(head, LOAD)
            if len(kids) == 1:
                return POSITIONAL, value
            if len(kids) == 3 and is_token(kids[1], ":="):
                return POSITIONAL, self.visit_namedexpr_test(node, LOAD)
            generators = self.visit(kids[1])
            kind = POSITIONAL
            arg = ast.GeneratorExp(value, generators)
            # The location is set before the comprehension is rewritten
            set_location(arg, node)
            rewrite_comp(arg)
        set_location(arg, node)
        return kind, arg

    def visit_namedexpr_test(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        target = self.visit(node.children[0], STORE)
        value = self.visit(node.children[-1], LOAD)
        result = ast.AssignExpr(value, target)
        set_location(result, node)
        return result

    def visit_test(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        else:
            if ctx is not LOAD:
                context_error(node)
            body = self.visit(node.children[0], ctx)
            test = self.visit(node.children[2], ctx)
            orelse = self.visit(node.children[4], ctx)
            ifexp = ast.IfExp(test, body, orelse)
            set_location(ifexp, node)
            return ifexp

    def visit_or_test(self, node, ctx):
        '''Convert an `or` expression through the boolean-operator helper.'''
        operator_class = ast.Or
        return self._boolop(node, operator_class, ctx)

    def visit_and_test(self, node, ctx):
        '''Convert an `and` expression through the boolean-operator helper.'''
        operator_class = ast.And
        return self._boolop(node, operator_class, ctx)

    def visit_not_test(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        if ctx is not LOAD:
            context_error(node)
        result = ast.UnaryOp(
            ast.Not(),
            self.visit(node.children[1], ctx)
        )
        set_location(result, node)
        return result

    # Helper for `or` and `and`.
    def _boolop(self, node, opcls, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        values = [ self.visit(s, ctx) for s in node.children[::2] ]
        result = ast.BoolOp(opcls(), values)
        set_location(result, node)
        return result

    # Helper for various binary expression visitors.
    def _binary(self, node, opfact, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        if ctx is not LOAD:
            context_error(node)
        children = iter(node.children)
        result = self.visit(next(children), LOAD)
        for op in children:
            item = next(children)
            rhs = self.visit(item, LOAD)
            result = ast.BinOp(result, opfact(op), rhs)
            set_location(result, node.start, item.end)
        return result

    def visit_suite(self, node):
        if len(node.children) == 1:
            return self.visit(node.children[0])
        result = []
        for s in [self.visit(s) for s in node.children[2:-1]]:
            if isinstance(s, list):
                result.extend(s)
            else:
                result.append(s)
        return result

    def visit_expr_stmt(self, node):
        '''Convert an expression statement. Depending on the children
        this yields an `ast.Expr`, an assignment (via `_assign`), an
        `ast.AnnAssign` or an `ast.AugAssign`.
        '''
        kids = node.children
        count = len(kids)
        if count == 1:
            statement = ast.Expr(self.visit(kids[0], LOAD))
            set_location(statement, node)
            return statement
        if count > 1 and is_token(kids[1], "="):
            return self._assign(node)
        if count == 2:
            # Annotated assignment
            target = self.visit(kids[0], STORE)
            annotation = kids[1]
            type_anno = self.visit(annotation.children[1], LOAD)
            # The annotation child carries the value (if any) at index 3
            if len(annotation.children) > 2:
                value = self.visit(annotation.children[3], LOAD)
            else:
                value = None
            statement = ast.AnnAssign(value, type_anno, target)
        else:
            # Augmented assignment, represented as a wrapped binary operation
            left = self.visit(kids[0], LOAD)
            operator = self.visit(kids[1])
            right = self.visit(kids[2], LOAD)
            operation = ast.BinOp(left, operator, right)
            set_location(operation, node)
            statement = ast.AugAssign(operation)
        set_location(statement, node)
        return statement

    def visit_augassign(self, node):
        '''Look up the first child's value in `AUG_ASSIGN_OPS` and return
        a new instance of the class found there.
        '''
        symbol = node.children[0].value
        operator_class = AUG_ASSIGN_OPS[symbol]
        return operator_class()

    #Helper for visit_expr_stmt (for assignment)
    def _assign(self, node):
        '''Convert `t1 = t2 = ... = value` into one `ast.Assign`. The
        targets (every other child except the last) are converted first,
        then the value (the last child).
        '''
        kids = node.children
        targets = []
        for target_node in kids[:-1:2]:
            targets.append(self.visit(target_node, STORE))
        value = self.visit(kids[-1], LOAD)
        assignment = ast.Assign(value, targets)
        set_location(assignment, node)
        return assignment

    def visit_testlist(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        elts = self._visit_list(node.children[::2], ctx)
        result = ast.Tuple(elts, ctx)
        set_location(result, node)
        return result

    visit_testlist_star_expr = visit_testlist

    def visit_comparison(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        if ctx is not LOAD:
            context_error(node)
        left = self.visit(node.children[0], ctx)
        ops = [ self.visit(op) for op in node.children[1::2]]
        comps = [ self.visit(op, ctx) for op in node.children[2::2]]
        result = ast.Compare(left, ops, comps)
        set_location(result, node)
        return result

    def visit_comp_op(self, node):
        '''Return the comparison operator for `node`.

        A single child is looked up in `COMP_OP_CLASSES`. Two children
        give `ast.IsNot` when the first is "is", and `ast.NotIn`
        otherwise.
        '''
        kids = node.children
        if len(kids) != 1:
            assert len(kids) == 2
            if kids[0].value == "is":
                return ast.IsNot()
            return ast.NotIn()
        return COMP_OP_CLASSES[kids[0].value]()

    def visit_expr(self, node, ctx):
        return self._binary(node, lambda _: ast.BitOr(), ctx)

    def visit_xor_expr(self, node, ctx):
        return self._binary(node, lambda _: ast.BitXor(), ctx)

    def visit_and_expr(self, node, ctx):
        return self._binary(node, lambda _: ast.BitAnd(), ctx)

    def visit_shift_expr(self, node, ctx):
        return self._binary(
            node,
            lambda op: ast.LShift() if op.value == "<<" else ast.RShift(),
            ctx
        )

    def visit_arith_expr(self, node, ctx):
        return self._binary(
            node,
            lambda op: ast.Add() if op.value == "+" else ast.Sub(),
            ctx
        )

    def visit_term(self, node, ctx):
        return self._binary(
            node,
            lambda op: TERM_OP_CLASSES[op.value](),
            ctx
        )

    def visit_factor(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        result = ast.UnaryOp(
            FACTOR_OP_CLASSES[node.children[0].value](),
            self.visit(node.children[1], ctx)
        )
        set_location(result, node)
        return result

    def visit_power(self, node, ctx):
        '''The Grammar formulates this rule somewhat awkwardly, so the
        `await` prefix is handled here, while the `** factor` suffix and
        the atom with its trailers are left to `_power`.
        '''
        kids = node.children

        # `await` used to be an ordinary identifier in earlier versions of
        # Python, so seeing it does not by itself mean that this is an
        # `await` expression; what follows it decides. The grammar rule is
        #
        # power: ['await'] atom trailer* ['**' factor]
        #
        # and the situation to recognise is 'await' occurring as the `atom`
        # rather than as the `await` token.
        #
        # `atom` nodes may have been removed from the tree (see
        # `SKIP_IF_SINGLE_CHILD_NAMES` in `__init__.py`), so the decision is
        # based on the node after the (possibly skipped) `atom`: when that
        # node is a `trailer` or a "**" token, the first child cannot be an
        # `await` token and has to be an `atom`.
        try:
            follower = kids[1]
            next_is_atom = follower.name != "trailer" and not is_token(follower, "**")
        except (IndexError, AttributeError):
            # IndexError: `node` has at most one child.
            # AttributeError: `follower` is a `Leaf` rather than a `Node`.
            next_is_atom = False
        if not (is_token(kids[0], "await") and next_is_atom):
            return self._power(kids, ctx)
        if ctx is not LOAD:
            context_error(node)
        awaited = self._power(kids[1:], ctx)
        await_expr = ast.Await(awaited)
        set_location(await_expr, node)
        return await_expr

    #Helper for visit_power
    def _power(self, children, ctx):
        '''Convert `atom trailer* ['**' factor]` given as a list of
        children.

        The exponent (if any) is converted first. Trailers are applied
        left to right; all but the last use LOAD and the last one uses
        `ctx`. With an exponent the result is wrapped in an `ast.BinOp`
        using `ast.Pow`.
        '''
        head = children[0]
        start = head.start
        if len(children) > 1 and is_token(children[-2], "**"):
            if ctx is not LOAD:
                context_error(head)
            trailers = children[1:-2]
            exponent = self.visit(children[-1], ctx)
        else:
            trailers = children[1:]
            exponent = None
        if not trailers:
            base = self.visit(head, ctx)
        else:
            base = self.visit(head, LOAD)
            for trailer in trailers[:-1]:
                base = self._apply_trailer(base, trailer, start, LOAD)
            base = self._apply_trailer(base, trailers[-1], start, ctx)
        if exponent:
            base = ast.BinOp(base, ast.Pow(), exponent)
            set_location(base, head.start, children[-1].end)
        return base

    #Helper for _power
    def _atom(self, children, ctx):
        start = children[0].start
        if len(children) == 1:
            return self.visit(children[0], ctx)
        atom = self.visit(children[0], LOAD)
        for trailer in children[1:-1]:
            atom = self._apply_trailer(atom, trailer, start, LOAD)
        atom = self._apply_trailer(atom, children[-1], start, ctx)
        return atom

    #Helper for _atom
    def _apply_trailer(self, atom, trailer, start, ctx):
        '''Wrap `atom` according to `trailer`: "(" gives an `ast.Call`,
        "[" an `ast.Subscript` and "." an `ast.Attribute`. The result is
        located from `start` to the end of the trailer's last relevant
        child.
        '''
        parts = trailer.children
        opener = parts[0]
        if is_token(opener, "("):
            if is_token(parts[1], ")"):
                # Empty argument list
                args, keywords = [], []
                end = parts[1].end
            else:
                args, keywords = self.visit(parts[1])
                end = parts[2].end
            wrapped = ast.Call(atom, args, keywords)
        elif is_token(opener, "["):
            index = self.visit(parts[1], LOAD)
            wrapped = ast.Subscript(atom, index, ctx)
            end = parts[2].end
        else:
            assert is_token(opener, ".")
            attribute = parts[1]
            wrapped = ast.Attribute(atom, attribute.value, ctx)
            end = attribute.end
        set_location(wrapped, start, end)
        return wrapped

    def visit_atom(self, node, ctx):
        '''Convert an atom, choosing the AST node from its first child.

        Handled forms: bracketed expressions (empty or not), names,
        numbers, the three-dot ellipsis and backquoted expressions.
        Everything except brackets and names is only accepted in a LOAD
        context; other contexts are reported via `context_error`.
        '''
        left = node.children[0]
        # NOTE(review): `in` against a string is a substring test, so an
        # empty `value` would also take this branch -- confirm that cannot occur.
        if left.value in "[({":
            n = node.children[1]
            if hasattr(n, "value") and n.value in "])}":
                # The closing bracket follows immediately: an empty tuple, list or dict.
                if n.value == ")":
                    result = ast.Tuple([], ctx)
                elif n.value == "]":
                    result = ast.List([], ctx)
                else:
                    result = ast.Dict([])
                set_location(result, node)
                return result
            else:
                result = self.visit(node.children[1], ctx)
                if left.value == "(":
                    # Parentheses only mark the inner result; its location is left as is.
                    result.parenthesised = True
                else:
                    #Meaningful bracketing: the result's location is set from the whole atom
                    set_location(result, node)
                if isinstance(result, (ast.GeneratorExp, ast.ListComp, ast.SetComp, ast.DictComp)):
                    rewrite_comp(result)
                return result
        if left.type == token.NAME:
            return make_name(left.value, ctx, left.start, left.end)
        # None of the remaining forms is accepted outside a LOAD context.
        if ctx is not LOAD:
            context_error(node)
        if left.type == token.NUMBER:
            val = get_numeric_value(left)
            result = ast.Num(val, left.value)
            set_location(result, left)
            return result
        if left.value == ".":
            # An ellipsis is expected to arrive as three "." children.
            assert len(node.children) == 3 and node.children[2].value == "."
            result = ast.Ellipsis()
            set_location(result, node)
            return result
        # The only form left is a backquoted expression.
        assert left.type == token.BACKQUOTE
        result = ast.Repr(self.visit(node.children[1], LOAD))
        set_location(result, node)
        return result

    def visit_STRING(self, node, ctx):
        '''Convert a string token into an `ast.StringPart`, interpreting
        it with the prefix on top of `outer_prefix_stack` (or None when
        the stack is empty). Only accepted in a LOAD context.
        '''
        if ctx is not LOAD:
            context_error(node)
        stack = self.outer_prefix_stack
        outer_prefix = stack[-1] if stack else None
        raw = node.value
        prefix, parsed = parse_string(raw, self.logger, outer_prefix)
        text = get_text(raw, outer_prefix)
        string_part = ast.StringPart(prefix, text, parsed)
        set_location(string_part, node)
        return string_part

    def visit_NUMBER(self, node, ctx):
        '''Convert a number token into an `ast.Num` holding both the
        numeric value and the token's text. Only accepted in a LOAD
        context.
        '''
        if ctx is not LOAD:
            context_error(node)
        number = ast.Num(get_numeric_value(node), node.value)
        set_location(number, node)
        return number

    def visit_funcdef(self, node, is_async=False):
        '''Convert a function definition into an `ast.Assign` that stores
        an `ast.FunctionExpr` in the function's name.

        All created nodes are located from the start of the first child
        to the end of the colon before the suite.
        '''
        # funcdef: 'def' NAME parameters ['->' test] ':' suite
        kids = node.children
        name_token = kids[1]
        name = name_token.value
        if kids[3].value == "->":
            return_type = self.visit(kids[4], LOAD)
            end = kids[5].end
            body = self.visit(kids[6])
        else:
            return_type = None
            end = kids[3].end
            body = self.visit(kids[4])
        start = kids[0].start
        params = kids[2]
        # Two children means the parameters are just the two brackets
        no_params = len(params.children) == 2
        if no_params:
            args, vararg, kwonlyargs, kwarg = [], None, [], None
        else:
            args, vararg, kwonlyargs, kwarg = self._get_parameters(params.children[1])
        func = ast.Function(name, [], args, vararg, kwonlyargs, kwarg, body, is_async)
        set_location(func, start, end)
        if no_params:
            arguments = ast.arguments([], [], [], None, None, [])
        else:
            arguments = self._get_defaults_and_annotations(params.children[1])
        funcexpr = ast.FunctionExpr(name, arguments, return_type, func)
        set_location(funcexpr, start, end)
        target = make_name(name, STORE, name_token.start, name_token.end)
        assignment = ast.Assign(funcexpr, [target])
        set_location(assignment, start, end)
        return assignment

    #Helper for visit_funcdef and visit_lambdef
    def _get_parameters(self, node):
        '''Returns the quadruple: args, vararg, kwonlyargs, kwarg

        `node.children` is walked with a single iterator shared by both
        loops below: the first loop stops at a `*` token and the second
        loop continues from that point.
        '''
        args = []
        vararg = None
        kwonlyargs = []
        kwarg = None
        children = iter(node.children)
        arg = None
        # Parameters up to (and including) a `*` entry
        for child in children:
            if is_token(child, "*"):
                try:
                    child = next(children)
                except StopIteration:
                    # `*` was the last child
                    pass
                else:
                    # A "," right after `*` means there is no vararg to convert
                    if not is_token(child, ","):
                        vararg = self.visit(child, PARAM)
                break
            if is_token(child, ","):
                pass
            elif is_token(child, "/"):
                pass
            elif is_token(child, "="):
                # Skip the child following "="
                next(children)
            elif is_token(child, "**"):
                child = next(children)
                kwarg = self.visit(child, PARAM)
            else:
                arg = self.visit(child, PARAM)
                args.append(arg)
        #kwonly args: whatever remains in the iterator after the `*` entry
        for child in children:
            if is_token(child, ","):
                pass
            elif is_token(child, "="):
                # Skip the child following "="
                next(children)
            elif is_token(child, "**"):
                child = next(children)
                kwarg = self.visit(child, PARAM)
            else:
                arg = self.visit(child, PARAM)
                kwonlyargs.append(arg)
        return args, vararg, kwonlyargs, kwarg

    #Helper for visit_funcdef and visit_lambdef
    def _get_defaults_and_annotations(self, node):
        '''Build the `ast.arguments` node holding the default values and
        annotations of the parameters in `node`.

        `node.children` is walked with a single iterator shared by both
        loops below: the first loop stops at a `*` token and the second
        loop continues from that point. Parameter children are visited
        with LOAD here and the results are stored as annotations.
        '''
        defaults = []
        kw_defaults = []
        annotations = []
        varargannotation = None
        kwargannotation = None
        kw_annotations = []
        children = iter(node.children)
        # Because we want the i'th element of `kw_defaults` to be the default value for
        # the i'th keyword-only argument, when encountering the combined token for the
        # argument name and optional annotation, we add a `None` to `kw_defaults` assuming
        # that there is no default value. If there turns out to be a default value, we
        # remove the `None` and add the real default value. Like-wise for `defaults`.

        # positional-only args and "normal" args
        for child in children:
            if is_token(child, "*"):
                try:
                    child = next(children)
                except StopIteration:
                    # `*` was the last child
                    pass
                else:
                    # A "," right after `*` means there is no vararg to annotate
                    if not is_token(child, ","):
                        varargannotation = self.visit(child, LOAD)
                break
            if is_token(child, ","):
                pass
            elif is_token(child, "/"):
                pass
            elif is_token(child, "="):
                # Replace the placeholder None added for the preceding parameter
                child = next(children)
                defaults.pop()
                defaults.append(self.visit(child, LOAD))
            elif is_token(child, "**"):
                child = next(children)
                kwargannotation = self.visit(child, LOAD)
                arg = None
            else:
                # Preemptively assume there is no default argument (indicated by None)
                defaults.append(None)
                annotations.append(self.visit(child, LOAD))

        #kwonly args: whatever remains in the iterator after the `*` entry
        for child in children:
            if is_token(child, ","):
                pass
            elif is_token(child, "="):
                # Replace the placeholder None added for the preceding parameter
                child = next(children)
                kw_defaults.pop()
                kw_defaults.append(self.visit(child, LOAD))
            elif is_token(child, "**"):
                child = next(children)
                kwargannotation = self.visit(child, LOAD)
            else:
                # Preemptively assume there is no default argument (indicated by None)
                kw_defaults.append(None)
                kw_annotations.append(self.visit(child, LOAD))
        result = ast.arguments(defaults, kw_defaults, annotations, varargannotation, kwargannotation, kw_annotations)
        set_location(result, node)
        return result

    def visit_tfpdef(self, node, ctx):
        '''Convert the first child, unless it is a "(" token (a tuple
        parameter), in which case None is returned.
        '''
        # TO DO Support tuple parameters
        # No one uses them any more, so this isn't super important.
        first = node.children[0]
        if not is_token(first, "("):
            return self.visit(first, ctx)
        return None

    def visit_tname(self, node, ctx):
        '''Convert a possibly annotated name.

        With `ctx` PARAM the result is the name from the first child. For
        any other context it is the converted third child when there is
        more than one child, and None otherwise.
        '''
        kids = node.children
        if ctx is PARAM:
            name_token = kids[0]
            return make_name(name_token.value, ctx, name_token.start, name_token.end)
        if len(kids) > 1:
            return self.visit(kids[2], ctx)
        return None

    def visit_decorated(self, node):
        '''Convert a decorated definition.

        The second child is converted to an assignment, whose value is
        then wrapped in one `ast.Call` per decorator. The decorators are
        processed last to first, so the first one ends up outermost.
        '''
        assignment = self.visit(node.children[1])
        wrapped = assignment.value
        for decorator_node in reversed(node.children[0].children):
            decorator = self.visit(decorator_node)
            call = ast.Call(decorator, [wrapped], [])
            copy_location(decorator, call)
            wrapped = call
        assignment.value = wrapped
        return assignment

    def visit_decorators(self, node):
        return self._visit_list(node.children)

    def visit_decorator(self, node):
        '''Convert the decorator's expression (the second child) in a
        LOAD context and set its location from that child.
        '''
        expression_node = node.children[1]
        decorator = self.visit_namedexpr_test(expression_node, LOAD)
        set_location(decorator, expression_node)
        return decorator

    def _visit_list(self, items, ctx=None):
        if ctx is None:
            return [ self.visit(i) for i in items ]
        else:
            return [ self.visit(i, ctx) for i in items ]

    def visit_dotted_name(self, node):
        return ".".join(name.value for name in node.children[::2])

    def visit_NAME(self, name, ctx):
        '''Build a name node from the token's value, the given context and
        the token's start and end positions.
        '''
        start, end = name.start, name.end
        return make_name(name.value, ctx, start, end)

    def visit_listmaker(self, node, ctx):
        '''Convert the contents of a list display: an `ast.List` for a
        plain list, or an `ast.ListComp` (LOAD context only) when the
        first element is followed by comprehension clauses.
        '''
        kids = node.children
        is_plain_list = len(kids) == 1 or is_token(kids[1], ",")
        if is_plain_list:
            elements = [self.visit(child, ctx) for child in kids[::2]]
            result = ast.List(elements, ctx)
        else:
            if ctx is not LOAD:
                context_error(node)
            element = self.visit(kids[0], ctx)
            generators = self.visit(kids[1])
            result = ast.ListComp(element, generators)
        set_location(result, node)
        return result

    def visit_testlist_gexp(self, node, ctx):
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        if is_token(node.children[1], ","):
            items = [self.visit(c, ctx) for c in node.children[::2]]
            result = ast.Tuple(items, ctx)
        else:
            if ctx is not LOAD:
                context_error(node)
            elt = self.visit(node.children[0], ctx)
            generators = self.visit(node.children[1])
            result = ast.GeneratorExp(elt, generators)
        set_location(result, node)
        return result

    def visit_comp_for(self, node):
        '''Convert a `for` clause of a comprehension and everything that
        follows it, returning a list of `ast.comprehension` nodes.

        Conditions directly following this clause are attached to its
        comprehension; the remainder of the list belongs to later `for`
        clauses.
        '''
        kids = node.children
        # A leading `async` token shifts every later child index by one.
        is_async = is_token(kids[0], "async")
        target = self.visit(kids[1 + is_async], STORE)
        iterable = self.visit(kids[3 + is_async], LOAD)
        if len(kids) != 5 + is_async:
            # No further clauses follow
            comp = ast.comprehension(target, iterable, [])
            comp.is_async = is_async
            set_location(comp, node)
            return [comp]
        conditions = []
        end = iterable._end
        rest = self.visit(kids[4 + is_async])
        # Take the leading conditions; stop at the first comprehension.
        while rest and not isinstance(rest[0], ast.comprehension):
            condition = rest[0]
            conditions.append(condition)
            end = condition._end
            rest = rest[1:]
        comp = ast.comprehension(target, iterable, conditions)
        comp.is_async = is_async
        # The clause ends at its last condition, or at the iterable if there is none.
        set_location(comp, kids[0].start, end)
        return [comp] + rest

    visit_old_comp_for = visit_comp_for

    def visit_comp_iter(self, node):
        return self.visit(node.children[0])

    def visit_comp_if(self, node):
        '''Convert an `if` clause of a comprehension. Returns a list that
        starts with the converted condition, followed by the converted
        third child (itself a list) when there is one.
        '''
        kids = node.children
        clauses = [self.visit(kids[1], LOAD)]
        if len(kids) == 3:
            clauses = clauses + self.visit(kids[2])
        return clauses

    visit_old_comp_if = visit_comp_if

    visit_old_comp_iter = visit_comp_iter

    def visit_exprlist(self, node, ctx):
        #Despite the name this returns a single expression
        if len(node.children) == 1:
            return self.visit(node.children[0], ctx)
        else:
            elts = self._visit_list(node.children[::2], ctx)
            result = ast.Tuple(elts, ctx)
            set_location(result, node)
            return result

    visit_testlist_safe = visit_exprlist

    def visit_old_test(self, node, ctx):
        return self.visit(node.children[0], ctx)

    def visit_if_stmt(self, node):
        '''Convert an `if`/`elif`/`else` statement into nested `ast.If`
        nodes.

        The children are processed from the end towards the start in
        groups of four (keyword, test, colon, suite). Each `ast.If` built
        becomes the sole `orelse` entry of the one before it, and the
        outermost one is returned.
        '''
        kids = node.children
        remaining = len(kids)
        orelse = None
        if is_token(kids[-3], "else"):
            # A trailing `else` takes up the last three children
            orelse = self.visit(kids[-1])
            remaining -= 3
        while remaining:
            condition = self.visit(kids[remaining - 3], LOAD)
            body = self.visit(kids[remaining - 1])
            branch = ast.If(condition, body, orelse)
            # Located from the keyword's start to the colon's end
            set_location(branch, kids[remaining - 4].start, kids[remaining - 2].end)
            orelse = [branch]
            remaining -= 4
        return branch

    def visit_import_stmt(self, node):
        return self.visit(node.children[0])

    def visit_import_name(self, node):
        '''Convert an `import ...` statement into an `ast.Import` holding
        the aliases produced from the second child.
        '''
        import_stmt = ast.Import(self.visit(node.children[1]))
        set_location(import_stmt, node)
        return import_stmt

    def visit_dotted_as_names(self, node):
        return self._visit_list(node.children[::2])

    def visit_dotted_as_name(self, node):
        '''Convert one entry of an `import` statement into an `ast.alias`.

        With three children the name taken from the third child is
        stored. Otherwise the first component of the dotted name is
        stored, located at the whole dotted name, and the `ast.ImportExpr`
        gets True as its last argument.
        '''
        kids = node.children
        module_node = kids[0]
        dotted_name = self.visit(module_node)
        if len(kids) == 3:
            imported = ast.ImportExpr(0, dotted_name, False)
            alias_token = kids[2]
            bound = make_name(alias_token.value, STORE, alias_token.start, alias_token.end)
        else:
            imported = ast.ImportExpr(0, dotted_name, True)
            top_level = dotted_name.partition(".")[0]
            bound = make_name(top_level, STORE, module_node.start, module_node.end)
        set_location(imported, module_node)
        alias = ast.alias(imported, bound)
        set_location(alias, node)
        return alias

    def visit_dictsetmaker(self, node, ctx):
        '''Convert the contents of a `{...}` display (LOAD context only).
        It is treated as a dictionary when it starts with `**` or has a
        ":" in second position, and as a set otherwise.
        '''
        if ctx is not LOAD:
            context_error(node)
        kids = node.children
        if is_token(kids[0], "**"):
            return self._dictmaker(node)
        if len(kids) > 1 and is_token(kids[1], ":"):
            return self._dictmaker(node)
        return self._setmaker(node)

    # Helper for visit_dictsetmaker (for dictionaries)
    def _dictmaker(self, node):
        '''Build an `ast.DictComp` or an `ast.Dict` from a dictsetmaker node.

        The comprehension form is recognised as exactly four children
        `key : value <generators>` where the last child is not a comma.
        Otherwise the children are a sequence of `key : value` and
        `** mapping` entries, each followed by an optional comma.
        '''
        children = node.children
        is_comprehension = (
            len(children) == 4
            and is_token(children[1], ":")
            and not is_token(children[3], ",")
        )
        if is_comprehension:
            key = self.visit(children[0], LOAD)
            value = self.visit(children[2], LOAD)
            generators = self.visit(children[3])
            comp = ast.DictComp(key, value, generators)
            set_location(comp, node)
            return comp
        items = []
        pos = 0
        while pos < len(children):
            first = children[pos]
            if is_token(first, "**"):
                # `**` operand [`,`] -- three children per entry.
                operand = children[pos+1]
                item = ast.DictUnpacking(self.visit(operand, LOAD))
                set_location(item, first.start, operand.end)
                pos += 3
            else:
                # key `:` value [`,`] -- four children per entry.
                key = self.visit(first, LOAD)
                value_node = children[pos+2]
                value = self.visit(value_node, LOAD)
                item = ast.KeyValuePair(key, value)
                set_location(item, first.start, value_node.end)
                pos += 4
            items.append(item)
        result = ast.Dict(items)
        set_location(result, node)
        return result

    # Helper for visit_dictsetmaker (for sets)
    def _setmaker(self, node):
        '''Build an `ast.SetComp` or an `ast.Set` from a dictsetmaker node.

        Two children where the second is not a comma is the comprehension
        form; anything else is a comma-separated list of elements.
        '''
        children = node.children
        if len(children) != 2 or is_token(children[1], ","):
            # Plain set display: elements sit at the even positions.
            result = ast.Set(self._visit_list(children[::2], LOAD))
            set_location(result, node)
            return result
        elt = self.visit(children[0], LOAD)
        generators = self.visit(children[1])
        result = ast.SetComp(elt, generators)
        set_location(result, node)
        return result

    def visit_while_stmt(self, node):
        '''Convert a `while` statement into an `ast.While`.

        An `else` suite is present exactly when the node has seven
        children. The location covers the header only (children 0 to 2).
        '''
        children = node.children
        test = self.visit(children[1], LOAD)
        body = self.visit(children[3])
        orelse = self.visit(children[6]) if len(children) == 7 else None
        loop = ast.While(test, body, orelse)
        set_location(loop, children[0].start, children[2].end)
        return loop

    def visit_flow_stmt(self, node):
        '''Convert a `flow_stmt` node by delegating to its first child.'''
        inner = node.children[0]
        return self.visit(inner)

    def visit_break_stmt(self, node):
        '''Convert a `break` statement into an `ast.Break` located at `node`.'''
        stmt = ast.Break()
        set_location(stmt, node)
        return stmt

    def visit_continue_stmt(self, node):
        '''Convert a `continue` statement into an `ast.Continue` located at `node`.'''
        stmt = ast.Continue()
        set_location(stmt, node)
        return stmt

    def visit_return_stmt(self, node):
        '''Convert a `return` statement into an `ast.Return`.

        The value is the converted second child when the node has exactly
        two children, and `None` otherwise.
        '''
        value = None
        if len(node.children) == 2:
            value = self.visit(node.children[1], LOAD)
        stmt = ast.Return(value)
        set_location(stmt, node)
        return stmt

    def visit_raise_stmt(self, node):
        '''Convert a `raise` statement into an `ast.Raise`.

        Handles the bare form, `raise exc`, `raise exc from cause`, and the
        comma-separated form `raise type, inst[, tback]`. In the latter
        the first expression is stored as `type` instead of `exc`.
        '''
        stmt = ast.Raise()
        set_location(stmt, node)
        children = node.children
        if len(children) == 1:
            # Bare `raise`.
            return stmt
        stmt.exc = self.visit(children[1], LOAD)
        if len(children) <= 3:
            return stmt
        if is_token(children[2], "from"):
            stmt.cause = self.visit(children[3], LOAD)
            return stmt
        # Comma form: what was stored as `exc` is really the exception type.
        stmt.type = stmt.exc
        del stmt.exc
        stmt.inst = self.visit(children[3], LOAD)
        if len(children) == 6:
            stmt.tback = self.visit(children[5], LOAD)
        return stmt

    def visit_yield_stmt(self, node):
        '''Wrap the converted first child of `node` in an `ast.Expr`
        statement located at `node`.
        '''
        value = self.visit(node.children[0], LOAD)
        stmt = ast.Expr(value)
        set_location(stmt, node)
        return stmt

    def visit_yield_expr(self, node, ctx):
        '''Convert a yield expression into `ast.Yield` or `ast.YieldFrom`.

        A node with a single child is a bare `yield`. Otherwise the second
        child holds either `from <expr>` or the yielded expression as its
        own first child. Only the `LOAD` context is valid.
        '''
        if ctx is not LOAD:
            context_error(node)
        if len(node.children) == 1:
            result = ast.Yield(None)
        else:
            operand = node.children[1].children
            if is_token(operand[0], "from"):
                result = ast.YieldFrom(self.visit(operand[1], LOAD))
            else:
                result = ast.Yield(self.visit(operand[0], LOAD))
        set_location(result, node)
        return result

    def visit_try_stmt(self, node):
        '''Convert a `try` statement into an `ast.Try`.

        After the body (child 2) come zero or more handlers, each taking
        three children (except clause, separator token, suite). They are
        followed by an optional `else` block and an optional `finally`
        block. The location covers only the `try` header (up to child 1).
        '''
        children = node.children
        body = self.visit(children[2])
        handlers = []
        pos = 3
        # Except clauses are non-terminals, i.e. they have no `value`.
        while pos < len(children) and not hasattr(children[pos], "value"):
            clause = children[pos]
            exc_type, exc_name = self.visit(clause)
            handler_body = self.visit(children[pos+2])
            handler = ast.ExceptStmt(exc_type, exc_name, handler_body)
            # A handler's location is its header only: clause plus separator.
            set_location(handler, clause.start, children[pos+1].end)
            handlers.append(handler)
            pos += 3
        orelse = []
        if pos < len(children) and is_token(children[pos], "else"):
            orelse = self.visit(children[pos+2])
        finalbody = []
        if is_token(children[-3], "finally"):
            finalbody = self.visit(children[-1])
        result = ast.Try(body, orelse, handlers, finalbody)
        set_location(result, node.start, children[1].end)
        return result

    def visit_except_clause(self, node):
        '''Return the `(type, name)` pair of an except clause.

        The type (child 1, `LOAD` context) and the name (child 3, `STORE`
        context) are each `None` when the clause has too few children to
        contain them.
        '''
        children = node.children
        exc_type = self.visit(children[1], LOAD) if len(children) > 1 else None
        exc_name = self.visit(children[3], STORE) if len(children) > 3 else None
        return exc_type, exc_name

    def visit_del_stmt(self, node):
        '''Convert a `del` statement into an `ast.Delete`.

        The targets are every other child of the second child, converted
        in the `DEL` context. With no second child the target list is
        empty.
        '''
        targets = []
        if len(node.children) > 1:
            target_nodes = node.children[1].children[::2]
            targets = self._visit_list(target_nodes, DEL)
        stmt = ast.Delete(targets)
        set_location(stmt, node)
        return stmt

    # `subscriptlist` and `testlist1` nodes are converted by the very same
    # method that converts `testlist` nodes.
    visit_subscriptlist = visit_testlist
    visit_testlist1 = visit_testlist

    def visit_subscript(self, node, ctx):
        '''Convert a subscript node into a plain expression or an `ast.Slice`.

        A single child that is not `:` is an ordinary index and is
        converted in `ctx`. Otherwise the children fill up to three
        positional arguments of `ast.Slice`: each `:` moves on to the next
        argument, and arguments with no expression stay `None`.
        '''
        children = node.children
        if len(children) == 1 and not is_token(children[0], ":"):
            return self.visit(children[0], ctx)
        parts = [None] * 3
        slot = 0
        for child in children:
            if not is_token(child, ":"):
                parts[slot] = self.visit(child, LOAD)
                continue
            slot += 1
        result = ast.Slice(*parts)
        set_location(result, node)
        return result

    def visit_sliceop(self, node, ctx):
        '''Convert a `sliceop` node: return its converted second child when
        it has exactly two children, `None` otherwise. Only the `LOAD`
        context is valid.
        '''
        if ctx is not LOAD:
            context_error(node)
        if len(node.children) != 2:
            return None
        return self.visit(node.children[1], LOAD)

    def visit_assert_stmt(self, node):
        '''Convert an `assert` statement into an `ast.Assert`.

        The message (child 3) is only present when the node has more than
        two children; otherwise it is `None`.
        '''
        children = node.children
        test = self.visit(children[1], LOAD)
        msg = self.visit(children[3], LOAD) if len(children) > 2 else None
        stmt = ast.Assert(test, msg)
        set_location(stmt, node)
        return stmt

    def visit_for_stmt(self, node, is_async=False):
        '''Convert a `for` statement into an `ast.For`.

        An `else` suite is present exactly when the node has nine
        children. `is_async` is recorded on the result. The location
        covers the header only (children 0 to 4).
        '''
        children = node.children
        target = self.visit(children[1], STORE)
        iterable = self.visit(children[3], LOAD)
        body = self.visit(children[5])
        orelse = self.visit(children[8]) if len(children) == 9 else None
        loop = ast.For(target, iterable, body, orelse)
        loop.is_async = is_async
        set_location(loop, children[0].start, children[4].end)
        return loop

    def visit_global_stmt(self, node):
        '''Convert a `global` statement into an `ast.Global`. Any other
        leading keyword yields an `ast.Nonlocal`.

        The declared names are the values of the children at odd positions.
        '''
        keyword = node.children[0].value
        if keyword == "global":
            cls = ast.Global
        else:
            cls = ast.Nonlocal
        names = [leaf.value for leaf in node.children[1::2]]
        stmt = cls(names)
        set_location(stmt, node)
        return stmt

    def visit_lambdef(self, node, ctx):
        '''Convert a lambda expression into an `ast.Lambda`.

        The lambda body (last child) becomes the single `return` statement
        of an `ast.Function` named "lambda". When the second child is `:`
        the lambda has no parameters; otherwise that child is the
        parameter list. Only the `LOAD` context is valid.
        '''
        if ctx is not LOAD:
            context_error(node)
        body_node = node.children[-1]
        ret = ast.Return(self.visit(body_node, LOAD))
        set_location(ret, body_node)
        params_node = node.children[1]
        no_params = is_token(params_node, ":")
        if no_params:
            params = ([], None, [], None)
        else:
            params = self._get_parameters(params_node)
        args, vararg, kwonlyargs, kwarg = params
        func = ast.Function("lambda", [], args, vararg, kwonlyargs, kwarg, [ret], False)
        set_location(func, node)
        if no_params:
            arguments = ast.arguments([], [], [], None, None, [])
        else:
            arguments = self._get_defaults_and_annotations(params_node)
        result = ast.Lambda(arguments, func)
        set_location(result, node)
        return result

    # `old_lambdef` nodes are converted exactly like `lambdef` nodes.
    visit_old_lambdef = visit_lambdef

    # `vfpdef` nodes are converted exactly like `tfpdef` nodes.
    visit_vfpdef = visit_tfpdef

    def visit_vname(self, node, ctx):
        '''Convert a `vname` node into a `Name` for its first child, but
        only in the `PARAM` context. Any other context yields `None`.
        '''
        if ctx is not PARAM:
            return None
        leaf = node.children[0]
        return make_name(leaf.value, ctx, leaf.start, leaf.end)

    def visit_star_expr(self, node, ctx):
        '''Convert a starred expression into an `ast.Starred`. The operand
        (second child) is converted in the same context `ctx`.
        '''
        operand = self.visit(node.children[1], ctx)
        starred = ast.Starred(operand, ctx)
        set_location(starred, node)
        return starred

    def visit_with_stmt(self, node, is_async=False):
        '''Convert a `with` statement into nested `ast.With` nodes, one per
        with-item.

        The items are processed from last to first, so the first item
        becomes the outermost `With`, which is returned. Inner `With`
        nodes are located at their item. The outermost one is relocated to
        the statement header and is the only one that receives `is_async`.
        '''
        inner_body = self.visit(node.children[-1])
        # Items sit at every other position before the final two children.
        for item_node in node.children[-3:0:-2]:
            manager, target = self.visit(item_node)
            with_node = ast.With(manager, target, inner_body)
            set_location(with_node, item_node)
            inner_body = [with_node]
        # `with_node` is now the `With` for the first item.
        set_location(with_node, node.children[0].start, node.children[-2].end)
        with_node.is_async = is_async
        return with_node

    def visit_with_item(self, node):
        '''Return the `(context manager, optional variables)` pair of a
        with-item. The second element is `None` when the item consists of
        a single child.
        '''
        children = node.children
        manager = self.visit(children[0], LOAD)
        target = None if len(children) == 1 else self.visit(children[2], STORE)
        return manager, target

    def visit_async_stmt(self, node):
        '''Convert an `async` statement by visiting the wrapped statement
        (second child) with `True` as the extra argument.
        '''
        wrapped = node.children[1]
        return self.visit(wrapped, True)

    # `async_funcdef` nodes are converted exactly like `async_stmt` nodes.
    visit_async_funcdef = visit_async_stmt

    def visit_print_stmt(self, node):
        '''Convert a `print` statement into an `ast.Print`.

        With `print >>dest, ...` the destination is child 2 and the values
        start at child 4; otherwise there is no destination and the values
        start at child 1. The newline flag is false exactly when the
        statement ends with a comma.
        '''
        children = node.children
        dest = None
        value_nodes = children[1::2]
        if len(children) > 1 and is_token(children[1], ">>"):
            dest = self.visit(children[2], LOAD)
            value_nodes = children[4::2]
        values = self._visit_list(value_nodes, LOAD)
        nl = not is_token(children[-1], ",")
        stmt = ast.Print(dest, values, nl)
        set_location(stmt, node)
        return stmt

    def visit_exec_stmt(self, node):
        '''Convert an `exec` statement into an `ast.Exec`.

        The globals (child 3) and locals (child 5) expressions are `None`
        when the node is too short to contain them.
        '''
        children = node.children
        count = len(children)
        body = self.visit(children[1], LOAD)
        exec_globals = self.visit(children[3], LOAD) if count > 3 else None
        exec_locals = self.visit(children[5], LOAD) if count > 5 else None
        stmt = ast.Exec(body, exec_globals, exec_locals)
        set_location(stmt, node)
        return stmt

    def visit_special_operation(self, node, ctx):
        '''Convert a special-operation node into an `ast.SpecialOperation`.

        The operation name is the value of the first child. A node with
        exactly three children has no arguments; otherwise the arguments
        are every other child of the third child. Only the `LOAD` context
        is valid.
        '''
        if ctx is not LOAD:
            context_error(node)
        children = node.children
        name = children[0].value
        args = [] if len(children) == 3 else self._visit_list(children[2].children[::2], LOAD)
        operation = ast.SpecialOperation(name, args)
        set_location(operation, node)
        return operation

    def visit_string(self, node, ctx):
        '''Convert a (possibly implicitly concatenated and/or formatted)
        string into an expression.

        Runs of adjacent string parts are merged into a single `ast.Str`.
        Interpolated expressions are kept as they are. If this leaves
        exactly one expression, it is returned directly; otherwise the
        expressions are wrapped in an `ast.JoinedStr`. Only the `LOAD`
        context is valid.
        '''
        # String parts seen since the last interpolated expression.
        pending = []

        def flush_pending():
            '''Merge the pending string parts into one `ast.Str` and clear
            the pending list. Returns `None` if nothing is pending.'''
            if not pending:
                return None
            first = pending[0]
            if len(pending) == 1:
                merged = ast.Str(first.s, first.prefix, None)
            else:
                # Our string parts may be any combination of byte and unicode
                # strings, as this is valid in Python 2. We therefore decode
                # the strings into unicode before concatenating.
                text = "".join(decode_str(piece.s) for piece in pending)
                merged = ast.Str(text, first.prefix, pending[:])
            set_location(merged, (first.lineno, first.col_offset), pending[-1]._end)
            del pending[:]
            return merged

        if ctx is not LOAD:
            context_error(node)
        # Visiting a child may yield a single item or a list of items.
        flattened = []
        for visited in self._visit_list(node.children, LOAD):
            if isinstance(visited, list):
                flattened.extend(visited)
            else:
                flattened.append(visited)
        exprs = []
        for part in flattened:
            if part is None:
                # Conversion -- currently ignored.
                continue
            if isinstance(part, ast.StringPart):
                pending.append(part)
                continue
            assert isinstance(part, ast.expr), part
            literal = flush_pending()
            if literal:
                exprs.append(literal)
            exprs.append(part)
        literal = flush_pending()
        if literal:
            exprs.append(literal)
        if len(exprs) == 1:
            return exprs[0]
        joined = ast.JoinedStr(exprs)
        set_location(joined, node)
        return joined

    def visit_fstring_part(self, node, ctx):
        '''Convert the children of an f-string part, returning what
        `_visit_list` produces for them.

        `format_specifier` children are flattened first: they are replaced
        by their own children, except those named `FSTRING_SPEC`, which
        are dropped.
        '''
        to_visit = []
        for child in node.children:
            if child.name == 'format_specifier':
                to_visit.extend(
                    sub for sub in child.children if sub.name != 'FSTRING_SPEC'
                )
            else:
                to_visit.append(child)
        return self._visit_list(to_visit, ctx)

    def visit_format_specifier(self, node, ctx):
        '''Format specifiers are ignored: always returns `None`.
        Asserts that the context is `LOAD`.
        '''
        # This will currently never be visited, because `visit_fstring_part`
        # flattens `format_specifier` nodes before visiting.
        assert ctx is LOAD
        #Currently ignored
        return None

    def visit_CONVERSION(self, node, ctx):
        '''`CONVERSION` tokens produce no AST node: always returns `None`.'''
        return None

    def visit_COLON(self, node, ctx):
        '''`COLON` tokens produce no AST node: always returns `None`.'''
        return None

    def visit_EQUAL(self, node, ctx):
        '''`EQUAL` tokens produce no AST node: always returns `None`.'''
        return None

    def visit_FSTRING_START(self, node, ctx):
        '''Convert the opening part of an f-string like a `STRING` token
        and record its prefix as the innermost enclosing f-string prefix.
        '''
        opening = self.visit_STRING(node, ctx)
        # Entering an f-string: its prefix becomes the current one.
        prefix_stack = self.outer_prefix_stack
        prefix_stack.append(opening.prefix)
        return opening

    def visit_FSTRING_END(self, node, ctx):
        '''Convert the closing part of an f-string like a `STRING` token,
        then discard that f-string's prefix from the prefix stack.
        '''
        closing = self.visit_STRING(node, ctx)
        # Leaving the f-string: restore the prefix of the enclosing one.
        prefix_stack = self.outer_prefix_stack
        prefix_stack.pop()
        return closing

    # The middle parts of an f-string are converted like ordinary `STRING`
    # tokens. Unlike the start and end parts, they leave the prefix stack
    # untouched.
    visit_FSTRING_MID = visit_STRING

# In the following function, we decode bytes as `latin-1` in order to
# preserve the byte values present in the string. This is an undocumented
# feature of this encoding. See also the `test_python_sanity.py` test file
# in `/tests`.

def decode_str(s):
    '''Return `s` unchanged unless it is a `bytes` object, in which case
    it is decoded as latin-1.
    '''
    if not isinstance(s, bytes):
        return s
    return s.decode('latin-1')

def context_error(node):
    '''Raise a `SyntaxError` ("Invalid context") whose line and offset are
    taken from the start position of `node`.
    '''
    error = SyntaxError("Invalid context")
    line, column = node.start
    error.lineno = line
    error.offset = column
    raise error

def is_token(node, text):
    '''Holds if `node` is a token (terminal) and its textual value is `text`'''
    if not hasattr(node, "value"):
        # No `value` attribute, so this is not a token.
        return False
    return node.value == text

def get_node_value(node):
    '''Get the value from a NAME node,
        stripping redundant CPT nodes.

    Descends through nodes that have `children`, asserting that each has
    exactly one child, and returns the `value` of the node reached.
    '''
    current = node
    while hasattr(current, "children"):
        kids = current.children
        assert len(kids) == 1
        current = kids[0]
    return current.value

#Mapping from comparison operator strings to ast classes.
# Both spellings of inequality (`<>` and `!=`) map to `ast.NotEq`, and the
# two-word operators are keyed with a single space between the words.
COMP_OP_CLASSES = {
    "<": ast.Lt,
    "<=": ast.LtE,
    ">": ast.Gt,
    ">=": ast.GtE,
    "==": ast.Eq,
    "<>": ast.NotEq,
    "!=": ast.NotEq,
    "in": ast.In,
    "not in": ast.NotIn,
    "is": ast.Is,
    "is not": ast.IsNot,
}

#Mapping from multiplicative operator strings to ast classes.
# Besides `*`, `/`, `%` and `//` this also covers the matrix
# multiplication operator `@`.
TERM_OP_CLASSES = {
    '*': ast.Mult,
    '/': ast.Div,
    '%': ast.Mod,
    '//': ast.FloorDiv,
    '@': ast.MatMult,
}

#Mapping from unary (prefix) operator strings to ast classes.
# Note that `+` and `-` map to the unary classes `UAdd` and `USub` here,
# not to the binary addition and subtraction classes.
FACTOR_OP_CLASSES = {
    '+': ast.UAdd,
    '-': ast.USub,
    '~': ast.Invert,
}

#Mapping from augmented assignment operator strings to ast classes.
# Each `op=` token maps to the class of the underlying binary operator.
AUG_ASSIGN_OPS = {
    '+=': ast.Add,
    '-=': ast.Sub,
    '*=': ast.Mult,
    '/=': ast.Div,
    '%=': ast.Mod,
    '&=': ast.BitAnd,
    '|=': ast.BitOr,
    '^=': ast.BitXor,
    '<<=': ast.LShift,
    '>>=': ast.RShift,
    '**=': ast.Pow,
    '//=': ast.FloorDiv,
    '@=': ast.MatMult,
}

def make_name(name, ctx, start, end):
    '''Create a `Name` ast node for the variable called `name`, used in
    context `ctx` and located from `start` to `end`.
    '''
    result = ast.Name(ast.Variable(name), ctx)
    set_location(result, start, end)
    return result

def set_location(astnode, cptnode_or_start, end=None):
    '''Set the location of `astnode` from
    either the CPT node or pair of locations.

    When `end` is given, `cptnode_or_start` is a `(line, column)` start
    position. When `end` is omitted, `cptnode_or_start` is a CPT node
    whose `start` and `end` attributes are used.
    '''
    if end is not None:
        # Explicit pair of locations.
        astnode.lineno, astnode.col_offset = cptnode_or_start
        astnode._end = end
        return
    astnode.lineno, astnode.col_offset = cptnode_or_start.start
    astnode._end = cptnode_or_start.end

def split_full_prefix(s):
    """Splits a prefix (or a string starting with a prefix) into prefix and quote parts.

    The prefix is everything before the first `'`, `"` or `}` character.
    The quote part is that character, or all three characters if they form
    a triple quote. Raises `IndexError` if `s` contains none of these
    characters.
    """
    # First, locate the end of the prefix (and the start of the quotes)
    for quote_start, char in enumerate(s):
        if char in "'\"}":
            break
    else:
        raise IndexError("string index out of range")
    # Next, determine how long the quotes are: three characters for
    # triple-quoted strings, a single character otherwise.
    quote_length = 3 if s.startswith(("'''", '"""'), quote_start) else 1
    prefix_end = quote_start + quote_length
    return s[:quote_start], s[quote_start:prefix_end]


def split_string(s, outer_prefix):
    """Splits a string into prefix, quotes, and content.

    The prefix is returned lower-cased. `outer_prefix` supplies the prefix
    and quotes for string parts that start with `}`.
    """
    own_prefix, own_quotes = split_full_prefix(s)
    content_start = len(own_prefix) + len(own_quotes)

    # If the string starts with `}`, it is a non-initial string part of an f-string. In this case
    # we must use the prefix and quotes from the outer f-string.
    if s[0] != '}':
        prefix, quotes = own_prefix, own_quotes
    else:
        prefix, quotes = split_full_prefix(outer_prefix)

    # The string either ends with a `{` (if it comes before an interpolation inside an f-string)
    # or else it ends with the same quotes as it begins with.
    content_end = -1 if s[-1] == "{" else -len(quotes)

    return prefix.lower(), quotes, s[content_start:content_end]

def get_text(s, outer_prefix):
    """Returns a cleaned-up text version of the string, normalizing the quotes and removing any
    format string marker."""
    prefix, quotes, content = split_string(s, outer_prefix)
    # Drop leading/trailing `f` markers from the prefix and re-wrap the content in the quotes.
    pieces = (prefix.strip("fF"), quotes, content, quotes)
    return "".join(pieces)

def parse_string(s, logger, outer_prefix):
    '''Gets the prefix and escaped string text.

    Returns a pair of the full prefix (prefix plus quotes) and the string
    value. The value is obtained by evaluating the literal. If that fails,
    a warning is logged and the unevaluated content is used instead.
    '''
    prefix, quotes, original_content = split_string(s, outer_prefix)
    content = original_content
    try:
        padded = False
        # If the string ends with the same quote character as the outer quotes (and/or backslashes)
        # (e.g. the first string part of `f"""hello"{0}"""`), we must take care to not accidentally
        # create the ending quotes at the wrong place. (`literal_eval` would be unhappy with
        # `"""hello""""` as an input.) To do this, we insert an extra space at the end (that we
        # then must remember to remove later on).
        if content.endswith((quotes[0], '\\')):
            padded = True
            content += " "
        text = prefix.strip("fF") + quotes + content + quotes
        value = literal_eval(text)
    except Exception as ex:
        # Something has gone wrong, but we still have the original form - Should be OK.
        logger.warning("Unable to parse string %s: %s", text, ex)
        logger.traceback()
        padded = False
        value = original_content
    if isinstance(value, bytes):
        try:
            value = value.decode(sys.getfilesystemencoding())
        except UnicodeDecodeError:
            value = decode_str(value)
    if padded:
        # Remove the space that was appended above.
        value = value[:-1]
    return prefix + quotes, value

# NOTE(review): this constant is not referenced anywhere in the visible part
# of this file and may be vestigial -- confirm before relying on or removing it.
ESCAPES = ""

def get_numeric_value(node):
    '''Gets numeric value from a CPT leaf node.

    Understands digit-group underscores, the Python 2 long suffix
    (`l`/`L`) and Python 2 old-style octal literals (`0777`), in addition
    to everything `literal_eval` accepts.

    Raises `ParseError` if the token text is not a valid numeric literal.
    '''
    value = node.value
    value = value.replace("_", "")
    # Strip the Python 2 long suffix up front. A hexadecimal long such as
    # `0xDEADBEEFL` contains an `e` and therefore takes the first branch
    # below, where a suffix that is still attached would be rejected.
    if value[-1:] in (u'l', u'L'):
        value = value[:-1]
    chars = set(value.lower())
    try:
        if u'.' in chars or u'e' in chars or u'j' in chars:
            # Probable float or hex or imaginary
            return literal_eval(value)
        if len(value) > 1 and value[0] == u'0' and value[1] not in u'boxlBOXL':
            # Old-style octal
            value = u'0o' + value[1:]
        return literal_eval(value)
    except (ValueError, SyntaxError):
        # `literal_eval` reports text that does not parse at all (for
        # example the invalid octal `09`) as a `SyntaxError`, and text that
        # parses but is not a literal as a `ValueError`.
        raise ParseError("Not a valid numeric value", node.type, node.value, (node.start, node.end))

#This rewriting step is performed separately for two reasons.
# 1. It is complicated
# 2. In future, we may want to make the AST more like the syntax and less like the semantics.
#    Keeping step separate should make that a bit easier.
def rewrite_comp(node):
    '''Rewrite a comprehension node, in place, into function form.

    The `generators` (and `elt`, or `key`/`value`) attributes of `node`
    are replaced by:
      * `node.function`: a synthesized `ast.Function` taking a single
        parameter named `.0`, whose body is the nest of `for`/`if`
        statements around a `yield` of the element;
      * `node.iterable`: the iterable of the first generator.

    Nodes that already have a `function` attribute are left untouched, so
    calling this more than once on the same node is harmless.
    '''
    if hasattr(node, "function"):
        # Already rewritten.
        return
    gens = node.generators
    if hasattr(node, "elt"):
        elt = node.elt
        del node.elt
    else:
        # No `elt`: the node has `key` and `value` instead. The yielded
        # element is a tuple holding the value first and the key second,
        # located from the start of the key to the end of the value.
        elt = ast.Tuple([node.value, node.key], LOAD)
        elt.lineno = node.key.lineno
        elt.col_offset = node.key.col_offset
        elt._end = node.value._end
        del node.key
        del node.value
    # Innermost statement: an expression statement yielding the element.
    y = ast.Yield(elt)
    copy_location(elt, y)
    stmt = ast.Expr(y)
    copy_location(elt, stmt)
    # Wrap from the last generator outwards. Each generator contributes its
    # `if` conditions and then its `for` loop around what has been built so far.
    # NOTE(review): because each `if` wraps the previous statement, the first
    # condition of a generator ends up innermost -- confirm this is intended.
    for gen in reversed(gens[1:]):
        for if_ in gen.ifs:
            stmt = ast.If(if_, [stmt], None)
            copy_location(if_, stmt)
        stmt = ast.For(gen.target, gen.iter, [stmt], None)
        if getattr(gen, "is_async", False):
            stmt.is_async = True
        copy_location(node, stmt)
    # The first generator is handled separately: its loop iterates over the
    # function's `.0` parameter rather than over its own iterable.
    for if_ in gens[0].ifs:
        stmt = ast.If(if_, [stmt], None)
        copy_location(if_, stmt)
    p0 = ".0"
    pvar = ast.Variable(p0)
    arg = ast.Name(pvar, LOAD)
    copy_location(node, arg)
    stmt = ast.For(gens[0].target, arg, [stmt], None)
    if getattr(gens[0], "is_async", False):
        stmt.is_async = True
    copy_location(node, stmt)
    # A second `Name` for `.0`, this time as the parameter of the function.
    pvar = ast.Variable(p0)
    arg = ast.Name(pvar, PARAM)
    copy_location(node, arg)
    # The function name depends on the kind of comprehension (see COMP_NAMES).
    function = ast.Function(COMP_NAMES[type(node).__name__], [],[arg], None, None, None, [ stmt ])
    copy_location(node, function)
    node.function = function
    node.iterable = gens[0].iter
    del node.generators


# Name given to the function synthesized by `rewrite_comp` for each kind of
# comprehension, keyed by the class name of the comprehension node.
COMP_NAMES = {
    'GeneratorExp' : 'genexpr',
    'DictComp' : 'dictcomp',
    'ListComp' : 'listcomp',
    'SetComp' : 'setcomp'
}

def copy_location(src, dest):
    '''Copy location from `src` to `dest`: the start line, the start
    column and the end position.
    '''
    for attribute in ("lineno", "col_offset", "_end"):
        setattr(dest, attribute, getattr(src, attribute))

def convert(logger, cpt):
    '''Convert concrete parse tree as specified by blib2to3/Grammar.txt
    to the AST specified by semmle/python/master.py
    '''
    convertor = Convertor(logger)
    return convertor.visit(cpt)