null if it is not."""
+
+ return self._simple
+
    def matches(self, path):
        # True when the rule's compiled pattern matches `path` from the start.
        return bool(self._pattern.match(path))
+
+ def rewrite(self, path):
+ if self._virtual is None:
+ return None
+ matcher = self._pattern.match(path)
+ if not matcher:
+ return None
+ return self._virtual + matcher.group(1);
+
    def __unicode__(self):
        # The rule's original source text.
        return self._original

    def __lt__(self, other):
        # Rules order by the line on which they were declared.
        return self._line < other._line

    # Explicitly mark instances unhashable.
    __hash__ = None
+
+def _error(message, line):
+ raise Exception(u"%s (line %d)" % (message, line))
diff --git a/python/extractor/semmle/python/AstMeta.py b/python/extractor/semmle/python/AstMeta.py
new file mode 100644
index 00000000000..db0e7560fe2
--- /dev/null
+++ b/python/extractor/semmle/python/AstMeta.py
@@ -0,0 +1,560 @@
+'''Meta nodes for defining database relations'''
+
+from abc import abstractmethod
+
+from semmle.util import fprintf
+
# Prefix prepended to every generated relation and dbscheme type name.
PREFIX = 'py_'

# NOTE(review): only `order` is re-exported here; other public names in
# this module appear to be used by sibling modules -- confirm intended.
__all__ = [ 'order' ]


# node -> parent Node (possibly a UnionNode); populated by add_parent().
parent_nodes = {}
+
class Node(object):
    'Node in the attribute tree, describing relations'

    # Monotonic counter; gives every Node a creation-order index.
    next_id = 0

    def __init__(self):
        Node.next_id += 1
        self._index = Node.next_id
        # Cached result of the `unique_parent` property; None = unknown.
        self._unique_parent = None

    @property
    def parents(self):
        # Parent Node (possibly a UnionNode), or None for a root.
        return parent_of(self)

    def add_child(self, child):
        # Links are stored on the child's side (in `parent_nodes`).
        child.add_parent(self)

    def db_key(self, name):
        # Primary-key column declaration for this node's relation.
        return 'unique int ' + name + ' : ' + self.db_name()

    def is_sub_type(self):
        return False

    @staticmethod
    def is_union_type():
        return False

    def is_case_type(self):
        return False

    @staticmethod
    def is_list():
        return False

    @staticmethod
    def is_primitive():
        return False

    def prune(self, node_set):
        # Restrict this node to `node_set`; non-union nodes are unaffected.
        return self

    # NOTE(review): Node does not use ABCMeta, so @abstractmethod is
    # documentation only and is not enforced at instantiation.
    @abstractmethod
    def child_offsets(self, n):
        pass

    @abstractmethod
    def write_fields(self, out):
        pass

    @abstractmethod
    def ql_name(self):
        pass

    @property
    def unique_parent(self):
        # True if this node occupies at most one child offset in every
        # parent type; computed lazily and cached in _unique_parent.
        # NOTE(review): assumes self.parents is not None -- a parentless
        # node would raise AttributeError here; confirm callers avoid it.
        if self._unique_parent is None:
            parents = self.parents
            if len(parents.child_offsets(self)) < 2:
                self._unique_parent = True
            elif parents.is_union_type():
                self._unique_parent = False
                for t in parents.types:
                    if len(t.child_offsets(self)) > 1:
                        break
                else:
                    self._unique_parent = True
        return self._unique_parent
+
+
class PrimitiveNode(Node):
    'A primitive node: int, str, etc'

    def __init__(self, name, db_name, key, descriptive_name = None):
        Node.__init__(self)
        assert isinstance(name, str)
        self.name = name
        self.super_type = None
        self.layout = []
        self.fields = []
        self.subclasses = set()
        # `_key` is the column declaration used when this primitive is
        # referenced; `_db_name` is the dbscheme type name.
        self._key = key
        self._db_name = db_name
        if descriptive_name is None:
            self.descriptive_name = self.name
        else:
            self.descriptive_name = descriptive_name

    def db_key(self, name):
        # Primitives are referenced (`ref`), never used as primary keys.
        return self._key + ' ' + name + ' : ' + self._db_name + ' ref'

    @property
    def __name__(self):
        return self.name

    def ql_name(self):
        'Return Java style name if a schema type, otherwise the specified name'
        if self._db_name[0] == '@':
            return capitalize(self.name)
        else:
            return self._db_name

    def relation_name(self):
        return pluralize(PREFIX + self.name)

    def db_name(self):
        return self._db_name

    def add_parent(self, p):
        parent_nodes[self] = UnionNode.join(parent_of(self), p)

    def fixup(self):
        # Primitives have no children or layout to resolve.
        pass

    @staticmethod
    def is_primitive():
        return True

    def child_offsets(self, n):
        return set()

    def write_init(self, out):
        # NOTE(review): unlike ClassNode.write_init, the arguments here
        # are not quoted, so the generated line contains bare values --
        # confirm consumers of the generated file expect this.
        fprintf(out, "%s = PrimitiveNode(%s, %s, %s)\n", self.name,
                self.name, self._db_name, self._key)

    def write_fields(self, out):
        pass
+
+
def parent_of(node):
    'Return the parent Node recorded for `node`, or None if it has none.'
    return parent_nodes.get(node)
+
class ClassNode(Node):
    'A node corresponding to a single AST type'

    def __init__(self, name, super_type = None, descriptive_name = None):
        Node.__init__(self)
        assert isinstance(name, str)
        self.name = name
        self._db_name = name
        self.super_type = super_type
        self.layout = []
        # Inherit the super type's fields so layouts line up.
        if super_type:
            self.fields = list(super_type.fields)
        else:
            self.fields = []
        self.subclasses = set()
        if super_type:
            super_type.subclasses.add(self)
        if descriptive_name is None:
            self.descriptive_name = self.name.lower()
        else:
            self.descriptive_name = descriptive_name
        # A leading '$' suppresses appending the super type's name.
        if self.descriptive_name[0] == '$':
            self.descriptive_name = self.descriptive_name[1:]
        elif super_type and ' ' not in self.descriptive_name:
            self.descriptive_name += ' ' + super_type.descriptive_name

    def field(self, name, field_type, descriptive_name = None, artificial=False, parser_type = None):
        '''Declare a field; stored as a
        (name, type, doc-name, artificial, parser-type) tuple.'''
        if descriptive_name is None:
            self.fields.append((name, field_type, name, artificial, parser_type))
        else:
            self.fields.append((name, field_type, descriptive_name, artificial, parser_type))

    def is_stmt_or_expr_subclass(self):
        if self.super_type is None:
            return False
        return self.super_type.name in ('expr', 'stmt')

    def is_sub_type(self):
        if self.super_type is None:
            return False
        return self.super_type.is_case_type()

    def is_case_type(self):
        # A case type has subclasses and is itself reachable from a parent.
        return (self.subclasses
                and parent_of(self))

    def fixup(self):
        self.add_children()
        self.compute_layout()

    def add_parent(self, p):
        parent_nodes[self] = UnionNode.join(parent_of(self), p)
        if self.super_type:
            self.super_type.add_parent(p)

    def add_children(self):
        for f, f_node, _, _, _ in self.fields:
            self.add_child(f_node)

    def compute_layout(self):
        # Assign a column offset to each declared field.
        fields = self.fields
        lists = 0
        for f, f_node, _, _, _ in fields:
            if (isinstance(f_node, ListNode) and
                f_node.item_type.__name__ != 'stmt'):
                lists += 1
        index = 0
        inc = 1
        for f, f_node, docname, artificial, pt in fields:
            self.layout.append((f, f_node, index, docname, artificial, pt))
            index += inc

    def relation_name(self):
        return pluralize(PREFIX + self._db_name)

    def set_name(self, name):
        self._db_name = name

    @property
    def __name__(self):
        return self.name

    def ql_name(self):
        if self._db_name == 'str':
            return 'string'
        elif self._db_name in ('int', 'float'):
            # BUG FIX: was `return self.db_name`, which returned the
            # bound method object instead of the name string.
            return self._db_name
        name = self._db_name
        return ''.join(capitalize(part) for part in name.split('_'))

    def db_name(self):
        return '@' + PREFIX + self._db_name

    def dump(self, out):
        'Write a human-readable description of this node to `out`.'
        def yes_no(b):
            return "yes" if b else "no"
        fprintf(out, "'%s' :\n", self.name)
        fprintf(out, " QL name: %s\n", self.ql_name())
        fprintf(out, " Relation name: %s\n", self.relation_name())
        fprintf(out, " Is case_type %s\n", yes_no(self.is_case_type()))
        fprintf(out, " Super type: %s\n", self.super_type)
        fprintf(out, " Layout:\n")
        for l in self.layout:
            # BUG FIX: layout entries are 6-tuples; the old format string
            # had only five placeholders, raising TypeError at run time.
            fprintf(out, " %s, %s, %s, '%s', %s, %s\n" % l)
        fprintf(out, " Parents: %s\n\n", parent_of(self))

    def write_init(self, out):
        if self.super_type:
            fprintf(out, "%s = ClassNode('%s', %s)\n", self.name,
                    self.name, self.super_type.name)
        else:
            fprintf(out, "%s = ClassNode('%s')\n", self.name, self.name)

    def write_fields(self, out):
        for name, field_type, docname, _, _ in self.fields:
            fprintf(out, "%s.field('%s', %s, '%s')\n", self.name,
                    name, field_type.__name__, docname)
        if self.layout:
            fprintf(out, "\n")

    def __repr__(self):
        return "Node('%s')" % self.name

    def child_offsets(self, n):
        #Only used by db-scheme generator, so can be slow
        found = set()
        for name, node, offset, _, artificial, _ in self.layout:
            if node is n:
                found.add(offset)
        if self.subclasses:
            for s in self.subclasses:
                found.update(s.child_offsets(n))
        return found
+
class ListNode(Node):
    "Node corresponding to a list, parameterized by its member's type"

    def __init__(self, item_node, name=None):
        Node.__init__(self)
        self.list_type = None
        self.layout = ()
        self.super_type = None
        self.item_type = item_node
        self.subclasses = ()
        self.add_child(item_node)
        # Optional explicit name; otherwise derived from the item type.
        self.name = name

    def relation_name(self):
        return pluralize(PREFIX + self.__name__)

    def dump(self, out):
        fprintf(out, "List of %s\n", self.name)
        fprintf(out, " Parents: %s\n\n", parent_of(self))

    def write_init(self, out):
        fprintf(out, "%s = ListNode(%s)\n",
                self.__name__, self.item_type.__name__)

    def write_fields(self, out):
        pass

    @staticmethod
    def is_list():
        return True

    @property
    def __name__(self):
        if self.name is None:
            assert isinstance(self.item_type.__name__, str)
            return self.item_type.__name__ + '_list'
        else:
            return self.name

    @property
    def descriptive_name(self):
        return self.item_type.descriptive_name + ' list'

    def db_name(self):
        return '@' + PREFIX + self.__name__

    def ql_name(self):
        if self.name is not None:
            return capitalize(self.name)
        # NOTE(review): item_type is normally a Node, so the `is str` /
        # `is int` / `is float` tests below look unreachable -- confirm.
        if self.item_type is str:
            return 'StringList'
        elif self.item_type is int:
            return 'IntList'
        elif self.item_type is float:
            return 'FloatList'
        return capitalize(self.item_type.ql_name()) + 'List'

    def __repr__(self):
        return "ListNode(%s)" % self.__name__

    def fixup(self):
        pass

    def add_parent(self, p):
        parent_nodes[self] = UnionNode.join(parent_of(self), p)

    def child_offsets(self, n):
        return set((0,1,2,3))
+
# Cache of unions keyed by member-set, so equal unions are shared.
_all_unions = {}

class UnionNode(Node):
    'Node representing a set of AST types'

    def __init__(self, *types):
        Node.__init__(self)
        assert len(types) > 1
        self.types = frozenset(types)
        self.name = None
        self.super_type = None
        self.layout = []
        self.subclasses = ()
        #Whether this node should be visited in auto-generated extractor.
        self.visit = False

    @staticmethod
    def join(t1, t2):
        '''Return the union of `t1` and `t2` (either may be None),
        dropping members whose super type is also a member.'''
        if t1 is None:
            return t2
        if t2 is None:
            return t1
        if isinstance(t1, UnionNode):
            all_types = set(t1.types)
        else:
            all_types = set([t1])
        if isinstance(t2, UnionNode):
            all_types = all_types.union(t2.types)
        else:
            all_types.add(t2)
        # Remove subsumed members; restart the scan after each removal
        # since the set was mutated.
        done = False
        while not done:
            for n in all_types:
                if n.super_type in all_types:
                    all_types.remove(n)
                    break
            else:
                done = True
        return UnionNode._make_union(all_types)

    @staticmethod
    def _make_union(all_types):
        # A one-element "union" is just that element.
        if len(all_types) == 1:
            return next(iter(all_types))
        else:
            key = frozenset(all_types)
            if key in _all_unions:
                u = _all_unions[key]
            else:
                u = UnionNode(*all_types)
                _all_unions[key] = u
            return u

    def set_name(self, name):
        self.name = name

    @staticmethod
    def is_union_type():
        return True

    def write_init(self, out):
        fprintf(out, "%s = UnionNode(%s)\n", self.__name__,
                ', '.join(t.__name__ for t in self.types))
        if self.name:
            # BUG FIX: the method is `set_name`, not `setname`; the old
            # string generated a call to a non-existent method.
            fprintf(out, "%s.set_name('%s')\n", self.name, self.name)

    def write_fields(self, out):
        pass

    def fixup(self):
        pass

    def __hash__(self):
        return hash(self.types)

    def __eq__(self, other):
        assert len(self.types) > 1
        if isinstance(other, UnionNode):
            return self.types == other.types
        else:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    @property
    def __name__(self):
        # Auto-generated name joins the sorted member names.
        if self.name is None:
            names = [ n.__name__ for n in self.types ]
            return '_or_'.join(sorted(names))
        else:
            return self.name

    @property
    def descriptive_name(self):
        if self.name is None:
            names = [ n.descriptive_name for n in self.types ]
            return '_or_'.join(sorted(names))
        else:
            return self.name

    def db_name(self):
        return '@' + PREFIX + self.__name__

    def relation_name(self):
        return pluralize(PREFIX + self.__name__)

    def ql_name(self):
        if self.name is None:
            assert len(self.types) > 1
            names = [ n.ql_name() for n in self.types ]
            return 'Or'.join(sorted(names))
        else:
            return ''.join(capitalize(part) for part in self.name.split('_'))

    def add_parent(self, p):
        # A union has no identity of its own in the parent map; each
        # member records the parent individually.
        for n in self.types:
            n.add_parent(p)

    def child_offsets(self, n):
        res = set()
        for t in self.types:
            res = res.union(t.child_offsets(n))
        return res

    def prune(self, node_set):
        # Restrict members to `node_set`; may collapse to a single node,
        # or to None if no member survives.
        new_set = self.types.intersection(node_set)
        if len(new_set) == len(self.types):
            return self
        if not new_set:
            return None
        return UnionNode._make_union(new_set)
+
def shorten_name(node):
    '''Give an over-long auto-named union parent of `node` a shorter
    name derived from `node` itself.'''
    parent = parent_of(node)
    if not isinstance(parent, UnionNode):
        return
    auto_name = parent.__name__
    if len(auto_name) > 16 and len(auto_name) > len(node.__name__) + 4:
        parent.set_name(node.__name__ + '_parent')
+
+
def build_node_relations(nodes):
    '''Resolve parent links, names and layouts for `nodes`.

    Returns a dict mapping node name -> node for every surviving node,
    including synthesized union parents.
    '''
    nodes = set(nodes)
    for node in nodes:
        node.fixup()
    # Sort for a deterministic order, so generated names are stable.
    for node in sorted(nodes, key=lambda n : n.__name__):
        shorten_name(node)
    node_set = set(nodes)
    # Include union parents recorded for the primitive builtin types.
    for node in (str, int, float, bytes):
        p = parent_of(node)
        if p is not None:
            node_set.add(p)
    for node in nodes:
        p = parent_of(node)
        if p is not None:
            node_set.add(p)
    # Number the alternatives of each case type, in creation order.
    for n in nodes:
        sub_types = sorted(n.subclasses, key = lambda x : x._index)
        if n.is_case_type():
            for index, item in enumerate(sub_types):
                item.index = index
    for n in list(nodes):
        if not n.parents and n.is_list() and n.name is None:
            #Discard lists with no parents and no name as unreachable
            node_set.remove(n)
    #Prune unused nodes from unions.
    node_set = set(node.prune(node_set) for node in node_set)
    for node in node_set:
        if node in parent_nodes:
            parent_nodes[node] = parent_nodes[node].prune(node_set)
    for node in node_set:
        shorten_name(node)
    result_nodes = {}
    for n in node_set:
        # prune() may have produced None for fully-pruned unions.
        if n:
            result_nodes[n.__name__] = n
    return result_nodes
+
def pluralize(name):
    'Return the plural form of the (non-empty) relation name `name`.'
    last = name[-1]
    if last == 's':
        # '-as', '-is', '-ss', ... take '-es'; any other word ending in
        # 's' is assumed to be plural already.
        return name + 'es' if name[-2] in 'aiuos' else name
    if name.endswith('ex'):
        return name[:-2] + 'ices'
    if last == 'y':
        return name[:-1] + 'ies'
    return name + 's'
+
def capitalize(name):
    'Unlike the str method capitalize(), leave upper case letters alone'
    first, rest = name[0], name[1:]
    return first.upper() + rest
+
def order(node):
    '''Return a sort key for `node`: primitives first, classes ranked by
    inheritance depth, and lists/unions ranked above their members.'''
    if node.is_primitive():
        return 0
    if isinstance(node, ClassNode):
        depth = 1
        parent = node
        while parent.super_type:
            parent = parent.super_type
            depth += 1
        return depth
    if isinstance(node, ListNode):
        return 1 + order(node.item_type)
    assert isinstance(node, UnionNode)
    return 1 + max(order(member) for member in node.types)
diff --git a/python/extractor/semmle/python/__init__.py b/python/extractor/semmle/python/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/extractor/semmle/python/ast.py b/python/extractor/semmle/python/ast.py
new file mode 100644
index 00000000000..25b93cae72a
--- /dev/null
+++ b/python/extractor/semmle/python/ast.py
@@ -0,0 +1,949 @@
+'''
+Abstract syntax tree classes.
+This is designed to replace the stdlib ast module.
+Unlike the stdlib module, it is version independent.
+
+The classes in this file are based on the corresponding types in the cpython interpreter, copyright PSF.
+'''
+
+
class AstBase(object):
    'Common base for all AST nodes; carries the source-location slots.'

    __slots__ = "lineno", "col_offset", "_end",

    def __repr__(self):
        # Render every slot (unset ones as None) so partially initialised
        # nodes can still be printed while debugging.
        values = (repr(getattr(self, slot, None)) for slot in self.__slots__)
        return "%s(%s)" % (type(self).__name__, ",".join(values))
+
class Class(AstBase):
    'AST node representing a class definition'

    __slots__ = "name", "body",

    def __init__(self, name, body):
        self.name = name
        self.body = body


class Function(AstBase):
    'AST node representing a function definition'

    __slots__ = "is_async", "name", "type_parameters", "args", "vararg", "kwonlyargs", "kwarg", "body",

    def __init__(self, name, type_parameters, args, vararg, kwonlyargs, kwarg, body, is_async=False):
        self.name = name
        self.type_parameters = type_parameters
        self.args = args
        self.vararg = vararg
        self.kwonlyargs = kwonlyargs
        self.kwarg = kwarg
        self.body = body
        # True for `async def` functions.
        self.is_async = is_async
+
class Module(AstBase):
    # NOTE: no __slots__ here, so Module instances get a __dict__
    # (unlike most other node classes in this file).

    def __init__(self, body):
        self.body = body


class StringPart(AstBase):
    '''Implicitly concatenated part of string literal'''

    # NOTE(review): `text` looks like the raw source spelling and `s`
    # the cooked value -- confirm against the parser.
    __slots__ = "prefix", "text", "s",

    def __init__(self, prefix, text, s):
        self.prefix = prefix
        self.text = text
        self.s = s

class alias(AstBase):
    'A `value as asname` alias.'

    __slots__ = "value", "asname",

    def __init__(self, value, asname):
        self.value = value
        self.asname = asname
+
+
class arguments(AstBase):
    '''Formal-argument metadata: defaults and annotations, kept as
    parallel lists whose pairing is validated in __init__.'''

    __slots__ = "defaults", "kw_defaults", "annotations", "varargannotation", "kwargannotation", "kw_annotations",

    def __init__(self, defaults, kw_defaults, annotations, varargannotation, kwargannotation, kw_annotations):
        # Raised explicitly (rather than via `assert`) so the validation
        # survives running under `python -O`.
        if len(defaults) != len(annotations):
            raise AssertionError('len(defaults) != len(annotations)')
        if len(kw_defaults) != len(kw_annotations):
            raise AssertionError('len(kw_defaults) != len(kw_annotations)')
        self.kw_defaults = kw_defaults
        self.defaults = defaults
        self.annotations = annotations
        self.varargannotation = varargannotation
        self.kwargannotation = kwargannotation
        self.kw_annotations = kw_annotations
+
+
# Abstract node categories; concrete nodes subclass one of these.

class boolop(AstBase):
    pass

class cmpop(AstBase):
    pass

class comprehension(AstBase):
    'A single `for target in iter [if ...]` clause of a comprehension.'

    __slots__ = "is_async", "target", "iter", "ifs",

    def __init__(self, target, iter, ifs, is_async=False):
        self.target = target
        self.iter = iter
        self.ifs = ifs
        self.is_async = is_async

class dict_item(AstBase):
    pass

class type_parameter(AstBase):
    pass

class expr(AstBase):
    # NOTE(review): presumably set when the source had explicit
    # parentheses around the expression -- confirm.
    __slots__ = "parenthesised",

class expr_context(AstBase):
    pass

class operator(AstBase):
    pass

class stmt(AstBase):
    pass

class unaryop(AstBase):
    pass

class pattern(AstBase):
    __slots__ = "parenthesised",
+
# Boolean operators.

class And(boolop):
    pass

class Or(boolop):
    pass

# Comparison operators.

class Eq(cmpop):
    pass

class Gt(cmpop):
    pass

class GtE(cmpop):
    pass

class In(cmpop):
    pass

class Is(cmpop):
    pass

class IsNot(cmpop):
    pass

class Lt(cmpop):
    pass

class LtE(cmpop):
    pass

class NotEq(cmpop):
    pass

class NotIn(cmpop):
    pass
+
class DictUnpacking(dict_item):
    '`**value` inside a dict display.'

    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class KeyValuePair(dict_item):
    '`key: value` inside a dict display.'

    __slots__ = "key", "value",

    def __init__(self, key, value):
        self.key = key
        self.value = value


class keyword(dict_item):
    'An `arg=value` keyword argument.'

    __slots__ = "arg", "value",

    def __init__(self, arg, value):
        self.arg = arg
        self.value = value
+
class AssignExpr(expr):
    'Assignment expression `target := value`.'

    __slots__ = "target", "value",

    # NOTE: parameter order is (value, target), the reverse of __slots__.
    def __init__(self, value, target):
        self.value = value
        self.target = target


class Attribute(expr):
    '`value.attr`; `ctx` is the expr_context.'

    __slots__ = "value", "attr", "ctx",

    def __init__(self, value, attr, ctx):
        self.value = value
        self.attr = attr
        self.ctx = ctx


class Await(expr):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class BinOp(expr):
    'Binary operation `left op right`.'

    __slots__ = "left", "op", "right",

    def __init__(self, left, op, right):
        self.left = left
        self.op = op
        self.right = right


class BoolOp(expr):
    'Boolean operation `op` over `values`.'

    __slots__ = "op", "values",

    def __init__(self, op, values):
        self.op = op
        self.values = values


class Bytes(expr):
    'Bytes literal.'

    __slots__ = "s", "prefix", "implicitly_concatenated_parts",

    def __init__(self, s, prefix, implicitly_concatenated_parts):
        self.s = s
        self.prefix = prefix
        self.implicitly_concatenated_parts = implicitly_concatenated_parts


class Call(expr):
    'Call; positional and named arguments are kept in separate lists.'

    __slots__ = "func", "positional_args", "named_args",

    def __init__(self, func, positional_args, named_args):
        self.func = func
        self.positional_args = positional_args
        self.named_args = named_args
+
+
class ClassExpr(expr):
    'AST node representing class creation'

    __slots__ = "name", "type_parameters", "bases", "keywords", "inner_scope",

    def __init__(self, name, type_parameters, bases, keywords, inner_scope):
        self.name = name
        self.type_parameters = type_parameters
        self.bases = bases
        self.keywords = keywords
        self.inner_scope = inner_scope


class Compare(expr):
    'Chained comparison: `left ops[0] comparators[0] ops[1] ...`.'

    __slots__ = "left", "ops", "comparators",

    def __init__(self, left, ops, comparators):
        self.left = left
        self.ops = ops
        self.comparators = comparators


class Dict(expr):
    __slots__ = "items",

    def __init__(self, items):
        self.items = items


class DictComp(expr):
    # NOTE(review): `function` and `iterable` have no __init__ parameter;
    # presumably they are filled in by a later pass -- confirm.
    __slots__ = "key", "value", "generators", "function", "iterable",

    def __init__(self, key, value, generators):
        self.key = key
        self.value = value
        self.generators = generators


class Ellipsis(expr):
    # NOTE: intentionally shadows the `Ellipsis` builtin in this module.
    pass

class Filter(expr):
    '''Filtered expression in a template'''

    __slots__ = "value", "filter",

    def __init__(self, value, filter):
        self.value = value
        self.filter = filter


class FormattedValue(expr):
    'A formatted `{value!conversion:format_spec}` field of an f-string.'

    __slots__ = "value", "conversion", "format_spec",

    def __init__(self, value, conversion, format_spec):
        self.value = value
        self.conversion = conversion
        self.format_spec = format_spec
+
+
class FunctionExpr(expr):

    'AST node representing function creation'

    __slots__ = "name", "args", "returns", "inner_scope",

    def __init__(self, name, args, returns, inner_scope):
        self.name = name
        self.args = args
        self.returns = returns
        self.inner_scope = inner_scope


class GeneratorExp(expr):
    # NOTE(review): `function` and `iterable` are not set by __init__;
    # presumably filled in by a later pass -- confirm.
    __slots__ = "elt", "generators", "function", "iterable",

    def __init__(self, elt, generators):
        self.elt = elt
        self.generators = generators


class IfExp(expr):
    'Conditional expression `body if test else orelse`.'

    __slots__ = "test", "body", "orelse",

    def __init__(self, test, body, orelse):
        self.test = test
        self.body = body
        self.orelse = orelse


class ImportExpr(expr):
    '''AST node representing module import
    (roughly equivalent to the runtime call to __import__)'''

    __slots__ = "level", "name", "top",

    def __init__(self, level, name, top):
        self.level = level
        self.name = name
        self.top = top


class ImportMember(expr):
    '''AST node representing 'from import'. Similar to Attribute access,
    but during import'''

    __slots__ = "module", "name",

    def __init__(self, module, name):
        self.module = module
        self.name = name


class JoinedStr(expr):
    'An f-string: a sequence of literal and formatted parts.'

    __slots__ = "values",

    def __init__(self, values):
        self.values = values


class Lambda(expr):
    __slots__ = "args", "inner_scope",

    def __init__(self, args, inner_scope):
        self.args = args
        self.inner_scope = inner_scope


class List(expr):
    __slots__ = "elts", "ctx",

    def __init__(self, elts, ctx):
        self.elts = elts
        self.ctx = ctx


class ListComp(expr):
    # `function` and `iterable` are not set by __init__.
    __slots__ = "elt", "generators", "function", "iterable",

    def __init__(self, elt, generators):
        self.elt = elt
        self.generators = generators
+
# The `match` statement and its patterns.

class Match(stmt):
    '`match subject:` with a list of Case nodes.'

    __slots__ = "subject", "cases",

    def __init__(self, subject, cases):
        self.subject = subject
        self.cases = cases

class Case(stmt):
    'A `case pattern [if guard]: body` clause.'

    __slots__ = "pattern", "guard", "body",

    def __init__(self, pattern, guard, body):
        self.pattern = pattern
        self.guard = guard
        self.body = body

class Guard(expr):
    'The `if test` part of a case clause.'

    __slots__ = "test",

    def __init__(self, test):
        self.test = test

class MatchAsPattern(pattern):
    '`pattern as alias`.'

    __slots__ = "pattern", "alias",

    def __init__(self, pattern, alias):
        self.pattern = pattern
        self.alias = alias

class MatchOrPattern(pattern):
    'Alternatives separated by `|`.'

    __slots__ = "patterns",

    def __init__(self, patterns):
        self.patterns = patterns

class MatchLiteralPattern(pattern):
    __slots__ = "literal",

    def __init__(self, literal):
        self.literal = literal

class MatchCapturePattern(pattern):
    __slots__ = "variable",

    def __init__(self, variable):
        self.variable = variable

class MatchWildcardPattern(pattern):
    # The `_` pattern: matches anything, binds nothing.
    __slots__ = []

class MatchValuePattern(pattern):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value

class MatchSequencePattern(pattern):
    __slots__ = "patterns",

    def __init__(self, patterns):
        self.patterns = patterns

class MatchStarPattern(pattern):
    '`*target` inside a sequence pattern.'

    __slots__ = "target",

    def __init__(self, target):
        self.target = target

class MatchMappingPattern(pattern):
    __slots__ = "mappings",

    def __init__(self, mappings):
        self.mappings = mappings

class MatchDoubleStarPattern(pattern):
    '`**target` inside a mapping pattern.'

    __slots__ = "target",

    def __init__(self, target):
        self.target = target

class MatchKeyValuePattern(pattern):
    __slots__ = "key", "value",

    def __init__(self, key, value):
        self.key = key
        self.value = value

class MatchClassPattern(pattern):
    '`ClassName(positional..., keyword=...)` pattern.'

    __slots__ = "class_name", "positional", "keyword",

    def __init__(self, class_name, positional, keyword):
        self.class_name = class_name
        self.positional = positional
        self.keyword = keyword

class MatchKeywordPattern(pattern):
    '`attribute=value` inside a class pattern.'

    __slots__ = "attribute", "value",

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
+
class Name(expr):
    'Reference to a variable; `variable` is a Variable object.'

    __slots__ = "variable", "ctx",

    def __init__(self, variable, ctx):
        self.variable = variable
        self.ctx = ctx

    @property
    def id(self):
        # Expose the underlying variable's name, mirroring ast.Name.id.
        return self.variable.id

class Num(expr):
    # `n` is the numeric value; `text` is presumably the source
    # spelling -- confirm against the parser.
    __slots__ = "n", "text",

    def __init__(self, n, text):
        self.n = n
        self.text = text

class ParamSpec(type_parameter):
    __slots__ = "name",

    def __init__(self, name):
        self.name = name
+
+
+
class PlaceHolder(expr):
    '''PlaceHolder variable in template ($name)'''

    __slots__ = "variable", "ctx",

    def __init__(self, variable, ctx):
        self.variable = variable
        self.ctx = ctx

    @property
    def id(self):
        # Expose the underlying variable's name, mirroring Name.id.
        return self.variable.id

class Repr(expr):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class Set(expr):
    __slots__ = "elts",

    def __init__(self, elts):
        self.elts = elts


class SetComp(expr):
    # `function` and `iterable` are not set by __init__.
    __slots__ = "elt", "generators", "function", "iterable",

    def __init__(self, elt, generators):
        self.elt = elt
        self.generators = generators
+
+
class Slice(expr):
    '''AST node for a slice as a subclass of expr to simplify Subscripts'''

    __slots__ = "start", "stop", "step",

    def __init__(self, start, stop, step):
        self.start = start
        self.stop = stop
        self.step = step


class Starred(expr):
    '`*value` in a call or assignment target.'

    __slots__ = "value", "ctx",

    def __init__(self, value, ctx):
        self.value = value
        self.ctx = ctx


class Str(expr):
    'String literal.'

    __slots__ = "s", "prefix", "implicitly_concatenated_parts",

    def __init__(self, s, prefix, implicitly_concatenated_parts):
        self.s = s
        self.prefix = prefix
        self.implicitly_concatenated_parts = implicitly_concatenated_parts


class Subscript(expr):
    '`value[index]`.'

    __slots__ = "value", "index", "ctx",

    def __init__(self, value, index, ctx):
        self.value = value
        self.index = index
        self.ctx = ctx


class TemplateDottedNotation(expr):
    '''Unified dot notation expression in a template'''

    __slots__ = "value", "attr", "ctx",

    def __init__(self, value, attr, ctx):
        self.value = value
        self.attr = attr
        self.ctx = ctx


class Tuple(expr):
    __slots__ = "elts", "ctx",

    def __init__(self, elts, ctx):
        self.elts = elts
        self.ctx = ctx


class TypeAlias(stmt):
    '`type name[type_parameters] = value`.'

    __slots__ = "name", "type_parameters", "value",

    def __init__(self, name, type_parameters, value):
        self.name = name
        self.type_parameters = type_parameters
        self.value = value

class TypeVar(type_parameter):
    __slots__ = "name", "bound",

    def __init__(self, name, bound):
        self.name = name
        self.bound = bound

class TypeVarTuple(type_parameter):
    __slots__ = "name",

    def __init__(self, name):
        self.name = name

class UnaryOp(expr):
    'Unary operation `op operand`.'

    __slots__ = "op", "operand",

    def __init__(self, op, operand):
        self.op = op
        self.operand = operand
+
+
class Yield(expr):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class YieldFrom(expr):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class SpecialOperation(expr):
    # NOTE: __slots__ is a 2-tuple despite the missing trailing comma.
    __slots__ = "name", "arguments"

    def __init__(self, name, arguments):
        self.name = name
        self.arguments = arguments
+
+
# Expression contexts (load/store/delete and augmented variants).

class AugLoad(expr_context):
    pass

class AugStore(expr_context):
    pass

class Del(expr_context):
    pass

class Load(expr_context):
    pass

class Param(expr_context):
    pass

class Store(expr_context):
    pass

# Binary operators.

class Add(operator):
    pass

class BitAnd(operator):
    pass

class BitOr(operator):
    pass

class BitXor(operator):
    pass

class Div(operator):
    pass

class FloorDiv(operator):
    pass

class LShift(operator):
    pass

class MatMult(operator):
    pass

class Mod(operator):
    pass

class Mult(operator):
    pass

class Pow(operator):
    pass

class RShift(operator):
    pass

class Sub(operator):
    pass
+
class AnnAssign(stmt):
    'Annotated assignment `target: annotation = value`.'

    __slots__ = "value", "annotation", "target",

    # NOTE: parameter order is (value, annotation, target).
    def __init__(self, value, annotation, target):
        self.value = value
        self.annotation = annotation
        self.target = target


class Assert(stmt):
    __slots__ = "test", "msg",

    def __init__(self, test, msg):
        self.test = test
        self.msg = msg


class Assign(stmt):
    __slots__ = "targets", "value",

    def __init__(self, value, targets):
        self.value = value
        # Multiple targets for chained assignment: `a = b = value`.
        assert isinstance(targets, list)
        self.targets = targets


class AugAssign(stmt):
    # The whole `target op= value` operation is stored as one node.
    __slots__ = "operation",

    def __init__(self, operation):
        self.operation = operation


class Break(stmt):
    pass

class Continue(stmt):
    pass

class Delete(stmt):
    __slots__ = "targets",

    def __init__(self, targets):
        self.targets = targets
+
+
class ExceptStmt(stmt):
    '''AST node for except handler, as a subclass of stmt in order
    to better support location and flow control'''

    __slots__ = "type", "name", "body",

    def __init__(self, type, name, body):
        self.type = type
        self.name = name
        self.body = body


class ExceptGroupStmt(stmt):
    '''AST node for except* handler, as a subclass of stmt in order
    to better support location and flow control'''

    __slots__ = "type", "name", "body",

    def __init__(self, type, name, body):
        self.type = type
        self.name = name
        self.body = body


class Exec(stmt):
    'The (Python 2) `exec body in globals, locals` statement.'

    __slots__ = "body", "globals", "locals",

    def __init__(self, body, globals, locals):
        self.body = body
        self.globals = globals
        self.locals = locals


class Expr(stmt):
    'An expression used as a statement.'

    __slots__ = "value",

    def __init__(self, value):
        self.value = value
+
+
class For(stmt):
    __slots__ = "is_async", "target", "iter", "body", "orelse",

    def __init__(self, target, iter, body, orelse, is_async=False):
        self.target = target
        self.iter = iter
        self.body = body
        self.orelse = orelse
        # True for `async for` loops.
        self.is_async = is_async


class Global(stmt):
    __slots__ = "names",

    def __init__(self, names):
        self.names = names


class If(stmt):
    __slots__ = "test", "body", "orelse",

    def __init__(self, test, body, orelse):
        self.test = test
        self.body = body
        self.orelse = orelse


class Import(stmt):
    __slots__ = "names",

    def __init__(self, names):
        self.names = names


class ImportFrom(stmt):
    __slots__ = "module",

    def __init__(self, module):
        self.module = module


class Nonlocal(stmt):
    __slots__ = "names",

    def __init__(self, names):
        self.names = names


class Pass(stmt):
    pass

class Print(stmt):
    'The (Python 2) `print` statement; `nl` is the trailing-newline flag.'

    __slots__ = "dest", "values", "nl",

    def __init__(self, dest, values, nl):
        self.dest = dest
        self.values = values
        self.nl = nl
+
+
class Raise(stmt):
    # Carries both Python 3 style (exc, cause) and legacy Python 2 style
    # (type, inst, tback) fields. There is no __init__, so the fields
    # are presumably assigned individually by the parser -- confirm.
    __slots__ = "exc", "cause", "type", "inst", "tback",


class Return(stmt):
    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class TemplateWrite(stmt):
    '''Template text'''

    __slots__ = "value",

    def __init__(self, value):
        self.value = value


class Try(stmt):
    __slots__ = "body", "orelse", "handlers", "finalbody",

    def __init__(self, body, orelse, handlers, finalbody):
        self.body = body
        self.orelse = orelse
        self.handlers = handlers
        self.finalbody = finalbody


class While(stmt):
    __slots__ = "test", "body", "orelse",

    def __init__(self, test, body, orelse):
        self.test = test
        self.body = body
        self.orelse = orelse


class With(stmt):
    __slots__ = "is_async", "context_expr", "optional_vars", "body",

    def __init__(self, context_expr, optional_vars, body, is_async=False):
        self.context_expr = context_expr
        self.optional_vars = optional_vars
        self.body = body
        # True for `async with` blocks.
        self.is_async = is_async


# Unary operators.

class Invert(unaryop):
    pass

class Not(unaryop):
    pass

class UAdd(unaryop):
    pass

class USub(unaryop):
    pass
+
+
class Variable(object):
    'A variable, identified by name (`id`) within a given scope.'

    def __init__(self, var_id, scope = None):
        # The id must be a real str; fail fast otherwise.
        assert isinstance(var_id, str), type(var_id)
        self.id = var_id
        self.scope = scope

    def __repr__(self):
        return 'Variable(%r, %r)' % (self.id, self.scope)

    def __eq__(self, other):
        # Only Variables can compare equal, and comparison requires both
        # scopes to have been resolved first.
        if type(other) is not Variable:
            return False
        if self.scope is None or other.scope is None:
            raise TypeError("Scope not set")
        return self.id == other.id and self.scope == other.scope

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        if self.scope is None:
            raise TypeError("Scope not set")
        return 391246 ^ hash(self.id) ^ hash(self.scope)

    def is_global(self):
        # Global variables are exactly those scoped to a Module.
        return isinstance(self.scope, Module)
+
def iter_fields(node):
    '''Yield (slot_name, value) pairs for each slot of `node` that has
    been assigned; slots never set are silently skipped.'''
    for slot in node.__slots__:
        try:
            yield slot, getattr(node, slot)
        except AttributeError:
            continue
diff --git a/python/extractor/semmle/python/extractor.py b/python/extractor/semmle/python/extractor.py
new file mode 100644
index 00000000000..cafb7ff4ec0
--- /dev/null
+++ b/python/extractor/semmle/python/extractor.py
@@ -0,0 +1,284 @@
+import sys
+import os
+import inspect
+import pkgutil
+from semmle.python import ast
+
+from semmle.python.passes.exports import ExportsPass
+from semmle.python.passes.lexical import LexicalPass
+from semmle.python.passes.flow import FlowPass
+from semmle.python.passes.ast_pass import ASTPass
+from semmle.python.passes.objects import ObjectPass
+from semmle.util import VERSION, uuid, get_analysis_version, get_analysis_major_version
+from semmle.util import makedirs, get_source_file_tag, TrapWriter, base64digest
+from semmle.cache import Cache
+from semmle.logging import WARN, syntax_error_message, Logger
+from semmle.profiling import timers
+
# Cache-key prefix for unsplit trap files; includes the extractor VERSION so
# caches are invalidated when the extractor changes.
UTRAP_KEY = 'utrap%s' % VERSION

__all__ = [ 'Extractor', 'CachingExtractor' ]

# Types of interpreter/option values that are recorded as flags in the trap
# (see Extractor.write_interpreter_data).
FLAG_SAVE_TYPES = float, complex, bool, int, bytes, str
+
class Extractor(object):
    '''The extractor controls the execution of the all the
    specialised passes, writing each module's results as a trap file and
    copying its source into the source archive.'''

    def __init__(self, trap_folder, src_archive, options, logger: Logger, diagnostics_writer):
        assert trap_folder
        self.trap_folder = trap_folder
        self.src_archive = src_archive
        self.object_pass = ObjectPass()
        # Per-module extraction passes, run in this order.
        self.passes = [
            ASTPass(),
            ExportsPass(),
            FlowPass(options.split, options.prune, options.unroll, logger)
        ]
        self.lexical = LexicalPass()
        # Maps module tag -> virtual path of its source; written out in close().
        self.files = {}
        self.options = options
        self.handle_syntax_errors = not options.no_syntax_errors
        self.logger = logger
        self.diagnostics_writer = diagnostics_writer

    def _handle_syntax_error(self, module, ex):
        '''Record the syntax error `ex` for `module` (diagnostics plus a
        small trap file), returning an empty Module AST so that the other
        passes still produce consistent output.'''
        # Write out diagnostics for the syntax error.
        error = syntax_error_message(ex, module)
        self.diagnostics_writer.write(error)

        # Emit trap for the syntax error
        self.logger.debug("Emitting trap for syntax error in %s", module.path)
        writer = TrapWriter()
        module_id = writer.get_node_id(module)
        # Report syntax error as an alert.
        # Ensure line and col are ints (not None).
        line = ex.lineno if ex.lineno else 0
        if line > len(module.lines):
            line = len(module.lines)
            # Clamp at 0 so an empty final line cannot yield a negative column.
            col = max(0, len(module.lines[-1]) - 1)
        else:
            col = ex.offset if ex.offset else 0
        loc_id = writer.get_unique_id()
        writer.write_tuple(u'locations_ast', 'rrdddd',
                           loc_id, module_id, 0, 0, 0, 0)
        syntax_id = u'syntax%d:%d' % (line, col)
        writer.write_tuple(u'locations_ast', 'nrdddd',
                           syntax_id, module_id, line, col+1, line, col+1)
        writer.write_tuple(u'py_syntax_error_versioned', 'nss', syntax_id, ex.msg, get_analysis_major_version())
        trap = writer.get_compressed()
        self.trap_folder.write_trap("syntax-error", module.path, trap)
        #Create an AST equivalent to an empty file, so that the other passes produce consistent output.
        return ast.Module([])

    def _extract_trap_file(self, ast, comments, path):
        '''Run all passes over `ast`, returning the compressed trap data,
        or None if any pass failed.'''
        writer = TrapWriter()
        file_tag = get_source_file_tag(self.src_archive.get_virtual_path(path))
        writer.write_tuple(u'py_Modules', 'g', ast.trap_name)
        writer.write_tuple(u'py_module_path', 'gg', ast.trap_name, file_tag)
        try:
            for ex in self.passes:
                with timers[ex.name]:
                    if isinstance(ex, FlowPass):
                        ex.set_filename(path)
                    ex.extract(ast, writer)
            with timers['lexical']:
                self.lexical.extract(ast, comments, writer)
            with timers['object']:
                self.object_pass.extract(ast, path, writer)
        except Exception as ex:
            self.logger.error("Exception extracting module %s: %s", path, ex)
            self.logger.traceback(WARN)
            return None
        return writer.get_compressed()

    def process_source_module(self, module):
        '''Process a Python source module. Checks that module has valid syntax,
        then passes passes ast, source, etc to `process_module`
        '''
        try:
            #Ensure that module does not have invalid syntax before extracting it.
            ast = module.ast
        except SyntaxError as ex:
            self.logger.debug("handle syntax errors is %s", self.handle_syntax_errors)
            if self.handle_syntax_errors:
                ast = self._handle_syntax_error(module, ex)
            else:
                return None
        ast.name = module.name
        ast.kind = module.kind
        ast.trap_name = module.trap_name
        return self.process_module(ast, module.trap_name, module.bytes_source,
                                   module.path, module.comments)

    def process_module(self, ast, module_tag, bytes_source, path, comments):
        'Process a module, generating the trap file for that module'
        self.logger.debug(u"Populating trap file for %s", path)
        ast.trap_name = module_tag
        trap = self._extract_trap_file(ast, comments, path)
        if trap is None:
            return None
        with timers['trap']:
            self.trap_folder.write_trap("python", path, trap)
        try:
            with timers['archive']:
                self.copy_source(bytes_source, module_tag, path)
        except Exception:
            # Failure to archive the source must not abort extraction; log
            # the traceback via the logger (consistent with the other passes)
            # instead of printing directly to stderr.
            self.logger.traceback(WARN)
        return trap

    def copy_source(self, bytes_source, module_tag, path):
        '''Record the module's virtual path and write its source into the
        source archive. A None source is silently skipped.'''
        if bytes_source is None:
            return
        self.files[module_tag] = self.src_archive.get_virtual_path(path)
        self.src_archive.write(path, bytes_source)

    def write_interpreter_data(self, options):
        '''Write interpreter data, such as version numbers and flags.'''

        def write_flag(name, value):
            writer.write_tuple(u'py_flags_versioned', 'uus', name, value, get_analysis_major_version())

        def write_flags(obj, prefix):
            # Record every public, simple-valued attribute of obj as a flag.
            pre = prefix + u"."
            for name, value in inspect.getmembers(obj):
                if name[0] == "_":
                    continue
                if type(value) in FLAG_SAVE_TYPES:
                    write_flag(pre + name, str(value))

        writer = TrapWriter()
        for index, name in enumerate((u'major', u'minor', u'micro', u'releaselevel', u'serial')):
            writer.write_tuple(u'py_flags_versioned', 'sss', u'extractor_python_version.' + name, str(sys.version_info[index]), get_analysis_major_version())
        write_flags(sys.flags, u'flags')
        write_flags(sys.float_info, u'float')
        write_flags(self.options, u'options')
        write_flag(u'sys.prefix', sys.prefix)
        path = os.pathsep.join(os.path.abspath(p) for p in options.sys_path)
        write_flag(u'sys.path', path)
        # `path` is reused below for the extractor.path flag.
        if options.path is None:
            path = ''
        else:
            path = os.pathsep.join(self.src_archive.get_virtual_path(p) for p in options.path)
        if options.language_version:
            write_flag(u'language.version', options.language_version[-1])
        else:
            write_flag(u'language.version', get_analysis_version())
        write_flag(u'extractor.path', path)
        write_flag(u'sys.platform', sys.platform)
        write_flag(u'os.sep', os.sep)
        write_flag(u'os.pathsep', os.pathsep)
        write_flag(u'extractor.version', VERSION)
        if options.context_cost is not None:
            write_flag(u'context.cost', options.context_cost)
        self.trap_folder.write_trap("flags", "$flags", writer.get_compressed())
        if get_analysis_major_version() == 2:
            # Copy the pre-extracted builtins trap
            builtins_trap_data = pkgutil.get_data('semmle.data', 'interpreter2.trap')
            self.trap_folder.write_trap("interpreter", '$interpreter2', builtins_trap_data, extension=".trap")
        else:
            writer = TrapWriter()
            self.object_pass.write_special_objects(writer)
            self.trap_folder.write_trap("interpreter", '$interpreter3', writer.get_compressed())
        # Copy stdlib trap
        if get_analysis_major_version() == 2:
            stdlib_trap_name = '$stdlib_27.trap'
        else:
            stdlib_trap_name = '$stdlib_33.trap'
        stdlib_trap_data = pkgutil.get_data('semmle.data', stdlib_trap_name)
        self.trap_folder.write_trap("stdlib", stdlib_trap_name[:-5], stdlib_trap_data, extension=".trap")

    @staticmethod
    def from_options(options, trap_dir, archive, logger: Logger, diagnostics_writer):
        '''Convenience method to create extractor from options'''
        # Initialise first so the except clause cannot see an unbound name
        # if reading options.trap_cache itself raises.
        trap_copy_dir = None
        try:
            trap_copy_dir = options.trap_cache
            caching_extractor = CachingExtractor(trap_copy_dir, options, logger)
        except Exception as ex:
            if options.verbose and trap_copy_dir is not None:
                print ("Failed to create caching extractor: " + str(ex))
            caching_extractor = None
        worker = Extractor(trap_dir, archive, options, logger, diagnostics_writer)
        if caching_extractor:
            caching_extractor.set_worker(worker)
            return caching_extractor
        else:
            return worker

    def stop(self):
        pass

    def close(self):
        'close() must be called, or some information will be not be written'
        #Add name tag to file name, so that multiple extractors do not overwrite each other
        if self.files:
            trapwriter = TrapWriter()
            for filepath in self.files.values():
                trapwriter.write_file(filepath)
            self.trap_folder.write_trap('folders', uuid('python') + '/$files', trapwriter.get_compressed())
        # Reset to an empty dict (was `set()`, which changed the attribute's
        # type from the mapping created in __init__).
        self.files = {}
        for name, timer in sorted(timers.items()):
            self.logger.debug("Total time for pass '%s': %0.0fms", name, timer.elapsed)
+
+
def hash_combine(x, y):
    '''Combine the strings `x` and `y` into a single digest string.'''
    return base64digest(u":".join((x, y)))
+
+
class CachingExtractor(object):
    '''Extractor wrapper that caches trap files keyed on module contents.

    The caching extractor has a two stage initialization process.
    After creating the extractor (which will check that the cachedir is valid)
    set_worker(worker) must be called before the CachingExtractor is valid'''

    def __init__(self, cachedir, options, logger: Logger):
        if cachedir is None:
            raise IOError("No cache directory")
        makedirs(cachedir)
        self.worker = None
        self.cache = Cache.for_directory(cachedir, options.verbose)
        self.logger = logger
        self.split = options.split

    def set_worker(self, worker):
        'Set the underlying (non-caching) Extractor that does the real work.'
        self.worker = worker

    def get_cache_key(self, module):
        '''Compute the cache key for `module`, derived from its path, its
        source and (when not splitting) the extractor version.'''
        key = hash_combine(module.path, module.source)
        if not self.split:
            #Use different key, as not splitting will modify the trap file.
            key = hash_combine(UTRAP_KEY, key)
        return hash_combine(key, module.source)

    def process_source_module(self, module):
        '''Process a Python source module. First look up trap file in cache.
        If no cached trap file is found, then delegate to normal extractor.
        '''
        if self.worker is None:
            raise Exception("worker is not set")
        key = self.get_cache_key(module)
        trap = self.cache.get(key)
        if trap is None:
            trap = self.worker.process_source_module(module)
            if trap is not None:
                self.cache.set(key, trap)
        else:
            self.logger.debug(u"Found cached trap file for %s", module.path)
            self.worker.trap_folder.write_trap("python", module.path, trap)
            try:
                self.worker.copy_source(module.bytes_source, module.trap_name, module.path)
            except Exception:
                self.logger.traceback(WARN)
        return trap

    def process_module(self, ast, module_tag, source_code, path, comments):
        # Return the worker's trap so callers see the same interface as
        # Extractor.process_module (previously the result was discarded).
        return self.worker.process_module(ast, module_tag, source_code, path, comments)

    def close(self):
        self.worker.close()

    def write_interpreter_data(self, sys_path):
        self.worker.write_interpreter_data(sys_path)

    def stop(self):
        self.worker.stop()
diff --git a/python/extractor/semmle/python/finder.py b/python/extractor/semmle/python/finder.py
new file mode 100644
index 00000000000..632ef920d05
--- /dev/null
+++ b/python/extractor/semmle/python/finder.py
@@ -0,0 +1,377 @@
+'''
+Classes and functions for converting module names into paths and Extractables.
+Implements standard Python import semantics, and is designed to be extensible
+to handle additional features like stub and template files.
+'''
+
+import sys
+import imp
+import os.path
+from semmle.util import FileExtractable, FolderExtractable, BuiltinModuleExtractable, PY_EXTENSIONS, get_analysis_major_version
+from semmle.python.modules import PythonSourceModule, is_script
+
class Module(object):
    '''A module. Modules are approximations
    to Python module objects and are used for
    analyzing imports.'''

    IS_PACKAGE = False
    path = None
    respect_init = True

    def __init__(self, name, package):
        self.name = name
        self.package = package

    def get_sub_module(self, name):
        '''gets the (immediate) sub-module with the given name'''
        raise NotImplementedError()

    def all_sub_modules(self):
        '''returns an iterable of all the sub-modules of this module'''
        raise NotImplementedError()

    def get_extractable(self):
        '''gets the Extractable for this module'''
        raise NotImplementedError()

    def find(self, name):
        '''Returns the named sub-module of this module if this module
        is a package, otherwise returns `None`'''
        if '.' not in name:
            return self.get_sub_module(name)
        #Resolve the first component, then recurse on the remainder.
        top, _, rest = name.partition(".")
        pkg = self.get_sub_module(top)
        if not pkg:
            return None
        return pkg.find(rest)

    def is_package(self):
        return self.IS_PACKAGE
+
class PyModule(Module):
    ' A Python source code module'

    def __init__(self, name, package, path):
        Module.__init__(self, name, package)
        # Path must be a real (str) filesystem path to the .py file.
        assert isinstance(path, str)
        self.path = path

    def get_sub_module(self, name):
        # Plain source modules have no sub-modules.
        return None

    def all_sub_modules(self):
        return ()

    def get_extractable(self):
        return FileExtractable(self.path)

    def load(self, logger=None):
        '''Parse the file and return it as a PythonSourceModule.'''
        return PythonSourceModule(self.name, self.path, logger=logger)

    def __str__(self):
        return "Python module at %s" % self.path
+
class BuiltinModule(Module):
    ' A built-in module'

    def __init__(self, name, package):
        Module.__init__(self, name, package)

    def get_sub_module(self, name):
        # Built-in modules expose no sub-modules to the finder.
        return None

    def all_sub_modules(self):
        return ()

    def get_extractable(self):
        return BuiltinModuleExtractable(self.name)

    def __str__(self):
        return "Builtin module %s" % self.name
+
class FilePackage(Module):
    '''A normal package: a folder, conventionally containing an __init__.py
    (the __init__.py requirement is waived when respect_init is False).'''

    IS_PACKAGE = True

    def __init__(self, name, package, path, respect_init=True):
        assert isinstance(path, str), type(path)
        Module.__init__(self, name, package)
        self.path = path
        self.respect_init = respect_init

    def get_sub_module(self, name):
        #Children of an anonymous package stay anonymous.
        qualified = "%s.%s" % (self.name, name) if self.name else None
        return _from_base(qualified, os.path.join(self.path, name), self, self.respect_init)

    def all_sub_modules(self):
        return _from_folder(self.name, self.path, self, self.respect_init)

    def load(self):
        #Packages themselves are not loaded as source modules.
        return None

    def get_extractable(self):
        return FolderExtractable(self.path)

    def __str__(self):
        return "Package at %s" % self.path
+
class PthPackage(Module):
    "A built-in package object generated from a '.pth' file"

    IS_PACKAGE = True

    def __init__(self, name, package, search_path):
        Module.__init__(self, name, package)
        #List of directories to search for sub-modules.
        self.search_path = search_path

    def get_sub_module(self, name):
        qualified = self.name + "." + name
        for directory in self.search_path:
            found = _from_base(qualified, os.path.join(directory, name), self)
            if found is not None:
                return found
        return None

    def all_sub_modules(self):
        for directory in self.search_path:
            yield from _from_folder(self.name, directory, self)

    def load(self):
        return None

    def __str__(self):
        return "Builtin package (.pth) %s %s" % (self.name, self.search_path)

    def get_extractable(self):
        #Nothing on disk to extract for a synthesized package.
        return None
+
+#Helper functions
+
def _from_base(name, basepath, pkg, respect_init=True):
    '''Create a module object for `basepath`: a FilePackage for a directory
    (requiring an __init__.py when `respect_init`), a PyModule for a file
    with a recognised Python extension, otherwise None.'''
    if os.path.isdir(basepath):
        has_init = os.path.exists(os.path.join(basepath, "__init__.py"))
        if has_init or not respect_init:
            return FilePackage(name, pkg, basepath, respect_init)
        return None
    for extension in PY_EXTENSIONS:
        candidate = basepath + extension
        if os.path.isfile(candidate):
            return PyModule(name, pkg, candidate)
    return None
+
def _from_folder(name, path, pkg, respect_init=True):
    '''Yield a module object for each sub-package and Python source file
    directly inside the directory `path`.

    name -- dotted name of the containing package ('' or None gives
            anonymous children).
    pkg -- the containing package object, passed through to the children.
    respect_init -- if True, directories without an __init__.py are skipped.
    '''
    for file in os.listdir(path):
        fullpath = os.path.join(path, file)
        if os.path.isdir(fullpath):
            if os.path.exists(os.path.join(fullpath, "__init__.py")) or not respect_init:
                yield FilePackage(name + "." + file if name else None, pkg, fullpath, respect_init)
        base, ext = os.path.splitext(file)
        if ext not in PY_EXTENSIONS:
            continue
        if os.path.isfile(fullpath):
            yield PyModule(name + "." + base if name else None, pkg, fullpath)
+
class AbstractFinder(object):
    '''Base class of finders that map module names to module objects.'''

    def find(self, mod_name):
        '''Find an extractable object given a module name'''
        if '.' not in mod_name:
            return self.find_top(mod_name)
        #Resolve the leading package, then delegate to it for the rest.
        top, _, rest = mod_name.partition(".")
        pkg = self.find_top(top)
        if not pkg:
            return None
        return pkg.find(rest)

    def find_top(self, name):
        '''Find module or package object given a simple (dot-less) name'''
        raise NotImplementedError()

    def name_from_path(self, path, extensions):
        '''Find module or package object given a path'''
        raise NotImplementedError()
+
class PyFinder(AbstractFinder):
    '''Finder that locates modules below a single filesystem root.'''

    __slots__ = [ 'path', 'respect_init', 'logger' ]

    def __init__(self, path, respect_init, logger):
        assert isinstance(path, str), path
        self.path = os.path.abspath(path)
        self.respect_init = respect_init
        self.logger = logger

    def find_top(self, mod_name):
        #The candidate lives directly under this finder's root.
        return _from_base(mod_name, os.path.join(self.path, mod_name),
                          None, self.respect_init)

    def name_from_path(self, path, extensions):
        '''Map a path under this root back to a dotted module name, or
        return None when the path is outside the root or has an
        unrecognised extension.'''
        rel_path = _relative_subpath(path, self.path)
        if rel_path is None:
            return None
        stem, ext = os.path.splitext(rel_path)
        if ext and ext not in extensions:
            return None
        return ".".join(stem.split(os.path.sep))
+
+def _relative_subpath(subpath, root):
+ 'Returns the relative path if `subpath` is within `root` or `None` otherwise'
+ try:
+ relpath = os.path.relpath(subpath, root)
+ except ValueError:
+ #No relative path possible
+ return None
+ if relpath.startswith(os.pardir):
+ #Not in root:
+ return None
+ return relpath
+
class BuiltinFinder(AbstractFinder):
    '''Finder for builtin modules that are already present in the VM
    or can be guaranteed to load successfully'''

    def __init__(self, logger):
        # Snapshot the modules already loaded into this interpreter.
        self.modules = {}
        for name, module in sys.modules.items():
            self.modules[name] = module
        try:
            # Directory holding the dynamically-loaded C extension modules of
            # the stdlib, located via a known extension module.
            # NOTE(review): `imp` is deprecated and removed in Python 3.12;
            # this presumably requires running on an older interpreter -- confirm.
            self.dynload_path = os.path.dirname(imp.find_module("_json")[1])
        except Exception:
            if os.name != "nt":
                logger.warning("Failed to find dynload path")
            self.dynload_path = None

    def builtin_module(self, name):
        '''Create a BuiltinModule for dotted `name`, chaining up through
        its parent packages.'''
        if "." in name:
            pname, name = name.rsplit(".", 1)
            return BuiltinModule(name, self.builtin_module(pname))
        return BuiltinModule(name, None)

    def find(self, mod_name):
        mod = super(BuiltinFinder, self).find(mod_name)
        if mod is not None:
            return mod
        #Use `imp` module to find module
        try:
            _, filepath, mod_t = imp.find_module(mod_name)
        except ImportError:
            return None
        #Accept builtin dynamically loaded modules like _ctypes or _json
        if filepath and os.path.dirname(filepath) == self.dynload_path:
            return BuiltinModule(mod_name, None)
        return None

    def find_top(self, mod_name):
        # Already-imported modules without a __file__ are builtins; ones
        # with a __path__ are packages synthesized from a .pth file.
        if mod_name in self.modules:
            mod = self.modules[mod_name]
            if hasattr(mod, "__file__"):
                return None
            if hasattr(mod, "__path__"):
                return PthPackage(mod_name, None, mod.__path__)
            return BuiltinModule(mod_name, None)
        if mod_name in sys.builtin_module_names:
            return BuiltinModule(mod_name, None)
        return None

    def name_from_path(self, path, extensions):
        # Builtin modules have no source path to map back from.
        return None
+
+#Stub file handling
+
class StubFinder(PyFinder):
    '''Finder for the stub modules shipped with the tools distribution,
    located under ``$ODASA_TOOLS/data/python/stubs``. When ODASA_TOOLS is
    not set, ``sys.path[1]`` is used as the tools directory instead.'''

    def __init__(self, logger):
        tools = os.environ.get('ODASA_TOOLS')
        if tools is None:
            tools = sys.path[1]
            logger.debug("StubFinder: can't find ODASA_TOOLS, using '%s' instead", tools)
        path = os.path.join(tools, "data", "python", "stubs")
        super(StubFinder, self).__init__(path, True, logger)
+
+
def _finders_for_path(path, respect_init, logger):
    '''Build the finder chain: stubs first, then each non-empty entry of
    `path`, with the builtin finder last.'''
    finders = [StubFinder(logger)]
    finders.extend(PyFinder(p, respect_init, logger) for p in path if p)
    finders.append(BuiltinFinder(logger))
    return finders
+
+
def finders_from_options_and_env(options, logger):
    '''Return a list of finders from the given command line options'''
    path = (options.path + options.sys_path) if options.path else options.sys_path
    path = [os.path.abspath(p) for p in path]
    if options.exclude:
        excluded = set(options.exclude)
        #NOTE(review): this is a plain string-prefix test, so excluding
        #"/a/b" also drops "/a/bc" -- presumably intended; confirm.
        path = [p for p in path
                if not any(p.startswith(x) for x in excluded)]
    logger.debug("Finder path: %s", path)
    logger.debug("sys path: %s", sys.path)
    return _finders_for_path(path, options.respect_init, logger)
+
+
class Finder(object):
    '''Facade over a chain of finders, with a cache of path -> module.'''

    def __init__(self, finders, options, logger):
        self.finders = finders
        # Cache of previously resolved paths (entries may be None).
        self.path_map = {}
        self.logger = logger
        self.respect_init = options.respect_init

    def find(self, mod_name):
        '''Return the module object for dotted `mod_name`, trying each
        finder in turn; None when nobody can resolve it.'''
        for finder in self.finders:
            mod = finder.find(mod_name)
            if mod is not None:
                return mod
        self.logger.debug("Cannot find module '%s'", mod_name)
        return None

    @staticmethod
    def from_options_and_env(options, logger):
        '''Build a Finder over the search path implied by the options.'''
        return Finder(finders_from_options_and_env(options, logger), options, logger)

    def from_extractable(self, unit):
        '''Map a file/folder extractable to a module object (None otherwise).'''
        if isinstance(unit, FolderExtractable) or isinstance(unit, FileExtractable):
            return self.from_path(unit.path)
        return None

    def from_path(self, path, extensions=PY_EXTENSIONS):
        '''Return (and cache) the module object for filesystem `path`,
        recursing over parent directories to build the package chain.

        NOTE(review): recursion stops at "" or "/" only; a Windows drive
        root (e.g. "C:\\") is its own dirname -- confirm this cannot
        recurse forever on Windows.
        '''
        if path in self.path_map:
            return self.path_map[path]
        if not path or path == "/":
            return None
        is_python_2 = (get_analysis_major_version() == 2)
        if os.path.isdir(path) and not os.path.exists(os.path.join(path, "__init__.py")) and (self.respect_init or not is_python_2):
            return None
        pkg = self.from_path(os.path.dirname(path))
        mod = None
        if os.path.isdir(path):
            mod = FilePackage(None, pkg, path)
        if os.path.isfile(path):
            base, ext = os.path.splitext(path)
            if ext in extensions:
                mod = PyModule(None, pkg, path)
        # Scripts are treated as stand-alone modules with no parent package.
        if is_script(path):
            mod = PyModule(None, None, path)
        self.path_map[path] = mod
        return mod

    def name_from_path(self, path, extensions=PY_EXTENSIONS):
        '''Return the dotted module name for `path`, or None.'''
        for finder in self.finders:
            name = finder.name_from_path(path, extensions)
            if name is not None:
                return name
        return None
diff --git a/python/extractor/semmle/python/imports.py b/python/extractor/semmle/python/imports.py
new file mode 100644
index 00000000000..851193e89f5
--- /dev/null
+++ b/python/extractor/semmle/python/imports.py
@@ -0,0 +1,256 @@
+import sys
+from semmle.python import ast
+
+from collections import namedtuple
+
+from semmle.util import VERSION, get_analysis_major_version
+from semmle.cache import Cache
+from semmle.logging import INFO
+
#Maintain distinct version strings for distinct versions of Python
IMPORTS_KEY = 'import%s_%x%x' % (VERSION, sys.version_info[0], sys.version_info[1])

# Used to (de)serialise cached import nodes in CachingModuleImporter.
import pickle

__all__ = [ 'CachingModuleImporter', 'ModuleImporter', 'importer_from_options' ]

# AST-independent descriptions of import statements. `level` is the relative
# import level (<= 0 for absolute imports); `module` is the dotted module name.
ImportStar = namedtuple('ImportStar', 'level module')
ImportExpr = namedtuple('ImportExpr', 'level module')
ImportMember = namedtuple('ImportMember', 'level module name')
+
def safe_string(txt):
    '''Best-effort conversion of `txt` to text.

    bytes are decoded with the filesystem encoding (falling back to
    latin-1); everything else goes through str(). If all conversions
    fail, u"?" is returned.
    '''
    try:
        if not isinstance(txt, bytes):
            return str(txt)
        try:
            return txt.decode(sys.getfilesystemencoding(), errors="replace")
        except Exception:
            return txt.decode("latin-1")
    except Exception:
        return u"?"
+
class SemmleImportError(Exception):
    '''Raised when a module import cannot be resolved; carries a
    human-readable message built from the module name and reasons.'''

    def __init__(self, module_name, *reasons):
        detail = u"".join(safe_string(reason) for reason in reasons)
        shown_name = safe_string(module_name)
        if detail:
            message = u"Import of %s failed: %s.\n" % (shown_name, detail)
        else:
            message = u"Import of %s failed.\n" % shown_name
        Exception.__init__(self, message)

    def write(self, out=sys.stdout):
        out.write(self.args[0])
+
+
class CachingModuleImporter(object):
    '''Wrapper around ModuleImporter that caches the (pickled) import nodes
    of each module in an on-disk cache keyed by module content.'''

    def __init__(self, cachedir, finder, logger):
        self.worker = ModuleImporter(finder, logger)
        if cachedir is None:
            raise IOError("No cache directory")
        # NOTE(review): the second argument here is the logger, whereas
        # CachingExtractor passes options.verbose -- confirm which of the
        # two Cache.for_directory expects.
        self.cache = Cache.for_directory(cachedir, logger)
        self.logger = logger

    def get_imports(self, module, loaded_module):
        '''Yield the modules imported by `module` (parsed form
        `loaded_module`), using cached import nodes when possible.'''
        import_nodes = self.get_import_nodes(loaded_module)
        return self.worker.parse_imports(module, import_nodes)

    def get_import_nodes(self, loaded_module):
        '''Return the list of import descriptions for `loaded_module`,
        reading the cache when possible and repopulating it otherwise.'''
        key = loaded_module.get_hash_key(IMPORTS_KEY)
        if key is None:
            # Module cannot be keyed; fall back to direct analysis.
            return self.worker.get_import_nodes(loaded_module)
        imports = self.cache.get(key)
        #Unpickle the data
        if imports is not None:
            try:
                imports = pickle.loads(imports)
            except Exception:
                self.logger.debug("Failed to unpickle imports for %s", loaded_module.path)
                imports = None
        if imports is None:
            imports = self.worker.get_import_nodes(loaded_module)
            try:
                data = pickle.dumps(imports)
                self.cache.set(key, data)
            except Exception as ex:
                # Shouldn't really fail, but carry on anyway
                self.logger.debug("Failed to save pickled imports to cache for %s: %s", loaded_module.path, ex)
        else:
            self.logger.debug("Cached imports file found for %s", loaded_module.path)
        return imports
+
class ModuleImporter(object):
    'Discovers and records which modules import which other modules'

    def __init__(self, finder, logger):
        self.finder = finder
        self.logger = logger
        # Import failures already reported, so each is logged only once.
        self.failures = {}

    def get_imports(self, module, loaded_module):
        '''Yield the modules imported by `module` (parsed form `loaded_module`).'''
        import_nodes = self.get_import_nodes(loaded_module)
        return self.parse_imports(module, import_nodes)

    def get_import_nodes(self, loaded_module):
        'Return list of AST nodes representing imports'
        try:
            return imports_from_ast(loaded_module.py_ast)
        except Exception as ex:
            if isinstance(ex, SyntaxError):
                # Example: `Syntax Error (line 123) in /home/.../file.py`
                self.logger.warning("%s in %s", ex, loaded_module.path)
                # no need to show traceback, it's not an internal bug
            else:
                self.logger.warning("Failed to analyse imports of %s : %s", loaded_module.path, ex)
                self.logger.traceback(INFO)
            return []

    def _relative_import(self, module, level, mod_name, report_failure = True):
        '''Resolve a relative import of `mod_name` at the given `level`
        relative to `module`; returns None (recording, and optionally
        logging, the failure) when it cannot be resolved.'''
        for _ in range(level):
            parent = module.package
            if parent is None:
                relative_name = level * u'.' + mod_name
                if relative_name not in self.failures:
                    if report_failure:
                        self.logger.warning("Failed to find %s, no parent package of %s", relative_name, module)
                    self.failures[relative_name] = str(module)
                return None
            module = parent
        res = module
        if mod_name:
            res = res.get_sub_module(mod_name)
        if res is None and report_failure:
            relative_name = level * '.' + mod_name
            if relative_name not in self.failures:
                self.logger.warning("Failed to find %s, %s has no module %s", relative_name, module, mod_name)
                self.failures[relative_name] = str(module)
        return res

    def _absolute_import(self, module, mod_name):
        '''Resolve an absolute import of `mod_name` via the finder;
        returns None (logging the failure once) on SemmleImportError.'''
        try:
            mod = self.finder.find(mod_name)
        except SemmleImportError as ex:
            if mod_name not in self.failures:
                self.logger.warning("%s", ex)
                self.failures[mod_name] = str(module)
            return None
        return mod

    def parse_imports(self, module, import_nodes):
        '''Yield each distinct module imported by `module`; for imported
        packages, also yield their __init__ module.'''
        imports = set()
        #If an imported module is a package, then yield its __init__ module as well
        for imported in self._parse_imports_no_init(module, import_nodes):
            if imported not in imports:
                imports.add(imported)
                assert imported is not None
                yield imported
                if not imported.is_package():
                    continue
                init = imported.get_sub_module(u"__init__")
                if init is not None and init not in imports:
                    yield init

    def _parse_imports_no_init(self, module, import_nodes):
        '''Yield the modules referenced by `import_nodes` (possibly with
        duplicates, and without the packages' __init__ modules).'''
        assert not module.is_package()
        for node in import_nodes:
            if node.module is None:
                top = ''
                parts = []
            else:
                parts = node.module.split('.')
                top, parts = parts[0], parts[1:]
            if node.level <= 0:
                if get_analysis_major_version() < 3:
                    #Python 2: attempt implicit relative import (level 1) first.
                    imported = self._relative_import(module, 1, top, False)
                    if imported is None:
                        imported = self._absolute_import(module, top)
                else:
                    imported = self._absolute_import(module, top)
            else:
                imported = self._relative_import(module, node.level, top)
            if imported is None:
                self.logger.debug("Unable to resolve import: %s", top)
                continue
            yield imported
            # Walk down the remaining dotted components.
            for p in parts:
                inner = imported.get_sub_module(p)
                if inner is None:
                    self.logger.debug("Unable to resolve import: %s", p)
                    break
                imported = inner
                yield imported
            if isinstance(node, ImportStar):
                self.logger.debug("Importing all sub modules of %s", imported)
                #If import module is a package then yield all sub_modules.
                for mod in imported.all_sub_modules():
                    yield mod
            elif isinstance(node, ImportMember):
                mod = imported.get_sub_module(node.name)
                # BUGFIX: the condition was previously inverted ("Unable to
                # resolve" was logged on success) and a None `mod` was
                # yielded, tripping the assertion in parse_imports.
                if mod is None:
                    self.logger.debug("Unable to resolve import: %s", node.name)
                else:
                    yield mod
+
def imports_from_ast(the_ast):
    '''Return the list of import descriptions (ImportStar/ImportExpr/
    ImportMember tuples) found anywhere in the module AST `the_ast`,
    including imports nested inside functions and decorator calls.'''
    def walk(node, in_function, in_name_main):
        # `in_function` / `in_name_main` record the walk context; they are
        # threaded through but do not currently filter the results.
        if isinstance(node, ast.Module):
            for import_node in walk(node.body, in_function, in_name_main):
                yield import_node
        elif isinstance(node, ast.ImportFrom):
            # `from M import *`
            yield ImportStar(node.module.level, node.module.name)
        elif isinstance(node, ast.Import):
            for alias in node.names:
                imp = alias.value
                if isinstance(imp, ast.ImportExpr):
                    # `import M`
                    yield ImportExpr(imp.level, imp.name)
                else:
                    assert isinstance(imp, ast.ImportMember)
                    # `from M import name`
                    yield ImportMember(imp.module.level, imp.module.name, imp.name)
        elif isinstance(node, ast.FunctionExpr):
            for _, child in ast.iter_fields(node.inner_scope):
                for import_node in walk(child, True, in_name_main):
                    yield import_node
        elif isinstance(node, ast.Call):
            # Might be a decorator
            for import_node in walk(node.positional_args, in_function, in_name_main):
                yield import_node
        elif isinstance(node, list):
            for n in node:
                for import_node in walk(n, in_function, in_name_main):
                    yield import_node
        elif isinstance(node, ast.stmt):
            name_eq_main = is_name_eq_main(node)
            for _, child in ast.iter_fields(node):
                for import_node in walk(child, in_function, name_eq_main or in_name_main):
                    yield import_node
    return list(walk(the_ast, False, False))
+
def name_from_expr(expr):
    '''Return the dotted name denoted by `expr` (a Name or a chain of
    Attribute accesses); raises ValueError for anything else.'''
    if isinstance(expr, ast.Attribute):
        return "%s.%s" % (name_from_expr(expr.value), expr.attr)
    if isinstance(expr, ast.Name):
        return expr.id
    raise ValueError("%s is not a name" % expr)
+
def is_name_eq_main(node):
    '''True when `node` is an ``if __name__ == "__main__":`` statement.'''
    if not isinstance(node, ast.If):
        return False
    try:
        comparison = node.test
        #Any malformed shape (missing attributes, wrong node kinds) simply
        #means this is not the idiom we are looking for.
        return (comparison.comparators[0].s == "__main__"
                and comparison.left.id == "__name__")
    except Exception:
        return False
+
def importer_from_options(options, finder, logger):
    '''Create a module importer from the command line options: a
    CachingModuleImporter when a trap cache is configured and usable,
    otherwise a plain ModuleImporter.'''
    try:
        importer = CachingModuleImporter(options.trap_cache, finder, logger)
    except Exception as ex:
        if options.trap_cache is not None:
            # Use `warning` for consistency with the rest of this module
            # (`warn` is the deprecated stdlib-logging alias and may not
            # exist on the project Logger).
            logger.warning("Failed to create caching importer: %s", ex)
        importer = ModuleImporter(finder, logger)
    return importer
diff --git a/python/extractor/semmle/python/master.py b/python/extractor/semmle/python/master.py
new file mode 100755
index 00000000000..200340061fc
--- /dev/null
+++ b/python/extractor/semmle/python/master.py
@@ -0,0 +1,504 @@
+#Much of the information in this file is hardcoded into parser.
+#Modify with care and test well.
+#It should be relatively safe to add fields.
+
+
+from semmle.python.AstMeta import Node, PrimitiveNode, ClassNode, UnionNode, ListNode
+from semmle.python.AstMeta import build_node_relations as _build_node_relations
+
# --- Primitive (leaf) value types ---
# Arguments are presumably (name, QL type, DB column type[, doc name]);
# TODO confirm against AstMeta.PrimitiveNode.
string = PrimitiveNode('str', 'string', 'varchar(1)', 'string')
bytes_ = PrimitiveNode('bytes', 'string', 'varchar(1)')

location = PrimitiveNode('location', '@location', 'unique int')
variable = PrimitiveNode('variable', '@py_variable', 'int')

int_ = PrimitiveNode('int', 'int', 'int')
bool_ = PrimitiveNode('bool', 'boolean', 'boolean')
number = PrimitiveNode('number', 'string', 'varchar(1)')

# --- Top-level scope kinds ---
Module = ClassNode('Module')
Class = ClassNode('Class')
Function = ClassNode('Function')

# --- Abstract base kinds ---
# The optional third argument is a human-readable descriptive name.
alias = ClassNode('alias')
arguments = ClassNode('arguments', None, 'parameters definition')
boolop = ClassNode('boolop', None, 'boolean operator')
cmpop = ClassNode('cmpop', None, 'comparison operator')
comprehension = ClassNode('comprehension')
comprehension.field('location', location)
expr = ClassNode('expr', None, 'expression')
expr.field('location', location)
expr.field('parenthesised', bool_, 'parenthesised')
expr_context = ClassNode('expr_context', None, 'expression context')
operator = ClassNode('operator')
stmt = ClassNode('stmt', None, 'statement')
stmt.field('location', location)
unaryop = ClassNode('unaryop', None, 'unary operation')
pattern = ClassNode('pattern')
pattern.field('location', location)
pattern.field('parenthesised', bool_, 'parenthesised')
# --- Concrete node kinds ---
# Names largely mirror the node names of CPython's `ast` module.
Add = ClassNode('Add', operator, '+')
And = ClassNode('And', boolop, 'and')
Assert = ClassNode('Assert', stmt)
Assign = ClassNode('Assign', stmt, 'assignment')
Attribute = ClassNode('Attribute', expr)
AugAssign = ClassNode('AugAssign', stmt, 'augmented assignment statement')
AugLoad = ClassNode('AugLoad', expr_context, 'augmented-load')
AugStore = ClassNode('AugStore', expr_context, 'augmented-store')
BinOp = ClassNode('BinOp', expr, 'binary')
#Choose a name more consistent with other Exprs.
BinOp.set_name("BinaryExpr")
BitAnd = ClassNode('BitAnd', operator, '&')
BitOr = ClassNode('BitOr', operator, '|')
BitXor = ClassNode('BitXor', operator, '^')
BoolOp = ClassNode('BoolOp', expr, 'boolean')
#Avoid name clash with boolop
BoolOp.set_name('BoolExpr')
Break = ClassNode('Break', stmt)
Bytes = ClassNode('Bytes', expr)
Call = ClassNode('Call', expr)
ClassExpr = ClassNode('ClassExpr', expr, 'class definition')
Compare = ClassNode('Compare', expr)
Continue = ClassNode('Continue', stmt)
Del = ClassNode('Del', expr_context, 'deletion')
Delete = ClassNode('Delete', stmt)
Dict = ClassNode('Dict', expr, 'dictionary')
DictComp = ClassNode('DictComp', expr, 'dictionary comprehension')
Div = ClassNode('Div', operator, '/')
# NOTE: shadows the builtin `Ellipsis`; the name must match the ast node name.
Ellipsis = ClassNode('Ellipsis', expr)
Eq = ClassNode('Eq', cmpop, '==')
ExceptStmt = ClassNode('ExceptStmt', stmt, 'except block')
ExceptGroupStmt = ClassNode('ExceptGroupStmt', stmt, 'except group block')
Exec = ClassNode('Exec', stmt)
Expr_stmt = ClassNode('Expr', stmt)
Expr_stmt.set_name('Expr_stmt')
FloorDiv = ClassNode('FloorDiv', operator, '//')
For = ClassNode('For', stmt)
FunctionExpr = ClassNode('FunctionExpr', expr, 'function definition')
GeneratorExp = ClassNode('GeneratorExp', expr, 'generator')
Global = ClassNode('Global', stmt)
Gt = ClassNode('Gt', cmpop, '>')
GtE = ClassNode('GtE', cmpop, '>=')
If = ClassNode('If', stmt)
IfExp = ClassNode('IfExp', expr, 'if')
Import = ClassNode('Import', stmt)
ImportExpr = ClassNode('ImportExpr', expr, 'import')
ImportMember = ClassNode('ImportMember', expr, 'from import')
ImportFrom = ClassNode('ImportFrom', stmt, 'import * statement')
In = ClassNode('In', cmpop)
Invert = ClassNode('Invert', unaryop, '~')
Is = ClassNode('Is', cmpop)
IsNot = ClassNode('IsNot', cmpop, 'is not')
LShift = ClassNode('LShift', operator, '<<')
Lambda = ClassNode('Lambda', expr)
List = ClassNode('List', expr)
ListComp = ClassNode('ListComp', expr, 'list comprehension')
Load = ClassNode('Load', expr_context)
Lt = ClassNode('Lt', cmpop, '<')
LtE = ClassNode('LtE', cmpop, '<=')
Match = ClassNode('Match', stmt)
#Avoid name clash with regex match
Match.set_name('MatchStmt')
Case = ClassNode('Case', stmt)
Guard = ClassNode('Guard', expr)
MatchAsPattern = ClassNode('MatchAsPattern', pattern)
MatchOrPattern = ClassNode('MatchOrPattern', pattern)
MatchLiteralPattern = ClassNode('MatchLiteralPattern', pattern)
MatchCapturePattern = ClassNode('MatchCapturePattern', pattern)
MatchWildcardPattern = ClassNode('MatchWildcardPattern', pattern)
MatchValuePattern = ClassNode('MatchValuePattern', pattern)
MatchSequencePattern = ClassNode('MatchSequencePattern', pattern)
MatchStarPattern = ClassNode('MatchStarPattern', pattern)
MatchMappingPattern = ClassNode('MatchMappingPattern', pattern)
MatchDoubleStarPattern = ClassNode('MatchDoubleStarPattern', pattern)
MatchKeyValuePattern = ClassNode('MatchKeyValuePattern', pattern)
MatchClassPattern = ClassNode('MatchClassPattern', pattern)
MatchKeywordPattern = ClassNode('MatchKeywordPattern', pattern)
Mod = ClassNode('Mod', operator, '%')
Mult = ClassNode('Mult', operator, '*')
Name = ClassNode('Name', expr)
Nonlocal = ClassNode('Nonlocal', stmt)
Not = ClassNode('Not', unaryop)
NotEq = ClassNode('NotEq', cmpop, '!=')
NotIn = ClassNode('NotIn', cmpop, 'not in')
Num = ClassNode('Num', expr, 'numeric literal')
Or = ClassNode('Or', boolop)
Param = ClassNode('Param', expr_context, 'parameter')
Pass = ClassNode('Pass', stmt)
Pow = ClassNode('Pow', operator, '**')
Print = ClassNode('Print', stmt)
RShift = ClassNode('RShift', operator, '>>')
Raise = ClassNode('Raise', stmt)
Repr = ClassNode('Repr', expr, 'backtick')
Return = ClassNode('Return', stmt)
Set = ClassNode('Set', expr)
SetComp = ClassNode('SetComp', expr, 'set comprehension')
#Add $ to name to prevent doc-gen adding sub type name
Slice = ClassNode('Slice', expr, '$slice')
Starred = ClassNode('Starred', expr)
Store = ClassNode('Store', expr_context)
Str = ClassNode('Str', expr, 'string literal')
Sub = ClassNode('Sub', operator, '-')
Subscript = ClassNode('Subscript', expr)
Try = ClassNode('Try', stmt)
Tuple = ClassNode('Tuple', expr)
UAdd = ClassNode('UAdd', unaryop, '+')
USub = ClassNode('USub', unaryop, '-')
UnaryOp = ClassNode('UnaryOp', expr, 'unary')
#Avoid name clash with 'unaryop'
UnaryOp.set_name('UnaryExpr')
While = ClassNode('While', stmt)
With = ClassNode('With', stmt)
Yield = ClassNode('Yield', expr)
YieldFrom = ClassNode('YieldFrom', expr, 'yield-from')
# --- List node kinds ---
alias_list = ListNode(alias)
cmpop_list = ListNode(cmpop)
comprehension_list = ListNode(comprehension)
expr_list = ListNode(expr)
stmt_list = ListNode(stmt)
string_list = ListNode(string)
StringPart = ClassNode('StringPart', None, "implicitly concatenated part")
string_parts_list = ListNode(StringPart)
pattern_list = ListNode(pattern)

#Template AST Nodes
TemplateWrite = ClassNode('TemplateWrite', stmt, "template write statement")
TemplateDottedNotation = ClassNode('TemplateDottedNotation', expr, "template dotted notation expression")
Filter = ClassNode("Filter", expr, "template filter expression")
PlaceHolder = ClassNode('PlaceHolder', expr, "template place-holder expression")

Await = ClassNode('Await', expr)
MatMult = ClassNode('MatMult', operator, '@')

scope = UnionNode(Module, Class, Function)
scope.set_name('scope')

dict_item = ClassNode('dict_item')

#DoubleStar in calls fn(**{'a': 1, 'c': 3}, **{'b': 2, 'd': 4}) or dict displays {'a': 1, **{'b': 2, 'd': 4}}
DictUnpacking = ClassNode('DictUnpacking', dict_item, descriptive_name='dictionary unpacking')
KeyValuePair = ClassNode('KeyValuePair', dict_item, descriptive_name='key-value pair')
keyword = ClassNode('keyword', dict_item, descriptive_name='keyword argument')

#Initial name must match that in ast module.
FormattedStringLiteral = ClassNode("JoinedStr", expr, descriptive_name='formatted string literal')
FormattedStringLiteral.set_name("Fstring")

FormattedValue = ClassNode("FormattedValue", expr, descriptive_name='formatted value')

AnnAssign = ClassNode("AnnAssign", stmt, descriptive_name='annotated assignment')

AssignExpr = ClassNode('AssignExpr', expr, "assignment expression")

SpecialOperation = ClassNode('SpecialOperation', expr, "special operation")

type_parameter = ClassNode('type_parameter', descriptive_name='type parameter')
type_parameter.field('location', location)
type_parameter_list = ListNode(type_parameter)

TypeAlias = ClassNode('TypeAlias', stmt, 'type alias')
ParamSpec = ClassNode('ParamSpec', type_parameter, 'parameter spec')
TypeVar = ClassNode('TypeVar', type_parameter, 'type variable')
TypeVarTuple = ClassNode('TypeVarTuple', type_parameter, 'type variable tuple')


# --- Union node kinds ---
expr_or_stmt = UnionNode(expr, stmt)

dict_item_list = ListNode(dict_item)

ast_node = UnionNode(expr, stmt, pattern, Module, Class, Function, comprehension, StringPart, dict_item, type_parameter)
ast_node.set_name('ast_node')

parameter = UnionNode(Name, Tuple)
parameter.set_name('parameter')

parameter_list = ListNode(parameter)

# --- Field declarations ---
# Each `X.field(name, type[, descriptive name])` declares a child relation.
alias.field('value', expr)
alias.field('asname', expr, 'name')

arguments.field('kw_defaults', expr_list, 'keyword-only default values')
arguments.field('defaults', expr_list, 'default values')
arguments.field('annotations', expr_list)
arguments.field('varargannotation', expr, '*arg annotation')
arguments.field('kwargannotation', expr, '**kwarg annotation')
arguments.field('kw_annotations', expr_list, 'keyword-only annotations')

Assert.field('test', expr, 'value being tested')
Assert.field('msg', expr, 'failure message')

Assign.field('value', expr)
Assign.field('targets', expr_list, 'targets')

Attribute.field('value', expr, 'object')
Attribute.field('attr', string, 'attribute name')
Attribute.field('ctx', expr_context, 'context')

AugAssign.field('operation', BinOp)

BinOp.field('left', expr, 'left sub-expression')
BinOp.field('op', operator, 'operator')
BinOp.field('right', expr, 'right sub-expression')

BoolOp.field('op', boolop, 'operator')
BoolOp.field('values', expr_list, 'sub-expressions')

Bytes.field('s', bytes_, 'value')
Bytes.field('prefix', bytes_, 'prefix')
Bytes.field('implicitly_concatenated_parts', string_parts_list)

Call.field('func', expr, 'callable')
Call.field('positional_args', expr_list, 'positional arguments')
Call.field('named_args', dict_item_list, 'named arguments')

Class.field('name', string)
Class.field('body', stmt_list)

ClassExpr.field('name', string)
ClassExpr.field('bases', expr_list)
ClassExpr.field('keywords', dict_item_list, 'keyword arguments')
ClassExpr.field('inner_scope', Class, 'class scope')
ClassExpr.field('type_parameters', type_parameter_list, 'type parameters')

Compare.field('left', expr, 'left sub-expression')
Compare.field('ops', cmpop_list, 'comparison operators')
Compare.field('comparators', expr_list, 'right sub-expressions')

comprehension.field('iter', expr, 'iterable')
comprehension.field('target', expr)
comprehension.field('ifs', expr_list, 'conditions')

Delete.field('targets', expr_list)

Dict.field('items', dict_item_list)

DictUnpacking.field('location', location)
DictUnpacking.field('value', expr)

DictComp.field('function', Function, 'implementation')
DictComp.field('iterable', expr)

ExceptStmt.field('type', expr)
ExceptStmt.field('name', expr)
ExceptStmt.field('body', stmt_list)

ExceptGroupStmt.field('type', expr)
ExceptGroupStmt.field('name', expr)
ExceptGroupStmt.field('body', stmt_list)

Exec.field('body', expr)
Exec.field('globals', expr)
Exec.field('locals', expr)

Expr_stmt.field('value', expr)

For.field('target', expr)
For.field('iter', expr, 'iterable')
For.field('body', stmt_list)
For.field('orelse', stmt_list, 'else block')
For.field('is_async', bool_, 'async')

Function.field('name', string)
Function.field('args', parameter_list, 'positional parameter list')
Function.field('vararg', expr, 'tuple (*) parameter')
Function.field('kwonlyargs', expr_list, 'keyword-only parameter list')
Function.field('kwarg', expr, 'dictionary (**) parameter')
Function.field('body', stmt_list)
Function.field('is_async', bool_, 'async')
Function.field('type_parameters', type_parameter_list, 'type parameters')

FunctionExpr.field('name', string)
FunctionExpr.field('args', arguments, 'parameters')
FunctionExpr.field('returns', expr, 'return annotation')
FunctionExpr.field('inner_scope', Function, 'function scope')

GeneratorExp.field('function', Function, 'implementation')
GeneratorExp.field('iterable', expr)

Global.field('names', string_list)

If.field('test', expr)
If.field('body', stmt_list, 'if-true block')
If.field('orelse', stmt_list, 'if-false block')

IfExp.field('test', expr)
IfExp.field('body', expr, 'if-true expression')
IfExp.field('orelse', expr, 'if-false expression')

Import.field('names', alias_list, 'alias list')

ImportFrom.set_name('ImportStar')
ImportFrom.field('module', expr)

ImportMember.field('module', expr)
ImportMember.field('name', string)

keyword.field('location', location)
keyword.field('value', expr)
keyword.field('arg', string)

KeyValuePair.field('location', location)
KeyValuePair.field('value', expr)
KeyValuePair.field('key', expr)

Lambda.field('args', arguments, 'arguments')
Lambda.field('inner_scope', Function, 'function scope')

List.field('elts', expr_list, 'element list')
List.field('ctx', expr_context, 'context')

#For Python 3 a new scope is created and these fields are populated:
ListComp.field('function', Function, 'implementation')
ListComp.field('iterable', expr)
#For Python 2 no new scope is created and these are populated:
ListComp.field('generators', comprehension_list)
ListComp.field('elt', expr, 'elements')

Match.field('subject', expr)
Match.field('cases', stmt_list)
Case.field('pattern', pattern)
Case.field('guard', expr)
Case.field('body', stmt_list)
Guard.field('test', expr)
MatchStarPattern.field('target', pattern)
MatchDoubleStarPattern.field('target', pattern)
MatchKeyValuePattern.field('key', pattern)
MatchKeyValuePattern.field('value', pattern)
MatchClassPattern.field('class', expr)
MatchKeywordPattern.field('attribute', expr)
MatchKeywordPattern.field('value', pattern)
MatchAsPattern.field('pattern', pattern)
MatchAsPattern.field('alias', expr)
MatchOrPattern.field('patterns', pattern_list)
MatchLiteralPattern.field('literal', expr)
MatchCapturePattern.field('variable', expr)
MatchValuePattern.field('value', expr)
MatchSequencePattern.field('patterns', pattern_list)
MatchMappingPattern.field('mappings', pattern_list)
MatchClassPattern.field('class_name', expr)
MatchClassPattern.field('positional', pattern_list)
MatchClassPattern.field('keyword', pattern_list)

Module.field('name', string)
Module.field('hash', string, 'hash (not populated)')
Module.field('body', stmt_list)
Module.field('kind', string)

ImportExpr.field('level', int_)
ImportExpr.field('name', string)
ImportExpr.field('top', bool_, 'top level')

Name.field('variable', variable)
Name.field('ctx', expr_context, 'context')

Nonlocal.field('names', string_list)

Num.field('n', number, 'value')
Num.field('text', number)

ParamSpec.field('name', expr)

Print.field('dest', expr, 'destination')
Print.field('values', expr_list)
Print.field('nl', bool_, 'new line')

#Python3 has exc & cause
Raise.field('exc', expr, 'exception')
Raise.field('cause', expr)
#Python2 has type, inst, tback
Raise.field('type', expr)
Raise.field('inst', expr, 'instance')
Raise.field('tback', expr, 'traceback')

Repr.field('value', expr)

Return.field('value', expr)

Set.field('elts', expr_list, 'elements')

SetComp.field('function', Function, 'implementation')
SetComp.field('iterable', expr)

Slice.field('start', expr)
Slice.field('stop', expr)
Slice.field('step', expr)

Starred.field('value', expr)
Starred.field('ctx', expr_context, 'context')

Str.field('s', string, 'text')
Str.field('prefix', string, 'prefix')
Str.field('implicitly_concatenated_parts', string_parts_list)

Subscript.field('value', expr)
Subscript.field('index', expr)
Subscript.field('ctx', expr_context, 'context')

Try.field('body', stmt_list)
Try.field('orelse', stmt_list, 'else block')
Try.field('handlers', stmt_list, 'exception handlers')
Try.field('finalbody', stmt_list, 'finally block')

Tuple.field('elts', expr_list, 'elements')
Tuple.field('ctx', expr_context, 'context')

TypeAlias.field('name', expr)
TypeAlias.field('type_parameters', type_parameter_list)
TypeAlias.field('value', expr)

TypeVar.field('name', expr)
TypeVar.field('bound', expr)

TypeVarTuple.field('name', expr)

UnaryOp.field('op', unaryop, 'operator')
UnaryOp.field('operand', expr)

While.field('test', expr)
While.field('body', stmt_list)
While.field('orelse', stmt_list, 'else block')

With.field('context_expr', expr, 'context manager')
With.field('optional_vars', expr, 'optional variable')
With.field('body', stmt_list)
With.field('is_async', bool_, 'async')

Yield.field('value', expr)

YieldFrom.field('value', expr)

#Template AST Nodes
TemplateWrite.field('value', expr)
TemplateDottedNotation.field('value', expr, 'object')
TemplateDottedNotation.field('attr', string, 'attribute name')
TemplateDottedNotation.field('ctx', expr_context, 'context')
Filter.field('value', expr, 'filtered value')
Filter.field('filter', expr, 'filter')

PlaceHolder.field('variable', variable)
PlaceHolder.field('ctx', expr_context, 'context')

StringPart.field('text', string)
StringPart.field('location', location)

Await.field('value', expr, 'expression waited upon')

FormattedStringLiteral.field('values', expr_list)

FormattedValue.field('value', expr, "expression to be formatted")
FormattedValue.field('conversion', string, 'type conversion')
FormattedValue.field('format_spec', FormattedStringLiteral, 'format specifier')

AnnAssign.field('value', expr)
AnnAssign.field('annotation', expr)
AnnAssign.field('target', expr)

SpecialOperation.field('name', string)
SpecialOperation.field('arguments', expr_list)

AssignExpr.field('value', expr)
AssignExpr.field('target', expr)
+
def all_nodes():
    """Collect every Node declared in this module and build their relations."""
    declared = [candidate for candidate in globals().values()
                if isinstance(candidate, Node)]
    return _build_node_relations(declared)
diff --git a/python/extractor/semmle/python/modules.py b/python/extractor/semmle/python/modules.py
new file mode 100644
index 00000000000..8934d810eb8
--- /dev/null
+++ b/python/extractor/semmle/python/modules.py
@@ -0,0 +1,214 @@
'''Source-module handling for the Python extractor.

Defines PythonSourceModule, which loads, decodes, tokenizes and parses a
Python source file.'''
+
+import semmle.python.parser.tokenizer
+import semmle.python.parser.tsg_parser
+import re
+import os
+from blib2to3.pgen2 import tokenize
+import codecs
+
+from semmle.python.passes.labeller import Labeller
+from semmle.util import base64digest
+from semmle.profiling import timers
+
+__all__ = [ 'PythonSourceModule' ]
+
class PythonSourceModule(object):
    """A single Python source file: raw bytes, decoded text, tokens and ASTs.

    Decodes the source using its declared encoding (falling back to latin-1
    on failure, deferring the error to AST construction), normalizes line
    endings, and lazily computes lines, tokens, comments and the AST.
    """

    # Overridden per-instance to "Script" when the file starts with a
    # `#! ... python` shebang line.
    kind = None

    def __init__(self, name, path, logger, bytes_source = None):
        """`name` may be None; `path` must be a str.  If `bytes_source`
        is not supplied, the file at `path` is read."""
        assert isinstance(path, str), path
        self.name = name # May be None
        self.path = path
        if bytes_source is None:
            with timers["load"]:
                with open(self.path, 'rb') as src:
                    bytes_source = src.read()
        if BIN_PYTHON.match(bytes_source):
            self.kind = "Script"
        # Lazily-computed caches, filled in by the properties below.
        self._ast = None
        self._py_ast = None
        self._lines = None
        self._line_types = None
        self._comments = None
        self._tokens = None
        self.logger = logger
        with timers["decode"]:
            self.encoding, self.bytes_source = semmle.python.parser.tokenizer.encoding_from_source(bytes_source)
        if self.encoding != 'utf-8':
            logger.debug("File '%s' has encoding %s.", path, self.encoding)
        try:
            self._source = self.bytes_source.decode(self.encoding)
            self._illegal_encoding = False
        except Exception as ex:
            self.logger.warning("%s has encoding '%s'", path, self.encoding)
            #Set source to a latin-1 decoding of source string (which cannot fail).
            #Attempting to get the AST will raise a syntax error as expected.
            self._source = self.bytes_source.decode("latin-1")
            self._illegal_encoding = str(ex)
        self._source = normalize_line_endings(self._source)
        #Strip BOM
        if self._source.startswith(u'\ufeff'):
            self._source = self._source[1:]
        self._secure_hash = base64digest(self._source)
        assert isinstance(self._source, str)

    @property
    def source(self):
        """The decoded, line-ending-normalized source text."""
        return self._source

    @property
    def lines(self):
        """The source split into lines, each retaining its trailing newline
        (the final line may lack one)."""
        if self._lines is None:
            def genline():
                src = self._source
                #Handle non-linux line endings
                src = src.replace("\r\n", "\n").replace("\r", "\n")
                length = len(src)
                start = 0
                while True:
                    end = src.find(u'\n', start)
                    if end < 0:
                        if start < length:
                            yield src[start:]
                        return
                    yield src[start:end+1]
                    start = end+1
            self._lines = list(genline())
        return self._lines

    @property
    def tokens(self):
        """The full token stream for the source (computed once and cached)."""
        if self._tokens is None:
            with timers["tokenize"]:
                tokenizer = semmle.python.parser.tokenizer.Tokenizer(self._source)
                self._tokens = list(tokenizer.tokens())
        return self._tokens

    @property
    def ast(self):
        """The labelled AST.

        Raises SyntaxError (located at the offending byte) when the file's
        declared encoding could not decode the source.
        """
        # The ast will be modified by the labeller, so we cannot share it with the py_ast property.
        # However, we expect py_ast to be accessed and used before ast, so we avoid reparsing in that case.
        if self._ast is None:
            if self._illegal_encoding:
                message = self._illegal_encoding
                error = SyntaxError(message)
                error.filename = self.path
                error.lineno, error.offset = offending_byte_position(message, self.bytes_source)
                raise error
            self._ast = self.py_ast
            self._ast.trap_name = self.trap_name
            # The cached raw parse is handed over to the labeller; drop it so
            # it cannot be reused as an unlabelled py_ast.
            self._py_ast = None
            with timers["label"]:
                Labeller().apply(self)
        return self._ast

    @property
    def old_py_ast(self):
        # The py_ast is the raw ast from the Python parser.
        if self._py_ast is None:
            self._py_ast = semmle.python.parser.parse(self.tokens, self.logger)
        return self._py_ast

    @property
    def py_ast(self):
        """The raw AST: old parser first, then the tsg-python parser as fallback."""
        try:
            # First, try to parse the source with the old Python parser.
            return self.old_py_ast
        except Exception as ex:
            # If that fails, try to parse the source with the new Python parser (unless it has been
            # explicitly disabled).
            #
            # Like PYTHONUNBUFFERED for Python, we treat any non-empty string as meaning the
            # flag is enabled.
            # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONUNBUFFERED
            if os.environ.get("CODEQL_PYTHON_DISABLE_TSG_PARSER"):
                if isinstance(ex, SyntaxError):
                    raise ex
                else:
                    raise SyntaxError("Exception %s while parsing %s" % (ex, self.path))
            else:
                try:
                    self._py_ast = semmle.python.parser.tsg_parser.parse(self.path, self.logger)
                    return self._py_ast
                except SyntaxError as ex:
                    raise ex
                except Exception as ex:
                    # Wrap unexpected failures so callers only see SyntaxError.
                    raise SyntaxError("Exception %s in tsg-python while parsing %s" % (ex, self.path))


    @property
    def trap_name(self):
        """Cache key for this module's TRAP output: class, path and content hash."""
        return type(self).__name__ + ':' + self.path + ":" + self._secure_hash

    def get_hash_key(self, token):
        """A digest unique to this module's path, content hash and `token`."""
        return base64digest(self.path + u":" + self._secure_hash + token)

    def get_encoding(self):
        'Returns encoding of source'
        return self.encoding

    @property
    def comments(self):
        '''Returns an iterable of comments in the form
        (text, start, end) where start and end are
        (line, column) pairs.'''
        if self._comments is None:
            self._lexical()
        return self._comments

    def close(self):
        """Drop cached source, tokens and ASTs to release memory."""
        self.bytes_source = None
        self._source = None
        self._ast = None
        self._line_types = None
        self._comments = None
        self._lines = None

    def _lexical(self):
        # Collect (text, start, end) for every COMMENT token.
        self._comments = []
        for kind, text, start, end in self.tokens:
            if kind == tokenize.COMMENT:
                self._comments.append((text, start, end))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
+
+
NEWLINE = b'\n'
OFFENDING_BYTE_RE = re.compile(r"decode byte \w+ in position (\d+):")

def offending_byte_position(message, string):
    """Locate the byte a decode-error message complains about.

    `message` is the text of a decoding error; `string` is the raw byte
    source.  Returns a 1-based (line, column) pair, or (0, 0) when the
    message does not name a position.
    """
    found = OFFENDING_BYTE_RE.search(message)
    if found is None:
        return (0, 0)
    offset = int(found.group(1))
    before = string[:offset]
    last_newline = before.rfind(NEWLINE)
    return (before.count(NEWLINE) + 1, offset - last_newline - 1)
+
+
# Matches shebang lines such as `#!/usr/bin/env python` or `#!/usr/bin/python`.
BIN_PYTHON = re.compile(b'#! *(/usr|/bin|/local)*/?(env)? *python')

def is_script(path):
    """Is the file at `path` a script? (does it start with #!... python)"""
    try:
        with open(path, "rb") as src:
            head = src.read(100)
    except Exception:
        # Unreadable or missing files are simply not scripts.
        return False
    return bool(BIN_PYTHON.match(head))
+
def normalize_line_endings(src):
    """Rewrite CRLF line endings to LF and guarantee a trailing newline.

    The tokenizer expects single-character line endings (`\\n`, `\\r` or
    `\\f`), and the parser expects no unterminated final line.
    """
    normalized = src.replace(u'\r\n', u'\n')
    if not normalized or normalized.endswith(u'\n'):
        return normalized
    return normalized + u'\n'
diff --git a/python/extractor/semmle/python/parser/__init__.py b/python/extractor/semmle/python/parser/__init__.py
new file mode 100644
index 00000000000..d5eb021f8d3
--- /dev/null
+++ b/python/extractor/semmle/python/parser/__init__.py
@@ -0,0 +1,153 @@
+
+# Black's version of lib2to3 (modified)
+from blib2to3.pytree import type_repr
+from blib2to3 import pygram
+from blib2to3.pgen2 import driver, token
+from blib2to3.pgen2.parse import ParseError, Parser
+from . import ast
+from blib2to3.pgen2 import tokenize, grammar
+from blib2to3.pgen2.token import tok_name
+from semmle.profiling import timers
+
# Initialise blib2to3's grammar tables once, at import time.
pygram.initialize()
syms = pygram.python_symbols


# Grammars to try, in order; the first one that parses a file wins
# (see `parse` below).
GRAMMARS = [
    ("Python 3", pygram.python3_grammar),
    ("Python 3 without async", pygram.python3_grammar_no_async),
    ("Python 2 with print as function", pygram.python2_grammar_no_print_statement),
    ("Python 2", pygram.python2_grammar),
]


# Grammar symbols that are pure single-child wrappers; `convert` drops a
# node of one of these types when it has exactly one child, keeping the
# concrete parse tree shallow.
SKIP_IF_SINGLE_CHILD_NAMES = {
    'atom',
    'power',
    'test',
    'not_test',
    'and_test',
    'or_test',
    'suite',
    'testlist',
    'expr',
    'xor_expr',
    'and_expr',
    'shift_expr',
    'arith_expr',
    'term',
    'factor',
    'testlist_gexp',
    'exprlist',
    'testlist_safe',
    'old_test',
    'comparison',
}

# The same set, translated from symbol names to numeric symbol codes.
SKIP_IF_SINGLE_CHILD = {
    val for name, val in
    syms.__dict__.items()
    if name in SKIP_IF_SINGLE_CHILD_NAMES
}
+
+
class Leaf(object):
    """A terminal node of the parse tree: a token type, its text, and its span."""

    __slots__ = "type", "value", "start", "end"

    def __init__(self, type, value, start, end):
        # `start` and `end` are position pairs delimiting the token's span.
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    @property
    def name(self):
        """Human-readable token name, falling back to the numeric type."""
        return tok_name.get(self.type, self.type)

    def __repr__(self):
        """Return a canonical string representation."""
        cls = self.__class__.__name__
        return "%s(%s, %r)" % (cls, self.name, self.value)
+
class Node(object):
    """An interior parse-tree node: a symbol type plus a list of children."""

    # `used_names` is declared but never assigned here; presumably
    # populated externally by the parser driver -- TODO confirm.
    __slots__ = "type", "children", "used_names"

    def __init__(self, type, children):
        self.type = type
        self.children = children

    @property
    def start(self):
        """Start position of the first leaf under this node."""
        cursor = self
        while isinstance(cursor, Node):
            cursor = cursor.children[0]
        return cursor.start

    @property
    def end(self):
        """End position of the last leaf under this node."""
        cursor = self
        while isinstance(cursor, Node):
            cursor = cursor.children[-1]
        return cursor.end

    @property
    def name(self):
        """Symbolic grammar name for this node's type."""
        return type_repr(self.type)

    def __repr__(self):
        """Return a canonical string representation."""
        cls = self.__class__.__name__
        return "%s(%s, %r)" % (cls, self.name, self.children)
+
def convert(gr, raw_node):
    """Turn a raw pgen2 node tuple into a Node or Leaf.

    Wrapper symbols with exactly one child are collapsed to keep the
    tree shallow.
    """
    kind, text, context, children = raw_node
    if not children and kind not in gr.number2symbol:
        # A terminal token: keep its source span.
        start, end = context
        return Leaf(kind, text, start, end)
    # If there's exactly one child, return that child instead of
    # creating a new node.
    if len(children) == 1 and kind in SKIP_IF_SINGLE_CHILD:
        return children[0]
    return Node(kind, children)
+
def parse_tokens(gr, tokens):
    """Parse a series of tokens and return the syntax tree.

    `gr` is a pgen2 grammar; `tokens` yields (type, value, start, end)
    tuples.  Comments and non-logical newlines are skipped.

    Raises ParseError if the token stream ends before the grammar's
    start symbol is complete.
    """
    p = Parser(gr, convert)
    p.setup()
    # Pre-bind so the error path below is well-defined even for an
    # empty token stream.
    kind = value = start = None
    for tkn in tokens:
        kind, value, start, end = tkn
        if kind in (tokenize.COMMENT, tokenize.NL):
            continue
        if kind == token.OP:
            # Map the generic OP token to its specific grammar token.
            kind = grammar.opmap[value]
        if kind == token.INDENT:
            # The parser only cares that an INDENT occurred, not its text.
            value = ""
        if p.addtoken(kind, value, (start, end)):
            break
    else:
        # We never broke out -- EOF arrived before the parse completed.
        # BUG FIX: this previously raised `parse.ParseError`, but no name
        # `parse` is in scope here; use the imported ParseError directly.
        raise ParseError("incomplete input", kind, value, ("", start))
    return p.rootnode
+
+
def parse(tokens, logger):
    """Given a sequence of tokens, return the lib2to3 parse tree as an AST.

    Each grammar in GRAMMARS is tried in turn; the first that parses
    wins.  If all of them fail, a SyntaxError located at the last
    failure position is raised.
    """
    for grammar_name, gr in GRAMMARS:
        try:
            with timers["parse"]:
                tree = parse_tokens(gr, tokens)
            with timers["rewrite"]:
                return ast.convert(logger, tree)
        except ParseError as pe:
            lineno, column = pe.context[1]
            logger.debug("%s at line %d, column %d using grammar for %s",
                         pe, lineno, column, grammar_name)
    exc = SyntaxError("Syntax Error")
    exc.lineno = lineno
    exc.offset = column
    raise exc
diff --git a/python/extractor/semmle/python/parser/ast.py b/python/extractor/semmle/python/parser/ast.py
new file mode 100644
index 00000000000..85d87108e35
--- /dev/null
+++ b/python/extractor/semmle/python/parser/ast.py
@@ -0,0 +1,1491 @@
+from blib2to3.pgen2 import token
+from ast import literal_eval
+from semmle.python import ast
+from blib2to3.pgen2.parse import ParseError
+import sys
+
+LOAD = ast.Load()
+STORE = ast.Store()
+PARAM = ast.Param()
+DEL = ast.Del()
+
+POSITIONAL = 1
+KEYWORD = 2
+
+
+class ParseTreeVisitor(object):
+ '''Standard tree-walking visitor,
+ using `node.name` rather than `type(node).__name__`
+ '''
+
+ def visit(self, node, extra_arg=None):
+ method = 'visit_' + node.name
+ if extra_arg is None:
+ return getattr(self, method)(node)
+ else:
+ return getattr(self, method)(node, extra_arg)
+
+class Convertor(ParseTreeVisitor):
+    ''' Walk the concrete parse tree, returning an AST.
+ The CPT is specified by blib2to3/Grammar.txt.
+ The AST specified by semmle/python/master.py.
+    Each `visit_X` method takes a `X` node in the CPT and
+ produces some part of the AST, usually a single node.
+ '''
+
+ def __init__(self, logger):
+ self.logger = logger
+ # To handle f-strings nested inside other f-strings, we must keep track of the stack of
+ # surrounding prefixes while walking the tree. This is necessary because inside an f-string
+ # like `f"hello{f'to{you}dear'}world"`, the string part containing "world" has (in terms of
+ # the concrete parse tree) a prefix of `}`, which doesn't tell us how to interpret it (in
+ # particular, we can't tell if it's a raw string or not). So instead we look at the top of
+ # the prefix stack to figure out what the "current prefix" is. The nested f-string in the
+ # example above demonstrates why we must do this as a stack -- we must restore the outer
+ # `f"` prefix when we're done with the inner `f'`-prefix string.
+ #
+ # The stack manipulation itself takes place in the `visit_FSTRING_START` and
+ # `visit_FSTRING_END` methods. The text wrangling takes place in the `parse_string` helper
+ # function.
+
+ self.outer_prefix_stack = []
+
+
+ def visit_file_input(self, node):
+ body = []
+ for s in [self.visit(s) for s in node.children if s.name not in ("ENDMARKER", "NEWLINE")]:
+ if isinstance(s, list):
+ body.extend(s)
+ else:
+ body.append(s)
+ result = ast.Module(body)
+ set_location(result, node)
+ return result
+
+ def visit_import_from(self, node):
+ level = 0
+ index = 1
+ module_start = node.children[index].start
+ while is_token(node.children[index], "."):
+ level += 1
+ index += 1
+ if is_token(node.children[index], "import"):
+ module_end = node.children[index-1].end
+ index += 1
+ module_name = None
+ else:
+ module_end = node.children[index].end
+ module_name = self.visit(node.children[index])
+ index += 2
+ if is_token(node.children[index], "*"):
+ module = ast.ImportExpr(level, module_name, False)
+ set_location(module, module_start, module_end)
+ result = ast.ImportFrom(module)
+ set_location(result, node)
+ return result
+ if is_token(node.children[index], "("):
+ import_as_names = node.children[index+1]
+ else:
+ import_as_names = node.children[index]
+ aliases = []
+ for import_as_name in import_as_names.children[::2]:
+ module = ast.ImportExpr(level, module_name, False)
+ set_location(module, module_start, module_end)
+ aliases.append(self._import_as_name(import_as_name, module))
+ result = ast.Import(aliases)
+ set_location(result, node)
+ return result
+
+ #Helper for visit_import_from
+ def _import_as_name(self, node, module):
+ name = node.children[0].value
+ if len(node.children) == 3:
+ asname = node.children[2]
+ else:
+ asname = node.children[0]
+ expr = ast.ImportMember(module, name)
+ set_location(expr, node)
+ rhs = make_name(asname.value, STORE, asname.start, asname.end)
+ result = ast.alias(expr, rhs)
+ set_location(result, node)
+ return result
+
+ def visit_small_stmt(self, node):
+ return self.visit(node.children[0])
+
+ def visit_simple_stmt(self, node):
+ return [self.visit(s) for s in node.children if s.name not in ("SEMI", "NEWLINE")]
+
+ def visit_stmt(self, node):
+ return self.visit(node.children[0])
+
+ def visit_compound_stmt(self, node):
+ return self.visit(node.children[0])
+
+ def visit_pass_stmt(self, node):
+ p = ast.Pass()
+ set_location(p, node)
+ return p
+
+ def visit_classdef(self, node):
+ if len(node.children) == 4:
+ cls, name, colon, suite = node.children
+ args, keywords = [], []
+ elif len(node.children) == 7:
+ cls, name, _, args, _, colon, suite = node.children
+ args, keywords = self.visit(args)
+ else:
+ assert len(node.children) == 6
+ cls, name, _, _, colon, suite = node.children
+ args, keywords = [], []
+ start = cls.start
+ end = colon.end
+ suite = self.visit(suite)
+ inner = ast.Class(name.value, suite)
+ set_location(inner, start, end)
+ cls_expr = ast.ClassExpr(name.value, [], args, keywords, inner)
+ set_location(cls_expr, start, end)
+ name_expr = make_name(name.value, STORE, name.start, name.end)
+ result = ast.Assign(cls_expr, [name_expr])
+ set_location(result, start, end)
+ return result
+
+ def visit_arglist(self, node):
+ all_args = self._visit_list(node.children[::2])
+ args = [ arg for kind, arg in all_args if kind is POSITIONAL ]
+ keywords = [ arg for kind, arg in all_args if kind is KEYWORD ]
+ return args, keywords
+
+ def visit_argument(self, node):
+ child = node.children[0]
+ if is_token(child, "*"):
+ kind, arg = POSITIONAL, ast.Starred(self.visit(node.children[1], LOAD), LOAD)
+ elif is_token(child, "**"):
+ kind, arg = KEYWORD, ast.DictUnpacking(self.visit(node.children[1], LOAD))
+ elif len(node.children) == 3 and is_token(node.children[1], "="):
+ try:
+ name = get_node_value(child)
+ except Exception:
+ #Not a legal name
+ name = None
+ self.logger.warning("Illegal name for keyword on line %s", child.start[0])
+ kind, arg = KEYWORD, ast.keyword(name, self.visit(node.children[2], LOAD))
+ else:
+ arg = self.visit(child, LOAD)
+ if len(node.children) == 1:
+ return POSITIONAL, arg
+ elif len(node.children) == 3 and is_token(node.children[1], ":="):
+ return POSITIONAL, self.visit_namedexpr_test(node, LOAD)
+ generators = self.visit(node.children[1])
+ kind, arg = POSITIONAL, ast.GeneratorExp(arg, generators)
+ set_location(arg, node)
+ rewrite_comp(arg)
+ set_location(arg, node)
+ return kind, arg
+
+ def visit_namedexpr_test(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ target = self.visit(node.children[0], STORE)
+ value = self.visit(node.children[-1], LOAD)
+ result = ast.AssignExpr(value, target)
+ set_location(result, node)
+ return result
+
+ def visit_test(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ else:
+ if ctx is not LOAD:
+ context_error(node)
+ body = self.visit(node.children[0], ctx)
+ test = self.visit(node.children[2], ctx)
+ orelse = self.visit(node.children[4], ctx)
+ ifexp = ast.IfExp(test, body, orelse)
+ set_location(ifexp, node)
+ return ifexp
+
+ def visit_or_test(self, node, ctx):
+ return self._boolop(node, ast.Or, ctx)
+
+ def visit_and_test(self, node, ctx):
+ return self._boolop(node, ast.And, ctx)
+
+ def visit_not_test(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ if ctx is not LOAD:
+ context_error(node)
+ result = ast.UnaryOp(
+ ast.Not(),
+ self.visit(node.children[1], ctx)
+ )
+ set_location(result, node)
+ return result
+
+ # Helper for `or` and `and`.
+ def _boolop(self, node, opcls, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ values = [ self.visit(s, ctx) for s in node.children[::2] ]
+ result = ast.BoolOp(opcls(), values)
+ set_location(result, node)
+ return result
+
+ # Helper for various binary expression visitors.
+ def _binary(self, node, opfact, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ if ctx is not LOAD:
+ context_error(node)
+ children = iter(node.children)
+ result = self.visit(next(children), LOAD)
+ for op in children:
+ item = next(children)
+ rhs = self.visit(item, LOAD)
+ result = ast.BinOp(result, opfact(op), rhs)
+ set_location(result, node.start, item.end)
+ return result
+
+ def visit_suite(self, node):
+ if len(node.children) == 1:
+ return self.visit(node.children[0])
+ result = []
+ for s in [self.visit(s) for s in node.children[2:-1]]:
+ if isinstance(s, list):
+ result.extend(s)
+ else:
+ result.append(s)
+ return result
+
+ def visit_expr_stmt(self, node):
+ if len(node.children) == 1:
+ result = ast.Expr(self.visit(node.children[0], LOAD))
+ set_location(result, node)
+ return result
+ if len(node.children) > 1 and is_token(node.children[1], "="):
+ return self._assign(node)
+ if len(node.children) == 2:
+ # Annotated assignment
+ target = self.visit(node.children[0], STORE)
+ ann = node.children[1]
+ type_anno = self.visit(ann.children[1], LOAD)
+ if len(ann.children) > 2:
+ value = self.visit(ann.children[3], LOAD)
+ else:
+ value = None
+ result = ast.AnnAssign(value, type_anno, target)
+ else:
+ #Augmented assignment
+ lhs = self.visit(node.children[0], LOAD)
+ op = self.visit(node.children[1])
+ rhs = self.visit(node.children[2], LOAD)
+ expr = ast.BinOp(lhs, op, rhs)
+ set_location(expr, node)
+ result = ast.AugAssign(expr)
+ set_location(result, node)
+ return result
+
+ def visit_augassign(self, node):
+ return AUG_ASSIGN_OPS[node.children[0].value]()
+
+ #Helper for visit_expr_stmt (for assignment)
+ def _assign(self, node):
+ targets = [ self.visit(t, STORE) for t in node.children[:-1:2]]
+ result = ast.Assign(self.visit(node.children[-1], LOAD), targets)
+ set_location(result, node)
+ return result
+
+ def visit_testlist(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ elts = self._visit_list(node.children[::2], ctx)
+ result = ast.Tuple(elts, ctx)
+ set_location(result, node)
+ return result
+
+ visit_testlist_star_expr = visit_testlist
+
+ def visit_comparison(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ if ctx is not LOAD:
+ context_error(node)
+ left = self.visit(node.children[0], ctx)
+ ops = [ self.visit(op) for op in node.children[1::2]]
+ comps = [ self.visit(op, ctx) for op in node.children[2::2]]
+ result = ast.Compare(left, ops, comps)
+ set_location(result, node)
+ return result
+
+ def visit_comp_op(self, node):
+ if len(node.children) == 1:
+ return COMP_OP_CLASSES[node.children[0].value]()
+ else:
+ assert len(node.children) == 2
+ return ast.IsNot() if node.children[0].value == "is" else ast.NotIn()
+
+ def visit_expr(self, node, ctx):
+ return self._binary(node, lambda _: ast.BitOr(), ctx)
+
+ def visit_xor_expr(self, node, ctx):
+ return self._binary(node, lambda _: ast.BitXor(), ctx)
+
+ def visit_and_expr(self, node, ctx):
+ return self._binary(node, lambda _: ast.BitAnd(), ctx)
+
+ def visit_shift_expr(self, node, ctx):
+ return self._binary(
+ node,
+ lambda op: ast.LShift() if op.value == "<<" else ast.RShift(),
+ ctx
+ )
+
+ def visit_arith_expr(self, node, ctx):
+ return self._binary(
+ node,
+ lambda op: ast.Add() if op.value == "+" else ast.Sub(),
+ ctx
+ )
+
+ def visit_term(self, node, ctx):
+ return self._binary(
+ node,
+ lambda op: TERM_OP_CLASSES[op.value](),
+ ctx
+ )
+
+ def visit_factor(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ result = ast.UnaryOp(
+ FACTOR_OP_CLASSES[node.children[0].value](),
+ self.visit(node.children[1], ctx)
+ )
+ set_location(result, node)
+ return result
+
+ def visit_power(self, node, ctx):
+ '''This part of the Grammar is formulated in a slightly
+ awkward way, so we need to recursively handle the `await`
+ prefix, then the `** factor` suffix, then the atom and trailers.
+ '''
+
+ # Because `await` was a valid identifier in earlier versions of Python,
+ # we cannot assume it indicates an `await` expression. We therefore
+ # have to look at what follows in order to make a decision. The
+ # relevant part of the grammar is
+ #
+ # power: ['await'] atom trailer* ['**' factor]
+ #
+ # The case we wish to identify is when 'await' appears, but as an
+ # `atom`, and not an `await` token.
+ #
+ # Because `atom` nodes may no longer be present (see
+ # `SKIP_IF_SINGLE_CHILD_NAMES` in `__init__.py`) we instead look at the
+ # node following the (potentially) skipped `atom`. In particular, if
+ # the following node is a `trailer` or "**" token, we know that the
+ # given node cannot be an `await` token, and must be an `atom` instead.
+ try:
+ next_node = node.children[1]
+ next_is_atom = next_node.name != "trailer" and not is_token(next_node, "**")
+ except (IndexError, AttributeError):
+ # IndexError if `node` has at most one child.
+ # AttributeError if `next_node` is a `Leaf` instead of a `Node`.
+ next_is_atom = False
+ if is_token(node.children[0], "await") and next_is_atom:
+ if ctx is not LOAD:
+ context_error(node)
+ pow = self._power(node.children[1:], ctx)
+ result = ast.Await(pow)
+ set_location(result, node)
+ return result
+ else:
+ return self._power(node.children, ctx)
+
+ #Helper for visit_power
+ def _power(self, children, ctx):
+ start = children[0].start
+ if len(children) > 1 and is_token(children[-2], "**"):
+ if ctx is not LOAD:
+ context_error(children[0])
+ trailers = children[1:-2]
+ pow_expr = self.visit(children[-1], ctx)
+ else:
+ trailers = children[1:]
+ pow_expr = None
+ if trailers:
+ expr = self.visit(children[0], LOAD)
+ for trailer in trailers[:-1]:
+ expr = self._apply_trailer(expr, trailer, start, LOAD)
+ expr = self._apply_trailer(expr, trailers[-1], start, ctx)
+ else:
+ expr = self.visit(children[0], ctx)
+ if pow_expr:
+ expr = ast.BinOp(expr, ast.Pow(), pow_expr)
+ set_location(expr, children[0].start, children[-1].end)
+ return expr
+
+ #Helper for _power
+ def _atom(self, children, ctx):
+ start = children[0].start
+ if len(children) == 1:
+ return self.visit(children[0], ctx)
+ atom = self.visit(children[0], LOAD)
+ for trailer in children[1:-1]:
+ atom = self._apply_trailer(atom, trailer, start, LOAD)
+ atom = self._apply_trailer(atom, children[-1], start, ctx)
+ return atom
+
+ #Helper for _atom
+ def _apply_trailer(self, atom, trailer, start, ctx):
+ children = trailer.children
+ left = children[0]
+ if is_token(left, "("):
+ if is_token(children[1], ")"):
+ args, keywords = [], []
+ end = children[1].end
+ else:
+ args, keywords = self.visit(children[1])
+ end = children[2].end
+ result = ast.Call(atom, args, keywords)
+ elif is_token(left, "["):
+ result = ast.Subscript(atom, self.visit(children[1], LOAD), ctx)
+ end = children[2].end
+ else:
+ assert is_token(left, ".")
+ result = ast.Attribute(atom, children[1].value, ctx)
+ end = children[1].end
+ set_location(result, start, end)
+ return result
+
+ def visit_atom(self, node, ctx):
+ left = node.children[0]
+ if left.value in "[({":
+ n = node.children[1]
+ if hasattr(n, "value") and n.value in "])}":
+ if n.value == ")":
+ result = ast.Tuple([], ctx)
+ elif n.value == "]":
+ result = ast.List([], ctx)
+ else:
+ result = ast.Dict([])
+ set_location(result, node)
+ return result
+ else:
+ result = self.visit(node.children[1], ctx)
+ if left.value == "(":
+ result.parenthesised = True
+ else:
+ #Meaningful bracketing
+ set_location(result, node)
+ if isinstance(result, (ast.GeneratorExp, ast.ListComp, ast.SetComp, ast.DictComp)):
+ rewrite_comp(result)
+ return result
+ if left.type == token.NAME:
+ return make_name(left.value, ctx, left.start, left.end)
+ if ctx is not LOAD:
+ context_error(node)
+ if left.type == token.NUMBER:
+ val = get_numeric_value(left)
+ result = ast.Num(val, left.value)
+ set_location(result, left)
+ return result
+ if left.value == ".":
+ assert len(node.children) == 3 and node.children[2].value == "."
+ result = ast.Ellipsis()
+ set_location(result, node)
+ return result
+ assert left.type == token.BACKQUOTE
+ result = ast.Repr(self.visit(node.children[1], LOAD))
+ set_location(result, node)
+ return result
+
+ def visit_STRING(self, node, ctx):
+ if ctx is not LOAD:
+ context_error(node)
+ outer_prefix = self.outer_prefix_stack[-1] if self.outer_prefix_stack else None
+ prefix, s = parse_string(node.value, self.logger, outer_prefix)
+ text = get_text(node.value, outer_prefix)
+ result = ast.StringPart(prefix, text, s)
+ set_location(result, node)
+ return result
+
+ def visit_NUMBER(self, node, ctx):
+ if ctx is not LOAD:
+ context_error(node)
+ val = get_numeric_value(node)
+ result = ast.Num(val, node.value)
+ set_location(result, node)
+ return result
+
+ def visit_funcdef(self, node, is_async=False):
+ # funcdef: 'def' NAME parameters ['->' test] ':' suite
+ name = node.children[1].value
+ if node.children[3].value == "->":
+ return_type = self.visit(node.children[4], LOAD)
+ end = node.children[5].end
+ body = self.visit(node.children[6])
+ else:
+ return_type = None
+ end = node.children[3].end
+ body = self.visit(node.children[4])
+ start = node.children[0].start
+ params = node.children[2]
+ if len(params.children) == 2:
+ args, vararg, kwonlyargs, kwarg = [], None, [], None
+ else:
+ args, vararg, kwonlyargs, kwarg = self._get_parameters(params.children[1])
+ func = ast.Function(name, [], args, vararg, kwonlyargs, kwarg, body, is_async)
+ set_location(func, start, end)
+ if len(params.children) == 2:
+ args = ast.arguments([], [], [], None, None, [])
+ else:
+ args = self._get_defaults_and_annotations(params.children[1])
+ funcexpr = ast.FunctionExpr(name, args, return_type, func)
+ set_location(funcexpr, start, end)
+ name_expr = make_name(name, STORE, node.children[1].start, node.children[1].end)
+ result = ast.Assign(funcexpr, [name_expr])
+ set_location(result, start, end)
+ return result
+
+ #Helper for visit_funcdef and visit_lambdef
+ def _get_parameters(self, node):
+ '''Returns the quadruple: args, vararg, kwonlyargs, kwarg
+ '''
+ args = []
+ vararg = None
+ kwonlyargs = []
+ kwarg = None
+ children = iter(node.children)
+ arg = None
+ for child in children:
+ if is_token(child, "*"):
+ try:
+ child = next(children)
+ except StopIteration:
+ pass
+ else:
+ if not is_token(child, ","):
+ vararg = self.visit(child, PARAM)
+ break
+ if is_token(child, ","):
+ pass
+ elif is_token(child, "/"):
+ pass
+ elif is_token(child, "="):
+ next(children)
+ elif is_token(child, "**"):
+ child = next(children)
+ kwarg = self.visit(child, PARAM)
+ else:
+ arg = self.visit(child, PARAM)
+ args.append(arg)
+ #kwonly args
+ for child in children:
+ if is_token(child, ","):
+ pass
+ elif is_token(child, "="):
+ next(children)
+ elif is_token(child, "**"):
+ child = next(children)
+ kwarg = self.visit(child, PARAM)
+ else:
+ arg = self.visit(child, PARAM)
+ kwonlyargs.append(arg)
+ return args, vararg, kwonlyargs, kwarg
+
+ #Helper for visit_funcdef and visit_lambdef
+ def _get_defaults_and_annotations(self, node):
+ defaults = []
+ kw_defaults = []
+ annotations = []
+ varargannotation = None
+ kwargannotation = None
+ kw_annotations = []
+ children = iter(node.children)
+ # Because we want the i'th element of `kw_defaults` to be the default value for
+ # the i'th keyword-only argument, when encountering the combined token for the
+ # argument name and optional annotation, we add a `None` to `kw_defaults` assuming
+ # that there is no default value. If there turns out to be a default value, we
+        # remove the `None` and add the real default value. Likewise for `defaults`.
+
+ # positional-only args and "normal" args
+ for child in children:
+ if is_token(child, "*"):
+ try:
+ child = next(children)
+ except StopIteration:
+ pass
+ else:
+ if not is_token(child, ","):
+ varargannotation = self.visit(child, LOAD)
+ break
+ if is_token(child, ","):
+ pass
+ elif is_token(child, "/"):
+ pass
+ elif is_token(child, "="):
+ child = next(children)
+ defaults.pop()
+ defaults.append(self.visit(child, LOAD))
+ elif is_token(child, "**"):
+ child = next(children)
+ kwargannotation = self.visit(child, LOAD)
+ arg = None
+ else:
+ # Preemptively assume there is no default argument (indicated by None)
+ defaults.append(None)
+ annotations.append(self.visit(child, LOAD))
+
+ #kwonly args
+ for child in children:
+ if is_token(child, ","):
+ pass
+ elif is_token(child, "="):
+ child = next(children)
+ kw_defaults.pop()
+ kw_defaults.append(self.visit(child, LOAD))
+ elif is_token(child, "**"):
+ child = next(children)
+ kwargannotation = self.visit(child, LOAD)
+ else:
+ # Preemptively assume there is no default argument (indicated by None)
+ kw_defaults.append(None)
+ kw_annotations.append(self.visit(child, LOAD))
+ result = ast.arguments(defaults, kw_defaults, annotations, varargannotation, kwargannotation, kw_annotations)
+ set_location(result, node)
+ return result
+
+ def visit_tfpdef(self, node, ctx):
+ # TO DO Support tuple parameters
+ # No one uses them any more, so this isn't super important.
+ child = node.children[0]
+ if is_token(child, "("):
+ return None
+ return self.visit(child, ctx)
+
+ def visit_tname(self, node, ctx):
+ if ctx is PARAM:
+ child = node.children[0]
+ return make_name(child.value, ctx, child.start, child.end)
+ elif len(node.children) > 1:
+ return self.visit(node.children[2], ctx)
+ else:
+ return None
+
+ def visit_decorated(self, node):
+ asgn = self.visit(node.children[1])
+ value = asgn.value
+ for deco in reversed(node.children[0].children):
+ defn = value
+ decorator = self.visit(deco)
+ value = ast.Call(decorator, [defn], [])
+ copy_location(decorator, value)
+ asgn.value = value
+ return asgn
+
+ def visit_decorators(self, node):
+ return self._visit_list(node.children)
+
+ def visit_decorator(self, node):
+ namedexpr_test = node.children[1]
+ result = self.visit_namedexpr_test(namedexpr_test, LOAD)
+ set_location(result, namedexpr_test)
+ return result
+
+ def _visit_list(self, items, ctx=None):
+ if ctx is None:
+ return [ self.visit(i) for i in items ]
+ else:
+ return [ self.visit(i, ctx) for i in items ]
+
+ def visit_dotted_name(self, node):
+ return ".".join(name.value for name in node.children[::2])
+
+ def visit_NAME(self, name, ctx):
+ return make_name(name.value, ctx, name.start, name.end)
+
+ def visit_listmaker(self, node, ctx):
+ if len(node.children) == 1 or is_token(node.children[1], ","):
+ items = [self.visit(c, ctx) for c in node.children[::2]]
+ result = ast.List(items, ctx)
+ else:
+ if ctx is not LOAD:
+ context_error(node)
+ elt = self.visit(node.children[0], ctx)
+ generators = self.visit(node.children[1])
+ result = ast.ListComp(elt, generators)
+ set_location(result, node)
+ return result
+
+ def visit_testlist_gexp(self, node, ctx):
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ if is_token(node.children[1], ","):
+ items = [self.visit(c, ctx) for c in node.children[::2]]
+ result = ast.Tuple(items, ctx)
+ else:
+ if ctx is not LOAD:
+ context_error(node)
+ elt = self.visit(node.children[0], ctx)
+ generators = self.visit(node.children[1])
+ result = ast.GeneratorExp(elt, generators)
+ set_location(result, node)
+ return result
+
+ def visit_comp_for(self, node):
+ is_async = is_token(node.children[0], "async")
+ target = self.visit(node.children[1+is_async], STORE)
+ iter = self.visit(node.children[3+is_async], LOAD)
+ if len(node.children) == 5+is_async:
+ ifs = []
+ end = iter._end
+ comp_iter = self.visit(node.children[4+is_async])
+ while comp_iter and not isinstance(comp_iter[0], ast.comprehension):
+ ifs.append(comp_iter[0])
+ end = comp_iter[0]._end
+ comp_iter = comp_iter[1:]
+ comp = ast.comprehension(target, iter, ifs)
+ comp.is_async = is_async
+ set_location(comp, node.children[0].start, end)
+ return [comp] + comp_iter
+ else:
+ comp = ast.comprehension(target, iter, [])
+ comp.is_async = is_async
+ set_location(comp, node)
+ return [comp]
+
+ visit_old_comp_for = visit_comp_for
+
+ def visit_comp_iter(self, node):
+ return self.visit(node.children[0])
+
+ def visit_comp_if(self, node):
+ cond = self.visit(node.children[1], LOAD)
+ if len(node.children) == 3:
+ comp_list = self.visit(node.children[2])
+ return [cond] + comp_list
+ else:
+ return [cond]
+
+ visit_old_comp_if = visit_comp_if
+
+ visit_old_comp_iter = visit_comp_iter
+
+ def visit_exprlist(self, node, ctx):
+ #Despite the name this returns a single expression
+ if len(node.children) == 1:
+ return self.visit(node.children[0], ctx)
+ else:
+ elts = self._visit_list(node.children[::2], ctx)
+ result = ast.Tuple(elts, ctx)
+ set_location(result, node)
+ return result
+
+ visit_testlist_safe = visit_exprlist
+
+ def visit_old_test(self, node, ctx):
+ return self.visit(node.children[0], ctx)
+
+ def visit_if_stmt(self, node):
+ endindex = len(node.children)
+ if is_token(node.children[-3], "else"):
+ orelse = self.visit(node.children[-1])
+ endindex -= 3
+ else:
+ orelse = None
+ while endindex:
+ test = self.visit(node.children[endindex-3], LOAD)
+ body = self.visit(node.children[endindex-1])
+ result = ast.If(test, body, orelse)
+ start = node.children[endindex-4].start
+ end = node.children[endindex-2].end
+ set_location(result, start, end)
+ orelse = [result]
+ endindex -= 4
+ return result
+
+ def visit_import_stmt(self, node):
+ return self.visit(node.children[0])
+
+ def visit_import_name(self, node):
+ aliases = self.visit(node.children[1])
+ result = ast.Import(aliases)
+ set_location(result, node)
+ return result
+
+ def visit_dotted_as_names(self, node):
+ return self._visit_list(node.children[::2])
+
+ def visit_dotted_as_name(self, node):
+ child0 = node.children[0]
+ dotted_name = self.visit(child0)
+ if len(node.children) == 3:
+ value = ast.ImportExpr(0, dotted_name, False)
+ child2 = node.children[2]
+ asname = make_name(child2.value, STORE, child2.start, child2.end)
+ else:
+ value = ast.ImportExpr(0, dotted_name, True)
+ topname = dotted_name.split(".")[0]
+ asname = make_name(topname, STORE, child0.start, child0.end)
+ set_location(value, child0)
+ result = ast.alias(value, asname)
+ set_location(result, node)
+ return result
+
+ def visit_dictsetmaker(self, node, ctx):
+ if ctx is not LOAD:
+ context_error(node)
+ if is_token(node.children[0], "**") or len(node.children) > 1 and is_token(node.children[1], ":"):
+ return self._dictmaker(node)
+ else:
+ return self._setmaker(node)
+
+ #Helper for visit_dictsetmaker (for dictionaries)
+ def _dictmaker(self, node):
+ if len(node.children) == 4 and is_token(node.children[1], ":") and not is_token(node.children[3], ","):
+ #Comprehension form
+ key = self.visit(node.children[0], LOAD)
+ value = self.visit(node.children[2], LOAD)
+ generators = self.visit(node.children[3])
+ result = ast.DictComp(key, value, generators)
+ set_location(result, node)
+ return result
+ index = 0
+ items = []
+ while len(node.children) > index:
+ if is_token(node.children[index], "**"):
+ d = self.visit(node.children[index+1], LOAD)
+ item = ast.DictUnpacking(d)
+ set_location(item, node.children[index].start, node.children[index+1].end)
+ index += 3
+ else:
+ key = self.visit(node.children[index], LOAD)
+ value = self.visit(node.children[index+2], LOAD)
+ item = ast.KeyValuePair(key, value)
+ set_location(item, node.children[index].start, node.children[index+2].end)
+ index += 4
+ items.append(item)
+ result = ast.Dict(items)
+ set_location(result, node)
+ return result
+
+ #Helper for visit_dictsetmaker (for sets)
+ def _setmaker(self, node):
+ if len(node.children) == 2 and not is_token(node.children[1], ","):
+ #Comprehension form
+ elt = self.visit(node.children[0], LOAD)
+ generators = self.visit(node.children[1])
+ result = ast.SetComp(elt, generators)
+ set_location(result, node)
+ return result
+ items = self._visit_list(node.children[::2], LOAD)
+ result = ast.Set(items)
+ set_location(result, node)
+ return result
+
+ def visit_while_stmt(self, node):
+ test = self.visit(node.children[1], LOAD)
+ body = self.visit(node.children[3])
+ if len(node.children) == 7:
+ orelse = self.visit(node.children[6])
+ else:
+ orelse = None
+ result = ast.While(test, body, orelse)
+ set_location(result, node.children[0].start, node.children[2].end)
+ return result
+
+ def visit_flow_stmt(self, node):
+ return self.visit(node.children[0])
+
+ def visit_break_stmt(self, node):
+ result = ast.Break()
+ set_location(result, node)
+ return result
+
+ def visit_continue_stmt(self, node):
+ result = ast.Continue()
+ set_location(result, node)
+ return result
+
+ def visit_return_stmt(self, node):
+ if len(node.children) == 2:
+ result = ast.Return(self.visit(node.children[1], LOAD))
+ else:
+ result = ast.Return(None)
+ set_location(result, node)
+ return result
+
+ def visit_raise_stmt(self, node):
+ result = ast.Raise()
+ set_location(result, node)
+ if len(node.children) == 1:
+ return result
+ result.exc = self.visit(node.children[1], LOAD)
+ if len(node.children) > 3:
+ if is_token(node.children[2], "from"):
+ result.cause = self.visit(node.children[3], LOAD)
+ else:
+ result.type = result.exc
+ del result.exc
+ result.inst = self.visit(node.children[3], LOAD)
+ if len(node.children) == 6:
+ result.tback = self.visit(node.children[5], LOAD)
+ return result
+
+ def visit_yield_stmt(self, node):
+ result = ast.Expr(self.visit(node.children[0], LOAD))
+ set_location(result, node)
+ return result
+
+ def visit_yield_expr(self, node, ctx):
+ if ctx is not LOAD:
+ context_error(node)
+ if len(node.children) == 1:
+ result = ast.Yield(None)
+ else:
+ if is_token(node.children[1].children[0], "from"):
+ result = ast.YieldFrom(self.visit(node.children[1].children[1], LOAD))
+ else:
+ result = ast.Yield(self.visit(node.children[1].children[0], LOAD))
+ set_location(result, node)
+ return result
+
+ def visit_try_stmt(self, node):
+ body = self.visit(node.children[2])
+ index = 3
+ handlers = []
+ while len(node.children) > index and not hasattr(node.children[index], "value"):
+ #Except block.
+ type, name = self.visit(node.children[index])
+ handler_body = self.visit(node.children[index+2])
+ handler = ast.ExceptStmt(type, name, handler_body)
+ set_location(handler, node.children[index].start , node.children[index+1].end)
+ handlers.append(handler)
+ index += 3
+ if len(node.children) > index and is_token(node.children[index], "else"):
+ orelse = self.visit(node.children[index+2])
+ else:
+ orelse = []
+ if is_token(node.children[-3], "finally"):
+ finalbody = self.visit(node.children[-1])
+ else:
+ finalbody = []
+ result = ast.Try(body, orelse, handlers, finalbody)
+ set_location(result, node.start, node.children[1].end)
+ return result
+
+ def visit_except_clause(self, node):
+ type, name = None, None
+ if len(node.children) > 1:
+ type = self.visit(node.children[1], LOAD)
+ if len(node.children) > 3:
+ name = self.visit(node.children[3], STORE)
+ return type, name
+
+ def visit_del_stmt(self, node):
+ if len(node.children) > 1:
+ result = ast.Delete(self._visit_list(node.children[1].children[::2], DEL))
+ else:
+ result = ast.Delete([])
+ set_location(result, node)
+ return result
+
+ visit_subscriptlist = visit_testlist
+ visit_testlist1 = visit_testlist
+
+ def visit_subscript(self, node, ctx):
+ if len(node.children) == 1 and not is_token(node.children[0], ":"):
+ return self.visit(node.children[0], ctx)
+ values = [None, None, None]
+ index = 0
+ for child in node.children:
+ if is_token(child, ":"):
+ index += 1
+ else:
+ values[index] = self.visit(child, LOAD)
+ result = ast.Slice(*values)
+ set_location(result, node)
+ return result
+
+ def visit_sliceop(self, node, ctx):
+ if ctx is not LOAD:
+ context_error(node)
+ if len(node.children) == 2:
+ return self.visit(node.children[1], LOAD)
+ else:
+ return None
+
+ def visit_assert_stmt(self, node):
+ test = self.visit(node.children[1], LOAD)
+ if len(node.children) > 2:
+ msg = self.visit(node.children[3], LOAD)
+ else:
+ msg = None
+ result = ast.Assert(test, msg)
+ set_location(result, node)
+ return result
+
+    def visit_for_stmt(self, node, is_async=False):
+        # `for` loop. Children are:
+        # "for" exprlist "in" testlist ":" suite ["else" ":" suite]
+        target = self.visit(node.children[1], STORE)
+        iter = self.visit(node.children[3], LOAD)
+        body = self.visit(node.children[5])
+        if len(node.children) == 9:
+            orelse = self.visit(node.children[8])
+        else:
+            orelse = None
+        result = ast.For(target, iter, body, orelse)
+        result.is_async = is_async
+        # The node's location covers only the header, up to the iterable.
+        set_location(result, node.children[0].start, node.children[4].end)
+        return result
+
+    def visit_global_stmt(self, node):
+        # Handles both `global` and `nonlocal`: the keyword token selects the
+        # class, and the alternate children (skipping commas) are the names.
+        cls = ast.Global if node.children[0].value == "global" else ast.Nonlocal
+        names = [child.value for child in node.children[1::2]]
+        result = cls(names)
+        set_location(result, node)
+        return result
+
+    def visit_lambdef(self, node, ctx):
+        # Desugar a lambda into a synthetic Function whose body is a single
+        # `Return` of the lambda's expression, wrapped in an ast.Lambda.
+        # Only valid in LOAD context.
+        if ctx is not LOAD:
+            context_error(node)
+        test = self.visit(node.children[-1], LOAD)
+        stmt = ast.Return(test)
+        set_location(stmt, node.children[-1])
+        if is_token(node.children[1], ":"):
+            # No parameter list, e.g. `lambda: 0`.
+            args, vararg, kwonlyargs, kwarg = [], None, [], None
+        else:
+            args, vararg, kwonlyargs, kwarg = self._get_parameters(node.children[1])
+        func = ast.Function("lambda", [], args, vararg, kwonlyargs, kwarg, [stmt], False)
+        set_location(func, node)
+        # The parameter list is visited twice: once for the Function's
+        # parameters above, and once for defaults/annotations here.
+        if is_token(node.children[1], ":"):
+            args = ast.arguments([], [], [], None, None, [])
+        else:
+            args = self._get_defaults_and_annotations(node.children[1])
+        result = ast.Lambda(args, func)
+        set_location(result, node)
+        return result
+
+    # Python 2 "old" lambdas have the same shape as regular lambdas.
+    visit_old_lambdef = visit_lambdef
+
+    # Varargs-style parameter definitions share tfpdef's structure.
+    visit_vfpdef = visit_tfpdef
+
+    def visit_vname(self, node, ctx):
+        # A parameter name inside a varargs-style definition: only meaningful
+        # in PARAM context, otherwise ignored.
+        if ctx is PARAM:
+            child = node.children[0]
+            return make_name(child.value, ctx, child.start, child.end)
+        else:
+            return None
+
+    def visit_star_expr(self, node, ctx):
+        # Starred expression `*expr`; the inner expression inherits `ctx`.
+        result = ast.Starred(self.visit(node.children[1], ctx), ctx)
+        set_location(result, node)
+        return result
+
+    def visit_with_stmt(self, node, is_async=False):
+        # Desugar `with a, b: body` into nested With nodes: each with-item
+        # (every other child, walked right-to-left) wraps the current body.
+        body = self.visit(node.children[-1])
+        for item in node.children[-3:0:-2]:
+            ctx_mngr, opt_vars = self.visit(item)
+            withstmt = ast.With(ctx_mngr, opt_vars, body)
+            set_location(withstmt, item)
+            body = [withstmt]
+        # The outermost With's location is widened to span the whole header.
+        set_location(withstmt, node.children[0].start, node.children[-2].end)
+        withstmt.is_async = is_async
+        return withstmt
+
+ def visit_with_item(self, node):
+ ctx_mngr = self.visit(node.children[0], LOAD)
+ if len(node.children) == 1:
+ return ctx_mngr, None
+ else:
+ return ctx_mngr, self.visit(node.children[2], STORE)
+
+    def visit_async_stmt(self, node):
+        # `async` prefix: delegate to the wrapped statement, passing
+        # is_async=True through the visitor's second argument.
+        return self.visit(node.children[1], True)
+
+    visit_async_funcdef = visit_async_stmt
+
+    def visit_print_stmt(self, node):
+        # Python 2 `print` statement, including the `print >>dest, ...` form.
+        if len(node.children) > 1 and is_token(node.children[1], ">>"):
+            dest = self.visit(node.children[2], LOAD)
+            items = node.children[4::2]
+        else:
+            dest = None
+            items = node.children[1::2]
+        values = self._visit_list(items, LOAD)
+        # A trailing comma suppresses the newline.
+        nl = not is_token(node.children[-1], ",")
+        result = ast.Print(dest, values, nl)
+        set_location(result, node)
+        return result
+
+    def visit_exec_stmt(self, node):
+        # Python 2 `exec body [in globals [, locals]]` statement.
+        body = self.visit(node.children[1], LOAD)
+        globals, locals = None, None
+        if len(node.children) > 3:
+            globals = self.visit(node.children[3], LOAD)
+        if len(node.children) > 5:
+            locals = self.visit(node.children[5], LOAD)
+        result = ast.Exec(body, globals, locals)
+        set_location(result, node)
+        return result
+
+    def visit_special_operation(self, node, ctx):
+        # A call-like special operation `name(args...)`; only valid in LOAD
+        # context. Three children means an empty argument list `name()`.
+        if ctx is not LOAD:
+            context_error(node)
+        name = node.children[0].value
+        if len(node.children) == 3:
+            args = []
+        else:
+            args = self._visit_list(node.children[2].children[::2], LOAD)
+        result = ast.SpecialOperation(name, args)
+        set_location(result, node)
+        return result
+
+    def visit_string(self, node, ctx):
+        # A `string` node is a run of adjacent (implicitly concatenated)
+        # string literals and, for f-strings, interpolated expressions.
+
+        def convert_parts_to_expr():
+            # Fold the pending run of StringParts into a single Str node
+            # (None if the run is empty), then reset the run in place.
+            if not current_parts:
+                return None
+            if len(current_parts) == 1:
+                string = ast.Str(current_parts[0].s, current_parts[0].prefix, None)
+            else:
+                # Our string parts may be any combination of byte and unicode
+                # strings, as this is valid in Python 2. We therefore decode
+                # the strings into unicode before concatenating.
+                text = "".join(decode_str(p.s) for p in current_parts)
+                string = ast.Str(text, current_parts[0].prefix, current_parts[:])
+            start = current_parts[0].lineno, current_parts[0].col_offset
+            set_location(string, start, current_parts[-1]._end)
+            current_parts[:] = []
+            return string
+
+        if ctx is not LOAD:
+            context_error(node)
+        # Flatten the visited children; f-string parts come back as lists.
+        parts = []
+        for p in self._visit_list(node.children, LOAD):
+            if isinstance(p, list):
+                parts.extend(p)
+            else:
+                parts.append(p)
+        current_parts = []
+        exprs = []
+        for part in parts:
+            if part is None:
+                #Conversion -- currently ignored.
+                pass
+            elif isinstance(part, ast.StringPart):
+                current_parts.append(part)
+            else:
+                # An interpolated expression ends the current literal run.
+                assert isinstance(part, ast.expr), part
+                string = convert_parts_to_expr()
+                if string:
+                    exprs.append(string)
+                exprs.append(part)
+        string = convert_parts_to_expr()
+        if string:
+            exprs.append(string)
+        # A single expression needs no JoinedStr wrapper.
+        if len(exprs) == 1:
+            return exprs[0]
+        result = ast.JoinedStr(exprs)
+        set_location(result, node)
+        return result
+
+ def visit_fstring_part(self, node, ctx):
+ nodes_to_visit = []
+ for node in node.children:
+ if node.name == 'format_specifier':
+ # Flatten format_specifiers first
+ nodes_to_visit += [ n for n in node.children if not n.name == 'FSTRING_SPEC' ]
+ else:
+ nodes_to_visit += [node]
+
+ return self._visit_list(nodes_to_visit, ctx)
+
+    def visit_format_specifier(self, node, ctx):
+        # This will currently never be visited because of the above flattening
+        assert ctx is LOAD
+        #Currently ignored
+        return None
+
+    # The following terminal tokens carry no AST information of their own.
+    def visit_CONVERSION(self, node, ctx):
+        return None
+
+    def visit_COLON(self, node, ctx):
+        return None
+
+    def visit_EQUAL(self, node, ctx):
+        return None
+
+    def visit_FSTRING_START(self, node, ctx):
+        # Opening token of an f-string: visit as a plain string, then
+        # remember its prefix for the nested string parts.
+        string = self.visit_STRING(node, ctx)
+        # Push the current prefix onto the prefix stack
+        self.outer_prefix_stack.append(string.prefix)
+        return string
+
+    def visit_FSTRING_END(self, node, ctx):
+        # Closing token of an f-string: visit as a plain string and unwind
+        # the prefix pushed by visit_FSTRING_START.
+        string = self.visit_STRING(node, ctx)
+        # We're done with this f-string, so pop its prefix off the prefix stack
+        self.outer_prefix_stack.pop()
+        return string
+
+    # Middle chunks of an f-string are ordinary string tokens.
+    visit_FSTRING_MID = visit_STRING
+
+# In the following function, we decode to `latin-1` in order to preserve
+# the byte values present in the string. This is an undocumented feature of
+# this encoding. See also the `test_python_sanity.py` test file in `/tests`.
+
+def decode_str(s):
+ if isinstance(s, bytes):
+ return str(s, 'latin-1')
+ else:
+ return s
+
+def context_error(node):
+    '''Raise a SyntaxError located at `node` for an expression used in an
+    invalid load/store/delete context.'''
+    s = SyntaxError("Invalid context")
+    s.lineno, s.offset = node.start
+    raise s
+
+def is_token(node, text):
+ '''Holds if `node` is a token (terminal) and its textual value is `text`'''
+ return hasattr(node, "value") and node.value == text
+
+def get_node_value(node):
+ '''Get the value from a NAME node,
+ stripping redundant CPT nodes'''
+ while hasattr(node, "children"):
+ assert len(node.children) == 1
+ node = node.children[0]
+ return node.value
+
+#Mapping from comparison operator strings to ast classes.
+COMP_OP_CLASSES = {
+    "<": ast.Lt,
+    "<=": ast.LtE,
+    ">": ast.Gt,
+    ">=": ast.GtE,
+    "==": ast.Eq,
+    "<>": ast.NotEq,  # Python 2 spelling of !=
+    "!=": ast.NotEq,
+    "in": ast.In,
+    "not in": ast.NotIn,
+    "is": ast.Is,
+    "is not": ast.IsNot,
+}
+
+#Mapping from multiplicative operator strings to ast classes.
+TERM_OP_CLASSES = {
+    '*': ast.Mult,
+    '/': ast.Div,
+    '%': ast.Mod,
+    '//': ast.FloorDiv,
+    '@': ast.MatMult,
+}
+
+#Mapping from unary (factor) operator strings to ast classes.
+FACTOR_OP_CLASSES = {
+    '+': ast.UAdd,
+    '-': ast.USub,
+    '~': ast.Invert,
+}
+
+#Mapping from augmented-assignment operator strings to ast classes.
+AUG_ASSIGN_OPS = {
+    '+=': ast.Add,
+    '-=': ast.Sub,
+    '*=': ast.Mult,
+    '/=': ast.Div,
+    '%=': ast.Mod,
+    '&=': ast.BitAnd,
+    '|=': ast.BitOr,
+    '^=': ast.BitXor,
+    '<<=': ast.LShift,
+    '>>=': ast.RShift,
+    '**=': ast.Pow,
+    '//=': ast.FloorDiv,
+    '@=': ast.MatMult,
+}
+
+def make_name(name, ctx, start, end):
+ '''Create a `Name` ast node'''
+ variable = ast.Variable(name)
+ node = ast.Name(variable, ctx)
+ set_location(node, start, end)
+ return node
+
+def set_location(astnode, cptnode_or_start, end=None):
+    '''Set the location of `astnode` from
+    either the CPT node or pair of locations.
+
+    With two arguments the second must have `.start` and `.end` attributes
+    (a CPT node); with three, the last two are (line, column) pairs.
+    '''
+    if end is None:
+        astnode.lineno, astnode.col_offset = cptnode_or_start.start
+        astnode._end = cptnode_or_start.end
+    else:
+        astnode.lineno, astnode.col_offset = cptnode_or_start
+        astnode._end = end
+
+def split_full_prefix(s):
+    """Splits a prefix (or a string starting with a prefix) into prefix and quote parts."""
+    quote_start = 0
+    # First, locate the end of the prefix (and the start of the quotes)
+    # NOTE(review): assumes a quote or `}` character is always present;
+    # otherwise this raises IndexError -- confirm callers guarantee this.
+    while s[quote_start] not in "'\"}":
+        quote_start += 1
+    # Next, find the end of the quotes. This is either one character past `quote_start`, or three
+    # (for triple-quoted strings).
+    if s[quote_start:quote_start + 3] in ("'''",'"""'):
+        prefix_end = quote_start + 3
+    else:
+        prefix_end = quote_start + 1
+
+    return s[:quote_start], s[quote_start:prefix_end]
+
+
+def split_string(s, outer_prefix):
+    """Splits a string into prefix, quotes, and content.
+
+    `outer_prefix` is the prefix+quotes of the enclosing f-string, used for
+    non-initial f-string parts (which start with `}`).
+    """
+    s_prefix, s_quotes = split_full_prefix(s)
+
+    quote_start = len(s_prefix)
+    prefix_end = quote_start + len(s_quotes)
+
+    # If the string starts with `}`, it is a non-inital string part of an f-string. In this case we
+    # must use the prefix and quotes from the outer f-string.
+    if s[0] == '}':
+        prefix, quotes = split_full_prefix(outer_prefix)
+    else:
+        prefix, quotes = s_prefix, s_quotes
+
+    # The string either ends with a `{` (if it comes before an interpolation inside an f-string)
+    # or else it ends with the same quotes as it begins with.
+    if s[-1] == "{":
+        content = s[prefix_end:-1]
+    else:
+        content = s[prefix_end:-len(quotes)]
+
+    return prefix.lower(), quotes, content
+
+def get_text(s, outer_prefix):
+ """Returns a cleaned-up text version of the string, normalizing the quotes and removing any
+ format string marker."""
+ prefix, quotes, content = split_string(s, outer_prefix)
+ return prefix.strip("fF") + quotes + content + quotes
+
+def parse_string(s, logger, outer_prefix):
+ '''Gets the prefix and escaped string text'''
+ prefix, quotes, content = split_string(s, outer_prefix)
+ saved_content = content
+ try:
+ ends_with_illegal_character = False
+ # If the string ends with the same quote character as the outer quotes (and/or backslashes)
+ # (e.g. the first string part of `f"""hello"{0}"""`), we must take care to not accidently create
+ # the ending quotes at the wrong place. (`literal_eval` would be unhappy with `"""hello""""`
+ # as an input.) To do this, we insert an extra space at the end (that we then must remember
+ # to remove later on).
+ if content.endswith(quotes[0]) or content.endswith('\\'):
+ ends_with_illegal_character = True
+ content = content + " "
+ text = prefix.strip("fF") + quotes + content + quotes
+ s = literal_eval(text)
+ except Exception as ex:
+ # Something has gone wrong, but we still have the original form - Should be OK.
+ logger.warning("Unable to parse string %s: %s", text, ex)
+ logger.traceback()
+ ends_with_illegal_character = False
+ s = saved_content
+ if isinstance(s, bytes):
+ try:
+ s = s.decode(sys.getfilesystemencoding())
+ except UnicodeDecodeError:
+ s = decode_str(s)
+ if ends_with_illegal_character:
+ s = s[:-1]
+ return prefix + quotes, s
+
+# NOTE(review): ESCAPES appears unused in this chunk; it may be referenced
+# elsewhere in the file -- confirm before removing.
+ESCAPES = ""
+
+def get_numeric_value(node):
+    '''Gets numeric value from a CPT leaf node.'''
+    value = node.value
+    # Strip PEP 515 digit-group underscores before evaluation.
+    value = value.replace("_", "")
+    chars = set(value.lower())
+    try:
+        if u'.' in chars or u'e' in chars or u'j' in chars:
+            # Probable float or hex or imaginary
+            return literal_eval(value)
+        if len(value) > 1 and value[0] == u'0' and value[1] not in u'boxlBOXL':
+            # Old-style octal
+            value = u'0o' + value[1:]
+        if value[-1] in u'lL':
+            # Python 2 long literal: drop the suffix.
+            return literal_eval(value[:-1])
+        return literal_eval(value)
+    except ValueError:
+        raise ParseError("Not a valid numeric value", node.type, node.value, (node.start, node.end))
+
+#This rewriting step is performed separately for two reasons.
+# 1. It is complicated
+# 2. In future, we may want to make the AST more like the syntax and less like the semantics.
+#    Keeping step separate should make that a bit easier.
+def rewrite_comp(node):
+    '''Rewrite a comprehension node in place into an implicit function.
+
+    Replaces the generators and elt (or key/value) fields with `function`
+    (a synthetic function taking the single hidden parameter ".0") and
+    `iterable` (the outermost iterable, evaluated in the enclosing scope).
+    Idempotent: returns immediately if `function` is already set.
+    '''
+    if hasattr(node, "function"):
+        return
+    gens = node.generators
+    if hasattr(node, "elt"):
+        # List/set comprehensions and generator expressions yield `elt`.
+        elt = node.elt
+        del node.elt
+    else:
+        # Dict comprehensions yield (value, key) tuples.
+        elt = ast.Tuple([node.value, node.key], LOAD)
+        elt.lineno = node.key.lineno
+        elt.col_offset = node.key.col_offset
+        elt._end = node.value._end
+        del node.key
+        del node.value
+    y = ast.Yield(elt)
+    copy_location(elt, y)
+    stmt = ast.Expr(y)
+    copy_location(elt, stmt)
+    # Build the nested for/if statements from the innermost generator out.
+    for gen in reversed(gens[1:]):
+        for if_ in gen.ifs:
+            stmt = ast.If(if_, [stmt], None)
+            copy_location(if_, stmt)
+        stmt = ast.For(gen.target, gen.iter, [stmt], None)
+        if getattr(gen, "is_async", False):
+            stmt.is_async = True
+        copy_location(node, stmt)
+    for if_ in gens[0].ifs:
+        stmt = ast.If(if_, [stmt], None)
+        copy_location(if_, stmt)
+    # The outermost loop iterates over the hidden parameter ".0".
+    p0 = ".0"
+    pvar = ast.Variable(p0)
+    arg = ast.Name(pvar, LOAD)
+    copy_location(node, arg)
+    stmt = ast.For(gens[0].target, arg, [stmt], None)
+    if getattr(gens[0], "is_async", False):
+        stmt.is_async = True
+    copy_location(node, stmt)
+    pvar = ast.Variable(p0)
+    arg = ast.Name(pvar, PARAM)
+    copy_location(node, arg)
+    function = ast.Function(COMP_NAMES[type(node).__name__], [],[arg], None, None, None, [ stmt ])
+    copy_location(node, function)
+    node.function = function
+    node.iterable = gens[0].iter
+    del node.generators
+
+
+# Maps comprehension AST class names to the synthetic function name used
+# by rewrite_comp (mirroring CPython's scope names).
+COMP_NAMES = {
+    'GeneratorExp' : 'genexpr',
+    'DictComp' : 'dictcomp',
+    'ListComp' : 'listcomp',
+    'SetComp' : 'setcomp'
+}
+
+def copy_location(src, dest):
+ '''Copy location from `src` to `dest`'''
+ dest.lineno = src.lineno
+ dest.col_offset = src.col_offset
+ dest._end = src._end
+
+def convert(logger, cpt):
+    '''Convert concrete parse tree as specified by blib2to3/Grammar.txt
+    to the AST specified by semmle/python/master.py
+    '''
+    return Convertor(logger).visit(cpt)
diff --git a/python/extractor/semmle/python/parser/dump_ast.py b/python/extractor/semmle/python/parser/dump_ast.py
new file mode 100644
index 00000000000..fbeaabb2939
--- /dev/null
+++ b/python/extractor/semmle/python/parser/dump_ast.py
@@ -0,0 +1,151 @@
+# dump_ast.py
+
+# Functions for dumping the internal Python AST in a human-readable format.
+
+import sys
+import semmle.python.parser.tokenizer
+import semmle.python.parser.tsg_parser
+from semmle.python.parser.tsg_parser import ast_fields
+from semmle.python import ast
+from semmle import logging
+from semmle.python.modules import PythonSourceModule
+
+
+
+def get_fields(cls):
+ """Gets the fields of the given class, followed by the fields of its (single-inheritance)
+ superclasses, if any.
+ Only includes fields for classes in `ast_fields`."""
+ if cls not in ast_fields:
+ return ()
+ s = cls.__bases__[0]
+ return ast_fields[cls] + get_fields(s)
+
+def missing_fields(known, node):
+ """Returns a list of fields in `node` that are not in `known`."""
+ return [field
+ for field in dir(node)
+ if field not in known
+ and not field.startswith("_")
+ and not field in ("lineno", "col_offset")
+ and not (isinstance(node, ast.Name) and field == "id")
+ ]
+
+class AstDumper(object):
+ def __init__(self, output=sys.stdout, no_locations=False):
+ self.output = output
+ self.show_locations = not no_locations
+
+ def visit(self, node, level=0, visited=None):
+ if visited is None:
+ visited = set()
+ if node in visited:
+ output.write("{} CYCLE DETECTED!\n".format(indent))
+ return
+ visited = visited.union({node})
+ output = self.output
+ cls = node.__class__
+ name = cls.__name__
+ indent = ' ' * level
+ if node is None: # Special case for `None` to avoid printing `NoneType`.
+ name = 'None'
+ if cls == str: # Special case for bare strings
+ output.write("{}{}\n".format(indent, repr(node)))
+ return
+ # In some places, we have non-AST nodes in lists, and since these don't have a location, we
+ # simply print their name instead.
+ # `ast.arguments` is special -- it has fields but no location
+ if hasattr(node, 'lineno') and not isinstance(node, ast.arguments) and self.show_locations:
+ position = (node.lineno, node.col_offset, node._end[0], node._end[1])
+ output.write("{}{}: [{}, {}] - [{}, {}]\n".format(indent, name, *position))
+ else:
+ output.write("{}{}\n".format(indent, name))
+
+
+ fields = get_fields(cls)
+ unknown = missing_fields(fields, node)
+ if unknown:
+ output.write("{}UNKNOWN FIELDS: {}\n".format(indent, unknown))
+ for field in fields:
+ value = getattr(node, field, None)
+ # By default, the `parenthesised` field on expressions has no value, so it's easier to
+ # just not print it in that case.
+ if field == "parenthesised" and value is None:
+ continue
+ # Likewise, the default value for `is_async` is `False`, so we don't need to print it.
+ if field == "is_async" and value is False:
+ continue
+ output.write("{} {}:".format(indent,field))
+ if isinstance(value, list):
+ output.write(" [")
+ if len(value) == 0:
+ output.write("]\n")
+ continue
+ output.write("\n")
+ for n in value:
+ self.visit(n, level+2, visited)
+ output.write("{} ]\n".format(indent))
+ # Some AST classes are special in that the identity of the object is the only thing
+ # that matters (and they have no location info). For this reason we simply print the name.
+ elif isinstance(value, (ast.expr_context, ast.boolop, ast.cmpop, ast.operator, ast.unaryop)):
+ output.write(' {}\n'.format(value.__class__.__name__))
+ elif isinstance(value, ast.AstBase):
+ output.write("\n")
+ self.visit(value, level+2, visited)
+ else:
+ output.write(' {}\n'.format(repr(value)))
+
+
+class StdoutLogger(logging.Logger):
+    '''Logger that writes every message straight to stdout (level ignored).'''
+    def log(self, level, fmt, *args):
+        sys.stdout.write(fmt % args + "\n")
+
+def old_parser(inputfile, logger):
+    '''Parse `inputfile` with the old parser and return its AST.'''
+    mod = PythonSourceModule(None, inputfile, logger)
+    logger.close()
+    return mod.old_py_ast
+
+def args_parser():
+    'Parse command_line, returning options, arguments'
+    # NOTE: optparse is deprecated in favour of argparse; kept here since
+    # callers depend on the OptionParser return value.
+    from optparse import OptionParser
+    usage = "usage: %prog [options] python-file"
+    parser = OptionParser(usage=usage)
+    parser.add_option("-o", "--old", help="Dump old AST.", action="store_true")
+    parser.add_option("-n", "--new", help="Dump new AST.", action="store_true")
+    parser.add_option("-l", "--no-locations", help="Don't include location info in dump", action="store_true")
+    parser.add_option("-d", "--debug", help="Print debug information.", action="store_true")
+    return parser
+
+def main():
+    '''Command-line entry point: validate options and dump the chosen AST.'''
+    parser = args_parser()
+    options, args = parser.parse_args(sys.argv[1:])
+
+    if options.debug:
+        # NOTE(review): assumes a module-level DEBUG flag is defined
+        # elsewhere in this module -- none is visible in this chunk; confirm.
+        global DEBUG
+        DEBUG = True
+
+    if len(args) != 1:
+        sys.stderr.write("Error: wrong number of arguments.\n")
+        parser.print_help()
+        sys.exit(1)
+
+    inputfile = args[0]
+
+    if options.old and options.new:
+        sys.stderr.write("Error: options --old and --new are mutually exclusive.\n")
+        sys.exit(1)
+
+    if not (options.old or options.new):
+        sys.stderr.write("Error: Must specify either --old or --new.\n")
+        sys.exit(1)
+
+    with StdoutLogger() as logger:
+
+        if options.old:
+            # Local `ast` deliberately shadows the imported ast module here;
+            # nothing else in this function uses the module afterwards.
+            ast = old_parser(inputfile, logger)
+        else:
+            ast = semmle.python.parser.tsg_parser.parse(inputfile, logger)
+        AstDumper(no_locations=options.no_locations).visit(ast)
+
+if __name__ == '__main__':
+    main()
diff --git a/python/extractor/semmle/python/parser/tokenizer.py b/python/extractor/semmle/python/parser/tokenizer.py
new file mode 100644
index 00000000000..6ddf3fa4d03
--- /dev/null
+++ b/python/extractor/semmle/python/parser/tokenizer.py
@@ -0,0 +1,1146 @@
+# This file is AUTO-GENERATED. DO NOT MODIFY
+# To regenerate: run "python3 -m tokenizer_generator.gen_state_machine tokenizer_generator/state_transition.txt tokenizer_generator/tokenizer_template.py"
+
+import codecs
+import re
+import sys
+
+from blib2to3.pgen2.token import *
+
# NOTE: the previous check compared the *string* sys.version against '3',
# which is a lexicographic comparison — wrong for any major version >= 10
# and a well-known anti-pattern.  sys.version_info compares numerically.
# (This file is generated; the same fix belongs in the generator template.)
if sys.version_info[0] < 3:
    from array import array

    def toarray(b):
        """Wrap *b* in an unsigned-byte array so indexing yields ints.

        On Python 2, indexing a byte string yields 1-char strings; the
        array('B', ...) wrapper gives the integer semantics the tables
        below rely on.
        """
        return array('B', b)
else:
    def toarray(b):
        """Python 3 bytes already index as integers; return unchanged."""
        return b
+
+IDENTIFIER_CLASS = 1
+IDENTIFIER_CONTINUE_CLASS = 2
+ERROR_CLASS = 0
+# 3586 entries in ID index
+ID_INDEX = toarray(
+ b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x01\x11\x12\x13\x01\x14\x15\x16\x17\x18\x19\x1a\x1b\x01\x1c'
+ b'\x1d\x1e\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f !"\x1f#$\x1f\x1f\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01%\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01&'
+ b"\x01\x01\x01\x01'\x01()*+,-\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01.\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x01/0\x01123'
+ b'456789\x01:;<=>?@\x1fABCDEFGHIJKL\x1fMNO\x1f'
+ b'\x01\x01\x01PQR\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x01\x01\x01\x01S\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x01\x01T\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x01\x01UV\x1f\x1fWX\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01Y\x01\x01Z\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x01[\\\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f]\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f^_\x1f`abc\x1f\x1fd\x1f\x1f\x1f\x1f\x1f'
+ b'efg\x1f\x1f\x1f\x1f\x1fhi\x1f\x1f\x1f\x1fj\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01k\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01lm\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01n\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01'
+ b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01o\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x01\x01p\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f'
+ b'\x1fq'
+)
+ID_CHUNKS = (
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\n\x00TUUUUU\x15\x80TUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x84\x10\x00UUUUU\x15UUUUUUU\x15UU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05PUU\x05\x00\x00\x00U\x01\x00\x11\x00\x00\x00\x00'),
+ toarray(b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaaUQPE\x00\x90\x15QUUUUEUUUUUUUUUUUUUUUUUUUUEUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x85\xaaPUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUTUUUUUUUU\x15\x04\x00UUUUUUUUUU\x01\x00\xa8\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x8a(\x8a\x00\x00UUUUUU\x15@\x15\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\xaa\xaa*\x00UUUUUUUUUU\x95\xaa\xaa\xaa\xaa\xaa\xaa\xaa\nPVUUUUUUUUUUUUUUUUUUUUUUUU\xa4\xaa\x82\xaa\x96\xa2Z\xaa\xaaZA'),
+ toarray(b'\x00\x00\x00\x00YUUUUUUU\xaa\xaa\xaa\xaa\xaa\xaa*TUUUUUUUUUUUUUUUUUUUUU\xa5\xaa\xaa\x06\x00\x00\x00\xaa\xaaZUUUUUUU\x95\xaa\xaa\x05\x10\x08'),
+ toarray(b'UUUUU\xa5\x9a\xaa\xaa\xa9\xa9\n\x00\x00\x00\x00UUUUUU\xa9\x00UU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUQU\x05\x00\x00\x00\x00\x80\xaa\xaa\xaa\x8a\xaa\xaa\xaa\xaa\xaa\xaa\xaa'),
+ toarray(b'\xaaUUUUUUUUUUUUU\xa5\xa6\xaa\xaa\xaa\xaa\xa9\xaaUU\xa5\xa0\xaa\xaaTUUU\xa9TUAAUUUUUQU\x11P\x05\xa6\xaa\x82\x82\x1a\x00\x80\x00E\xa5\xa0\xaa\xaa\x05\x00\x00!'),
+ toarray(b'\xa8T\x15@AUUUUUQUQ\x14\x05\xa2*\x80\x82\n\x08\x00T\x11\x00\xa0\xaa\xaaZ\t\x00\x00\xa8TUEEUUUUUQUQT\x05\xa6\xaa\x8a\x8a\n\x01\x00\x00\x00\xa5\xa0\xaa\xaa\x00\x00\xa4\xaa'),
+ toarray(b'\xa8TUAAUUUUUQUQT\x05\xa6\xaa\x82\x82\n\x00\xa0\x00E\xa5\xa0\xaa\xaa\x04\x00\x00\x00`T\x15PQ\x05\x14Q@\x01\x15PUU\x05\xa0*\xa0\xa2\n\x01\x80\x00\x00\x00\xa0\xaa\xaa\x00\x00\x00\x00'),
+ toarray(b'\xaaVUQQUUUUUQUUU\x05\xa4\xaa\xa2\xa2\n\x00(\x15\x00\xa5\xa0\xaa\xaa\x00\x00\x00\x00\xa9TUQQUUUUUQUUT\x05\xa6\xaa\xa2\xa2\n\x00(\x00\x10\xa5\xa0\xaa\xaa\x14\x00\x00\x00'),
+ toarray(b'\xaaTUQQUUUUUUUUU\x95\xa6\xaa\xa2\xa2\x1a\x00\x95\x00@\xa5\xa0\xaa\xaa\x00\x00PU\xa0TUUU\x15PUUUUUEUU\x04U\x15 \x80\xaa"\xaa\xaa\x00\xa0\xaa\xaa\xa0\x00\x00\x00'),
+ toarray(b'TUUUUUUUUUUUY\xaa*\x00U\x95\xaa*\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x14Q\x15UUUUUUDUUY\xaa\xaa\x06U\x11\xaa\n\xaa\xaa\nU\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x01\x00\x00\x00\x00\x00\n\x00\xaa\xaa\n\x00\x00\x88\x08\xa0UUTUUUUUUUU\x01\xa8\xaa\xaa\xaa\xaa\xa2U\xa9\xaa\xaa\xa8\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x02\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUU\x95\xaa\xaa\xaa\xaaj\xaa\xaa\n\x00U\xa5Z\xa5\xa6\x96\xaaZ\xa9VUU\xa5\xaa\xaa\x9a\xaa\xaa\xaa\nUUUUUUUUUE\x00\x04UUUUUUUUUU\x15U'),
+ toarray(b'UUUUUUUUUUUUUUUUUUQ\x05U\x15Q\x05UUUUUUUUUUQ\x05UUUUUUUUQ\x05U\x15Q\x05UUU\x15UUUUUUUUUU'),
+ toarray(b'UUUUQ\x05UUUUUUUUUUUUUUUU\x15\xa8\x00\x00\xa8\xaa\n\x00\x00\x00UUUU\x00\x00\x00\x00UUUUUUUUUUUUUUUUUUUUU\x05U\x05'),
+ toarray(b'TUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUAUUUUTUUUUU\x15\x00UUUUUUUUUUUUUUUUUU\x15PUU\x01\x00'),
+ toarray(b'UUUQ\xa5\x02\x00\x00UUUU\xa5\x02\x00\x00UUUU\xa5\x00\x00\x00UUUQ\xa1\x00\x00\x00UUUUUUUUUUUUU\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa@\x00\t\xaa\xaa\n\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x80\n\xaa\xaa\n\x00UUUUUUUUUUUUUUUUUUUUUU\x01\x00UUUUUUUUUU\x19\x00UUUUUUUUUUUUUUUUU\x05\x00\x00'),
+ toarray(b'UUUUUUU\x15\xaa\xaa\xaa\x00\xaa\xaa\xaa\x00\x00\xa0\xaa\xaaUUUUUUU\x05U\x01\x00\x00UUUUUUUUUUU\x00UUUUUU\x05\x00\xaa\xaa*\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUU\x95\xaa\x00UUUUUUUUUUUUU\xa9\xaa*\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x82\xaa\xaa\n\x00\xaa\xaa\n\x00\x00@\x00\x00\xaa\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\xaaVUUUUUUUUUUU\xaa\xaa\xaa\xaaVU\x00\xaa\xaa\n\x00\x00\x00\x80\xaa\xaa\x00\x00\x00jUUUUUUU\xa9\xaa\xaaZ\xaa\xaaZUUUUUUUUUU\xa5\xaa\xaa\xaa\x00\x00\x00'),
+ toarray(b'UUUUUUUUU\xaa\xaa\xaa\xaa\xaa\x00\x00\xaa\xaa\nT\xaa\xaaZUUUUUUUU\x05UU\x01\x00UUUUUUUUUU\x15T\x00\x00\x00\x00*\xaa\xaa\xaa\xaa\xaaVYU\x96\x1a\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x8a\xaa'),
+ toarray(b'UUUUU\x05U\x05UUUUUUUUU\x05U\x05UUDDUUUUUUU\x05UUUUUUUUUUUUUQU\x11PQU\x01UPU\x00UUU\x01PQU\x01'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x02\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x04\x00\x00@\x00\x00\x00\x00UUU\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\xaa\x02\x08\xa8\xaa\xaa\x02\x00\x00\x00'),
+ toarray(b'\x10@PUU\x04U\x05\x00\x11QUUU\x05U\x00T\x05\x10\x00\x00\x00\x00UUUUUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUU\x15UUUUUUUUUUU\x15UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x01@\x95Z\x00\x00\x00'),
+ toarray(b'UUUUUUUUUE\x00\x04UUUUUUUUUUUUUU\x00@\x00\x00\x00\x80UUUUU\x15\x00\x00U\x15U\x15U\x15U\x15U\x15U\x15U\x15U\x15\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00T\x00\x00\x00\x00\x00\x00TU\xa5\xaaT\x05U\x01TUUUUUUUUUUUUUUUUUUUU\x15hUTUUUUUUUUUUUUUUUUUUUUU\x15U'),
+ toarray(b'\x00TUUUUUUUUUUTUUUUUUUUUUUUUUUUUUUUUU\x15\x00\x00\x00\x00UUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUU\x05'),
+ toarray(b'UUU\x01UUUU\xaa\xaaZ\x00\x00\x00\x00\x00UUUUUUUUUUU\x95\x00\xaa\xaaJUUUUUUU\xa5UUUUUUUUUUUUUUUUUUUU\n\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00@UUPUUUUUUUUUUUUUUUUUUUUUUUUUAUUUUUUUUUUUUUP\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00@UU'),
+ toarray(b'ee\x95UUUUU\x95\xaa\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUU\x00\x00\x00ZUUUUUUUUUUUU\xaa\xaa\xaa\xaa\n\x00\x00\xaa\xaa\n\x00\xaa\xaa\xaa\xaaZU@\x94'),
+ toarray(b'\xaa\xaaZUUUUUU\xa5\xaa\nUUUUU\x95\xaa\xaa\xaa\x00\x00\x00UUUUUUU\x01\xaaUUUUUUUUUUU\x95\xaa\xaa\xaa\x02\x00\x00@\xaa\xaa\n\x00UYUU\xaa\xaaZ\x15'),
+ toarray(b'UUUUUUUUUU\xa9\xaa\xaa*\x00\x00\x95UU\n\xaa\xaa\n\x00UUUUU\x15\x90ZUUUUUUUUUUUU\xa6\x96V\xa5\x19\x00\x00\x00\x00\x00@\x05UU\x95\xaaP)\x00\x00'),
+ toarray(b'T\x15T\x15T\x15\x00\x00U\x15U\x15UUUUUUUUUU\x15UUU\x00\x00UUUUUUUUUUUUUUUUUUUUUUUUUUUU\x95\xaa*\n\xaa\xaa\n\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x00\x00\x00UUUUU\x15@UUUUUUUUUUUU\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUU\x05UUUUUUUUUUUUUUUUUUUUUUUUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'U\x15\x00\x00@U\x00dUUQUU\x15U\x11EQUUUUUUUUUUUUUUUUUUUUUUUUUU\x05\x00\x00\x00\x00\x00\x00\x00@UUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUU\x05\x00\x00\x00\x00UUUUUUUUUUUUUUUUPUUUUUUUUUUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUU\x00'),
+ toarray(b'\xaa\xaa\xaa\xaa\x00\x00\x00\x00\xaa\xaa\xaa\xaa\x80\x02\x00\x00\x00\x00\x00\xa8\x00\x00\x00\x00\x00\x00\x00\x00UQUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x01'),
+ toarray(b'\x00\x00\x00\x00\xaa\xaa\n\x00TUUUUU\x15\x80TUUUUU\x15\x00\x00PUUUUUUUUUUUUUUUUUUUUU\x15PUPUPUP\x01\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUTUUUUU\x15UUUU\x15EUUU\x05UUU\x05\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x15\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUU\x01UUUUUUUUUUUU\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUU\x00\x00\x00TUUUUUU\x15\x00UUUUUUUUU\xa5*\x00UUUUUUU\x05UUUUUUUUU\x00UUT\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05\xaa\xaa\n\x00UUUUUUUUU\x00UUUUUUUUU\x00'),
+ toarray(b'UUUUUUUUUU\x00\x00UUUUUUUUUUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUU\x15\x00\x00UUUUU\x05\x00\x00UU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'U\x05QUUUUUUUUUUE\x01AUUUUU\x05\x00\x00UUUUU\x15\x00\x00UUUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUU\x15\x05\x00\x00'),
+ toarray(b'UUUUU\x05\x00\x00UUUUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUUU\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\xa9(\x00\xaaUTTUUUUUU\x05*\x80\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUU\x01UUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00\x00UUTUUUUUU)\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUU\x05\x00\x00UUUUU\x05\x00\x00UUUU\x15\x00\x00\x00UUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUU\x15\x00\x00\x00UUUUUUUUUUUU\x15\x00\x00\x00'),
+ toarray(b'UUUUUUUUU\xaa\x00\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUU\x01\x00@\x00\x00UUUUU\xa5\xaa\xaa\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUU\x15\x00\x00'),
+ toarray(b'jUUUUUUUUUUUUU\xaa\xaa\xaa*\x00\x00\x00\x00\x00\x00\x00\xa0\xaa\xaa\x00\x00\x00\x80jUUUUUUUUUUU\xaa\xaa*\x00\x00\x00\x00\x00UUUUUU\x01\x00\xaa\xaa\n\x00'),
+ toarray(b'jUUUUUUUU\x95\xaa\xaa\xaa\xa2\xaa\xaa\x00)\x00\x00UUUUUUUU\x95\x10\x00\x00jUUUUUUUUUUU\x95\xaa\xaa\xaaV\x01\xa8\x02\xaa\xaa\x1a\x01\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUEUUUUUU\xaa\xaa\xaa\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00U\x15QEUUUEUU\x01\x00UUUUUUUUUUU\x95\xaa\xaa*\x00\xaa\xaa\n\x00'),
+ toarray(b'\xaaTUAAUUUUUQUQT\x85\xa6\xaa\x82\x82\n\x01\x80\x00T\xa5\xa0\xaa\x02\xaa\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUU\xa9\xaa\xaa\xaaj\x15\x00\xaa\xaa\n`\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUU\xaa\xaa\xaa\xaa\xaaE\x00\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUU\x95\xaa\n\xaa\xaa\x02\x00\x00\x00\x00\x00U\n\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUU\xaa\xaa\xaa\xaa\x02\x01\x00\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUU\x95\xaa\xaa\xaa\x01\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUU\x15\xa8\xaa\xaa\xaa\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUU\xaa\xaa\xaa*\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUUUUU\xaa\xaa\n\x00\x00\x00\x00@'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUPUUUUUUUUU\xa9\xaa\xa0\xaaF\x02\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\xa9\xaajUUUUUUUUU\x95\xaa\x9a*\x00\x80\x00\x00\xa9\xaa\xaaUUUUUUUUUUU\xa5\xaa\xaa\xaa\n\x04\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUUU\x01\x00'),
+ toarray(b'UUQUUUUUUUU\x95\xaa*\xaa\xaa\x01\x00\x00\x00\xaa\xaa\n\x00\x00\x00\x00\x00PUUUUUUU\xa0\xaa\xaa\xaa\xaa\xaa\xa8\xaa\xaa*\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'U\x15EUUUUUUUUU\xa9* \x8a\xaa\x9a\x00\x00\xaa\xaa\n\x00UEQUUUUUUU\xa5*\x8a\xaa\x01\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUU\x95*\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUU\x15\x00\x00\x00\x00UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUU\x01\x00UUUUUUU\x15\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUU\x05\xaa\x02\x00\x00'),
+ toarray(b'UUUUUUUUUUUU\xaa*\x00\x00U\x00\x00\x00\xaa\xaa\n\x00@UUUUU\x00TUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUUUUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUU\x15\x80\xa9\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x00\x80jUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00E\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x15\x00\x00\x00'),
+ toarray(b'UUUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00U\x00\x00UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUU\x15\x00UUU\x01UU\x01\x00UU\x05(\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa8\n\xa8*\x00\x80\xaa*\xa8\xaa\x00\x00\x00\x00\x00\x00\x00\xa0\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa0\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUQUUUUUUUUUUUUUUUUUQ\x10\x14TQUUETUTUUUUUUUUUUUUUU'),
+ toarray(b'UE\x15TUQUQUUUUUUE\x15U\x11PUQUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05UUUUUUQUUUUU\x15UUUUUUU\x15U'),
+ toarray(b'UUUUUQUUUUUUUQUUUUU\x15UUUUUUU\x15UUUUUUQUUUUUUUQUUUUU\x15UU\xa0\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'),
+ toarray(b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa*\x80\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x02\x00\x08\x00\x00\x00\x02\x00\x00\x00\x00\x80\xaa\xa8\xaa\xaa\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\xaa*\xaa\xaa\xaa\xaa\x82\xaa\x8a\xa2*\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUU\x01\xaajU\x05\xaa\xaa\n\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00UUUUUUUUUUU\xaa\xaa\xaa\n\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x01\x00\x00\xaa*\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUU\xaaj\x00\xaa\xaa\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UTUUUUUU\x14ATU\x15UD\x00\x10@DT\x14ADD\x14A\x15U\x15UT\x11UUEUUUU\x00TTEUUUU\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x15\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUUUUUUUU\x01\x00\x00UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUU\x05UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x05\x00\x00\x00UUUUUUUUUUUUUUUUUUUU'),
+ toarray(b'UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU\x01\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'UUUUUUU\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'),
+ toarray(b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x00\x00\x00\x00'),
+)
+#0 = ERROR_CLASS(0)
+#1 = IDENTIFIER_CLASS(1)
+#2 = CharacterClass 2 []
+#3 = CharacterClass 3 ['\t', ' ']
+#4 = CharacterClass 4 ['\n', '\r']
+#5 = CharacterClass 5 ['\x0c']
+#6 = CharacterClass 6 ['!']
+#7 = CharacterClass 7 ['"']
+#8 = CharacterClass 8 ['#']
+#9 = CharacterClass 9 ['$']
+#10 = CharacterClass 10 ['%', '&', '^', '|']
+#11 = CharacterClass 11 ["'"]
+#12 = CharacterClass 12 ['(']
+#13 = CharacterClass 13 [')']
+#14 = CharacterClass 14 ['*']
+#15 = CharacterClass 15 ['+']
+#16 = CharacterClass 16 [',']
+#17 = CharacterClass 17 ['-']
+#18 = CharacterClass 18 ['.']
+#19 = CharacterClass 19 ['/']
+#20 = CharacterClass 20 ['0']
+#21 = CharacterClass 21 ['1']
+#22 = CharacterClass 22 ['2', '3', '4', '5', '6', '7']
+#23 = CharacterClass 23 ['8', '9']
+#24 = CharacterClass 24 [':']
+#25 = CharacterClass 25 [';']
+#26 = CharacterClass 26 ['<']
+#27 = CharacterClass 27 ['=']
+#28 = CharacterClass 28 ['>']
+#29 = CharacterClass 29 ['@']
+#30 = CharacterClass 30 ['A', 'C', 'D', 'c']
+#31 = CharacterClass 31 ['B', 'b']
+#32 = CharacterClass 32 ['E']
+#33 = CharacterClass 33 ['F', 'f']
+#34 = CharacterClass 34 ['J', 'j']
+#35 = CharacterClass 35 ['L']
+#36 = CharacterClass 36 ['N']
+#37 = CharacterClass 37 ['O', 'o']
+#38 = CharacterClass 38 ['R']
+#39 = CharacterClass 39 ['U', 'u']
+#40 = CharacterClass 40 ['X', 'x']
+#41 = CharacterClass 41 ['[']
+#42 = CharacterClass 42 ['\\']
+#43 = CharacterClass 43 [']']
+#44 = CharacterClass 44 ['_']
+#45 = CharacterClass 45 ['`']
+#46 = CharacterClass 46 ['a', 'd']
+#47 = CharacterClass 47 ['e']
+#48 = CharacterClass 48 ['l']
+#49 = CharacterClass 49 ['r']
+#50 = CharacterClass 50 ['s']
+#51 = CharacterClass 51 ['{']
+#52 = CharacterClass 52 ['}']
+#53 = CharacterClass 53 ['~']
+# CLASS_TABLE maps an input character's code point to its character-class
+# index (see the '#N = CharacterClass ...' comments above); that class index
+# is then used as the column when indexing the B* transition rows below.
+# Machine-generated data -- do not edit by hand.
+CLASS_TABLE = toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x05\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x06\x07\x08\t\n\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x16\x16\x16\x16\x16\x17\x17\x18\x19\x1a\x1b\x1c\x00\x1d\x1e\x1f\x1e\x1e !\x01\x01\x01"\x01#\x01$%\x01\x01&\x01\x01\'\x01\x01(\x01\x01)*+\n,-.\x1f\x1e./!\x01\x01\x01"\x010\x01\x01%\x01\x0112\x01\'\x01\x01(\x01\x013\n45\x00')
+
+# State-transition rows for the table-driven tokenizer below.  Each BNN row is
+# indexed by a character-class index (from CLASS_TABLE) and yields the next
+# tokenizer state / action code.  The rows are grouped into super-states by
+# the DEFAULT/PAREN/STRING_*/FSTRING_* tuples further down.
+# Machine-generated data -- do not edit by hand.
+B00 = toarray(b'\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01')
+B01 = toarray(b'iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')
+B02 = toarray(b'cccabaccdccccccccccccccccccccccccccccccccccccccccccccc')
+B03 = toarray(b'jUje`j*\x07[W=\x06 #9=2;6:D???/3(\')4U\x02U\x10UUUU\x02\x02U!f$U&UUU\x02U"%>')
+B04 = toarray(b'\x08\x08\x08\x08\x08\x08\x08\x05\x08\x08\x08\x04\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x03\x08\x11\x08\x08\x08\x08\x03\x03\x08\x08\x08\x08\x08\x08\x08\x08\x08\x03\x08\x08\x08\x08')
+B05 = toarray(b'\x08\x08\x08\x08\x08\x08\x08\x05\x08\x08\x08\x04\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08')
+B06 = toarray(b'\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\t\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b')
+B07 = toarray(b'\x0e\x0e\x0e\x0e\x0e\x0e\x0e\n\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e\x0e')
+B08 = toarray(b'YVVYYYYYYYYYYYYYYYYYVVVVYYYYYYVVVVVVVVVVVYYYVYVVVVVYYY')
+B09 = toarray(b'\r\r\r\r\r\r\r\r\r\r\r\x0c\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r')
+B10 = toarray(b'\r\r\r\r\r\r\r\x0f\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r')
+B11 = toarray(b'\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B12 = toarray(b'\x08\x08\x08\x08\x08\x08\x08\x13\x08\x08\x08\x12\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x11\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x11\x08\x08\x08\x08')
+B13 = toarray(b'\x08\x08\x08\x08\x08\x08\x08\x13\x08\x08\x08\x12\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08')
+B14 = toarray(b'\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x14\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16\x16')
+B15 = toarray(b'\x18\x18\x18\x18\x18\x18\x18\x15\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18')
+B16 = toarray(b'\r\r\r\r\r\r\r\r\r\r\r\x17\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r')
+B17 = toarray(b'\r\r\r\r\r\r\r\x19\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r')
+B18 = toarray(b'\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1c\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e')
+B19 = toarray(b'\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1d\x1a\x1a')
+B20 = toarray(b'\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1a\x1d')
+B21 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B22 = toarray(b'---------------------------,--------------------------')
+B23 = toarray(b'--------------------------+,,-------------------------')
+B24 = toarray(b'---------------------------,+-------------------------')
+B25 = toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00,\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00.\x00\x00..\x00\x00\x00')
+B26 = toarray(b'111111111111111111111111111011111111111111111111111111')
+B27 = toarray(b'555555555555555555555555555,55555555555555555555555555')
+B28 = toarray(b'888888888888888888887777888888888888888888888888888888')
+B29 = toarray(b'IIIIIIIIIIIIIIIIIIII7777IIIIIIIIQIAIIIIIIIIIOIIPIIIIII')
+B30 = toarray(b'-------------------+-------,--------------------------')
+B31 = toarray(b'--------------+------------,--------------------------')
+B32 = toarray(b'---------------------------,<-------------------------')
+B33 = toarray(b'IIIIIIIIIIIIIIIIIINI@@@@IIIIIIIIQIAAIIIIIIIIBIIPAIIIII')
+B34 = toarray(b'IIIIIIIIIIIIIIIIIINIEEEEIIIIIIIJQIAAIGIILIIIFIIPAIIIII')
+B35 = toarray(b'IIIIIIIIIIIIIIIIIINIEEEEIIIIIIIIQIAAIIIIIIIIFIIPAIIIII')
+B36 = toarray(b'IIIIIIIIIIIIIIIIIIIIJJIIIIIIIIIIIIIAIIIIIIIIKIIIAIIIII')
+B37 = toarray(b'IIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIIIAIIIIIIIIHIIIAIIIII')
+B38 = toarray(b'IIIIIIIIIIIIIIIIIIIILLLLIIIIIILLLLIAIIIIIIIIMILLAIIIII')
+B39 = toarray(b'CCCCCCCCCCCCCCCCCCCC@@@@CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B40 = toarray(b'CCCCCCCCCCCCCCCCCCCCEEEECCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B41 = toarray(b'CCCCCCCCCCCCCCCCCCCCGGGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B42 = toarray(b'CCCCCCCCCCCCCCCCCCCCJJCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B43 = toarray(b'CCCCCCCCCCCCCCCCCCCCLLLLCCCCCCLLLLCCCCCCCCCCCCLLCCCCCC')
+B44 = toarray(b'IIIIIIIIIIIIIIIIIIII7777IIIIIIIIQIAIIIIIIIIIIIIPIIIIII')
+B45 = toarray(b'CCCCCCCCCCCCCCCCCCCC7777CCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B46 = toarray(b'CCCCCCCCCCCCCCCQCQCCSSSSCCCCCCCCCCCCCCCCCCCCCCCCRCCCCC')
+B47 = toarray(b'CCCCCCCCCCCCCCCQCQCCSSSSCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B48 = toarray(b'IIIIIIIIIIIIIIIIIIIISSSSIIIIIIIIIIAIIIIIIIIITIIIIIIIII')
+B49 = toarray(b'CCCCCCCCCCCCCCCCCCCCSSSSCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B50 = toarray(b'ZXXZZZZZZZZZZZZZZZZZXXXXZZZZZZXXXXXXXXXXXZZZXZXXXXXZZZ')
+B51 = toarray(b']]]]\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]')
+B52 = toarray(b'____^_________________________________________________')
+B53 = toarray(b'CCCCgCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC')
+B54 = toarray(b'hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh')
+B55 = toarray(b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+B56 = toarray(b'jUjekj*\x07[W=\x06 l9=2;6:D???/3(\')4U\x02U\x10UUUU\x02\x02U!fmU&UUU\x02U"n>')
+B57 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1a\x1a\x1a\x1a\x1ao\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B58 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1ao\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B59 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1a\x1a\x1a\x1a\x1aq\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B60 = toarray(b'ssss\x1fssssssrssssssssssssssssssssssssssssssssssssssssss')
+B61 = toarray(b'ssss\x1fssssssossssssssssssssssssssssssssssssssssssssssss')
+B62 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1aq\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a')
+B63 = toarray(b'ssss\x1fssrssssssssssssssssssssssssssssssssssssssssssssss')
+B64 = toarray(b'ssss\x1fssossssssssssssssssssssssssssssssssssssssssssssss')
+B65 = toarray(b'\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B66 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B67 = toarray(b'uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\x1auu')
+B68 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B69 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1a\x1a\x1a\x1a\x1ao\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B70 = toarray(b'vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\x1avv')
+B71 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1a\x1a\x1a\x1a\x1aw\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B72 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1ao\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B73 = toarray(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x1axx')
+B74 = toarray(b'\x1a\x1a\x1a\x1ap\x1a\x1aw\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B75 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B76 = toarray(b'ssss\x1fsssssssssssssssssssssssssssssssssssssssssssssssss')
+B77 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1a\x1a\x1a\x1a\x1aq\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B78 = toarray(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\x1ayy')
+B79 = toarray(b'ssss\x1fsssssswssssssssssssssssssssssssssssssssssssssssss')
+B80 = toarray(b'\x1a\x1a\x1a\x1a\x1f\x1a\x1aq\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1at\x1a\x1a')
+B81 = toarray(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\x1azz')
+B82 = toarray(b'ssss\x1fsswssssssssssssssssssssssssssssssssssssssssssssss')
+B83 = toarray(b'jUjekj*\x07[W=\x06 l9=2;6:D???|3(\')4U\x02U\x10UUUU\x02\x02U!fmU&UUU\x02U"{>')
+B84 = toarray(b'\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a{\x1a')
+B85 = toarray(b'jUje`j*\x07[W=\x06 #9=2;6:D???/3(\')4U\x02U\x10UUUU\x02\x02U!f$U&UUU\x02U"\x7f>')
+B86 = toarray(b'\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1b\x1a\x1a\x1a\x1a\x1a\x1a\x1a\x1a}~\x1a')
+B87 = toarray(b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80')
+
+# Super-states of the tokenizer.  Each super-state is a tuple of transition
+# rows (one BNN row per sub-state); the Tokenizer below pushes/pops these on
+# its state_stack when it enters/leaves parentheses, strings and f-strings.
+# Machine-generated data -- do not edit by hand.
+DEFAULT = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+PAREN = (B00, B01, B02, B56, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+STRING_S = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B57, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+STRING_D = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B58, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+STRING_SSS = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B59, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B60, B61, B55, )
+STRING_DDD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B62, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B63, B64, B55, )
+FSTRING_SDSSSDDD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B65, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B67, )
+FSTRING_SD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B68, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B67, )
+FSTRING_START_S = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B69, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B70, )
+FSTRING_S = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B71, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B67, )
+FSTRING_START_D = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B72, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B73, )
+FSTRING_D = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B74, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B67, )
+FSTRING_SSSDDD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B75, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B76, B76, B67, )
+FSTRING_START_SSS = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B77, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B60, B61, B78, )
+FSTRING_SSS = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B77, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B60, B79, B67, )
+FSTRING_START_DDD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B80, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B63, B64, B81, )
+FSTRING_DDD = (B00, B01, B02, B03, B04, B05, B06, B07, B08, B09, B10, B80, B12, B13, B14, B15, B16, B17, B18, B19, B20, B66, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B63, B82, B67, )
+FSTRING_EXPR = (B00, B01, B02, B83, B04, B05, B06, B07, B08, B09, B10, B84, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+FORMAT_SPECIFIER = (B00, B01, B02, B85, B04, B05, B06, B07, B08, B09, B10, B86, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+PENDING_DEDENT = (B00, B01, B02, B87, B04, B05, B06, B07, B08, B09, B10, B11, B12, B13, B14, B15, B16, B17, B18, B19, B20, B21, B22, B23, B24, B25, B22, B26, B27, B28, B29, B30, B31, B32, B33, B34, B35, B36, B37, B38, B39, B40, B41, B42, B43, B44, B45, B46, B47, B48, B49, B50, B51, B52, B53, B54, B55, B55, B55, )
+
+# Human-readable names for the super-state tuples, used for debugging/tracing.
+# NOTE(review): keyed by id() of the tuple object rather than the tuple itself
+# -- presumably because the contained transition rows are not hashable;
+# confirm against toarray's return type.
+TRANSITION_STATE_NAMES = {
+ id(DEFAULT): 'default',
+ id(PAREN): 'paren',
+ id(STRING_S): 'string_s',
+ id(STRING_D): 'string_d',
+ id(STRING_SSS): 'string_sss',
+ id(STRING_DDD): 'string_ddd',
+ id(FSTRING_SDSSSDDD): 'fstring_sdsssddd',
+ id(FSTRING_SD): 'fstring_sd',
+ id(FSTRING_START_S): 'fstring_start_s',
+ id(FSTRING_S): 'fstring_s',
+ id(FSTRING_START_D): 'fstring_start_d',
+ id(FSTRING_D): 'fstring_d',
+ id(FSTRING_SSSDDD): 'fstring_sssddd',
+ id(FSTRING_START_SSS): 'fstring_start_sss',
+ id(FSTRING_SSS): 'fstring_sss',
+ id(FSTRING_START_DDD): 'fstring_start_ddd',
+ id(FSTRING_DDD): 'fstring_ddd',
+ id(FSTRING_EXPR): 'fstring_expr',
+ id(FORMAT_SPECIFIER): 'format_specifier',
+ id(PENDING_DEDENT): 'pending_dedent',
+}
+# The tokenizer starts in the DEFAULT super-state.
+START_SUPER_STATE = DEFAULT
+'''
+Lookup table based tokenizer with state popping and pushing capabilities.
+The ability to push and pop state is required for handling parenthesised expressions,
+indentation, and f-strings. We also use it for handling the different quotation mark types,
+but it is not essential for that, merely convenient.
+
+'''
+
+
+
+class Tokenizer(object):
+
+ def __init__(self, text):
+ # text: the complete source text to tokenize.
+ self.text = text
+ # Index of the next character to examine; indices of the start of the
+ # current line and of the current token.
+ self.index = 0
+ self.line_start_index = 0
+ self.token_start_index = 0
+ # (line, column) where the current token starts: lines are 1-based,
+ # columns 0-based (column = index - line_start_index).
+ self.token_start = 1, 0
+ self.line = 1
+ # Current super-state (a tuple of transition rows) plus the stack of
+ # super-states pushed when entering parens / strings / f-strings.
+ self.super_state = START_SUPER_STATE
+ self.state_stack = []
+ # Indentation stack; module level starts at indent 0.
+ self.indents = [0]
+
+ def action_0(self):
+ # The pushback (index -= 1) cancels the standard trailing advance
+ # (index += 1): index is left unchanged, so the current character is
+ # not consumed.  Emits no token.
+ self.index -= 1
+ self.index += 1
+ return None
+
+ def action_1(self):
+ # Mark the current character as the start of a new token, then advance.
+ # Emits no token.
+ self.token_start_index = self.index
+ self.token_start = self.line, self.index-self.line_start_index
+ self.index += 1
+ return None
+
+ def action_2(self):
+ # Enter STRING_S, pushing the current super-state.  The pushback /
+ # advance pair leaves index unchanged, so the current character is
+ # re-examined in the new state.
+ self.index -= 1
+ self.state_stack.append(self.super_state)
+ self.super_state = STRING_S
+ self.index += 1
+ return None
+
+ def action_3(self):
+ # Enter STRING_SSS, pushing the current super-state, then advance.
+ self.state_stack.append(self.super_state)
+ self.super_state = STRING_SSS
+ self.index += 1
+ return None
+
+ def action_4(self):
+ # Pushback one character, then emit the accumulated STRING token
+ # (token_start_index .. index inclusive) and restart bookkeeping
+ # immediately after it.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [STRING, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_5(self):
+ # Enter STRING_D, pushing the current super-state; the pushback /
+ # advance pair leaves index unchanged.
+ self.index -= 1
+ self.state_stack.append(self.super_state)
+ self.super_state = STRING_D
+ self.index += 1
+ return None
+
+ def action_6(self):
+ # Enter STRING_DDD, pushing the current super-state, then advance.
+ self.state_stack.append(self.super_state)
+ self.super_state = STRING_DDD
+ self.index += 1
+ return None
+
+ def action_7(self):
+ # Enter FSTRING_START_S, pushing the current super-state; the
+ # pushback / advance pair leaves index unchanged.
+ self.index -= 1
+ self.state_stack.append(self.super_state)
+ self.super_state = FSTRING_START_S
+ self.index += 1
+ return None
+
+ def action_8(self):
+ # Enter FSTRING_START_SSS, pushing the current super-state, then advance.
+ self.state_stack.append(self.super_state)
+ self.super_state = FSTRING_START_SSS
+ self.index += 1
+ return None
+
+ def action_9(self):
+ # Enter FSTRING_START_D, pushing the current super-state; the
+ # pushback / advance pair leaves index unchanged.
+ self.index -= 1
+ self.state_stack.append(self.super_state)
+ self.super_state = FSTRING_START_D
+ self.index += 1
+ return None
+
+ def action_10(self):
+ # Enter FSTRING_START_DDD, pushing the current super-state, then advance.
+ self.state_stack.append(self.super_state)
+ self.super_state = FSTRING_START_DDD
+ self.index += 1
+ return None
+
+ def action_11(self):
+ # Line bookkeeping for a newline that produces no token: record the
+ # start of the next line and bump the line counter.
+ self.line_start_index = self.index+1
+ self.line += 1
+ self.index += 1
+ return None
+
+ def action_12(self):
+ # Emit an LPAR token for '(' and enter the PAREN super-state, pushing
+ # the current one.
+ end = self.line, self.index-self.line_start_index+1
+ result = [LPAR, u"(", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.state_stack.append(self.super_state)
+ self.super_state = PAREN
+ self.index += 1
+ return result
+
+ def action_13(self):
+ # Emit an LSQB token for '[' and enter the PAREN super-state, pushing
+ # the current one.
+ end = self.line, self.index-self.line_start_index+1
+ result = [LSQB, u"[", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.state_stack.append(self.super_state)
+ self.super_state = PAREN
+ self.index += 1
+ return result
+
+ def action_14(self):
+ # Emit an LBRACE token for '{' and enter the PAREN super-state,
+ # pushing the current one.
+ end = self.line, self.index-self.line_start_index+1
+ result = [LBRACE, u"{", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.state_stack.append(self.super_state)
+ self.super_state = PAREN
+ self.index += 1
+ return result
+
+ def action_15(self):
+ # Emit an RPAR token for ')'.  No state change happens here; any pop
+ # of the state stack is handled elsewhere.
+ end = self.line, self.index-self.line_start_index+1
+ result = [RPAR, u")", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_16(self):
+ # Emit an RSQB token for ']'.  No state change happens here.
+ end = self.line, self.index-self.line_start_index+1
+ result = [RSQB, u"]", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_17(self):
+ # Emit an RBRACE token for '}'.  No state change happens here.
+ end = self.line, self.index-self.line_start_index+1
+ result = [RBRACE, u"}", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_18(self):
+ # Emit a BACKQUOTE token for '`'.
+ end = self.line, self.index-self.line_start_index+1
+ result = [BACKQUOTE, u'`', (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_19(self):
+ # Emit an OP token spanning token_start_index .. index inclusive.
+ end = self.line, self.index-self.line_start_index+1
+ result = [OP, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_20(self):
+ # Pushback one character (it is not part of the operator), then emit
+ # the accumulated OP token.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [OP, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_21(self):
+ # Emit a CONVERSION token (presumably an f-string '!s'/'!r'/'!a'
+ # conversion -- confirm against the token definitions).
+ end = self.line, self.index-self.line_start_index+1
+ result = [CONVERSION, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_22(self):
+ # Emit a COLONEQUAL token for the walrus operator ':='.
+ end = self.line, self.index-self.line_start_index+1
+ result = [COLONEQUAL, u":=", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_23(self):
+ # Pushback one character, then emit a COLON token for ':'.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [COLON, u":", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_24(self):
+ # Emit a COMMA token for ','.
+ end = self.line, self.index-self.line_start_index+1
+ result = [COMMA, u",", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_25(self):
+ # Emit a SEMI token for ';'.
+ end = self.line, self.index-self.line_start_index+1
+ result = [SEMI, u";", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_26(self):
+ # Pushback one character, then emit an AT token for '@'.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [AT, u"@", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_27(self):
+ # Pushback one character, then emit a DOT token for '.'.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [DOT, u".", (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_28(self):
+ # Emit a RARROW token (the accumulated '->' text slice).
+ end = self.line, self.index-self.line_start_index+1
+ result = [RARROW, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_29(self):
+ # Emit an OP token for '~'.
+ end = self.line, self.index-self.line_start_index+1
+ result = [OP, u'~', (self.line, self.index-self.line_start_index), end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_30(self):
+ # Emit a NUMBER token spanning token_start_index .. index inclusive.
+ end = self.line, self.index-self.line_start_index+1
+ result = [NUMBER, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_31(self):
+ # Emit an ERRORTOKEN spanning token_start_index .. index inclusive.
+ end = self.line, self.index-self.line_start_index+1
+ result = [ERRORTOKEN, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_32(self):
+ # Pushback one character (it is not part of the number), then emit
+ # the accumulated NUMBER token.
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [NUMBER, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
+ def action_33(self):
+ # Pushback two characters (two lookahead characters turned out not to
+ # belong to the number), then emit the accumulated NUMBER token.
+ self.index -= 1
+ self.index -= 1
+ end = self.line, self.index-self.line_start_index+1
+ result = [NUMBER, self.text[self.token_start_index:self.index+1], self.token_start, end]
+ self.token_start = end
+ self.token_start_index = self.index+1
+ self.index += 1
+ return result
+
    def action_34(self):
        # Push the current character back, then emit a NAME token.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [NAME, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_35(self):
        # Push the current character back, then emit a DOLLARNAME token.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [DOLLARNAME, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_36(self):
        # Push the current character back, then emit a COMMENT token.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [COMMENT, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_37(self):
        # Emit a NEWLINE token and advance the bookkeeping to the next line.
        end = self.line, self.index-self.line_start_index+1
        result = [NEWLINE, u"\n", (self.line, self.index-self.line_start_index), end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.line_start_index = self.index+1
        self.line += 1
        self.index += 1
        return result
+
    def action_38(self):
        # Delegate to emit_indent to produce INDENT/DEDENT tokens as needed.
        return self.emit_indent()
+
    def action_39(self):
        # Push the current character back and start a new token here; no token emitted.
        self.index -= 1
        self.token_start_index = self.index
        self.token_start = self.line, self.index-self.line_start_index
        self.index += 1
        return None
+
    def action_40(self):
        # Start a new token at the current character and immediately emit it as ERRORTOKEN.
        self.token_start_index = self.index
        self.token_start = self.line, self.index-self.line_start_index
        end = self.line, self.index-self.line_start_index+1
        result = [ERRORTOKEN, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_41(self):
        # Start a new token here and advance to the next line; no token emitted.
        self.token_start_index = self.index
        self.token_start = self.line, self.index-self.line_start_index
        self.line_start_index = self.index+1
        self.line += 1
        self.index += 1
        return None
+
    def action_42(self):
        # Emit an RPAR token and pop the lexer super-state (leaving the bracketed region).
        end = self.line, self.index-self.line_start_index+1
        result = [RPAR, u")", (self.line, self.index-self.line_start_index), end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.index += 1
        return result
+
    def action_43(self):
        # Emit an RSQB token and pop the lexer super-state (leaving the bracketed region).
        end = self.line, self.index-self.line_start_index+1
        result = [RSQB, u"]", (self.line, self.index-self.line_start_index), end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.index += 1
        return result
+
    def action_44(self):
        # Emit an RBRACE token and pop the lexer super-state (leaving the braced region).
        end = self.line, self.index-self.line_start_index+1
        result = [RBRACE, u"}", (self.line, self.index-self.line_start_index), end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.index += 1
        return result
+
    def action_45(self):
        # Pop the super-state (string finished), then emit a STRING token.
        self.super_state = self.state_stack.pop()
        end = self.line, self.index-self.line_start_index+1
        result = [STRING, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_46(self):
        # Pop the super-state, emit an ERRORTOKEN, and advance to the next line
        # (unterminated single-line string hit a newline).
        self.super_state = self.state_stack.pop()
        end = self.line, self.index-self.line_start_index+1
        result = [ERRORTOKEN, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.line_start_index = self.index+1
        self.line += 1
        self.index += 1
        return result
+
    def action_47(self):
        # Push back, emit the literal middle of an f-string (FSTRING_MID),
        # then enter the interpolation-expression sub-lexer.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_MID, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_EXPR
        self.index += 1
        return result
+
    def action_48(self):
        # Push back and emit FSTRING_START for a single-quoted f-string;
        # rearrange the state stack so FSTRING_S resumes after FSTRING_EXPR.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_START, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_S
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_EXPR
        self.index += 1
        return result
+
    def action_49(self):
        # Pop the super-state (f-string finished) and emit FSTRING_END.
        self.super_state = self.state_stack.pop()
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_END, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_50(self):
        # Push back and emit FSTRING_START for a double-quoted f-string;
        # rearrange the state stack so FSTRING_D resumes after FSTRING_EXPR.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_START, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_D
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_EXPR
        self.index += 1
        return result
+
    def action_51(self):
        # Push back and emit FSTRING_START for a triple-single-quoted f-string;
        # rearrange the state stack so FSTRING_SSS resumes after FSTRING_EXPR.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_START, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_SSS
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_EXPR
        self.index += 1
        return result
+
    def action_52(self):
        # Push back and emit FSTRING_START for a triple-double-quoted f-string;
        # rearrange the state stack so FSTRING_DDD resumes after FSTRING_EXPR.
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_START, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_DDD
        self.state_stack.append(self.super_state)
        self.super_state = FSTRING_EXPR
        self.index += 1
        return result
+
    def action_53(self):
        # Pop the super-state and start a new token here; no token emitted.
        self.super_state = self.state_stack.pop()
        self.token_start_index = self.index
        self.token_start = self.line, self.index-self.line_start_index
        self.index += 1
        return None
+
    def action_54(self):
        # Emit a COLON token and enter the FORMAT_SPECIFIER sub-lexer
        # (the ':' introduces an f-string format spec).
        end = self.line, self.index-self.line_start_index+1
        result = [COLON, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.state_stack.append(self.super_state)
        self.super_state = FORMAT_SPECIFIER
        self.index += 1
        return result
+
    def action_55(self):
        # Emit an FSTRING_SPEC token covering the current token span.
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_SPEC, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.index += 1
        return result
+
    def action_56(self):
        # Push back, emit FSTRING_SPEC, and pop the super-state (spec finished).
        self.index -= 1
        end = self.line, self.index-self.line_start_index+1
        result = [FSTRING_SPEC, self.text[self.token_start_index:self.index+1], self.token_start, end]
        self.token_start = end
        self.token_start_index = self.index+1
        self.super_state = self.state_stack.pop()
        self.index += 1
        return result
+
    def action_57(self):
        # Pop the super-state, then delegate to emit_indent for INDENT/DEDENT.
        self.super_state = self.state_stack.pop()
        return self.emit_indent()
+
+
    def tokens(self, debug=False):
        """Generate tokens from self.text as [kind, text, start, end] items.

        Table-driven DFA: each character is mapped to a character class
        (ASCII via CLASS_TABLE, non-ASCII via the packed 2-bit ID_INDEX /
        ID_CHUNKS tables), which indexes the current super-state's transition
        row to select an entry of `action_table` -- (next state, optional
        bound action method). End of input is detected via IndexError rather
        than an explicit bounds check; any pending INDENT/DEDENT tokens and a
        final ENDMARKER are then flushed.
        """
        text = self.text
        cls_table = CLASS_TABLE
        id_index = ID_INDEX
        id_chunks = ID_CHUNKS
        max_id = len(id_index)*256
        # Generated transition table: index -> (next state, action or None).
        action_table = [
            (1, None), (2, self.action_0), (4, self.action_1), (5, None),
            (6, None), (7, None), (6, self.action_1), (7, self.action_1),
            (8, self.action_0), (9, None), (10, None), (11, self.action_2),
            (11, self.action_3), (3, self.action_4), (11, self.action_5), (11, self.action_6),
            (12, self.action_1), (13, None), (14, None), (15, None),
            (16, None), (17, None), (11, self.action_7), (11, self.action_8),
            (11, self.action_9), (11, self.action_10), (11, None), (18, None),
            (19, None), (20, None), (21, self.action_0), (11, self.action_11),
            (3, self.action_12), (3, self.action_13), (3, self.action_14), (3, self.action_15),
            (3, self.action_16), (3, self.action_17), (3, self.action_18), (22, self.action_1),
            (23, self.action_1), (24, self.action_1), (25, self.action_1), (26, None),
            (3, self.action_19), (3, self.action_20), (3, self.action_21), (27, None),
            (3, self.action_22), (3, self.action_23), (3, self.action_24), (3, self.action_25),
            (28, self.action_1), (3, self.action_26), (29, self.action_1), (30, None),
            (3, self.action_27), (32, self.action_1), (31, self.action_1), (33, self.action_1),
            (3, self.action_28), (26, self.action_1), (3, self.action_29), (34, self.action_1),
            (34, None), (3, self.action_30), (40, None), (1, self.action_31),
            (35, self.action_1), (36, None), (41, None), (38, None),
            (42, None), (3, self.action_32), (37, None), (43, None),
            (39, None), (44, None), (45, None), (46, None),
            (47, None), (48, None), (3, self.action_33), (49, None),
            (50, None), (8, self.action_1), (8, None), (51, self.action_1),
            (51, None), (3, self.action_34), (3, self.action_35), (52, self.action_1),
            (3, self.action_36), (52, None), (2, self.action_36), (53, None),
            (2, self.action_37), (2, None), (2, self.action_11), (3, self.action_38),
            (53, self.action_1), (3, None), (54, None), (55, self.action_11),
            (3, self.action_39), (3, self.action_0), (3, self.action_40), (3, self.action_41),
            (3, self.action_42), (3, self.action_43), (3, self.action_44), (3, self.action_45),
            (1, self.action_46), (56, None), (57, None), (11, self.action_0),
            (58, None), (3, self.action_47), (3, self.action_48), (3, self.action_49),
            (3, self.action_50), (3, self.action_51), (3, self.action_52), (11, self.action_53),
            (11, self.action_54), (3, self.action_55), (11, self.action_56), (11, self.action_1),
            (3, self.action_57),
        ]
        state = 0
        try:
            # The two loops are identical except for the debug tracing; the
            # duplication keeps the hot (non-debug) loop free of checks.
            if debug:
                while True:
                    c = ord(text[self.index])
                    if c < 128:
                        cls = cls_table[c]
                    elif c >= max_id:
                        cls = ERROR_CLASS
                    else:
                        # Non-ASCII: unpack the 2-bit class from the chunked tables.
                        b = id_chunks[id_index[c>>8]][(c>>2)&63]
                        cls = (b>>((c&3)*2))&3
                    prev_state = state
                    print("char = '%s', state=%d, cls=%d" % (text[self.index], state, cls))
                    state, transition = action_table[self.super_state[state][cls]]
                    print ("%s -> %s on %r in %s" % (prev_state, state, text[self.index], TRANSITION_STATE_NAMES[id(self.super_state)]))
                    if transition:
                        tkn = transition()
                        if tkn:
                            yield tkn
                    else:
                        # No action: the action methods advance self.index themselves.
                        self.index += 1
            else:
                while True:
                    c = ord(text[self.index])
                    if c < 128:
                        cls = cls_table[c]
                    elif c >= max_id:
                        cls = ERROR_CLASS
                    else:
                        b = id_chunks[id_index[c>>8]][(c>>2)&63]
                        cls = (b>>((c&3)*2))&3
                    state, transition = action_table[self.super_state[state][cls]]
                    if transition:
                        tkn = transition()
                        if tkn:
                            yield tkn
                    else:
                        self.index += 1
        except IndexError as ex:
            if self.index != len(text):
                # IndexError did not come from running off the end of `text`.
                #Reraise index error
                cls = cls_table[c]
                trans = self.super_state[state]
                action_index = trans[cls]
                action_table[action_index]
                # Not raised? Must have been raised in transition function.
                raise ex
        # Input exhausted: flush remaining DEDENTs, then the ENDMARKER.
        tkn = self.emit_indent()
        while tkn is not None:
            yield tkn
            tkn = self.emit_indent()
        end = self.line, self.index-self.line_start_index
        yield ENDMARKER, u"", self.token_start, end
        return
+
    def emit_indent(self):
        """Compare the current line's leading whitespace with the indent stack
        and emit at most one of INDENT / DEDENT / ILLEGALINDENT / ERRORTOKEN,
        or None when the indentation is unchanged. Sets PENDING_DEDENT when
        further DEDENTs remain to be emitted on subsequent calls."""
        indent = 0
        index = self.line_start_index
        current = self.index
        here = self.line, current-self.line_start_index
        # Measure indentation: tabs round up to the next multiple of 8,
        # form-feed resets the count (matching CPython's tokenizer).
        while index < current:
            if self.text[index] == ' ':
                indent += 1
            elif self.text[index] == '\t':
                indent = (indent+8) & -8
            elif self.text[index] == '\f':
                indent = 0
            else:
                #Unexpected state. Emit error token
                while len(self.indents) > 1:
                    self.indents.pop()
                result = ERRORTOKEN, self.text[self.token_start_index:self.index+1], self.token_start, here
                self.token_start = here
                self.line_start_index = self.index
                return result
            index += 1
        if indent == self.indents[-1]:
            # Same level: nothing to emit.
            self.token_start = here
            self.token_start_index = self.index
            return None
        elif indent > self.indents[-1]:
            # Deeper: push the new level and emit a single INDENT.
            self.indents.append(indent)
            start = self.line, 0
            result = INDENT, self.text[self.line_start_index:current], start, here
            self.token_start = here
            self.token_start_index = current
            return result
        else:
            # Shallower: pop one level per call.
            self.indents.pop()
            if indent > self.indents[-1]:
                #Illegal indent
                result = ILLEGALINDENT, u"", here, here
            else:
                result = DEDENT, u"", here, here
            if indent < self.indents[-1]:
                #More dedents to do
                self.state_stack.append(self.super_state)
                self.super_state = PENDING_DEDENT
            self.token_start = here
            self.token_start_index = self.index
            return result
+
+
# PEP 263 coding declaration, e.g. "# -*- coding: utf-8 -*-".
ENCODING_RE = re.compile(br'.*coding[:=]\s*([-\w.]+).*')
NEWLINE_BYTES = b'\n'

def encoding_from_source(source):
    '''Returns encoding of source (bytes), plus source stripped of any BOM markers.

    The encoding comes from a BOM if present (which is removed from the
    returned source), otherwise from a PEP 263 coding declaration on the
    first or second line, normalising a few non-standard spellings that the
    interpreter accepts. Defaults to 'utf-8'.
    '''
    #Check for BOM
    if source.startswith(codecs.BOM_UTF8):
        return 'utf8', source[len(codecs.BOM_UTF8):]
    if source.startswith(codecs.BOM_UTF16_BE):
        return 'utf-16be', source[len(codecs.BOM_UTF16_BE):]
    if source.startswith(codecs.BOM_UTF16_LE):
        return 'utf-16le', source[len(codecs.BOM_UTF16_LE):]
    try:
        # Split off the first two lines. Using split (rather than find)
        # avoids the bug where find() returning -1 made source[:-1] drop the
        # final byte of a file with no newline, truncating the declared
        # encoding name (e.g. b"# coding: utf-8" parsed as "utf-").
        lines = source.split(NEWLINE_BYTES, 2)
        first_line = lines[0]
        second_line = lines[1] if len(lines) > 1 else b''
        match = ENCODING_RE.match(first_line) or ENCODING_RE.match(second_line)
        if match:
            ascii_encoding = match.groups()[0]
            if sys.version < "3":
                encoding = ascii_encoding
            else:
                encoding = ascii_encoding.decode("ascii")
            # Handle non-standard encodings that are recognised by the interpreter.
            if encoding.startswith("utf-8-"):
                encoding = "utf-8"
            elif encoding == "iso-latin-1":
                encoding = "iso-8859-1"
            elif encoding.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
                encoding = "iso-8859-1"
            return encoding, source
    except Exception:
        #Failed to determine encoding -- Just treat as default.
        # Deliberately best-effort; do not write the exception to stdout
        # (the old print(ex) polluted the extractor's output stream).
        pass
    return 'utf-8', source
+
diff --git a/python/extractor/semmle/python/parser/tsg_parser.py b/python/extractor/semmle/python/parser/tsg_parser.py
new file mode 100644
index 00000000000..46784c4e860
--- /dev/null
+++ b/python/extractor/semmle/python/parser/tsg_parser.py
@@ -0,0 +1,495 @@
+# tsg_parser.py
+
+# Functions and classes used for parsing Python files using `tree-sitter-graph`
+
+from ast import literal_eval
+import sys
+import os
+import semmle.python.parser
+from semmle.python.parser.ast import copy_location, decode_str, split_string
+from semmle.python import ast
+import subprocess
+from itertools import groupby
+
DEBUG = False

def debug_print(*args, **kwargs):
    """Forward the arguments to print() only when module-level DEBUG is set."""
    if not DEBUG:
        return
    print(*args, **kwargs)
+
# Node ids are integers, and so to distinguish them from actual integers we wrap them in this class.
class Node(object):
    def __init__(self, id):
        # The integer graph-node id this wrapper stands for.
        self.id = id
    def __repr__(self):
        return "Node({})".format(self.id)
+
# A wrapper for nodes containing comments. The old parser does not create such nodes (and therefore
# there is no `ast.Comment` class) since it accesses the comments via the tokens for the given file.
class Comment(object):
    def __init__(self, text):
        # The raw comment text as emitted by tsg-python.
        self.text = text
    def __repr__(self):
        return "Comment({})".format(self.text)
+
# Marker node for a syntax error in the parsed file; `parse` raises a
# SyntaxError when it encounters one of these.
class SyntaxErrorNode(object):
    def __init__(self, source):
        # The offending source text reported by the parser.
        self.source = source
    def __repr__(self):
        return "SyntaxErrorNode({})".format(self.source)
+
# Mapping from tree-sitter CPT node kinds to their corresponding AST node classes.
# Built from every class in semmle.python.ast that derives from AstBase, keyed by
# its class name, plus the two local wrapper classes that have no ast equivalent.
tsg_to_ast = {name: cls
    for name, cls in semmle.python.ast.__dict__.items()
    if isinstance(cls, type) and ast.AstBase in cls.__mro__
}
tsg_to_ast["Comment"] = Comment
tsg_to_ast["SyntaxErrorNode"] = SyntaxErrorNode
+
# Mapping from AST node class to the fields of the node. The order of the fields is the order in
# which they will be output in the AST dump.
#
# These fields cannot be extracted automatically, so we set them manually.
# (The remaining classes are filled in below from their `__slots__`.)
ast_fields = {
    ast.Module: ("body",), # Note: has no `__slots__` to inspect
    Comment: ("text",), # Note: not an `ast` class
    SyntaxErrorNode: ("source",), # Note: not an `ast` class
    ast.Continue: (),
    ast.Break: (),
    ast.Pass: (),
    ast.Ellipsis: (),
    ast.MatchWildcardPattern: (),
}
+
# Fields that we don't want to dump on every single AST node. These are just the slots of the AST
# base class, consisting of all of the location information (which we print in a different way).
ignored_fields = semmle.python.ast.AstBase.__slots__

# Extract fields for the remaining AST classes: every public class with
# non-location slots contributes its slot names, in declaration order.
for name, cls in semmle.python.ast.__dict__.items():
    if name.startswith("_"):
        continue
    if not hasattr(cls, "__slots__"):
        continue
    slots = tuple(field for field in cls.__slots__ if field not in ignored_fields)
    if not slots:
        continue
    ast_fields[cls] = slots
+
# A mapping from strings to the AST node classes that represent things like operators.
# These have to be handled specially, because they have no location information.
# Extended below with the parser's own operator tables (arithmetic, comparison,
# and augmented-assignment operators).
locationless = {
    "and": ast.And,
    "or": ast.Or,
    "not": ast.Not,
    "uadd": ast.UAdd,
    "usub": ast.USub,
    "+": ast.Add,
    "-": ast.Sub,
    "~": ast.Invert,
    "**": ast.Pow,
    "<<": ast.LShift,
    ">>": ast.RShift,
    "&": ast.BitAnd,
    "|": ast.BitOr,
    "^": ast.BitXor,
    "load": ast.Load,
    "store": ast.Store,
    "del" : ast.Del,
    "param" : ast.Param,
}
locationless.update(semmle.python.parser.ast.TERM_OP_CLASSES)
locationless.update(semmle.python.parser.ast.COMP_OP_CLASSES)
locationless.update(semmle.python.parser.ast.AUG_ASSIGN_OPS)
+
# Locate the tsg-python binary. In a deployed extractor it is shipped under
# the extractor root's per-platform tools directory; in a development
# checkout we fall back to running it via cargo from the source tree.
if 'CODEQL_EXTRACTOR_PYTHON_ROOT' in os.environ:
    platform = os.environ['CODEQL_PLATFORM']
    ext = ".exe" if platform == "win64" else ""
    tools = os.path.join(os.environ['CODEQL_EXTRACTOR_PYTHON_ROOT'], "tools", platform)
    tsg_command = [os.path.join(tools, "tsg-python" + ext )]
else:
    # Get the path to the current script
    script_path = os.path.dirname(os.path.realpath(__file__))
    tsg_python_path = os.path.join(script_path, "../../../tsg-python")
    cargo_file = os.path.join(tsg_python_path, "Cargo.toml")
    tsg_command = ["cargo", "run", "--quiet", "--release", "--manifest-path="+cargo_file]
+
def read_tsg_python_output(path, logger):
    """Run tsg-python on `path` and parse its line-oriented graph output.

    Returns (node_attr, edge_attr) where node_attr maps node id -> attribute
    dict, and edge_attr maps start node id -> {attribute name: list of
    (value, end node id) pairs}.
    """
    # Mapping from node id (an integer) to a dictionary containing attribute data.
    node_attr = {}
    # Mapping a start node to a map from attribute names to lists of (value, end_node) pairs.
    edge_attr = {}

    command_args = tsg_command + [path]
    p = subprocess.Popen(command_args, stdout=subprocess.PIPE)
    # NOTE(review): assumes the first non-header line of output is a `node`
    # or `edge` line -- an attribute line appearing first would use `d` and
    # `in_node` before assignment. Confirm against tsg-python's output format.
    for line in p.stdout:
        line = line.decode(sys.getfilesystemencoding())
        line = line.rstrip()
        if line.startswith("node"): # e.g. `node 5`
            current_node = int(line.split(" ")[1])
            d = {}
            node_attr[current_node] = d
            in_node = True
        elif line.startswith("edge"): # e.g. `edge 5 -> 6`
            current_start, current_end = tuple(map(int, line[4:].split("->")))
            d = edge_attr.setdefault(current_start, {})
            in_node = False
        else: # attribute, e.g. `_kind: "Class"`
            key, value = line[2:].split(": ", 1)
            if value.startswith("[graph node"): # e.g. `_skip_to: [graph node 5]`
                value = Node(int(value.split(" ")[2][:-1]))
            elif value == "#true": # e.g. `_is_parenthesised: #true`
                value = True
            elif value == "#false": # e.g. `top: #false`
                value = False
            elif value == "#null": # e.g. `exc: #null`
                value = None
            else: # literal values, e.g. `name: "k1.k2"` or `level: 5`
                try:
                    if key =="s" and value[0] == '"': # e.g. `s: "k1.k2"`
                        # String contents get the f-string-aware evaluation.
                        value = evaluate_string(value)
                    else:
                        value = literal_eval(value)
                        if isinstance(value, bytes):
                            try:
                                value = value.decode(sys.getfilesystemencoding())
                            except UnicodeDecodeError:
                                # just include the bytes as-is
                                pass
                except Exception as ex:
                    # We may not know the location at this point -- for instance if we forgot to set
                    # it -- but `get_location_info` will degrade gracefully in this case.
                    loc = ":".join(str(i) for i in get_location_info(d))
                    error = ex.args[0] if ex.args else "unknown"
                    logger.warning("Error '{}' while parsing value {} at {}:{}\n".format(error, repr(value), path, loc))
            if in_node:
                d[key] = value
            else:
                d.setdefault(key, []).append((value, current_end))
    p.stdout.close()
    p.terminate()
    p.wait()
    logger.info("Read {} nodes and {} edges from TSG output".format(len(node_attr), len(edge_attr)))
    return node_attr, edge_attr
+
def evaluate_string(s):
    """Evaluate a quoted string literal from tsg output to its decoded text.

    The input is itself a Python literal; after a first literal_eval, the
    inner literal is re-assembled without any f-prefix and evaluated again so
    escapes are processed without treating `{...}` as interpolation.
    """
    s = literal_eval(s)
    prefix, quotes, content = split_string(s, None)
    ends_with_illegal_character = False
    # If the string ends with the same quote character as the outer quotes (and/or backslashes)
    # (e.g. the first string part of `f"""hello"{0}"""`), we must take care to not accidentally
    # create the ending quotes at the wrong place. To do this, we insert an extra space at the end
    # (that we then must remember to remove later on.)
    if content.endswith(quotes[0]) or content.endswith('\\'):
        ends_with_illegal_character = True
        content = content + " "
    s = prefix.strip("fF") + quotes + content + quotes
    s = literal_eval(s)
    if isinstance(s, bytes):
        s = decode_str(s)
    if ends_with_illegal_character:
        s = s[:-1]
    return s
+
def resolve_node_id(id, node_attr):
    """Follow the chain of `_skip_to` links from `id` to its final node id."""
    attrs = node_attr[id]
    while "_skip_to" in attrs:
        id = attrs["_skip_to"].id
        attrs = node_attr[id]
    return id
+
def get_context(id, node_attr, logger):
    """Gets the context of the node with the given `id`. This is either whatever is stored in the
    `ctx` attribute of the node, or the result of dereferencing a sequence of `_inherited_ctx` attributes.

    Returns a fresh instance of the matching context class (Load/Store/...).
    """
    while "ctx" not in node_attr[id]:
        if "_inherited_ctx" not in node_attr[id]:
            logger.error("No context for node {} with attributes {}\n".format(id, node_attr[id]))
            # A missing context is most likely to be a "load", so return that.
            return ast.Load()
        # Follow the inheritance link to the node that actually carries `ctx`.
        id = node_attr[id]["_inherited_ctx"].id
    return locationless[node_attr[id]["ctx"]]()
+
def get_location_info(attrs):
    """Returns the location information for a node, depending on which fields are set.

    In particular, more specific fields take precedence over (and overwrite) less specific fields.
    So, `_start_line` and `_start_column` take precedence over `_location_start`, which takes
    precedence over `_location`. Likewise when `end` replaces `start` above.

    If part of the location information is missing, the string `"???"` is substituted for the
    missing bits.
    """
    keys = ("start_line", "start_column", "end_line", "end_column")
    loc = dict.fromkeys(keys, "???")
    # Least specific: a single 4-tuple covering the whole span.
    if "_location" in attrs:
        loc["start_line"], loc["start_column"], loc["end_line"], loc["end_column"] = attrs["_location"]
    # More specific: separate (line, column) pairs for each end.
    if "_location_start" in attrs:
        loc["start_line"], loc["start_column"] = attrs["_location_start"]
    if "_location_end" in attrs:
        loc["end_line"], loc["end_column"] = attrs["_location_end"]
    # Most specific: individual scalar overrides.
    for key in keys:
        if "_" + key in attrs:
            loc[key] = attrs["_" + key]
    # Lines in the `tsg-python` output are 0-indexed, but the AST expects them to be 1-indexed.
    for key in ("start_line", "end_line"):
        if loc[key] != "???":
            loc[key] += 1
    return tuple(loc[key] for key in keys)
+
# Mapping from AST node class to those of its fields that hold *lists* of
# children; `create_placeholder_args` initialises these to [] (rather than
# None) because the constructors assert on them.
list_fields = {
    ast.arguments: ("annotations", "defaults", "kw_defaults", "kw_annotations"),
    ast.Assign: ("targets",),
    ast.BoolOp: ("values",),
    ast.Bytes: ("implicitly_concatenated_parts",),
    ast.Call: ("positional_args", "named_args"),
    ast.Case: ("body",),
    ast.Class: ("body",),
    ast.ClassExpr: ("type_parameters", "bases", "keywords"),
    ast.Compare: ("ops", "comparators",),
    ast.comprehension: ("ifs",),
    ast.Delete: ("targets",),
    ast.Dict: ("items",),
    ast.ExceptStmt: ("body",),
    ast.For: ("body",),
    ast.Function: ("type_parameters", "args", "kwonlyargs", "body"),
    ast.Global: ("names",),
    ast.If: ("body",),
    ast.Import: ("names",),
    ast.List: ("elts",),
    ast.Match: ("cases",),
    ast.MatchClassPattern: ("positional", "keyword"),
    ast.MatchMappingPattern: ("mappings",),
    ast.MatchOrPattern: ("patterns",),
    ast.MatchSequencePattern: ("patterns",),
    ast.Module: ("body",),
    ast.Nonlocal: ("names",),
    ast.Print: ("values",),
    ast.Set: ("elts",),
    ast.Str: ("implicitly_concatenated_parts",),
    ast.TypeAlias: ("type_parameters",),
    ast.Try: ("body", "handlers", "orelse", "finalbody"),
    ast.Tuple: ("elts",),
    ast.While: ("body",),
# ast.FormattedStringLiteral: ("arguments",),
}
+
def create_placeholder_args(cls):
    """ Returns a dictionary containing the placeholder arguments necessary to create an AST node.

    In most cases these arguments will be assigned the value `None`, however for a few classes we
    must substitute the empty list, as this is enforced by asserts in the constructor.
    """
    if cls in (ast.Raise, ast.Ellipsis):
        # These constructors take no required arguments.
        return {}
    fields = ast_fields[cls]
    args = {field: None for field in fields if field != "is_async"}
    # List-valued fields must start as [] rather than None (constructor asserts).
    for field in list_fields.get(cls, ()):
        args[field] = []
    if cls in (ast.GeneratorExp, ast.ListComp, ast.SetComp, ast.DictComp):
        # Comprehension constructors do not accept these two slot names.
        del args["function"]
        del args["iterable"]
    return args
+
def parse(path, logger):
    """Parse the Python file at `path` via tsg-python and return its ast.Module.

    Reconstructs the AST in four phases: create node objects, set their
    scalar attributes, attach list-valued children from the edges, then run
    post-hoc fixups (f-string reassociation). Raises SyntaxError when the
    parser reports one or when a non-empty file yields no nodes.
    """
    node_attr, edge_attr = read_tsg_python_output(path, logger)
    debug_print("node_attr:", node_attr)
    debug_print("edge_attr:", edge_attr)
    nodes = {}
    # Nodes that need to be fixed up after building the graph
    fixups = {}
    # Reverse index from node object to node id.
    node_id = {}
    # Create all the node objects
    for id, attrs in node_attr.items():
        if "_is_literal" in attrs:
            # Literal graph nodes carry their value directly.
            nodes[id] = attrs["_is_literal"]
            continue
        if "_kind" not in attrs:
            logger.error("Error: Graph node {} with attributes {} has no `_kind`!\n".format(id, attrs))
            continue
        # This is not the node we are looking for (so don't bother creating it).
        if "_skip_to" in attrs:
            continue
        cls = tsg_to_ast[attrs["_kind"]]
        args = ast_fields[cls]
        obj = cls(**create_placeholder_args(cls))
        nodes[id] = obj
        node_id[obj] = id
        # If this node needs fixing up afterwards, add it to the fixups map.
        if "_fixup" in attrs:
            fixups[id] = obj
    # Set all of the node attributes
    for id, node in nodes.items():
        attrs = node_attr[id]
        if "_is_literal" in attrs:
            continue
        expected_fields = ast_fields[type(node)]

        # Set up location information.
        node.lineno, node.col_offset, end_line, end_column = get_location_info(attrs)
        node._end = (end_line, end_column)

        if isinstance(node, SyntaxErrorNode):
            # Abort the whole parse with a located SyntaxError.
            exc = SyntaxError("Syntax Error")
            exc.lineno = node.lineno
            exc.offset = node.col_offset
            raise exc

        # Set up context information, if any
        if "ctx" in expected_fields:
            node.ctx = get_context(id, node_attr, logger)
        # Set the fields.
        for field, val in attrs.items():
            if field.startswith("_"): continue
            if field == "ctx": continue
            if field != "parenthesised" and field not in expected_fields:
                logger.warning("Unknown field {} found among {} in node {}\n".format(field, attrs, id))

            # For fields that point to other AST nodes.
            if isinstance(val, Node):
                val = resolve_node_id(val.id, node_attr)
                setattr(node, field, nodes[val])
            # Special case for `Num.n`, which should be coerced to an int.
            elif isinstance(node, ast.Num) and field == "n":
                node.n = literal_eval(val.rstrip("lL"))
            # Special case for `Name.variable`, for which we must create a new `Variable` object
            elif isinstance(node, ast.Name) and field == "variable":
                node.variable = ast.Variable(val)
            # Special case for location-less leaf-node subclasses of `ast.Node`, such as `ast.Add`.
            elif field == "op" and val in locationless.keys():
                setattr(node, field, locationless[val]())
            else: # Any other value, usually literals of various kinds.
                setattr(node, field, val)

    # Create all fields pointing to lists of values.
    for start, field_map in edge_attr.items():
        start = resolve_node_id(start, node_attr)
        parent = nodes[start]
        extra_fields = {}
        for field_name, value_end in field_map.items():
            # Sort children by index (in case they were visited out of order)
            children = [nodes[resolve_node_id(end, node_attr)] for _index, end in sorted(value_end)]
            # Skip any comments.
            children = [child for child in children if not isinstance(child, Comment)]
            # Special case for `Compare.ops`, a list of comparison operators
            if isinstance(parent, ast.Compare) and field_name == "ops":
                parent.ops = [locationless[v]() for v in children]
            elif field_name.startswith("_"):
                # We can only set the attributes given in `__slots__` on the `start` node, and so we
                # must handle fields starting with `_` specially. In this case, we simply record the
                # values and then subsequently update `edge_attr` to refer to these values. This
                # makes it act as a pseudo-field, that we can access as long as we know the `id`
                # corresponding to a given node (for which we have the `node_id` map).
                extra_fields[field_name] = children
            else:
                setattr(parent, field_name, children)
        if extra_fields:
            # Extend the existing map in `node_attr` with the extra fields.
            node_attr[start].update(extra_fields)

    # Fixup any nodes that need it.
    for id, node in fixups.items():
        if isinstance(node, (ast.JoinedStr, ast.Str)):
            fix_strings(id, node, node_attr, node_id, logger)

    debug_print("nodes:", nodes)
    if not nodes:
        # if the file referenced by path is empty, return an empty module:
        if os.path.getsize(path) == 0:
            module = ast.Module([])
            module.lineno = 1
            module.col_offset = 0
            module._end = (1, 0)
            return module
        else:
            raise SyntaxError("Syntax Error")
    # Fix up start location of outer `Module`.
    # NOTE(review): assumes tsg-python always emits the module as node 0 -- confirm.
    module = nodes[0]
    if module.body:
        # Get the location of the first non-comment node.
        module.lineno = module.body[0].lineno
    else:
        # No children! File must contain only comments! Pick the end location as the start location.
        module.lineno = module._end[0]
    return module
+
+
def get_JoinedStr_children(children):
    """
    Folds the `Str` and `expr` parts of a `JoinedStr` into a single list, and does this for each
    `JoinedStr` in `children`. Top-level `StringPart`s are included in the output directly.
    """
    for child in children:
        if isinstance(child, ast.JoinedStr):
            # Flatten the f-string's parts into the output stream.
            yield from child.values
        elif isinstance(child, ast.StringPart):
            yield child
        else:
            raise ValueError("Unexpected node type: {}".format(type(child)))
+
def concatenate_stringparts(stringparts, logger):
    """Concatenates the strings contained in the list of `stringparts`.

    Falls back to the first part's raw text (logging the error) if decoding
    or concatenation fails.
    """
    try:
        return "".join(decode_str(stringpart.s) for stringpart in stringparts)
    except Exception as ex:
        logger.error("Unable to concatenate string %s getting error %s", stringparts, ex)
        return stringparts[0].s
+
+
def fix_strings(id, node, node_attr, node_id, logger):
    """
    Reassociates the `StringPart` children of an implicitly concatenated f-string (`JoinedStr`),
    or concatenates the parts of a plain `Str`.
    """
    # Tests whether something is a string child
    is_string = lambda node: isinstance(node, ast.StringPart)

    # We have two cases to consider. Either we're given something that came from a
    # `concatenated_string`, or something that came from an `formatted_string`. The latter case can
    # be seen as a special case of the former where the list of children we consider is just the
    # single f-string.
    children = node_attr[id].get("_children", [node])
    if isinstance(node, ast.Str):
        # If the outer node is a `Str`, then we don't have to reassociate, since there are no
        # f-strings.
        # In this case we simply have to create the concatenation of its constituent parts.
        node.implicitly_concatenated_parts = children
        node.s = concatenate_stringparts(children, logger)
        node.prefix = children[0].prefix
    else:
        # Otherwise, we first have to get the flattened list of all of the strings and/or
        # expressions.
        flattened_children = get_JoinedStr_children(children)
        groups = [list(n) for _, n in groupby(flattened_children, key=is_string)]
        # At this point, `groups` is a list of lists, where each sublist is either:
        # - a list of `StringPart`s, or
        # - a singleton list containing an `expr`.
        # Crucially, `StringPart` is _not_ an `expr`.
        combined_values = []
        for group in groups:
            first = group[0]
            if isinstance(first, ast.expr):
                # If we have a list of expressions (which may happen if an interpolation contains
                # multiple distinct expressions, such as f"{foo:{bar}}", which uses interpolation to
                # also specify the padding dynamically), we simply append it.
                combined_values.extend(group)
            else:
                # Otherwise, we have a list of `StringPart`s, and we need to create a `Str` node
                # to hold them.
                combined_string = concatenate_stringparts(group, logger)
                str_node = ast.Str(combined_string, first.prefix, None)
                copy_location(first, str_node)
                # The end location should be the end of the last part (even if there is only one part).
                str_node._end = group[-1]._end
                if len(group) > 1:
                    str_node.implicitly_concatenated_parts = group
                combined_values.append(str_node)
        node.values = combined_values
diff --git a/python/extractor/semmle/python/passes/__init__.py b/python/extractor/semmle/python/passes/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/extractor/semmle/python/passes/_pass.py b/python/extractor/semmle/python/passes/_pass.py
new file mode 100644
index 00000000000..94c3b77a63d
--- /dev/null
+++ b/python/extractor/semmle/python/passes/_pass.py
@@ -0,0 +1,11 @@
+
+from abc import abstractmethod
+
class Pass(object):
    '''The base class for all extractor passes.
    Defines a single method 'extract' for all extractors to override'''

    # NOTE(review): the metaclass is plain `type`, not abc.ABCMeta, so
    # @abstractmethod is purely advisory -- a subclass that forgets to
    # override `extract` can still be instantiated without error.
    @abstractmethod
    def extract(self, module, writer):
        '''Extract trap file data from 'module', writing it to the writer.'''
        pass
diff --git a/python/extractor/semmle/python/passes/ast_pass.py b/python/extractor/semmle/python/passes/ast_pass.py
new file mode 100644
index 00000000000..363e1007c64
--- /dev/null
+++ b/python/extractor/semmle/python/passes/ast_pass.py
@@ -0,0 +1,232 @@
+
+from semmle.python import ast
+import semmle.python.master
+import sys
+from semmle.python.passes._pass import Pass
+from semmle.util import get_analysis_major_version
+
+__all__ = [ 'ASTPass' ]
+
class ASTPass(Pass):
    '''Extract relations from AST.
    Use AST.Node objects to guide _walking of AST'''

    name = "ast"

    def __init__(self):
        # Maps (class_name, field_name) -> the field's column offset in
        # its database relation.
        self.offsets = get_offset_table()

    #Entry point
    def extract(self, root, writer):
        '''Walk the AST from `root`, emitting trap tuples to `writer`.'''
        try:
            self.writer = writer
            if root is None:
                return
            self._emit_variable(ast.Variable("__name__", root))
            self._emit_variable(ast.Variable("__package__", root))
            # Introduce special variable "$" for use by the points-to library.
            self._emit_variable(ast.Variable("$", root))
            writer.write_tuple(u'py_extracted_version', 'gs', root.trap_name, get_analysis_major_version())
            self._walk(root, None, 0, root, None)
        finally:
            # Always drop the writer reference so no state leaks between runs.
            self.writer = None

    #Tree _walkers

    def _get_walker(self, node):
        # Dispatch on the broad shape of the value: list, AST node, or
        # primitive (everything else).
        if isinstance(node, list):
            return self._walk_list
        elif isinstance(node, ast.AstBase):
            return self._walk_node
        else:
            return self._emit_primitive

    def _walk(self, node, parent, index, scope, description):
        self._get_walker(node)(node, parent, index, scope, description)

    def _walk_node(self, node, parent, index, scope, _unused):
        self._emit_node(node, parent, index, scope)
        if type(node) is ast.Name:
            assert (hasattr(node, 'variable') and
                type(node.variable) is ast.Variable), (node, parent, index, scope)
        if type(node) in (ast.Class, ast.Function):
            # Classes and functions open a new scope for their children.
            scope = node
        # For scopes with a `from ... import *` statement introduce special variable "*" for use by the points-to library.
        if isinstance(node, ast.ImportFrom):
            self._emit_variable(ast.Variable("*", scope))
        for field_name, desc, child_node in iter_fields(node):
            try:
                index = self.offsets[(type(node).__name__, field_name)]
                self._walk(child_node, node, index, scope, desc)
            except ConsistencyError:
                # Augment the error message with the path to the offending
                # field before re-raising.
                ex = sys.exc_info()[1]
                ex.message += ' in ' + type(node).__name__
                if hasattr(node, 'rewritten') and node.rewritten:
                    ex.message += '(rewritten)'
                ex.message += '.' + field_name
                raise

    def _walk_list(self, node, parent, index, scope, description):
        assert description.is_list(), description
        if len(node) == 0:
            # Empty lists are not emitted at all.
            return
        else:
            self._emit_list(node, parent, index, description)
            for i, child in enumerate(node):
                self._get_walker(child)(child, node, i, scope, description.item_type)

    #Emitters
    def _emit_node(self, ast_node, parent, index, scope):
        t = type(ast_node)
        node = _ast_nodes[t.__name__]
        #Ensure all stmts have a list as a parent.
        if isinstance(ast_node, ast.stmt):
            assert isinstance(parent, list), (ast_node, parent)
        if node.is_sub_type():
            rel_name = node.super_type.relation_name()
            shared_parent = not node.super_type.unique_parent
        else:
            rel_name = node.relation_name()
            shared_parent = node.parents is None or not node.unique_parent
        # Relation names are pluralised by convention.
        if rel_name[-1] != 's':
            rel_name += 's'
        if t.__mro__[1] in (ast.cmpop, ast.operator, ast.expr_context, ast.unaryop, ast.boolop):
            #These nodes may be used more than once, but must have a
            #unique id for each occurrence in the AST
            fields = [ self.writer.get_unique_id() ]
            fmt = 'r'
        else:
            fields = [ ast_node ]
            fmt = 'n'
        if node.is_sub_type():
            fields.append(node.index)
            fmt += 'd'
        if parent:
            fields.append(parent)
            fmt += 'n'
            # The index column is only needed when the parent is not unique.
            if shared_parent:
                fields.append(index)
                fmt += 'd'
        self.writer.write_tuple(rel_name, fmt, *fields)
        if t.__mro__[1] in (ast.expr, ast.stmt):
            self.writer.write_tuple(u'py_scopes', 'nn', ast_node, scope)

    def _emit_variable(self, ast_node):
        self.writer.write_tuple(u'variable', 'nns', ast_node, ast_node.scope, ast_node.id)

    def _emit_name(self, ast_node, parent):
        self._emit_variable(ast_node)
        self.writer.write_tuple(u'py_variables', 'nn', ast_node, parent)

    def _emit_primitive(self, val, parent, index, scope, description):
        # None and False are represented by absence from the relation.
        if val is None or val is False:
            return
        if isinstance(val, ast.Variable):
            self._emit_name(val, parent)
            return
        assert not isinstance(val, ast.AstBase)
        rel = description.relation_name()
        if val is True:
            # True is represented by mere presence of the parent in the relation.
            if description.unique_parent:
                self.writer.write_tuple(rel, 'n', parent)
            else:
                self.writer.write_tuple(rel, 'nd', parent, index)
        else:
            f = format_for_primitive(val, description)
            if description.unique_parent:
                self.writer.write_tuple(rel, f + 'n', val, parent)
            else:
                self.writer.write_tuple(rel, f + 'nd', val, parent, index)

    def _emit_list(self, node, parent, index, description):
        rel_name = description.relation_name()
        if description.unique_parent:
            self.writer.write_tuple(rel_name, 'nn', node, parent)
        else:
            self.writer.write_tuple(rel_name, 'nnd', node, parent, index)
+
# Meta-node descriptions, keyed by AST class name.
_ast_nodes = semmle.python.master.all_nodes()
if get_analysis_major_version() < 3:
    # Python 2 has distinct TryExcept/TryFinally statements; both share the
    # Try meta-node description.
    _ast_nodes['TryExcept'] = _ast_nodes['Try']
    _ast_nodes['TryFinally'] = _ast_nodes['Try']
+
class ConsistencyError(Exception):
    '''Raised when an AST node's fields do not match its meta-node description.

    Callers (see ASTPass._walk_node) append context to `self.message` before
    re-raising, and `__str__` reads it back.  Python 3 exceptions no longer
    have a `message` attribute, so it must be set explicitly here; previously
    `str(e)` (and the `ex.message +=` in callers) raised AttributeError.
    '''

    def __init__(self, message=""):
        super().__init__(message)
        self.message = message

    def __str__(self):
        # `message` may have been augmented by callers after construction.
        return self.message
+
def iter_fields(node):
    '''Yield (field_name, description, value) for each declared field of
    `node` that is actually present on the instance.'''
    meta = _ast_nodes[type(node).__name__]
    for field_name, description, _, _, _ in meta.fields:
        if hasattr(node, field_name):
            yield field_name, description, getattr(node, field_name)
+
+
NUMBER_TYPES = (int, float)

def check_matches(node, node_type, owner, field):
    '''Check that the runtime type `node_type` is acceptable for the
    meta-node `node`; raise ConsistencyError if not.'''
    if node_type is list:
        if node.is_list():
            return
    else:
        # Accept if any class in the MRO carries the meta-node's name.
        if any(t.__name__ == node.__name__ for t in node_type.__mro__):
            return
        # int and float both map onto the 'number' meta-node.
        if node_type in NUMBER_TYPES and node.__name__ == 'number':
            return
    raise ConsistencyError("Found %s expected %s for field %s of %s" %
                           (node_type.__name__, node.__name__, field, owner.__name__))
+
def get_offset_table():
    '''Returns mapping of (class_name, field_name)
    pairs to offsets (in relation)'''
    table = {}
    nodes = _ast_nodes.values()
    for node in nodes:
        for field, _, offset, _, _, _ in node.layout:
            table[(node.__name__, field)] = offset
    # Python 2 exposes TryExcept/TryFinally as separate statement classes;
    # both share the layout of the Try meta-node.
    try_node = _ast_nodes['Try']
    for field, _, offset, _, _, _ in try_node.layout:
        table[('TryFinally', field)] = offset
        table[('TryExcept', field)] = offset
    return table
+
+
def format_for_primitive(val, description):
    '''Return the trap-format character for a primitive field value.'''
    if isinstance(val, str):
        return 'u'  # unicode text
    if isinstance(val, bytes):
        return 'b'  # raw bytes
    # Non-string primitives: 'd' for declared ints, 'q' otherwise,
    # decided by the meta-node's declared type name.
    return 'd' if description.__name__ == 'int' else 'q'
+
class ASTVisitor(object):
    """
    A node visitor base class that walks the abstract syntax tree and calls a
    visitor function for every node found. This function may return a value
    which is forwarded by the `visit` method.

    This class is meant to be subclassed, with the subclass adding visitor
    methods.

    The visitor functions for the nodes are ``'visit_'`` + class name of the node.
    """

    def _get_visit_method(self, node):
        # Fall back to generic_visit when no visit_<ClassName> method exists.
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)

    def visit(self, node):
        """Visit a node and return the visitor method's result."""
        # BUG FIX: the result was previously discarded, contradicting the
        # class docstring ("forwarded by the `visit` method").
        return self._get_visit_method(node)(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        if isinstance(node, ast.AstBase):
            for _, _, child in iter_fields(node):
                self.visit(child)
        elif isinstance(node, list):
            for item in node:
                self._get_visit_method(item)(item)
diff --git a/python/extractor/semmle/python/passes/exports.py b/python/extractor/semmle/python/passes/exports.py
new file mode 100644
index 00000000000..5fd69c8e093
--- /dev/null
+++ b/python/extractor/semmle/python/passes/exports.py
@@ -0,0 +1,113 @@
+
+from semmle.python import ast
+from semmle.python.passes._pass import Pass
+
def write_exports(module, exports, writer):
    '''Record each exported symbol of `module` in the py_exports relation.'''
    for symbol in exports:
        writer.write_tuple(u'py_exports', 'ns', module, symbol)
+
def list_of_symbols_from_expr(expr):
    '''Return the string constants found in a list/tuple literal `expr`.

    Non-string elements are ignored; any other expression yields [].'''
    #This should be a list of constant strings
    if not isinstance(expr, (ast.List, ast.Tuple)):
        return []
    return [element.s for element in expr.elts if isinstance(element, ast.Str)]
+
def is___all__(node):
    '''Return True if `node` is a Name node bound to the variable __all__.'''
    try:
        return isinstance(node, ast.Name) and node.variable.id == '__all__'
    except Exception:
        # A node lacking the `variable` attribute is simply not __all__.
        return False
+
def __all___from_stmt(stmt):
    '''Returns None if __all__ is not defined.
    If __all__ may be defined then return a conservative approximation'''
    assert isinstance(stmt, ast.stmt)
    if isinstance(stmt, ast.If):
        body_exports = __all___from_stmt_list(stmt.body)
        if stmt.orelse:
            orelse_exports = __all___from_stmt_list(stmt.orelse)
        else:
            orelse_exports = None
        # If __all__ = ... on one branch but not other then return []
        # If defined on neither branch return None
        if body_exports is None:
            if orelse_exports is None:
                return None
            else:
                return []
        else:
            if orelse_exports is None:
                return []
            else:
                # Defined on both branches: only symbols common to both are
                # guaranteed exported.
                # NOTE(review): this path returns a set where the other
                # paths return a list; callers only iterate, so presumably
                # fine -- verify.
                return set(body_exports).intersection(set(orelse_exports))
    elif isinstance(stmt, ast.Assign):
        for target in stmt.targets:
            if is___all__(target):
                return list_of_symbols_from_expr(stmt.value)
    # Any other statement kind does not define __all__.
    return None
+
def __all___from_stmt_list(stmts):
    '''Scan a statement list for definitions of __all__.

    Returns the approximation from the *last* statement that defines
    __all__, or None when no statement does.'''
    assert isinstance(stmts, list)
    result = None
    for statement in stmts:
        found = __all___from_stmt(statement)
        if found is not None:
            result = found
    return result
+
def is_private_symbol(sym):
    '''Return True if `sym` is a private name: it starts with an underscore
    but is not a dunder name (``__x__``).'''
    if not sym.startswith('_'):
        # Also covers the empty string, which the old `sym[0]` test
        # crashed on with IndexError.
        return False
    if len(sym) >= 4 and sym.startswith('__') and sym.endswith('__'):
        # Dunder names are conventionally public.
        return False
    return True
+
def globals_from_tree(node, names):
    'Add all globals defined in the tree to names'
    if isinstance(node, list):
        for subnode in node:
            globals_from_tree(subnode, names)
    elif isinstance(node, ast.Assign):
        for target in node.targets:
            if isinstance(target, ast.Name):
                names.add(target.variable.id)
    elif isinstance(node, ast.If):
        if node.orelse:
            # Only names assigned on *both* branches are guaranteed defined.
            left = set()
            right = set()
            globals_from_tree(node.body, left)
            globals_from_tree(node.orelse, right)
            names.update(left.intersection(right))
        # NOTE(review): an `if` without `else` contributes nothing, even
        # names assigned in its body -- conservative by design, presumably.
    # Don't descend into other nodes.
+
def exports_from_ast(node):
    'Get a list of symbols exported by the module from its ast.'
    #Look for assignments to __all__
    #If not available at top-level, then check if-statements,
    #but ignore try-except and loops
    assert type(node) is ast.Module
    exports = __all___from_stmt_list(node.body)
    if exports is not None:
        return exports
    # No explicit __all__ assignment so gather global assignments
    exports = set()
    globals_from_tree(node.body, exports)
    return [ ex for ex in exports if not is_private_symbol(ex) ]
+
class ExportsPass(Pass):
    '''Finds all 'exports' of a module. An export is a symbol that is defined
    in the __all__ list or, if __all__ is undefined, is defined at top-level
    and is not private'''

    name = "exports"

    def __init__(self):
        pass

    # NOTE(review): the parameter name `ast` shadows the module-level `ast`
    # import; harmless here since the import is not used in this method,
    # but renaming to `module` (matching Pass.extract) would be clearer.
    def extract(self, ast, writer):
        '''Compute the module's exported symbols and write them out.'''
        exported = exports_from_ast(ast)
        write_exports(ast, exported, writer)
diff --git a/python/extractor/semmle/python/passes/flow.py b/python/extractor/semmle/python/passes/flow.py
new file mode 100755
index 00000000000..a9148aefd0f
--- /dev/null
+++ b/python/extractor/semmle/python/passes/flow.py
@@ -0,0 +1,1927 @@
+import sys
+import os.path
+import traceback
+from typing import Optional
+
+from semmle.python import ast
+from semmle import util
+from semmle.python.passes.ast_pass import iter_fields
+from semmle.python.passes._pass import Pass
+from semmle.python.passes import pruner
+from semmle.python.passes import splitter
+from semmle.python.passes import unroller
+from semmle.python import modules
+import semmle.graph as graph
+from semmle.logging import Logger
+
+__all__ = [ 'FlowPass' ]
+
class ConsistencyError(util.SemmleError):
    '''Raised when the flow pass encounters an AST node it cannot handle.'''
    pass
+
def error(node, _):
    '''Fallback walker: signals an AST node type the flow pass should never see.'''
    raise ConsistencyError("Unexpected node type " + type(node).__name__)
+
+
class FlowNode(object):
    '''A single node in the control-flow graph, wrapping one AST node.
    Several FlowNodes may wrap the same AST node.'''

    __slots__ = [ 'node' ]

    def __init__(self, node):
        self.node = node

    def __repr__(self):
        wrapped = self.node
        if hasattr(wrapped, "lineno"):
            return 'FlowNode(%s at %d)' % (type(wrapped), wrapped.lineno)
        return 'FlowNode(%r)' % wrapped

    def copy(self):
        '''Return a new FlowNode wrapping the same AST node.'''
        return FlowNode(self.node)
+
+#Kinds of node sets.
+NORMAL = util.NORMAL_EDGE
+TRUE = util.TRUE_EDGE
+FALSE = util.FALSE_EDGE
+EXCEPTION = util.EXCEPTIONAL_EDGE
+EXHAUSTED = util.EXHAUSTED_EDGE
+
+TRUE_OR_FALSE = TRUE | FALSE
+
+#Set of names of modules that are guaranteed to be in the interpreter regardless of platform
+GUARANTEED_MODULES = {
+ "_ast",
+ "_bisect",
+ "_codecs",
+ "_collections",
+ "_functools",
+ "_heapq",
+ "_io",
+ "_locale",
+ "_md5",
+ "_operator",
+ "_random",
+ "_sha256",
+ "_sha512",
+ "_socket",
+ "_sre",
+ "_struct",
+ "_symtable",
+ "_warnings",
+ "_weakref",
+ "array",
+ "binascii",
+ "cmath",
+ "errno",
+ "gc",
+ "itertools",
+ "marshal",
+ "math",
+ "sys",
+ "syslog",
+ "time",
+ "unicodedata",
+ "zipimport",
+ "zlib",
+}
+
+
+_py3_names = {
+ "ArithmeticError",
+ "AssertionError",
+ "AttributeError",
+ "BaseException",
+ "BlockingIOError",
+ "BrokenPipeError",
+ "BufferError",
+ "BytesWarning",
+ "ChildProcessError",
+ "ConnectionAbortedError",
+ "ConnectionError",
+ "ConnectionRefusedError",
+ "ConnectionResetError",
+ "DeprecationWarning",
+ "EOFError",
+ "Ellipsis",
+ "EnvironmentError",
+ "Exception",
+ "False",
+ "FileExistsError",
+ "FileNotFoundError",
+ "FloatingPointError",
+ "FutureWarning",
+ "GeneratorExit",
+ "IOError",
+ "ImportError",
+ "ImportWarning",
+ "IndentationError",
+ "IndexError",
+ "InterruptedError",
+ "IsADirectoryError",
+ "KeyError",
+ "KeyboardInterrupt",
+ "LookupError",
+ "MemoryError",
+ "NameError",
+ "None",
+ "NotADirectoryError",
+ "NotImplemented",
+ "NotImplementedError",
+ "OSError",
+ "OverflowError",
+ "PendingDeprecationWarning",
+ "PermissionError",
+ "ProcessLookupError",
+ "ReferenceError",
+ "ResourceWarning",
+ "RuntimeError",
+ "RuntimeWarning",
+ "StopIteration",
+ "SyntaxError",
+ "SyntaxWarning",
+ "SystemError",
+ "SystemExit",
+ "TabError",
+ "TimeoutError",
+ "True",
+ "TypeError",
+ "UnboundLocalError",
+ "UnicodeDecodeError",
+ "UnicodeEncodeError",
+ "UnicodeError",
+ "UnicodeTranslateError",
+ "UnicodeWarning",
+ "UserWarning",
+ "ValueError",
+ "Warning",
+ "ZeroDivisionError",
+ "__build_class__",
+ "__debug__",
+ "__doc__",
+ "__import__",
+ "__loader__",
+ "__name__",
+ "__package__",
+ "__spec__",
+ "abs",
+ "all",
+ "any",
+ "ascii",
+ "bin",
+ "bool",
+ "bytearray",
+ "bytes",
+ # "callable", only 3.2+
+ "chr",
+ "classmethod",
+ "compile",
+ "complex",
+ "copyright",
+ "credits",
+ "delattr",
+ "dict",
+ "dir",
+ "divmod",
+ "enumerate",
+ "eval",
+ "exec",
+ "exit",
+ "filter",
+ "float",
+ "format",
+ "frozenset",
+ "getattr",
+ "globals",
+ "hasattr",
+ "hash",
+ "help",
+ "hex",
+ "id",
+ "input",
+ "int",
+ "isinstance",
+ "issubclass",
+ "iter",
+ "len",
+ "license",
+ "list",
+ "locals",
+ "map",
+ "max",
+ "memoryview",
+ "min",
+ "next",
+ "object",
+ "oct",
+ "open",
+ "ord",
+ "pow",
+ "print",
+ "property",
+ "quit",
+ "range",
+ "repr",
+ "reversed",
+ "round",
+ "set",
+ "setattr",
+ "slice",
+ "sorted",
+ "staticmethod",
+ "str",
+ "sum",
+ "super",
+ "tuple",
+ "type",
+ "vars",
+ "zip",
+}
+
+_py2_names = {
+ "ArithmeticError",
+ "AssertionError",
+ "AttributeError",
+ "BaseException",
+ "BufferError",
+ "BytesWarning",
+ "DeprecationWarning",
+ "EOFError",
+ "Ellipsis",
+ "EnvironmentError",
+ "Exception",
+ "False",
+ "FloatingPointError",
+ "FutureWarning",
+ "GeneratorExit",
+ "IOError",
+ "ImportError",
+ "ImportWarning",
+ "IndentationError",
+ "IndexError",
+ "KeyError",
+ "KeyboardInterrupt",
+ "LookupError",
+ "MemoryError",
+ "NameError",
+ "None",
+ "NotImplemented",
+ "NotImplementedError",
+ "OSError",
+ "OverflowError",
+ "PendingDeprecationWarning",
+ "ReferenceError",
+ "RuntimeError",
+ "RuntimeWarning",
+ "StandardError",
+ "StopIteration",
+ "SyntaxError",
+ "SyntaxWarning",
+ "SystemError",
+ "SystemExit",
+ "TabError",
+ "True",
+ "TypeError",
+ "UnboundLocalError",
+ "UnicodeDecodeError",
+ "UnicodeEncodeError",
+ "UnicodeError",
+ "UnicodeTranslateError",
+ "UnicodeWarning",
+ "UserWarning",
+ "ValueError",
+ "Warning",
+ "ZeroDivisionError",
+ "__debug__",
+ "__doc__",
+ "__import__",
+ "__name__",
+ "__package__",
+ "abs",
+ "all",
+ "any",
+ "apply",
+ "basestring",
+ "bin",
+ "bool",
+ "buffer",
+ "bytearray",
+ "bytes",
+ "callable",
+ "chr",
+ "classmethod",
+ "cmp",
+ "coerce",
+ "compile",
+ "complex",
+ "copyright",
+ "credits",
+ "delattr",
+ "dict",
+ "dir",
+ "divmod",
+ "enumerate",
+ "eval",
+ "execfile",
+ "exit",
+ "file",
+ "filter",
+ "float",
+ "format",
+ "frozenset",
+ "getattr",
+ "globals",
+ "hasattr",
+ "hash",
+ "help",
+ "hex",
+ "id",
+ "input",
+ "int",
+ "intern",
+ "isinstance",
+ "issubclass",
+ "iter",
+ "len",
+ "license",
+ "list",
+ "locals",
+ "long",
+ "map",
+ "max",
+ "memoryview",
+ "min",
+ "next",
+ "object",
+ "oct",
+ "open",
+ "ord",
+ "pow",
+ "print",
+ "property",
+ "quit",
+ "range",
+ "raw_input",
+ "reduce",
+ "reload",
+ "repr",
+ "reversed",
+ "round",
+ "set",
+ "setattr",
+ "slice",
+ "sorted",
+ "staticmethod",
+ "str",
+ "sum",
+ "super",
+ "tuple",
+ "type",
+ "unichr",
+ "unicode",
+ "vars",
+ "xrange",
+ "zip",
+}
+
+#Set of names that always exist (for both Python 2 and 3)
+BUILTIN_NAME_ALWAYS_EXISTS = _py2_names.intersection(_py3_names)
+
+# A NodeSet is a conceptually a set of (FlowNode, kind) pairs.
+#This class exists to document the interface.
class ExampleNodeSet(object):
    '''This class exists for documentation purposes only.

    It documents the interface shared by EmptyNodeSet, SingletonNodeSet and
    MultiNodeSet; it is never instantiated.
    '''

    def branch(self):
        '''Branch into (true, false) pair of nodesets.'''

    def __add__(self, other):
        '''Add this node set to another, returning the union'''

    def normalise(self):
        '''Return normalise form of this node set, turning all kinds into NORMAL'''

    def exception(self):
        '''Return exception form of this node set, turning all kinds into EXCEPTION'''

    def merge_true_false_pairs(self):
        '''Return copy of this node set with all pairs of TRUE and FALSE kinds for the same node turned into NORMAL'''

    def add_node(self, node, kind):
        '''Return a new node set with (node, kind) pair added.'''

    def invert(self):
        '''Return copy of this node set with all TRUE kinds set to FALSE and vice versa.'''
+
class EmptyNodeSet(object):
    '''The node set with no members.  A single shared instance (EMPTY) is
    used everywhere; most operations are identities on it.'''

    def branch(self):
        # Nothing to split: both branches are empty.
        return self, self

    def __add__(self, other):
        # Union with the empty set is the other operand, unchanged.
        return other

    def normalise(self):
        return self

    def exception(self):
        return self

    def merge_true_false_pairs(self):
        return self

    def add_node(self, node, kind):
        # The first element upgrades the empty set to a singleton.
        return SingletonNodeSet(node, kind)

    def __iter__(self):
        return iter(())

    def __len__(self):
        return 0

    def __str__(self):
        return "{}"

    def invert(self):
        return self
+
+EMPTY = EmptyNodeSet()
+
class SingletonNodeSet(object):
    '''A node set holding exactly one (node, kind) pair.'''

    __slots__ = [ 'node', 'kind']

    def __init__(self, node, kind):
        self.node = node
        self.kind = kind

    def branch(self):
        '''Split into a (true-successors, false-successors) pair.'''
        if self.kind == NORMAL:
            # An unconditional node feeds both branches, with its kind refined.
            return SingletonNodeSet(self.node, TRUE), SingletonNodeSet(self.node, FALSE)
        if self.kind == TRUE:
            return self, EMPTY
        if self.kind == FALSE:
            return EMPTY, self
        # Exceptional/exhausted kinds flow to both branches unchanged.
        return self, self

    def __add__(self, other):
        return self if other is EMPTY else other.add_node(self.node, self.kind)

    def normalise(self):
        return SingletonNodeSet(self.node, NORMAL)

    def exception(self):
        return SingletonNodeSet(self.node, EXCEPTION)

    def merge_true_false_pairs(self):
        # A single pair can never contain both a TRUE and a FALSE entry.
        return self

    def add_node(self, node, kind):
        if (node, kind) == (self.node, self.kind):
            # Already present: sets have no duplicates.
            return self
        combined = MultiNodeSet()
        combined.append((self.node, self.kind))
        combined.append((node, kind))
        return combined

    def __iter__(self):
        yield self.node, self.kind

    def __len__(self):
        return 1

    def invert(self):
        if not (self.kind & TRUE_OR_FALSE):
            return self
        return SingletonNodeSet(self.node, self.kind ^ TRUE_OR_FALSE)

    def unique_node(self):
        return self.node

    def __str__(self):
        return "{(%s, %d)}" % (self.node, self.kind)
+
class MultiNodeSet(list):
    '''A node set with two or more members, stored as a list of
    (node, kind) tuples with set semantics (no duplicate pairs).'''

    __slots__ = []

    def branch(self):
        '''Branch into (true, false) pair of nodesets.'''
        true_set = EMPTY
        for node, kind in self:
            if kind != FALSE:
                true_set = true_set.add_node(node, kind)
        false_set = EMPTY
        for node, kind in self:
            if kind != TRUE:
                false_set = false_set.add_node(node, kind)
        return true_set, false_set

    def __add__(self, other):
        if other is EMPTY:
            return self
        res = MultiNodeSet(self)
        if isinstance(other, SingletonNodeSet):
            res.insert_node(other.node, other.kind)
            return res
        for node, kind in other:
            res.insert_node(node, kind)
        return res

    def convert(self, the_kind):
        '''Return a copy of this set with every kind replaced by `the_kind`.'''
        the_node = self[0][0]
        for node, kind in self:
            if node != the_node:
                break
        else:
            # All pairs share a single node, so the converted set collapses
            # to a singleton.
            return SingletonNodeSet(the_node, the_kind)
        res = MultiNodeSet()
        for node, kind in self:
            res.insert_node(node, the_kind)
        return res

    def normalise(self):
        return self.convert(NORMAL)

    def exception(self):
        return self.convert(EXCEPTION)

    def merge_true_false_pairs(self):
        #Common case len() == 2
        if len(self) == 2:
            # BUG FIX: this previously tested `self[0][1] | self[0][1]`
            # (a self-OR), which can never equal TRUE_OR_FALSE, so
            # two-element TRUE/FALSE pairs were never merged.
            if (self[0][1] | self[1][1]) == TRUE_OR_FALSE and self[0][0] == self[1][0]:
                return SingletonNodeSet(self[0][0], NORMAL)
            else:
                return self
        #Either no true, or no false edges.
        all_kinds = 0
        for node, kind in self:
            all_kinds |= kind
        if (all_kinds & TRUE_OR_FALSE) != TRUE_OR_FALSE:
            return self

        #General, slow and hopefully rare case.
        nodes = {}
        for node, kind in self:
            if node in nodes:
                nodes[node] |= kind
            else:
                nodes[node] = kind
        res = MultiNodeSet()
        for node, kind in nodes.items():
            if (kind & TRUE_OR_FALSE) == TRUE_OR_FALSE:
                # TRUE+FALSE collapse to NORMAL; EXCEPTION (if set) survives.
                kind = (kind | NORMAL) & (NORMAL | EXCEPTION)
            for K in (NORMAL, TRUE, FALSE, EXCEPTION):
                if kind & K:
                    res.insert_node(node, K)
        return res

    def add_node(self, *t):
        res = MultiNodeSet(self)
        res.insert_node(*t)
        return res

    def insert_node(self, *t):
        # Preserve set semantics: ignore duplicate (node, kind) pairs.
        if t not in self:
            self.append(t)

    def __str__(self):
        # BUG FIX: str.join requires strings; the old code passed the raw
        # tuples and raised TypeError.  Format each pair the same way as
        # SingletonNodeSet.__str__.
        return "{" + ",".join("(%s, %d)" % pair for pair in self) + "}"

    def invert(self):
        res = MultiNodeSet()
        for node, kind in self:
            if kind & TRUE_OR_FALSE:
                res.insert_node(node, kind ^ TRUE_OR_FALSE)
            else:
                res.insert_node(node, kind)
        return res
+
class BlockStack(list):
    '''A stack of blocks (loops or tries).'''

    # Each entry is a node set accumulating the dangling flow nodes
    # (e.g. breaks, continues, returns or raised exceptions) belonging to
    # one enclosing block.

    def push_block(self):
        # Open a new block with no pending nodes yet.
        self.append(EMPTY)

    def pop_block(self):
        return self.pop()

    def add(self, node_set):
        # Accumulate into the innermost (most recently pushed) block.
        self[-1] = self[-1] + node_set
+
class FlowScope(object):
    '''Per-scope state for building one scope's control-flow graph, plus
    the emitters that write the finished graph to the trap writer.'''

    def __init__(self, depth, ast_scope):
        # Synthetic entry node; the flow graph grows from here.
        self.entry = FlowNode(ast_scope)
        self.graph = graph.FlowGraph(self.entry)
        # Single shared exit node for all exceptional paths out of the scope.
        self.exceptional_exit = FlowNode(ast_scope)
        self.graph.add_node(self.exceptional_exit)
        self.graph.annotate_node(self.exceptional_exit, EXCEPTION_EXIT)
        self.depth = depth
        self.exception_stack = BlockStack()
        self.exception_stack.push_block()
        self.breaking_stack = BlockStack()
        self.continuing_stack = BlockStack()
        self.return_stack = BlockStack()
        self.return_stack.push_block()
        self.ast_scope = ast_scope

    def inner(self, ast_scope):
        '''Create the FlowScope for a scope nested one level inside this one.'''
        return FlowScope(self.depth+1, ast_scope)

    def pop_exceptions(self):
        return self.exception_stack.pop_block()

    def split(self):
        splitter.do_split(self.ast_scope, self.graph)

    def prune(self):
        #Remove the always false condition edges.
        pruner.do_pruning(self.ast_scope, self.graph)

    def unroll(self):
        unroller.do_unrolling(self.ast_scope, self.graph)

    def write_graph(self, writer):
        '''Emit the flow graph, dominator tree and SSA form to `writer`.'''
        self.graph.delete_unreachable_nodes()
        #Emit flow graph
        self._write_flow_nodes(writer)
        for pred, succ, kind in self.graph.edges():
            write_successors(writer, pred, succ, kind)
            # Conditional/exceptional edges are also recorded as plain
            # successors, so consumers may ignore edge kinds entirely.
            if kind != NORMAL and kind != EXHAUSTED:
                write_successors(writer, pred, succ, NORMAL)
        #Emit idoms
        for node, idom in self.graph.idoms():
            write_idoms(writer, node, idom)
        #Emit SSA variables
        for var in self.graph.ssa_variables():
            write_ssa_var(writer, var)
        for node, var in self.graph.ssa_definitions():
            write_ssa_defn(writer, var, node)
        for node, var in self.graph.ssa_uses():
            write_ssa_use(writer, node, var)
        for var, arg in self.graph.ssa_phis():
            write_ssa_phi(writer, var, arg)

    def _write_flow_nodes(self, writer):
        # Nodes carrying an annotation (entry/exit codes) get a scope-node
        # tuple; nodes placed in a basic block get a flow-node tuple.
        blocks = self.graph.get_basic_blocks()
        for flow, note in self.graph.nodes():
            if note is not None:
                write_scope_node(writer, flow, self.ast_scope, note)
            if flow in blocks:
                head, index = blocks[flow]
                write_flow_node(writer, flow, head, index)
+
+
+#Codes for scope entry/exit nodes.
+#These are hardcoded in QL. Do not change them.
+FALL_THROUGH_EXIT = 0
+EXCEPTION_EXIT = 1
+RETURN_EXIT = 2
+ENTRY = -1
+
+class FlowPass(Pass):
+ '''Extracts flow-control information. Currently generates a flow control
+ graph. There is a many-to-one relation between flow-nodes and ast nodes.
+ This enables precise flow control for 'try' statements.
+ Each flow node also has a number. If there are several flow nodes for
+ one ast node, they will all have different numbers.
+ For flow nodes representing a scope (class, function or module) then
+ the numbers are as follows: entry=-1, exceptional exit=1,
+ fallthrough exit=0, explicit return=2
+ '''
+
+ name = "flow"
+
+ def __init__(self, split, prune=True, unroll=False, logger:Optional[Logger] = None):
+ 'Initialize all the tree walkers'
+ self._walkers = {
+ list : self._walk_list,
+ bool : self.skip,
+ int : self.skip,
+ float : self.skip,
+ bytes : self.skip,
+ str : self.skip,
+ complex : self.skip,
+ type(None) : self.skip,
+ ast.Lambda : self._walk_scope_defn,
+ ast.ClassExpr : self._walk_class_expr,
+ ast.FunctionExpr : self._walk_scope_defn,
+ ast.For : self._walk_for_loop,
+ ast.Pass : self._walk_stmt_only,
+ ast.Global : self._walk_stmt_only,
+ ast.Break : self._walk_break,
+ ast.BinOp : self._walk_binop,
+ ast.Compare : self._walk_compare,
+ ast.Continue : self._walk_continue,
+ ast.Raise : self._walk_raise,
+ ast.Return : self._walk_return,
+ ast.Delete : self._walk_delete,
+ ast.While : self._walk_while,
+ ast.If : self._walk_if_stmt,
+ ast.IfExp : self._walk_if_expr,
+ ast.expr_context : self.skip,
+ ast.Slice : self._walk_slice,
+ ast.ExceptStmt : error,
+ ast.comprehension : error,
+ ast.ListComp: self._walk_generator,
+ ast.SetComp: self._walk_generator,
+ ast.DictComp: self._walk_generator,
+ ast.Dict : self._walk_dict,
+ ast.keyword : self._walk_expr_no_raise,
+ ast.KeyValuePair : self._walk_keyword,
+ ast.DictUnpacking : self._walk_yield,
+ ast.Starred : self._walk_yield,
+ ast.arguments : self._walk_arguments,
+ ast.Name : self._walk_name,
+ ast.PlaceHolder : self._walk_name,
+ ast.Num : self._walk_atom,
+ ast.Str : self._walk_atom,
+ ast.Try : self._walk_try,
+ ast.List : self._walk_sequence,
+ ast.Tuple : self._walk_sequence,
+ ast.UnaryOp : self._walk_expr_no_raise,
+ ast.UnaryOp : self._walk_unary_op,
+ ast.Assign : self._walk_assign,
+ ast.ImportExpr : self._walk_import_expr,
+ ast.ImportMember : self._walk_expr,
+ ast.Ellipsis : self._walk_atom,
+ ast.Print : self._walk_post_stmt,
+ ast.alias : self._walk_alias,
+ ast.GeneratorExp: self._walk_generator,
+ ast.Assert: self._walk_assert,
+ ast.AssignExpr: self._walk_assignexpr,
+ ast.AugAssign : self._walk_augassign,
+ ast.Attribute : self._walk_attribute,
+ ast.Subscript : self._walk_subscript,
+ ast.BoolOp : self._walk_bool_expr,
+ ast.TemplateWrite : self._walk_post_stmt,
+ ast.Filter : self._walk_expr_no_raise,
+ ast.Yield : self._walk_yield,
+ ast.YieldFrom : self._walk_yield,
+ ast.Expr : self._walk_skip_stmt,
+ ast.Import : self._walk_skip_stmt,
+ ast.ImportFrom : self._walk_post_stmt,
+ ast.With: self._walk_with,
+ ast.Match: self._walk_match,
+ ast.Case: self._walk_case,
+ ast.Repr : self._walk_expr_no_raise,
+ ast.Nonlocal : self._walk_stmt_only,
+ ast.Exec : self._walk_exec,
+ ast.AnnAssign : self._walk_ann_assign,
+ ast.TypeAlias : self._walk_stmt_only,
+ ast.TypeVar: self.skip,
+ ast.TypeVarTuple: self.skip,
+ ast.ParamSpec: self.skip,
+ ast.SpecialOperation: self._walk_expr_no_raise,
+ ast.Module : error,
+ ast.expr : error,
+ ast.stmt : error,
+ ast.cmpop : error,
+ ast.boolop : error,
+ ast.operator : error,
+ ast.expr_context : error,
+ ast.unaryop : error,
+ ast.AstBase : error,
+ }
+ for t in ast.__dict__.values():
+ if isinstance(t, type) and ast.AstBase in t.__mro__:
+ #Setup walkers
+ expr_walker = self._walk_expr
+ if t.__mro__[1] is ast.expr:
+ if t not in self._walkers:
+ self._walkers[t] = expr_walker
+ elif t.__mro__[1] in (ast.cmpop, ast.boolop, ast.operator,
+ ast.expr_context, ast.unaryop):
+ self._walkers[t] = self.skip
+ self._walkers[ast.TemplateDottedNotation] = self._walkers[ast.Attribute]
+
+ # Initialize walkers for patterns,
+ # These return both a tree and a list of nodes:
+ # - the tree represents the computation needed to evaluate whether the pattern matches,
+ # - the list of nodes represents the bindings resulting from a successful match.
+ self._pattern_walkers = {
+ ast.MatchAsPattern: self._walk_as_pattern,
+ ast.MatchOrPattern: self._walk_or_pattern,
+ ast.MatchLiteralPattern: self._walk_literal_pattern,
+ ast.MatchCapturePattern: self._walk_capture_pattern,
+ ast.MatchWildcardPattern: self._walk_wildcard_pattern,
+ ast.MatchValuePattern: self._walk_value_pattern,
+ ast.MatchSequencePattern: self._walk_sequence_pattern,
+ ast.MatchStarPattern: self._walk_star_pattern,
+ ast.MatchMappingPattern: self._walk_mapping_pattern,
+ ast.MatchDoubleStarPattern: self._walk_double_star_pattern,
+ ast.MatchKeyValuePattern: self._walk_key_value_pattern,
+ ast.MatchClassPattern: self._walk_class_pattern,
+ ast.MatchKeywordPattern: self._walk_keyword_pattern,
+ }
+
+ self.scope = None
+ self.in_try = 0
+ self.in_try_name = 0
+ self.split = split
+ self.prune = prune
+ self.unroll = unroll
+ self.logger = logger or Logger()
+ self.filename = "