Files

486 lines
14 KiB
Python

import ast
from .parser import parse
from collections import defaultdict
from .compiled import SuperState, StateTransitionTable, StateActionListPair
class Transition:
def __init__(self, from_state, to_state, what, do):
assert isinstance(from_state, State)
assert isinstance(to_state, State)
self.from_state = from_state
self.what = what
if not do:
do = None
else:
assert isinstance(do, list)
for item in do:
assert isinstance(item, Action)
do = tuple(do)
self.action = StateActionListPair.get(to_state, do)
def dump(self):
if self.action.actionlist:
return "%s -> %s for %s do %s" % (
self.from_state,
self.action.state,
self.what,
"; ".join(str(do) for do in self.action.actionlist.actions)
)
else:
return "%s -> %s for %s" % (
self.from_state,
self.action.state,
self.what
)
next_state_id = 1
states = {}
class State:
def __init__(self, name):
global next_state_id
if name.isdigit():
assert name == "0"
self.id = 0
self.name = "START"
else:
self.name = name
self.id = next_state_id
next_state_id += 1
@staticmethod
def get(name):
if name not in states:
states[name] = State(name)
return states[name]
@staticmethod
def count():
return len(states)
def __repr__(self):
return "state_%s(%s)" % (self.id, self.name)
@staticmethod
def from_id(id):
for state in states.values():
if state.id == id:
return state
raise ValueError(id)
State.get("0")
ERROR_ACTION = StateActionListPair.get(State.get("error"), None)
next_super_state_id = 0
super_states = {}
class TransitionTable:
def __init__(self, name):
global next_super_state_id
self.name = name
self.id = next_super_state_id
next_super_state_id += 1
self.parent = None
self.transitions = []
self._table = None
def add_transition(self, trans):
self.transitions.append(trans)
def dump(self):
if self.parent:
lines = [ "TransitionTable %s(%s extends %s)" % (self.id, self.name, self.parent.name) ]
else:
lines = [ "TransitionTable %s(%s):" % (self.id, self.name) ]
lines.extend(" " + t.dump() for t in self.transitions)
return "\n".join(lines)
@staticmethod
def get(name, parent=None):
if name not in super_states:
super_states[name] = TransitionTable(name)
return super_states[name]
@staticmethod
def count():
return len(super_states)
def get_table(self, character_classes):
'''Returns the transition table for all states in this super-state'''
if self._table is None:
from_transtions = defaultdict(list)
for t in self.transitions:
from_transtions[t.from_state].append(t)
self._table = { state: self.get_transition_table(state, from_transtions.get(state, ()), character_classes) for state in states.values() }
return self._table
def get_transition_table(self, state, transition_list, character_classes):
table = {}
if self.parent:
parent_table = self.parent.get_table(character_classes)
else:
parent_table = None
default = None
for t in transition_list:
assert state == t.from_state
if isinstance(t.what, Any):
default = t.action
continue
action = t.action
classes = set(character_classes[c] for c in t.what)
for cls in classes:
if cls in table:
raise ValueError("Duplicate transition from %s on %s" % (state, cls))
else:
table[cls] = action
on_identifier = table.get(IDENTIFIER_CLASS, None)
for cls in character_classes.values():
if cls in table:
continue
if on_identifier and cls.is_identifier:
table[cls] = on_identifier
elif default:
table[cls] = default
elif parent_table and state in parent_table:
table[cls] = parent_table[state][cls]
else:
table[cls] = ERROR_ACTION
return StateTransitionTable(table)
def compile(self, character_classes):
return SuperState(self.name, self.get_table(character_classes))
class Any:
def __repr__(self):
return "*"
class Action:
def __repr__(self):
return self.__class__.__name__.lower()
class Emit(Action):
def __init__(self, kind, text):
assert isinstance(kind, str)
assert kind.upper() == kind
self.kind = kind
self.text = text
def __repr__(self):
if self.text is None:
return "emit(" + self.kind + ")"
else:
return "emit(%s, %r)" % (self.kind, self.text)
def __eq__(self, other):
return type(other) is Emit and other.kind == self.kind and other.text == self.text
def __hash__(self):
return 353 ^ hash(self.kind) ^ hash(self.text)
class Push(Action):
def __init__(self, state):
assert isinstance(state, TransitionTable)
self.state = state
def __repr__(self):
return "push(%s)" % self.state.name
def __eq__(self, other):
return type(other) is Push and other.state == self.state
def __hash__(self):
return 59 ^ hash(self.state)
class EmitIndent(Action):
pass
EMIT_INDENT = EmitIndent()
class Pop(Action):
pass
POP = Pop()
class Pushback(Action):
pass
PUSHBACK = Pushback()
class Mark(Action):
pass
MARK = Mark()
class Newline(Action):
pass
NEWLINE = Newline()
class Identifier:
def __repr__(self):
return "UnicodeIdentifiers()"
IDENTIFIER = Identifier()
class IdentifierContinue:
def __repr__(self):
return "IdentifierContinue()"
IDENTIFIER_CONTINUE = IdentifierContinue()
next_char_class_id = 0
class CharacterClass:
def __init__(self, chars, is_identifier = None):
global next_char_class_id
self.chars = chars
self.id = next_char_class_id
next_char_class_id += 1
if is_identifier is None:
self.is_identifier = chars.copy().pop().isidentifier()
else:
self.is_identifier = is_identifier
def __repr__(self):
if self == IDENTIFIER_CLASS:
return "IDENTIFIER_CLASS(%d)" % self.id
elif self == ERROR_CLASS:
return "ERROR_CLASS(%d)" % self.id
else:
return "CharacterClass %s %r" % (self.id, sorted(self.chars))
ERROR_CLASS = CharacterClass(set(), False)
assert ERROR_CLASS.id == 0
IDENTIFIER_CLASS = CharacterClass(set(), True)
IDENTIFIER_CONTINUE_CLASS = CharacterClass(set(), False)
class Machine:
def __init__(self):
self.aliases = {}
self.states = {}
self.aliases["IDENTIFIER"] = IDENTIFIER
self.aliases["IDENTIFIER_CONTINUE"] = IDENTIFIER_CONTINUE
self.aliases['SPACE'] = {' '}
self.start = None
def add_state(self, name):
assert name not in self.states
result = TransitionTable.get(name)
self.states[name] = result
return result
def add_alias(self, name, choices):
assert name not in self.aliases
assert isinstance(choices, set), choices
self.aliases[name] = choices
def dump(self):
r = []
a = r.append
a("Starting super-state: %s" % self.start.name)
a("")
a("Aliases:")
for name_alias in self.aliases.items():
a(" %s = %r" % name_alias)
a("")
for name, state in self.states.items():
a(state.dump())
return "\n".join(r)
@staticmethod
def load(src):
tree = parse(src)
m = Machine()
w = Walker(m)
w.visit(tree)
return m
def get_classes(self):
'''Get the character classes for this machine'''
#There are two predefined classes: Unicode identifiers, and ERROR.
#A character class is a set of characters, such that the transitions
#and actions of the machine are identical for all characters in that class.
char_to_transitions = defaultdict(set)
for s in self.states.values():
for t in s.transitions:
w = t.what
if isinstance(w, Any):
continue
for c in w:
if c is IDENTIFIER or c is IDENTIFIER_CONTINUE:
continue
char_to_transitions[c].add((s, t.from_state, t.action))
equivalence_sets = defaultdict(set)
for c, transition_set in sorted(char_to_transitions.items()):
equivalence_sets[frozenset(transition_set)].add(c)
classes = {}
for char_set in sorted(equivalence_sets.values()):
charcls = CharacterClass(char_set)
for c in char_set:
classes[c] = charcls
classes[IDENTIFIER] = IDENTIFIER_CLASS
classes[IDENTIFIER_CONTINUE] = IDENTIFIER_CONTINUE_CLASS
for i in range(128):
c = chr(i)
if c not in classes:
if c.isidentifier():
classes[c] = IDENTIFIER_CLASS
elif c in "0123456789":
classes[c] = IDENTIFIER_CONTINUE_CLASS
else:
classes[c] = ERROR_CLASS
for cls in classes.values():
if cls is IDENTIFIER_CLASS or cls is IDENTIFIER_CONTINUE_CLASS or cls is ERROR_CLASS:
continue
assert { c for c in cls.chars if c.isidentifier() } == cls.chars or not { c for c in cls.chars if c.isidentifier() }
return classes
class Walker:
def __init__(self, machine):
self.machine = machine
def visit(self, node):
if hasattr(node, "type"):
tag = node.type
else:
tag = node.data
meth = getattr(self, "visit_" + tag, None)
if meth is None:
self.fail(node, tag)
else:
return meth(node)
def fail(self, node, tag):
print(node)
raise NotImplementedError(tag)
def visit_first_child(self, node):
assert len(node.children) == 1
return self.visit(node.children[0])
def visit_children(self, node):
return [ self.visit(child) for child in node.children ]
visit_start = visit_first_child
visit_machine = visit_children
visit_declaration = visit_first_child
def visit_alias_decl(self, node):
assert len(node.children) == 2
choice = self.visit(node.children[1])
self.machine.add_alias(node.children[0].value, choice)
def visit_alias(self, node):
return self.machine.aliases[node.children[0].value]
def visit_char(self, node):
c = ast.literal_eval(node.children[0].value)
assert isinstance(c, str), c
assert len(c) == 1, c
return c
def visit_choice(self, node):
#Convert choices into a set of characters
result = set()
for child in node.children:
item = self.visit(child)
if isinstance(item, set):
result.update(item)
else:
result.add(item)
return result
visit_item = visit_first_child
def visit_table_decl(self, node):
self.current_state = self.visit(node.children[0])
for transition in node.children[1:]:
self.visit(transition)
def visit_table_header(self, node):
name = node.children[0].value
state = self.machine.add_state(name)
if len(node.children) > 1:
base = TransitionTable.get(node.children[1].value)
state.parent = base
return state
def visit_transition(self, node):
# state_choice "->" state "for" (choice | "*") action_list?
from_states = self.visit(node.children[0])
to_state = self.visit(node.children[1])
what = self.visit(node.children[2])
if len(node.children) > 3:
do = self.visit(node.children[3])
else:
do = []
for state in from_states:
trans = Transition(state, to_state, what, do)
self.current_state.add_transition(trans)
visit_state_choice = visit_children
def visit_state(self, node):
return State.get(node.children[0].value)
def visit_any(self, node):
return Any()
visit_action_list = visit_children
visit_action = visit_first_child
def visit_emit(self, node):
if len(node.children) == 2:
return Emit(node.children[0].value, self.visit(node.children[1]))
else:
return Emit(node.children[0].value, None)
def visit_optional_text(self, node):
return node.children[0].value
def visit_push(self, node):
state = TransitionTable.get(node.children[0].value)
return Push(state)
def visit_emit_indent(self, node):
return EMIT_INDENT
def visit_pushback(self, node):
return PUSHBACK
def visit_pop(self, node):
return POP
def visit_mark(self, node):
return MARK
def visit_newline(self, node):
return NEWLINE
def visit_start_decl(self, node):
self.machine.start = TransitionTable.get(node.children[0].value)
def main():
import sys
file = sys.argv[1]
with open(file) as fd:
tree = parse(fd.read())
m = Machine()
w = Walker(m)
w.visit(tree)
print(m.dump())
if __name__ == "__main__":
main()