""" dbscheme format representation """

import logging
import re
from dataclasses import dataclass
from typing import ClassVar, List, Optional

log = logging.getLogger(__name__)

# Column names found in this set are exposed with a trailing underscore
# by `Column.name`.
dbscheme_keywords = {"case", "boolean", "int", "string", "type"}


@dataclass
class Column:
    """A single column of a dbscheme table."""

    schema_name: str  # column name without any keyword-escaping underscore
    type: str  # column type as written in the dbscheme, e.g. "@foo" or "int"
    binding: bool = False  # True when the column is not declared with `ref`
    first: bool = False  # set by `Table` on its first column

    @property
    def name(self):
        """Return the column name, suffixed with "_" if it is a dbscheme keyword."""
        if self.schema_name in dbscheme_keywords:
            return self.schema_name + "_"
        return self.schema_name

    @property
    def lhstype(self):
        """Return the type text written before the column name.

        `@`-types become ``unique int`` when binding and ``int`` otherwise;
        any other type is returned unchanged.
        """
        if self.type[0] == "@":
            return "unique int" if self.binding else "int"
        return self.type

    @property
    def rhstype(self):
        """Return the type text written after the column name.

        A binding `@`-type is returned as is; everything else gets a
        `` ref`` suffix.
        """
        if self.type[0] == "@" and self.binding:
            return self.type
        return self.type + " ref"


@dataclass
class KeySetId:
    """One identifier inside a `#keyset[...]` annotation."""

    id: str
    first: bool = False  # set by `KeySet` on its first id


@dataclass
class KeySet:
    """The identifiers of a `#keyset[...]` annotation.

    `ids` may be given as any non-empty iterable of strings; each element is
    wrapped in a `KeySetId` and the first one is flagged with `first=True`.
    """

    ids: List[KeySetId]

    def __post_init__(self):
        # Materialize before checking: callers pass generators, which are
        # always truthy, so asserting on the raw argument would never fire.
        ids = list(self.ids)
        assert ids
        self.ids = [KeySetId(x) for x in ids]
        self.ids[0].first = True


class Decl:
    """Base class of dbscheme declarations, with kind flags defaulting to False."""

    is_table = False
    is_union = False


@dataclass
class Table(Decl):
    """A table declaration: a name, its columns and an optional keyset."""

    is_table: ClassVar = True

    name: str
    columns: List[Column]
    keyset: Optional[KeySet] = None

    def __post_init__(self):
        # Flag the first column, if there is one.
        if self.columns:
            self.columns[0].first = True


@dataclass
class UnionCase:
    """One alternative on the right-hand side of a union declaration."""

    type: str
    first: bool = False  # set by `Union` on its first case after sorting


@dataclass
class Union(Decl):
    """A union declaration ``lhs = case | case | ...``.

    `rhs` may be given as any non-empty iterable of type names; each element
    is wrapped in a `UnionCase`, the cases are sorted by type name, and the
    first one after sorting is flagged with `first=True`.
    """

    is_union: ClassVar = True

    lhs: str
    rhs: List[UnionCase]

    def __post_init__(self):
        # Materialize before checking: callers pass generators, which are
        # always truthy, so asserting on the raw argument would never fire.
        cases = list(self.rhs)
        assert cases
        self.rhs = [UnionCase(x) for x in cases]
        self.rhs.sort(key=lambda c: c.type)
        self.rhs[0].first = True


@dataclass
class SchemeInclude:
    """An included piece of dbscheme: where it came from and its text."""

    src: str
    data: str


@dataclass
class Scheme:
    """A whole dbscheme: its source, its includes and its declarations."""

    template: ClassVar = 'dbscheme'

    src: str
    includes: List[SchemeInclude]
    declarations: List[Decl]


class Re:
    """Regular expressions used to parse dbscheme text."""

    # Either a table `name(body);`, optionally preceded by `#keyset[a, b]`,
    # or a union `@lhs = @a | @b;`. The group names are the keys that
    # `get_table`, `get_union` and `iterload` look up on the match.
    entity = re.compile(
        "(?m)"
        r"(?:^#keyset\[(?P<tablekeys>[\w\s,]+)\][\s\n]*)?^(?P<table>\w+)\((?P<tablebody>[^\)]*)\);?"
        "|"
        r"^(?P<union>@\w+)\s*=\s*(?P<unionbody>@\w+(?:\s*\|\s*@\w+)*)\s*;?"
    )
    # One column inside a table body: `<lhs type> name: type [ref]`.
    field = re.compile(r"(?m)[\w\s]*\s(?P<field>\w+)\s*:\s*(?P<type>@?\w+)(?P<ref>\s+ref)?")
    # A single `@type` name inside a union body.
    key = re.compile(r"@\w+")
    # Block comments (possibly multi-line) and `//` comments up to end of line.
    comment = re.compile(r"(?m)(?s)/\*.*?\*/|//[^\n]*$")


def get_column(match):
    """Build a `Column` from a `Re.field` match.

    Trailing underscores are stripped from the field name (`Column.name`
    adds one back for keywords); the column is binding when it has no `ref`.
    """
    return Column(
        schema_name=match["field"].rstrip("_"),
        type=match["type"],
        binding=not match["ref"],
    )


def get_table(match):
    """Build a `Table` from a `Re.entity` match whose `table` group is set."""
    keyset = None
    if match["tablekeys"]:
        keyset = KeySet(k.strip() for k in match["tablekeys"].split(","))
    return Table(
        name=match["table"],
        columns=[get_column(f) for f in Re.field.finditer(match["tablebody"])],
        keyset=keyset,
    )


def get_union(match):
    """Build a `Union` from a `Re.entity` match whose `union` group is set."""
    return Union(
        lhs=match["union"],
        rhs=(d[0] for d in Re.key.finditer(match["unionbody"])),
    )


def iterload(file):
    """Yield the `Table` and `Union` declarations found in the dbscheme at `file`.

    The file is read in full and comments are removed before matching.
    """
    # Use a separate name for the handle instead of rebinding the parameter.
    with open(file) as stream:
        data = Re.comment.sub("", stream.read())
    for e in Re.entity.finditer(data):
        if e["table"]:
            yield get_table(e)
        elif e["union"]:
            yield get_union(e)