mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
156 lines
4.3 KiB
Python
156 lines
4.3 KiB
Python
|
|
import unicodedata
|
|
from . import machine
|
|
|
|
class SuperState:
    """A named state together with a mapping keyed by objects that carry an ``id``.

    The mapping is only ever read through ``dict_to_list``, which lays its
    values out in a list indexed by each key's ``id``.
    """

    def __init__(self, name, mapping):
        self.name, self.mapping = name, mapping

    def as_list_of_transitions(self):
        """Return the mapping's values as a list indexed by key ``id``."""
        return dict_to_list(self.mapping)

    def as_list_of_bytes(self):
        """Return ``as_bytes()`` of every mapped value, ordered by key ``id``."""
        encoded = []
        for table in dict_to_list(self.mapping):
            encoded.append(table.as_bytes())
        return encoded
|
|
|
|
# Registry of every ActionList created through ActionList.get, keyed by its
# tuple of actions.
all_actions = {}
# Id that the next newly registered ActionList will receive.
action_id = 0


class ActionList:
    """A tuple of actions paired with an id.

    ``ActionList.get`` interns instances: equal tuples always yield the same
    object, and ids are handed out sequentially in order of first request.
    """

    def __init__(self, actions, id):
        self.id = id
        self.actions = actions

    @staticmethod
    def get(actions):
        """Return the ActionList registered for ``actions``, creating it on first use.

        ``actions`` must be a tuple (checked with an assertion).
        """
        global action_id
        assert isinstance(actions, tuple)
        found = all_actions.get(actions)
        if found is None:
            found = ActionList(actions, action_id)
            all_actions[actions] = found
            action_id += 1
        return found

    @staticmethod
    def listall():
        """Return every registered ActionList, ordered by ascending id."""
        ordered = list(all_actions.values())
        ordered.sort(key=lambda entry: entry.id)
        return ordered
|
|
|
|
# Registry of every pair created through StateActionListPair.get, keyed by
# the (state, actionlist) tuple.
pairs = {}
# Id that the next newly registered pair will receive.
next_pair_id = 0


class StateActionListPair:
    """A (state, actionlist) combination with a sequentially assigned id."""

    def __init__(self, state, actionlist, id):
        self.id = id
        self.actionlist = actionlist
        self.state = state

    @staticmethod
    def get(state, actionlist):
        """Return the pair registered for ``(state, actionlist)``, creating it on first use.

        ``actionlist`` may be ``None``, an ``ActionList``, or anything
        ``ActionList.get`` accepts; the latter is converted before lookup.
        """
        global next_pair_id
        if not (actionlist is None or isinstance(actionlist, ActionList)):
            actionlist = ActionList.get(actionlist)
        key = (state, actionlist)
        if key in pairs:
            return pairs[key]
        created = StateActionListPair(state, actionlist, next_pair_id)
        pairs[key] = created
        next_pair_id += 1
        return created

    @staticmethod
    def listall():
        """Return every registered pair, ordered by ascending id."""
        ordered = list(pairs.values())
        ordered.sort(key=lambda entry: entry.id)
        return ordered
|
|
|
|
# Maps the byte encoding of a table to the id assigned to that encoding.
table_ids = {}
# Id that the next previously unseen encoding will receive.
next_table_id = 0


class StateTransitionTable:
    """A mapping from ``id``-carrying keys to ``id``-carrying values.

    Tables are identified by their byte encoding: two tables that encode to
    the same bytes share one id.
    """

    def __init__(self, mapping):
        self.mapping = mapping

    def __getitem__(self, key):
        return self.mapping[key]

    def as_bytes(self):
        """Encode the table as bytes, one byte per slot holding that value's ``id``.

        Slots are ordered by key ``id`` (see ``dict_to_list``).
        """
        return bytes([entry.id for entry in dict_to_list(self.mapping)])

    @property
    def id(self):
        """Id of this table's byte encoding, assigned sequentially on first use."""
        global next_table_id
        encoded = self.as_bytes()
        if encoded in table_ids:
            return table_ids[encoded]
        assigned = next_table_id
        table_ids[encoded] = assigned
        next_table_id += 1
        return assigned
|
|
|
|
def dict_to_list(mapping):
    """Lay the values of ``mapping`` out in a list indexed by each key's ``id``.

    The list grows as needed while the mapping is traversed; slots whose index
    matches no key's ``id`` are left as ``None``. ``mapping`` must be a dict
    (checked with an assertion).
    """
    assert isinstance(mapping, dict)
    slots = []
    for owner, payload in mapping.items():
        # A non-positive shortfall multiplies to an empty list, so this only
        # grows the list when the id does not fit yet.
        shortfall = owner.id + 1 - len(slots)
        slots.extend([None] * shortfall)
        slots[owner.id] = payload
    return slots
|
|
|
|
|
|
# Every character is classified as id-start, id-continuation or "other".
# For all non-ASCII characters, "other" is represented as ERROR.
# See https://www.python.org/dev/peps/pep-3131 for an explanation of what an identifier is.

# Code points treated as id-start / id-continuation in addition to those
# selected by Unicode general category below.
OTHER_START = {0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C}
OTHER_CONTINUE = {0x00B7, 0x0387, 0x19DA, *range(0x1369, 0x1372)}

# Unicode general categories (as returned by unicodedata.category) whose
# members count as id-start and id-continuation respectively.
ID_CATEGORIES = {"Lu", "Ll", "Lt", "Lm", "Lo", "Nl"}
CONT_CATEGORIES = {"Mn", "Mc", "Nd", "Pc"}
|
|
|
|
# Size, in bytes, of one chunk of the identifier table. The table is padded to
# a multiple of this and split into chunks of this size for two-level lookup.
CHUNK_SIZE = 64


class IdentifierTable:
    """Packed table giving the identifier class of every Unicode code point.

    Each code point gets a 2-bit class: identifier start, identifier
    continuation, or error. Four classes are packed per byte, so code point
    ``i`` lives in byte ``i >> 2`` at bit offset ``(i & 3) * 2``. Trailing
    all-zero bytes are dropped and the table is then zero-padded to a multiple
    of ``CHUNK_SIZE`` bytes. The result is stored in ``self.table`` as a list
    of ints.
    """

    def __init__(self):
        classes = []
        for i in range(0x110000):
            try:
                c = chr(i)
            except ValueError:
                # chr() signals an unrepresentable code point with ValueError;
                # catch only that so unrelated errors (KeyboardInterrupt,
                # SystemExit, ...) are no longer silently swallowed.
                continue
            cat = unicodedata.category(c)
            if cat in ID_CATEGORIES or i in OTHER_START:
                cls = machine.IDENTIFIER_CLASS.id
            elif cat in CONT_CATEGORIES or i in OTHER_CONTINUE:
                cls = machine.IDENTIFIER_CONTINUE_CLASS.id
            else:
                cls = machine.ERROR_CLASS.id
            # Each class must fit in the two bits reserved for it.
            assert cls in (0, 1, 2, 3)
            classes.append(cls)

        # Pack four 2-bit classes into each byte.
        result = [0] * ((len(classes) + 3) >> 2)
        for i, cls in enumerate(classes):
            result[i >> 2] |= cls << ((i & 3) * 2)

        # Drop the all-zero tail. The emptiness check avoids an IndexError
        # when every byte is zero.
        while result and result[-1] == 0:
            result.pop()
        # Zero-pad so the table splits evenly into CHUNK_SIZE-byte chunks.
        result.extend([0] * (-len(result) % CHUNK_SIZE))
        self.table = result

    def as_bytes(self):
        """Return the packed table as a ``bytes`` object."""
        return bytes(self.table)

    def as_two_level_table(self):
        """Split the table into ``CHUNK_SIZE``-byte chunks and deduplicate them.

        Returns a ``(chunks, index)`` pair. ``chunks`` lists the distinct
        chunks in order of first appearance, and ``index[k]`` is the position
        within ``chunks`` of the k-th chunk of the table.
        """
        index = []
        chunk_ids = {}
        the_bytes = self.as_bytes()
        for n in range(0, len(the_bytes), CHUNK_SIZE):
            chunk = the_bytes[n:n + CHUNK_SIZE]
            # An unseen chunk receives the next free id, which equals the
            # number of distinct chunks recorded so far.
            index.append(chunk_ids.setdefault(chunk, len(chunk_ids)))
        chunks = sorted(chunk_ids, key=chunk_ids.get)
        return chunks, index
|