mirror of
https://github.com/github/codeql.git
synced 2025-12-19 02:13:17 +01:00
561 lines
15 KiB
Python
561 lines
15 KiB
Python
'''Meta nodes for defining database relations'''
|
|
|
|
from abc import abstractmethod
|
|
|
|
from semmle.util import fprintf
|
|
|
|
PREFIX = 'py_'
|
|
|
|
__all__ = [ 'order' ]
|
|
|
|
|
|
parent_nodes = {}
|
|
|
|
class Node(object):
|
|
'Node in the attribute tree, describing relations'
|
|
|
|
next_id = 0
|
|
|
|
def __init__(self):
|
|
Node.next_id += 1
|
|
self._index = Node.next_id
|
|
self._unique_parent = None
|
|
|
|
@property
|
|
def parents(self):
|
|
return parent_of(self)
|
|
|
|
def add_child(self, child):
|
|
child.add_parent(self)
|
|
|
|
def db_key(self, name):
|
|
return 'unique int ' + name + ' : ' + self.db_name()
|
|
|
|
def is_sub_type(self):
|
|
return False
|
|
|
|
@staticmethod
|
|
def is_union_type():
|
|
return False
|
|
|
|
def is_case_type(self):
|
|
return False
|
|
|
|
@staticmethod
|
|
def is_list():
|
|
return False
|
|
|
|
@staticmethod
|
|
def is_primitive():
|
|
return False
|
|
|
|
def prune(self, node_set):
|
|
return self
|
|
|
|
@abstractmethod
|
|
def child_offsets(self, n):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def write_fields(self, out):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def ql_name(self):
|
|
pass
|
|
|
|
@property
|
|
def unique_parent(self):
|
|
if self._unique_parent is None:
|
|
parents = self.parents
|
|
if len(parents.child_offsets(self)) < 2:
|
|
self._unique_parent = True
|
|
elif parents.is_union_type():
|
|
self._unique_parent = False
|
|
for t in parents.types:
|
|
if len(t.child_offsets(self)) > 1:
|
|
break
|
|
else:
|
|
self._unique_parent = True
|
|
return self._unique_parent
|
|
|
|
|
|
class PrimitiveNode(Node):
|
|
'A primitive node: int, str, etc'
|
|
|
|
def __init__(self, name, db_name, key, descriptive_name = None):
|
|
Node.__init__(self)
|
|
assert isinstance(name, str)
|
|
self.name = name
|
|
self.super_type = None
|
|
self.layout = []
|
|
self.fields = []
|
|
self.subclasses = set()
|
|
self._key = key
|
|
self._db_name = db_name
|
|
if descriptive_name is None:
|
|
self.descriptive_name = self.name
|
|
else:
|
|
self.descriptive_name = descriptive_name
|
|
|
|
def db_key(self, name):
|
|
return self._key + ' ' + name + ' : ' + self._db_name + ' ref'
|
|
|
|
@property
|
|
def __name__(self):
|
|
return self.name
|
|
|
|
def ql_name(self):
|
|
'Return Java style name if a schema type, otherwise the specified name'
|
|
if self._db_name[0] == '@':
|
|
return capitalize(self.name)
|
|
else:
|
|
return self._db_name
|
|
|
|
def relation_name(self):
|
|
return pluralize(PREFIX + self.name)
|
|
|
|
def db_name(self):
|
|
return self._db_name
|
|
|
|
def add_parent(self, p):
|
|
parent_nodes[self] = UnionNode.join(parent_of(self), p)
|
|
|
|
def fixup(self):
|
|
pass
|
|
|
|
@staticmethod
|
|
def is_primitive():
|
|
return True
|
|
|
|
def child_offsets(self, n):
|
|
return set()
|
|
|
|
def write_init(self, out):
|
|
fprintf(out, "%s = PrimitiveNode(%s, %s, %s)\n", self.name,
|
|
self.name, self._db_name, self._key)
|
|
|
|
def write_fields(self, out):
|
|
pass
|
|
|
|
|
|
def parent_of(node):
|
|
if node in parent_nodes:
|
|
return parent_nodes[node]
|
|
else:
|
|
return None
|
|
|
|
class ClassNode(Node):
|
|
'A node corresponding to a single AST type'
|
|
|
|
def __init__(self, name, super_type = None, descriptive_name = None):
|
|
Node.__init__(self)
|
|
assert isinstance(name, str)
|
|
self.name = name
|
|
self._db_name = name
|
|
self.super_type = super_type
|
|
self.layout = []
|
|
if super_type:
|
|
self.fields = list(super_type.fields)
|
|
else:
|
|
self.fields = []
|
|
self.subclasses = set()
|
|
if super_type:
|
|
super_type.subclasses.add(self)
|
|
if descriptive_name is None:
|
|
self.descriptive_name = self.name.lower()
|
|
else:
|
|
self.descriptive_name = descriptive_name
|
|
if self.descriptive_name[0] == '$':
|
|
self.descriptive_name = self.descriptive_name[1:]
|
|
elif super_type and ' ' not in self.descriptive_name:
|
|
self.descriptive_name += ' ' + super_type.descriptive_name
|
|
|
|
def field(self, name, field_type, descriptive_name = None, artificial=False, parser_type = None):
|
|
if descriptive_name is None:
|
|
self.fields.append((name, field_type, name, artificial, parser_type))
|
|
else:
|
|
self.fields.append((name, field_type, descriptive_name, artificial, parser_type))
|
|
|
|
def is_stmt_or_expr_subclass(self):
|
|
if self.super_type is None:
|
|
return False
|
|
return self.super_type.name in ('expr', 'stmt')
|
|
|
|
def is_sub_type(self):
|
|
if self.super_type is None:
|
|
return False
|
|
return self.super_type.is_case_type()
|
|
|
|
def is_case_type(self):
|
|
return (self.subclasses
|
|
and parent_of(self))
|
|
|
|
def fixup(self):
|
|
self.add_children()
|
|
self.compute_layout()
|
|
|
|
def add_parent(self, p):
|
|
parent_nodes[self] = UnionNode.join(parent_of(self), p)
|
|
if self.super_type:
|
|
self.super_type.add_parent(p)
|
|
|
|
def add_children(self):
|
|
for f, f_node, _, _, _ in self.fields:
|
|
self.add_child(f_node)
|
|
|
|
def compute_layout(self):
|
|
fields = self.fields
|
|
lists = 0
|
|
for f, f_node, _, _, _ in fields:
|
|
if (isinstance(f_node, ListNode) and
|
|
f_node.item_type.__name__ != 'stmt'):
|
|
lists += 1
|
|
index = 0
|
|
inc = 1
|
|
for f, f_node, docname, artificial, pt in fields:
|
|
self.layout.append((f, f_node, index, docname, artificial, pt))
|
|
index += inc
|
|
|
|
def relation_name(self):
|
|
return pluralize(PREFIX + self._db_name)
|
|
|
|
def set_name(self, name):
|
|
self._db_name = name
|
|
|
|
@property
|
|
def __name__(self):
|
|
return self.name
|
|
|
|
def ql_name(self):
|
|
if self._db_name == 'str':
|
|
return 'string'
|
|
elif self._db_name in ('int', 'float'):
|
|
return self.db_name
|
|
name = self._db_name
|
|
return ''.join(capitalize(part) for part in name.split('_'))
|
|
|
|
def db_name(self):
|
|
return '@' + PREFIX + self._db_name
|
|
|
|
def dump(self, out):
|
|
def yes_no(b):
|
|
return "yes" if b else "no"
|
|
fprintf(out, "'%s' :\n", self.name)
|
|
fprintf(out, " QL name: %s\n", self.ql_name())
|
|
fprintf(out, " Relation name: %s\n", self.relation_name())
|
|
fprintf(out, " Is case_type %s\n", yes_no(self.is_case_type()))
|
|
fprintf(out, " Super type: %s\n", self.super_type)
|
|
fprintf(out, " Layout:\n")
|
|
for l in self.layout:
|
|
fprintf(out, " %s, %s, %s, '%s, %s'\n" % l)
|
|
fprintf(out, " Parents: %s\n\n", parent_of(self))
|
|
|
|
def write_init(self, out):
|
|
if self.super_type:
|
|
fprintf(out, "%s = ClassNode('%s', %s)\n", self.name,
|
|
self.name, self.super_type.name)
|
|
else:
|
|
fprintf(out, "%s = ClassNode('%s')\n", self.name, self.name)
|
|
|
|
def write_fields(self, out):
|
|
for name, field_type, docname, _, _ in self.fields:
|
|
fprintf(out, "%s.field('%s', %s, '%s')\n", self.name,
|
|
name, field_type.__name__, docname)
|
|
if self.layout:
|
|
fprintf(out, "\n")
|
|
|
|
def __repr__(self):
|
|
return "Node('%s')" % self.name
|
|
|
|
def child_offsets(self, n):
|
|
#Only used by db-scheme generator, so can be slow
|
|
found = set()
|
|
for name, node, offset, _, artificial, _ in self.layout:
|
|
if node is n:
|
|
found.add(offset)
|
|
if self.subclasses:
|
|
for s in self.subclasses:
|
|
found.update(s.child_offsets(n))
|
|
return found
|
|
|
|
class ListNode(Node):
|
|
"Node corresponding to a list, parameterized by its member's type"
|
|
|
|
def __init__(self, item_node, name=None):
|
|
Node.__init__(self)
|
|
self.list_type = None
|
|
self.layout = ()
|
|
self.super_type = None
|
|
self.item_type = item_node
|
|
self.subclasses = ()
|
|
self.add_child(item_node)
|
|
self.name = name
|
|
|
|
def relation_name(self):
|
|
return pluralize(PREFIX + self.__name__)
|
|
|
|
def dump(self, out):
|
|
fprintf(out, "List of %s\n", self.name)
|
|
fprintf(out, " Parents: %s\n\n", parent_of(self))
|
|
|
|
def write_init(self, out):
|
|
fprintf(out, "%s = ListNode(%s)\n",
|
|
self.__name__, self.item_type.__name__)
|
|
|
|
def write_fields(self, out):
|
|
pass
|
|
|
|
@staticmethod
|
|
def is_list():
|
|
return True
|
|
|
|
@property
|
|
def __name__(self):
|
|
if self.name is None:
|
|
assert isinstance(self.item_type.__name__, str)
|
|
return self.item_type.__name__ + '_list'
|
|
else:
|
|
return self.name
|
|
|
|
@property
|
|
def descriptive_name(self):
|
|
return self.item_type.descriptive_name + ' list'
|
|
|
|
def db_name(self):
|
|
return '@' + PREFIX + self.__name__
|
|
|
|
def ql_name(self):
|
|
if self.name is not None:
|
|
return capitalize(self.name)
|
|
if self.item_type is str:
|
|
return 'StringList'
|
|
elif self.item_type is int:
|
|
return 'IntList'
|
|
elif self.item_type is float:
|
|
return 'FloatList'
|
|
return capitalize(self.item_type.ql_name()) + 'List'
|
|
|
|
def __repr__(self):
|
|
return "ListNode(%s)" % self.__name__
|
|
|
|
def fixup(self):
|
|
pass
|
|
|
|
def add_parent(self, p):
|
|
parent_nodes[self] = UnionNode.join(parent_of(self), p)
|
|
|
|
def child_offsets(self, n):
|
|
return set((0,1,2,3))
|
|
|
|
_all_unions = {}
|
|
|
|
class UnionNode(Node):
|
|
'Node representing a set of AST types'
|
|
|
|
def __init__(self, *types):
|
|
Node.__init__(self)
|
|
assert len(types) > 1
|
|
self.types = frozenset(types)
|
|
self.name = None
|
|
self.super_type = None
|
|
self.layout = []
|
|
self.subclasses = ()
|
|
#Whether this node should be visited in auto-generated extractor.
|
|
self.visit = False
|
|
|
|
@staticmethod
|
|
def join(t1, t2):
|
|
if t1 is None:
|
|
return t2
|
|
if t2 is None:
|
|
return t1
|
|
if isinstance(t1, UnionNode):
|
|
all_types = set(t1.types)
|
|
else:
|
|
all_types = set([t1])
|
|
if isinstance(t2, UnionNode):
|
|
all_types = all_types.union(t2.types)
|
|
else:
|
|
all_types.add(t2)
|
|
done = False
|
|
while not done:
|
|
for n in all_types:
|
|
if n.super_type in all_types:
|
|
all_types.remove(n)
|
|
break
|
|
else:
|
|
done = True
|
|
return UnionNode._make_union(all_types)
|
|
|
|
@staticmethod
|
|
def _make_union(all_types):
|
|
if len(all_types) == 1:
|
|
return next(iter(all_types))
|
|
else:
|
|
key = frozenset(all_types)
|
|
if key in _all_unions:
|
|
u = _all_unions[key]
|
|
else:
|
|
u = UnionNode(*all_types)
|
|
_all_unions[key] = u
|
|
return u
|
|
|
|
def set_name(self, name):
|
|
self.name = name
|
|
|
|
@staticmethod
|
|
def is_union_type():
|
|
return True
|
|
|
|
def write_init(self, out):
|
|
fprintf(out, "%s = UnionNode(%s)\n", self.__name__,
|
|
', '.join(t.__name__ for t in self.types))
|
|
if self.name:
|
|
fprintf(out, "%s.setname('%s')\n", self.name, self.name)
|
|
|
|
def write_fields(self, out):
|
|
pass
|
|
|
|
def fixup(self):
|
|
pass
|
|
|
|
def __hash__(self):
|
|
return hash(self.types)
|
|
|
|
def __eq__(self, other):
|
|
assert len(self.types) > 1
|
|
if isinstance(other, UnionNode):
|
|
return self.types == other.types
|
|
else:
|
|
return False
|
|
|
|
def __ne__(self, other):
|
|
return not self.__eq__(other)
|
|
|
|
@property
|
|
def __name__(self):
|
|
if self.name is None:
|
|
names = [ n.__name__ for n in self.types ]
|
|
return '_or_'.join(sorted(names))
|
|
else:
|
|
return self.name
|
|
|
|
@property
|
|
def descriptive_name(self):
|
|
if self.name is None:
|
|
names = [ n.descriptive_name for n in self.types ]
|
|
return '_or_'.join(sorted(names))
|
|
else:
|
|
return self.name
|
|
|
|
def db_name(self):
|
|
return '@' + PREFIX + self.__name__
|
|
|
|
def relation_name(self):
|
|
return pluralize(PREFIX + self.__name__)
|
|
|
|
def ql_name(self):
|
|
if self.name is None:
|
|
assert len(self.types) > 1
|
|
names = [ n.ql_name() for n in self.types ]
|
|
return 'Or'.join(sorted(names))
|
|
else:
|
|
|
|
return ''.join(capitalize(part) for part in self.name.split('_'))
|
|
|
|
def add_parent(self, p):
|
|
for n in self.types:
|
|
n.add_parent(p)
|
|
|
|
def child_offsets(self, n):
|
|
res = set()
|
|
for t in self.types:
|
|
res = res.union(t.child_offsets(n))
|
|
return res
|
|
|
|
def prune(self, node_set):
|
|
new_set = self.types.intersection(node_set)
|
|
if len(new_set) == len(self.types):
|
|
return self
|
|
if not new_set:
|
|
return None
|
|
return UnionNode._make_union(new_set)
|
|
|
|
def shorten_name(node):
|
|
p = parent_of(node)
|
|
if (isinstance(p, UnionNode) and len(p.__name__) > 16
|
|
and len(p.__name__) > len(node.__name__) + 4):
|
|
p.set_name(node.__name__ + '_parent')
|
|
|
|
|
|
def build_node_relations(nodes):
|
|
nodes = set(nodes)
|
|
for node in nodes:
|
|
node.fixup()
|
|
for node in sorted(nodes, key=lambda n : n.__name__):
|
|
shorten_name(node)
|
|
node_set = set(nodes)
|
|
for node in (str, int, float, bytes):
|
|
p = parent_of(node)
|
|
if p is not None:
|
|
node_set.add(p)
|
|
for node in nodes:
|
|
p = parent_of(node)
|
|
if p is not None:
|
|
node_set.add(p)
|
|
for n in nodes:
|
|
sub_types = sorted(n.subclasses, key = lambda x : x._index)
|
|
if n.is_case_type():
|
|
for index, item in enumerate(sub_types):
|
|
item.index = index
|
|
for n in list(nodes):
|
|
if not n.parents and n.is_list() and n.name is None:
|
|
#Discard lists with no parents and no name as unreachable
|
|
node_set.remove(n)
|
|
#Prune unused nodes from unions.
|
|
node_set = set(node.prune(node_set) for node in node_set)
|
|
for node in node_set:
|
|
if node in parent_nodes:
|
|
parent_nodes[node] = parent_nodes[node].prune(node_set)
|
|
for node in node_set:
|
|
shorten_name(node)
|
|
result_nodes = {}
|
|
for n in node_set:
|
|
if n:
|
|
result_nodes[n.__name__] = n
|
|
return result_nodes
|
|
|
|
def pluralize(name):
|
|
if name[-1] == 's':
|
|
if name[-2] in 'aiuos':
|
|
return name + 'es'
|
|
else:
|
|
#Already plural
|
|
return name
|
|
elif name.endswith('ex'):
|
|
return name[:-2] + 'ices'
|
|
elif name.endswith('y'):
|
|
return name[:-1] + 'ies'
|
|
else:
|
|
return name + 's'
|
|
|
|
def capitalize(name):
|
|
'Unlike the str method capitalize(), leave upper case letters alone'
|
|
return name[0].upper() + name[1:]
|
|
|
|
def order(node):
|
|
if node.is_primitive():
|
|
return 0
|
|
if isinstance(node, ClassNode):
|
|
res = 1
|
|
while node.super_type:
|
|
node = node.super_type
|
|
res += 1
|
|
return res
|
|
if isinstance(node, ListNode):
|
|
return order(node.item_type) + 1
|
|
else:
|
|
assert isinstance(node, UnionNode)
|
|
return max(order(t) for t in node.types)+1
|