Files
codeql/java/kotlin-extractor/generate_dbscheme.py
Ian Lynagh bcbcd612a3 Kotlin: Improve the dbscheme generator
We now work out the supertype relationships based on the sets of leaf
types that are included, rather than simply following the hierarchy of
declarations. This means that we know about more supertype relationships
that exist, so there is less need to cast types.
2022-05-10 18:45:59 +01:00

181 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python3
import re
import sys
enums = {}
unions = {}
tables = {}
def parse_dbscheme(filename):
with open(filename, 'r') as f:
dbscheme = f.read()
# Remove comments
dbscheme = re.sub(r'/\*.*?\*/', '', dbscheme, flags=re.DOTALL)
dbscheme = re.sub(r'//[^\r\n]*/', '', dbscheme)
# kind enums
for name, kind, body in re.findall(r'case\s+@([^.\s]*)\.([^.\s]*)\s+of\b(.*?);',
dbscheme,
flags=re.DOTALL):
mapping = []
for num, typ in re.findall(r'(\d+)\s*=\s*@(\S+)', body):
mapping.append((int(num), typ))
enums[name] = (kind, mapping)
# unions
for name, rhs in re.findall(r'@(\w+)\s*=\s*(@\w+(?:\s*\|\s*@\w+)*)',
dbscheme,
flags=re.DOTALL):
typs = re.findall(r'@(\w+)', rhs)
unions[name] = typs
# tables
for relname, body in re.findall('\n([\w_]+)(\([^)]*\))',
dbscheme,
flags=re.DOTALL):
columns = list(re.findall('(\S+)\s*:\s*([^\s,]+)(?:\s+(ref)|)', body))
tables[relname] = columns
parse_dbscheme('../ql/lib/config/semmlecode.dbscheme')
type_aliases = {}
for alias, typs in unions.items():
if len(typs) == 1:
real = typs[0]
if real in type_aliases:
real = type_aliases[real]
type_aliases[alias] = real
def unalias(t):
return type_aliases.get(t, t)
type_leaf = set()
type_union = {}
for name, (kind, mapping) in enums.items():
s = set()
for num, typ in mapping:
s.add(typ)
type_leaf.add(typ)
type_union[name] = s
for name, typs in unions.items():
if name not in type_aliases:
type_union[name] = set(map(unalias, typs))
for relname, columns in tables.items():
for _, db_type, ref in columns:
if db_type[0] == '@' and ref == '':
db_type_name = db_type[1:]
if db_type_name not in enums:
type_leaf.add(db_type_name)
type_union_of_leaves = {}
def to_leaves(t):
if t not in type_union_of_leaves:
xs = type_union[t]
leaves = set()
for x in xs:
if x in type_leaf:
leaves.add(x)
else:
to_leaves(x)
leaves.update(type_union_of_leaves[x])
type_union_of_leaves[t] = leaves
for t in type_union:
to_leaves(t)
supertypes = {}
for t in type_leaf:
supers = set()
for sup, s in type_union_of_leaves.items():
if t in s:
supers.add(sup)
supertypes[t] = supers
for t, leaves in type_union_of_leaves.items():
supers = set()
for sup, s in type_union_of_leaves.items():
if t != sup and leaves.issubset(s):
supers.add(sup)
supertypes[t] = supers
def upperFirst(string):
return string[0].upper() + string[1:]
def genTable(kt, relname, columns, enum = None, kind = None, num = None, typ = None):
kt.write('fun TrapWriter.write' + upperFirst(relname))
if kind is not None:
kt.write('_' + typ)
kt.write('(')
for colname, db_type, _ in columns:
if colname != kind:
kt.write(colname + ': ')
if db_type == 'int':
kt.write('Int')
elif db_type == 'float':
kt.write('Double')
elif db_type == 'string':
kt.write('String')
elif db_type == 'date':
kt.write('String')
elif db_type == 'boolean':
kt.write('Boolean')
elif db_type[0] == '@':
label = db_type[1:]
if label == enum:
label = typ
kt.write('Label<out Db' + upperFirst(label) + '>')
else:
raise Exception('Bad db_type: ' + db_type)
kt.write(', ')
kt.write(') {\n')
kt.write(' this.writeTrap("' + relname + '(')
comma = ''
for colname, db_type, _ in columns:
kt.write(comma)
if colname == kind:
kt.write(str(num))
elif db_type == 'string' or db_type == 'date':
kt.write('\\"${escapeTrapString(' + colname + ')}\\"')
else:
# TODO: Any reformatting or escaping necessary?
# e.g. float formats?
kt.write('$' + colname)
comma = ', '
kt.write(')\\n")\n')
kt.write('}\n')
with open('src/main/kotlin/KotlinExtractorDbScheme.kt', 'w') as kt:
kt.write('/* Generated by ' + sys.argv[0] + ': Do not edit manually. */\n')
kt.write('package com.github.codeql\n')
for relname, columns in tables.items():
enum = None
for _, db_type, ref in columns:
if db_type[0] == '@' and ref == '':
db_type_name = db_type[1:]
if db_type_name in enums:
enum = db_type_name
if enum is None:
genTable(kt, relname, columns)
else:
(kind, mapping) = enums[enum]
for num, typ in mapping:
genTable(kt, relname, columns, enum, kind, num, typ)
for typ in sorted(supertypes):
kt.write('sealed interface Db' + upperFirst(typ))
# Sorting makes the output deterministic.
names = sorted(supertypes[typ])
if names:
kt.write(': ')
kt.write(', '.join(map(lambda name: 'Db' + upperFirst(name), names)))
kt.write('\n')
for alias in sorted(type_aliases):
kt.write('typealias Db' + upperFirst(alias) + ' = Db' + upperFirst(type_aliases[alias]) + '\n')