mirror of
https://github.com/github/codeql.git
synced 2025-12-17 01:03:14 +01:00
Move files to ruby subfolder
This commit is contained in:
90
ruby/scripts/merge_stats.py
Normal file
90
ruby/scripts/merge_stats.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# This script merges a number of stats files to produce a single stats file.
|
||||
|
||||
import sys
|
||||
from lxml import etree
|
||||
import argparse
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--output', required=True, help="Path of the output file.")
|
||||
parser.add_argument('--normalise', required=True, help="Name of the relation to normalise the sizes on.")
|
||||
parser.add_argument('--unscaled-stats', default=[], action='append', help="A stats file which should not be normalised.")
|
||||
parser.add_argument('inputs', nargs='*', help="The other stats files")
|
||||
return parser.parse_args()
|
||||
|
||||
def die(msg):
|
||||
sys.stderr.write('Error: ' + msg + '\n')
|
||||
sys.exit(1)
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
inputs = args.inputs
|
||||
output = args.output
|
||||
normalise = args.normalise
|
||||
unscaled_stats = args.unscaled_stats
|
||||
|
||||
print("Merging %s into %s normalising on '%s'." % (', '.join(inputs), output, normalise))
|
||||
do_xml_files(output, inputs, unscaled_stats, normalise)
|
||||
|
||||
def read_sized_xml(xml_file, name):
|
||||
# Take the size of the named table as the size of the codebase
|
||||
xml = etree.parse(xml_file)
|
||||
ns = xml.xpath("stats/relation[name='%s']/cardinality" % name)
|
||||
if len(ns) == 0:
|
||||
die('Sized stats file ' + xml_file + ' does not have a cardinality for normalisation relation ' + name + '.')
|
||||
n = ns[0]
|
||||
size = int(n.text)
|
||||
return (xml, size)
|
||||
|
||||
def scale(xml, size, max_size):
|
||||
# Scale up the contents of all the <v> and <cardinality> tags
|
||||
for v in xml.xpath(".//v|.//cardinality"):
|
||||
v.text = str((int(v.text) * max_size) // size)
|
||||
|
||||
def do_xml_files(output, scaled_xml_files, unscaled_xml_files, name):
|
||||
# The result starts off empty
|
||||
result = etree.Element("dbstats")
|
||||
|
||||
# Scale all of the stats so that they might have come code bases of
|
||||
# the same size
|
||||
sized_xmls = [read_sized_xml(xml_file, name)
|
||||
for xml_file in scaled_xml_files]
|
||||
if sized_xmls != []:
|
||||
max_size = max([size for (xml, size) in sized_xmls])
|
||||
for (xml, size) in sized_xmls:
|
||||
scale(xml, size, max_size)
|
||||
unsized_xmls = list(map(etree.parse, unscaled_xml_files))
|
||||
xmls = [xml for (xml, size) in sized_xmls] + unsized_xmls
|
||||
|
||||
# Put all the stats in a single XML doc so that we can search them
|
||||
# more easily
|
||||
merged_xml = etree.Element("merged")
|
||||
for xml in xmls:
|
||||
merged_xml.append(xml.getroot())
|
||||
|
||||
# For each value of <e><k>, take the <e> tag with the biggest <e><v>
|
||||
typesizes = etree.SubElement(result, "typesizes")
|
||||
typenames = sorted(set ([ typesize.find("k").text for typesize in merged_xml.xpath("dbstats/typesizes/e")]))
|
||||
for typename in typenames:
|
||||
xs = merged_xml.xpath("dbstats/typesizes/e[k='" + typename + "']")
|
||||
sized_xs = [(int(x.find("v").text), x) for x in xs]
|
||||
(_, x) = max(sized_xs, key = lambda p: p[0])
|
||||
typesizes.append(x)
|
||||
|
||||
# For each value of <relation><name>, take the <relation> tag with
|
||||
# the biggest <relation><cardinality>
|
||||
stats = etree.SubElement(result, "stats")
|
||||
|
||||
relnames = sorted(set ([relation.find("name").text for relation in merged_xml.xpath("dbstats/stats/relation") ]))
|
||||
for relname in relnames:
|
||||
rels = merged_xml.xpath("dbstats/stats/relation[name='" + relname + "']")
|
||||
sized_rels = [(int(rel.find("cardinality").text), rel) for rel in rels]
|
||||
(_, rel) = max(sized_rels, key = lambda p: p[0])
|
||||
stats.append(rel)
|
||||
|
||||
with open(output, 'wb') as f:
|
||||
f.write(etree.tostring(result, pretty_print=True))
|
||||
|
||||
main()
|
||||
Reference in New Issue
Block a user