mirror of
https://github.com/github/codeql.git
synced 2026-04-26 09:15:12 +02:00
Python: Add ability to split and join autogenerated yml files
Verified by joining all files, splitting them again, and observing no diff in git. (These operations take only a few seconds on my local machine, so they shouldn't be too much of an issue.)
This commit is contained in:
30
python/ql/src/meta/ClassHierarchy/join-yml-files.py
Normal file
30
python/ql/src/meta/ClassHierarchy/join-yml-files.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Concerns were raised about performance on Windows with having 2.5 k files for modeling, and it was recommended we join them all together when shipping.
|
||||
|
||||
This script does that.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import glob
|
||||
import os
|
||||
|
||||
from shared_subclass_functions import *
|
||||
|
||||
if joined_file.exists():
|
||||
sys.exit(f"File {joined_file} already exists")
|
||||
|
||||
package_data = gather_from_existing()
|
||||
as_lists = list()
|
||||
for data in package_data.values():
|
||||
as_lists.extend(list(t) for t in data)
|
||||
as_lists.sort()
|
||||
|
||||
|
||||
to_write = wrap_in_template(as_lists)
|
||||
write_data(to_write, joined_file)
|
||||
|
||||
print("Joined all files into", joined_file)
|
||||
|
||||
for f in glob.glob(f"{subclass_capture_path}/auto-*.model.yml", recursive=True):
|
||||
os.unlink(f)
|
||||
@@ -2,22 +2,17 @@
|
||||
|
||||
import sys
|
||||
import glob
|
||||
from pathlib import Path
|
||||
import json
|
||||
import subprocess
|
||||
from collections import defaultdict
|
||||
import yaml
|
||||
import shutil
|
||||
import os
|
||||
import re
|
||||
|
||||
VERSION = "process-mrva-results 0.0.1"
|
||||
|
||||
mad_path = Path(__file__).parent.parent.parent.parent / "lib/semmle/python/frameworks/data/internal/"
|
||||
from shared_subclass_functions import *
|
||||
|
||||
assert mad_path.exists(), mad_path
|
||||
|
||||
package_data = defaultdict(set)
|
||||
|
||||
|
||||
# process data
|
||||
|
||||
@@ -55,34 +50,9 @@ class CodeQL:
|
||||
return res.decode('utf-8')
|
||||
res += b
|
||||
|
||||
def wrap_in_template(data):
|
||||
return {
|
||||
"extensions": [
|
||||
{
|
||||
"addsTo": {
|
||||
"pack": "codeql/python-all",
|
||||
"extensible": "typeModel",
|
||||
},
|
||||
"data": data,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
def parse_from_file(path: Path) -> set:
|
||||
if not path.exists():
|
||||
return set()
|
||||
|
||||
f = path.open("r")
|
||||
assert f.readline().startswith(f"# {VERSION}\n"), path
|
||||
|
||||
raw_data = yaml.load(f, Loader=yaml.CBaseLoader)
|
||||
assert len(raw_data["extensions"]) == 1, path
|
||||
assert raw_data["extensions"][0]["addsTo"]["extensible"] == "typeModel", path
|
||||
|
||||
return set(tuple(x) for x in raw_data["extensions"][0]["data"])
|
||||
|
||||
|
||||
def gather_from_bqrs_results():
|
||||
package_data = defaultdict(set)
|
||||
with CodeQL() as codeql:
|
||||
if os.path.exists(sys.argv[1]) and not os.path.isdir(sys.argv[1]) and sys.argv[1].endswith(".bqrs"):
|
||||
files = [sys.argv[1]]
|
||||
@@ -98,34 +68,12 @@ def gather_from_bqrs_results():
|
||||
for t in select["#select"]["tuples"]:
|
||||
pkg = t[1]
|
||||
package_data[pkg].add(tuple(t))
|
||||
return package_data
|
||||
|
||||
def gather_from_existing():
|
||||
for f in glob.glob(f"{mad_path}/subclass-capture/auto-*.model.yml", recursive=True):
|
||||
print(f"Processing {f}")
|
||||
|
||||
all_data = parse_from_file(Path(f))
|
||||
pkg = f.split("/")[-1].split(".")[0][5:]
|
||||
package_data[pkg].update(all_data)
|
||||
if __name__ == "__main__":
|
||||
if joined_file.exists():
|
||||
sys.exit(f"File {joined_file} exists, you should split it up first")
|
||||
|
||||
gather_from_bqrs_results()
|
||||
|
||||
for pkg in package_data:
|
||||
if not re.match(r"[a-zA-Z0-9-_]+", pkg):
|
||||
print(f"Skipping {repr(pkg)}")
|
||||
continue
|
||||
|
||||
pkg_path = mad_path / "subclass-capture" / f"auto-{pkg}.model.yml"
|
||||
|
||||
print(f"Writing {pkg_path}")
|
||||
|
||||
all_data = parse_from_file(pkg_path)
|
||||
all_data.update(package_data[pkg])
|
||||
|
||||
as_lists = [list(t) for t in all_data]
|
||||
as_lists.sort()
|
||||
|
||||
data_for_yaml = wrap_in_template(as_lists)
|
||||
|
||||
f = pkg_path.open("w+")
|
||||
f.write(f"# {VERSION}\n")
|
||||
yaml.dump(data_for_yaml, indent=2, stream=f, Dumper=yaml.CDumper)
|
||||
package_data = gather_from_bqrs_results()
|
||||
write_all_package_data_to_files(package_data)
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
from typing import Dict
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import glob
|
||||
from collections import defaultdict
|
||||
import re
|
||||
|
||||
VERSION = "process-mrva-results 0.0.1"
|
||||
|
||||
mad_path = Path(__file__).parent.parent.parent.parent / "lib/semmle/python/frameworks/data/internal/"
|
||||
|
||||
subclass_capture_path = mad_path / "subclass-capture"
|
||||
|
||||
joined_file = subclass_capture_path / "ALL.model.yml"
|
||||
|
||||
def parse_from_file(path: Path) -> set:
|
||||
if not path.exists():
|
||||
return set()
|
||||
|
||||
f = path.open("r")
|
||||
assert f.readline().startswith(f"# {VERSION}\n"), path
|
||||
|
||||
raw_data = yaml.load(f, Loader=yaml.CBaseLoader)
|
||||
assert len(raw_data["extensions"]) == 1, path
|
||||
assert raw_data["extensions"][0]["addsTo"]["extensible"] == "typeModel", path
|
||||
|
||||
return set(tuple(x) for x in raw_data["extensions"][0]["data"])
|
||||
|
||||
|
||||
def wrap_in_template(data):
|
||||
return {
|
||||
"extensions": [
|
||||
{
|
||||
"addsTo": {
|
||||
"pack": "codeql/python-all",
|
||||
"extensible": "typeModel",
|
||||
},
|
||||
"data": data,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def write_data(data, path: Path):
|
||||
f = path.open("w+")
|
||||
f.write(f"# {VERSION}\n")
|
||||
yaml.dump(data, indent=2, stream=f, Dumper=yaml.CDumper)
|
||||
|
||||
|
||||
def gather_from_existing():
|
||||
package_data = defaultdict(set)
|
||||
for f in glob.glob(f"{subclass_capture_path}/auto-*.model.yml", recursive=True):
|
||||
print(f"Processing {f}")
|
||||
|
||||
all_data = parse_from_file(Path(f))
|
||||
pkg = f.split("/")[-1].split(".")[0][5:]
|
||||
package_data[pkg].update(all_data)
|
||||
return package_data
|
||||
|
||||
|
||||
def write_all_package_data_to_files(package_data: Dict[str, set]):
|
||||
for pkg in package_data:
|
||||
if not re.match(r"[a-zA-Z0-9-_]+", pkg):
|
||||
print(f"Skipping {repr(pkg)}")
|
||||
continue
|
||||
|
||||
pkg_path = subclass_capture_path / f"auto-{pkg}.model.yml"
|
||||
|
||||
print(f"Writing {pkg_path}")
|
||||
|
||||
all_data = parse_from_file(pkg_path)
|
||||
all_data.update(package_data[pkg])
|
||||
|
||||
as_lists = [list(t) for t in all_data]
|
||||
as_lists.sort()
|
||||
|
||||
data_for_yaml = wrap_in_template(as_lists)
|
||||
|
||||
write_data(data_for_yaml, pkg_path)
|
||||
22
python/ql/src/meta/ClassHierarchy/split-yml-files.py
Normal file
22
python/ql/src/meta/ClassHierarchy/split-yml-files.py
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""Concerns were raised about performance on Windows with having 2.5 k files for modeling, and it was recommended we join them all together when shipping.
|
||||
|
||||
This script does the opposite, so it's easier to work with locally.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
from shared_subclass_functions import *
|
||||
|
||||
if not joined_file.exists():
|
||||
sys.exit(f"File {joined_file} does not exists")
|
||||
|
||||
all_data = parse_from_file(joined_file)
|
||||
package_data = defaultdict(set)
|
||||
for t in all_data:
|
||||
package_data[t[1]].add(t)
|
||||
write_all_package_data_to_files(package_data)
|
||||
|
||||
joined_file.unlink()
|
||||
Reference in New Issue
Block a user