Move sync-identical-files.py into public repo as sync-files.py

We currently use a script to keep certain duplicate QL files in sync across the repo. For historical reasons, this script has lived in the private repo alongside the rest of CodeQL, even though it's only used for files in the public `ql` repo. This PR moves the script into the public `ql` repo. It is still invoked by Jenkins scripts that live in the private repo during CI, but it can also be invoked directly without having a checkout of the private repo. This is useful for anyone who is modifying the dataflow or IR libraries with only a QL checkout.
This commit is contained in:
Dave Bartolomeo
2020-03-29 02:59:14 -04:00
parent 1baf5df342
commit 0952064eb3
2 changed files with 143 additions and 0 deletions

3
.gitignore vendored
View File

@@ -14,6 +14,9 @@
.vs/*
!.vs/VSWorkspaceSettings.json
# Byte-compiled python files
*.pyc
# It's useful (though not required) to be able to unpack codeql in the ql checkout itself
/codeql/
.vscode/settings.json

140
config/sync-files.py Normal file
View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
# Due to various technical limitations, we sometimes have files that need to be
# kept identical in the repository. This script loads a database of such
# files and can perform two functions: check whether they are still identical,
# and overwrite the others with a master copy if needed.
import hashlib
import shutil
import os
import sys
import json
import re
from os import path
# Maps a group name to the list of file paths that must be kept identical.
# Entries are added by load_if_exists() and by sync_identical_files().
file_groups = {}
def add_prefix(prefix, relative):
    """Join `relative` onto `prefix`, rejecting results that escape `prefix`.

    Both paths are resolved with `realpath` before the containment check, so
    `..` segments and symbolic links are taken into account.

    Args:
        prefix: Directory the resulting path has to stay inside.
        relative: Path that is appended to `prefix`.

    Returns:
        The joined, unresolved path `path.join(prefix, relative)`.

    Raises:
        Exception: If the resolved result is neither `prefix` itself nor a
            path below it.
    """
    result = path.join(prefix, relative)
    real_prefix = path.realpath(prefix)
    try:
        # commonpath compares whole path components. The character-based
        # commonprefix would wrongly accept a sibling directory such as
        # "repo_sibling" when the prefix is "repo".
        common = path.commonpath((real_prefix, path.realpath(result)))
    except ValueError:
        # Raised when the two paths cannot be compared at all (for example
        # different drives on Windows); such a path is not below the prefix.
        common = None
    if common != real_prefix:
        raise Exception("Path {} is not below {}".format(
            result, prefix))
    return result
def load_if_exists(prefix, json_file_relative):
    """Merge the file groups described by a JSON file into `file_groups`.

    The JSON document is expected to be an object mapping a group name to a
    list of paths relative to `prefix`. Every path is passed through
    `add_prefix`, so a path escaping `prefix` raises. Nothing happens when
    the JSON file does not exist.

    Args:
        prefix: Directory that contains the JSON file and the listed files.
        json_file_relative: Path of the JSON file relative to `prefix`.
    """
    config_path = path.join(prefix, json_file_relative)
    if not path.isfile(config_path):
        return

    print("Loading file groups from", config_path)
    with open(config_path, 'r', encoding='utf-8') as stream:
        groups_from_json = json.load(stream)

    # Resolve every group first so that a failing add_prefix call leaves
    # file_groups untouched.
    resolved = {}
    for group_name, members in groups_from_json.items():
        resolved[group_name] = [add_prefix(prefix, member) for member in members]
    file_groups.update(resolved)
# Builds the groups of C# example files that have to stay in sync: one group
# per test file whose name also occurs among the documentation examples.
def csharp_test_files():
    """Return the C# test/documentation file pairs that must be identical.

    Files below "csharp/ql/src" whose name ends in "Bad<digits>.cs" or
    "Good<digits>.cs" are indexed by base name. Every file below
    "csharp/ql/test" with such a name forms a group with the indexed file.
    Files are matched by base name only; when a name occurs several times,
    the entry found last during the walk replaces the earlier ones.

    Returns:
        A dict mapping "C# test '<name>'" to the two-element list
        [test file path, documentation file path].
    """
    test_file_re = re.compile('.*(Bad|Good)[0-9]*\\.cs$')

    doc_path_by_name = {}
    for doc_dir, _subdirs, doc_names in os.walk("csharp/ql/src"):
        for doc_name in doc_names:
            if test_file_re.match(doc_name):
                doc_path_by_name[doc_name] = os.path.join(doc_dir, doc_name)

    groups = {}
    for test_dir, _subdirs, test_names in os.walk("csharp/ql/test"):
        for test_name in test_names:
            if test_name not in doc_path_by_name:
                continue
            test_path = os.path.join(test_dir, test_name)
            groups["C# test '" + test_name + "'"] = [test_path, doc_path_by_name[test_name]]
    return groups
def file_checksum(filename):
    """Return the hexadecimal SHA-1 digest of the contents of `filename`.

    The file is opened in binary mode, so the digest covers the raw bytes.
    """
    digest = hashlib.sha1()
    with open(filename, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
def check_group(group_name, files, master_file_picker, emit_error):
    """Check one group of files and either report or repair differences.

    Nothing happens when all files of the group have the same checksum.
    Otherwise `master_file_picker(files)` chooses the file to copy from:

    * If it returns a file name, every other file of the group is renamed to
      "<name>~" as a backup and then replaced by a copy of the chosen file.
    * If it returns None, the mismatch is reported through `emit_error`,
      followed by a usage hint and one message per file of the group.

    Args:
        group_name: Name of the group, used in the error message.
        files: Paths of the files that are supposed to be identical.
        master_file_picker: Callable taking `files` and returning the file
            to copy from, or None.
        emit_error: Callable taking (path, line, message).
    """
    distinct_digests = set()
    for member in files:
        distinct_digests.add(file_checksum(member))
    if len(distinct_digests) == 1:
        return

    source = master_file_picker(files)
    if source is not None:
        print(" Syncing others from", source)
        for target in files:
            if target != source:
                print(" " + target)
                # Keep the previous contents next to the file before overwriting.
                os.replace(target, target + '~')
                shutil.copy(source, target)
        print(" Backups written with '~' appended to file names")
        return

    out_of_sync = "Files from group '"+ group_name +"' not in sync."
    usage_hint = ("Run this script with a file-name argument among the "
                  "following to overwrite the remaining files with the contents "
                  "of that file or run with the --latest switch to update each "
                  "group of files from the most recently modified file in the group.")
    emit_error(__file__, 0, out_of_sync)
    emit_error(__file__, 0, usage_hint)
    for member in files:
        emit_error(__file__, 0, " " + member)
def chdir_repo_root():
    """Change the working directory to the parent of this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.join(script_dir, '..')
    os.chdir(repo_root)
def choose_master_file(master_file, files):
    """Return `master_file` if it is one of `files`, otherwise None."""
    return master_file if master_file in files else None
def choose_latest_file(files):
    """Return the most recently modified file of `files`.

    When several files share the newest modification time, the one listed
    first wins. Returns None for an empty collection.
    """
    return max(files, key=os.path.getmtime, default=None)
# Number of errors reported through emit_local_error(). main() exits with a
# non-zero status when this is greater than zero.
local_error_count = 0


def emit_local_error(path, line, error):
    """Print an error message and count it in `local_error_count`.

    Args:
        path: File the error refers to. Note that this parameter shadows the
            module-level `path` import inside this function.
        line: Line number; may be an int or a string.
        error: Human-readable description of the problem.
    """
    global local_error_count
    # check_group passes the line number as the int 0; concatenating it
    # without str() raises TypeError instead of printing the error.
    print('ERROR: ' + path + ':' + str(line) + " - " + error)
    local_error_count += 1
# Entry point shared with CI: a CI script calls this function directly and
# passes its own error-handling callback.
def sync_identical_files(emit_error):
    """Check, and optionally repair, every group of files that must match.

    The mode is taken from `sys.argv`:

    * no argument: differences are only reported through `emit_error`;
    * "--latest": each differing group is updated from its most recently
      modified file;
    * a path to an existing file: differing groups that contain exactly that
      path are updated from it.

    The argument is validated before the working directory is changed to
    the repository root; the groups are then loaded from
    'config/identical-files.json' and from `csharp_test_files()`.

    Args:
        emit_error: Callable taking (path, line, message), used to report
            groups that are out of sync.

    Raises:
        Exception: If more than one argument is given, or if the single
            argument is neither "--latest" nor an existing file.
    """
    argc = len(sys.argv)
    if argc not in (1, 2):
        raise Exception("Bad command line or file not found")

    if argc == 1:
        def master_file_picker(files):
            return None
    elif sys.argv[1] == "--latest":
        master_file_picker = choose_latest_file
    elif os.path.isfile(sys.argv[1]):
        requested_master = sys.argv[1]

        def master_file_picker(files):
            return choose_master_file(requested_master, files)
    else:
        raise Exception("File not found")

    chdir_repo_root()
    load_if_exists('.', 'config/identical-files.json')
    file_groups.update(csharp_test_files())
    for group_name, members in file_groups.items():
        check_group(group_name, members, master_file_picker, emit_error)
def main():
    """Run the check from the command line.

    Errors are reported with `emit_local_error`; the process exits with
    status 1 when at least one error was reported.
    """
    sync_identical_files(emit_local_error)
    if local_error_count > 0:
        # sys.exit is always available, whereas the bare exit() helper is
        # installed by the site module for interactive use and can be absent
        # (for example under "python -S").
        sys.exit(1)


if __name__ == "__main__":
    main()