diff --git a/misc/scripts/models-as-data/convert_extensions.py b/misc/scripts/models-as-data/convert_extensions.py index 5c45c7919c5..28a7b7349bc 100644 --- a/misc/scripts/models-as-data/convert_extensions.py +++ b/misc/scripts/models-as-data/convert_extensions.py @@ -7,7 +7,6 @@ import subprocess import sys import tempfile - def quote_if_needed(v): # string columns if type(v) is str: @@ -15,26 +14,13 @@ def quote_if_needed(v): # bool column return str(v) -def insert_update(rows, key, value): - if key in rows: - rows[key] += value - else: - rows[key] = value - -def merge(*dicts): - merged = {} - for d in dicts: - for entry in d: - insert_update(merged, entry, d[entry]) - return merged - def parseData(data): rows = [{ }, { }] for row in data: d = map(quote_if_needed, row) provenance = row[-1] targetRows = rows[1] if provenance.endswith("generated") else rows[0] - insert_update(targetRows, row[0], " - [" + ', '.join(d) + ']\n') + helpers.insert_update(targetRows, row[0], " - [" + ', '.join(d) + ']\n') return rows @@ -78,7 +64,7 @@ class Converter: sources = self.getAddsTo("ExtractSources.ql", helpers.sourceModelPredicate) sinks = self.getAddsTo("ExtractSinks.ql", helpers.sinkModelPredicate) neutrals = self.getAddsTo("ExtractNeutrals.ql", helpers.neutralModelPredicate) - return [merge(sources[0], sinks[0], summaries[0], neutrals[0]), merge(sources[1], sinks[1], summaries[1], neutrals[1])] + return [helpers.merge(sources[0], sinks[0], summaries[0], neutrals[0]), helpers.merge(sources[1], sinks[1], summaries[1], neutrals[1])] def save(self, extensions): diff --git a/misc/scripts/models-as-data/generate_flow_model.py b/misc/scripts/models-as-data/generate_flow_model.py index c9699d1585b..fd1856fc8be 100644 --- a/misc/scripts/models-as-data/generate_flow_model.py +++ b/misc/scripts/models-as-data/generate_flow_model.py @@ -13,14 +13,16 @@ def quote_if_needed(row): if row != "true" and row != "false": return "\"" + row + "\"" # subtypes column - return row + return row[0].upper() + row[1:] def parseData(data): - rows = "" - for (row) in data: + rows = { } + + for row in data: d = row[0].split(';') + namespace = d[0] d = map(quote_if_needed, d) - rows += " - [" + ', '.join(d) + ']\n' + helpers.insert_update(rows, namespace, " - [" + ', '.join(d) + ']\n') return rows @@ -38,12 +40,10 @@ class Generator: def printHelp(self): print(f"""Usage: -python3 GenerateFlowModel.py [] [--with-sinks] [--with-sources] [--with-summaries] [--with-typebased-summaries] [--dry-run] +python3 GenerateFlowModel.py [--with-sinks] [--with-sources] [--with-summaries] [--with-typebased-summaries] [--dry-run] This generates summary, source and sink models for the code in the database. -The files will be placed in `{self.language}/ql/lib/ext/generated/.model.yml` where -outputYml is the name (and path) of the output YAML file. Usually, models are grouped by their -respective frameworks. +The files will be placed in `{self.language}/ql/lib/ext/generated/`. Which models are generated is controlled by the flags: --with-sinks @@ -57,28 +57,18 @@ If none of these flags are specified, all models are generated except for the ty Example invocations: $ python3 GenerateFlowModel.py /tmp/dbs/my_library_db mylibrary -$ python3 GenerateFlowModel.py /tmp/dbs/my_library_db mylibrary "Friendly Name of Framework" +$ python3 GenerateFlowModel.py /tmp/dbs/my_library_db mylibrary $ python3 GenerateFlowModel.py /tmp/dbs/my_library_db --with-sinks Requirements: `codeql` should both appear on your path. """) - def setenvironment(self, target, database, friendlyName): + def setenvironment(self, database): self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip() - if not target.endswith(".model.yml"): - target += ".model.yml" - filename = os.path.basename(target) - if friendlyName is not None: - self.friendlyname = friendlyName - else: - self.friendlyname = filename[:-10] - self.shortname = filename[:-10] self.database = database self.generatedFrameworks = os.path.join( self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/") - self.frameworkTarget = os.path.join(self.generatedFrameworks, filename) - self.typeBasedFrameworkTarget = os.path.join(self.generatedFrameworks, "TypeBased" + filename) self.workDir = tempfile.mkdtemp() os.makedirs(self.generatedFrameworks, exist_ok=True) @@ -117,15 +107,11 @@ Requirements: `codeql` should both appear on your path. if not generator.generateSinks and not generator.generateSources and not generator.generateSummaries and not generator.generateNeutrals and not generator.generateTypeBasedSummaries: generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True - if len(sys.argv) < 3 or len(sys.argv) > 4: + if len(sys.argv) < 2: generator.printHelp() sys.exit(1) - friendlyName = None - if len(sys.argv) == 4: - friendlyName = sys.argv[3] - - generator.setenvironment(sys.argv[2], sys.argv[1], friendlyName) + generator.setenvironment(sys.argv[1]) return generator @@ -141,58 +127,57 @@ Requirements: `codeql` should both appear on your path. def asAddsTo(self, rows, predicate): - if rows.strip() == "": - return "" - return helpers.addsToTemplate.format(f"codeql/{self.language}-all", predicate, rows) - + extensions = { } + for key in rows: + extensions[key] = helpers.addsToTemplate.format(f"codeql/{self.language}-all", predicate, rows[key]) + return extensions def getAddsTo(self, query, predicate): data = self.runQuery(query) rows = parseData(data) return self.asAddsTo(rows, predicate) - def makeContent(self): if self.generateSummaries: summaryAddsTo = self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate) else: - summaryAddsTo = "" + summaryAddsTo = { } if self.generateSinks: sinkAddsTo = self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate) else: - sinkAddsTo = "" + sinkAddsTo = { } if self.generateSources: sourceAddsTo = self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate) else: - sourceAddsTo = "" + sourceAddsTo = {} if self.generateNeutrals: neutralAddsTo = self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate) else: - neutralAddsTo = "" + neutralAddsTo = { } - return f"""# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT. -# Definitions of models for the {self.friendlyname} framework. -extensions: -{sinkAddsTo}{sourceAddsTo}{summaryAddsTo}{neutralAddsTo}""" + return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo) def makeTypeBasedContent(self): if self.generateTypeBasedSummaries: - typeBasedSummaryAddsTo = self.getAddsTo("CaptureTypeBasedSummaryModels.ql", "extSummaryModel") + typeBasedSummaryAddsTo = self.getAddsTo("CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate) else: - typeBasedSummaryAddsTo = "" + typeBasedSummaryAddsTo = { } - return f"""# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT. -# Definitions of type based summaries in the {self.friendlyname} framework. + return typeBasedSummaryAddsTo + + def save(self, extensions, extension): + # Create a file for each namespace and save models. + extensionTemplate = """# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT. extensions: -{typeBasedSummaryAddsTo}""" - - def save(self, content, target): - with open(target, "w") as targetYml: - targetYml.write(content) - print("Models as data extensions written to " + target) +{0}""" + for entry in extensions: + target = os.path.join(self.generatedFrameworks, entry + extension) + with open(target, "w") as f: + f.write(extensionTemplate.format(extensions[entry])) + print("Models as data extensions written to " + target) def run(self): @@ -204,7 +189,7 @@ extensions: sys.exit(0) if self.generateSinks or self.generateSinks or self.generateSummaries: - self.save(content, self.frameworkTarget) + self.save(content, ".model.yml") if self.generateTypeBasedSummaries: - self.save(typeBasedContent, self.typeBasedFrameworkTarget) + self.save(typeBasedContent, ".typebased.model.yml") diff --git a/misc/scripts/models-as-data/helpers.py b/misc/scripts/models-as-data/helpers.py index 58594102037..49cccb35cb6 100644 --- a/misc/scripts/models-as-data/helpers.py +++ b/misc/scripts/models-as-data/helpers.py @@ -38,3 +38,16 @@ def readData(workDir, bqrsFile): except KeyError: print('Unexpected JSON output - no tuples found') exit(1) + +def insert_update(rows, key, value): + if key in rows: + rows[key] += value + else: + rows[key] = value + +def merge(*dicts): + merged = {} + for d in dicts: + for entry in d: + insert_update(merged, entry, d[entry]) + return merged