From ae3bbb0a9bb76e8e3f6a36310adcb08a0941792f Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Thu, 12 Jun 2025 16:52:29 +0200 Subject: [PATCH] MaD generator: run `black` formatter --- misc/scripts/models-as-data/generate_mad.py | 171 +++++++++++++++----- 1 file changed, 129 insertions(+), 42 deletions(-) diff --git a/misc/scripts/models-as-data/generate_mad.py b/misc/scripts/models-as-data/generate_mad.py index 55c7d995b48..2f2b74cf3f7 100755 --- a/misc/scripts/models-as-data/generate_mad.py +++ b/misc/scripts/models-as-data/generate_mad.py @@ -9,20 +9,22 @@ import tempfile import re import argparse + def quote_if_needed(row): if row != "true" and row != "false": - return "\"" + row + "\"" + return '"' + row + '"' # subtypes column return row[0].upper() + row[1:] + def parseData(data): - rows = { } + rows = {} for row in data: - d = row[0].split(';') + d = row[0].split(";") namespace = d[0] d = map(quote_if_needed, d) - helpers.insert_update(rows, namespace, " - [" + ', '.join(d) + ']\n') + helpers.insert_update(rows, namespace, " - [" + ", ".join(d) + "]\n") return rows @@ -39,6 +41,7 @@ $ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory Requirements: `codeql` should appear on your path.""" + class Generator: generateSinks = False generateSources = False @@ -55,42 +58,97 @@ class Generator: self.language = language def setenvironment(self, database=None, folder=None): - self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip() + self.codeQlRoot = ( + subprocess.check_output(["git", "rev-parse", "--show-toplevel"]) + .decode("utf-8") + .strip() + ) self.database = database or self.database self.folder = folder or self.folder self.generatedFrameworks = os.path.join( - self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}") + self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}" + ) self.workDir = tempfile.mkdtemp() if self.ram is None: threads = 
self.threads if self.threads > 0 else os.cpu_count() self.ram = 2048 * threads os.makedirs(self.generatedFrameworks, exist_ok=True) - @staticmethod def make(): p = argparse.ArgumentParser( description=description, formatter_class=argparse.RawTextHelpFormatter, - epilog=epilog) + epilog=epilog, + ) p.add_argument("database", help="Path to the CodeQL database") - p.add_argument("folder", nargs="?", default="", help="Optional folder to place the generated files in") - p.add_argument("--language", required=True, help="The language for which to generate models") - p.add_argument("--with-sinks", action="store_true", help="Generate sink models", dest="generateSinks") - p.add_argument("--with-sources", action="store_true", help="Generate source models", dest="generateSources") - p.add_argument("--with-summaries", action="store_true", help="Generate summary models", dest="generateSummaries") - p.add_argument("--with-neutrals", action="store_true", help="Generate neutral models", dest="generateNeutrals") - p.add_argument("--with-typebased-summaries", action="store_true", help="Generate type-based summary models (experimental)", dest="generateTypeBasedSummaries") - p.add_argument("--dry-run", action="store_true", help="Do not write the generated files, just print them to stdout", dest="dryRun") - p.add_argument("--threads", type=int, default=Generator.threads, help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.") - p.add_argument("--ram", type=int, help="Amount of RAM to use for CodeQL queries in MB. 
Default is to use 2048 MB per thread.") + p.add_argument( + "folder", + nargs="?", + default="", + help="Optional folder to place the generated files in", + ) + p.add_argument( + "--language", + required=True, + help="The language for which to generate models", + ) + p.add_argument( + "--with-sinks", + action="store_true", + help="Generate sink models", + dest="generateSinks", + ) + p.add_argument( + "--with-sources", + action="store_true", + help="Generate source models", + dest="generateSources", + ) + p.add_argument( + "--with-summaries", + action="store_true", + help="Generate summary models", + dest="generateSummaries", + ) + p.add_argument( + "--with-neutrals", + action="store_true", + help="Generate neutral models", + dest="generateNeutrals", + ) + p.add_argument( + "--with-typebased-summaries", + action="store_true", + help="Generate type-based summary models (experimental)", + dest="generateTypeBasedSummaries", + ) + p.add_argument( + "--dry-run", + action="store_true", + help="Do not write the generated files, just print them to stdout", + dest="dryRun", + ) + p.add_argument( + "--threads", + type=int, + default=Generator.threads, + help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.", + ) + p.add_argument( + "--ram", + type=int, + help="Amount of RAM to use for CodeQL queries in MB. 
Default is to use 2048 MB per thread.", + ) generator = p.parse_args(namespace=Generator()) - if (not generator.generateSinks and - not generator.generateSources and - not generator.generateSummaries and - not generator.generateNeutrals and - not generator.generateTypeBasedSummaries): + if ( + not generator.generateSinks + and not generator.generateSources + and not generator.generateSummaries + and not generator.generateNeutrals + and not generator.generateTypeBasedSummaries + ): generator.generateSinks = True generator.generateSources = True generator.generateSummaries = True @@ -99,22 +157,37 @@ class Generator: generator.setenvironment() return generator - def runQuery(self, query): print("########## Querying " + query + "...") - queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query) + queryFile = os.path.join( + self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query + ) resultBqrs = os.path.join(self.workDir, "out.bqrs") - cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs, "--threads", str(self.threads), "--ram", str(self.ram)] + cmd = [ + "codeql", + "query", + "run", + queryFile, + "--database", + self.database, + "--output", + resultBqrs, + "--threads", + str(self.threads), + "--ram", + str(self.ram), + ] helpers.run_cmd(cmd, "Failed to generate " + query) return helpers.readData(self.workDir, resultBqrs) - def asAddsTo(self, rows, predicate): - extensions = { } + extensions = {} for key in rows: - extensions[key] = helpers.addsToTemplate.format(f"codeql/{self.language}-all", predicate, rows[key]) + extensions[key] = helpers.addsToTemplate.format( + f"codeql/{self.language}-all", predicate, rows[key] + ) return extensions def getAddsTo(self, query, predicate): @@ -125,27 +198,37 @@ class Generator: def makeContent(self): summaryAddsTo = {} if self.generateSummaries: - summaryAddsTo = self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate) 
+ summaryAddsTo = self.getAddsTo( + "CaptureSummaryModels.ql", helpers.summaryModelPredicate + ) sinkAddsTo = {} if self.generateSinks: - sinkAddsTo = self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate) + sinkAddsTo = self.getAddsTo( + "CaptureSinkModels.ql", helpers.sinkModelPredicate + ) sourceAddsTo = {} if self.generateSources: - sourceAddsTo = self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate) + sourceAddsTo = self.getAddsTo( + "CaptureSourceModels.ql", helpers.sourceModelPredicate + ) neutralAddsTo = {} if self.generateNeutrals: - neutralAddsTo = self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate) + neutralAddsTo = self.getAddsTo( + "CaptureNeutralModels.ql", helpers.neutralModelPredicate + ) return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo) def makeTypeBasedContent(self): if self.generateTypeBasedSummaries: - typeBasedSummaryAddsTo = self.getAddsTo("CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate) + typeBasedSummaryAddsTo = self.getAddsTo( + "CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate + ) else: - typeBasedSummaryAddsTo = { } + typeBasedSummaryAddsTo = {} return typeBasedSummaryAddsTo @@ -156,13 +239,14 @@ extensions: {0}""" for entry in extensions: # Replace problematic characters with dashes, and collapse multiple dashes. 
- sanitizedEntry = re.sub(r'-+', '-', entry.replace('/', '-').replace(':', '-')) + sanitizedEntry = re.sub( + r"-+", "-", entry.replace("/", "-").replace(":", "-") + ) target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension) with open(target, "w") as f: f.write(extensionTemplate.format(extensions[entry])) print("Models as data extensions written to " + target) - def run(self): content = self.makeContent() typeBasedContent = self.makeTypeBasedContent() @@ -171,14 +255,17 @@ extensions: print("Models as data extensions generated, but not written to file.") sys.exit(0) - if (self.generateSinks or - self.generateSources or - self.generateSummaries or - self.generateNeutrals): + if ( + self.generateSinks + or self.generateSources + or self.generateSummaries + or self.generateNeutrals + ): self.save(content, ".model.yml") if self.generateTypeBasedSummaries: self.save(typeBasedContent, ".typebased.model.yml") -if __name__ == '__main__': + +if __name__ == "__main__": Generator.make().run()