MaD generator: run black formatter

This commit is contained in:
Paolo Tranquilli
2025-06-12 16:52:29 +02:00
parent 39a3623b18
commit ae3bbb0a9b

View File

@@ -9,20 +9,22 @@ import tempfile
import re import re
import argparse import argparse
def quote_if_needed(row): def quote_if_needed(row):
if row != "true" and row != "false": if row != "true" and row != "false":
return "\"" + row + "\"" return '"' + row + '"'
# subtypes column # subtypes column
return row[0].upper() + row[1:] return row[0].upper() + row[1:]
def parseData(data): def parseData(data):
rows = { } rows = {}
for row in data: for row in data:
d = row[0].split(';') d = row[0].split(";")
namespace = d[0] namespace = d[0]
d = map(quote_if_needed, d) d = map(quote_if_needed, d)
helpers.insert_update(rows, namespace, " - [" + ', '.join(d) + ']\n') helpers.insert_update(rows, namespace, " - [" + ", ".join(d) + "]\n")
return rows return rows
@@ -39,6 +41,7 @@ $ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory
Requirements: `codeql` should appear on your path.""" Requirements: `codeql` should appear on your path."""
class Generator: class Generator:
generateSinks = False generateSinks = False
generateSources = False generateSources = False
@@ -55,42 +58,97 @@ class Generator:
self.language = language self.language = language
def setenvironment(self, database=None, folder=None): def setenvironment(self, database=None, folder=None):
self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip() self.codeQlRoot = (
subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
.decode("utf-8")
.strip()
)
self.database = database or self.database self.database = database or self.database
self.folder = folder or self.folder self.folder = folder or self.folder
self.generatedFrameworks = os.path.join( self.generatedFrameworks = os.path.join(
self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}") self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}"
)
self.workDir = tempfile.mkdtemp() self.workDir = tempfile.mkdtemp()
if self.ram is None: if self.ram is None:
threads = self.threads if self.threads > 0 else os.cpu_count() threads = self.threads if self.threads > 0 else os.cpu_count()
self.ram = 2048 * threads self.ram = 2048 * threads
os.makedirs(self.generatedFrameworks, exist_ok=True) os.makedirs(self.generatedFrameworks, exist_ok=True)
@staticmethod @staticmethod
def make(): def make():
p = argparse.ArgumentParser( p = argparse.ArgumentParser(
description=description, description=description,
formatter_class=argparse.RawTextHelpFormatter, formatter_class=argparse.RawTextHelpFormatter,
epilog=epilog) epilog=epilog,
)
p.add_argument("database", help="Path to the CodeQL database") p.add_argument("database", help="Path to the CodeQL database")
p.add_argument("folder", nargs="?", default="", help="Optional folder to place the generated files in") p.add_argument(
p.add_argument("--language", required=True, help="The language for which to generate models") "folder",
p.add_argument("--with-sinks", action="store_true", help="Generate sink models", dest="generateSinks") nargs="?",
p.add_argument("--with-sources", action="store_true", help="Generate source models", dest="generateSources") default="",
p.add_argument("--with-summaries", action="store_true", help="Generate summary models", dest="generateSummaries") help="Optional folder to place the generated files in",
p.add_argument("--with-neutrals", action="store_true", help="Generate neutral models", dest="generateNeutrals") )
p.add_argument("--with-typebased-summaries", action="store_true", help="Generate type-based summary models (experimental)", dest="generateTypeBasedSummaries") p.add_argument(
p.add_argument("--dry-run", action="store_true", help="Do not write the generated files, just print them to stdout", dest="dryRun") "--language",
p.add_argument("--threads", type=int, default=Generator.threads, help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.") required=True,
p.add_argument("--ram", type=int, help="Amount of RAM to use for CodeQL queries in MB. Default is to use 2048 MB per thread.") help="The language for which to generate models",
)
p.add_argument(
"--with-sinks",
action="store_true",
help="Generate sink models",
dest="generateSinks",
)
p.add_argument(
"--with-sources",
action="store_true",
help="Generate source models",
dest="generateSources",
)
p.add_argument(
"--with-summaries",
action="store_true",
help="Generate summary models",
dest="generateSummaries",
)
p.add_argument(
"--with-neutrals",
action="store_true",
help="Generate neutral models",
dest="generateNeutrals",
)
p.add_argument(
"--with-typebased-summaries",
action="store_true",
help="Generate type-based summary models (experimental)",
dest="generateTypeBasedSummaries",
)
p.add_argument(
"--dry-run",
action="store_true",
help="Do not write the generated files, just print them to stdout",
dest="dryRun",
)
p.add_argument(
"--threads",
type=int,
default=Generator.threads,
help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.",
)
p.add_argument(
"--ram",
type=int,
help="Amount of RAM to use for CodeQL queries in MB. Default is to use 2048 MB per thread.",
)
generator = p.parse_args(namespace=Generator()) generator = p.parse_args(namespace=Generator())
if (not generator.generateSinks and if (
not generator.generateSources and not generator.generateSinks
not generator.generateSummaries and and not generator.generateSources
not generator.generateNeutrals and and not generator.generateSummaries
not generator.generateTypeBasedSummaries): and not generator.generateNeutrals
and not generator.generateTypeBasedSummaries
):
generator.generateSinks = True generator.generateSinks = True
generator.generateSources = True generator.generateSources = True
generator.generateSummaries = True generator.generateSummaries = True
@@ -99,22 +157,37 @@ class Generator:
generator.setenvironment() generator.setenvironment()
return generator return generator
def runQuery(self, query): def runQuery(self, query):
print("########## Querying " + query + "...") print("########## Querying " + query + "...")
queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query) queryFile = os.path.join(
self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query
)
resultBqrs = os.path.join(self.workDir, "out.bqrs") resultBqrs = os.path.join(self.workDir, "out.bqrs")
cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs, "--threads", str(self.threads), "--ram", str(self.ram)] cmd = [
"codeql",
"query",
"run",
queryFile,
"--database",
self.database,
"--output",
resultBqrs,
"--threads",
str(self.threads),
"--ram",
str(self.ram),
]
helpers.run_cmd(cmd, "Failed to generate " + query) helpers.run_cmd(cmd, "Failed to generate " + query)
return helpers.readData(self.workDir, resultBqrs) return helpers.readData(self.workDir, resultBqrs)
def asAddsTo(self, rows, predicate): def asAddsTo(self, rows, predicate):
extensions = { } extensions = {}
for key in rows: for key in rows:
extensions[key] = helpers.addsToTemplate.format(f"codeql/{self.language}-all", predicate, rows[key]) extensions[key] = helpers.addsToTemplate.format(
f"codeql/{self.language}-all", predicate, rows[key]
)
return extensions return extensions
def getAddsTo(self, query, predicate): def getAddsTo(self, query, predicate):
@@ -125,27 +198,37 @@ class Generator:
def makeContent(self): def makeContent(self):
summaryAddsTo = {} summaryAddsTo = {}
if self.generateSummaries: if self.generateSummaries:
summaryAddsTo = self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate) summaryAddsTo = self.getAddsTo(
"CaptureSummaryModels.ql", helpers.summaryModelPredicate
)
sinkAddsTo = {} sinkAddsTo = {}
if self.generateSinks: if self.generateSinks:
sinkAddsTo = self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate) sinkAddsTo = self.getAddsTo(
"CaptureSinkModels.ql", helpers.sinkModelPredicate
)
sourceAddsTo = {} sourceAddsTo = {}
if self.generateSources: if self.generateSources:
sourceAddsTo = self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate) sourceAddsTo = self.getAddsTo(
"CaptureSourceModels.ql", helpers.sourceModelPredicate
)
neutralAddsTo = {} neutralAddsTo = {}
if self.generateNeutrals: if self.generateNeutrals:
neutralAddsTo = self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate) neutralAddsTo = self.getAddsTo(
"CaptureNeutralModels.ql", helpers.neutralModelPredicate
)
return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo) return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo)
def makeTypeBasedContent(self): def makeTypeBasedContent(self):
if self.generateTypeBasedSummaries: if self.generateTypeBasedSummaries:
typeBasedSummaryAddsTo = self.getAddsTo("CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate) typeBasedSummaryAddsTo = self.getAddsTo(
"CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate
)
else: else:
typeBasedSummaryAddsTo = { } typeBasedSummaryAddsTo = {}
return typeBasedSummaryAddsTo return typeBasedSummaryAddsTo
@@ -156,13 +239,14 @@ extensions:
{0}""" {0}"""
for entry in extensions: for entry in extensions:
# Replace problematic characters with dashes, and collapse multiple dashes. # Replace problematic characters with dashes, and collapse multiple dashes.
sanitizedEntry = re.sub(r'-+', '-', entry.replace('/', '-').replace(':', '-')) sanitizedEntry = re.sub(
r"-+", "-", entry.replace("/", "-").replace(":", "-")
)
target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension) target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension)
with open(target, "w") as f: with open(target, "w") as f:
f.write(extensionTemplate.format(extensions[entry])) f.write(extensionTemplate.format(extensions[entry]))
print("Models as data extensions written to " + target) print("Models as data extensions written to " + target)
def run(self): def run(self):
content = self.makeContent() content = self.makeContent()
typeBasedContent = self.makeTypeBasedContent() typeBasedContent = self.makeTypeBasedContent()
@@ -171,14 +255,17 @@ extensions:
print("Models as data extensions generated, but not written to file.") print("Models as data extensions generated, but not written to file.")
sys.exit(0) sys.exit(0)
if (self.generateSinks or if (
self.generateSources or self.generateSinks
self.generateSummaries or or self.generateSources
self.generateNeutrals): or self.generateSummaries
or self.generateNeutrals
):
self.save(content, ".model.yml") self.save(content, ".model.yml")
if self.generateTypeBasedSummaries: if self.generateTypeBasedSummaries:
self.save(typeBasedContent, ".typebased.model.yml") self.save(typeBasedContent, ".typebased.model.yml")
if __name__ == '__main__':
if __name__ == "__main__":
Generator.make().run() Generator.make().run()