MaD generator: change default threads and RAM

The standalone MaD generator now defaults `--threads` to `0` (meaning: use
all available threads) and, unless `--ram` is given explicitly, limits the
RAM passed to `codeql` to 2048 MB per thread.

It also replaces the hand-written argument parsing with `argparse`.
This commit is contained in:
Paolo Tranquilli
2025-06-12 16:48:24 +02:00
parent 7db31b06d1
commit 39a3623b18
2 changed files with 49 additions and 88 deletions

View File

@@ -242,9 +242,9 @@ def generate_models(config, args, project: Project, database_dir: str) -> None:
generator.generateSinks = should_generate_sinks(project) generator.generateSinks = should_generate_sinks(project)
generator.generateSources = should_generate_sources(project) generator.generateSources = should_generate_sources(project)
generator.generateSummaries = should_generate_summaries(project) generator.generateSummaries = should_generate_summaries(project)
generator.setenvironment(database=database_dir, folder=name)
generator.threads = args.codeql_threads generator.threads = args.codeql_threads
generator.ram = args.codeql_ram generator.ram = args.codeql_ram
generator.setenvironment(database=database_dir, folder=name)
generator.run() generator.run()
@@ -527,7 +527,7 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--codeql-ram", "--codeql-ram",
type=int, type=int,
help="What `--ram` value to pass to `codeql` while generating models (by default the flag is not passed)", help="What `--ram` value to pass to `codeql` while generating models (by default 2048 MB per thread)",
default=None, default=None,
) )
parser.add_argument( parser.add_argument(

View File

@@ -7,6 +7,7 @@ import subprocess
import sys import sys
import tempfile import tempfile
import re import re
import argparse
def quote_if_needed(row): def quote_if_needed(row):
if row != "true" and row != "false": if row != "true" and row != "false":
@@ -26,112 +27,76 @@ def parseData(data):
return rows return rows
def printHelp(): description = """\
print(f"""Usage:
python3 generate_mad.py <library-database> [DIR] --language LANGUAGE [--with-sinks] [--with-sources] [--with-summaries] [--with-neutrals] [--with-typebased-summaries] [--dry-run]
This generates summary, source, sink and neutral models for the code in the database. This generates summary, source, sink and neutral models for the code in the database.
The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR` The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR`"""
Which models are generated is controlled by the flags:
--with-sinks
--with-sources
--with-summaries
--with-neutrals
--with-typebased-summaries (Experimental)
If none of these flags are specified, all models are generated except for the type based models.
--dry-run: Only run the queries, but don't write to file.
epilog = """\
Example invocations: Example invocations:
$ python3 generate_mad.py /tmp/dbs/my_library_db $ python3 generate_mad.py /tmp/dbs/my_library_db
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks $ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory $ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory
Requirements: `codeql` should appear on your path."""
Requirements: `codeql` should appear on your path.
""")
class Generator: class Generator:
def __init__(self, language): generateSinks = False
generateSources = False
generateSummaries = False
generateNeutrals = False
generateTypeBasedSummaries = False
dryRun = False
dirname = "modelgenerator"
ram = None
threads = 0
folder = ""
def __init__(self, language=None):
self.language = language self.language = language
self.generateSinks = False
self.generateSources = False
self.generateSummaries = False
self.generateNeutrals = False
self.generateTypeBasedSummaries = False
self.dryRun = False
self.dirname = "modelgenerator"
self.ram = 2**15
self.threads = 8
def setenvironment(self, database=None, folder=None):
def setenvironment(self, database, folder):
self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip() self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
self.database = database self.database = database or self.database
self.folder = folder or self.folder
self.generatedFrameworks = os.path.join( self.generatedFrameworks = os.path.join(
self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{folder}") self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{self.folder}")
self.workDir = tempfile.mkdtemp() self.workDir = tempfile.mkdtemp()
if self.ram is None:
threads = self.threads if self.threads > 0 else os.cpu_count()
self.ram = 2048 * threads
os.makedirs(self.generatedFrameworks, exist_ok=True) os.makedirs(self.generatedFrameworks, exist_ok=True)
@staticmethod @staticmethod
def make(): def make():
# Create a generator instance based on command line arguments. p = argparse.ArgumentParser(
if any(s == "--help" for s in sys.argv): description=description,
printHelp() formatter_class=argparse.RawTextHelpFormatter,
sys.exit(0) epilog=epilog)
p.add_argument("database", help="Path to the CodeQL database")
if "--language" in sys.argv: p.add_argument("folder", nargs="?", default="", help="Optional folder to place the generated files in")
language = sys.argv[sys.argv.index("--language") + 1] p.add_argument("--language", required=True, help="The language for which to generate models")
sys.argv.remove("--language") p.add_argument("--with-sinks", action="store_true", help="Generate sink models", dest="generateSinks")
sys.argv.remove(language) p.add_argument("--with-sources", action="store_true", help="Generate source models", dest="generateSources")
else: p.add_argument("--with-summaries", action="store_true", help="Generate summary models", dest="generateSummaries")
printHelp() p.add_argument("--with-neutrals", action="store_true", help="Generate neutral models", dest="generateNeutrals")
sys.exit(0) p.add_argument("--with-typebased-summaries", action="store_true", help="Generate type-based summary models (experimental)", dest="generateTypeBasedSummaries")
p.add_argument("--dry-run", action="store_true", help="Do not write the generated files, just print them to stdout", dest="dryRun")
generator = Generator(language=language) p.add_argument("--threads", type=int, default=Generator.threads, help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.")
p.add_argument("--ram", type=int, help="Amount of RAM to use for CodeQL queries in MB. Default is to use 2048 MB per thread.")
if "--with-sinks" in sys.argv: generator = p.parse_args(namespace=Generator())
sys.argv.remove("--with-sinks")
generator.generateSinks = True
if "--with-sources" in sys.argv:
sys.argv.remove("--with-sources")
generator.generateSources = True
if "--with-summaries" in sys.argv:
sys.argv.remove("--with-summaries")
generator.generateSummaries = True
if "--with-neutrals" in sys.argv:
sys.argv.remove("--with-neutrals")
generator.generateNeutrals = True
if "--with-typebased-summaries" in sys.argv:
sys.argv.remove("--with-typebased-summaries")
generator.generateTypeBasedSummaries = True
if "--dry-run" in sys.argv:
sys.argv.remove("--dry-run")
generator.dryRun = True
if (not generator.generateSinks and if (not generator.generateSinks and
not generator.generateSources and not generator.generateSources and
not generator.generateSummaries and not generator.generateSummaries and
not generator.generateNeutrals and not generator.generateNeutrals and
not generator.generateTypeBasedSummaries): not generator.generateTypeBasedSummaries):
generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True generator.generateSinks = True
generator.generateSources = True
n = len(sys.argv) generator.generateSummaries = True
if n < 2: generator.generateNeutrals = True
printHelp()
sys.exit(1)
elif n == 2:
generator.setenvironment(sys.argv[1], "")
else:
generator.setenvironment(sys.argv[1], sys.argv[2])
generator.setenvironment()
return generator return generator
@@ -140,11 +105,7 @@ class Generator:
queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query) queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query)
resultBqrs = os.path.join(self.workDir, "out.bqrs") resultBqrs = os.path.join(self.workDir, "out.bqrs")
cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs] cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs, "--threads", str(self.threads), "--ram", str(self.ram)]
if self.threads is not None:
cmd += ["--threads", str(self.threads)]
if self.ram is not None:
cmd += ["--ram", str(self.ram)]
helpers.run_cmd(cmd, "Failed to generate " + query) helpers.run_cmd(cmd, "Failed to generate " + query)
return helpers.readData(self.workDir, resultBqrs) return helpers.readData(self.workDir, resultBqrs)
@@ -220,4 +181,4 @@ extensions:
self.save(typeBasedContent, ".typebased.model.yml") self.save(typeBasedContent, ".typebased.model.yml")
if __name__ == '__main__': if __name__ == '__main__':
Generator.make().run() Generator.make().run()