MaD generator: tweak the scripts

* fix a bug where the order of model generation was determined by the order in the `download.json` file of the experiment rather than the order in the config file
* allow configuring `--ram` and `--threads` in the MaD generator scripts
* pass no `--ram` and use `--threads=0` by default in the bulk generator (the single generator defaults are left unchanged)
* allow passing `--dca` multiple times, with DBs from experiments listed later taking precedence. This makes it possible to run a subset of the sources in a "fixup" experiment and use it to "patch" a previous run without rerunning everything.
2025-12-16 16:53:25 +01:00 · 2025-06-11 16:39:39 +02:00
parent 28ae39694f
commit 7db31b06d1
2 changed files with 51 additions and 47 deletions
--- a/misc/scripts/models-as-data/generate_mad.py
+++ b/misc/scripts/models-as-data/generate_mad.py
@@ -62,6 +62,8 @@ class Generator:
        self.generateTypeBasedSummaries = False
        self.dryRun = False
        self.dirname = "modelgenerator"
+        self.ram = 2**15
+        self.threads = 8


    def setenvironment(self, database, folder):
@@ -138,8 +140,12 @@ class Generator:
        queryFile = os.path.join(self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query)
        resultBqrs = os.path.join(self.workDir, "out.bqrs")

-        helpers.run_cmd(['codeql', 'query', 'run', queryFile, '--database',
-               self.database, '--output', resultBqrs, '--threads', '8', '--ram', '32768'], "Failed to generate " + query)
+        cmd = ['codeql', 'query', 'run', queryFile, '--database', self.database, '--output', resultBqrs]
+        if self.threads is not None:
+            cmd += ["--threads", str(self.threads)]
+        if self.ram is not None:
+            cmd += ["--ram", str(self.ram)]
+        helpers.run_cmd(cmd, "Failed to generate " + query)

        return helpers.readData(self.workDir, resultBqrs)