mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge branch 'main' into redsun82/cargo-upgrade-3
This commit is contained in:
@@ -321,7 +321,7 @@ drop = object()
|
||||
def annotate(
|
||||
annotated_cls: type,
|
||||
add_bases: _Iterable[type] | None = None,
|
||||
replace_bases: _Dict[type, type] | None = None,
|
||||
replace_bases: _Dict[type, type | None] | None = None,
|
||||
cfg: bool = False,
|
||||
) -> _Callable[[type], _PropertyModifierList]:
|
||||
"""
|
||||
@@ -329,7 +329,8 @@ def annotate(
|
||||
|
||||
The name of the class used for annotation must be `_`.
|
||||
|
||||
`replace_bases` can be used to replace bases on the annotated class.
|
||||
`replace_bases` can be used to replace bases on the annotated class. Mapping to
|
||||
`None` will remove that base class.
|
||||
"""
|
||||
|
||||
def decorator(cls: type) -> _PropertyModifierList:
|
||||
@@ -341,7 +342,9 @@ def annotate(
|
||||
_ClassPragma(p, value=v)(annotated_cls)
|
||||
if replace_bases:
|
||||
annotated_cls.__bases__ = tuple(
|
||||
replace_bases.get(b, b) for b in annotated_cls.__bases__
|
||||
b
|
||||
for b in (replace_bases.get(b, b) for b in annotated_cls.__bases__)
|
||||
if b is not None
|
||||
)
|
||||
if add_bases:
|
||||
annotated_cls.__bases__ += tuple(add_bases)
|
||||
|
||||
@@ -66,6 +66,20 @@ impl {{name}} {
|
||||
/// Emits a single tuple for this detached field into the field's table.
///
/// The tuple always starts with `id`. For non-predicate fields it is followed
/// by the position `i` (only when the field is repeated and ordered) and by
/// `value`. Predicate fields emit the `id` alone.
pub fn emit_{{singular_field_name}}(id: trap::Label<Self>{{^is_predicate}}{{#is_repeated}}{{^is_unordered}}, i: usize{{/is_unordered}}{{/is_repeated}}, value: {{base_type}}{{/is_predicate}}, out: &mut trap::Writer) {
|
||||
out.add_tuple("{{table_name}}", vec![id.into(){{^is_predicate}}{{#is_repeated}}{{^is_unordered}}, i.into(){{/is_unordered}}{{/is_repeated}}, value.into(){{/is_predicate}}]);
|
||||
}
|
||||
|
||||
{{#is_repeated}}
|
||||
/// Emits every element of `values` for this repeated detached field.
///
/// Each element is forwarded to the single-element emitter above. For ordered
/// fields the element's position in the iteration (from `enumerate`) is passed
/// as the index; for unordered fields only the value is passed.
pub fn emit_{{field_name}}(id: trap::Label<Self>, values: impl IntoIterator<Item={{base_type}}>, out: &mut trap::Writer) {
|
||||
values
|
||||
.into_iter()
|
||||
{{^is_unordered}}
|
||||
.enumerate()
|
||||
.for_each(|(i, value)| Self::emit_{{singular_field_name}}(id, i, value, out));
|
||||
{{/is_unordered}}
|
||||
{{#is_unordered}}
|
||||
.for_each(|value| Self::emit_{{singular_field_name}}(id, value, out));
|
||||
{{/is_unordered}}
|
||||
}
|
||||
{{/is_repeated}}
|
||||
{{/detached_fields}}
|
||||
}
|
||||
{{/has_detached_fields}}
|
||||
|
||||
@@ -5,7 +5,7 @@ Experimental script for bulk generation of MaD models based on a list of project
|
||||
Note: This file must be formatted using the Black Python formatter.
|
||||
"""
|
||||
|
||||
import os.path
|
||||
import pathlib
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Required, TypedDict, List, Callable, Optional
|
||||
@@ -41,7 +41,7 @@ gitroot = (
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
build_dir = os.path.join(gitroot, "mad-generation-build")
|
||||
build_dir = pathlib.Path(gitroot, "mad-generation-build")
|
||||
|
||||
|
||||
# A project to generate models for
|
||||
@@ -86,10 +86,10 @@ def clone_project(project: Project) -> str:
|
||||
git_tag = project.get("git-tag")
|
||||
|
||||
# Determine target directory
|
||||
target_dir = os.path.join(build_dir, name)
|
||||
target_dir = build_dir / name
|
||||
|
||||
# Clone only if directory doesn't already exist
|
||||
if not os.path.exists(target_dir):
|
||||
if not target_dir.exists():
|
||||
if git_tag:
|
||||
print(f"Cloning {name} from {repo_url} at tag {git_tag}")
|
||||
else:
|
||||
@@ -191,10 +191,10 @@ def build_database(
|
||||
name = project["name"]
|
||||
|
||||
# Create database directory path
|
||||
database_dir = os.path.join(build_dir, f"{name}-db")
|
||||
database_dir = build_dir / f"{name}-db"
|
||||
|
||||
# Only build the database if it doesn't already exist
|
||||
if not os.path.exists(database_dir):
|
||||
if not database_dir.exists():
|
||||
print(f"Building CodeQL database for {name}...")
|
||||
extractor_options = [option for x in extractor_options for option in ("-O", x)]
|
||||
try:
|
||||
@@ -236,13 +236,16 @@ def generate_models(config, args, project: Project, database_dir: str) -> None:
|
||||
language = config["language"]
|
||||
|
||||
generator = mad.Generator(language)
|
||||
# Note: The argument parser converts with-sinks to with_sinks, etc.
|
||||
generator.generateSinks = should_generate_sinks(project)
|
||||
generator.generateSources = should_generate_sources(project)
|
||||
generator.generateSummaries = should_generate_summaries(project)
|
||||
generator.setenvironment(database=database_dir, folder=name)
|
||||
generator.with_sinks = should_generate_sinks(project)
|
||||
generator.with_sources = should_generate_sources(project)
|
||||
generator.with_summaries = should_generate_summaries(project)
|
||||
generator.threads = args.codeql_threads
|
||||
generator.ram = args.codeql_ram
|
||||
if config.get("single-file", False):
|
||||
generator.single_file = name
|
||||
else:
|
||||
generator.folder = name
|
||||
generator.setenvironment(database=database_dir)
|
||||
generator.run()
|
||||
|
||||
|
||||
@@ -313,7 +316,7 @@ def download_artifact(url: str, artifact_name: str, pat: str) -> str:
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to download file. Status code: {response.status_code}")
|
||||
sys.exit(1)
|
||||
target_zip = os.path.join(build_dir, zipName)
|
||||
target_zip = build_dir / zipName
|
||||
with open(target_zip, "wb") as file:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
file.write(chunk)
|
||||
@@ -321,12 +324,6 @@ def download_artifact(url: str, artifact_name: str, pat: str) -> str:
|
||||
return target_zip
|
||||
|
||||
|
||||
def remove_extension(filename: str) -> str:
    """Strip every trailing extension from ``filename``.

    ``"db.tar.gz"`` becomes ``"db"``. Stripping stops as soon as
    ``os.path.splitext`` reports no extension, so names whose remaining dots
    are not extensions (a leading dot as in ``".hidden"``, or a dot in a
    parent directory as in ``"dir.d/file"``) are returned unchanged.

    The previous implementation looped ``while "." in filename`` and therefore
    never terminated for such names, because ``splitext`` leaves them as-is.
    """
    while True:
        stem, ext = os.path.splitext(filename)
        if not ext:
            # Nothing left that splitext considers an extension.
            return filename
        filename = stem
|
||||
|
||||
|
||||
def pretty_name_from_artifact_name(artifact_name: str) -> str:
    """Return the second ``___``-separated segment of ``artifact_name``.

    Raises ``IndexError`` when the name contains no ``___`` separator.
    """
    segments = artifact_name.split("___")
    return segments[1]
|
||||
|
||||
@@ -348,7 +345,7 @@ def download_dca_databases(
|
||||
"""
|
||||
print("\n=== Finding projects ===")
|
||||
project_map = {project["name"]: project for project in projects}
|
||||
analyzed_databases = {}
|
||||
analyzed_databases = {n: None for n in project_map}
|
||||
for experiment_name in experiment_names:
|
||||
response = get_json_from_github(
|
||||
f"https://raw.githubusercontent.com/github/codeql-dca-main/data/{experiment_name}/reports/downloads.json",
|
||||
@@ -361,17 +358,24 @@ def download_dca_databases(
|
||||
artifact_name = analyzed_database["artifact_name"]
|
||||
pretty_name = pretty_name_from_artifact_name(artifact_name)
|
||||
|
||||
if not pretty_name in project_map:
|
||||
if not pretty_name in analyzed_databases:
|
||||
print(f"Skipping {pretty_name} as it is not in the list of projects")
|
||||
continue
|
||||
|
||||
if pretty_name in analyzed_databases:
|
||||
if analyzed_databases[pretty_name] is not None:
|
||||
print(
|
||||
f"Skipping previous database {analyzed_databases[pretty_name]['artifact_name']} for {pretty_name}"
|
||||
)
|
||||
|
||||
analyzed_databases[pretty_name] = analyzed_database
|
||||
|
||||
not_found = [name for name, db in analyzed_databases.items() if db is None]
|
||||
if not_found:
|
||||
print(
|
||||
f"ERROR: The following projects were not found in the DCA experiments: {', '.join(not_found)}"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
def download_and_decompress(analyzed_database: dict) -> str:
|
||||
artifact_name = analyzed_database["artifact_name"]
|
||||
repository = analyzed_database["repository"]
|
||||
@@ -393,19 +397,17 @@ def download_dca_databases(
|
||||
# The database is in a zip file, which contains a tar.gz file with the DB
|
||||
# First we open the zip file
|
||||
with zipfile.ZipFile(artifact_zip_location, "r") as zip_ref:
|
||||
artifact_unzipped_location = os.path.join(build_dir, artifact_name)
|
||||
artifact_unzipped_location = build_dir / artifact_name
|
||||
# clean up any remnants of previous runs
|
||||
shutil.rmtree(artifact_unzipped_location, ignore_errors=True)
|
||||
# And then we extract it to build_dir/artifact_name
|
||||
zip_ref.extractall(artifact_unzipped_location)
|
||||
# And then we extract the language tar.gz file inside it
|
||||
artifact_tar_location = os.path.join(
|
||||
artifact_unzipped_location, f"{language}.tar.gz"
|
||||
)
|
||||
artifact_tar_location = artifact_unzipped_location / f"{language}.tar.gz"
|
||||
with tarfile.open(artifact_tar_location, "r:gz") as tar_ref:
|
||||
# And we just untar it to the same directory as the zip file
|
||||
tar_ref.extractall(artifact_unzipped_location)
|
||||
ret = os.path.join(artifact_unzipped_location, language)
|
||||
ret = artifact_unzipped_location / language
|
||||
print(f"Decompression complete: {ret}")
|
||||
return ret
|
||||
|
||||
@@ -425,8 +427,16 @@ def download_dca_databases(
|
||||
return [(project_map[n], r) for n, r in zip(analyzed_databases, results)]
|
||||
|
||||
|
||||
def get_mad_destination_for_project(config, name: str) -> str:
    """Join the configured ``destination`` directory with the project ``name``."""
    destination_root = config["destination"]
    return os.path.join(destination_root, name)
|
||||
def clean_up_mad_destination_for_project(config, name: str):
    """Delete previously generated MaD output for the project ``name``.

    In single-file mode (``config["single-file"]`` truthy) the output is the
    file ``<destination>/<name>.model.yml``; otherwise it is the directory
    ``<destination>/<name>``. Nothing happens if the target does not exist.

    The file name is built by appending ``.model.yml`` to the full project
    name. ``Path.with_suffix`` is deliberately not used: it would replace the
    last dotted component of a name such as ``foo.bar`` and point at
    ``foo.model.yml`` instead of ``foo.bar.model.yml``.
    """
    target = pathlib.Path(config["destination"], name)
    if config.get("single-file", False):
        target = target.with_name(target.name + ".model.yml")
        if target.exists():
            print(f"Deleting existing MaD file at {target}")
            target.unlink()
    elif target.exists():
        print(f"Deleting existing MaD directory at {target}")
        shutil.rmtree(target, ignore_errors=True)
|
||||
|
||||
|
||||
def get_strategy(config) -> str:
|
||||
@@ -448,8 +458,7 @@ def main(config, args) -> None:
|
||||
language = config["language"]
|
||||
|
||||
# Create build directory if it doesn't exist
|
||||
if not os.path.exists(build_dir):
|
||||
os.makedirs(build_dir)
|
||||
build_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
database_results = []
|
||||
match get_strategy(config):
|
||||
@@ -469,7 +478,7 @@ def main(config, args) -> None:
|
||||
if args.pat is None:
|
||||
print("ERROR: --pat argument is required for DCA strategy")
|
||||
sys.exit(1)
|
||||
if not os.path.exists(args.pat):
|
||||
if not args.pat.exists():
|
||||
print(f"ERROR: Personal Access Token file '{pat}' does not exist.")
|
||||
sys.exit(1)
|
||||
with open(args.pat, "r") as f:
|
||||
@@ -493,12 +502,9 @@ def main(config, args) -> None:
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Delete the MaD directory for each project
|
||||
for project, database_dir in database_results:
|
||||
mad_dir = get_mad_destination_for_project(config, project["name"])
|
||||
if os.path.exists(mad_dir):
|
||||
print(f"Deleting existing MaD directory at {mad_dir}")
|
||||
subprocess.check_call(["rm", "-rf", mad_dir])
|
||||
# clean up existing MaD data for the projects
|
||||
for project, _ in database_results:
|
||||
clean_up_mad_destination_for_project(config, project["name"])
|
||||
|
||||
for project, database_dir in database_results:
|
||||
if database_dir is not None:
|
||||
@@ -508,7 +514,10 @@ def main(config, args) -> None:
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--config", type=str, help="Path to the configuration file.", required=True
|
||||
"--config",
|
||||
type=pathlib.Path,
|
||||
help="Path to the configuration file.",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dca",
|
||||
@@ -519,13 +528,13 @@ if __name__ == "__main__":
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pat",
|
||||
type=str,
|
||||
type=pathlib.Path,
|
||||
help="Path to a file containing the PAT token required to grab DCA databases (the same as the one you use for DCA)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--codeql-ram",
|
||||
type=int,
|
||||
help="What `--ram` value to pass to `codeql` while generating models (by default the flag is not passed)",
|
||||
help="What `--ram` value to pass to `codeql` while generating models (by default 2048 MB per thread)",
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
@@ -538,7 +547,7 @@ if __name__ == "__main__":
|
||||
|
||||
# Load config file
|
||||
config = {}
|
||||
if not os.path.exists(args.config):
|
||||
if not args.config.exists():
|
||||
print(f"ERROR: Config file '{args.config}' does not exist.")
|
||||
sys.exit(1)
|
||||
try:
|
||||
|
||||
@@ -7,65 +7,86 @@ import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
def quote_if_needed(v):
    """Render one column value: exact ``str`` values are double-quoted,
    anything else (e.g. a bool column) is passed through ``str()``."""
    is_string_column = type(v) is str
    return '"' + v + '"' if is_string_column else str(v)
|
||||
|
||||
|
||||
def parseData(data):
    """Group result rows by their first column, split by provenance.

    Returns a two-element list of dicts: index 0 collects rows whose last
    column does not end with ``"generated"``, index 1 collects those that do.
    Each row is appended (via ``helpers.insert_update``) as one formatted
    list line under the key taken from its first column.
    """
    manual, generated = {}, {}
    for row in data:
        bucket = generated if row[-1].endswith("generated") else manual
        line = " - [" + ", ".join(quote_if_needed(col) for col in row) + "]\n"
        helpers.insert_update(bucket, row[0], line)
    return [manual, generated]
|
||||
|
||||
|
||||
class Converter:
|
||||
def __init__(self, language, dbDir):
    """Record the language and database, and derive the working paths.

    The repository root is obtained from ``git rev-parse --show-toplevel``;
    a fresh temporary directory is created as the work directory.
    """
    self.language = language
    self.dbDir = dbDir
    toplevel = subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
    self.codeQlRoot = toplevel.decode("utf-8").strip()
    ext_subpath = f"{self.language}/ql/lib/ext/"
    self.extDir = os.path.join(self.codeQlRoot, ext_subpath)
    self.dirname = "modelconverter"
    self.modelFileExtension = ".model.yml"
    self.workDir = tempfile.mkdtemp()
|
||||
|
||||
|
||||
def runQuery(self, query):
    """Run ``query`` from this converter's utils directory against the
    database and return the decoded rows from ``helpers.readData``."""
    print("########## Querying: ", query)
    query_dir = f"{self.language}/ql/src/utils/{self.dirname}"
    query_path = os.path.join(self.codeQlRoot, query_dir, query)
    bqrs_path = os.path.join(self.workDir, "out.bqrs")

    command = ["codeql", "query", "run", query_path]
    command += ["--database", self.dbDir]
    command += ["--output", bqrs_path]
    helpers.run_cmd(command, "Failed to generate " + query)
    return helpers.readData(self.workDir, bqrs_path)
|
||||
|
||||
|
||||
def asAddsTo(self, rows, predicate):
    """Wrap each entry of the two row dicts in ``helpers.addsToTemplate``.

    ``rows`` is a two-element sequence of dicts; the result has the same
    shape, with every value formatted for the ``codeql/<language>-all`` pack
    and the given ``predicate``.
    """
    return [
        {
            key: helpers.addsToTemplate.format(
                f"codeql/{self.language}-all", predicate, rows[i][key]
            )
            for key in rows[i]
        }
        for i in range(2)
    ]
|
||||
|
||||
def getAddsTo(self, query, predicate):
    """Run ``query``, group its rows with ``parseData`` and wrap them as
    ``addsTo`` blocks for ``predicate``."""
    grouped = parseData(self.runQuery(query))
    return self.asAddsTo(grouped, predicate)
|
||||
|
||||
|
||||
def makeContent(self):
    """Run the four extraction queries and merge their results.

    Returns a two-element list: index 0 merges the first (non-generated)
    dict of each query result, index 1 merges the second (generated) one.
    Queries run in the order summaries, sources, sinks, neutrals; results
    are merged in the order sources, sinks, summaries, neutrals.
    """
    summaries = self.getAddsTo("ExtractSummaries.ql", helpers.summaryModelPredicate)
    sources = self.getAddsTo("ExtractSources.ql", helpers.sourceModelPredicate)
    sinks = self.getAddsTo("ExtractSinks.ql", helpers.sinkModelPredicate)
    neutrals = self.getAddsTo("ExtractNeutrals.ql", helpers.neutralModelPredicate)

    return [
        helpers.merge(sources[i], sinks[i], summaries[i], neutrals[i])
        for i in range(2)
    ]
|
||||
|
||||
def save(self, extensions):
|
||||
# Create directory if it doesn't exist
|
||||
@@ -77,9 +98,11 @@ class Converter:
|
||||
for entry in extensions[0]:
|
||||
with open(self.extDir + "/" + entry + self.modelFileExtension, "w") as f:
|
||||
f.write(extensionTemplate.format(extensions[0][entry]))
|
||||
|
||||
|
||||
for entry in extensions[1]:
|
||||
with open(self.extDir + "/generated/" + entry + self.modelFileExtension, "w") as f:
|
||||
with open(
|
||||
self.extDir + "/generated/" + entry + self.modelFileExtension, "w"
|
||||
) as f:
|
||||
f.write(extensionTemplate.format(extensions[1][entry]))
|
||||
|
||||
def run(self):
|
||||
|
||||
@@ -7,184 +7,229 @@ import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import re
|
||||
import argparse
|
||||
|
||||
|
||||
def quote_if_needed(row):
    """Format one column: ``"true"``/``"false"`` (the subtypes column) get
    their first letter upper-cased, every other value is double-quoted."""
    if row == "true" or row == "false":
        # subtypes column
        return row[0].upper() + row[1:]
    return '"' + row + '"'
|
||||
|
||||
|
||||
def parseData(data):
    """Group semicolon-separated model rows by namespace.

    The first column of every row is split on ``;``; its first field is the
    namespace key. The formatted list line for the row is appended under
    that key via ``helpers.insert_update``. Returns the resulting dict.
    """
    by_namespace = {}
    for row in data:
        fields = row[0].split(";")
        rendered = ", ".join(quote_if_needed(field) for field in fields)
        helpers.insert_update(by_namespace, fields[0], " - [" + rendered + "]\n")
    return by_namespace
|
||||
|
||||
|
||||
def printHelp():
|
||||
print(f"""Usage:
|
||||
python3 generate_mad.py <library-database> [DIR] --language LANGUAGE [--with-sinks] [--with-sources] [--with-summaries] [--with-neutrals] [--with-typebased-summaries] [--dry-run]
|
||||
|
||||
description = """\
|
||||
This generates summary, source, sink and neutral models for the code in the database.
|
||||
The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR`
|
||||
|
||||
Which models are generated is controlled by the flags:
|
||||
--with-sinks
|
||||
--with-sources
|
||||
--with-summaries
|
||||
--with-neutrals
|
||||
--with-typebased-summaries (Experimental)
|
||||
If none of these flags are specified, all models are generated except for the type based models.
|
||||
|
||||
--dry-run: Only run the queries, but don't write to file.
|
||||
The files will be placed in `LANGUAGE/ql/lib/ext/generated/DIR`"""
|
||||
|
||||
epilog = """\
|
||||
Example invocations:
|
||||
$ python3 generate_mad.py /tmp/dbs/my_library_db
|
||||
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks
|
||||
$ python3 generate_mad.py /tmp/dbs/my_library_db --with-sinks my_directory
|
||||
|
||||
Requirements: `codeql` should appear on your path."""
|
||||
|
||||
Requirements: `codeql` should appear on your path.
|
||||
""")
|
||||
|
||||
class Generator:
|
||||
def __init__(self, language):
|
||||
with_sinks = False
|
||||
with_sources = False
|
||||
with_summaries = False
|
||||
with_neutrals = False
|
||||
with_typebased_summaries = False
|
||||
dry_run = False
|
||||
dirname = "modelgenerator"
|
||||
ram = None
|
||||
threads = 0
|
||||
folder = ""
|
||||
single_file = None
|
||||
|
||||
def __init__(self, language=None):
|
||||
self.language = language
|
||||
self.generateSinks = False
|
||||
self.generateSources = False
|
||||
self.generateSummaries = False
|
||||
self.generateNeutrals = False
|
||||
self.generateTypeBasedSummaries = False
|
||||
self.dryRun = False
|
||||
self.dirname = "modelgenerator"
|
||||
self.ram = 2**15
|
||||
self.threads = 8
|
||||
|
||||
|
||||
def setenvironment(self, database, folder):
|
||||
self.codeQlRoot = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
|
||||
self.database = database
|
||||
self.generatedFrameworks = os.path.join(
|
||||
self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/{folder}")
|
||||
def setenvironment(self, database=None, folder=None):
|
||||
self.codeql_root = (
|
||||
subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
self.database = database or self.database
|
||||
self.folder = folder or self.folder
|
||||
self.generated_frameworks = os.path.join(
|
||||
self.codeql_root, f"{self.language}/ql/lib/ext/generated/{self.folder}"
|
||||
)
|
||||
self.workDir = tempfile.mkdtemp()
|
||||
os.makedirs(self.generatedFrameworks, exist_ok=True)
|
||||
|
||||
if self.ram is None:
|
||||
threads = self.threads if self.threads > 0 else os.cpu_count()
|
||||
self.ram = 2048 * threads
|
||||
os.makedirs(self.generated_frameworks, exist_ok=True)
|
||||
|
||||
@staticmethod
|
||||
def make():
|
||||
# Create a generator instance based on command line arguments.
|
||||
if any(s == "--help" for s in sys.argv):
|
||||
printHelp()
|
||||
sys.exit(0)
|
||||
p = argparse.ArgumentParser(
|
||||
description=description,
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
epilog=epilog,
|
||||
)
|
||||
p.add_argument("database", help="Path to the CodeQL database")
|
||||
p.add_argument(
|
||||
"folder",
|
||||
nargs="?",
|
||||
default="",
|
||||
help="Optional folder to place the generated files in",
|
||||
)
|
||||
p.add_argument(
|
||||
"--language",
|
||||
required=True,
|
||||
help="The language for which to generate models",
|
||||
)
|
||||
p.add_argument(
|
||||
"--with-sinks",
|
||||
action="store_true",
|
||||
help="Generate sink models",
|
||||
)
|
||||
p.add_argument(
|
||||
"--with-sources",
|
||||
action="store_true",
|
||||
help="Generate source models",
|
||||
)
|
||||
p.add_argument(
|
||||
"--with-summaries",
|
||||
action="store_true",
|
||||
help="Generate summary models",
|
||||
)
|
||||
p.add_argument(
|
||||
"--with-neutrals",
|
||||
action="store_true",
|
||||
help="Generate neutral models",
|
||||
)
|
||||
p.add_argument(
|
||||
"--with-typebased-summaries",
|
||||
action="store_true",
|
||||
help="Generate type-based summary models (experimental)",
|
||||
)
|
||||
p.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Do not write the generated files, just print them to stdout",
|
||||
)
|
||||
p.add_argument(
|
||||
"--threads",
|
||||
type=int,
|
||||
default=Generator.threads,
|
||||
help="Number of threads to use for CodeQL queries (default %(default)s). `0` means use all available threads.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--ram",
|
||||
type=int,
|
||||
help="Amount of RAM to use for CodeQL queries in MB. Default is to use 2048 MB per thread.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--single-file",
|
||||
help="Generate a single file with all models instead of separate files for each namespace, using provided argument as the base filename.",
|
||||
)
|
||||
generator = p.parse_args(namespace=Generator())
|
||||
|
||||
if "--language" in sys.argv:
|
||||
language = sys.argv[sys.argv.index("--language") + 1]
|
||||
sys.argv.remove("--language")
|
||||
sys.argv.remove(language)
|
||||
else:
|
||||
printHelp()
|
||||
sys.exit(0)
|
||||
|
||||
generator = Generator(language=language)
|
||||
|
||||
if "--with-sinks" in sys.argv:
|
||||
sys.argv.remove("--with-sinks")
|
||||
generator.generateSinks = True
|
||||
|
||||
if "--with-sources" in sys.argv:
|
||||
sys.argv.remove("--with-sources")
|
||||
generator.generateSources = True
|
||||
|
||||
if "--with-summaries" in sys.argv:
|
||||
sys.argv.remove("--with-summaries")
|
||||
generator.generateSummaries = True
|
||||
|
||||
if "--with-neutrals" in sys.argv:
|
||||
sys.argv.remove("--with-neutrals")
|
||||
generator.generateNeutrals = True
|
||||
|
||||
if "--with-typebased-summaries" in sys.argv:
|
||||
sys.argv.remove("--with-typebased-summaries")
|
||||
generator.generateTypeBasedSummaries = True
|
||||
|
||||
if "--dry-run" in sys.argv:
|
||||
sys.argv.remove("--dry-run")
|
||||
generator.dryRun = True
|
||||
|
||||
if (not generator.generateSinks and
|
||||
not generator.generateSources and
|
||||
not generator.generateSummaries and
|
||||
not generator.generateNeutrals and
|
||||
not generator.generateTypeBasedSummaries):
|
||||
generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True
|
||||
|
||||
n = len(sys.argv)
|
||||
if n < 2:
|
||||
printHelp()
|
||||
sys.exit(1)
|
||||
elif n == 2:
|
||||
generator.setenvironment(sys.argv[1], "")
|
||||
else:
|
||||
generator.setenvironment(sys.argv[1], sys.argv[2])
|
||||
if (
|
||||
not generator.with_sinks
|
||||
and not generator.with_sources
|
||||
and not generator.with_summaries
|
||||
and not generator.with_neutrals
|
||||
and not generator.with_typebased_summaries
|
||||
):
|
||||
generator.with_sinks = True
|
||||
generator.with_sources = True
|
||||
generator.with_summaries = True
|
||||
generator.with_neutrals = True
|
||||
|
||||
generator.setenvironment()
|
||||
return generator
|
||||
|
||||
|
||||
def runQuery(self, query):
    """Run ``query`` from the generator's utils directory against the
    database, with the configured thread and RAM settings, and return the
    decoded rows from ``helpers.readData``."""
    print("########## Querying " + query + "...")
    query_dir = f"{self.language}/ql/src/utils/{self.dirname}"
    query_path = os.path.join(self.codeql_root, query_dir, query)
    bqrs_path = os.path.join(self.workDir, "out.bqrs")

    command = ["codeql", "query", "run", query_path]
    command += ["--database", self.database]
    command += ["--output", bqrs_path]
    command += ["--threads", str(self.threads)]
    command += ["--ram", str(self.ram)]
    helpers.run_cmd(command, "Failed to generate " + query)

    return helpers.readData(self.workDir, bqrs_path)
|
||||
|
||||
|
||||
def asAddsTo(self, rows, predicate):
    """Wrap every value of ``rows`` in ``helpers.addsToTemplate`` for the
    ``codeql/<language>-all`` pack and the given ``predicate``."""
    return {
        key: helpers.addsToTemplate.format(
            f"codeql/{self.language}-all", predicate, rows[key]
        )
        for key in rows
    }
|
||||
|
||||
def getAddsTo(self, query, predicate):
    """Run ``query`` and wrap its grouped rows as ``addsTo`` blocks.

    When ``single_file`` is set and the query produced rows, all groups are
    concatenated under the single key ``self.single_file``.
    """
    grouped = parseData(self.runQuery(query))
    if self.single_file and grouped:
        combined = "".join(grouped.values())
        grouped = {self.single_file: combined}
    return self.asAddsTo(grouped, predicate)
|
||||
|
||||
def makeContent(self):
    """Run the capture query for each enabled model kind and merge the results.

    Kinds are processed in the order summaries, sinks, sources, neutrals; a
    disabled kind contributes an empty dict.
    """
    summaryAddsTo = (
        self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate)
        if self.with_summaries
        else {}
    )
    sinkAddsTo = (
        self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate)
        if self.with_sinks
        else {}
    )
    sourceAddsTo = (
        self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate)
        if self.with_sources
        else {}
    )
    neutralAddsTo = (
        self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate)
        if self.with_neutrals
        else {}
    )
    return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo)
|
||||
|
||||
def makeTypeBasedContent(self):
    """Return the type-based summary models, or an empty dict when
    ``with_typebased_summaries`` is not set."""
    if not self.with_typebased_summaries:
        return {}
    return self.getAddsTo(
        "CaptureTypeBasedSummaryModels.ql", helpers.summaryModelPredicate
    )
|
||||
|
||||
@@ -195,29 +240,33 @@ extensions:
|
||||
{0}"""
|
||||
for entry in extensions:
|
||||
# Replace problematic characters with dashes, and collapse multiple dashes.
|
||||
sanitizedEntry = re.sub(r'-+', '-', entry.replace('/', '-').replace(':', '-'))
|
||||
target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension)
|
||||
sanitizedEntry = re.sub(
|
||||
r"-+", "-", entry.replace("/", "-").replace(":", "-")
|
||||
)
|
||||
target = os.path.join(self.generated_frameworks, sanitizedEntry + extension)
|
||||
with open(target, "w") as f:
|
||||
f.write(extensionTemplate.format(extensions[entry]))
|
||||
print("Models as data extensions written to " + target)
|
||||
|
||||
|
||||
def run(self):
    """Generate all requested models and write them out.

    Both content sets are always computed first. In dry-run mode a notice is
    printed and the process exits with status 0 without writing anything.
    Otherwise the regular models are saved as ``.model.yml`` when any of the
    sink/source/summary/neutral kinds is enabled, and the type-based models
    as ``.typebased.model.yml`` when that kind is enabled.
    """
    content = self.makeContent()
    typeBasedContent = self.makeTypeBasedContent()

    if self.dry_run:
        print("Models as data extensions generated, but not written to file.")
        sys.exit(0)

    wants_regular_models = any(
        (self.with_sinks, self.with_sources, self.with_summaries, self.with_neutrals)
    )
    if wants_regular_models:
        self.save(content, ".model.yml")

    if self.with_typebased_summaries:
        self.save(typeBasedContent, ".typebased.model.yml")
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
Generator.make().run()
|
||||
|
||||
@@ -14,37 +14,53 @@ addsToTemplate = """ - addsTo:
|
||||
data:
|
||||
{2}"""
|
||||
|
||||
|
||||
def remove_dir(dirName):
    """Recursively delete ``dirName`` if it is an existing directory and
    report the removal; do nothing otherwise."""
    if not os.path.isdir(dirName):
        return
    shutil.rmtree(dirName)
    print("Removed directory:", dirName)
|
||||
|
||||
|
||||
def run_cmd(cmd, msg="Failed to run command"):
    """Run ``cmd`` and terminate the script if it fails.

    ``cmd`` is an argument list; its elements are stringified only for the
    log line. When the command exits with a non-zero status, ``msg`` is
    printed and the process exits with status 1.

    ``subprocess.call`` is used rather than ``check_call``: ``check_call``
    raises on failure and returns 0 otherwise, so a status test on its
    result can never fire and ``msg`` would never be reported.
    """
    print("Running " + " ".join(map(str, cmd)))
    if subprocess.call(cmd) != 0:
        print(msg)
        raise SystemExit(1)
|
||||
|
||||
|
||||
def readData(workDir, bqrsFile):
    """Decode ``bqrsFile`` to JSON inside ``workDir`` and return its tuples.

    The decoded file is ``<workDir>/out.json``; the returned value is the
    ``tuples`` entry of its ``#select`` result set. If either key is
    missing, an error is printed and the process exits with status 1.
    """
    json_path = os.path.join(workDir, "out.json")
    print("Decoding BQRS to JSON.")
    decode_cmd = ["codeql", "bqrs", "decode", bqrsFile]
    decode_cmd += ["--output", json_path, "--format=json"]
    run_cmd(decode_cmd, "Failed to decode BQRS.")

    with open(json_path) as handle:
        decoded = json.load(handle)

    try:
        return decoded["#select"]["tuples"]
    except KeyError:
        print("Unexpected JSON output - no tuples found")
        exit(1)
|
||||
|
||||
|
||||
def insert_update(rows, key, value):
    """Store ``value`` under ``key`` in ``rows``, appending with ``+=`` when
    the key is already present."""
    if key not in rows:
        rows[key] = value
        return
    rows[key] += value
|
||||
|
||||
|
||||
def merge(*dicts):
|
||||
merged = {}
|
||||
for d in dicts:
|
||||
|
||||
Reference in New Issue
Block a user