Add RST documentation page

This commit is contained in:
Tamas Vajk
2021-05-05 16:17:18 +02:00
parent beea36191b
commit ef414681be

View File

@@ -19,7 +19,7 @@ def create_empty_database(lang, extension, database):
subprocess_run(["codeql", "database", "init", "--language=" + lang,
"--source-root=/tmp/empty", "--allow-missing-source-root", database])
subprocess_run(["mkdir", "-p", database + "/src/tmp/empty"])
subprocess_run(["touch", database + "/src/tmp/empty/empty." + extension])
subprocess_run(["touch", database + "/src/tmp/empty/empty" + extension])
subprocess_run(["codeql", "database", "finalize",
database, "--no-pre-finalize"])
@@ -89,8 +89,9 @@ def add_package_stats_to_row(row, sorted_cwes, collect):
class LanguageConfig:
    """Per-language settings for generating a framework coverage report.

    Attributes:
        lang: Language identifier; used to build the database name and the
            names of the generated/consumed CSV files.
        capitalized_lang: Display form of the language name; used in the
            heading written to the RST page.
        ext: Source file extension handed to `create_empty_database`.
            It is concatenated directly onto the file name there, so it has
            to include the leading dot (e.g. ".java").
        ql_path: Path of the coverage query handed to `run_codeql_query`.
    """

    def __init__(self, lang, capitalized_lang, ext, ql_path):
        self.lang = lang
        self.capitalized_lang = capitalized_lang
        self.ext = ext
        self.ql_path = ql_path
@@ -108,151 +109,163 @@ if len(sys.argv) > 1:
# Languages for which we want to generate coverage reports.
configs = [
    LanguageConfig(
        "java", "Java", ".java",
        prefix + "java/ql/src/meta/frameworks/Coverage.ql")
]


def _package_matches(pattern, package):
    """Return True if `package` is selected by a framework's package pattern.

    A pattern ending in "*" selects every package starting with the text in
    front of the "*"; any other pattern must equal the package name exactly.
    """
    if pattern.endswith("*"):
        return package.startswith(pattern[:-1])
    return pattern == package


# One RST page is produced for all languages; each language contributes a
# section with a csv-table directive pointing at its own CSV file.
with open("csv-flow-model-coverage.rst", 'w') as rst_file:
    for config in configs:
        lang = config.lang
        db = "empty-" + lang
        ql_output = "output-" + lang + ".csv"
        create_empty_database(lang, config.ext, db)
        run_codeql_query(config.ql_path, db, ql_output)

        packages = {}
        parts = set()
        kinds = set()

        # Read the generated CSV file, and collect package statistics.
        # Columns used: 0 = package, 1 = count, 2 = kind, 3 = part,
        # 4 = integer amount that is summed per part and per part:kind.
        with open(ql_output) as csvfile:
            for row in csv.reader(csvfile):
                package = row[0]
                if package not in packages:
                    packages[package] = {
                        "count": row[1],
                        "part": {},
                        "kind": {}
                    }
                stats = packages[package]
                amount = int(row[4])

                part = row[3]
                parts.add(part)
                stats["part"][part] = stats["part"].get(part, 0) + amount

                kind = part + ":" + row[2]
                kinds.add(kind)
                stats["kind"][kind] = stats["kind"].get(kind, 0) + amount

        # Write the denormalized package statistics to a CSV file.
        with open("csv-flow-model-coverage-" + lang + ".csv", 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)

            # Sorted so that the column order is stable between runs.
            parts = sorted(parts)
            kinds = sorted(kinds)

            columns = ["package"]
            columns.extend(parts)
            columns.extend(kinds)
            csvwriter.writerow(columns)

            for package in sorted(packages):
                row = [package]
                for part in parts:
                    append_csv_dict_item(row, packages[package]["part"], part)
                for kind in kinds:
                    append_csv_dict_item(row, packages[package]["kind"], kind)
                csvwriter.writerow(row)

        # Read the additional framework data, such as URL, friendly name.
        # The first occurrence of a framework name wins.
        frameworks = {}
        with open(prefix + "misc/scripts/frameworks-" + lang + ".csv") as csvfile:
            reader = csv.reader(csvfile)
            next(reader)  # skip the header row
            for row in reader:
                framework = row[0]
                if framework not in frameworks:
                    frameworks[framework] = {
                        "package": row[2],
                        "url": row[1]
                    }

        # Read the additional CWE data. The first occurrence of a CWE wins.
        cwes = {}
        with open(prefix + "misc/scripts/cwe-sink-" + lang + ".csv") as csvfile:
            reader = csv.reader(csvfile)
            next(reader)  # skip the header row
            for row in reader:
                cwe = row[0]
                if cwe not in cwes:
                    cwes[cwe] = {
                        "sink": row[1],
                        "label": row[2]
                    }
        sorted_cwes = sorted(cwes)

        file_name = "rst-csv-flow-model-coverage-" + lang + ".csv"

        # An RST section underline must be at least as long as its title, so
        # derive it from the title instead of hard-coding a length that only
        # fits one language name.
        title = config.capitalized_lang + " framework & library support"
        rst_file.write(title + "\n")
        rst_file.write("=" * len(title) + "\n\n")
        rst_file.write(".. csv-table:: \n")
        rst_file.write(" :file: " + file_name + "\n")
        rst_file.write(" :header-rows: 1\n")
        rst_file.write(" :class: fullWidthTable\n")
        rst_file.write(" :widths: auto\n\n")

        # Write CSV file with package statistics and framework data to be used in RST file.
        with open(file_name, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)

            columns = ["Framework / library", "package",
                       "remote flow sources", "taint & value steps", "sinks (total)"]
            for cwe in sorted_cwes:
                columns.append("`" + cwe + "` :sub:`" +
                               cwes[cwe]["label"] + "`")
            csvwriter.writerow(columns)

            # Packages already attributed to some framework; everything else
            # ends up in the "Others" row.
            processed_packages = set()

            for framework in sorted(frameworks):
                row = []

                # Add the framework name to the row, as an RST hyperlink when
                # a URL is known.
                if not frameworks[framework]["url"]:
                    row.append(framework)
                else:
                    row.append(
                        "`" + framework + " <" + frameworks[framework]["url"] + ">`_")

                # Add the package name to the row.
                # NOTE: this must not be stored in `prefix` - that name holds
                # the path prefix used to open the input CSV files for the
                # next language.
                package_pattern = frameworks[framework]["package"]
                row.append(package_pattern)

                # Collect statistics on the current framework. The pattern is
                # bound as a default argument so the callback does not depend
                # on the loop variable's later values.
                def collect_framework(pattern=package_pattern):
                    return collect_package_stats(
                        packages,
                        lambda p: _package_matches(pattern, p))

                row, f_processed_packages = add_package_stats_to_row(
                    row, sorted_cwes, collect_framework)

                csvwriter.writerow(row)
                processed_packages.update(f_processed_packages)

            # Collect statistics on all packages that are not part of a framework
            row = ["Others", None]

            def collect_others():
                return collect_package_stats(
                    packages,
                    lambda p: p not in processed_packages)

            row, other_packages = add_package_stats_to_row(
                row, sorted_cwes, collect_others)

            # List the uncategorized packages in the "package" column.
            row[1] = ", ".join(sorted(other_packages))

            csvwriter.writerow(row)

            # Collect statistics on all packages
            row = ["Total", None]

            def collect_total():
                return collect_package_stats(
                    packages,
                    lambda p: True)

            row, _ = add_package_stats_to_row(
                row, sorted_cwes, collect_total)

            csvwriter.writerow(row)