Move files to, and generate files in, the documentation folder; clean up after the script is executed

This commit is contained in:
Tamas Vajk
2021-05-18 11:41:39 +02:00
parent 6dc46ec1ee
commit f1911e338d
4 changed files with 112 additions and 89 deletions

View File

@@ -14,9 +14,9 @@ on:
paths:
- '.github/workflows/csv-coverage.yml'
- 'misc/scripts/generate-csv-coverage-report.py'
- 'misc/scripts/cwe-sink-*.csv'
- 'misc/scripts/frameworks-*.csv'
- 'java/ql/src/meta/frameworks/Coverage.ql'
- 'java/documentation/library-coverage/cwe-sink.csv'
- 'java/documentation/library-coverage/frameworks.csv'
jobs:
build:
@@ -54,16 +54,15 @@ jobs:
run: unzip -d codeql-cli codeql-linux64.zip
- name: Build modeled package list
run: |
PATH="$PATH:codeql-cli/codeql" python script/misc/scripts/generate-csv-coverage-report.py codeqlModels script
PATH="$PATH:codeql-cli/codeql" python script/misc/scripts/generate-csv-coverage-report.py ci codeqlModels script
- name: Upload CSV package list
uses: actions/upload-artifact@v2
with:
name: csv-flow-model-coverage
path: csv-flow-model-coverage-*.csv
path: flow-model-coverage-*.csv
- name: Upload RST package list
uses: actions/upload-artifact@v2
with:
name: rst-flow-model-coverage
path: |
flow-model-coverage.rst
path: flow-model-coverage-*.rst

View File

@@ -1,8 +1,8 @@
import subprocess
import json
import csv
import sys
import os
import shutil
"""
This script runs the CSV coverage report QL query, and transforms it to a more readable format.
@@ -32,6 +32,7 @@ def run_codeql_query(query, database, output):
"--database", database, "--output", output + ".bqrs"])
subprocess_run(["codeql", "bqrs", "decode", output + ".bqrs",
"--format=csv", "--no-titles", "--output", output])
os.remove(output + ".bqrs")
def append_csv_number(list, value):
@@ -120,13 +121,27 @@ except Exception as e:
print("Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting.", file=sys.stderr)
raise e
# The script can be run in two modes:
# (i) dev: run on the local developer machine, and collect the coverage data. The output is generated into the expected
# folders: {language}/documentation/library-coverage/
# (ii) ci: run in a CI action. The output is generated to the root folder, and then in a subsequent step packaged as a
# build artifact.
mode = "dev"
if len(sys.argv) > 1:
mode = sys.argv[1]
if mode != "dev" and mode != "ci":
print("Unknown execution mode: " + mode +
". Expected either 'dev' or 'ci'.", file=sys.stderr)
exit(1)
query_prefix = ""
data_prefix = ""
if len(sys.argv) > 1:
query_prefix = sys.argv[1] + "/"
if len(sys.argv) > 2:
data_prefix = sys.argv[2] + "/"
query_prefix = sys.argv[2] + "/"
if len(sys.argv) > 3:
data_prefix = sys.argv[3] + "/"
# Languages for which we want to generate coverage reports.
configs = [
@@ -135,100 +150,109 @@ configs = [
]
# The names of input and output files. The placeholder {language} is replaced with the language name.
output_rst = "flow-model-coverage.rst"
documentation_folder = "{language}/documentation/library-coverage/"
output_ql_csv = "output-{language}.csv"
output_csv = "csv-flow-model-coverage-{language}.csv"
input_framework_csv = data_prefix + "misc/scripts/frameworks-{language}.csv"
input_cwe_sink_csv = data_prefix + "misc/scripts/cwe-sink-{language}.csv"
input_framework_csv = data_prefix + documentation_folder + "frameworks.csv"
input_cwe_sink_csv = data_prefix + documentation_folder + "cwe-sink.csv"
with open(output_rst, 'w', newline='') as rst_file:
for config in configs:
lang = config.lang
db = "empty-" + lang
ql_output = output_ql_csv.format(language=lang)
create_empty_database(lang, config.ext, db)
run_codeql_query(config.ql_path, db, ql_output)
if mode == "dev":
output_rst = data_prefix + documentation_folder + "flow-model-coverage.rst"
output_csv = data_prefix + documentation_folder + "flow-model-coverage.csv"
else:
output_rst = "flow-model-coverage-{language}.rst"
output_csv = "flow-model-coverage-{language}.csv"
packages = {}
parts = set()
kinds = set()
for config in configs:
lang = config.lang
db = "empty-" + lang
ql_output = output_ql_csv.format(language=lang)
create_empty_database(lang, config.ext, db)
run_codeql_query(config.ql_path, db, ql_output)
shutil.rmtree(db)
# Read the generated CSV file, and collect package statistics.
with open(ql_output) as csvfile:
reader = csv.reader(csvfile)
for row in reader:
# row: "android.util",1,"remote","source",16
package = row[0]
if package not in packages:
packages[package] = {
"count": row[1],
# part: "summary", "sink", or "source"
"part": {},
# kind: "source:remote", "sink:create-file", ...
"kind": {}
}
packages = {}
parts = set()
kinds = set()
part = row[3]
parts.add(part)
increment_dict_item(row[4], packages[package]["part"], part)
# Read the generated CSV file, and collect package statistics.
with open(ql_output) as csvfile:
reader = csv.reader(csvfile)
for row in reader:
# row: "android.util",1,"remote","source",16
package = row[0]
if package not in packages:
packages[package] = {
"count": row[1],
# part: "summary", "sink", or "source"
"part": {},
# kind: "source:remote", "sink:create-file", ...
"kind": {}
}
kind = part + ":" + row[2]
kinds.add(kind)
increment_dict_item(row[4], packages[package]["kind"], kind)
part = row[3]
parts.add(part)
increment_dict_item(row[4], packages[package]["part"], part)
parts = sorted(parts)
kinds = sorted(kinds)
kind = part + ":" + row[2]
kinds.add(kind)
increment_dict_item(row[4], packages[package]["kind"], kind)
# Write the denormalized package statistics to a CSV file.
with open(output_csv.format(language=lang), 'w', newline='') as csvfile:
csvwriter = csv.writer(csvfile)
os.remove(ql_output)
headers = ["package"]
headers.extend(parts)
headers.extend(kinds)
parts = sorted(parts)
kinds = sorted(kinds)
csvwriter.writerow(headers)
# Write the denormalized package statistics to a CSV file.
with open(output_csv.format(language=lang), 'w', newline='') as csvfile:
csvwriter = csv.writer(csvfile)
for package in sorted(packages):
row = [package]
for part in parts:
append_csv_dict_item(row, packages[package]["part"], part)
for kind in kinds:
append_csv_dict_item(row, packages[package]["kind"], kind)
csvwriter.writerow(row)
headers = ["package"]
headers.extend(parts)
headers.extend(kinds)
# Read the additional framework data, such as URL, friendly name
frameworks = {}
csvwriter.writerow(headers)
with open(input_framework_csv.format(language=lang)) as csvfile:
reader = csv.reader(csvfile)
next(reader)
for row in reader:
# row: Hibernate,https://hibernate.org/,org.hibernate
framwork = row[0]
if framwork not in frameworks:
frameworks[framwork] = {
"package": row[2],
"url": row[1]
}
for package in sorted(packages):
row = [package]
for part in parts:
append_csv_dict_item(row, packages[package]["part"], part)
for kind in kinds:
append_csv_dict_item(row, packages[package]["kind"], kind)
csvwriter.writerow(row)
# Read the additional CWE data
cwes = {}
# Read the additional framework data, such as URL, friendly name
frameworks = {}
with open(input_cwe_sink_csv.format(language=lang)) as csvfile:
reader = csv.reader(csvfile)
next(reader)
for row in reader:
# row: CWE-89,sql,SQL injection
cwe = row[0]
if cwe not in cwes:
cwes[cwe] = {
"sink": row[1],
"label": row[2]
}
with open(input_framework_csv.format(language=lang)) as csvfile:
reader = csv.reader(csvfile)
next(reader)
for row in reader:
# row: Hibernate,https://hibernate.org/,org.hibernate
framwork = row[0]
if framwork not in frameworks:
frameworks[framwork] = {
"package": row[2],
"url": row[1]
}
sorted_cwes = sorted(cwes)
# Read the additional CWE data
cwes = {}
with open(input_cwe_sink_csv.format(language=lang)) as csvfile:
reader = csv.reader(csvfile)
next(reader)
for row in reader:
# row: CWE-89,sql,SQL injection
cwe = row[0]
if cwe not in cwes:
cwes[cwe] = {
"sink": row[1],
"label": row[2]
}
sorted_cwes = sorted(cwes)
with open(output_rst.format(language=lang), 'w', newline='') as rst_file:
rst_file.write(
config.capitalized_lang + " framework & library support\n")
rst_file.write("================================\n\n")
@@ -314,4 +338,4 @@ with open(output_rst, 'w', newline='') as rst_file:
csvwriter.writerow(row)
rst_file.write("\n\n")
rst_file.write("\n")