Merge pull request #4550 from github/query-help-tests

[docs] Add new process to generate query help for help site
This commit is contained in:
James Fletcher
2020-12-02 17:46:39 +00:00
committed by GitHub
13 changed files with 469 additions and 2 deletions

View File

@@ -109,5 +109,5 @@ templates_path = ['_templates']
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
exclude_patterns = ['vale*', '_static', '_templates', 'codeql', 'learn-ql', 'reusables', 'images', 'support', 'ql-training', '_build', '*.py*', 'README.rst']
##############################################################################
exclude_patterns = ['vale*', '_static', '_templates', 'reusables', 'images', 'support', 'ql-training', 'query-help','_build', '*.py*', 'README.rst']
##############################################################################

View File

@@ -0,0 +1,256 @@
import re
import subprocess
import json
import csv
import sys
import os
"""
This script collects CodeQL queries that are part of code scanning query packs,
renders the accompanying query help as markdown, inserts some useful metadata
into the help, and adds a link to the query in the CodeQL repo.
This script requires that 'git' and 'codeql' commands
are on the PATH. It'll try to automatically set the CodeQL search path correctly,
as long as you run the script from one of the following locations:
- anywhere from within a clone of the CodeQL Git repo
- from the parent directory of a clone of the CodeQL Git repo (assuming 'codeql'
and 'codeql-go' directories both exist)
"""
# Languages for which query help is generated
languages = [
    "cpp",
    "csharp",
    "go",
    "java",
    "javascript",
    "python",
]

# Query suites whose queries get a help page - the lgtm suite should cover the queries
# that users are interested in
packs = [
    "lgtm",
]
def prefix_repo_nwo(filename):
    """
    Replaces an absolute path prefix with a GitHub repository name with owner (NWO).
    This function relies on `git` being available.

    For example:
        /home/alice/git/ql/java/ql/src/MyQuery.ql
    becomes:
        github/codeql/java/ql/src/MyQuery.ql

    If we can't detect a known NWO (e.g. github/codeql, github/codeql-go), the
    path will be truncated to the root of the git repo:
        ql/java/ql/src/MyQuery.ql

    If the filename is not part of a Git repo, the return value is the
    same as the input value: the whole path.
    """
    dirname = os.path.dirname(filename)

    try:
        git_toplevel_dir_subp = subprocess_run(
            ["git", "-C", dirname, "rev-parse", "--show-toplevel"])
    except Exception:
        # Not a Git repo. Catching Exception (rather than using a bare 'except') keeps the
        # best-effort fallback while letting KeyboardInterrupt/SystemExit propagate, so
        # that interrupting the script is not mistaken for "not a Git repo".
        return filename

    git_toplevel_dir = git_toplevel_dir_subp.stdout.strip()

    # Detect 'github/codeql' and 'github/codeql-go' repositories by checking the remote (it's a bit
    # of a hack but will work in most cases, as long as the remotes have 'codeql' and 'codeql-go'
    # in the URL). 'codeql-go' must be tested first because 'codeql' is a substring of it.
    git_remotes = subprocess_run(
        ["git", "-C", dirname, "remote", "-v"]).stdout.strip()

    if "codeql-go" in git_remotes:
        prefix = "github/codeql-go"
    elif "codeql" in git_remotes:
        prefix = "github/codeql"
    else:
        prefix = os.path.basename(git_toplevel_dir)

    # Drop the repository root (and the path separator that follows it) from the filename
    return os.path.join(prefix, filename[len(git_toplevel_dir)+1:])
def single_spaces(input):
    """
    Collapses every run of whitespace (spaces, tabs, newlines) in the input into a
    single space and removes leading and trailing whitespace.

    Workaround for https://github.com/github/codeql-coreql-team/issues/470 which causes
    some metadata strings to contain newlines and spaces without a good reason.
    """
    words = input.split()
    collapsed = " ".join(words)
    return collapsed
def get_query_metadata(key, metadata, queryfile):
    """
    Looks up a single metadata property of a query.

    Returns the value stored under 'key' in 'metadata' with its whitespace normalized by
    single_spaces(). If the key is missing, a warning naming the key, the query ID (or
    'unknown' when there is no ID either) and 'queryfile' is printed to stderr, and an
    empty string is returned.
    """
    if key not in metadata:
        query_id = metadata.get('id', 'unknown')
        warning = "Warning: no '%s' metadata for query with ID '%s' (%s)" % (
            key, query_id, queryfile)
        print(warning, file=sys.stderr)
        return ""

    return single_spaces(metadata[key])
def subprocess_run(cmd):
    """
    Runs a command through subprocess.run and returns the resulting CompletedProcess.

    stdout and stderr are captured as text, the child process receives a copy of the
    current environment, and a non-zero exit code raises an exception
    (subprocess.CalledProcessError).
    """
    run_options = {
        "capture_output": True,
        "text": True,
        "env": os.environ.copy(),
        "check": True,
    }
    return subprocess.run(cmd, **run_options)
# Check that the external tools this script depends on can be invoked: first `git`, then the
# CodeQL CLI. If a tool cannot be run, an error is printed to stderr and the exception is re-raised.
for required_tool, failure_message in (
    ("git", "Error: couldn't invoke 'git'. Is it on the path? Aborting."),
    ("codeql", "Error: couldn't invoke CodeQL CLI 'codeql'. Is it on the path? Aborting."),
):
    try:
        subprocess_run([required_tool, "--version"])
    except Exception:
        print(failure_message, file=sys.stderr)
        raise
# Define CodeQL search path so it'll find the CodeQL repositories:
#  - anywhere in the current Git clone (including current working directory)
#  - the 'codeql' subdirectory of the cwd
#
# (and assumes the codeql-go repo is in a similar location)
codeql_search_path = "./codeql:./codeql-go"  # will be extended further down

# Extend CodeQL search path by detecting root of the current Git repo (if any). This means that you
# can run this script from any location within the CodeQL git repository.
try:
    # Take the path from the command's stdout: subprocess_run returns a CompletedProcess, which
    # cannot be concatenated with a string directly.
    git_toplevel_dir = subprocess_run(
        ["git", "rev-parse", "--show-toplevel"]).stdout.strip()
except Exception:
    # git rev-parse --show-toplevel exited with non-zero exit code. We're not in a Git repo,
    # so the default search path is left as it is.
    pass
else:
    # Current working directory is in a Git repo. Add it to the search path, just in case it's
    # the CodeQL repo, together with a sibling 'codeql-go' directory.
    codeql_search_path += ":" + git_toplevel_dir + ":" + git_toplevel_dir + "/../codeql-go"
# Root directory for all generated output. Defined before the loops so that it is available when
# the toctree is written, even if no query was processed for a language.
docs_dir = 'query-help'

# Iterate over all languages and packs, and resolve which queries are part of those packs
for lang in languages:
    # Resolve the queries of the three code scanning suites once per language. Each result is kept
    # as a set of query paths (one path per output line) keyed by the suite file name, so that
    # membership checks further down are exact path matches rather than substring matches.
    suite_queries = {}
    for suite in ("code-scanning", "security-extended", "security-and-quality"):
        suite_file = "%s-%s.qls" % (lang, suite)
        suite_queries[suite_file] = set(subprocess_run(
            ["codeql", "resolve", "queries", "--search-path", codeql_search_path, suite_file]
        ).stdout.strip().splitlines())

    # Define empty dictionary to store @name:filename pairs to generate alphabetically sorted
    # Sphinx toctree. It is filled across all packs of this language.
    index_file_dictionary = {}

    for pack in packs:
        # Get absolute paths to queries in this pack by using 'codeql resolve queries'
        try:
            queries_subp = subprocess_run(
                ["codeql", "resolve", "queries", "--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)])
        except Exception:
            # Resolving queries might go wrong if the github/codeql and github/codeql-go repositories are not
            # on the search path.
            print(
                "Warning: couldn't find query pack '%s' for language '%s'. Do you have the right repositories in the right places (search path: '%s')?" % (
                    pack, lang, codeql_search_path),
                file=sys.stderr
            )
            continue

        # Investigate metadata for every query by using 'codeql resolve metadata'.
        # splitlines() yields nothing for empty output, so an empty pack is simply skipped.
        for queryfile in queries_subp.stdout.strip().splitlines():
            query_metadata_json = subprocess_run(
                ["codeql", "resolve", "metadata", queryfile]).stdout.strip()
            meta = json.loads(query_metadata_json)

            # Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)
            queryfile_nwo = prefix_repo_nwo(queryfile)

            # Generate the query help for each query
            try:
                query_help = subprocess_run(
                    ["codeql", "generate", "query-help", "--format=markdown", "--warnings=error", queryfile]).stdout.strip()
            except Exception:
                # Print a message if generate query help fails
                print("Failed to generate query help for '%s'" % (queryfile_nwo))
                continue

            # Pull out relevant query metadata properties that we want to display in the query help
            query_name_meta = get_query_metadata('name', meta, queryfile)
            # The description is not inserted into the help, but the lookup is kept so that a
            # missing description is still reported as a warning.
            query_description = get_query_metadata(
                'description', meta, queryfile)
            raw_query_id = get_query_metadata('id', meta, queryfile)
            query_id = "ID: " + raw_query_id + "\n"
            query_kind = "Kind: " + \
                get_query_metadata('kind', meta, queryfile) + "\n"
            query_severity = "Severity: " + \
                get_query_metadata('problem.severity', meta, queryfile) + "\n"
            query_precision = "Precision: " + \
                get_query_metadata('precision', meta, queryfile) + "\n"
            query_tags = "Tags:\n - " + \
                get_query_metadata('tags', meta, queryfile).replace(" ", "\n - ") + "\n"

            # Build a link to the query source file for display in the query help. The repository
            # is identified from the NWO prefix (testing for a bare "go" would also match paths
            # such as "...Algorithm.ql"), and only the first occurrence of the repository name,
            # i.e. the one in the prefix, is extended with the branch.
            if queryfile_nwo.startswith("github/codeql-go"):
                repo_name = "codeql-go"
            else:
                repo_name = "codeql"
            transform_link = queryfile_nwo.replace(
                repo_name, repo_name + "/tree/main", 1).replace(" ", "%20").replace("\\", "/")
            query_link = "[Click to see the query in the CodeQL repository](https://github.com/" + \
                transform_link + ")\n"

            # List the code scanning suites that include this query (if any)
            member_suites = [
                suite_file for suite_file, suite_paths in suite_queries.items()
                if queryfile in suite_paths
            ]
            if member_suites:
                suites_list = "Query suites:\n" + \
                    "".join(' - ' + suite_file + '\n' for suite_file in member_suites)
            else:
                suites_list = ""

            # Join metadata into a literal block and add query link below
            meta_string = "\n"*2 + "```\n" + query_id + query_kind + query_severity + \
                query_precision + query_tags + suites_list + "```\n\n" + query_link + "\n"

            # Insert metadata block into query help directly under title
            full_help = query_help.replace("\n", meta_string, 1)

            # Use id property to make name for markdown file, replacing any "/" characters with "-"
            query_name = raw_query_id.replace("/", "-")

            # Populate index_file_dictionary with @name extracted from metadata and corresponding query filename
            index_file_dictionary[query_name_meta] = lang + "/" + query_name

            # Make paths for output of the form: query-help/<lang>/<query-id>.md
            md_dir_path = os.path.join(docs_dir, lang)
            md_file_path = os.path.join(md_dir_path, query_name + ".md")

            # Make directories for output paths if they don't already exist
            os.makedirs(md_dir_path, exist_ok=True)

            # Generate query help at chosen path if output file doesn't already exist
            if not os.path.exists(md_file_path):
                with open(md_file_path, "x") as md_file:
                    md_file.write(full_help)

    # Sort index entries alphabetically by @name key, and create column of filename values
    sorted_index = ("\n" + " ").join(
        filename for _, filename in sorted(index_file_dictionary.items()))

    # Add directives to make sorted_index a valid toctree for sphinx source files
    toc_directive = ".. toctree::\n :titlesonly:\n\n "
    toc_include = toc_directive + sorted_index

    # Write toctree to rst. The file is overwritten if it already exists, so that the script can
    # be re-run; existing markdown files are skipped above, so the toctree still lists them.
    os.makedirs(docs_dir, exist_ok=True)
    toc_file = os.path.join(docs_dir, "toc-" + lang + ".rst")
    with open(toc_file, "w") as toc:
        toc.write(toc_include)

View File

@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
#
# CodeQL query help configuration file
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# For details of all possible config values,
# see https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Project-specific configuration -----------------------------------

# Name of the document that holds the master toctree.
master_doc = 'index'

# General information about the project.
project = 'CodeQL query help'

# Markdown parser, needed to process the query help markdown files.
extensions = ['recommonmark']

# Map each source file extension to the markup it contains.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# -- Project-specific options for HTML output ----------------------------------------------

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = dict(
    font_size='16px',
    body_text='#333',
    link='#2F1695',
    link_hover='#2F1695',
    show_powered_by=False,
    nosidebar=True,
    head_font_family='-apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"',
)

highlight_language = "none"

# Add any paths that contain templates here, relative to this directory.
templates_path = ['../_templates']

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['../_static']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# The toc-<lang>.rst files are ignored as they are 'included' in index pages.
exclude_patterns = ['toc-*', 'readme.md']

View File

@@ -0,0 +1,9 @@
CodeQL query help for C and C++
===============================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository <https://github.com/github/codeql/tree/main/cpp/ql/examples>`__.
.. include:: toc-cpp.rst

View File

@@ -0,0 +1,8 @@
CodeQL query help for C#
========================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository <https://github.com/github/codeql/tree/main/csharp/ql/examples>`__.
.. include:: toc-csharp.rst

View File

@@ -0,0 +1,8 @@
CodeQL query help for Go
========================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL for Go repository <https://github.com/github/codeql-go/tree/main/ql/examples>`__.
.. include:: toc-go.rst

View File

@@ -0,0 +1,33 @@
CodeQL query help
-----------------
View the query help for the queries included in the ``code-scanning``, ``security-extended``, and ``security-and-quality`` query suites for the languages supported by CodeQL.
- :doc:`CodeQL query help for C and C++ <cpp>`
- :doc:`CodeQL query help for C# <csharp>`
- :doc:`CodeQL query help for Go <go>`
- :doc:`CodeQL query help for Java <java>`
- :doc:`CodeQL query help for JavaScript <javascript>`
- :doc:`CodeQL query help for Python <python>`
.. pull-quote:: Information
Each query help article includes:
- A summary of key metadata for the query.
- Information about which query suites the query is included in.
- A link to the query in the `CodeQL repository <https://github.com/github/codeql>`__.
- A description of the potential vulnerability that the query identifies and a recommendation for how to avoid introducing the problem into your code.
.. toctree::
:hidden:
:titlesonly:
cpp
csharp
go
java
javascript
python

View File

@@ -0,0 +1,8 @@
CodeQL query help for Java
==========================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository <https://github.com/github/codeql/tree/main/java/ql/examples>`__.
.. include:: toc-java.rst

View File

@@ -0,0 +1,8 @@
CodeQL query help for JavaScript
================================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository <https://github.com/github/codeql/tree/main/javascript/ql/examples>`__.
.. include:: toc-javascript.rst

View File

@@ -0,0 +1,8 @@
CodeQL query help for Python
============================
.. include:: ../reusables/query-help-overview.rst
For shorter queries that you can use as building blocks when writing your own queries, see the `example queries in the CodeQL repository <https://github.com/github/codeql/tree/main/python/ql/examples>`__.
.. include:: toc-python.rst

View File

@@ -0,0 +1,8 @@
CodeQL query help Sphinx documentation
--------------------------------------
This project supplies the configuration and some boilerplate
index files for the CodeQL query help documentation.
The query help itself is automatically generated by the
"Generate CodeQL query help documentation using Sphinx" workflow.

View File

@@ -0,0 +1,5 @@
Visit the articles below to see the documentation for the queries included in the following query suites:
- ``code-scanning``: queries run by default in CodeQL code scanning on GitHub.
- ``security-extended``: queries from ``code-scanning``, plus extra security queries with slightly lower precision and severity.
- ``security-and-quality``: queries from ``code-scanning``, ``security-extended``, plus extra maintainability and reliability queries.