Merge branch 'github:main' into crypto-test

This commit is contained in:
Nicolas Will
2025-04-30 16:35:26 +02:00
committed by GitHub
6768 changed files with 395014 additions and 163489 deletions

View File

@@ -136,6 +136,7 @@ def make_patches_from_log_file(log_file_lines) -> List[Patch]:
known_start_paths = {
# internal CI runs
"/home/runner/work/semmle-code/semmle-code/ql/": CODEQL_REPO_DIR,
"/Users/runner/work/semmle-code/semmle-code/ql/": CODEQL_REPO_DIR,
"/home/runner/work/semmle-code/semmle-code/target/codeql-java-integration-tests/ql/": CODEQL_REPO_DIR,
"/home/runner/work/semmle-code/semmle-code/" : SEMMLE_CODE_DIR,
# github actions on codeql repo

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# Creates a change note and opens it in VSCode for editing.
# Creates a change note and opens it in $EDITOR (or VSCode if the environment
# variable is not set) for editing.
# Expects to receive the following arguments:
# - What language the change note is for
@@ -51,5 +52,6 @@ category: {change_category}
with open(change_note_file, "w") as f:
f.write(change_note)
# Open the change note file in VSCode, reusing the existing window if possible
os.system(f"code -r {change_note_file}")
editor = os.environ.get('EDITOR', 'code -r')
os.system(f"{editor} {change_note_file}")

View File

@@ -30,8 +30,8 @@ arguments = parser.parse_args()
assert hasattr(arguments, "ignore_missing_query_packs")
# Define which languages and query packs to consider
languages = [ "cpp", "csharp", "go", "java", "javascript", "python", "ruby", "swift" ]
packs = [ "code-scanning", "security-and-quality", "security-extended", "security-experimental" ]
languages = [ "actions", "cpp", "csharp", "go", "java", "javascript", "python", "ruby", "swift" ]
packs = [ "code-scanning", "security-and-quality", "security-extended", "security-experimental", "code-quality"]
class CodeQL:
def __init__(self):
@@ -169,7 +169,7 @@ with CodeQL() as codeql:
for pack in packs:
# Get absolute paths to queries in this pack by using 'codeql resolve queries'
try:
queries_subp = codeql.command(["resolve","queries","--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)])
queries_subp = codeql.command(["resolve","queries","--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)]).strip()
except Exception as e:
# Resolving queries might go wrong if the github/codeql repository is not
# on the search path.
@@ -183,8 +183,13 @@ with CodeQL() as codeql:
else:
sys.exit("You can use '--ignore-missing-query-packs' to ignore this error")
# Exception for the code-quality suites, which might be empty, but must be resolvable.
if pack == 'code-quality' and queries_subp == '':
print(f'Warning: skipping empty suite code-quality', file=sys.stderr)
continue
# Investigate metadata for every query by using 'codeql resolve metadata'
for queryfile in queries_subp.strip().split("\n"):
for queryfile in queries_subp.split("\n"):
query_metadata_json = codeql.command(["resolve","metadata",queryfile]).strip()
# Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)

View File

@@ -8,6 +8,7 @@ import shlex
import subprocess
import sys
import tempfile
import re
def quote_if_needed(row):
if row != "true" and row != "false":
@@ -33,8 +34,6 @@ class Generator:
self.generateSources = False
self.generateSummaries = False
self.generateNeutrals = False
self.generateMixedSummaries = False
self.generateMixedNeutrals = False
self.generateTypeBasedSummaries = False
self.dryRun = False
self.dirname = "modelgenerator"
@@ -52,8 +51,6 @@ Which models are generated is controlled by the flags:
--with-sources
--with-summaries
--with-neutrals
--with-mixed-summaries (Experimental). May not be used in conjunction with --with-summaries.
--with-mixed-neutrals (Experimental). Should only be used in conjunction with --with-mixed-summaries.
--with-typebased-summaries (Experimental)
If none of these flags are specified, all models are generated except for the type based models.
@@ -65,7 +62,7 @@ $ python3 GenerateFlowModel.py /tmp/dbs/my_library_db --with-sinks
$ python3 GenerateFlowModel.py /tmp/dbs/my_library_db --with-sinks my_directory
Requirements: `codeql` should both appear on your path.
Requirements: `codeql` should appear on your path.
""")
@@ -85,10 +82,6 @@ Requirements: `codeql` should both appear on your path.
generator.printHelp()
sys.exit(0)
if "--with-summaries" in sys.argv and "--with-mixed-summaries" in sys.argv:
generator.printHelp()
sys.exit(0)
if "--with-sinks" in sys.argv:
sys.argv.remove("--with-sinks")
generator.generateSinks = True
@@ -105,14 +98,6 @@ Requirements: `codeql` should both appear on your path.
sys.argv.remove("--with-neutrals")
generator.generateNeutrals = True
if "--with-mixed-summaries" in sys.argv:
sys.argv.remove("--with-mixed-summaries")
generator.generateMixedSummaries = True
if "--with-mixed-neutrals" in sys.argv:
sys.argv.remove("--with-mixed-neutrals")
generator.generateMixedNeutrals = True
if "--with-typebased-summaries" in sys.argv:
sys.argv.remove("--with-typebased-summaries")
generator.generateTypeBasedSummaries = True
@@ -125,9 +110,7 @@ Requirements: `codeql` should both appear on your path.
not generator.generateSources and
not generator.generateSummaries and
not generator.generateNeutrals and
not generator.generateTypeBasedSummaries and
not generator.generateMixedSummaries and
not generator.generateMixedNeutrals):
not generator.generateTypeBasedSummaries):
generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True
n = len(sys.argv)
@@ -165,37 +148,23 @@ Requirements: `codeql` should both appear on your path.
return self.asAddsTo(rows, predicate)
def makeContent(self):
summaryAddsTo = {}
if self.generateSummaries:
summaryAddsTo = self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate)
else:
summaryAddsTo = { }
sinkAddsTo = {}
if self.generateSinks:
sinkAddsTo = self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate)
else:
sinkAddsTo = { }
sourceAddsTo = {}
if self.generateSources:
sourceAddsTo = self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate)
else:
sourceAddsTo = {}
neutralAddsTo = {}
if self.generateNeutrals:
neutralAddsTo = self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate)
else:
neutralAddsTo = { }
if self.generateMixedSummaries:
mixedSummaryAddsTo = self.getAddsTo("CaptureMixedSummaryModels.ql", helpers.summaryModelPredicate)
else:
mixedSummaryAddsTo = { }
if self.generateMixedNeutrals:
mixedNeutralAddsTo = self.getAddsTo("CaptureMixedNeutralModels.ql", helpers.neutralModelPredicate)
else:
mixedNeutralAddsTo = { }
return helpers.merge(summaryAddsTo, mixedSummaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo, mixedNeutralAddsTo)
return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo)
def makeTypeBasedContent(self):
if self.generateTypeBasedSummaries:
@@ -211,7 +180,9 @@ Requirements: `codeql` should both appear on your path.
extensions:
{0}"""
for entry in extensions:
target = os.path.join(self.generatedFrameworks, entry + extension)
# Replace problematic characters with dashes, and collapse multiple dashes.
sanitizedEntry = re.sub(r'-+', '-', entry.replace('/', '-').replace(':', '-'))
target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension)
with open(target, "w") as f:
f.write(extensionTemplate.format(extensions[entry]))
print("Models as data extensions written to " + target)
@@ -228,9 +199,7 @@ extensions:
if (self.generateSinks or
self.generateSources or
self.generateSummaries or
self.generateNeutrals or
self.generateMixedSummaries or
self.generatedMixedNeutrals):
self.generateNeutrals):
self.save(content, ".model.yml")
if self.generateTypeBasedSummaries:

View File

@@ -86,7 +86,7 @@ case "${lang}" in
csharp | cpp | javascript | python)
scheme_file="${lang}/ql/lib/semmlecode.${lang}.dbscheme"
;;
go | ruby | swift)
go | ruby | rust | swift)
scheme_file="${lang}/ql/lib/${lang}.dbscheme"
;;
*)

81
misc/scripts/stageoverlap.py Executable file
View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import sys
import os
import re
# The DIL file to analyse is the single required command-line argument;
# without it, print a usage line and exit with a failure status.
script_args = sys.argv[1:]
if not script_args:
    print("Usage: stageoverlap.py <dil>")
    sys.exit(1)
dilfile = script_args[0]
# Keys (space-joined cached predicate names) of the stages already reported.
seen_stages = set()
# Maps each predicate name to the number of the stage that first computed it.
computed_predicates = {}
# Number of the most recently reported stage; incremented by process_stage.
stage_number = 0

# Trailing '#b'/'#f' run on a predicate name (the "magic" suffix), compiled
# once here instead of on every loop iteration.
_MAGIC_SUFFIX = re.compile(r'#[bf]+$')


def process_stage(stage, cached):
    """Report one stage and flag predicates that an earlier stage computed.

    stage:  list of names of the predicates evaluated in this stage.
    cached: list of names of the cached (query) predicates of this stage;
            joined with spaces they form the key that identifies the stage.

    Prints a stage header, then one 'Recompute from <n>: <predicate>' line
    for every predicate that was already recorded by stage <n>, then a blank
    line. Updates the module-level seen_stages, computed_predicates and
    stage_number. Returns None; a stage whose key was already seen is
    skipped without printing anything.
    """
    global stage_number
    stage_key = ' '.join(cached)
    # skip repeated stages (in case we're looking at DIL for several queries,
    # e.g. from a .qls)
    if stage_key in seen_stages:
        return
    # don't count the query-stage as seen, since we don't want to skip those
    if '#select' not in cached:
        seen_stages.add(stage_key)
    stage_number += 1
    print('STAGE {}:'.format(stage_number))
    print('{} cached predicate(s)'.format(len(cached)))
    print(stage_key)
    for predicate in stage:
        # disregard magic: drop a trailing '#[bf]+' suffix from the name
        predicate = _MAGIC_SUFFIX.sub('', predicate)
        # TODO: maybe also strip the hash?
        # predicate = re.sub('#[a-f0-9]+$', '', predicate)
        if predicate not in computed_predicates:
            # first time this predicate is seen: remember which stage did it
            computed_predicates[predicate] = stage_number
            continue
        # skip db-relations and some generated predicates
        if predicate.startswith(('@', 'project#')):
            continue
        prior_stage = computed_predicates[predicate]
        print('Recompute from {}: {}'.format(prior_stage, predicate))
    print()
# Walk the DIL file line by line, collecting the predicates of the current
# stage and its cached (query) predicates, and hand both lists to
# process_stage whenever an end-of-stage marker is reached.
with open(dilfile, 'r') as f:
    stage = []
    cached = []
    # True when the previous line was a bare 'query' keyword, meaning the
    # predicate name is expected at the start of the current line.
    query = False
    for line in f:
        # lines starting with a space are predicate bodies: ignore them
        if line.startswith(' '):
            continue
        parenpos = line.find('(')
        if parenpos != -1:
            # The predicate name is the space-free text immediately before
            # the first '(' on the line.
            start = line.rfind(' ', 0, parenpos)
            predicate = line[start+1:parenpos]
            if predicate.startswith('`'):
                # drop the surrounding backticks
                predicate = predicate[1:-1]
            stage.append(predicate)
        elif line.startswith('query '):
            # query predicates, aka cached predicates, are written either as
            # 'query <predicatename> = ...' on one line ...
            predicate = line.split(' ')[1]
            cached.append(predicate)
        elif line == 'query\n':
            # ... or split across 2+ lines, with the name on the next line
            query = True
        elif query:
            predicate = line.split(' ')[0]
            cached.append(predicate)
            query = False
        elif line == '/* ---------- END STAGE ---------- */\n':
            process_stage(stage, cached)
            stage = []
            cached = []