Merge branch 'github:main' into crypto-test

2026-04-26 17:25:19 +02:00 · 2025-04-30 16:35:26 +02:00
parent 7a96f5682e c8e564b2ba
commit 27c7bf3047
6768 changed files with 395014 additions and 163489 deletions
--- a/misc/scripts/accept-expected-changes-from-ci.py
+++ b/misc/scripts/accept-expected-changes-from-ci.py
@@ -136,6 +136,7 @@ def make_patches_from_log_file(log_file_lines) -> List[Patch]:
            known_start_paths = {
                # internal CI runs
                "/home/runner/work/semmle-code/semmle-code/ql/": CODEQL_REPO_DIR,
+                "/Users/runner/work/semmle-code/semmle-code/ql/": CODEQL_REPO_DIR,
                "/home/runner/work/semmle-code/semmle-code/target/codeql-java-integration-tests/ql/": CODEQL_REPO_DIR,
                "/home/runner/work/semmle-code/semmle-code/" : SEMMLE_CODE_DIR,
                # github actions on codeql repo
--- a/misc/scripts/create-change-note.py
+++ b/misc/scripts/create-change-note.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3

-# Creates a change note and opens it in VSCode for editing.
+# Creates a change note and opens it in $EDITOR (or VSCode if the environment
+# variable is not set) for editing.

 # Expects to receive the following arguments:
 # - What language the change note is for
@@ -51,5 +52,6 @@ category: {change_category}
 with open(change_note_file, "w") as f:
    f.write(change_note)

-# Open the change note file in VSCode, reusing the existing window if possible
-os.system(f"code -r {change_note_file}")
+editor = os.environ.get('EDITOR', 'code -r')
+
+os.system(f"{editor} {change_note_file}")
--- a/misc/scripts/generate-code-scanning-query-list.py
+++ b/misc/scripts/generate-code-scanning-query-list.py
@@ -30,8 +30,8 @@ arguments = parser.parse_args()
 assert hasattr(arguments, "ignore_missing_query_packs")

 # Define which languages and query packs to consider
-languages = [ "cpp", "csharp", "go", "java", "javascript", "python", "ruby", "swift" ]
-packs = [ "code-scanning", "security-and-quality", "security-extended", "security-experimental" ]
+languages = [ "actions", "cpp", "csharp", "go", "java", "javascript", "python", "ruby", "swift" ]
+packs = [ "code-scanning", "security-and-quality", "security-extended", "security-experimental", "code-quality"]

 class CodeQL:
    def __init__(self):
@@ -169,7 +169,7 @@ with CodeQL() as codeql:
            for pack in packs:
                # Get absolute paths to queries in this pack by using 'codeql resolve queries'
                try:
-                    queries_subp = codeql.command(["resolve","queries","--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)])
+                    queries_subp = codeql.command(["resolve","queries","--search-path", codeql_search_path, "%s-%s.qls" % (lang, pack)]).strip()
                except Exception as e:
                    # Resolving queries might go wrong if the github/codeql repository is not
                    # on the search path.
@@ -183,8 +183,13 @@ with CodeQL() as codeql:
                    else:
                        sys.exit("You can use '--ignore-missing-query-packs' to ignore this error")

+                # Exception for the code-quality suites, which might be empty, but must be resolvable.
+                if pack == 'code-quality' and queries_subp == '':
+                    print(f'Warning: skipping empty suite code-quality', file=sys.stderr)
+                    continue
+
                # Investigate metadata for every query by using 'codeql resolve metadata'
-                for queryfile in queries_subp.strip().split("\n"):
+                for queryfile in queries_subp.split("\n"):
                    query_metadata_json = codeql.command(["resolve","metadata",queryfile]).strip()

                    # Turn an absolute path to a query file into an nwo-prefixed path (e.g. github/codeql/java/ql/src/....)
--- a/misc/scripts/models-as-data/generate_flow_model.py
+++ b/misc/scripts/models-as-data/generate_flow_model.py
@@ -8,6 +8,7 @@ import shlex
 import subprocess
 import sys
 import tempfile
+import re

 def quote_if_needed(row):
    if row != "true" and row != "false":
@@ -33,8 +34,6 @@ class Generator:
        self.generateSources = False
        self.generateSummaries = False
        self.generateNeutrals = False
-        self.generateMixedSummaries = False
-        self.generateMixedNeutrals = False
        self.generateTypeBasedSummaries = False
        self.dryRun = False
        self.dirname = "modelgenerator"
@@ -52,8 +51,6 @@ Which models are generated is controlled by the flags:
    --with-sources
    --with-summaries
    --with-neutrals
-    --with-mixed-summaries (Experimental). May not be used in conjunction with --with-summaries.
-    --with-mixed-neutrals (Experimental). Should only be used in conjunction with --with-mixed-summaries.
    --with-typebased-summaries (Experimental)
 If none of these flags are specified, all models are generated except for the type based models.

@@ -65,7 +62,7 @@ $ python3 GenerateFlowModel.py /tmp/dbs/my_library_db --with-sinks
 $ python3 GenerateFlowModel.py /tmp/dbs/my_library_db --with-sinks my_directory


-Requirements: `codeql` should both appear on your path.
+Requirements: `codeql` should appear on your path.
    """)


@@ -85,10 +82,6 @@ Requirements: `codeql` should both appear on your path.
            generator.printHelp()
            sys.exit(0)

-        if "--with-summaries" in sys.argv and "--with-mixed-summaries" in sys.argv:
-            generator.printHelp()
-            sys.exit(0)
-
        if "--with-sinks" in sys.argv:
            sys.argv.remove("--with-sinks")
            generator.generateSinks = True
@@ -105,14 +98,6 @@ Requirements: `codeql` should both appear on your path.
            sys.argv.remove("--with-neutrals")
            generator.generateNeutrals = True

-        if "--with-mixed-summaries" in sys.argv:
-            sys.argv.remove("--with-mixed-summaries")
-            generator.generateMixedSummaries = True
-
-        if "--with-mixed-neutrals" in sys.argv:
-            sys.argv.remove("--with-mixed-neutrals")
-            generator.generateMixedNeutrals = True
-
        if "--with-typebased-summaries" in sys.argv:
            sys.argv.remove("--with-typebased-summaries")
            generator.generateTypeBasedSummaries = True
@@ -125,9 +110,7 @@ Requirements: `codeql` should both appear on your path.
           not generator.generateSources and
           not generator.generateSummaries and
           not generator.generateNeutrals and
-           not generator.generateTypeBasedSummaries and
-           not generator.generateMixedSummaries and
-           not generator.generateMixedNeutrals):
+           not generator.generateTypeBasedSummaries):
            generator.generateSinks = generator.generateSources = generator.generateSummaries = generator.generateNeutrals = True

        n = len(sys.argv)
@@ -165,37 +148,23 @@ Requirements: `codeql` should both appear on your path.
        return self.asAddsTo(rows, predicate)

    def makeContent(self):
+        summaryAddsTo = {}
        if self.generateSummaries:
            summaryAddsTo = self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate)
-        else:
-            summaryAddsTo = { }

+        sinkAddsTo = {}
        if self.generateSinks:
            sinkAddsTo = self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate)
-        else:
-            sinkAddsTo = { }

+        sourceAddsTo = {}
        if self.generateSources:
            sourceAddsTo = self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate)
-        else:
-            sourceAddsTo = {}

+        neutralAddsTo = {}
        if self.generateNeutrals:
            neutralAddsTo = self.getAddsTo("CaptureNeutralModels.ql", helpers.neutralModelPredicate)
-        else:
-            neutralAddsTo = { }

-        if self.generateMixedSummaries:
-            mixedSummaryAddsTo = self.getAddsTo("CaptureMixedSummaryModels.ql", helpers.summaryModelPredicate)
-        else:
-            mixedSummaryAddsTo = { }
-
-        if self.generateMixedNeutrals:
-            mixedNeutralAddsTo = self.getAddsTo("CaptureMixedNeutralModels.ql", helpers.neutralModelPredicate)
-        else:
-            mixedNeutralAddsTo = { }
-
-        return helpers.merge(summaryAddsTo, mixedSummaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo, mixedNeutralAddsTo)
+        return helpers.merge(summaryAddsTo, sinkAddsTo, sourceAddsTo, neutralAddsTo)

    def makeTypeBasedContent(self):
        if self.generateTypeBasedSummaries:
@@ -211,7 +180,9 @@ Requirements: `codeql` should both appear on your path.
 extensions:
 {0}"""
        for entry in extensions:
-            target = os.path.join(self.generatedFrameworks, entry + extension)
+            # Replace problematic characters with dashes, and collapse multiple dashes.
+            sanitizedEntry = re.sub(r'-+', '-', entry.replace('/', '-').replace(':', '-'))
+            target = os.path.join(self.generatedFrameworks, sanitizedEntry + extension)
            with open(target, "w") as f:
                f.write(extensionTemplate.format(extensions[entry]))
            print("Models as data extensions written to " + target)
@@ -228,9 +199,7 @@ extensions:
        if (self.generateSinks or
           self.generateSources or
           self.generateSummaries or
-           self.generateNeutrals or
-           self.generateMixedSummaries or
-           self.generatedMixedNeutrals):
+           self.generateNeutrals):
            self.save(content, ".model.yml")

        if self.generateTypeBasedSummaries:
--- a/misc/scripts/prepare-db-upgrade.sh
+++ b/misc/scripts/prepare-db-upgrade.sh
@@ -86,7 +86,7 @@ case "${lang}" in
  csharp | cpp | javascript | python)
    scheme_file="${lang}/ql/lib/semmlecode.${lang}.dbscheme"
    ;;
-  go | ruby | swift)
+  go | ruby | rust | swift)
    scheme_file="${lang}/ql/lib/${lang}.dbscheme"
    ;;
  *)
--- a/misc/scripts/stageoverlap.py
+++ b/misc/scripts/stageoverlap.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import re
+
+# the DIL file to analyse is the first (and only required) command-line argument
+if len(sys.argv) < 2:
+    print("Usage: stageoverlap.py <dil>")
+    sys.exit(1)
+
+dilfile = sys.argv[1]
+
+seen_stages = set()  # keys (space-joined cached predicates) of non-query stages already reported
+computed_predicates = {}  # predicate name -> number of the first stage in which it appeared
+stage_number = 0  # number of the most recently reported stage (the first reported stage is 1)
+
+def process_stage(stage, cached):
+    """Report one stage, flagging predicates that an earlier stage already computed.
+
+    stage holds the stage's predicate names; cached holds its query (cached) predicate names.
+    """
+    global stage_number
+    stage_key = ' '.join(cached)
+    # skip repeated stages (in case we're looking at DIL for several queries, e.g. from a .qls)
+    if stage_key in seen_stages:
+        return
+    # don't count the query-stage as seen, since we don't want to skip those
+    if '#select' not in cached:
+        seen_stages.add(stage_key)
+    stage_number += 1
+    print('STAGE ' + str(stage_number) + ':')
+    print(str(len(cached)) + ' cached predicate(s)')
+    print(stage_key)
+    for predicate in stage:
+        # disregard magic: strip a trailing '#' followed by b/f characters
+        # TODO: maybe also strip the hash, i.e. a trailing '#[a-f0-9]+'?
+        predicate = re.sub(r'#[bf]+$', '', predicate)
+        if predicate not in computed_predicates:
+            computed_predicates[predicate] = stage_number
+        elif not predicate.startswith(('@', 'project#')):
+            # db-relations ('@...') and generated 'project#...' predicates are never reported
+            print('Recompute from ' + str(computed_predicates[predicate]) + ': ' + predicate)
+    print()
+
+# scan the DIL file line by line, collecting each stage's predicates and reporting it at its end marker
+with open(dilfile, 'r') as f:
+    stage = []  # predicate names collected since the last END STAGE marker
+    cached = []  # query (cached) predicate names collected since the last END STAGE marker
+    query = False  # True after a bare 'query' line until a following line supplies the name
+    for line in f:
+        # skip lines starting with a space, i.e. predicate bodies
+        if line.startswith(' '): continue
+        # the predicate name is the space-free text immediately before the first '('
+        parenpos = line.find('(')
+        if parenpos != -1:
+            start = line.rfind(' ', 0, parenpos)
+            predicate = line[start+1:parenpos]
+            if predicate.startswith('`'):
+                # remove the leading and trailing backticks
+                predicate = predicate[1:-1]
+            stage.append(predicate)
+            continue
+        # query predicates, aka cached predicates, are written either as
+        # 'query <predicatename> = ...' on one line, or split across 2+ lines
+        if line.startswith('query '):
+            predicate = line.split(' ')[1]
+            cached.append(predicate)
+            continue
+        if line == 'query\n':
+            query = True
+            continue
+        if query:
+            predicate = line.split(' ')[0]
+            cached.append(predicate)
+            query = False
+            continue
+        if line == '/* ---------- END STAGE ---------- */\n':  # stage complete: report it and reset
+            process_stage(stage, cached)
+            stage = []
+            cached = []